From ba7f9214d2038882952eb50cd4dc5eff4eb0e6ff Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 9 Nov 2021 18:10:35 -0800 Subject: remove deprecated extid sqlite3 lookup table feature from importers This was used during initial bulk imports, but is no longer used and could create serious metadata problems if used accidentially. In retrospect, it also made metadata provenance less transparent, and may have done more harm than good overall. --- python/fatcat_tools/importers/datacite.py | 54 ------------------------------- 1 file changed, 54 deletions(-) (limited to 'python/fatcat_tools/importers/datacite.py') diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index d5622960..d4d7a9f5 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -12,7 +12,6 @@ import collections import datetime import json import re -import sqlite3 import sys from typing import Any, Dict, List, Optional, Sequence, Set, Tuple @@ -248,15 +247,6 @@ class DataciteImporter(EntityImporter): ) self.create_containers = kwargs.get("create_containers", True) - extid_map_file = kwargs.get("extid_map_file") - self.extid_map_db = None - if extid_map_file: - db_uri = "file:{}?mode=ro".format(extid_map_file) - print("Using external ID map: {}".format(db_uri), file=sys.stderr) - self.extid_map_db = sqlite3.connect(db_uri, uri=True) - else: - print("Not using external ID map", file=sys.stderr) - self.read_issn_map_file(issn_map_file) self.debug = debug self.insert_log_file = insert_log_file @@ -264,42 +254,6 @@ class DataciteImporter(EntityImporter): print("datacite with debug={}".format(self.debug), file=sys.stderr) - def lookup_ext_ids(self, doi: str) -> Dict[str, Any]: - """ - Return dictionary of identifiers referring to the same things as the given DOI. - """ - if self.extid_map_db is None: - return dict( - core_id=None, - pmid=None, - pmcid=None, - wikidata_qid=None, - arxiv_id=None, - jstor_id=None, - ) - row = self.extid_map_db.execute( - "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1", [doi.lower()] - ).fetchone() - if row is None: - return dict( - core_id=None, - pmid=None, - pmcid=None, - wikidata_qid=None, - arxiv_id=None, - jstor_id=None, - ) - row = [str(cell or "") or None for cell in row] - return dict( - core_id=row[0], - pmid=row[1], - pmcid=row[2], - wikidata_qid=row[3], - # TODO: - arxiv_id=None, - jstor_id=None, - ) - def parse_record(self, obj: Dict[str, Any]) -> Optional[ReleaseEntity]: """ Mapping datacite JSON to ReleaseEntity. @@ -706,8 +660,6 @@ class DataciteImporter(EntityImporter): if release_month: extra["release_month"] = release_month - extids = self.lookup_ext_ids(doi=doi) - # Assemble release. re = fatcat_openapi_client.ReleaseEntity( work_id=None, @@ -722,12 +674,6 @@ class DataciteImporter(EntityImporter): publisher=publisher, ext_ids=fatcat_openapi_client.ReleaseExtIds( doi=doi, - pmid=extids["pmid"], - pmcid=extids["pmcid"], - wikidata_qid=extids["wikidata_qid"], - core=extids["core_id"], - arxiv=extids["arxiv_id"], - jstor=extids["jstor_id"], ), contribs=contribs, volume=volume, -- cgit v1.2.3