diff options
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 54 | ||||
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 54 | ||||
-rw-r--r-- | python/fatcat_tools/importers/jalc.py | 52 |
3 files changed, 0 insertions, 160 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index a41e2bf5..9c69fee3 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -1,5 +1,4 @@ import datetime -import sqlite3 from typing import Any, Dict, List, Optional, Sequence import fatcat_openapi_client @@ -128,8 +127,6 @@ class CrossrefImporter(EntityImporter): """ Importer for Crossref metadata. - Can use a local sqlite3 file for faster "external identifier" lookups - See https://github.com/CrossRef/rest-api-doc for JSON schema notes """ @@ -150,50 +147,8 @@ class CrossrefImporter(EntityImporter): ) self.create_containers: bool = kwargs.get("create_containers", True) - extid_map_file = kwargs.get("extid_map_file") - self.extid_map_db: Optional[Any] = None - if extid_map_file: - db_uri = "file:{}?mode=ro".format(extid_map_file) - print("Using external ID map: {}".format(db_uri)) - self.extid_map_db = sqlite3.connect(db_uri, uri=True) - else: - print("Not using external ID map") - self.read_issn_map_file(issn_map_file) - def lookup_ext_ids(self, doi: str) -> Optional[Any]: - if self.extid_map_db is None: - return dict( - core_id=None, - pmid=None, - pmcid=None, - wikidata_qid=None, - arxiv_id=None, - jstor_id=None, - ) - row = self.extid_map_db.execute( - "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1", [doi.lower()] - ).fetchone() - if row is None: - return dict( - core_id=None, - pmid=None, - pmcid=None, - wikidata_qid=None, - arxiv_id=None, - jstor_id=None, - ) - row = [str(cell or "") or None for cell in row] - return dict( - core_id=row[0], - pmid=row[1], - pmcid=row[2], - wikidata_qid=row[3], - # TODO: - arxiv_id=None, - jstor_id=None, - ) - def map_release_type(self, crossref_type: str) -> Optional[str]: return CROSSREF_TYPE_MAP.get(crossref_type) @@ -473,9 +428,6 @@ class CrossrefImporter(EntityImporter): # unknown release_stage = None - # external identifiers - extids: Dict[str, Any] = self.lookup_ext_ids(doi=obj["DOI"].lower()) or {} - # filter out unreasonably huge releases if len(abstracts) > 100: self.counts["skip-huge-abstracts"] += 1 @@ -538,13 +490,7 @@ class CrossrefImporter(EntityImporter): publisher=publisher, ext_ids=fatcat_openapi_client.ReleaseExtIds( doi=obj["DOI"].lower(), - pmid=extids["pmid"], - pmcid=extids["pmcid"], - wikidata_qid=extids["wikidata_qid"], isbn13=isbn13, - core=extids["core_id"], - arxiv=extids["arxiv_id"], - jstor=extids["jstor_id"], ), volume=clean(obj.get("volume")), issue=clean(obj.get("issue")), diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index d5622960..d4d7a9f5 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -12,7 +12,6 @@ import collections import datetime import json import re -import sqlite3 import sys from typing import Any, Dict, List, Optional, Sequence, Set, Tuple @@ -248,15 +247,6 @@ class DataciteImporter(EntityImporter): ) self.create_containers = kwargs.get("create_containers", True) - extid_map_file = kwargs.get("extid_map_file") - self.extid_map_db = None - if extid_map_file: - db_uri = "file:{}?mode=ro".format(extid_map_file) - print("Using external ID map: {}".format(db_uri), file=sys.stderr) - self.extid_map_db = sqlite3.connect(db_uri, uri=True) - else: - print("Not using external ID map", file=sys.stderr) - self.read_issn_map_file(issn_map_file) self.debug = debug self.insert_log_file = insert_log_file @@ -264,42 +254,6 @@ class DataciteImporter(EntityImporter): print("datacite with debug={}".format(self.debug), file=sys.stderr) - def lookup_ext_ids(self, doi: str) -> Dict[str, Any]: - """ - Return dictionary of identifiers referring to the same things as the given DOI. - """ - if self.extid_map_db is None: - return dict( - core_id=None, - pmid=None, - pmcid=None, - wikidata_qid=None, - arxiv_id=None, - jstor_id=None, - ) - row = self.extid_map_db.execute( - "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1", [doi.lower()] - ).fetchone() - if row is None: - return dict( - core_id=None, - pmid=None, - pmcid=None, - wikidata_qid=None, - arxiv_id=None, - jstor_id=None, - ) - row = [str(cell or "") or None for cell in row] - return dict( - core_id=row[0], - pmid=row[1], - pmcid=row[2], - wikidata_qid=row[3], - # TODO: - arxiv_id=None, - jstor_id=None, - ) - def parse_record(self, obj: Dict[str, Any]) -> Optional[ReleaseEntity]: """ Mapping datacite JSON to ReleaseEntity. @@ -706,8 +660,6 @@ class DataciteImporter(EntityImporter): if release_month: extra["release_month"] = release_month - extids = self.lookup_ext_ids(doi=doi) - # Assemble release. re = fatcat_openapi_client.ReleaseEntity( work_id=None, @@ -722,12 +674,6 @@ class DataciteImporter(EntityImporter): publisher=publisher, ext_ids=fatcat_openapi_client.ReleaseExtIds( doi=doi, - pmid=extids["pmid"], - pmcid=extids["pmcid"], - wikidata_qid=extids["wikidata_qid"], - core=extids["core_id"], - arxiv=extids["arxiv_id"], - jstor=extids["jstor_id"], ), contribs=contribs, volume=volume, diff --git a/python/fatcat_tools/importers/jalc.py b/python/fatcat_tools/importers/jalc.py index 2f10e533..a737ac9f 100644 --- a/python/fatcat_tools/importers/jalc.py +++ b/python/fatcat_tools/importers/jalc.py @@ -1,5 +1,4 @@ import datetime -import sqlite3 import sys from typing import Any, Dict, List, Optional, Sequence @@ -117,50 +116,8 @@ class JalcImporter(EntityImporter): ) self.create_containers = kwargs.get("create_containers", True) - extid_map_file = kwargs.get("extid_map_file") - self.extid_map_db = None - if extid_map_file: - db_uri = "file:{}?mode=ro".format(extid_map_file) - print("Using external ID map: {}".format(db_uri)) - self.extid_map_db = sqlite3.connect(db_uri, uri=True) - else: - print("Not using external ID map") - self.read_issn_map_file(issn_map_file) - def lookup_ext_ids(self, doi: str) -> Dict[str, Any]: - if self.extid_map_db is None: - return dict( - core_id=None, - pmid=None, - pmcid=None, - wikidata_qid=None, - arxiv_id=None, - jstor_id=None, - ) - row = self.extid_map_db.execute( - "SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1", [doi.lower()] - ).fetchone() - if row is None: - return dict( - core_id=None, - pmid=None, - pmcid=None, - wikidata_qid=None, - arxiv_id=None, - jstor_id=None, - ) - row = [str(cell or "") or None for cell in row] - return dict( - core_id=row[0], - pmid=row[1], - pmcid=row[2], - wikidata_qid=row[3], - # TODO: - arxiv_id=None, - jstor_id=None, - ) - def want(self, raw_record: Any) -> bool: return True @@ -330,9 +287,6 @@ class JalcImporter(EntityImporter): # reasonable default for this collection release_type = "article-journal" - # external identifiers - extids = self.lookup_ext_ids(doi=doi) - # extra: # translation_of # aliases @@ -356,12 +310,6 @@ class JalcImporter(EntityImporter): release_year=release_year, ext_ids=fatcat_openapi_client.ReleaseExtIds( doi=doi, - pmid=extids["pmid"], - pmcid=extids["pmcid"], - wikidata_qid=extids["wikidata_qid"], - core=extids["core_id"], - arxiv=extids["arxiv_id"], - jstor=extids["jstor_id"], ), volume=volume, issue=issue, |