aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2018-09-12 11:06:06 -0700
committerBryan Newbold <bnewbold@robocracy.org>2018-09-12 11:06:06 -0700
commit722e4321bfa6431136b1e03f0c9e4ac11903c4df (patch)
treefb688bf672f209616b0f8049b4749cbe8da0d3fa /python/fatcat
parent1db3bbe11139cee2034cc3f632d13bcce2c03a89 (diff)
downloadfatcat-722e4321bfa6431136b1e03f0c9e4ac11903c4df.tar.gz
fatcat-722e4321bfa6431136b1e03f0c9e4ac11903c4df.zip
extid support for crossref importer
Diffstat (limited to 'python/fatcat')
-rw-r--r--python/fatcat/crossref_importer.py31
1 files changed, 30 insertions, 1 deletions
diff --git a/python/fatcat/crossref_importer.py b/python/fatcat/crossref_importer.py
index a59d0a45..a2e14ed1 100644
--- a/python/fatcat/crossref_importer.py
+++ b/python/fatcat/crossref_importer.py
@@ -1,6 +1,7 @@
import sys
import json
+import sqlite3
import itertools
import fatcat_client
from fatcat.importer_common import FatcatImporter
@@ -8,10 +9,31 @@ from fatcat.importer_common import FatcatImporter
class FatcatCrossrefImporter(FatcatImporter):
- def __init__(self, host_url, issn_map_file, create_containers=True):
+ def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True):
super().__init__(host_url, issn_map_file)
+ self.extid_map_db = None
+ if extid_map_file:
+ db_uri = "file:{}?mode=ro".format(extid_map_file)
+ print("Using external ID map: {}".format(db_uri))
+ self.extid_map_db = sqlite3.connect(db_uri, uri=True)
+ else:
+ print("Not using external ID map")
self.create_containers = create_containers
+ def lookup_ext_ids(self, doi):
+ if self.extid_map_db is None:
+ return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None)
+ row = self.extid_map_db.execute("SELECT core, pmid, pmcid, wikidata FROM ids WHERE doi=? LIMIT 1",
+ [doi.lower()]).fetchone()
+ if row is None:
+ return dict(core_id=None, pmid=None, pmcid=None, wikidata_qid=None)
+ row = [str((cell or None)) for cell in row]
+ return dict(
+ core_id=row[0],
+ pmid=row[1],
+ pmcid=row[2],
+ wikidata_qid=row[3])
+
def parse_crossref_dict(self, obj):
"""
obj is a python dict (parsed from json).
@@ -103,6 +125,9 @@ class FatcatCrossrefImporter(FatcatImporter):
'license': obj.get('license', [dict(URL=None)])[0]['URL'] or None,
'alternative-id': obj.get('alternative-id', [])})
+ # external identifiers
+ extids = self.lookup_ext_ids(doi=obj['DOI'].lower())
+
re = fatcat_client.ReleaseEntity(
work_id=None,
title=obj['title'][0],
@@ -111,6 +136,10 @@ class FatcatCrossrefImporter(FatcatImporter):
container_id=container_id,
release_type=obj['type'],
doi=obj['DOI'].lower(),
+ core_id=extids['core_id'],
+ pmid=extids['pmid'],
+ pmcid=extids['pmcid'],
+ wikidata_qid=extids['wikidata_qid'],
release_date=obj['created']['date-time'],
issue=obj.get('issue'),
volume=obj.get('volume'),