summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2018-11-14 21:45:09 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2018-11-14 21:45:09 -0800
commit: 30bdb1b0ba28b2e4a81aa7209d294c224d8a2245 (patch)
tree: 8556d05fef32c06ad8ad8b3b61649e8febae415f /python/fatcat_tools
parent: 18c1434319ed7be67d11b47ea7c5d1393caba193 (diff)
download: fatcat-30bdb1b0ba28b2e4a81aa7209d294c224d8a2245.tar.gz
download: fatcat-30bdb1b0ba28b2e4a81aa7209d294c224d8a2245.zip
update crossref controlled vocab
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/crossref.py 33
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py 2
2 files changed, 32 insertions, 3 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index c4695c7f..fe80c2d3 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -8,6 +8,28 @@ import fatcat_client
from fatcat_tools.importers.common import FatcatImporter
+CROSSREF_TYPE_MAP = {
+ 'book': 'book',
+ 'book-chapter': 'chapter',
+ 'book-part': 'chapter',
+ 'book-section': 'chapter',
+ 'component': None,
+ 'dataset': 'dataset',
+ 'dissertation': 'thesis',
+ 'edited-book': 'book',
+ 'journal-article': 'article-journal',
+ 'monograph': 'monograph',
+ 'other': None,
+ 'peer-review': 'peer_review',
+ 'posted-content': 'post',
+ 'proceedings-article': 'paper-conference',
+ 'reference-book': 'book',
+ 'reference-entry': 'entry',
+ 'report': 'report',
+ 'standard': 'standard',
+}
+
+
class FatcatCrossrefImporter(FatcatImporter):
def __init__(self, host_url, issn_map_file, extid_map_file=None, create_containers=True):
@@ -35,6 +57,9 @@ class FatcatCrossrefImporter(FatcatImporter):
pmcid=row[2],
wikidata_qid=row[3])
+ def map_release_type(self, crossref_type):
+ return CROSSREF_TYPE_MAP.get(crossref_type)
+
def parse_crossref_dict(self, obj):
"""
obj is a python dict (parsed from json).
@@ -46,7 +71,10 @@ class FatcatCrossrefImporter(FatcatImporter):
return None
# Other ways to be out of scope (provisionally)
- if (not 'type' in obj):
+ # journal-issue and journal-volume map to None, but allowed for now
+ if obj.get('type') in (None, 'journal', 'proceedings',
+ 'standard-series', 'report-series', 'book-series', 'book-set',
+ 'book-track', 'proceedings-series'):
return None
# contribs
@@ -76,6 +104,7 @@ class FatcatCrossrefImporter(FatcatImporter):
extra['sequence'] = am.get('sequence')
if not extra:
extra = None
+ assert(ctype in ("author", "editor", "translator"))
contribs.append(fatcat_client.ReleaseContrib(
creator_id=creator_id,
index=index,
@@ -216,7 +245,7 @@ class FatcatCrossrefImporter(FatcatImporter):
refs=refs,
container_id=container_id,
publisher=publisher,
- release_type=obj['type'],
+ release_type=self.map_release_type(obj['type']),
release_status=release_status,
doi=obj['DOI'].lower(),
isbn13=isbn13,
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 6d635479..dedc9728 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -67,7 +67,7 @@ class FatcatGrobidMetadataImporter(FatcatImporter):
ref['extra'] = cite_extra
refs.append(ref)
- release_type = "journal-article"
+ release_type = "article-journal"
release_date = None
if obj.get('date'):
# TODO: only returns year, ever? how to handle?