summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/importers/crossref.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-01-24 16:16:22 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-01-24 16:16:30 -0800
commita85de0b6e82c65d932c84a41b284083b4879934a (patch)
tree231688205efb16670471c48ca719d3e636771f3d /python/fatcat_tools/importers/crossref.py
parent6b1b131ba5e899a069fe280663d331932a8cbae5 (diff)
downloadfatcat-a85de0b6e82c65d932c84a41b284083b4879934a.tar.gz
fatcat-a85de0b6e82c65d932c84a41b284083b4879934a.zip
tweak crossref import, and update tests
Diffstat (limited to 'python/fatcat_tools/importers/crossref.py')
-rw-r--r--python/fatcat_tools/importers/crossref.py38
1 files changed, 27 insertions, 11 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 4a0322e7..fbf30a32 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -152,8 +152,8 @@ class CrossrefImporter(EntityImporter):
elif am.get('family'):
raw_name = am['family']
else:
- # TODO: defaults back to a pseudo-null value
- raw_name = am.get('given', '<blank>')
+ # TODO: can end up empty
+ raw_name = am.get('given')
extra = dict()
if ctype == "author":
index = i
@@ -244,9 +244,7 @@ class CrossrefImporter(EntityImporter):
'collection_title', 'chapter_number'):
if clean(rm.get(k)):
extra[k] = clean(rm[k])
- if extra:
- extra = dict(crossref=extra)
- else:
+ if not extra:
extra = None
refs.append(fatcat_client.ReleaseRef(
index=i,
@@ -269,20 +267,31 @@ class CrossrefImporter(EntityImporter):
# extra fields
extra = dict()
- for key in ('subject', 'type', 'alternative-id', 'container-title',
- 'subtitle', 'archive', 'funder', 'group-title'):
- # TODO: unpack "container-title" array?
+ extra_crossref = dict()
+ # top-level extra keys
+ if not container_id:
+ if obj.get('container-title'):
+ extra['container_name'] = clean(obj['container-title'][0])
+ for key in ('group-title', 'subtitle'):
val = obj.get(key)
if val:
if type(val) == str:
extra[key] = clean(val)
else:
extra[key] = val
+ # crossref-nested extra keys
+ for key in ('subject', 'type', 'alternative-id', 'archive', 'funder'):
+ val = obj.get(key)
+ if val:
+ if type(val) == str:
+ extra_crossref[key] = clean(val)
+ else:
+ extra_crossref[key] = val
if license_extra:
- extra['license'] = license_extra
+ extra_crossref['license'] = license_extra
if len(obj['title']) > 1:
- extra['other-titles'] = [clean(t) for t in obj['title'][1:]]
+ extra['aliases'] = [clean(t) for t in obj['title'][1:]]
# ISBN
isbn13 = None
@@ -325,11 +334,18 @@ class CrossrefImporter(EntityImporter):
release_year = raw_date[0]
release_date = None
+
original_title = None
if obj.get('original-title'):
original_title = clean(obj.get('original-title')[0], force_xml=True)
if obj.get('title'):
title = clean(obj.get('title')[0], force_xml=True)
+
+ if extra_crossref:
+ extra['crossref'] = extra_crossref
+ if not extra:
+ extra = None
+
re = fatcat_client.ReleaseEntity(
work_id=None,
container_id=container_id,
@@ -353,7 +369,7 @@ class CrossrefImporter(EntityImporter):
pages=clean(obj.get('page')),
language=None, # crossref doesn't supply language info
license_slug=license_slug,
- extra=dict(crossref=extra),
+ extra=extra,
abstracts=abstracts,
contribs=contribs,
refs=refs,