about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-05-24 11:04:57 -0700
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-05-24 11:04:57 -0700
commit: 5e7cf1e3e67863947db8f822e448baad6890aa26 (patch)
tree: a4e8f2e34a2028b2939ba66937ba3cfb47ec0124 /python/fatcat_tools
parent: f9f97a23f5fe2ccbdab191effa20b8631383b89d (diff)
download: fatcat-5e7cf1e3e67863947db8f822e448baad6890aa26.tar.gz
          fatcat-5e7cf1e3e67863947db8f822e448baad6890aa26.zip
better crossref container_name handling
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/importers/crossref.py | 19
1 file changed, 12 insertions, 7 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index f2e230d1..eb4aacdd 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -237,13 +237,18 @@ class CrossrefImporter(EntityImporter):
container_id = self.lookup_issnl(issnl)
publisher = clean(obj.get('publisher'))
+ container_name = obj.get('container-title')
+ if container_name:
+ container_name = clean(container_name[0], force_xml=True)
+ if not container_name:
+ container_name = None
if (container_id is None and self.create_containers and (issnl is not None)
- and obj.get('container-title') and len(obj['container-title']) > 0):
+ and container_name):
ce = fatcat_client.ContainerEntity(
issnl=issnl,
publisher=publisher,
container_type=self.map_container_type(release_type),
- name=clean(obj['container-title'][0], force_xml=True))
+ name=container_name)
ce_edit = self.create_container(ce)
container_id = ce_edit.ident
self._issnl_id_map[issnl] = container_id
@@ -277,9 +282,9 @@ class CrossrefImporter(EntityImporter):
if key and key.startswith(obj['DOI'].upper()):
key = key.replace(obj['DOI'].upper() + "-", '')
key = key.replace(obj['DOI'].upper(), '')
- container_name = rm.get('volume-title')
- if not container_name:
- container_name = rm.get('journal-title')
+ ref_container_name = rm.get('volume-title')
+ if not ref_container_name:
+ ref_container_name = rm.get('journal-title')
elif rm.get('journal-title'):
ref_extra['journal-title'] = rm['journal-title']
if rm.get('DOI'):
@@ -301,7 +306,7 @@ class CrossrefImporter(EntityImporter):
target_release_id=None,
key=key,
year=year,
- container_name=clean(container_name),
+ container_name=clean(ref_container_name),
title=clean(rm.get('article-title')),
locator=clean(rm.get('first-page')),
# TODO: just dump JSON somewhere here?
@@ -321,7 +326,7 @@ class CrossrefImporter(EntityImporter):
# top-level extra keys
if not container_id:
if obj.get('container-title'):
- extra['container_name'] = clean(obj['container-title'][0])
+ extra['container_name'] = container_name
for key in ('group-title'):
val = obj.get(key)
if val: