diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-24 11:04:57 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-05-24 11:04:57 -0700 |
commit | 5e7cf1e3e67863947db8f822e448baad6890aa26 (patch) | |
tree | a4e8f2e34a2028b2939ba66937ba3cfb47ec0124 | |
parent | f9f97a23f5fe2ccbdab191effa20b8631383b89d (diff) | |
download | fatcat-5e7cf1e3e67863947db8f822e448baad6890aa26.tar.gz fatcat-5e7cf1e3e67863947db8f822e448baad6890aa26.zip |
better crossref container_name handling
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index f2e230d1..eb4aacdd 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -237,13 +237,18 @@ class CrossrefImporter(EntityImporter): container_id = self.lookup_issnl(issnl) publisher = clean(obj.get('publisher')) + container_name = obj.get('container-title') + if container_name: + container_name = clean(container_name[0], force_xml=True) + if not container_name: + container_name = None if (container_id is None and self.create_containers and (issnl is not None) - and obj.get('container-title') and len(obj['container-title']) > 0): + and container_name): ce = fatcat_client.ContainerEntity( issnl=issnl, publisher=publisher, container_type=self.map_container_type(release_type), - name=clean(obj['container-title'][0], force_xml=True)) + name=container_name) ce_edit = self.create_container(ce) container_id = ce_edit.ident self._issnl_id_map[issnl] = container_id @@ -277,9 +282,9 @@ class CrossrefImporter(EntityImporter): if key and key.startswith(obj['DOI'].upper()): key = key.replace(obj['DOI'].upper() + "-", '') key = key.replace(obj['DOI'].upper(), '') - container_name = rm.get('volume-title') - if not container_name: - container_name = rm.get('journal-title') + ref_container_name = rm.get('volume-title') + if not ref_container_name: + ref_container_name = rm.get('journal-title') elif rm.get('journal-title'): ref_extra['journal-title'] = rm['journal-title'] if rm.get('DOI'): @@ -301,7 +306,7 @@ class CrossrefImporter(EntityImporter): target_release_id=None, key=key, year=year, - container_name=clean(container_name), + container_name=clean(ref_container_name), title=clean(rm.get('article-title')), locator=clean(rm.get('first-page')), # TODO: just dump JSON somewhere here? @@ -321,7 +326,7 @@ class CrossrefImporter(EntityImporter): # top-level extra keys if not container_id: if obj.get('container-title'): - extra['container_name'] = clean(obj['container-title'][0]) + extra['container_name'] = container_name for key in ('group-title'): val = obj.get(key) if val: |