summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2019-01-24 17:14:00 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-01-24 17:14:00 -0800
commit: 2e6e5d9b270044d3462a95512a12520650cc45af (patch)
tree: 63f17d39a1b1568c196aee5a8ff7561889035842
parent: 105fb88c4b32dd7428a4aa2c7a63607256d749ae (diff)
download: fatcat-2e6e5d9b270044d3462a95512a12520650cc45af.tar.gz
          fatcat-2e6e5d9b270044d3462a95512a12520650cc45af.zip
grobid import extra metadata tweaks
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py | 13
1 files changed, 7 insertions, 6 deletions
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 25f9fa89..bc09ec8f 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -77,7 +77,7 @@ class GrobidMetadataImporter(EntityImporter):
if not obj.get('title'):
return None
- extra = dict()
+ extra_grobid = dict()
if obj.get('abstract') and len(obj.get('abstract')) < MAX_ABSTRACT_BYTES:
abobj = dict(
@@ -128,19 +128,20 @@ class GrobidMetadataImporter(EntityImporter):
# only returns year, ever?
release_year = int(obj['date'][:4])
+ extra = dict()
+
if obj.get('doi'):
- extra['doi'] = obj['doi']
+ extra_grobid['doi'] = obj['doi']
if obj['journal'] and obj['journal'].get('name'):
extra['container_name'] = clean(obj['journal']['name'])
# TODO: ISSN/eISSN handling? or just journal name lookup?
+ if extra_grobid:
+ extra['grobid'] = extra_grobid
if self.longtail_oa:
extra['longtail_oa'] = True
-
- if extra:
- extra = dict(grobid=extra)
- else:
+ if not extra:
extra = None
re = fatcat_client.ReleaseEntity(