diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-24 17:14:00 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-24 17:14:00 -0800 |
commit | 2e6e5d9b270044d3462a95512a12520650cc45af (patch) | |
tree | 63f17d39a1b1568c196aee5a8ff7561889035842 /python/fatcat_tools | |
parent | 105fb88c4b32dd7428a4aa2c7a63607256d749ae (diff) | |
download | fatcat-2e6e5d9b270044d3462a95512a12520650cc45af.tar.gz fatcat-2e6e5d9b270044d3462a95512a12520650cc45af.zip |
grobid import extra metadata tweaks
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 13 |
1 file changed, 7 insertions, 6 deletions
```diff
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 25f9fa89..bc09ec8f 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -77,7 +77,7 @@ class GrobidMetadataImporter(EntityImporter):
         if not obj.get('title'):
             return None
 
-        extra = dict()
+        extra_grobid = dict()
 
         if obj.get('abstract') and len(obj.get('abstract')) < MAX_ABSTRACT_BYTES:
             abobj = dict(
@@ -128,19 +128,20 @@ class GrobidMetadataImporter(EntityImporter):
             # only returns year, ever?
             release_year = int(obj['date'][:4])
 
+        extra = dict()
+
         if obj.get('doi'):
-            extra['doi'] = obj['doi']
+            extra_grobid['doi'] = obj['doi']
         if obj['journal'] and obj['journal'].get('name'):
             extra['container_name'] = clean(obj['journal']['name'])
 
         # TODO: ISSN/eISSN handling? or just journal name lookup?
 
+        if extra_grobid:
+            extra['grobid'] = extra_grobid
         if self.longtail_oa:
             extra['longtail_oa'] = True
-
-        if extra:
-            extra = dict(grobid=extra)
-        else:
+        if not extra:
             extra = None
 
         re = fatcat_client.ReleaseEntity(
```