diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-24 17:14:00 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-01-24 17:14:00 -0800 | 
| commit | 2e6e5d9b270044d3462a95512a12520650cc45af (patch) | |
| tree | 63f17d39a1b1568c196aee5a8ff7561889035842 | |
| parent | 105fb88c4b32dd7428a4aa2c7a63607256d749ae (diff) | |
| download | fatcat-2e6e5d9b270044d3462a95512a12520650cc45af.tar.gz fatcat-2e6e5d9b270044d3462a95512a12520650cc45af.zip | |
grobid import extra metadata tweaks
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 13 | 
1 file changed, 7 insertions, 6 deletions
```diff
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index 25f9fa89..bc09ec8f 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -77,7 +77,7 @@ class GrobidMetadataImporter(EntityImporter):
         if not obj.get('title'):
             return None
 
-        extra = dict()
+        extra_grobid = dict()
 
         if obj.get('abstract') and len(obj.get('abstract')) < MAX_ABSTRACT_BYTES:
             abobj = dict(
@@ -128,19 +128,20 @@ class GrobidMetadataImporter(EntityImporter):
             # only returns year, ever?
             release_year = int(obj['date'][:4])
 
+        extra = dict()
+
         if obj.get('doi'):
-            extra['doi'] = obj['doi']
+            extra_grobid['doi'] = obj['doi']
         if obj['journal'] and obj['journal'].get('name'):
             extra['container_name'] = clean(obj['journal']['name'])
 
         # TODO: ISSN/eISSN handling? or just journal name lookup?
 
+        if extra_grobid:
+            extra['grobid'] = extra_grobid
         if self.longtail_oa:
             extra['longtail_oa'] = True
-
-        if extra:
-            extra = dict(grobid=extra)
-        else:
+        if not extra:
             extra = None
 
         re = fatcat_client.ReleaseEntity(
```
