diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/crossref.py | 17 | ||||
| -rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 6 | 
2 files changed, 14 insertions, 9 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 1ea47707..13179207 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -250,16 +250,18 @@ class CrossrefImporter(FatcatImporter):              return None          # release date parsing is amazingly complex -        release_date = obj['issued']['date-parts'][0] -        if not release_date or not release_date[0]: +        raw_date = obj['issued']['date-parts'][0] +        if not raw_date or not raw_date[0]:              # got some NoneType, even though at least year is supposed to be set +            release_year = None              release_date = None -        elif len(release_date) == 3: -            release_date = datetime.date(year=release_date[0], month=release_date[1], day=release_date[2]) +        elif len(raw_date) == 3: +            release_year = raw_date[0] +            release_date = datetime.date(year=raw_date[0], month=raw_date[1], day=raw_date[2])          else: -            # only the year is actually required; mangle to first day for date -            # (TODO: something better?) -            release_date = datetime.date(year=release_date[0], month=1, day=1) +            # sometimes only the year is included, not the full date +            release_year = raw_date[0] +            release_date = None          re = fatcat_client.ReleaseEntity(              work_id=None, @@ -277,6 +279,7 @@ class CrossrefImporter(FatcatImporter):              pmcid=extids['pmcid'],              wikidata_qid=extids['wikidata_qid'],              release_date=release_date, +            release_year=release_year,              issue=obj.get('issue'),              volume=obj.get('volume'),              pages=obj.get('page'), diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index b84f7145..47a753a6 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -66,9 +66,10 @@ class GrobidMetadataImporter(FatcatImporter):              refs.append(ref)          release_date = None +        release_year = None          if obj.get('date'): -            # TODO: only returns year, ever? how to handle? -            release_date = datetime.datetime(year=int(obj['date'][:4]), month=1, day=1).date() +            # only returns year, ever? +            release_year = int(obj['date'][:4])          if obj.get('doi'):              extra['doi'] = obj['doi'] @@ -88,6 +89,7 @@ class GrobidMetadataImporter(FatcatImporter):              title=obj['title'].strip(),              release_type="article-journal",              release_date=release_date, +            release_year=release_year,              contribs=contribs,              refs=refs,              publisher=obj['journal'].get('publisher'),  | 
