diff options
Diffstat (limited to 'python/fatcat_tools/importers')
-rw-r--r-- python/fatcat_tools/importers/crossref.py | 17
-rw-r--r-- python/fatcat_tools/importers/grobid_metadata.py | 6
2 files changed, 14 insertions, 9 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 1ea47707..13179207 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -250,16 +250,18 @@ class CrossrefImporter(FatcatImporter):
             return None
 
         # release date parsing is amazingly complex
-        release_date = obj['issued']['date-parts'][0]
-        if not release_date or not release_date[0]:
+        raw_date = obj['issued']['date-parts'][0]
+        if not raw_date or not raw_date[0]:
             # got some NoneType, even though at least year is supposed to be set
+            release_year = None
             release_date = None
-        elif len(release_date) == 3:
-            release_date = datetime.date(year=release_date[0], month=release_date[1], day=release_date[2])
+        elif len(raw_date) == 3:
+            release_year = raw_date[0]
+            release_date = datetime.date(year=raw_date[0], month=raw_date[1], day=raw_date[2])
         else:
-            # only the year is actually required; mangle to first day for date
-            # (TODO: something better?)
-            release_date = datetime.date(year=release_date[0], month=1, day=1)
+            # sometimes only the year is included, not the full date
+            release_year = raw_date[0]
+            release_date = None
 
         re = fatcat_client.ReleaseEntity(
             work_id=None,
@@ -277,6 +279,7 @@ class CrossrefImporter(FatcatImporter):
             pmcid=extids['pmcid'],
             wikidata_qid=extids['wikidata_qid'],
             release_date=release_date,
+            release_year=release_year,
             issue=obj.get('issue'),
             volume=obj.get('volume'),
             pages=obj.get('page'),
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index b84f7145..47a753a6 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -66,9 +66,10 @@ class GrobidMetadataImporter(FatcatImporter):
             refs.append(ref)
 
         release_date = None
+        release_year = None
         if obj.get('date'):
-            # TODO: only returns year, ever? how to handle?
-            release_date = datetime.datetime(year=int(obj['date'][:4]), month=1, day=1).date()
+            # only returns year, ever?
+            release_year = int(obj['date'][:4])
 
         if obj.get('doi'):
             extra['doi'] = obj['doi']
@@ -88,6 +89,7 @@ class GrobidMetadataImporter(FatcatImporter):
             title=obj['title'].strip(),
             release_type="article-journal",
             release_date=release_date,
+            release_year=release_year,
             contribs=contribs,
             refs=refs,
             publisher=obj['journal'].get('publisher'),