diff options
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/importers/crossref.py | 17 | ||||
-rw-r--r-- | python/fatcat_tools/importers/grobid_metadata.py | 6 | ||||
-rw-r--r-- | python/fatcat_tools/transforms.py | 4 |
3 files changed, 18 insertions, 9 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py index 1ea47707..13179207 100644 --- a/python/fatcat_tools/importers/crossref.py +++ b/python/fatcat_tools/importers/crossref.py @@ -250,16 +250,18 @@ class CrossrefImporter(FatcatImporter): return None # release date parsing is amazingly complex - release_date = obj['issued']['date-parts'][0] - if not release_date or not release_date[0]: + raw_date = obj['issued']['date-parts'][0] + if not raw_date or not raw_date[0]: # got some NoneType, even though at least year is supposed to be set + release_year = None release_date = None - elif len(release_date) == 3: - release_date = datetime.date(year=release_date[0], month=release_date[1], day=release_date[2]) + elif len(raw_date) == 3: + release_year = raw_date[0] + release_date = datetime.date(year=raw_date[0], month=raw_date[1], day=raw_date[2]) else: - # only the year is actually required; mangle to first day for date - # (TODO: something better?) - release_date = datetime.date(year=release_date[0], month=1, day=1) + # sometimes only the year is included, not the full date + release_year = raw_date[0] + release_date = None re = fatcat_client.ReleaseEntity( work_id=None, @@ -277,6 +279,7 @@ class CrossrefImporter(FatcatImporter): pmcid=extids['pmcid'], wikidata_qid=extids['wikidata_qid'], release_date=release_date, + release_year=release_year, issue=obj.get('issue'), volume=obj.get('volume'), pages=obj.get('page'), diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py index b84f7145..47a753a6 100644 --- a/python/fatcat_tools/importers/grobid_metadata.py +++ b/python/fatcat_tools/importers/grobid_metadata.py @@ -66,9 +66,10 @@ class GrobidMetadataImporter(FatcatImporter): refs.append(ref) release_date = None + release_year = None if obj.get('date'): - # TODO: only returns year, ever? how to handle? - release_date = datetime.datetime(year=int(obj['date'][:4]), month=1, day=1).date() + # only returns year, ever? + release_year = int(obj['date'][:4]) if obj.get('doi'): extra['doi'] = obj['doi'] @@ -88,6 +89,7 @@ class GrobidMetadataImporter(FatcatImporter): title=obj['title'].strip(), release_type="article-journal", release_date=release_date, + release_year=release_year, contribs=contribs, refs=refs, publisher=obj['journal'].get('publisher'), diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py index 516b68ae..843c00a5 100644 --- a/python/fatcat_tools/transforms.py +++ b/python/fatcat_tools/transforms.py @@ -48,6 +48,10 @@ def release_to_elasticsearch(release): if release.release_date: # .isoformat() results in, eg, '2010-10-22' (YYYY-MM-DD) t['release_date'] = release.release_date.isoformat() + if release.release_year is None: + t['release_year'] = release.release_date.year + if release.release_year is not None: + t['release_year'] = release.release_year container = release.container container_is_kept = False |