about | summary | refs | log | tree | commit | diff | stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r--  python/fatcat_tools/importers/crossref.py         17
-rw-r--r--  python/fatcat_tools/importers/grobid_metadata.py   6
-rw-r--r--  python/fatcat_tools/transforms.py                  4
3 files changed, 18 insertions, 9 deletions
diff --git a/python/fatcat_tools/importers/crossref.py b/python/fatcat_tools/importers/crossref.py
index 1ea47707..13179207 100644
--- a/python/fatcat_tools/importers/crossref.py
+++ b/python/fatcat_tools/importers/crossref.py
@@ -250,16 +250,18 @@ class CrossrefImporter(FatcatImporter):
return None
# release date parsing is amazingly complex
- release_date = obj['issued']['date-parts'][0]
- if not release_date or not release_date[0]:
+ raw_date = obj['issued']['date-parts'][0]
+ if not raw_date or not raw_date[0]:
# got some NoneType, even though at least year is supposed to be set
+ release_year = None
release_date = None
- elif len(release_date) == 3:
- release_date = datetime.date(year=release_date[0], month=release_date[1], day=release_date[2])
+ elif len(raw_date) == 3:
+ release_year = raw_date[0]
+ release_date = datetime.date(year=raw_date[0], month=raw_date[1], day=raw_date[2])
else:
- # only the year is actually required; mangle to first day for date
- # (TODO: something better?)
- release_date = datetime.date(year=release_date[0], month=1, day=1)
+ # sometimes only the year is included, not the full date
+ release_year = raw_date[0]
+ release_date = None
re = fatcat_client.ReleaseEntity(
work_id=None,
@@ -277,6 +279,7 @@ class CrossrefImporter(FatcatImporter):
pmcid=extids['pmcid'],
wikidata_qid=extids['wikidata_qid'],
release_date=release_date,
+ release_year=release_year,
issue=obj.get('issue'),
volume=obj.get('volume'),
pages=obj.get('page'),
diff --git a/python/fatcat_tools/importers/grobid_metadata.py b/python/fatcat_tools/importers/grobid_metadata.py
index b84f7145..47a753a6 100644
--- a/python/fatcat_tools/importers/grobid_metadata.py
+++ b/python/fatcat_tools/importers/grobid_metadata.py
@@ -66,9 +66,10 @@ class GrobidMetadataImporter(FatcatImporter):
refs.append(ref)
release_date = None
+ release_year = None
if obj.get('date'):
- # TODO: only returns year, ever? how to handle?
- release_date = datetime.datetime(year=int(obj['date'][:4]), month=1, day=1).date()
+ # only returns year, ever?
+ release_year = int(obj['date'][:4])
if obj.get('doi'):
extra['doi'] = obj['doi']
@@ -88,6 +89,7 @@ class GrobidMetadataImporter(FatcatImporter):
title=obj['title'].strip(),
release_type="article-journal",
release_date=release_date,
+ release_year=release_year,
contribs=contribs,
refs=refs,
publisher=obj['journal'].get('publisher'),
diff --git a/python/fatcat_tools/transforms.py b/python/fatcat_tools/transforms.py
index 516b68ae..843c00a5 100644
--- a/python/fatcat_tools/transforms.py
+++ b/python/fatcat_tools/transforms.py
@@ -48,6 +48,10 @@ def release_to_elasticsearch(release):
if release.release_date:
# .isoformat() results in, eg, '2010-10-22' (YYYY-MM-DD)
t['release_date'] = release.release_date.isoformat()
+ if release.release_year is None:
+ t['release_year'] = release.release_date.year
+ if release.release_year is not None:
+ t['release_year'] = release.release_year
container = release.container
container_is_kept = False