summary refs log tree commit diff stats
path: root/python/fatcat_tools/importers/datacite.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/fatcat_tools/importers/datacite.py')
-rw-r--r-- python/fatcat_tools/importers/datacite.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 9250fc5e..81f00876 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -222,6 +222,7 @@ class DataciteImporter(EntityImporter):
self.read_issn_map_file(issn_map_file)
self.debug = debug
self.insert_log_file = insert_log_file
+ self.this_year = datetime.datetime.now().year
print('datacite with debug={}'.format(self.debug), file=sys.stderr)
@@ -311,6 +312,12 @@ class DataciteImporter(EntityImporter):
release_date, release_month, release_year = parse_datacite_dates(
attributes.get('dates', []))
+ # block bogus years/dates: far-future (more than 5 years ahead) or before year 1000
+ if release_year is not None and (release_year > (self.this_year + 5) or release_year < 1000):
+ release_date = None
+ release_month = None
+ release_year = None
+
# Some records do not use the "dates" field (e.g. micropub), but:
# "attributes.published" or "attributes.publicationYear"
if not any((release_date, release_month, release_year)):
@@ -714,7 +721,8 @@ class DataciteImporter(EntityImporter):
name_scheme = nid.get('nameIdentifierScheme', '') or ''
if not name_scheme.lower() == "orcid":
continue
- orcid = nid.get('nameIdentifier', '').replace('https://orcid.org/', '')
+ orcid = nid.get('nameIdentifier') or ''
+ orcid = orcid.replace('https://orcid.org/', '')
if not orcid:
continue
creator_id = self.lookup_orcid(orcid)