summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2019-12-31 03:10:30 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2019-12-31 03:10:30 +0100
commit d951c59c1086f0cdda8683e1dd9083d9512886f3 (patch)
tree d7bd0439143dabd2b3631bc043f6037b11d99552
parent 5fcd26823207ae5ea0cdb5f1a6c8ddf7851ab6f4 (diff)
download fatcat-d951c59c1086f0cdda8683e1dd9083d9512886f3.tar.gz
fatcat-d951c59c1086f0cdda8683e1dd9083d9512886f3.zip
datacite: skip non-ascii doi for now
Example of a non-ascii doi:

* https://doi.org/10.13125/américacrítica/3017
-rw-r--r-- python/fatcat_tools/importers/datacite.py 4
1 file changed, 4 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 5b3065aa..b16f333a 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -270,6 +270,10 @@ class DataciteImporter(EntityImporter):
attributes = obj['attributes']
doi = clean_doi(attributes.get('doi', '').lower())
+ if not doi.isascii():
+ print('[{}] skipping non-ascii doi for now'.format(doi))
+ return None
+
# Contributors. Many nameIdentifierSchemes, we do not use (yet):
# "attributes.creators[].nameIdentifiers[].nameIdentifierScheme":
# ["LCNA", "GND", "email", "NAF", "OSF", "RRID", "ORCID",