From d951c59c1086f0cdda8683e1dd9083d9512886f3 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Tue, 31 Dec 2019 03:10:30 +0100 Subject: datacite: skip non-ascii doi for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Example of a non-ascii doi: * https://doi.org/10.13125/américacrítica/3017 --- python/fatcat_tools/importers/datacite.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 5b3065aa..b16f333a 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -270,6 +270,10 @@ class DataciteImporter(EntityImporter): attributes = obj['attributes'] doi = clean_doi(attributes.get('doi', '').lower()) + if not doi.isascii(): + print('[{}] skipping non-ascii doi for now'.format(doi)) + return None + # Contributors. Many nameIdentifierSchemes, we do not use (yet): # "attributes.creators[].nameIdentifiers[].nameIdentifierScheme": # ["LCNA", "GND", "email", "NAF", "OSF", "RRID", "ORCID", -- cgit v1.2.3