diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2019-12-31 03:10:30 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2019-12-31 03:10:30 +0100 |
commit | d951c59c1086f0cdda8683e1dd9083d9512886f3 (patch) | |
tree | d7bd0439143dabd2b3631bc043f6037b11d99552 | |
parent | 5fcd26823207ae5ea0cdb5f1a6c8ddf7851ab6f4 (diff) | |
download | fatcat-d951c59c1086f0cdda8683e1dd9083d9512886f3.tar.gz fatcat-d951c59c1086f0cdda8683e1dd9083d9512886f3.zip |
datacite: skip non-ascii doi for now
Example of a non-ascii doi:
* https://doi.org/10.13125/américacrítica/3017
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 5b3065aa..b16f333a 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -270,6 +270,10 @@ class DataciteImporter(EntityImporter):
         attributes = obj['attributes']
         doi = clean_doi(attributes.get('doi', '').lower())
 
+        if not doi.isascii():
+            print('[{}] skipping non-ascii doi for now'.format(doi))
+            return None
+
         # Contributors. Many nameIdentifierSchemes, we do not use (yet):
         # "attributes.creators[].nameIdentifiers[].nameIdentifierScheme":
         # ["LCNA", "GND", "email", "NAF", "OSF", "RRID", "ORCID",