diff options
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 14 | 
1 file changed, 13 insertions, 1 deletion
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 7f0482b4..5b3065aa 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -268,7 +268,7 @@ class DataciteImporter(EntityImporter):
             return None
 
         attributes = obj['attributes']
-        doi = attributes.get('doi', '').lower()
+        doi = clean_doi(attributes.get('doi', '').lower())
 
         # Contributors. Many nameIdentifierSchemes, we do not use (yet):
         # "attributes.creators[].nameIdentifiers[].nameIdentifierScheme":
@@ -832,3 +832,15 @@ def parse_datacite_dates(dates):
                 break
 
     return release_date, release_year
+
+def clean_doi(doi):
+    """
+    10.25513/1812-3996.2017.1.34–42 // 8211, Hex 2013, Octal 20023
+    See also: https://github.com/miku/throwaway-check-doi
+
+    Replace unicode HYPHEN..HORIZONTAL BAR with HYPHEN-MINUS.
+    """
+    for c in ('\u2010', '\u2011', '\u2012', '\u2013', '\u2014', '\u2015'):
+        doi = doi.replace(c, "-")
+    return doi
+
