about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2020-01-03 22:53:23 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-01-03 22:53:23 +0100
commit: 328d7901df30ba94685d34d6a428e798b4604839 (patch)
tree: 2902301f25442497df3dd6ee450b2d20afa86f38
parent: 55dcece5a476b1492bf6c7f4597a469b48b41264 (diff)
download: fatcat-328d7901df30ba94685d34d6a428e798b4604839.tar.gz
fatcat-328d7901df30ba94685d34d6a428e798b4604839.zip
datacite: use normal.clean_doi
-rw-r--r-- python/fatcat_tools/importers/datacite.py 12
-rw-r--r-- python/tests/import_datacite.py 4
2 files changed, 1 insertion, 15 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 5891f8de..d0c75b6e 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -20,6 +20,7 @@ import langdetect
import sqlite3
import sys
from fatcat_tools.transforms import entity_to_dict
+from fatcat_tools.normal import clean_doi
# Cutoff length for abstracts.
@@ -872,17 +873,6 @@ def parse_datacite_dates(dates):
return release_date, release_month, release_year
-def clean_doi(doi):
- """
- 10.25513/1812-3996.2017.1.34–42 // 8211, Hex 2013, Octal 20023
- See also: https://github.com/miku/throwaway-check-doi
-
- Replace unicode HYPHEN..HORIZONTAL BAR with HYPHEN-MINUS.
- """
- for c in ('\u2010', '\u2011', '\u2012', '\u2013', '\u2014', '\u2015'):
- doi = doi.replace(c, "-")
- return doi
-
def index_form_to_display_name(s):
"""
Try to convert an index form name, like 'Razis, Panos A' into display_name,
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index c2fcdec9..881452ed 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -281,10 +281,6 @@ def test_datacite_dict_parse(datacite_importer):
assert r.contribs[0].surname == None
assert len(r.refs) == 0
-def test_clean_doi():
- assert clean_doi("10.25513/1812-3996.2017.1.34\u201342") == "10.25513/1812-3996.2017.1.34-42"
- assert "123" == clean_doi("123")
-
def test_datacite_conversions(datacite_importer):
"""
Datacite JSON to release entity JSON representation. The count is hardcoded