diff options
| author | Martin Czygan <martin.czygan@gmail.com> | 2020-01-03 22:53:23 +0100 | 
|---|---|---|
| committer | Martin Czygan <martin.czygan@gmail.com> | 2020-01-03 22:53:23 +0100 | 
| commit | 328d7901df30ba94685d34d6a428e798b4604839 (patch) | |
| tree | 2902301f25442497df3dd6ee450b2d20afa86f38 | |
| parent | 55dcece5a476b1492bf6c7f4597a469b48b41264 (diff) | |
| download | fatcat-328d7901df30ba94685d34d6a428e798b4604839.tar.gz fatcat-328d7901df30ba94685d34d6a428e798b4604839.zip | |
datacite: use normal.clean_doi
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 12 | 
| -rw-r--r-- | python/tests/import_datacite.py | 4 | 
2 files changed, 1 insertion, 15 deletions
```diff
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 5891f8de..d0c75b6e 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -20,6 +20,7 @@ import langdetect
 import sqlite3
 import sys
 from fatcat_tools.transforms import entity_to_dict
+from fatcat_tools.normal import clean_doi
 
 
 # Cutoff length for abstracts.
@@ -872,17 +873,6 @@ def parse_datacite_dates(dates):
     return release_date, release_month, release_year
 
 
-def clean_doi(doi):
-    """
-    10.25513/1812-3996.2017.1.34–42 // 8211, Hex 2013, Octal 20023
-    See also: https://github.com/miku/throwaway-check-doi
-
-    Replace unicode HYPHEN..HORIZONTAL BAR with HYPHEN-MINUS.
-    """
-    for c in ('\u2010', '\u2011', '\u2012', '\u2013', '\u2014', '\u2015'):
-        doi = doi.replace(c, "-")
-    return doi
-
 def index_form_to_display_name(s):
     """
     Try to convert an index form name, like 'Razis, Panos A' into display_name,
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index c2fcdec9..881452ed 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -281,10 +281,6 @@ def test_datacite_dict_parse(datacite_importer):
         assert r.contribs[0].surname == None
         assert len(r.refs) == 0
 
-def test_clean_doi():
-    assert clean_doi("10.25513/1812-3996.2017.1.34\u201342") == "10.25513/1812-3996.2017.1.34-42"
-    assert "123" == clean_doi("123")
-
 def test_datacite_conversions(datacite_importer):
     """
     Datacite JSON to release entity JSON representation. The count is hardcoded
```
