summaryrefslogtreecommitdiffstats
path: root/python/tests/import_datacite.py
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2020-01-02 18:11:35 +0100
committerMartin Czygan <martin.czygan@gmail.com>2020-01-02 18:11:35 +0100
commitbe43049db0da2df4343bd5e1392d6c5201fc67d0 (patch)
tree219fa25011f424da745eece11226438cf741f345 /python/tests/import_datacite.py
parentcb223fccb64500a8e134b9ec721c8a08b1a60f19 (diff)
downloadfatcat-be43049db0da2df4343bd5e1392d6c5201fc67d0.tar.gz
fatcat-be43049db0da2df4343bd5e1392d6c5201fc67d0.zip
datacite: address raw_name index form comment
> The convention for display_name and raw_name is to be how the name would normally be printed, not in index form (surname comma given_name). So we might need to un-encode names like "Tricart, Pierre". Use an additional `index_form_to_display_name` function to convert index form to display form, heuristically.
Diffstat (limited to 'python/tests/import_datacite.py')
-rw-r--r--python/tests/import_datacite.py18
1 files changed, 17 insertions, 1 deletions
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index cdc165d7..3e47fce8 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -7,7 +7,7 @@ import datetime
import pytest
import gzip
from fatcat_tools.importers import DataciteImporter, JsonLinePusher
-from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi
+from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi, index_form_to_display_name
from fatcat_tools.transforms import entity_to_dict
from fixtures import api
import json
@@ -294,3 +294,19 @@ def test_datacite_conversions(datacite_importer):
assert result == expected
+def test_index_form_to_display_name():
+ Case = collections.namedtuple('Case', 'input output')
+ cases = [
+ Case('', ''),
+ Case('ABC', 'ABC'),
+ Case('International Space Station', 'International Space Station'),
+ Case('Jin, Shan', 'Shan Jin'),
+ Case('Volkshochschule Der Bundesstadt Bonn', 'Volkshochschule Der Bundesstadt Bonn'),
+ Case('Solomon, P. M.', 'P. M. Solomon'),
+ Case('Sujeevan Ratnasingham', 'Sujeevan Ratnasingham'),
+ Case('Paul Stöckli (1906-1991), Künstler', 'Paul Stöckli (1906-1991), Künstler'),
+ ]
+
+ for c in cases:
+ assert c.output == index_form_to_display_name(c.input)
+