diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-01-02 18:11:35 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-01-02 18:11:35 +0100 |
commit | be43049db0da2df4343bd5e1392d6c5201fc67d0 (patch) | |
tree | 219fa25011f424da745eece11226438cf741f345 /python/tests/import_datacite.py | |
parent | cb223fccb64500a8e134b9ec721c8a08b1a60f19 (diff) | |
download | fatcat-be43049db0da2df4343bd5e1392d6c5201fc67d0.tar.gz fatcat-be43049db0da2df4343bd5e1392d6c5201fc67d0.zip |
datacite: address raw_name index form comment
> The convention for display_name and raw_name is to be how the name
would normally be printed, not in index form (surname comma given_name).
So we might need to un-encode names like "Tricart, Pierre".
Use an additional `index_form_to_display_name` function to convert index
form to display form, heuristically.
Diffstat (limited to 'python/tests/import_datacite.py')
-rw-r--r-- | python/tests/import_datacite.py | 18 |
1 files changed, 17 insertions, 1 deletions
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index cdc165d7..3e47fce8 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -7,7 +7,7 @@ import datetime import pytest import gzip from fatcat_tools.importers import DataciteImporter, JsonLinePusher -from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi +from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi, index_form_to_display_name from fatcat_tools.transforms import entity_to_dict from fixtures import api import json @@ -294,3 +294,19 @@ def test_datacite_conversions(datacite_importer): assert result == expected +def test_index_form_to_display_name(): + Case = collections.namedtuple('Case', 'input output') + cases = [ + Case('', ''), + Case('ABC', 'ABC'), + Case('International Space Station', 'International Space Station'), + Case('Jin, Shan', 'Shan Jin'), + Case('Volkshochschule Der Bundesstadt Bonn', 'Volkshochschule Der Bundesstadt Bonn'), + Case('Solomon, P. M.', 'P. M. Solomon'), + Case('Sujeevan Ratnasingham', 'Sujeevan Ratnasingham'), + Case('Paul Stöckli (1906-1991), Künstler', 'Paul Stöckli (1906-1991), Künstler'), + ] + + for c in cases: + assert c.output == index_form_to_display_name(c.input) + |