From be43049db0da2df4343bd5e1392d6c5201fc67d0 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Thu, 2 Jan 2020 18:11:35 +0100 Subject: datacite: address raw_name index form comment > The convention for display_name and raw_name is to be how the name would normally be printed, not in index form (surname comma given_name). So we might need to un-encode names like "Tricart, Pierre". Use an additional `index_form_to_display_name` function to convert index from to display form, heuristically. --- python/tests/import_datacite.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'python/tests/import_datacite.py') diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py index cdc165d7..3e47fce8 100644 --- a/python/tests/import_datacite.py +++ b/python/tests/import_datacite.py @@ -7,7 +7,7 @@ import datetime import pytest import gzip from fatcat_tools.importers import DataciteImporter, JsonLinePusher -from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi +from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi, index_form_to_display_name from fatcat_tools.transforms import entity_to_dict from fixtures import api import json @@ -294,3 +294,19 @@ def test_datacite_conversions(datacite_importer): assert result == expected +def test_index_form_to_display_name(): + Case = collections.namedtuple('Case', 'input output') + cases = [ + Case('', ''), + Case('ABC', 'ABC'), + Case('International Space Station', 'International Space Station'), + Case('Jin, Shan', 'Shan Jin'), + Case('Volkshochschule Der Bundesstadt Bonn', 'Volkshochschule Der Bundesstadt Bonn'), + Case('Solomon, P. M.', 'P. M. Solomon'), + Case('Sujeevan Ratnasingham', 'Sujeevan Ratnasingham'), + Case('Paul Stöckli (1906-1991), Künstler', 'Paul Stöckli (1906-1991), Künstler'), + ] + + for c in cases: + assert c.output == index_form_to_display_name(c.input) + -- cgit v1.2.3