summaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2020-01-02 18:11:35 +0100
committerMartin Czygan <martin.czygan@gmail.com>2020-01-02 18:11:35 +0100
commitbe43049db0da2df4343bd5e1392d6c5201fc67d0 (patch)
tree219fa25011f424da745eece11226438cf741f345 /python
parentcb223fccb64500a8e134b9ec721c8a08b1a60f19 (diff)
downloadfatcat-be43049db0da2df4343bd5e1392d6c5201fc67d0.tar.gz
fatcat-be43049db0da2df4343bd5e1392d6c5201fc67d0.zip
datacite: address raw_name index form comment
> The convention for display_name and raw_name is to be how the name would normally be printed, not in index form (surname comma given_name). So we might need to un-encode names like "Tricart, Pierre". Use an additional `index_form_to_display_name` function to heuristically convert index form to display form.
Diffstat (limited to 'python')
-rw-r--r--python/fatcat_tools/importers/datacite.py43
-rw-r--r--python/tests/files/datacite/datacite_result_00.json4
-rw-r--r--python/tests/files/datacite/datacite_result_01.json2
-rw-r--r--python/tests/files/datacite/datacite_result_02.json2
-rw-r--r--python/tests/files/datacite/datacite_result_04.json2
-rw-r--r--python/tests/files/datacite/datacite_result_05.json142
-rw-r--r--python/tests/files/datacite/datacite_result_07.json6
-rw-r--r--python/tests/files/datacite/datacite_result_08.json4
-rw-r--r--python/tests/files/datacite/datacite_result_09.json2
-rw-r--r--python/tests/files/datacite/datacite_result_12.json8
-rw-r--r--python/tests/files/datacite/datacite_result_13.json2
-rw-r--r--python/tests/files/datacite/datacite_result_14.json16
-rw-r--r--python/tests/files/datacite/datacite_result_15.json2
-rw-r--r--python/tests/files/datacite/datacite_result_16.json2
-rw-r--r--python/tests/files/datacite/datacite_result_18.json2
-rw-r--r--python/tests/files/datacite/datacite_result_19.json2
-rw-r--r--python/tests/files/datacite/datacite_result_20.json2
-rw-r--r--python/tests/files/datacite/datacite_result_21.json6
-rw-r--r--python/tests/files/datacite/datacite_result_22.json10
-rw-r--r--python/tests/files/datacite/datacite_result_23.json6
-rw-r--r--python/tests/import_datacite.py18
21 files changed, 171 insertions, 112 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index a03587c0..bd135569 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -331,6 +331,10 @@ class DataciteImporter(EntityImporter):
if name in ('(:Unav)', 'NA', 'NN', '(:Null)'):
continue
+ # Unpack name, if we have an index form (e.g. 'Razis, Panos A') into 'Panos A Razis'.
+ if name:
+ name = index_form_to_display_name(name)
+
contribs.append(
fatcat_openapi_client.ReleaseContrib(
creator_id=creator_id,
@@ -859,3 +863,42 @@ def clean_doi(doi):
doi = doi.replace(c, "-")
return doi
def index_form_to_display_name(s):
    """
    Try to convert an index form name, like 'Razis, Panos A' into display_name,
    e.g. 'Panos A Razis'.

    This is a heuristic. The input is returned unchanged when it does not
    look like a single personal name in index form, that is when it:

    * contains no comma at all,
    * contains one of the characters '(', ')' or '*',
    * contains more than one comma (most likely a list of names),
    * contains (case-insensitively, as a substring) one of the stopwords
      that hint at an organization or a place rather than a person.

    Otherwise the two comma separated parts are stripped of surrounding
    whitespace and swapped. If one side of the comma is empty (e.g.
    'Smith,'), only the non-empty side is returned, so the result never
    carries stray leading or trailing whitespace. If both sides are empty,
    the input is returned unchanged.

    :param s: name string, possibly in index form; expected to be a str
    :returns: the display form of the name, or `s` itself if no conversion
        was applied
    """
    if ',' not in s:
        return s

    # Parentheses and asterisks usually indicate annotations (dates, roles,
    # footnote marks); swapping around the comma would garble these.
    if any(char in s for char in ('(', ')', '*')):
        return s

    if s.count(',') > 1:
        # "Dr. Hina, Dr. Muhammad Usman Shahid, Dr. Muhammad Zeeshan Khan"
        return s

    stopwords = (
        'Archive',
        'Collection',
        'Coordinator',
        'Department',
        'Germany',
        'International',
        'National',
        'Netherlands',
        'Office',
        'Organisation',
        'Organization',
        'Service',
        'Services',
        'United States',
        'University',
        'Verein',
        'Volkshochschule',
    )
    # Lowercase the input once; matching is substring based on purpose, so
    # that e.g. 'Universitylibrary' style compounds are caught as well.
    lowered = s.lower()
    if any(stop.lower() in lowered for stop in stopwords):
        return s

    # Exactly one comma is left at this point, so there are exactly two parts.
    surname, given = (part.strip() for part in s.split(','))

    # Drop an empty side instead of formatting it in, which would otherwise
    # yield values like ' Smith' or 'John '.
    display = ' '.join(part for part in (given, surname) if part)
    return display or s
diff --git a/python/tests/files/datacite/datacite_result_00.json b/python/tests/files/datacite/datacite_result_00.json
index 085e23f3..a4b28076 100644
--- a/python/tests/files/datacite/datacite_result_00.json
+++ b/python/tests/files/datacite/datacite_result_00.json
@@ -32,14 +32,14 @@
"contribs": [
{
"index": 0,
- "raw_name": "Li, Qian-Jin",
+ "raw_name": "Qian-Jin Li",
"given_name": "Qian-Jin",
"surname": "Li",
"role": "author"
},
{
"index": 1,
- "raw_name": "Yang, Chun-Long",
+ "raw_name": "Chun-Long Yang",
"given_name": "Chun-Long",
"surname": "Yang",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_01.json b/python/tests/files/datacite/datacite_result_01.json
index f8c6b930..46be2515 100644
--- a/python/tests/files/datacite/datacite_result_01.json
+++ b/python/tests/files/datacite/datacite_result_01.json
@@ -21,7 +21,7 @@
"contribs": [
{
"index": 0,
- "raw_name": "Dargenty, G.",
+ "raw_name": "G. Dargenty",
"given_name": "G.",
"surname": "Dargenty",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_02.json b/python/tests/files/datacite/datacite_result_02.json
index f8b85f38..bdcb4951 100644
--- a/python/tests/files/datacite/datacite_result_02.json
+++ b/python/tests/files/datacite/datacite_result_02.json
@@ -25,7 +25,7 @@
"contribs": [
{
"index": 0,
- "raw_name": "Weyersberg, Albert",
+ "raw_name": "Albert Weyersberg",
"given_name": "Albert",
"surname": "Weyersberg",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_04.json b/python/tests/files/datacite/datacite_result_04.json
index 7ca70d6c..54b19ef9 100644
--- a/python/tests/files/datacite/datacite_result_04.json
+++ b/python/tests/files/datacite/datacite_result_04.json
@@ -12,7 +12,7 @@
"contribs": [
{
"index": 0,
- "raw_name": "Nicollerat, Marc Andre",
+ "raw_name": "Marc Andre Nicollerat",
"given_name": "Marc Andre",
"surname": "Nicollerat",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_05.json b/python/tests/files/datacite/datacite_result_05.json
index e61769de..a790c26e 100644
--- a/python/tests/files/datacite/datacite_result_05.json
+++ b/python/tests/files/datacite/datacite_result_05.json
@@ -24,497 +24,497 @@
"contribs": [
{
"index": 0,
- "raw_name": "K\u00f5ljalg, Urmas",
+ "raw_name": "Urmas K\u00f5ljalg",
"given_name": "Urmas",
"surname": "K\u00f5ljalg",
"role": "author"
},
{
"index": 1,
- "raw_name": "Abarenkov, Kessy",
+ "raw_name": "Kessy Abarenkov",
"given_name": "Kessy",
"surname": "Abarenkov",
"role": "author"
},
{
"index": 2,
- "raw_name": "Nilsson, R. Henrik",
+ "raw_name": "R. Henrik Nilsson",
"given_name": "R. Henrik",
"surname": "Nilsson",
"role": "author"
},
{
"index": 3,
- "raw_name": "Larsson, Karl-Henrik",
+ "raw_name": "Karl-Henrik Larsson",
"given_name": "Karl-Henrik",
"surname": "Larsson",
"role": "author"
},
{
"index": 4,
- "raw_name": "Aas, Anders Bj\u00f8rnsgard",
+ "raw_name": "Anders Bj\u00f8rnsgard Aas",
"given_name": "Anders Bj\u00f8rnsgard",
"surname": "Aas",
"role": "author"
},
{
"index": 5,
- "raw_name": "Adams, Rachel",
+ "raw_name": "Rachel Adams",
"given_name": "Rachel",
"surname": "Adams",
"role": "author"
},
{
"index": 6,
- "raw_name": "Alves, Artur",
+ "raw_name": "Artur Alves",
"given_name": "Artur",
"surname": "Alves",
"role": "author"
},
{
"index": 7,
- "raw_name": "Ammirati, Joseph F.",
+ "raw_name": "Joseph F. Ammirati",
"given_name": "Joseph F.",
"surname": "Ammirati",
"role": "author"
},
{
"index": 8,
- "raw_name": "Arnold, A. Elizabeth",
+ "raw_name": "A. Elizabeth Arnold",
"given_name": "A. Elizabeth",
"surname": "Arnold",
"role": "author"
},
{
"index": 9,
- "raw_name": "Bahram, Mohammad",
+ "raw_name": "Mohammad Bahram",
"given_name": "Mohammad",
"surname": "Bahram",
"role": "author"
},
{
"index": 10,
- "raw_name": "Bengtsson-Palme, Johan",
+ "raw_name": "Johan Bengtsson-Palme",
"given_name": "Johan",
"surname": "Bengtsson-Palme",
"role": "author"
},
{
"index": 11,
- "raw_name": "Berlin, Anna",
+ "raw_name": "Anna Berlin",
"given_name": "Anna",
"surname": "Berlin",
"role": "author"
},
{
"index": 12,
- "raw_name": "Botnen, Synn\u00f8ve",
+ "raw_name": "Synn\u00f8ve Botnen",
"given_name": "Synn\u00f8ve",
"surname": "Botnen",
"role": "author"
},
{
"index": 13,
- "raw_name": "Bourlat, Sarah",
+ "raw_name": "Sarah Bourlat",
"given_name": "Sarah",
"surname": "Bourlat",
"role": "author"
},
{
"index": 14,
- "raw_name": "Cheeke, Tanya",
+ "raw_name": "Tanya Cheeke",
"given_name": "Tanya",
"surname": "Cheeke",
"role": "author"
},
{
"index": 15,
- "raw_name": "Dima, B\u00e1lint",
+ "raw_name": "B\u00e1lint Dima",
"given_name": "B\u00e1lint",
"surname": "Dima",
"role": "author"
},
{
"index": 16,
- "raw_name": "Drenkhan, Rein",
+ "raw_name": "Rein Drenkhan",
"given_name": "Rein",
"surname": "Drenkhan",
"role": "author"
},
{
"index": 17,
- "raw_name": "Duarte, Camila",
+ "raw_name": "Camila Duarte",
"given_name": "Camila",
"surname": "Duarte",
"role": "author"
},
{
"index": 18,
- "raw_name": "Due\u00f1as, Margarita",
+ "raw_name": "Margarita Due\u00f1as",
"given_name": "Margarita",
"surname": "Due\u00f1as",
"role": "author"
},
{
"index": 19,
- "raw_name": "Eberhardt, Ursula",
+ "raw_name": "Ursula Eberhardt",
"given_name": "Ursula",
"surname": "Eberhardt",
"role": "author"
},
{
"index": 20,
- "raw_name": "Friberg, Hanna",
+ "raw_name": "Hanna Friberg",
"given_name": "Hanna",
"surname": "Friberg",
"role": "author"
},
{
"index": 21,
- "raw_name": "Fr\u00f8slev, Tobias G.",
+ "raw_name": "Tobias G. Fr\u00f8slev",
"given_name": "Tobias G.",
"surname": "Fr\u00f8slev",
"role": "author"
},
{
"index": 22,
- "raw_name": "Garnica, Sigisfredo",
+ "raw_name": "Sigisfredo Garnica",
"given_name": "Sigisfredo",
"surname": "Garnica",
"role": "author"
},
{
"index": 23,
- "raw_name": "Geml, J\u00f3zsef",
+ "raw_name": "J\u00f3zsef Geml",
"given_name": "J\u00f3zsef",
"surname": "Geml",
"role": "author"
},
{
"index": 24,
- "raw_name": "Ghobad-Nejhad, Masoomeh",
+ "raw_name": "Masoomeh Ghobad-Nejhad",
"given_name": "Masoomeh",
"surname": "Ghobad-Nejhad",
"role": "author"
},
{
"index": 25,
- "raw_name": "Grebenc, Tine",
+ "raw_name": "Tine Grebenc",
"given_name": "Tine",
"surname": "Grebenc",
"role": "author"
},
{
"index": 26,
- "raw_name": "Griffith, Gareth W.",
+ "raw_name": "Gareth W. Griffith",
"given_name": "Gareth W.",
"surname": "Griffith",
"role": "author"
},
{
"index": 27,
- "raw_name": "Hampe, Felix",
+ "raw_name": "Felix Hampe",
"given_name": "Felix",
"surname": "Hampe",
"role": "author"
},
{
"index": 28,
- "raw_name": "Kennedy, Peter",
+ "raw_name": "Peter Kennedy",
"given_name": "Peter",
"surname": "Kennedy",
"role": "author"
},
{
"index": 29,
- "raw_name": "Khomich, Maryia",
+ "raw_name": "Maryia Khomich",
"given_name": "Maryia",
"surname": "Khomich",
"role": "author"
},
{
"index": 30,
- "raw_name": "Kohout, Petr",
+ "raw_name": "Petr Kohout",
"given_name": "Petr",
"surname": "Kohout",
"role": "author"
},
{
"index": 31,
- "raw_name": "Kollom, Anu",
+ "raw_name": "Anu Kollom",
"given_name": "Anu",
"surname": "Kollom",
"role": "author"
},
{
"index": 32,
- "raw_name": "Larsson, Ellen",
+ "raw_name": "Ellen Larsson",
"given_name": "Ellen",
"surname": "Larsson",
"role": "author"
},
{
"index": 33,
- "raw_name": "Laszlo, Irinyi",
+ "raw_name": "Irinyi Laszlo",
"given_name": "Irinyi",
"surname": "Laszlo",
"role": "author"
},
{
"index": 34,
- "raw_name": "Leavitt, Steven",
+ "raw_name": "Steven Leavitt",
"given_name": "Steven",
"surname": "Leavitt",
"role": "author"
},
{
"index": 35,
- "raw_name": "Liimatainen, Kare",
+ "raw_name": "Kare Liimatainen",
"given_name": "Kare",
"surname": "Liimatainen",
"role": "author"
},
{
"index": 36,
- "raw_name": "Lindahl, Bj\u00f6rn",
+ "raw_name": "Bj\u00f6rn Lindahl",
"given_name": "Bj\u00f6rn",
"surname": "Lindahl",
"role": "author"
},
{
"index": 37,
- "raw_name": "Lodge, Deborah J.",
+ "raw_name": "Deborah J. Lodge",
"given_name": "Deborah J.",
"surname": "Lodge",
"role": "author"
},
{
"index": 38,
- "raw_name": "Lumbsch, Helge Thorsten",
+ "raw_name": "Helge Thorsten Lumbsch",
"given_name": "Helge Thorsten",
"surname": "Lumbsch",
"role": "author"
},
{
"index": 39,
- "raw_name": "Mart\u00edn Esteban, Mar\u00eda Paz",
+ "raw_name": "Mar\u00eda Paz Mart\u00edn Esteban",
"given_name": "Mar\u00eda Paz",
"surname": "Mart\u00edn Esteban",
"role": "author"
},
{
"index": 40,
- "raw_name": "Meyer, Wieland",
+ "raw_name": "Wieland Meyer",
"given_name": "Wieland",
"surname": "Meyer",
"role": "author"
},
{
"index": 41,
- "raw_name": "Miettinen, Otto",
+ "raw_name": "Otto Miettinen",
"given_name": "Otto",
"surname": "Miettinen",
"role": "author"
},
{
"index": 42,
- "raw_name": "Nguyen, Nhu",
+ "raw_name": "Nhu Nguyen",
"given_name": "Nhu",
"surname": "Nguyen",
"role": "author"
},
{
"index": 43,
- "raw_name": "Niskanen, Tuula",
+ "raw_name": "Tuula Niskanen",
"given_name": "Tuula",
"surname": "Niskanen",
"role": "author"
},
{
"index": 44,
- "raw_name": "Oono, Ryoko",
+ "raw_name": "Ryoko Oono",
"given_name": "Ryoko",
"surname": "Oono",
"role": "author"
},
{
"index": 45,
- "raw_name": "\u00d6pik, Maarja",
+ "raw_name": "Maarja \u00d6pik",
"given_name": "Maarja",
"surname": "\u00d6pik",
"role": "author"
},
{
"index": 46,
- "raw_name": "Ordynets, Alexander",
+ "raw_name": "Alexander Ordynets",
"given_name": "Alexander",
"surname": "Ordynets",
"role": "author"
},
{
"index": 47,
- "raw_name": "Paw\u0142owska, Julia",
+ "raw_name": "Julia Paw\u0142owska",
"given_name": "Julia",
"surname": "Paw\u0142owska",
"role": "author"
},
{
"index": 48,
- "raw_name": "Peintner, Ursula",
+ "raw_name": "Ursula Peintner",
"given_name": "Ursula",
"surname": "Peintner",
"role": "author"
},
{
"index": 49,
- "raw_name": "Pereira, Olinto Liparini",
+ "raw_name": "Olinto Liparini Pereira",
"given_name": "Olinto Liparini",
"surname": "Pereira",
"role": "author"
},
{
"index": 50,
- "raw_name": "Pinho, Danilo Batista",
+ "raw_name": "Danilo Batista Pinho",
"given_name": "Danilo Batista",
"surname": "Pinho",
"role": "author"
},
{
"index": 51,
- "raw_name": "P\u00f5ldmaa, Kadri",
+ "raw_name": "Kadri P\u00f5ldmaa",
"given_name": "Kadri",
"surname": "P\u00f5ldmaa",
"role": "author"
},
{
"index": 52,
- "raw_name": "Runnel, Kadri",
+ "raw_name": "Kadri Runnel",
"given_name": "Kadri",
"surname": "Runnel",
"role": "author"
},
{
"index": 53,
- "raw_name": "Ryberg, Martin",
+ "raw_name": "Martin Ryberg",
"given_name": "Martin",
"surname": "Ryberg",
"role": "author"
},
{
"index": 54,
- "raw_name": "Saar, Irja",
+ "raw_name": "Irja Saar",
"given_name": "Irja",
"surname": "Saar",
"role": "author"
},
{
"index": 55,
- "raw_name": "Sanli, Kemal",
+ "raw_name": "Kemal Sanli",
"given_name": "Kemal",
"surname": "Sanli",
"role": "author"
},
{
"index": 56,
- "raw_name": "Scott, James",
+ "raw_name": "James Scott",
"given_name": "James",
"surname": "Scott",
"role": "author"
},
{
"index": 57,
- "raw_name": "Spirin, Viacheslav",
+ "raw_name": "Viacheslav Spirin",
"given_name": "Viacheslav",
"surname": "Spirin",
"role": "author"
},
{
"index": 58,
- "raw_name": "Suija, Ave",
+ "raw_name": "Ave Suija",
"given_name": "Ave",
"surname": "Suija",
"role": "author"
},
{
"index": 59,
- "raw_name": "Svantesson, Sten",
+ "raw_name": "Sten Svantesson",
"given_name": "Sten",
"surname": "Svantesson",
"role": "author"
},
{
"index": 60,
- "raw_name": "Tadych, Mariusz",
+ "raw_name": "Mariusz Tadych",
"given_name": "Mariusz",
"surname": "Tadych",
"role": "author"
},
{
"index": 61,
- "raw_name": "Takamatsu, Susumu",
+ "raw_name": "Susumu Takamatsu",
"given_name": "Susumu",
"surname": "Takamatsu",
"role": "author"
},
{
"index": 62,
- "raw_name": "Tamm, Heidi",
+ "raw_name": "Heidi Tamm",
"given_name": "Heidi",
"surname": "Tamm",
"role": "author"
},
{
"index": 63,
- "raw_name": "Taylor, AFS.",
+ "raw_name": "AFS. Taylor",
"given_name": "AFS.",
"surname": "Taylor",
"role": "author"
},
{
"index": 64,
- "raw_name": "Tedersoo, Leho",
+ "raw_name": "Leho Tedersoo",
"given_name": "Leho",
"surname": "Tedersoo",
"role": "author"
},
{
"index": 65,
- "raw_name": "Telleria, M.T.",
+ "raw_name": "M.T. Telleria",
"given_name": "M.T.",
"surname": "Telleria",
"role": "author"
},
{
"index": 66,
- "raw_name": "Udayanga, Dhanushka",
+ "raw_name": "Dhanushka Udayanga",
"given_name": "Dhanushka",
"surname": "Udayanga",
"role": "author"
},
{
"index": 67,
- "raw_name": "Unterseher, Martin",
+ "raw_name": "Martin Unterseher",
"given_name": "Martin",
"surname": "Unterseher",
"role": "author"
},
{
"index": 68,
- "raw_name": "Volobuev, Sergey",
+ "raw_name": "Sergey Volobuev",
"given_name": "Sergey",
"surname": "Volobuev",
"role": "author"
},
{
"index": 69,
- "raw_name": "Weiss, Michael",
+ "raw_name": "Michael Weiss",
"given_name": "Michael",
"surname": "Weiss",
"role": "author"
},
{
"index": 70,
- "raw_name": "Wurzbacher, Christian",
+ "raw_name": "Christian Wurzbacher",
"given_name": "Christian",
"surname": "Wurzbacher",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_07.json b/python/tests/files/datacite/datacite_result_07.json
index 324bb663..f572263c 100644
--- a/python/tests/files/datacite/datacite_result_07.json
+++ b/python/tests/files/datacite/datacite_result_07.json
@@ -38,21 +38,21 @@
"contribs": [
{
"index": 0,
- "raw_name": "ROTHUIZEN, E.",
+ "raw_name": "E. ROTHUIZEN",
"given_name": "E.",
"surname": "ROTHUIZEN",
"role": "author"
},
{
"index": 1,
- "raw_name": "ELMEGAARD, B.",
+ "raw_name": "B. ELMEGAARD",
"given_name": "B.",
"surname": "ELMEGAARD",
"role": "author"
},
{
"index": 2,
- "raw_name": "MARKUSSEN W., B.",
+ "raw_name": "B. MARKUSSEN W.",
"given_name": "B.",
"surname": "MARKUSSEN W.",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_08.json b/python/tests/files/datacite/datacite_result_08.json
index 281c3679..581ca1eb 100644
--- a/python/tests/files/datacite/datacite_result_08.json
+++ b/python/tests/files/datacite/datacite_result_08.json
@@ -30,14 +30,14 @@
"contribs": [
{
"index": 0,
- "raw_name": "Kajisa, Kei",
+ "raw_name": "Kei Kajisa",
"given_name": "Kei",
"surname": "Kajisa",
"role": "author"
},
{
"index": 1,
- "raw_name": "Kajisa, Kei",
+ "raw_name": "Kei Kajisa",
"given_name": "Kei",
"surname": "Kajisa",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_09.json b/python/tests/files/datacite/datacite_result_09.json
index 01f92f85..db103d2b 100644
--- a/python/tests/files/datacite/datacite_result_09.json
+++ b/python/tests/files/datacite/datacite_result_09.json
@@ -24,7 +24,7 @@
"contribs": [
{
"index": 0,
- "raw_name": "Kirstaedter, Nils",
+ "raw_name": "Nils Kirstaedter",
"given_name": "Nils",
"surname": "Kirstaedter",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_12.json b/python/tests/files/datacite/datacite_result_12.json
index 6b6cad4a..192062e3 100644
--- a/python/tests/files/datacite/datacite_result_12.json
+++ b/python/tests/files/datacite/datacite_result_12.json
@@ -12,28 +12,28 @@
"contribs": [
{
"index": 0,
- "raw_name": "Spanias, Charalampos",
+ "raw_name": "Charalampos Spanias",
"given_name": "Charalampos",
"surname": "Spanias",
"role": "author"
},
{
"index": 1,
- "raw_name": "Nikolaidis, Pantelis T",
+ "raw_name": "Pantelis T Nikolaidis",
"given_name": "Pantelis T",
"surname": "Nikolaidis",
"role": "author"
},
{
"index": 2,
- "raw_name": "Rosemann, Thomas",
+ "raw_name": "Thomas Rosemann",
"given_name": "Thomas",
"surname": "Rosemann",
"role": "author"
},
{
"index": 3,
- "raw_name": "Knechtle, Beat",
+ "raw_name": "Beat Knechtle",
"given_name": "Beat",
"surname": "Knechtle",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_13.json b/python/tests/files/datacite/datacite_result_13.json
index 3da3816d..c8971667 100644
--- a/python/tests/files/datacite/datacite_result_13.json
+++ b/python/tests/files/datacite/datacite_result_13.json
@@ -17,7 +17,7 @@
},
{
"index": 1,
- "raw_name": "Hiltbrunner, Hermann",
+ "raw_name": "Hermann Hiltbrunner",
"given_name": "Hermann",
"surname": "Hiltbrunner",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_14.json b/python/tests/files/datacite/datacite_result_14.json
index 94c00472..94ad000a 100644
--- a/python/tests/files/datacite/datacite_result_14.json
+++ b/python/tests/files/datacite/datacite_result_14.json
@@ -45,56 +45,56 @@
"contribs": [
{
"index": 0,
- "raw_name": "Stulz, E.",
+ "raw_name": "E. Stulz",
"given_name": "E.",
"surname": "Stulz",
"role": "author"
},
{
"index": 1,
- "raw_name": "Scott, S.M.",
+ "raw_name": "S.M. Scott",
"given_name": "S.M.",
"surname": "Scott",
"role": "author"
},
{
"index": 2,
- "raw_name": "Ng, Yiu-Fai",
+ "raw_name": "Yiu-Fai Ng",
"given_name": "Yiu-Fai",
"surname": "Ng",
"role": "author"
},
{
"index": 3,
- "raw_name": "Bond, A.D.",
+ "raw_name": "A.D. Bond",
"given_name": "A.D.",
"surname": "Bond",
"role": "author"
},
{
"index": 4,
- "raw_name": "Teat, S.J.",
+ "raw_name": "S.J. Teat",
"given_name": "S.J.",
"surname": "Teat",
"role": "author"
},
{
"index": 5,
- "raw_name": "Darling, S.L.",
+ "raw_name": "S.L. Darling",
"given_name": "S.L.",
"surname": "Darling",
"role": "author"
},
{
"index": 6,
- "raw_name": "Feeder, N.",
+ "raw_name": "N. Feeder",
"given_name": "N.",
"surname": "Feeder",
"role": "author"
},
{
"index": 7,
- "raw_name": "Sanders, J.K.M.",
+ "raw_name": "J.K.M. Sanders",
"given_name": "J.K.M.",
"surname": "Sanders",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_15.json b/python/tests/files/datacite/datacite_result_15.json
index 0614f6ba..bdeb8426 100644
--- a/python/tests/files/datacite/datacite_result_15.json
+++ b/python/tests/files/datacite/datacite_result_15.json
@@ -11,7 +11,7 @@
"contribs": [
{
"index": 0,
- "raw_name": "Richardson, David",
+ "raw_name": "David Richardson",
"given_name": "David",
"surname": "Richardson",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_16.json b/python/tests/files/datacite/datacite_result_16.json
index 1d861cf6..ea8c2e59 100644
--- a/python/tests/files/datacite/datacite_result_16.json
+++ b/python/tests/files/datacite/datacite_result_16.json
@@ -20,7 +20,7 @@
"contribs": [
{
"index": 0,
- "raw_name": "Sochi, Taha",
+ "raw_name": "Taha Sochi",
"given_name": "Taha",
"surname": "Sochi",
"role": "author"
diff --git a/python/tests/files/datacite/datacite_result_18.json b/python/tests/files/datacite/datacite_result_18.json
index 12ab39fe..274858c3 100644
--- a/python/tests/files/datacite/datacite_result_18.json
+++ b/python/tests/files/datacite/datacite_result_18.json
@@ -12,4 +12,4 @@
"contribs": [],
"refs": [],
"abstracts": []
-}
+} \ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_19.json b/python/tests/files/datacite/datacite_result_19.json
index 1505db92..8d797268 100644
--- a/python/tests/files/datacite/datacite_result_19.json
+++ b/python/tests/files/datacite/datacite_result_19.json
@@ -12,4 +12,4 @@
"contribs": [],
"refs": [],
"abstracts": []
-}
+} \ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_20.json b/python/tests/files/datacite/datacite_result_20.json
index 1868eede..97d7ae75 100644
--- a/python/tests/files/datacite/datacite_result_20.json
+++ b/python/tests/files/datacite/datacite_result_20.json
@@ -11,4 +11,4 @@
"contribs": [],
"refs": [],
"abstracts": []
-}
+} \ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_21.json b/python/tests/files/datacite/datacite_result_21.json
index 9214065a..0a05a7cd 100644
--- a/python/tests/files/datacite/datacite_result_21.json
+++ b/python/tests/files/datacite/datacite_result_21.json
@@ -8,8 +8,8 @@
"ext_ids": {
"doi": "10.7916/d86x0cg1"
},
+ "language": "de",
"contribs": [],
"refs": [],
- "abstracts": [],
- "language": "de"
-}
+ "abstracts": []
+} \ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_22.json b/python/tests/files/datacite/datacite_result_22.json
index e9939e09..9e4225b5 100644
--- a/python/tests/files/datacite/datacite_result_22.json
+++ b/python/tests/files/datacite/datacite_result_22.json
@@ -8,15 +8,15 @@
"ext_ids": {
"doi": "10.7916/d86x0cg1"
},
+ "language": "de",
"contribs": [
{
- "raw_affiliation": "Department of pataphysics",
"index": 0,
"raw_name": "Anton Welch",
- "role": "author"
+ "role": "author",
+ "raw_affiliation": "Department of pataphysics"
}
],
"refs": [],
- "abstracts": [],
- "language": "de"
-}
+ "abstracts": []
+} \ No newline at end of file
diff --git a/python/tests/files/datacite/datacite_result_23.json b/python/tests/files/datacite/datacite_result_23.json
index 2bf66eae..46f60492 100644
--- a/python/tests/files/datacite/datacite_result_23.json
+++ b/python/tests/files/datacite/datacite_result_23.json
@@ -8,6 +8,7 @@
"ext_ids": {
"doi": "10.7916/d86x0cg1-xxx"
},
+ "language": "de",
"contribs": [
{
"index": 0,
@@ -17,6 +18,5 @@
}
],
"refs": [],
- "abstracts": [],
- "language": "de"
-}
+ "abstracts": []
+} \ No newline at end of file
diff --git a/python/tests/import_datacite.py b/python/tests/import_datacite.py
index cdc165d7..3e47fce8 100644
--- a/python/tests/import_datacite.py
+++ b/python/tests/import_datacite.py
@@ -7,7 +7,7 @@ import datetime
import pytest
import gzip
from fatcat_tools.importers import DataciteImporter, JsonLinePusher
-from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi
+from fatcat_tools.importers.datacite import find_original_language_title, parse_datacite_titles, parse_datacite_dates, clean_doi, index_form_to_display_name
from fatcat_tools.transforms import entity_to_dict
from fixtures import api
import json
@@ -294,3 +294,19 @@ def test_datacite_conversions(datacite_importer):
assert result == expected
def test_index_form_to_display_name():
    """
    Index form names get swapped into display form; everything else
    (plain names, organizations, annotated names) passes through unchanged.
    """
    # (input, expected output) pairs
    expectations = (
        ('', ''),
        ('ABC', 'ABC'),
        ('International Space Station', 'International Space Station'),
        ('Jin, Shan', 'Shan Jin'),
        ('Volkshochschule Der Bundesstadt Bonn', 'Volkshochschule Der Bundesstadt Bonn'),
        ('Solomon, P. M.', 'P. M. Solomon'),
        ('Sujeevan Ratnasingham', 'Sujeevan Ratnasingham'),
        ('Paul Stöckli (1906-1991), Künstler', 'Paul Stöckli (1906-1991), Künstler'),
    )

    for raw, wanted in expectations:
        assert wanted == index_form_to_display_name(raw)
+