diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 6 | ||||
| -rw-r--r-- | python/tests/files/datacite/datacite_result_10.json | 8 | 
2 files changed, 6 insertions, 8 deletions
| diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 7878ebfa..ed8b0906 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -148,8 +148,12 @@ UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(set((      'NN',      'n.a.',      '[s.n.]', +    'Unknown',  ))) +# UNKNOWN_MARKERS_LOWER are lowercase version of UNKNOWN blacklist. +UNKNOWN_MARKERS_LOWER = set((v.lower() for v in UNKNOWN_MARKERS)) +  # TODO(martin): merge this with other maps, maybe.  LICENSE_SLUG_MAP = {      "//creativecommons.org/licenses/by/2.0/": "CC-BY", @@ -736,7 +740,7 @@ class DataciteImporter(EntityImporter):                      continue                  if name in name_blacklist:                      continue -                if name.lower() in UNKNOWN_MARKERS: +                if name.lower() in UNKNOWN_MARKERS_LOWER:                      continue                  # Unpack name, if we have an index form (e.g. 'Razis, Panos A') into 'Panos A razis'.                  if name: diff --git a/python/tests/files/datacite/datacite_result_10.json b/python/tests/files/datacite/datacite_result_10.json index 1bb70be6..ed10fe01 100644 --- a/python/tests/files/datacite/datacite_result_10.json +++ b/python/tests/files/datacite/datacite_result_10.json @@ -1,12 +1,6 @@  {    "abstracts": [], -  "contribs": [ -    { -      "index": 0, -      "raw_name": "Unknown", -      "role": "author" -    } -  ], +  "contribs": [],    "ext_ids": {      "doi": "10.25549/wpacards-m6171"    }, | 
