diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-01-09 02:08:08 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-01-09 02:08:08 +0100 |
commit | 8eab14ac12b17f4965ec5569853bf885ca6aacd2 (patch) | |
tree | 5f970575b52390e0f8362e7494306bc9890ee514 /python | |
parent | 24ac1dd2f0783583881320ef5fde6540d8530467 (diff) | |
download | fatcat-8eab14ac12b17f4965ec5569853bf885ca6aacd2.tar.gz fatcat-8eab14ac12b17f4965ec5569853bf885ca6aacd2.zip |
datacite: add 'Unknown' to blacklist
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 6 | ||||
-rw-r--r-- | python/tests/files/datacite/datacite_result_10.json | 8 |
2 files changed, 6 insertions, 8 deletions
```diff
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 7878ebfa..ed8b0906 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -148,8 +148,12 @@ UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(set((
     'NN',
     'n.a.',
     '[s.n.]',
+    'Unknown',
 )))
 
+# UNKNOWN_MARKERS_LOWER are lowercase version of UNKNOWN blacklist.
+UNKNOWN_MARKERS_LOWER = set((v.lower() for v in UNKNOWN_MARKERS))
+
 # TODO(martin): merge this with other maps, maybe.
 LICENSE_SLUG_MAP = {
     "//creativecommons.org/licenses/by/2.0/": "CC-BY",
@@ -736,7 +740,7 @@ class DataciteImporter(EntityImporter):
                 continue
             if name in name_blacklist:
                 continue
-            if name.lower() in UNKNOWN_MARKERS:
+            if name.lower() in UNKNOWN_MARKERS_LOWER:
                 continue
             # Unpack name, if we have an index form (e.g. 'Razis, Panos A') into 'Panos A razis'.
             if name:
diff --git a/python/tests/files/datacite/datacite_result_10.json b/python/tests/files/datacite/datacite_result_10.json
index 1bb70be6..ed10fe01 100644
--- a/python/tests/files/datacite/datacite_result_10.json
+++ b/python/tests/files/datacite/datacite_result_10.json
@@ -1,12 +1,6 @@
 {
   "abstracts": [],
-  "contribs": [
-    {
-      "index": 0,
-      "raw_name": "Unknown",
-      "role": "author"
-    }
-  ],
+  "contribs": [],
   "ext_ids": {
     "doi": "10.25549/wpacards-m6171"
   },
```