about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-01-08 03:06:29 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-01-08 03:06:29 +0100
commit 21e5cb620f7c8cb14f0c9d72b0504eeb7ff31977 (patch)
tree ac714807e20946e5c8ce87b678e804c916eadeaa
parent 6499e2911386f3f5e82a589c71da4003043bfc72 (diff)
download fatcat-21e5cb620f7c8cb14f0c9d72b0504eeb7ff31977.tar.gz
fatcat-21e5cb620f7c8cb14f0c9d72b0504eeb7ff31977.zip
datacite: ignore certain names
-rw-r--r-- python/fatcat_tools/importers/datacite.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 52fede06..fe98d62a 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -309,6 +309,9 @@ class DataciteImporter(EntityImporter):
# "SCOPUS", "NRCPID", "schema.org", "GRID", "MGDS", "VIAF", "JACoW-ID"].
contribs = []
+ # Names, that should be ignored right away.
+ name_blacklist = set(('Occdownload Gbif.Org',))
+
for i, c in enumerate(attributes['creators']):
nameType = c.get('nameType', '') or ''
if nameType in ('', 'Personal'):
@@ -339,6 +342,9 @@ class DataciteImporter(EntityImporter):
if name:
name = clean(name)
+ if name in name_blacklist:
+ continue
+
if given_name:
given_name = clean(given_name)