From 21e5cb620f7c8cb14f0c9d72b0504eeb7ff31977 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Wed, 8 Jan 2020 03:06:29 +0100 Subject: datacite: ignore certain names --- python/fatcat_tools/importers/datacite.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py index 52fede06..fe98d62a 100644 --- a/python/fatcat_tools/importers/datacite.py +++ b/python/fatcat_tools/importers/datacite.py @@ -309,6 +309,9 @@ class DataciteImporter(EntityImporter): # "SCOPUS", "NRCPID", "schema.org", "GRID", "MGDS", "VIAF", "JACoW-ID"]. contribs = [] + # Names that should be ignored right away. + name_blacklist = set(('Occdownload Gbif.Org',)) + for i, c in enumerate(attributes['creators']): nameType = c.get('nameType', '') or '' if nameType in ('', 'Personal'): @@ -339,6 +342,9 @@ class DataciteImporter(EntityImporter): if name: name = clean(name) + if name in name_blacklist: + continue + if given_name: given_name = clean(given_name) -- cgit v1.2.3