diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-01-08 03:06:29 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-01-08 03:06:29 +0100 |
commit | 21e5cb620f7c8cb14f0c9d72b0504eeb7ff31977 (patch) | |
tree | ac714807e20946e5c8ce87b678e804c916eadeaa /python/fatcat_tools/importers/datacite.py | |
parent | 6499e2911386f3f5e82a589c71da4003043bfc72 (diff) | |
download | fatcat-21e5cb620f7c8cb14f0c9d72b0504eeb7ff31977.tar.gz fatcat-21e5cb620f7c8cb14f0c9d72b0504eeb7ff31977.zip |
datacite: ignore certain names
Diffstat (limited to 'python/fatcat_tools/importers/datacite.py')
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 6 |
1 file changed, 6 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 52fede06..fe98d62a 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -309,6 +309,9 @@ class DataciteImporter(EntityImporter):
         # "SCOPUS", "NRCPID", "schema.org", "GRID", "MGDS", "VIAF", "JACoW-ID"].
         contribs = []
 
+        # Names, that should be ignored right away.
+        name_blacklist = set(('Occdownload Gbif.Org',))
+
         for i, c in enumerate(attributes['creators']):
             nameType = c.get('nameType', '') or ''
             if nameType in ('', 'Personal'):
@@ -339,6 +342,9 @@ class DataciteImporter(EntityImporter):
                 if name:
                     name = clean(name)
 
+                if name in name_blacklist:
+                    continue
+
                 if given_name:
                     given_name = clean(given_name)
 
```