diff options
Diffstat (limited to 'python/fatcat_tools/importers/datacite.py')
-rw-r--r-- | python/fatcat_tools/importers/datacite.py | 5 |
1 file changed, 3 insertions, 2 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 997f8dc8..6eed8991 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -376,10 +376,11 @@ class DataciteImporter(EntityImporter):
         # check for blocklisted "spam", e.g. "FULL MOVIE"
         for rule in DATACITE_TITLE_SPAM_WORDGROUPS:
             seen = set()
-            for token in rule.get("tokens", []):
+            token_list: List[str] = rule.get("tokens") or []
+            for token in token_list:
                 if token in title.lower():
                     seen.add(token)
-            if len(seen) >= rule.get("min"):
+            if len(seen) >= rule["min"]:
                 print("[{}] skipping spammy title: {}".format(doi, obj), file=sys.stderr)
                 return False