From 36cedfde374a2643396b070d3116e4b568500e14 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 3 Nov 2021 14:01:33 -0700
Subject: more involved type wrangling and fixes for importers

---
 python/fatcat_tools/importers/datacite.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'python/fatcat_tools/importers/datacite.py')

diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 997f8dc8..6eed8991 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -376,10 +376,11 @@ class DataciteImporter(EntityImporter):
         # check for blocklisted "spam", e.g. "FULL MOVIE"
         for rule in DATACITE_TITLE_SPAM_WORDGROUPS:
             seen = set()
-            for token in rule.get("tokens", []):
+            token_list: List[str] = rule.get("tokens") or []
+            for token in token_list:
                 if token in title.lower():
                     seen.add(token)
-            if len(seen) >= rule.get("min"):
+            if len(seen) >= rule["min"]:
                 print("[{}] skipping spammy title: {}".format(doi, obj), file=sys.stderr)
                 return False
 
-- 
cgit v1.2.3