summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org> 2020-11-05 20:28:54 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2020-11-05 20:28:54 -0800
commit: 0c7dd38ed09c7a0584d079335fb3d1d53434628c (patch)
tree: 550c8da99ae1ff40e88c917702f74cf8fb3cce7f
parent: 1ed31621ae384f8b5e2a7d389347b8c97bcfefe3 (diff)
download: fatcat-0c7dd38ed09c7a0584d079335fb3d1d53434628c.tar.gz
          fatcat-0c7dd38ed09c7a0584d079335fb3d1d53434628c.zip
refactor: white/black -> allow/block
-rw-r--r--  python/fatcat_tools/importers/datacite.py  8
1 file changed, 4 insertions, 4 deletions
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 86740e80..5cdc5577 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -151,7 +151,7 @@ UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(set((
'Unknown',
)))
-# UNKNOWN_MARKERS_LOWER are lowercase version of UNKNOWN blacklist.
+# UNKNOWN_MARKERS_LOWER are lowercase version of UNKNOWN blocklist.
UNKNOWN_MARKERS_LOWER = set((v.lower() for v in UNKNOWN_MARKERS))
# Any "min" number of "tokens" will signal "spam", https://fatcat.wiki/release/rzcpjwukobd4pj36ipla22cnoi
@@ -346,7 +346,7 @@ class DataciteImporter(EntityImporter):
print('[{}] skipping record w/o title: {}'.format(doi, obj), file=sys.stderr)
return False
- # check for blacklisted "spam", e.g. "FULL MOVIE"
+ # check for blocklisted "spam", e.g. "FULL MOVIE"
for rule in DATACITE_TITLE_SPAM_WORDGROUPS:
seen = set()
for token in rule.get("tokens", []):
@@ -819,7 +819,7 @@ class DataciteImporter(EntityImporter):
contribs = []
# Names, that should be ignored right away.
- name_blacklist = set(('Occdownload Gbif.Org',))
+ name_blocklist = set(('Occdownload Gbif.Org',))
i = 0
for c in creators:
@@ -861,7 +861,7 @@ class DataciteImporter(EntityImporter):
continue
if not name:
name = "{} {}".format(given_name or '', surname or '').strip()
- if name in name_blacklist:
+ if name in name_blocklist:
continue
if name.lower() in UNKNOWN_MARKERS_LOWER:
continue