diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2020-11-05 20:28:54 -0800 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-11-05 20:28:54 -0800 | 
| commit | 0c7dd38ed09c7a0584d079335fb3d1d53434628c (patch) | |
| tree | 550c8da99ae1ff40e88c917702f74cf8fb3cce7f /python/fatcat_tools/importers | |
| parent | 1ed31621ae384f8b5e2a7d389347b8c97bcfefe3 (diff) | |
| download | fatcat-0c7dd38ed09c7a0584d079335fb3d1d53434628c.tar.gz fatcat-0c7dd38ed09c7a0584d079335fb3d1d53434628c.zip  | |
refactor: white/black -> allow/block
Diffstat (limited to 'python/fatcat_tools/importers')
| -rw-r--r-- | python/fatcat_tools/importers/datacite.py | 8 | 
1 file changed, 4 insertions, 4 deletions
```diff
diff --git a/python/fatcat_tools/importers/datacite.py b/python/fatcat_tools/importers/datacite.py
index 86740e80..5cdc5577 100644
--- a/python/fatcat_tools/importers/datacite.py
+++ b/python/fatcat_tools/importers/datacite.py
@@ -151,7 +151,7 @@ UNKNOWN_MARKERS = set(DATACITE_UNKNOWN_MARKERS).union(set((
     'Unknown',
 )))
 
-# UNKNOWN_MARKERS_LOWER are lowercase version of UNKNOWN blacklist.
+# UNKNOWN_MARKERS_LOWER are lowercase version of UNKNOWN blocklist.
 UNKNOWN_MARKERS_LOWER = set((v.lower() for v in UNKNOWN_MARKERS))
 
 # Any "min" number of "tokens" will signal "spam", https://fatcat.wiki/release/rzcpjwukobd4pj36ipla22cnoi
@@ -346,7 +346,7 @@ class DataciteImporter(EntityImporter):
             print('[{}] skipping record w/o title: {}'.format(doi, obj), file=sys.stderr)
             return False
 
-        # check for blacklisted "spam", e.g. "FULL MOVIE"
+        # check for blocklisted "spam", e.g. "FULL MOVIE"
         for rule in DATACITE_TITLE_SPAM_WORDGROUPS:
             seen = set()
             for token in rule.get("tokens", []):
@@ -819,7 +819,7 @@ class DataciteImporter(EntityImporter):
         contribs = []
 
         # Names, that should be ignored right away.
-        name_blacklist = set(('Occdownload Gbif.Org',))
+        name_blocklist = set(('Occdownload Gbif.Org',))
 
         i = 0
         for c in creators:
@@ -861,7 +861,7 @@ class DataciteImporter(EntityImporter):
                     continue
                 if not name:
                     name = "{} {}".format(given_name or '', surname or '').strip()
-                if name in name_blacklist:
+                if name in name_blocklist:
                     continue
                 if name.lower() in UNKNOWN_MARKERS_LOWER:
                     continue
```
