From 826c7538e091fac14d987a3cd654975da964e240 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 27 Oct 2021 18:50:17 -0700 Subject: make fmt (black 21.9b0) --- python/scripts/filter_groupworks.py | 40 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'python/scripts/filter_groupworks.py') diff --git a/python/scripts/filter_groupworks.py b/python/scripts/filter_groupworks.py index fda9098..87dae16 100755 --- a/python/scripts/filter_groupworks.py +++ b/python/scripts/filter_groupworks.py @@ -31,15 +31,15 @@ REQUIRE_AUTHORS = False def tokenize(s, remove_whitespace=False): - s.replace('&apos;', "'") + s.replace("&apos;", "'") # Remove non-alphanumeric characters - s = ''.join([c for c in s.lower() if c.isalnum() or c.isspace()]) + s = "".join([c for c in s.lower() if c.isalnum() or c.isspace()]) if remove_whitespace: - s = ''.join(s.split()) + s = "".join(s.split()) # Encode as dumb ASCII (TODO: this is horrible) - return s.encode('ascii', 'replace').replace(b'?', b'') + return s.encode("ascii", "replace").replace(b"?", b"") def check_authors(left, right): @@ -53,7 +53,7 @@ def check_authors(left, right): return False right_all = tokenize(" ".join(right)) for i in range(len(left)): - l = left[i].lower().replace('jr.', '').split() + l = left[i].lower().replace("jr.", "").split() if not l: return False l = tokenize(l[-1]) @@ -61,21 +61,21 @@ def check_authors(left, right): # weird author name (single char) return False if l not in right_all: - #print("MISSING: {} from {}".format(l.decode('utf8'), right_all.decode('utf8'))) + # print("MISSING: {} from {}".format(l.decode('utf8'), right_all.decode('utf8'))) return False return True def test_check_authors(): assert check_authors([], []) == bool(not REQUIRE_AUTHORS) - assert not check_authors([], ['one']) - assert check_authors(['one'], ['one']) - assert check_authors(['one two'], ['One Two']) - assert check_authors(['two'], ['One Two']) - assert check_authors(['two'], ['two, 
one']) - assert check_authors(['mago'], ['Mr. Magoo']) - assert check_authors(['Mr. Magoo'], ['Mr Magoo']) - assert check_authors(['one', 'tw', 'thr'], ['one', 'two', 'three']) + assert not check_authors([], ["one"]) + assert check_authors(["one"], ["one"]) + assert check_authors(["one two"], ["One Two"]) + assert check_authors(["two"], ["One Two"]) + assert check_authors(["two"], ["two, one"]) + assert check_authors(["mago"], ["Mr. Magoo"]) + assert check_authors(["Mr. Magoo"], ["Mr Magoo"]) + assert check_authors(["one", "tw", "thr"], ["one", "two", "three"]) # Rows are (score, left, right) @@ -90,10 +90,10 @@ def process_group(rows): left = json.loads(row[1]) right = json.loads(row[2]) # authors must roughly match - if not check_authors(left['authors'], right['authors']): + if not check_authors(left["authors"], right["authors"]): continue # years must match (if defined) - if left['year'] and right['year'] and left['year'] != right['year']: + if left["year"] and right["year"] and left["year"] != right["year"]: continue filtered.append((left, right)) @@ -105,8 +105,8 @@ def process_group(rows): group_ids = set() for row in filtered[1:]: (left, right) = row - l_id = left['fatcat_release'] - r_id = right['fatcat_release'] + l_id = left["fatcat_release"] + r_id = right["fatcat_release"] releases[l_id] = left releases[r_id] = right if not group_ids: @@ -131,7 +131,7 @@ def run(): # group lines by slug, and process in batches for line in sys.stdin: - line = line.strip().split('\t') + line = line.strip().split("\t") assert len(line) == 4 slug = line[0] if last_slug and slug != last_slug and lines: @@ -146,5 +146,5 @@ def run(): process_group(lines) -if __name__ == '__main__': +if __name__ == "__main__": run() -- cgit v1.2.3