aboutsummaryrefslogtreecommitdiffstats
path: root/python/scripts/filter_groupworks.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/scripts/filter_groupworks.py')
-rwxr-xr-x python/scripts/filter_groupworks.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/python/scripts/filter_groupworks.py b/python/scripts/filter_groupworks.py
index 494da71..fda9098 100755
--- a/python/scripts/filter_groupworks.py
+++ b/python/scripts/filter_groupworks.py
@@ -28,6 +28,7 @@ MAX_SLUG_LINES = 50
REQUIRE_AUTHORS = False
+
def tokenize(s, remove_whitespace=False):
s.replace('&apos;', "'")
@@ -40,6 +41,7 @@ def tokenize(s, remove_whitespace=False):
# Encode as dumb ASCII (TODO: this is horrible)
return s.encode('ascii', 'replace').replace(b'?', b'')
+
def check_authors(left, right):
"""
Intended to check GROBID extracted authors (right) against "known good"
@@ -63,6 +65,7 @@ def check_authors(left, right):
return False
return True
+
def test_check_authors():
assert check_authors([], []) == bool(not REQUIRE_AUTHORS)
assert not check_authors([], ['one'])
@@ -74,6 +77,7 @@ def test_check_authors():
assert check_authors(['Mr. Magoo'], ['Mr Magoo'])
assert check_authors(['one', 'tw', 'thr'], ['one', 'two', 'three'])
+
# Rows are (score, left, right)
def process_group(rows):
@@ -119,6 +123,7 @@ def process_group(rows):
print(json.dumps([releases[ident] for ident in group_ids]))
+
def run():
last_slug = None
@@ -140,5 +145,6 @@ def run():
if lines:
process_group(lines)
-if __name__=='__main__':
+
+if __name__ == '__main__':
run()