diff options
-rw-r--r-- | fuzzycat/__main__.py | 10 | ||||
-rw-r--r-- | fuzzycat/verify.py | 2 |
2 files changed, 10 insertions, 2 deletions
diff --git a/fuzzycat/__main__.py b/fuzzycat/__main__.py
index d4176da..ab1727b 100644
--- a/fuzzycat/__main__.py
+++ b/fuzzycat/__main__.py
@@ -102,7 +102,10 @@ def run_verify(args):
     """
     Run match verification over dataset from clustering step.
     """
-    GroupVerifier(iterable=fileinput.input(files=args.files)).run()
+    verifier = GroupVerifier(iterable=fileinput.input(files=args.files),
+                             verbose=args.verbose,
+                             max_cluster_size=args.max_cluster_size)
+    verifier.run()
 
 
 def run_verify_single(args):
@@ -208,6 +211,7 @@ if __name__ == '__main__':
                         help="output format, e.g. tsv or json",
                         default="tsv")
     parser.add_argument("-s", "--size", help="number of results to return", default=5, type=int)
+    parser.add_argument("-v", "--verbose", help="be verbose", action='store_true')
 
     subparsers = parser.add_subparsers()
     sub_cluster = subparsers.add_parser('cluster', help='group entities', parents=[parser])
@@ -225,6 +229,10 @@ if __name__ == '__main__':
 
     sub_verify = subparsers.add_parser('verify', help='verify groups', parents=[parser])
     sub_verify.add_argument('-f', '--files', default="-", help='input files')
+    sub_verify.add_argument('--max-cluster-size',
+                            default=10,
+                            type=int,
+                            help='ignore large clusters')
     sub_verify.set_defaults(func=run_verify)
 
     sub_verify_single = subparsers.add_parser('verify_single',
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 7f44f39..e9bbde9 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -109,7 +109,7 @@ class GroupVerifier:
     def __init__(self,
                  iterable: collections.abc.Iterable,
                  max_cluster_size: int = 10,
-                 verbose=True):
+                 verbose=False):
         self.iterable: collections.abc.Iterable = iterable
         self.max_cluster_size: int = max_cluster_size
         self.verbose: bool = verbose