aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fuzzycat/__main__.py10
-rw-r--r--fuzzycat/verify.py2
2 files changed, 10 insertions, 2 deletions
diff --git a/fuzzycat/__main__.py b/fuzzycat/__main__.py
index d4176da..ab1727b 100644
--- a/fuzzycat/__main__.py
+++ b/fuzzycat/__main__.py
@@ -102,7 +102,10 @@ def run_verify(args):
"""
Run match verification over dataset from clustering step.
"""
- GroupVerifier(iterable=fileinput.input(files=args.files)).run()
+ verifier = GroupVerifier(iterable=fileinput.input(files=args.files),
+ verbose=args.verbose,
+ max_cluster_size=args.max_cluster_size)
+ verifier.run()
def run_verify_single(args):
@@ -208,6 +211,7 @@ if __name__ == '__main__':
help="output format, e.g. tsv or json",
default="tsv")
parser.add_argument("-s", "--size", help="number of results to return", default=5, type=int)
+ parser.add_argument("-v", "--verbose", help="be verbose", action='store_true')
subparsers = parser.add_subparsers()
sub_cluster = subparsers.add_parser('cluster', help='group entities', parents=[parser])
@@ -225,6 +229,10 @@ if __name__ == '__main__':
sub_verify = subparsers.add_parser('verify', help='verify groups', parents=[parser])
sub_verify.add_argument('-f', '--files', default="-", help='input files')
+ sub_verify.add_argument('--max-cluster-size',
+ default=10,
+ type=int,
+ help='ignore large clusters')
sub_verify.set_defaults(func=run_verify)
sub_verify_single = subparsers.add_parser('verify_single',
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index 7f44f39..e9bbde9 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -109,7 +109,7 @@ class GroupVerifier:
def __init__(self,
iterable: collections.abc.Iterable,
max_cluster_size: int = 10,
- verbose=True):
+ verbose=False):
self.iterable: collections.abc.Iterable = iterable
self.max_cluster_size: int = max_cluster_size
self.verbose: bool = verbose