aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-02-11 13:53:26 +0100
committerMartin Czygan <martin.czygan@gmail.com>2021-02-11 13:53:26 +0100
commitcac2bc677fb7dfca9230ec4cee343b983e31fc96 (patch)
treed7b2fde327062032090b31f3036bb0ceaed64a3f
parentc9edc48814b643655240256f973edc319f6fc7f5 (diff)
downloadfuzzycat-cac2bc677fb7dfca9230ec4cee343b983e31fc96.tar.gz
fuzzycat-cac2bc677fb7dfca9230ec4cee343b983e31fc96.zip
update notes
-rw-r--r--fuzzycat/__main__.py14
-rw-r--r--fuzzycat/refs.py12
2 files changed, 16 insertions, 10 deletions
diff --git a/fuzzycat/__main__.py b/fuzzycat/__main__.py
index 67ffa40..9121bd8 100644
--- a/fuzzycat/__main__.py
+++ b/fuzzycat/__main__.py
@@ -6,6 +6,7 @@ COMMANDS
cluster
verify
verify_single
+ verify_ref
release_match
Run, e.g. fuzzycat cluster --help for more options.
@@ -38,6 +39,10 @@ EXAMPLES
]
}
+ Verify clustered refs:
+
+ $ python -m fuzzycat verify_ref
+
Release match (non-bulk).
$ python -m fuzzycat release_match -q "hello world"
@@ -66,8 +71,8 @@ from fuzzycat.cluster import (Cluster, release_key_title, release_key_title_ngra
release_key_title_sandcrawler)
from fuzzycat.entities import entity_to_dict
from fuzzycat.matching import anything_to_entity, match_release_fuzzy
-from fuzzycat.utils import random_idents_from_query, random_word
from fuzzycat.refs import RefsGroupVerifier
+from fuzzycat.utils import random_idents_from_query, random_word
from fuzzycat.verify import GroupVerifier, verify
logging.getLogger("requests").setLevel(logging.WARNING)
@@ -161,11 +166,12 @@ def run_verify_single(args):
})
print(json.dumps(result))
+
def run_ref_verify(args):
- verifier = RefsGroupVerifier(iterable=fileinput.input(files=args.files),
- verbose=args.verbose)
+ verifier = RefsGroupVerifier(iterable=fileinput.input(files=args.files), verbose=args.verbose)
verifier.run()
+
def run_release_match(args):
"""
Given a release, return similar releases.
@@ -253,7 +259,7 @@ if __name__ == '__main__':
sub_verify_single.add_argument('-b', help='ident or url to release')
sub_verify_single.set_defaults(func=run_verify_single)
- sub_ref_verify = subparsers.add_parser('verify', help='verify ref groups', parents=[parser])
+ sub_ref_verify = subparsers.add_parser('verify_ref', help='verify ref groups', parents=[parser])
sub_ref_verify.add_argument('-f', '--files', default="-", help='input files')
sub_ref_verify.set_defaults(func=run_ref_verify)
diff --git a/fuzzycat/refs.py b/fuzzycat/refs.py
index 04420d0..689d800 100644
--- a/fuzzycat/refs.py
+++ b/fuzzycat/refs.py
@@ -4,10 +4,11 @@ import json
import operator
import sys
-from fuzzycat.verify import verify
-from fuzzycat.common import Reason, Status
from glom import PathAccessError, glom
+from fuzzycat.common import Reason, Status
+from fuzzycat.verify import verify
+
def find_release_entity(docs):
"""
@@ -23,6 +24,7 @@ def find_release_entity(docs):
raise ValueError("docs do not contain any release")
+
def ref_entities(docs):
"""
Generator yielding ref entities only.
@@ -34,14 +36,13 @@ def ref_entities(docs):
except PathAccessError:
continue
+
class RefsGroupVerifier:
"""
A specific verifier for grouped releases and references. We do not need to
pair-wise compare, just compare one release to all references.
"""
- def __init__(self,
- iterable: collections.abc.Iterable,
- verbose=False):
+ def __init__(self, iterable: collections.abc.Iterable, verbose=False):
self.iterable: collections.abc.Iterable = iterable
self.verbose: bool = verbose
self.counter: Counter = collections.Counter()
@@ -64,4 +65,3 @@ class RefsGroupVerifier:
"https://fatcat.wiki/release/{}".format(b["ident"]), result, reason)
self.counter["total"] = sum(v for _, v in self.counter.items())
-