aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat/refs.py
diff options
context:
space:
mode:
authorMartin Czygan <martin.czygan@gmail.com>2021-02-11 13:51:13 +0100
committerMartin Czygan <martin.czygan@gmail.com>2021-02-11 13:51:13 +0100
commitc9cd6f76dd1dd080c1bc52159ab02ff5898f5f46 (patch)
tree7ec8962c6d19d81f44463a517d8397942c242c49 /fuzzycat/refs.py
parente75a77fdedae4a4a37c5ddc12c796c70164900dc (diff)
downloadfuzzycat-c9cd6f76dd1dd080c1bc52159ab02ff5898f5f46.tar.gz
fuzzycat-c9cd6f76dd1dd080c1bc52159ab02ff5898f5f46.zip
add a batch verifier for ref groups
Diffstat (limited to 'fuzzycat/refs.py')
-rw-r--r--fuzzycat/refs.py67
1 files changed, 67 insertions, 0 deletions
diff --git a/fuzzycat/refs.py b/fuzzycat/refs.py
new file mode 100644
index 0000000..04420d0
--- /dev/null
+++ b/fuzzycat/refs.py
@@ -0,0 +1,67 @@
+import collections
+import itertools
+import json
+import operator
+import sys
+
+from fuzzycat.verify import verify
+from fuzzycat.common import Reason, Status
+from glom import PathAccessError, glom
+
+
def find_release_entity(docs):
    """
    Return one "pivot" release entity, i.e. the first document in `docs`
    that does not have `extra.skate.status == "ref"`.

    A document qualifies either because the `extra.skate.status` path is
    missing altogether or because it holds a value other than "ref".

    Raises a ValueError, if every document in `docs` is a reference (or if
    `docs` is empty).
    """
    for doc in docs:
        try:
            status = glom(doc, "extra.skate.status")
        except PathAccessError:
            # No status recorded at all, so this cannot be a reference.
            return doc
        # A status other than "ref" must not be skipped silently; only
        # documents explicitly marked as "ref" are excluded.
        if status != "ref":
            return doc

    raise ValueError("docs do not contain any release")
+
def ref_entities(docs):
    """
    Generator yielding only those documents from `docs`, whose
    `extra.skate.status` equals "ref". Documents lacking that path are
    skipped.
    """
    for candidate in docs:
        try:
            status = glom(candidate, "extra.skate.status")
        except PathAccessError:
            # Path is absent, so there is no "ref" marker on this document.
            status = None
        if status == "ref":
            yield candidate
+
class RefsGroupVerifier:
    """
    A specific verifier for grouped releases and references. We do not need to
    pair-wise compare, just compare one release to all references.

    Each non-blank item of the iterable is parsed as a JSON document with the
    keys "k" and "v"; "v" holds the documents of one group.
    """
    def __init__(self,
                 iterable: collections.abc.Iterable,
                 verbose=False):
        """
        Keep the input `iterable` (lines of JSON) and set up an empty counter.
        If `verbose` is set, progress is reported on stderr while running.
        """
        self.iterable: collections.abc.Iterable = iterable
        self.verbose: bool = verbose
        # Number of verification results per reason, plus a "total" entry
        # that is written at the end of `run`.
        self.counter: collections.Counter = collections.Counter()

    def run(self):
        """
        Verify every group in the iterable: the pivot release of a group is
        compared against each reference entity of the same group. One line
        per comparison is printed to stdout (both release URLs, result and
        reason) and the reason is counted in `self.counter`.

        Errors from `json.loads` (malformed line), a missing "k" or "v" key
        (KeyError) and the ValueError of `find_release_entity` (group without
        a release) are not caught here.
        """
        get_key_values = operator.itemgetter("k", "v")
        for i, line in enumerate(self.iterable):
            if i % 20000 == 0 and self.verbose:
                print(i, file=sys.stderr)
            line = line.strip()
            if not line:
                continue
            doc = json.loads(line)
            # Only the grouped documents are needed; requesting "k" as well
            # still enforces that the key is present.
            _, vs = get_key_values(doc)
            pivot = find_release_entity(vs)
            for entity in ref_entities(vs):
                result, reason = verify(pivot, entity)
                self.counter[reason] += 1
                print("https://fatcat.wiki/release/{}".format(pivot["ident"]),
                      "https://fatcat.wiki/release/{}".format(entity["ident"]), result, reason)

        # Leave out a "total" entry from an earlier run, otherwise it would be
        # added on top of the per-reason counts.
        self.counter["total"] = sum(count for key, count in self.counter.items()
                                    if key != "total")
+