verify stub

author: Martin Czygan <martin.czygan@gmail.com> 2020-11-11 00:29:31 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2020-11-11 00:29:31 +0100
commit: fb9f734db3ec0619188488ba1e37acfcc121113e (patch)
tree: 071394cad657767c414df3519fdda369bc6ab2f7
parent: a871e19c2e0aa3c94e338a27f4cc73b76d8ff9c0 (diff)
download: fuzzycat-fb9f734db3ec0619188488ba1e37acfcc121113e.tar.gz fuzzycat-fb9f734db3ec0619188488ba1e37acfcc121113e.zip
2 files changed, 8 insertions, 21 deletions
diff --git a/fuzzycat/main.py b/fuzzycat/main.py
index d2cdf4d..c7ba23d 100644
--- a/fuzzycat/main.py
+++ b/fuzzycat/main.py
@@ -15,9 +15,9 @@ import cProfile as profile
 import io
 import logging
 import pstats
-# import json
 import sys
 import tempfile
+import fileinput
 
 import orjson as json
 
@@ -25,7 +25,6 @@ from fuzzycat.build import NgramLookup, TitleTokenList
 from fuzzycat.cluster import (Cluster, release_key_title, release_key_title_normalized,
                               release_key_title_nysiis, release_key_title_ngram)
 
-
 def run_cluster(args):
     logger = logging.getLogger('main.run_cluster')
     types = {
@@ -44,9 +43,11 @@ def run_cluster(args):
 
 def run_verify(args):
     """
-    TODO.
+    TODO. Ok, we should not fetch data we have on disk (at the clustering
+    step).
     """
-    print('verify')
+    for line in fileinput.input(files=args.files):
+        pass
 
 
 def run_build(args):
@@ -88,6 +89,7 @@ if __name__ == '__main__':
                              help='cluster algorithm: title, tnorm, tnysi, tss')
 
     sub_verify = subparsers.add_parser('verify', help='verify groups', parents=[parser])
+    sub_verify.add_argument('-f', '--files', default="-", help='input files')
     sub_verify.set_defaults(func=run_verify)
 
     sub_build = subparsers.add_parser('build', help='build auxiliary databases', parents=[parser])
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index a9cc799..841df49 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -21,22 +21,7 @@ Further steps:
 * fetch all releases, this might be via API, search index, some local key value
 store, or some other cache
 * apply various rules, return match status
+* alternatively: have a few more fields in the intermediate representation (to
+keep operation local)
 
 """
-
-import requests
-
-
-def fetch_release_entity(ident, api="https://api.fatcat.wiki/v0"):
-    """
-    Fetches a single release entity.
-    """
-    link = "https://api.fatcat.wiki/v0/release/{}".format(ident)
-    return requests.get(link).json()
-
-
-def ident_to_release_entities(ids):
-    """
-    Turn a list of ids into release entities.
-    """
-    return [fetch_release_entity(id) for id in ids]
author	Martin Czygan <martin.czygan@gmail.com>	2020-11-11 00:29:31 +0100
committer	Martin Czygan <martin.czygan@gmail.com>	2020-11-11 00:29:31 +0100
commit	fb9f734db3ec0619188488ba1e37acfcc121113e (patch)
tree	071394cad657767c414df3519fdda369bc6ab2f7
parent	a871e19c2e0aa3c94e338a27f4cc73b76d8ff9c0 (diff)
download	fuzzycat-fb9f734db3ec0619188488ba1e37acfcc121113e.tar.gz fuzzycat-fb9f734db3ec0619188488ba1e37acfcc121113e.zip