diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-11-11 00:29:31 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-11-11 00:29:31 +0100 |
commit | fb9f734db3ec0619188488ba1e37acfcc121113e (patch) | |
tree | 071394cad657767c414df3519fdda369bc6ab2f7 | |
parent | a871e19c2e0aa3c94e338a27f4cc73b76d8ff9c0 (diff) | |
download | fuzzycat-fb9f734db3ec0619188488ba1e37acfcc121113e.tar.gz fuzzycat-fb9f734db3ec0619188488ba1e37acfcc121113e.zip |
verify stub
-rw-r--r-- | fuzzycat/main.py | 10 | ||||
-rw-r--r-- | fuzzycat/verify.py | 19 |
2 files changed, 8 insertions, 21 deletions
```diff
diff --git a/fuzzycat/main.py b/fuzzycat/main.py
index d2cdf4d..c7ba23d 100644
--- a/fuzzycat/main.py
+++ b/fuzzycat/main.py
@@ -15,9 +15,9 @@ import cProfile as profile
 import io
 import logging
 import pstats
-# import json
 import sys
 import tempfile
+import fileinput
 
 import orjson as json
 
@@ -25,7 +25,6 @@ from fuzzycat.build import NgramLookup, TitleTokenList
 from fuzzycat.cluster import (Cluster, release_key_title, release_key_title_normalized,
                               release_key_title_nysiis, release_key_title_ngram)
 
-
 def run_cluster(args):
     logger = logging.getLogger('main.run_cluster')
     types = {
@@ -44,9 +43,11 @@ def run_cluster(args):
 
 def run_verify(args):
     """
-    TODO.
+    TODO. Ok, we should not fetch data we have on disk (at the clustering
+    step).
     """
-    print('verify')
+    for line in fileinput.input(files=args.files):
+        pass
 
 
 def run_build(args):
@@ -88,6 +89,7 @@ if __name__ == '__main__':
                              help='cluster algorithm: title, tnorm, tnysi, tss')
 
     sub_verify = subparsers.add_parser('verify', help='verify groups', parents=[parser])
+    sub_verify.add_argument('-f', '--files', default="-", help='input files')
     sub_verify.set_defaults(func=run_verify)
 
     sub_build = subparsers.add_parser('build', help='build auxiliary databases', parents=[parser])
diff --git a/fuzzycat/verify.py b/fuzzycat/verify.py
index a9cc799..841df49 100644
--- a/fuzzycat/verify.py
+++ b/fuzzycat/verify.py
@@ -21,22 +21,7 @@ Further steps:
 * fetch all releases, this might be via API, search index, some local key value
 store, or some other cache
 * apply various rules, return match status
+* alternatively: have a few more fields in the intermediate representation (to
+keep operation local)
 
 """
-
-import requests
-
-
-def fetch_release_entity(ident, api="https://api.fatcat.wiki/v0"):
-    """
-    Fetches a single release entity.
-    """
-    link = "https://api.fatcat.wiki/v0/release/{}".format(ident)
-    return requests.get(link).json()
-
-
-def ident_to_release_entities(ids):
-    """
-    Turn a list of ids into release entities.
-    """
-    return [fetch_release_entity(id) for id in ids]
```