diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-12-16 03:01:21 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-12-16 03:01:21 +0100 |
commit | dbe01fde3db6f20c6d0e147fc80c7ff8c3473bab (patch) | |
tree | 377e3cde41b1ae22e024ad4152d861df948f1b29 | |
parent | 1f6fe58efab213f446b05a22ca77b09b28398343 (diff) | |
download | fuzzycat-dbe01fde3db6f20c6d0e147fc80c7ff8c3473bab.tar.gz fuzzycat-dbe01fde3db6f20c6d0e147fc80c7ff8c3473bab.zip |
add missing function
-rw-r--r-- | fuzzycat/__main__.py | 2 | ||||
-rw-r--r-- | fuzzycat/matching.py | 60 |
2 files changed, 60 insertions, 2 deletions
```diff
diff --git a/fuzzycat/__main__.py b/fuzzycat/__main__.py
index 3f9d379..8ac188b 100644
--- a/fuzzycat/__main__.py
+++ b/fuzzycat/__main__.py
@@ -59,6 +59,7 @@ import sys
 import tempfile
 
 import requests
+from fatcat_openapi_client import ReleaseEntity
 
 from fuzzycat.cluster import (Cluster, release_key_title, release_key_title_ngram,
                               release_key_title_normalized, release_key_title_nysiis,
@@ -67,7 +68,6 @@ from fuzzycat.entities import entity_to_dict
 from fuzzycat.matching import anything_to_entity, match_release_fuzzy
 from fuzzycat.utils import random_idents_from_query, random_word
 from fuzzycat.verify import GroupVerifier, verify
-from fatcat_openapi_client import ReleaseEntity
 
 logging.getLogger("requests").setLevel(logging.WARNING)
 logging.getLogger("urllib3").setLevel(logging.WARNING)
diff --git a/fuzzycat/matching.py b/fuzzycat/matching.py
index 48cc397..2b5f53a 100644
--- a/fuzzycat/matching.py
+++ b/fuzzycat/matching.py
@@ -1,11 +1,13 @@
 import os
 import re
+import sys
 from typing import List, Type, Union
 
 import elasticsearch
 import elasticsearch_dsl
+import fatcat_openapi_client
 import requests
-from fatcat_openapi_client import ContainerEntity, ReleaseEntity
+from fatcat_openapi_client import ContainerEntity, DefaultApi, ReleaseEntity
 
 from fuzzycat.entities import entity_from_dict, entity_from_json
 
@@ -89,6 +91,62 @@ def match_release_fuzzy(release: ReleaseEntity, size=5, es=None) -> List[Release
     return []
 
 
+def public_api(host_uri):
+    """
+    Note: unlike the authenticated variant, this helper might get called even
+    if the API isn't going to be used, so it's important that it doesn't try to
+    actually connect to the API host or something.
+    """
+    conf = fatcat_openapi_client.Configuration()
+    conf.host = host_uri
+    return fatcat_openapi_client.DefaultApi(fatcat_openapi_client.ApiClient(conf))
+
+
+def retrieve_entity_list(
+    ids: List[str],
+    api: DefaultApi = None,
+    entity_type: Union[Type[ReleaseEntity], Type[ContainerEntity]] = ReleaseEntity,
+) -> List[Union[Type[ReleaseEntity], Type[ContainerEntity]]]:
+    """
+    Retrieve a list of entities. Some entities might be missing. Return all
+    that are accessible.
+    """
+    if api is None:
+        api = public_api("https://api.fatcat.wiki/v0")
+    result = []
+    if entity_type == ReleaseEntity:
+        for id in ids:
+            try:
+                re = api.get_release(id, hide="refs,abstracts", expand="container")
+                result.append(re)
+            except ApiException as exc:
+                if exc.status == 404:
+                    print("[err] failed to retrieve release entity: {} (maybe stale index)".format(
+                        id),
+                          file=sys.stderr)
+                else:
+                    print("[err] api failed with {}: {}".format(exc.status, exc.message),
+                          file=sys.stderr)
+    elif entity_type == ContainerEntity:
+        for id in ids:
+            try:
+                re = api.get_container(id)
+                result.append(re)
+            except ApiException as exc:
+                if exc.status == 404:
+                    print(
+                        "[err] failed to retrieve container entity: {} (maybe stale index)".format(
+                            id),
+                        file=sys.stderr)
+                else:
+                    print("[err] api failed with {}: {}".format(exc.status, exc.message),
+                          file=sys.stderr)
+    else:
+        raise ValueError("[err] cannot retrieve ids {} of type {}".format(ids, entity_type))
+
+    return result
+
+
 def response_to_entity_list(response, size=5, entity_type=ReleaseEntity):
     """
     Convert an elasticsearch result to a list of entities. Accepts both a
```