aboutsummaryrefslogtreecommitdiffstats
path: root/fuzzycat
diff options
context:
space:
mode:
author Martin Czygan <martin.czygan@gmail.com> 2020-12-16 03:01:21 +0100
committer Martin Czygan <martin.czygan@gmail.com> 2020-12-16 03:01:21 +0100
commit dbe01fde3db6f20c6d0e147fc80c7ff8c3473bab (patch)
tree 377e3cde41b1ae22e024ad4152d861df948f1b29 /fuzzycat
parent 1f6fe58efab213f446b05a22ca77b09b28398343 (diff)
download fuzzycat-dbe01fde3db6f20c6d0e147fc80c7ff8c3473bab.tar.gz
fuzzycat-dbe01fde3db6f20c6d0e147fc80c7ff8c3473bab.zip
add missing function
Diffstat (limited to 'fuzzycat')
-rw-r--r-- fuzzycat/__main__.py 2
-rw-r--r-- fuzzycat/matching.py 60
2 files changed, 60 insertions, 2 deletions
diff --git a/fuzzycat/__main__.py b/fuzzycat/__main__.py
index 3f9d379..8ac188b 100644
--- a/fuzzycat/__main__.py
+++ b/fuzzycat/__main__.py
@@ -59,6 +59,7 @@ import sys
import tempfile
import requests
+from fatcat_openapi_client import ReleaseEntity
from fuzzycat.cluster import (Cluster, release_key_title, release_key_title_ngram,
release_key_title_normalized, release_key_title_nysiis,
@@ -67,7 +68,6 @@ from fuzzycat.entities import entity_to_dict
from fuzzycat.matching import anything_to_entity, match_release_fuzzy
from fuzzycat.utils import random_idents_from_query, random_word
from fuzzycat.verify import GroupVerifier, verify
-from fatcat_openapi_client import ReleaseEntity
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
diff --git a/fuzzycat/matching.py b/fuzzycat/matching.py
index 48cc397..2b5f53a 100644
--- a/fuzzycat/matching.py
+++ b/fuzzycat/matching.py
@@ -1,11 +1,13 @@
import os
import re
+import sys
from typing import List, Type, Union
import elasticsearch
import elasticsearch_dsl
+import fatcat_openapi_client
import requests
-from fatcat_openapi_client import ContainerEntity, ReleaseEntity
+from fatcat_openapi_client import ContainerEntity, DefaultApi, ReleaseEntity
from fuzzycat.entities import entity_from_dict, entity_from_json
@@ -89,6 +91,62 @@ def match_release_fuzzy(release: ReleaseEntity, size=5, es=None) -> List[Release
return []
+def public_api(host_uri):
+    """
+    Build an unauthenticated fatcat API handle pointing at ``host_uri``.
+
+    Note: unlike the authenticated variant, this helper might get called even
+    if the API isn't going to be used, so it's important that it doesn't try to
+    actually connect to the API host or something. Only the configuration,
+    client and API wrapper objects are constructed here.
+    """
+    configuration = fatcat_openapi_client.Configuration()
+    configuration.host = host_uri
+    client = fatcat_openapi_client.ApiClient(configuration)
+    return fatcat_openapi_client.DefaultApi(client)
+
+
+def retrieve_entity_list(
+    ids: List[str],
+    api: DefaultApi = None,
+    entity_type: Union[Type[ReleaseEntity], Type[ContainerEntity]] = ReleaseEntity,
+) -> List[Union[ReleaseEntity, ContainerEntity]]:
+    """
+    Retrieve a list of entities. Some entities might be missing. Return all
+    that are accessible.
+
+    Args:
+        ids: entity identifiers to look up, one API call per identifier.
+        api: API handle to use; when None, a public client for
+            https://api.fatcat.wiki/v0 is created.
+        entity_type: the entity class to fetch, either ReleaseEntity
+            (default) or ContainerEntity.
+
+    Returns:
+        The successfully fetched entities, in the order of ``ids``. Lookups
+        that fail with an ApiException are reported on stderr and skipped.
+
+    Raises:
+        ValueError: if ``entity_type`` is neither ReleaseEntity nor
+            ContainerEntity.
+    """
+    # ApiException is not among the module's visible top-level imports, so the
+    # except clauses below would fail with a NameError; import it locally.
+    from fatcat_openapi_client.rest import ApiException
+
+    if api is None:
+        api = public_api("https://api.fatcat.wiki/v0")
+
+    # Select the lookup call once, so a single loop serves both entity types.
+    if entity_type is ReleaseEntity:
+        kind = "release"
+
+        def fetch(ident):
+            return api.get_release(ident, hide="refs,abstracts", expand="container")
+    elif entity_type is ContainerEntity:
+        kind = "container"
+        fetch = api.get_container
+    else:
+        raise ValueError("[err] cannot retrieve ids {} of type {}".format(ids, entity_type))
+
+    result = []
+    for ident in ids:
+        try:
+            entity = fetch(ident)
+        except ApiException as exc:
+            if exc.status == 404:
+                print("[err] failed to retrieve {} entity: {} (maybe stale index)".format(
+                    kind, ident),
+                      file=sys.stderr)
+            else:
+                # NOTE(review): the generated client's ApiException is expected
+                # to carry `reason` rather than `message` -- confirm; read both
+                # defensively so the handler itself cannot raise.
+                detail = getattr(exc, "message", None) or getattr(exc, "reason", None)
+                print("[err] api failed with {}: {}".format(exc.status, detail),
+                      file=sys.stderr)
+        else:
+            result.append(entity)
+
+    return result
+
+
def response_to_entity_list(response, size=5, entity_type=ReleaseEntity):
"""
Convert an elasticsearch result to a list of entities. Accepts both a