address es hits.total change in ES7

* https://www.elastic.co/guide/en/elasticsearch/reference/current/breaking-changes-7.0.html
author: Martin Czygan <martin.czygan@gmail.com> 2021-04-12 19:42:31 +0200
committer: Martin Czygan <martin.czygan@gmail.com> 2021-04-12 19:42:31 +0200
commit: 07c39548f848ded84bbce8455b974a5e298f1ea2 (patch)
tree: 03dfd6936667c3cfb3f2d8b96fd54ed6a004fbd7
parent: 81220a314a6bb179db3554ceb36958417535390f (diff)
download: fuzzycat-07c39548f848ded84bbce8455b974a5e298f1ea2.tar.gz
fuzzycat-07c39548f848ded84bbce8455b974a5e298f1ea2.zip
3 files changed, 28 insertions, 6 deletions
diff --git a/fuzzycat/matching.py b/fuzzycat/matching.py
index df9617b..9ccb62b 100644
--- a/fuzzycat/matching.py
+++ b/fuzzycat/matching.py
@@ -12,6 +12,7 @@ from fatcat_openapi_client import ContainerEntity, DefaultApi, ReleaseEntity
 from fatcat_openapi_client.rest import ApiException
 
 from fuzzycat.entities import entity_from_dict, entity_from_json
+from fuzzycat.utils import es_compat_hits_total
 
 settings = Dynaconf(envvar_prefix="FUZZYCAT")
 FATCAT_API_URL = settings.get("FATCAT_API_URL", "https://api.fatcat.wiki/v0")
@@ -79,7 +80,7 @@ def match_release_fuzzy(
         "size": size,
     }
     resp = es.search(body=body, index="fatcat_release")
-    if resp["hits"]["total"] > 0:
+    if es_compat_hits_total(resp) > 0:
         return response_to_entity_list(resp, entity_type=ReleaseEntity, api=api)
 
     # Get fuzzy.
@@ -97,7 +98,7 @@ def match_release_fuzzy(
         "size": size,
     }
     resp = es.search(body=body, index="fatcat_release")
-    if resp["hits"]["total"] > 0:
+    if es_compat_hits_total(resp) > 0:
         return response_to_entity_list(resp, entity_type=ReleaseEntity, api=api)
 
     # TODO: perform more queries on other fields.
@@ -209,9 +210,9 @@ def anything_to_entity(
 
     if re.match("[0-9]{4}(-)?[0-9]{3,3}[0-9xx]", s):
         # TODO: make index name configurable
-        url = "{}/fatcat_{}/_search?q=issns:{}".format(es_url, entity_name, s)
+        url = "{}/fatcat_{}/_search?track_total_hits=true&q=issns:{}".format(es_url, entity_name, s)
         doc = requests.get(url).json()
-        if doc["hits"]["total"] == 1:
+        if es_compat_hits_total(doc) == 1:
             ident = doc["hits"]["hits"][0]["_source"]["ident"]
             url = "{}/{}/{}".format(api_url, entity_name, ident)
             return entity_from_json(requests.get(url).text, entity_type)
diff --git a/fuzzycat/utils.py b/fuzzycat/utils.py
index 0f73456..bd7ceed 100644
--- a/fuzzycat/utils.py
+++ b/fuzzycat/utils.py
@@ -19,6 +19,18 @@ CHEM_FORMULA = re.compile(r"([A-Z]{1,2}[0-9]{1,2})+")
 
 ParsedPages = collections.namedtuple("ParsedPages", "start end count")
 
+def es_compat_hits_total(resp):
+    """
+    Given a search response dict, support ES6 and ES7 style total value. See:
+    https://www.elastic.co/guide/en/elasticsearch/reference/current/breaking-changes-7.0.html
+
+    It is the responsibility of the call site to set `track_total_hits` in ES7 to
+    get an exact number.
+    """
+    try:
+        return resp["hits"]["total"]["value"]
+    except TypeError:
+        return resp["hits"]["total"]
 
 def parse_page_string(s):
     """
@@ -177,7 +189,7 @@ def random_idents_from_query(query="*",
     if resp.status_code != 200:
         raise RuntimeError('could not query {} for random item: {}'.format(es, r.url))
     payload = resp.json()
-    if payload["hits"]["total"] < 2:
+    if es_compat_hits_total(payload) < 2:
         raise RuntimeError('to few documents')
     idents = [doc["_source"]["ident"] for doc in payload["hits"]["hits"]]
     return random.sample(idents, r)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 29b125b..24be9d1 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -3,7 +3,7 @@ import os
 
 from fuzzycat.utils import (author_similarity_score, cut, jaccard, nwise, slugify_string,
                             token_n_grams, tokenize_string, parse_page_string, dict_key_exists,
-                            zstdlines)
+                            zstdlines, es_compat_hits_total)
 
 
 def test_slugify_string():
@@ -98,3 +98,12 @@ def test_zstdlines():
     for zfn, fn in examples:
         with open(fn) as f:
             assert [s.strip() for s in f.readlines()] == list(zstdlines(zfn))
+
+def test_es_compat_hits_total():
+    cases = (
+        ({"hits": {"total": 6}}, 6),
+        ({"hits": {"total": {"value": 7, "relation": "eq"}}}, 7),
+    )
+    for r, expected in cases:
+        assert es_compat_hits_total(r) == expected
+
author	Martin Czygan <martin.czygan@gmail.com>	2021-04-12 19:42:31 +0200
committer	Martin Czygan <martin.czygan@gmail.com>	2021-04-12 19:42:31 +0200
commit	07c39548f848ded84bbce8455b974a5e298f1ea2 (patch)
tree	03dfd6936667c3cfb3f2d8b96fd54ed6a004fbd7
parent	81220a314a6bb179db3554ceb36958417535390f (diff)
download	fuzzycat-07c39548f848ded84bbce8455b974a5e298f1ea2.tar.gz fuzzycat-07c39548f848ded84bbce8455b974a5e298f1ea2.zip