diff options
| author | bnewbold <bnewbold@archive.org> | 2021-04-07 05:47:06 +0000 | 
|---|---|---|
| committer | bnewbold <bnewbold@archive.org> | 2021-04-07 05:47:06 +0000 | 
| commit | 0b9fc884dad8e3147d10c273725157ba60f48069 (patch) | |
| tree | 8090fcf43dfef8b2f46fc6a2161c46257e22ff2b /python/fatcat_web | |
| parent | c0b145978280d53207aa714aab67cb582d9399ad (diff) | |
| parent | c23f050426c1422e84019fe60d4d67865b962f31 (diff) | |
| download | fatcat-0b9fc884dad8e3147d10c273725157ba60f48069.tar.gz fatcat-0b9fc884dad8e3147d10c273725157ba60f48069.zip | |
Merge branch 'bnewbold-es7' into 'master'
elasticsearch 7.x support
See merge request webgroup/fatcat!100
Diffstat (limited to 'python/fatcat_web')
| -rw-r--r-- | python/fatcat_web/search.py | 30 | 
1 file changed, 21 insertions, 9 deletions
| diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index d7547cc8..0cdb604a 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -88,6 +88,16 @@ class SearchHits:      query_time_ms: int      results: List[Any] +def _hits_total_int(val: Any) -> int: +    """ +    Compatibility hack between ES 6.x and 7.x. In ES 6x, total is returned as +    an int in many places, in ES 7 as a dict (JSON object) with 'value' key +    """ +    if isinstance(val, int): +        return val +    else: +        return int(val['value']) +  def results_to_dict(response: elasticsearch_dsl.response.Response) -> List[dict]:      """ @@ -173,13 +183,14 @@ def do_container_search(          offset = deep_page_limit      search = search[offset : (offset + limit)] +    search = search.params(track_total_hits=True)      resp = wrap_es_execution(search)      results = results_to_dict(resp)      return SearchHits(          count_returned=len(results), -        count_found=int(resp.hits.total), +        count_found=_hits_total_int(resp.hits.total),          offset=offset,          limit=limit,          deep_page_limit=deep_page_limit, @@ -241,6 +252,7 @@ def do_release_search(          offset = deep_page_limit      search = search[offset : (offset + limit)] +    search = search.params(track_total_hits=True)      resp = wrap_es_execution(search)      results = results_to_dict(resp) @@ -253,7 +265,7 @@ def do_release_search(      return SearchHits(          count_returned=len(results), -        count_found=int(resp.hits.total), +        count_found=_hits_total_int(resp.hits.total),          offset=offset,          limit=limit,          deep_page_limit=deep_page_limit, @@ -310,7 +322,7 @@ def get_elastic_entity_stats() -> dict:      resp = wrap_es_execution(search)      stats['release'] = { -        "total": int(resp.hits.total), +        "total": _hits_total_int(resp.hits.total),          "refs_total": int(resp.aggregations.release_ref_count.value),      } 
@@ -344,7 +356,7 @@ def get_elastic_entity_stats() -> dict:      resp = wrap_es_execution(search)      buckets = resp.aggregations.paper_like.buckets      stats['papers'] = { -        'total': resp.hits.total, +        'total': _hits_total_int(resp.hits.total),          'in_web': buckets.in_web.doc_count,          'is_oa': buckets.is_oa.doc_count,          'in_kbart': buckets.in_kbart.doc_count, @@ -363,7 +375,7 @@ def get_elastic_entity_stats() -> dict:      search = search.params(request_cache=True)      resp = wrap_es_execution(search)      stats['container'] = { -        "total": resp.hits.total, +        "total": _hits_total_int(resp.hits.total),      }      return stats @@ -398,7 +410,7 @@ def get_elastic_search_coverage(query: ReleaseQuery) -> dict:      resp = wrap_es_execution(search)      preservation_bucket = agg_to_dict(resp.aggregations.preservation) -    preservation_bucket['total'] = resp.hits.total +    preservation_bucket['total'] = _hits_total_int(resp.hits.total)      for k in ('bright', 'dark', 'shadows_only', 'none'):          if not k in preservation_bucket:              preservation_bucket[k] = 0 @@ -406,7 +418,7 @@ def get_elastic_search_coverage(query: ReleaseQuery) -> dict:          preservation_bucket['none'] += preservation_bucket['shadows_only']          preservation_bucket['shadows_only'] = 0      stats = { -        'total': resp.hits.total, +        'total': _hits_total_int(resp.hits.total),          'preservation': preservation_bucket,      } @@ -463,7 +475,7 @@ def get_elastic_container_stats(ident, issnl=None):      container_stats = resp.aggregations.container_stats.buckets      preservation_bucket = agg_to_dict(resp.aggregations.preservation) -    preservation_bucket['total'] = resp.hits.total +    preservation_bucket['total'] = _hits_total_int(resp.hits.total)      for k in ('bright', 'dark', 'shadows_only', 'none'):          if not k in preservation_bucket:              preservation_bucket[k] = 0 @@ -474,7 +486,7 @@ def 
get_elastic_container_stats(ident, issnl=None):      stats = {          'ident': ident,          'issnl': issnl, -        'total': resp.hits.total, +        'total': _hits_total_int(resp.hits.total),          'in_web': container_stats['in_web']['doc_count'],          'in_kbart': container_stats['in_kbart']['doc_count'],          'is_preserved': container_stats['is_preserved']['doc_count'], | 
