From 7282174320c7ec9367bddd26fa6f14e424c8480a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 6 Jul 2020 16:03:59 -0700 Subject: include new-style preservation+release_type aggs in container stats --- python/fatcat_web/search.py | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) (limited to 'python/fatcat_web/search.py') diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 3fd7f9dc..f60860c9 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -133,6 +133,20 @@ def wrap_es_execution(search: Search) -> Any: raise FatcatSearchError(e.status_code, str(e.error), description) return resp +def agg_to_dict(agg) -> dict: + """ + Takes a simple term aggregation result (with buckets) and returns a simple + dict with keys as terms and counts as values. Includes an extra value + '_other', and by convention aggregations should be writen to have "missing" + vaules as '_unknown'. + """ + result = dict() + for bucket in agg.buckets: + result[bucket.key] = bucket.doc_count + if agg.sum_other_doc_count: + result['_other'] = agg.sum_other_doc_count + return result + def do_container_search( query: GenericQuery, deep_page_limit: int = 2000 ) -> SearchHits: @@ -383,19 +397,36 @@ def get_elastic_container_stats(ident, issnl=None): }, }, ) + search.aggs.bucket( + 'preservation', + 'terms', + field='preservation', + missing='_unknown', + ) + search.aggs.bucket( + 'release_type', + 'terms', + field='release_type', + missing='_unknown', + ) + search = search[:0] search = search.params(request_cache=True) resp = wrap_es_execution(search) - buckets = resp.aggregations.container_stats.buckets + container_stats = resp.aggregations.container_stats.buckets + preservation_bucket = agg_to_dict(resp.aggregations.preservation) + release_type_bucket = agg_to_dict(resp.aggregations.release_type) stats = { 'ident': ident, 'issnl': issnl, 'total': resp.hits.total, - 'in_web': buckets['in_web']['doc_count'], - 'in_kbart': buckets['in_kbart']['doc_count'], - 'is_preserved': buckets['is_preserved']['doc_count'], + 'in_web': container_stats['in_web']['doc_count'], + 'in_kbart': container_stats['in_kbart']['doc_count'], + 'is_preserved': container_stats['is_preserved']['doc_count'], + 'preservation': preservation_bucket, + 'release_types': release_type_bucket, } return stats -- cgit v1.2.3