diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2020-07-06 16:03:59 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2020-07-30 18:17:54 -0700 |
commit | 7282174320c7ec9367bddd26fa6f14e424c8480a (patch) | |
tree | 3b0cb780c09143b16225f6fb82c6b248cd9dfb20 /python/fatcat_web | |
parent | a4e21d7651aded342c495e38a76e3d965ab2ff76 (diff) | |
download | fatcat-7282174320c7ec9367bddd26fa6f14e424c8480a.tar.gz fatcat-7282174320c7ec9367bddd26fa6f14e424c8480a.zip |
include new-style preservation+release_type aggs in container stats
Diffstat (limited to 'python/fatcat_web')
-rw-r--r-- | python/fatcat_web/search.py | 39 |
1 files changed, 35 insertions, 4 deletions
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 3fd7f9dc..f60860c9 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -133,6 +133,20 @@ def wrap_es_execution(search: Search) -> Any: raise FatcatSearchError(e.status_code, str(e.error), description) return resp +def agg_to_dict(agg) -> dict: + """ + Takes a simple term aggregation result (with buckets) and returns a simple + dict with keys as terms and counts as values. Includes an extra value + '_other', and by convention aggregations should be written to have "missing" + values as '_unknown'. + """ + result = dict() + for bucket in agg.buckets: + result[bucket.key] = bucket.doc_count + if agg.sum_other_doc_count: + result['_other'] = agg.sum_other_doc_count + return result + def do_container_search( query: GenericQuery, deep_page_limit: int = 2000 ) -> SearchHits: @@ -383,19 +397,36 @@ def get_elastic_container_stats(ident, issnl=None): }, }, ) + search.aggs.bucket( + 'preservation', + 'terms', + field='preservation', + missing='_unknown', + ) + search.aggs.bucket( + 'release_type', + 'terms', + field='release_type', + missing='_unknown', + ) + search = search[:0] search = search.params(request_cache=True) resp = wrap_es_execution(search) - buckets = resp.aggregations.container_stats.buckets + container_stats = resp.aggregations.container_stats.buckets + preservation_bucket = agg_to_dict(resp.aggregations.preservation) + release_type_bucket = agg_to_dict(resp.aggregations.release_type) stats = { 'ident': ident, 'issnl': issnl, 'total': resp.hits.total, - 'in_web': buckets['in_web']['doc_count'], - 'in_kbart': buckets['in_kbart']['doc_count'], - 'is_preserved': buckets['is_preserved']['doc_count'], + 'in_web': container_stats['in_web']['doc_count'], + 'in_kbart': container_stats['in_kbart']['doc_count'], + 'is_preserved': container_stats['is_preserved']['doc_count'], + 'preservation': preservation_bucket, + 'release_types': release_type_bucket, } return stats |