From 19094b9994a1fc45450a96989ed41910d849c6d7 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 24 Jul 2020 20:00:29 -0700 Subject: make some ES agg methods work on general queries E.g., instead of container-specific year histogram, have the histogram fetch function take a ReleaseQuery. This is in preparation for more generic coverage visualization pages. --- python/fatcat_web/routes.py | 10 ++++-- python/fatcat_web/search.py | 80 +++++++++++++++++++++++++++++---------------- 2 files changed, 58 insertions(+), 32 deletions(-) diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 203d1621..45f6b0b6 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -205,7 +205,9 @@ def generic_entity_view(entity_type, ident, view_template): entity._random_releases = get_elastic_container_random_releases(entity.ident) if view_template == "container_view_coverage.html": entity._stats = get_elastic_container_stats(entity.ident, issnl=entity.issnl) - entity._type_preservation = get_elastic_container_preservation_by_type(ident) + entity._type_preservation = get_elastic_preservation_by_type( + ReleaseQuery(container_id=ident), + ) return render_template(view_template, entity_type=entity_type, entity=entity, editgroup_id=None) @@ -828,8 +830,9 @@ def container_ident_preservation_by_year_json(ident): container = api.get_container(ident) except ApiException as ae: abort(ae.status) + query = ReleaseQuery(container_id=container.ident) try: - histogram = get_elastic_container_preservation_by_year(container.ident) + histogram = get_elastic_preservation_by_year(query) except Exception as ae: app.log.error(ae) abort(503) @@ -842,8 +845,9 @@ def container_ident_preservation_by_year_svg(ident): container = api.get_container(ident) except ApiException as ae: abort(ae.status) + query = ReleaseQuery(container_id=container.ident) try: - histogram = get_elastic_container_preservation_by_year(container.ident) + histogram = 
get_elastic_preservation_by_year(query) except Exception as ae: app.log.error(ae) abort(503) diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 90575818..7ae7e73b 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -492,7 +492,7 @@ def get_elastic_container_histogram_legacy(ident) -> List: return vals -def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]: +def get_elastic_preservation_by_year(query) -> List[dict]: """ Fetches a stacked histogram of {year, preservation}. @@ -505,21 +505,32 @@ def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]: """ search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX']) - search = search.params(request_cache='true') - search = search.query( - 'bool', - must=[ - Q("range", release_year={ - "gte": datetime.datetime.today().year - 249, - "lte": datetime.datetime.today().year, - }), - ], - filter=[ - Q("bool", minimum_should_match=1, should=[ - Q("match", container_id=container_id), - ]), - ], + if query.q not in [None, "*"]: + search = search.query( + "query_string", + query=query.q, + default_operator="AND", + analyze_wildcard=True, + allow_leading_wildcard=False, + lenient=True, + fields=[ + "title^2", + "biblio", + ], + ) + if query.container_id: + search = search.filter( + "term", + container_id=query.container_id, + ) + search = search.filter( + "range", + release_year={ + "gte": datetime.datetime.today().year - 249, + "lte": datetime.datetime.today().year, + }, ) + search.aggs.bucket( 'year_preservation', 'composite', @@ -539,7 +550,7 @@ def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]: ], ) search = search[:0] - + search = search.params(request_cache='true') resp = wrap_es_execution(search) buckets = resp.aggregations.year_preservation.buckets @@ -564,7 +575,6 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict """ search = 
Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX']) - search = search.params(request_cache='true') search = search.query( 'bool', filter=[ @@ -592,7 +602,7 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict ], ) search = search[:0] - + search = search.params(request_cache='true') resp = wrap_es_execution(search) buckets = resp.aggregations.volume_preservation.buckets @@ -605,7 +615,7 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict volume_dicts[int(row['key']['volume'])][row['key']['preservation']] = int(row['doc_count']) return sorted(volume_dicts.values(), key=lambda x: x['volume']) -def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]: +def get_elastic_preservation_by_type(query: ReleaseQuery) -> List[dict]: """ Fetches preservation coverage by release type @@ -615,15 +625,27 @@ def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]: """ search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX']) - search = search.params(request_cache='true') - search = search.query( - 'bool', - filter=[ - Q("bool", must=[ - Q("match", container_id=container_id), - ]), - ], - ) + if query.q not in [None, "*"]: + search = search.query( + "query_string", + query=query.q, + default_operator="AND", + analyze_wildcard=True, + allow_leading_wildcard=False, + lenient=True, + fields=[ + "biblio", + ], + ) + if query.container_id: + search = search.query( + 'bool', + filter=[ + Q("bool", must=[ + Q("match", container_id=query.container_id), + ]), + ], + ) search.aggs.bucket( 'type_preservation', 'composite', @@ -642,7 +664,7 @@ def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]: ], ) search = search[:0] - + search = search.params(request_cache='true') resp = wrap_es_execution(search) buckets = resp.aggregations.type_preservation.buckets -- cgit v1.2.3