about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-07-24 20:00:29 -0700
committerBryan Newbold <bnewbold@robocracy.org>2020-07-30 18:28:03 -0700
commit19094b9994a1fc45450a96989ed41910d849c6d7 (patch)
tree2ae346a565ebbeeeeb9813e542804ae90873624a /python
parent1f6f9c0e251f45f220dd48242d48fa61922fe55f (diff)
downloadfatcat-19094b9994a1fc45450a96989ed41910d849c6d7.tar.gz
fatcat-19094b9994a1fc45450a96989ed41910d849c6d7.zip
make some ES agg methods work on general queries
E.g., instead of a container-specific year histogram, have the histogram fetch function take a ReleaseQuery. This is in preparation for more generic coverage visualization pages.
Diffstat (limited to 'python')
-rw-r--r--python/fatcat_web/routes.py10
-rw-r--r--python/fatcat_web/search.py80
2 files changed, 58 insertions, 32 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 203d1621..45f6b0b6 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -205,7 +205,9 @@ def generic_entity_view(entity_type, ident, view_template):
entity._random_releases = get_elastic_container_random_releases(entity.ident)
if view_template == "container_view_coverage.html":
entity._stats = get_elastic_container_stats(entity.ident, issnl=entity.issnl)
- entity._type_preservation = get_elastic_container_preservation_by_type(ident)
+ entity._type_preservation = get_elastic_preservation_by_type(
+ ReleaseQuery(container_id=ident),
+ )
return render_template(view_template, entity_type=entity_type, entity=entity, editgroup_id=None)
@@ -828,8 +830,9 @@ def container_ident_preservation_by_year_json(ident):
container = api.get_container(ident)
except ApiException as ae:
abort(ae.status)
+ query = ReleaseQuery(container_id=container.ident)
try:
- histogram = get_elastic_container_preservation_by_year(container.ident)
+ histogram = get_elastic_preservation_by_year(query)
except Exception as ae:
app.log.error(ae)
abort(503)
@@ -842,8 +845,9 @@ def container_ident_preservation_by_year_svg(ident):
container = api.get_container(ident)
except ApiException as ae:
abort(ae.status)
+ query = ReleaseQuery(container_id=container.ident)
try:
- histogram = get_elastic_container_preservation_by_year(container.ident)
+ histogram = get_elastic_preservation_by_year(query)
except Exception as ae:
app.log.error(ae)
abort(503)
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 90575818..7ae7e73b 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -492,7 +492,7 @@ def get_elastic_container_histogram_legacy(ident) -> List:
return vals
-def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]:
+def get_elastic_preservation_by_year(query) -> List[dict]:
"""
Fetches a stacked histogram of {year, preservation}.
@@ -505,21 +505,32 @@ def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]:
"""
search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX'])
- search = search.params(request_cache='true')
- search = search.query(
- 'bool',
- must=[
- Q("range", release_year={
- "gte": datetime.datetime.today().year - 249,
- "lte": datetime.datetime.today().year,
- }),
- ],
- filter=[
- Q("bool", minimum_should_match=1, should=[
- Q("match", container_id=container_id),
- ]),
- ],
+ if query.q not in [None, "*"]:
+ search = search.query(
+ "query_string",
+ query=query.q,
+ default_operator="AND",
+ analyze_wildcard=True,
+ allow_leading_wildcard=False,
+ lenient=True,
+ fields=[
+ "title^2",
+ "biblio",
+ ],
+ )
+ if query.container_id:
+ search = search.filter(
+ "term",
+ container_id=query.container_id,
+ )
+ search = search.filter(
+ "range",
+ release_year={
+ "gte": datetime.datetime.today().year - 249,
+ "lte": datetime.datetime.today().year,
+ },
)
+
search.aggs.bucket(
'year_preservation',
'composite',
@@ -539,7 +550,7 @@ def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]:
],
)
search = search[:0]
-
+ search = search.params(request_cache='true')
resp = wrap_es_execution(search)
buckets = resp.aggregations.year_preservation.buckets
@@ -564,7 +575,6 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict
"""
search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX'])
- search = search.params(request_cache='true')
search = search.query(
'bool',
filter=[
@@ -592,7 +602,7 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict
],
)
search = search[:0]
-
+ search = search.params(request_cache='true')
resp = wrap_es_execution(search)
buckets = resp.aggregations.volume_preservation.buckets
@@ -605,7 +615,7 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict
volume_dicts[int(row['key']['volume'])][row['key']['preservation']] = int(row['doc_count'])
return sorted(volume_dicts.values(), key=lambda x: x['volume'])
-def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]:
+def get_elastic_preservation_by_type(query: ReleaseQuery) -> List[dict]:
"""
Fetches preservation coverage by release type
@@ -615,15 +625,27 @@ def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]:
"""
search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX'])
- search = search.params(request_cache='true')
- search = search.query(
- 'bool',
- filter=[
- Q("bool", must=[
- Q("match", container_id=container_id),
- ]),
- ],
- )
+ if query.q not in [None, "*"]:
+ search = search.query(
+ "query_string",
+ query=query.q,
+ default_operator="AND",
+ analyze_wildcard=True,
+ allow_leading_wildcard=False,
+ lenient=True,
+ fields=[
+ "biblio",
+ ],
+ )
+ if query.container_id:
+ search = search.query(
+ 'bool',
+ filter=[
+ Q("bool", must=[
+ Q("match", container_id=query.container_id),
+ ]),
+ ],
+ )
search.aggs.bucket(
'type_preservation',
'composite',
@@ -642,7 +664,7 @@ def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]:
],
)
search = search[:0]
-
+ search = search.params(request_cache='true')
resp = wrap_es_execution(search)
buckets = resp.aggregations.type_preservation.buckets