about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2020-07-24 20:00:29 -0700
committerBryan Newbold <bnewbold@robocracy.org>2020-07-30 18:28:03 -0700
commit19094b9994a1fc45450a96989ed41910d849c6d7 (patch)
tree2ae346a565ebbeeeeb9813e542804ae90873624a /python
parent1f6f9c0e251f45f220dd48242d48fa61922fe55f (diff)
downloadfatcat-19094b9994a1fc45450a96989ed41910d849c6d7.tar.gz
fatcat-19094b9994a1fc45450a96989ed41910d849c6d7.zip
make some ES agg methods work on general queries
E.g., instead of a container-specific year histogram, have the histogram fetch function take a ReleaseQuery. This is in preparation for more generic coverage visualization pages.
Diffstat (limited to 'python')
-rw-r--r--python/fatcat_web/routes.py10
-rw-r--r--python/fatcat_web/search.py80
2 files changed, 58 insertions, 32 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 203d1621..45f6b0b6 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -205,7 +205,9 @@ def generic_entity_view(entity_type, ident, view_template):
entity._random_releases = get_elastic_container_random_releases(entity.ident)
if view_template == "container_view_coverage.html":
entity._stats = get_elastic_container_stats(entity.ident, issnl=entity.issnl)
- entity._type_preservation = get_elastic_container_preservation_by_type(ident)
+ entity._type_preservation = get_elastic_preservation_by_type(
+ ReleaseQuery(container_id=ident),
+ )
return render_template(view_template, entity_type=entity_type, entity=entity, editgroup_id=None)
@@ -828,8 +830,9 @@ def container_ident_preservation_by_year_json(ident):
container = api.get_container(ident)
except ApiException as ae:
abort(ae.status)
+ query = ReleaseQuery(container_id=container.ident)
try:
- histogram = get_elastic_container_preservation_by_year(container.ident)
+ histogram = get_elastic_preservation_by_year(query)
except Exception as ae:
app.log.error(ae)
abort(503)
@@ -842,8 +845,9 @@ def container_ident_preservation_by_year_svg(ident):
container = api.get_container(ident)
except ApiException as ae:
abort(ae.status)
+ query = ReleaseQuery(container_id=container.ident)
try:
- histogram = get_elastic_container_preservation_by_year(container.ident)
+ histogram = get_elastic_preservation_by_year(query)
except Exception as ae:
app.log.error(ae)
abort(503)
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 90575818..7ae7e73b 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -492,7 +492,7 @@ def get_elastic_container_histogram_legacy(ident) -> List:
return vals
-def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]:
+def get_elastic_preservation_by_year(query) -> List[dict]:
"""
Fetches a stacked histogram of {year, preservation}.
@@ -505,21 +505,32 @@ def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]:
"""
search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX'])
- search = search.params(request_cache='true')
- search = search.query(
- 'bool',
- must=[
- Q("range", release_year={
- "gte": datetime.datetime.today().year - 249,
- "lte": datetime.datetime.today().year,
- }),
- ],
- filter=[
- Q("bool", minimum_should_match=1, should=[
- Q("match", container_id=container_id),
- ]),
- ],
+ if query.q not in [None, "*"]:
+ search = search.query(
+ "query_string",
+ query=query.q,
+ default_operator="AND",
+ analyze_wildcard=True,
+ allow_leading_wildcard=False,
+ lenient=True,
+ fields=[
+ "title^2",
+ "biblio",
+ ],
+ )
+ if query.container_id:
+ search = search.filter(
+ "term",
+ container_id=query.container_id,
+ )
+ search = search.filter(
+ "range",
+ release_year={
+ "gte": datetime.datetime.today().year - 249,
+ "lte": datetime.datetime.today().year,
+ },
)
+
search.aggs.bucket(
'year_preservation',
'composite',
@@ -539,7 +550,7 @@ def get_elastic_container_preservation_by_year(container_id: str) -> List[dict]:
],
)
search = search[:0]
-
+ search = search.params(request_cache='true')
resp = wrap_es_execution(search)
buckets = resp.aggregations.year_preservation.buckets
@@ -564,7 +575,6 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict
"""
search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX'])
- search = search.params(request_cache='true')
search = search.query(
'bool',
filter=[
@@ -592,7 +602,7 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict
],
)
search = search[:0]
-
+ search = search.params(request_cache='true')
resp = wrap_es_execution(search)
buckets = resp.aggregations.volume_preservation.buckets
@@ -605,7 +615,7 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict
volume_dicts[int(row['key']['volume'])][row['key']['preservation']] = int(row['doc_count'])
return sorted(volume_dicts.values(), key=lambda x: x['volume'])
-def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]:
+def get_elastic_preservation_by_type(query: ReleaseQuery) -> List[dict]:
"""
Fetches preservation coverage by release type
@@ -615,15 +625,27 @@ def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]:
"""
search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX'])
- search = search.params(request_cache='true')
- search = search.query(
- 'bool',
- filter=[
- Q("bool", must=[
- Q("match", container_id=container_id),
- ]),
- ],
- )
+ if query.q not in [None, "*"]:
+ search = search.query(
+ "query_string",
+ query=query.q,
+ default_operator="AND",
+ analyze_wildcard=True,
+ allow_leading_wildcard=False,
+ lenient=True,
+ fields=[
+ "biblio",
+ ],
+ )
+ if query.container_id:
+ search = search.query(
+ 'bool',
+ filter=[
+ Q("bool", must=[
+ Q("match", container_id=query.container_id),
+ ]),
+ ],
+ )
search.aggs.bucket(
'type_preservation',
'composite',
@@ -642,7 +664,7 @@ def get_elastic_container_preservation_by_type(container_id: str) -> List[dict]:
],
)
search = search[:0]
-
+ search = search.params(request_cache='true')
resp = wrap_es_execution(search)
buckets = resp.aggregations.type_preservation.buckets