From d9d9c32528c168114c4b1a6afeedf21674322f69 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 3 Feb 2022 21:09:44 -0800 Subject: small changes to preservation coverage search queries - allow fetching of by-release-type preservation histograms as JSON - query flag to exclude 'stub' entity types - don't include 'stub' entities in container-by-year or container-by-volume charts (and JSON) --- python/fatcat_web/routes.py | 29 +++++++++++++++++++++++---- python/fatcat_web/search.py | 48 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 67 insertions(+), 10 deletions(-) (limited to 'python') diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index f180e339..a7fb34bd 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -1183,7 +1183,7 @@ def container_ident_preservation_by_year_json(ident: str) -> AnyResponse: container = api.get_container(ident) except ApiException as ae: abort(ae.status) - query = ReleaseQuery(container_id=container.ident) + query = ReleaseQuery(container_id=container.ident, exclude_stubs=True) try: histogram = get_elastic_preservation_by_year(query) except Exception as ae: @@ -1201,7 +1201,7 @@ def container_ident_preservation_by_year_svg(ident: str) -> AnyResponse: container = api.get_container(ident) except ApiException as ae: abort(ae.status) - query = ReleaseQuery(container_id=container.ident) + query = ReleaseQuery(container_id=container.ident, exclude_stubs=True) try: histogram = get_elastic_preservation_by_year(query) except Exception as ae: @@ -1223,8 +1223,9 @@ def container_ident_preservation_by_volume_json(ident: str) -> AnyResponse: container = api.get_container(ident) except ApiException as ae: abort(ae.status) + query = ReleaseQuery(container_id=container.ident, exclude_stubs=True) try: - histogram = get_elastic_container_preservation_by_volume(container.ident) + histogram = get_elastic_container_preservation_by_volume(query) except Exception as ae: app.log.error(ae) 
abort(503) @@ -1241,8 +1242,9 @@ def container_ident_preservation_by_volume_svg(ident: str) -> AnyResponse: container = api.get_container(ident) except ApiException as ae: abort(ae.status) + query = ReleaseQuery(container_id=container.ident, exclude_stubs=True) try: - histogram = get_elastic_container_preservation_by_volume(container.ident) + histogram = get_elastic_container_preservation_by_volume(query) except Exception as ae: app.log.error(ae) abort(503) @@ -1252,6 +1254,25 @@ def container_ident_preservation_by_volume_svg(ident: str) -> AnyResponse: ).render_response() +@app.route( + "/container/<string(length=26):ident>/preservation_by_type.json", + methods=["GET", "OPTIONS"], +) +@crossdomain(origin="*", headers=["access-control-allow-origin", "Content-Type"]) +def container_ident_preservation_by_type_json(ident: str) -> AnyResponse: + try: + container = api.get_container(ident) + except ApiException as ae: + abort(ae.status) + query = ReleaseQuery(container_id=container.ident) + try: + histogram = get_elastic_preservation_by_type(query) + except Exception as ae: + app.log.error(ae) + abort(503) + return jsonify({"container_id": ident, "histogram": histogram}) + + @app.route("/release/<string(length=26):ident>.bib", methods=["GET"]) def release_bibtex(ident: str) -> AnyResponse: try: diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index b9994f28..ad27d77b 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -37,6 +37,7 @@ class ReleaseQuery: fulltext_only: bool = False container_id: Optional[str] = None recent: bool = False + exclude_stubs: bool = False @staticmethod def from_args(args: Dict[str, Any]) -> "ReleaseQuery": @@ -62,6 +63,7 @@ class ReleaseQuery: fulltext_only=bool(args.get("fulltext_only")), container_id=container_id, recent=bool(args.get("recent")), + exclude_stubs=bool(args.get("exclude_stubs")), ) @@ -466,10 +468,14 @@ def get_elastic_container_stats( Returns dict: ident issnl (optional) - total - in_web - in_kbart - preserved + total: count + 
in_web: count + in_kbart: count + is_preserved: count + preservation{} + "histogram" by preservation status + release_type{} + "histogram" by release type """ if not es_client: @@ -620,6 +626,8 @@ def get_elastic_preservation_by_year(query: ReleaseQuery) -> List[Dict[str, Any] Returns a list of dicts, sorted by year, with keys/values like: {year (int), bright (int), dark (int), shadows_only (int), none (int)} + + Stubs can be excluded by setting the appropriate query flag """ search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"]) @@ -640,6 +648,18 @@ def get_elastic_preservation_by_year(query: ReleaseQuery) -> List[Dict[str, Any] "term", container_id=query.container_id, ) + if query.exclude_stubs: + search = search.query( + "bool", + filter=[ + Q( + "bool", + must_not=[ + Q("match", release_type="stub"), + ], + ), + ], + ) search = search.filter( "range", release_year={ @@ -777,7 +797,7 @@ def get_elastic_preservation_by_date(query: ReleaseQuery) -> List[dict]: return sorted(date_dicts.values(), key=lambda x: x["date"]) -def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict]: +def get_elastic_container_preservation_by_volume(query: ReleaseQuery) -> List[dict]: """ Fetches a stacked histogram of {volume, preservation}. 
@@ -787,8 +807,11 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict Returns a list of dicts, sorted by volume, with keys/values like: {year (int), bright (int), dark (int), shadows_only (int), none (int)} + + Stubs can be excluded by setting the appropriate query flag """ + assert query.container_id is not None search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"]) search = search.query( "bool", @@ -796,12 +819,25 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict Q( "bool", must=[ - Q("match", container_id=container_id), + Q("match", container_id=query.container_id), Q("exists", field="volume"), ], ), ], ) + if query.exclude_stubs: + search = search.query( + "bool", + filter=[ + Q( + "bool", + must_not=[ + Q("match", release_type="stub"), + ], + ), + ], + ) + search.aggs.bucket( "volume_preservation", "composite", -- cgit v1.2.3