diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2022-02-03 21:09:44 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2022-02-03 21:09:47 -0800 |
commit | d9d9c32528c168114c4b1a6afeedf21674322f69 (patch) | |
tree | 54ca03b7dccb7d59a3cfb1dce9afc56064288e3e | |
parent | 3224fe8c43adcf3e2f6f5536e11a1ea793175b72 (diff) | |
download | fatcat-d9d9c32528c168114c4b1a6afeedf21674322f69.tar.gz fatcat-d9d9c32528c168114c4b1a6afeedf21674322f69.zip |
small changes to preservation coverage search queries
- allow fetching of by-release-type preservation histograms as JSON
- add an `exclude_stubs` query flag to exclude releases with the 'stub' release type
- don't include 'stub' entities in container-by-year or container-by-volume charts (and JSON)
-rw-r--r-- | python/fatcat_web/routes.py | 29 | ||||
-rw-r--r-- | python/fatcat_web/search.py | 48 |
2 files changed, 67 insertions, 10 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index f180e339..a7fb34bd 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -1183,7 +1183,7 @@ def container_ident_preservation_by_year_json(ident: str) -> AnyResponse: container = api.get_container(ident) except ApiException as ae: abort(ae.status) - query = ReleaseQuery(container_id=container.ident) + query = ReleaseQuery(container_id=container.ident, exclude_stubs=True) try: histogram = get_elastic_preservation_by_year(query) except Exception as ae: @@ -1201,7 +1201,7 @@ def container_ident_preservation_by_year_svg(ident: str) -> AnyResponse: container = api.get_container(ident) except ApiException as ae: abort(ae.status) - query = ReleaseQuery(container_id=container.ident) + query = ReleaseQuery(container_id=container.ident, exclude_stubs=True) try: histogram = get_elastic_preservation_by_year(query) except Exception as ae: @@ -1223,8 +1223,9 @@ def container_ident_preservation_by_volume_json(ident: str) -> AnyResponse: container = api.get_container(ident) except ApiException as ae: abort(ae.status) + query = ReleaseQuery(container_id=container.ident, exclude_stubs=True) try: - histogram = get_elastic_container_preservation_by_volume(container.ident) + histogram = get_elastic_container_preservation_by_volume(query) except Exception as ae: app.log.error(ae) abort(503) @@ -1241,8 +1242,9 @@ def container_ident_preservation_by_volume_svg(ident: str) -> AnyResponse: container = api.get_container(ident) except ApiException as ae: abort(ae.status) + query = ReleaseQuery(container_id=container.ident, exclude_stubs=True) try: - histogram = get_elastic_container_preservation_by_volume(container.ident) + histogram = get_elastic_container_preservation_by_volume(query) except Exception as ae: app.log.error(ae) abort(503) @@ -1252,6 +1254,25 @@ def container_ident_preservation_by_volume_svg(ident: str) -> AnyResponse: ).render_response() +@app.route( + 
"/container/<string(length=26):ident>/preservation_by_type.json", + methods=["GET", "OPTIONS"], +) +@crossdomain(origin="*", headers=["access-control-allow-origin", "Content-Type"]) +def container_ident_preservation_by_type_json(ident: str) -> AnyResponse: + try: + container = api.get_container(ident) + except ApiException as ae: + abort(ae.status) + query = ReleaseQuery(container_id=container.ident) + try: + histogram = get_elastic_preservation_by_type(query) + except Exception as ae: + app.log.error(ae) + abort(503) + return jsonify({"container_id": ident, "histogram": histogram}) + + @app.route("/release/<string(length=26):ident>.bib", methods=["GET"]) def release_bibtex(ident: str) -> AnyResponse: try: diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index b9994f28..ad27d77b 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -37,6 +37,7 @@ class ReleaseQuery: fulltext_only: bool = False container_id: Optional[str] = None recent: bool = False + exclude_stubs: bool = False @staticmethod def from_args(args: Dict[str, Any]) -> "ReleaseQuery": @@ -62,6 +63,7 @@ class ReleaseQuery: fulltext_only=bool(args.get("fulltext_only")), container_id=container_id, recent=bool(args.get("recent")), + exclude_stubs=bool(args.get("exclude_stubs")), ) @@ -466,10 +468,14 @@ def get_elastic_container_stats( Returns dict: ident issnl (optional) - total - in_web - in_kbart - preserved + total: count + in_web: count + in_kbart: count + is_preserved: count + preservation{} + "histogram" by preservation status + release_type{} + "histogram" by release type """ if not es_client: @@ -620,6 +626,8 @@ def get_elastic_preservation_by_year(query: ReleaseQuery) -> List[Dict[str, Any] Returns a list of dicts, sorted by year, with keys/values like: {year (int), bright (int), dark (int), shadows_only (int), none (int)} + + Stubs can be excluded by setting the appropriate query flag """ search = Search(using=app.es_client, 
index=app.config["ELASTICSEARCH_RELEASE_INDEX"]) @@ -640,6 +648,18 @@ def get_elastic_preservation_by_year(query: ReleaseQuery) -> List[Dict[str, Any] "term", container_id=query.container_id, ) + if query.exclude_stubs: + search = search.query( + "bool", + filter=[ + Q( + "bool", + must_not=[ + Q("match", release_type="stub"), + ], + ), + ], + ) search = search.filter( "range", release_year={ @@ -777,7 +797,7 @@ def get_elastic_preservation_by_date(query: ReleaseQuery) -> List[dict]: return sorted(date_dicts.values(), key=lambda x: x["date"]) -def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict]: +def get_elastic_container_preservation_by_volume(query: ReleaseQuery) -> List[dict]: """ Fetches a stacked histogram of {volume, preservation}. @@ -787,8 +807,11 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict Returns a list of dicts, sorted by volume, with keys/values like: {year (int), bright (int), dark (int), shadows_only (int), none (int)} + + Stubs can be excluded by setting the appropriate query flag """ + assert query.container_id is not None search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"]) search = search.query( "bool", @@ -796,12 +819,25 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict Q( "bool", must=[ - Q("match", container_id=container_id), + Q("match", container_id=query.container_id), Q("exists", field="volume"), ], ), ], ) + if query.exclude_stubs: + search = search.query( + "bool", + filter=[ + Q( + "bool", + must_not=[ + Q("match", release_type="stub"), + ], + ), + ], + ) + search.aggs.bucket( "volume_preservation", "composite", |