about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org>, 2022-02-03 21:09:44 -0800
committer: Bryan Newbold <bnewbold@robocracy.org>, 2022-02-03 21:09:47 -0800
commit: d9d9c32528c168114c4b1a6afeedf21674322f69 (patch)
tree: 54ca03b7dccb7d59a3cfb1dce9afc56064288e3e /python
parent: 3224fe8c43adcf3e2f6f5536e11a1ea793175b72 (diff)
download: fatcat-d9d9c32528c168114c4b1a6afeedf21674322f69.tar.gz
download: fatcat-d9d9c32528c168114c4b1a6afeedf21674322f69.zip
small changes to preservation coverage search queries
- allow fetching of by-release-type preservation histograms as JSON
- add a query flag to exclude 'stub' entity types
- don't include 'stub' entities in container-by-year or container-by-volume charts (and JSON)
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_web/routes.py 29
-rw-r--r-- python/fatcat_web/search.py 48
2 files changed, 67 insertions, 10 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index f180e339..a7fb34bd 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -1183,7 +1183,7 @@ def container_ident_preservation_by_year_json(ident: str) -> AnyResponse:
container = api.get_container(ident)
except ApiException as ae:
abort(ae.status)
- query = ReleaseQuery(container_id=container.ident)
+ query = ReleaseQuery(container_id=container.ident, exclude_stubs=True)
try:
histogram = get_elastic_preservation_by_year(query)
except Exception as ae:
@@ -1201,7 +1201,7 @@ def container_ident_preservation_by_year_svg(ident: str) -> AnyResponse:
container = api.get_container(ident)
except ApiException as ae:
abort(ae.status)
- query = ReleaseQuery(container_id=container.ident)
+ query = ReleaseQuery(container_id=container.ident, exclude_stubs=True)
try:
histogram = get_elastic_preservation_by_year(query)
except Exception as ae:
@@ -1223,8 +1223,9 @@ def container_ident_preservation_by_volume_json(ident: str) -> AnyResponse:
container = api.get_container(ident)
except ApiException as ae:
abort(ae.status)
+ query = ReleaseQuery(container_id=container.ident, exclude_stubs=True)
try:
- histogram = get_elastic_container_preservation_by_volume(container.ident)
+ histogram = get_elastic_container_preservation_by_volume(query)
except Exception as ae:
app.log.error(ae)
abort(503)
@@ -1241,8 +1242,9 @@ def container_ident_preservation_by_volume_svg(ident: str) -> AnyResponse:
container = api.get_container(ident)
except ApiException as ae:
abort(ae.status)
+ query = ReleaseQuery(container_id=container.ident, exclude_stubs=True)
try:
- histogram = get_elastic_container_preservation_by_volume(container.ident)
+ histogram = get_elastic_container_preservation_by_volume(query)
except Exception as ae:
app.log.error(ae)
abort(503)
@@ -1252,6 +1254,25 @@ def container_ident_preservation_by_volume_svg(ident: str) -> AnyResponse:
).render_response()
+@app.route(
+ "/container/<string(length=26):ident>/preservation_by_type.json",
+ methods=["GET", "OPTIONS"],
+)
+@crossdomain(origin="*", headers=["access-control-allow-origin", "Content-Type"])
+def container_ident_preservation_by_type_json(ident: str) -> AnyResponse:
+ try:
+ container = api.get_container(ident)
+ except ApiException as ae:
+ abort(ae.status)
+ query = ReleaseQuery(container_id=container.ident)
+ try:
+ histogram = get_elastic_preservation_by_type(query)
+ except Exception as ae:
+ app.log.error(ae)
+ abort(503)
+ return jsonify({"container_id": ident, "histogram": histogram})
+
+
@app.route("/release/<string(length=26):ident>.bib", methods=["GET"])
def release_bibtex(ident: str) -> AnyResponse:
try:
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index b9994f28..ad27d77b 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -37,6 +37,7 @@ class ReleaseQuery:
fulltext_only: bool = False
container_id: Optional[str] = None
recent: bool = False
+ exclude_stubs: bool = False
@staticmethod
def from_args(args: Dict[str, Any]) -> "ReleaseQuery":
@@ -62,6 +63,7 @@ class ReleaseQuery:
fulltext_only=bool(args.get("fulltext_only")),
container_id=container_id,
recent=bool(args.get("recent")),
+ exclude_stubs=bool(args.get("exclude_stubs")),
)
@@ -466,10 +468,14 @@ def get_elastic_container_stats(
Returns dict:
ident
issnl (optional)
- total
- in_web
- in_kbart
- preserved
+ total: count
+ in_web: count
+ in_kbart: count
+ is_preserved: count
+ preservation{}
+ "histogram" by preservation status
+ release_type{}
+ "histogram" by release type
"""
if not es_client:
@@ -620,6 +626,8 @@ def get_elastic_preservation_by_year(query: ReleaseQuery) -> List[Dict[str, Any]
Returns a list of dicts, sorted by year, with keys/values like:
{year (int), bright (int), dark (int), shadows_only (int), none (int)}
+
+ Stubs can be excluded by setting the appropriate query flag
"""
search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"])
@@ -640,6 +648,18 @@ def get_elastic_preservation_by_year(query: ReleaseQuery) -> List[Dict[str, Any]
"term",
container_id=query.container_id,
)
+ if query.exclude_stubs:
+ search = search.query(
+ "bool",
+ filter=[
+ Q(
+ "bool",
+ must_not=[
+ Q("match", release_type="stub"),
+ ],
+ ),
+ ],
+ )
search = search.filter(
"range",
release_year={
@@ -777,7 +797,7 @@ def get_elastic_preservation_by_date(query: ReleaseQuery) -> List[dict]:
return sorted(date_dicts.values(), key=lambda x: x["date"])
-def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict]:
+def get_elastic_container_preservation_by_volume(query: ReleaseQuery) -> List[dict]:
"""
Fetches a stacked histogram of {volume, preservation}.
@@ -787,8 +807,11 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict
Returns a list of dicts, sorted by volume, with keys/values like:
{year (int), bright (int), dark (int), shadows_only (int), none (int)}
+
+ Stubs can be excluded by setting the appropriate query flag
"""
+ assert query.container_id is not None
search = Search(using=app.es_client, index=app.config["ELASTICSEARCH_RELEASE_INDEX"])
search = search.query(
"bool",
@@ -796,12 +819,25 @@ def get_elastic_container_preservation_by_volume(container_id: str) -> List[dict
Q(
"bool",
must=[
- Q("match", container_id=container_id),
+ Q("match", container_id=query.container_id),
Q("exists", field="volume"),
],
),
],
)
+ if query.exclude_stubs:
+ search = search.query(
+ "bool",
+ filter=[
+ Q(
+ "bool",
+ must_not=[
+ Q("match", release_type="stub"),
+ ],
+ ),
+ ],
+ )
+
search.aggs.bucket(
"volume_preservation",
"composite",