From f4da02fae5ebd179a0b3af4ff179543813fa146b Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 4 Sep 2019 12:04:08 -0700
Subject: start container coverage page

---
 python/fatcat_web/routes.py                        | 19 ++++++
 python/fatcat_web/search.py                        | 68 +++++++++++++++++++++-
 .../templates/container_view_coverage.html         | 15 +++++
 python/fatcat_web/templates/entity_base.html       |  4 +-
 4 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 python/fatcat_web/templates/container_view_coverage.html

diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index f313fce0..3f5af621 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -230,6 +230,10 @@ def generic_editgroup_entity_view(editgroup_id, entity_type, ident, view_templat
 def container_view(ident):
     return generic_entity_view('container', ident, 'container_view.html')
 
+@app.route('/container/<ident>/coverage', methods=['GET'])
+def container_view_coverage(ident):
+    return generic_entity_view('container', ident, 'container_view_coverage.html')
+
 @app.route('/container/<ident>/metadata', methods=['GET'])
 def container_view_metadata(ident):
     return generic_entity_view('container', ident, 'entity_view_metadata.html')
@@ -718,6 +722,21 @@ def container_ident_stats(ident):
         abort(503)
     return jsonify(stats)
 
+@app.route('/container/<ident>/ia_coverage_years.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def container_ident_ia_coverage_years_json(ident):
+    try:
+        container = api.get_container(ident)
+    except ApiException as ae:
+        abort(ae.status)
+    try:
+        histogram = get_elastic_container_histogram(container.ident)
+    except Exception as ae:
+        app.log.error(ae)
+        abort(503)
+    histogram = [dict(year=h[0], in_ia=h[1], count=h[2]) for h in histogram]
+    return jsonify({'container_id': ident, "histogram": histogram})
+
 @app.route('/release/<ident>.bib', methods=['GET'])
 def release_bibtex(ident):
     try:
diff --git a/python/fatcat_web/search.py
b/python/fatcat_web/search.py
index 94246329..523269ce 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -274,7 +274,7 @@ def get_elastic_container_random_releases(ident, limit=5):
     #print(resp.json())
     resp.raise_for_status()
     resp = resp.json()
-    print(resp)
+    #print(resp)
     hits = [h['_source'] for h in resp['hits']['hits']]
     for h in hits:
         # Handle surrogate strings that elasticsearch returns sometimes,
@@ -285,3 +285,69 @@ def get_elastic_container_random_releases(ident, limit=5):
                 h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
 
     return hits
+
+def get_elastic_container_histogram(ident):
+    """
+    Fetches a stacked histogram of release counts for the given container,
+    bucketed by release year and by the "in_ia" flag.
+
+    Filters to the past 500 years (at most), or about 1000 values.
+
+    Returns a list of tuples:
+        (year, in_ia, count)
+    """
+
+    query = {
+        "aggs": {
+            "year_in_ia": {
+                "composite": {
+                    "size": 1000,
+                    "sources": [
+                        {"year": {
+                            "histogram": {
+                                "field": "release_year",
+                                "interval": 1,
+                        }}},
+                        {"in_ia": {
+                            "terms": {
+                                "field": "in_ia",
+                        }}},
+                    ],
+                },
+            },
+        },
+        "size": 0,
+        "query": {
+            "bool": {
+                "must": [{
+                    "range": {
+                        "release_year": {
+                            "gte": datetime.datetime.today().year - 499,
+                            "lte": datetime.datetime.today().year,
+                        }
+                    }
+                }],
+                "filter": [{
+                    "bool": {
+                        "should": [{
+                            "match": {
+                                "container_id": ident
+                            }
+                        }],
+                        "minimum_should_match": 1,
+                    },
+                }],
+            }
+        }
+    }
+    resp = requests.get(
+        "{}/fatcat_release/_search".format(app.config['ELASTICSEARCH_BACKEND']),
+        json=query,
+        params=dict(request_cache="true"))
+    resp.raise_for_status()
+    # TODO: abort()
+    resp = resp.json()
+    vals = [(h['key']['year'], h['key']['in_ia'], h['doc_count'])
+            for h in resp['aggregations']['year_in_ia']['buckets']]
+    vals = sorted(vals)
+    return vals
diff --git a/python/fatcat_web/templates/container_view_coverage.html b/python/fatcat_web/templates/container_view_coverage.html
new file mode 100644
index 00000000..eb9dba8a
--- /dev/null
+++
b/python/fatcat_web/templates/container_view_coverage.html @@ -0,0 +1,15 @@ +{% set container = entity %} +{% set entity_view = "coverage" %} +{% set entity_type = "container" %} +{% import "entity_macros.html" as entity_macros %} +{% extends "entity_base.html" %} + +{% block entity_main %} + +

Preservation Coverage By Year

+ + +
Download as JSON + +{% endblock %} + diff --git a/python/fatcat_web/templates/entity_base.html b/python/fatcat_web/templates/entity_base.html index bba95d9d..48f51ec6 100644 --- a/python/fatcat_web/templates/entity_base.html +++ b/python/fatcat_web/templates/entity_base.html @@ -78,7 +78,9 @@