aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2020-07-24 20:02:05 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2020-07-30 18:28:03 -0700
commit 5bbb493b94a63d66151a53837aa66f0d986df497 (patch)
tree f0dd843415efbf9da6c855e6c2fe504e0ba7e075
parent 19094b9994a1fc45450a96989ed41910d849c6d7 (diff)
download fatcat-5bbb493b94a63d66151a53837aa66f0d986df497.tar.gz
fatcat-5bbb493b94a63d66151a53837aa66f0d986df497.zip
first iteration of flexible search coverage
-rw-r--r-- python/fatcat_web/routes.py 29
-rw-r--r-- python/fatcat_web/search.py 36
-rw-r--r-- python/fatcat_web/templates/container_view.html 2
-rw-r--r-- python/fatcat_web/templates/container_view_coverage.html 2
-rw-r--r-- python/fatcat_web/templates/coverage_search.html 81
5 files changed, 148 insertions, 2 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 45f6b0b6..a741112f 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -732,6 +732,35 @@ def container_search():
return render_template('container_search.html', query=query, es_error=fse), fse.status_code
return render_template('container_search.html', query=query, found=found)
+@app.route('/coverage/search', methods=['GET', 'POST'])
+def coverage_search():
+
+ if 'q' not in request.args.keys():
+ return render_template(
+ 'coverage_search.html',
+ query=ReleaseQuery(),
+ coverage_stats=None,
+ coverage_type_preservation=None,
+ year_histogram_svg=None,
+ )
+
+ query = ReleaseQuery.from_args(request.args)
+ coverage_stats = get_elastic_search_coverage(query)
+ if coverage_stats['total'] > 1:
+ year_histogram = get_elastic_preservation_by_year(query)
+ year_histogram_svg = preservation_by_year_histogram(year_histogram).render_data_uri()
+ coverage_type_preservation = get_elastic_preservation_by_type(query)
+ else:
+ year_histogram_svg = None
+ coverage_type_preservation = None
+ return render_template(
+ 'coverage_search.html',
+ query=query,
+ coverage_stats=coverage_stats,
+ coverage_type_preservation=coverage_type_preservation,
+ year_histogram_svg=year_histogram_svg,
+ )
+
def get_changelog_stats():
stats = {}
latest_changelog = api.get_changelog(limit=1)[0]
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 7ae7e73b..3ba6fdb2 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -366,6 +366,42 @@ def get_elastic_entity_stats() -> dict:
return stats
+def get_elastic_search_coverage(query: ReleaseQuery) -> dict:
+
+ search = Search(using=app.es_client, index=app.config['ELASTICSEARCH_RELEASE_INDEX'])
+ search = search.query(
+ "query_string",
+ query=query.q,
+ default_operator="AND",
+ analyze_wildcard=True,
+ allow_leading_wildcard=False,
+ lenient=True,
+ fields=["biblio"],
+ )
+ search.aggs.bucket(
+ 'preservation',
+ 'terms',
+ field='preservation',
+ missing='_unknown',
+ )
+
+ search = search[:0]
+
+ search = search.params(request_cache=True)
+ resp = wrap_es_execution(search)
+
+ preservation_bucket = agg_to_dict(resp.aggregations.preservation)
+ preservation_bucket['total'] = resp.hits.total
+ for k in ('bright', 'dark', 'shadows_only', 'none'):
+ if not k in preservation_bucket:
+ preservation_bucket[k] = 0
+ stats = {
+ 'total': resp.hits.total,
+ 'preservation': preservation_bucket,
+ }
+
+ return stats
+
def get_elastic_container_stats(ident, issnl=None):
"""
Returns dict:
diff --git a/python/fatcat_web/templates/container_view.html b/python/fatcat_web/templates/container_view.html
index 785ad1ee..8b4ddeb9 100644
--- a/python/fatcat_web/templates/container_view.html
+++ b/python/fatcat_web/templates/container_view.html
@@ -86,7 +86,7 @@
{% else %}
<code>{{ type_row }}</code>
{% endif %}
- <td class="three wide right aligned">{{ container._stats.release_type[type_row] }}
+ <td class="three wide right aligned">{{ "{:,}".format(container._stats.release_type[type_row]) }}
{% endfor %}
</tbody>
</table>
diff --git a/python/fatcat_web/templates/container_view_coverage.html b/python/fatcat_web/templates/container_view_coverage.html
index fd173cd4..5ec31e73 100644
--- a/python/fatcat_web/templates/container_view_coverage.html
+++ b/python/fatcat_web/templates/container_view_coverage.html
@@ -57,7 +57,7 @@
{% for type_row in container._type_preservation %}
<tr>
<td class="two wide">{{ type_row.release_type }}
- <td class="two wide right aligned">{{ type_row.total }}
+ <td class="two wide right aligned">{{ "{:,}".format(type_row.total) }}
<td class="twelve wide">{{ entity_macros.preservation_bar(type_row) }}
{% endfor %}
</tbody>
diff --git a/python/fatcat_web/templates/coverage_search.html b/python/fatcat_web/templates/coverage_search.html
new file mode 100644
index 00000000..c730ef9d
--- /dev/null
+++ b/python/fatcat_web/templates/coverage_search.html
@@ -0,0 +1,81 @@
+{% import "entity_macros.html" as entity_macros %}
+{% extends "base.html" %}
+
+{% block title %}{# Page title: shows the submitted query when there is one, otherwise a generic heading. #}
+{% if query.q %}
+ Search: {{ query.q }}
+{% else %}
+ Coverage Search
+{% endif %}
+{% endblock %}
+
+{% block fullmain %}
+{# Page body: the query form, followed (when the view supplied them) by the
+   overall preservation stats, the by-year histogram, and the by-release-type
+   table. Each result section is skipped when its variable is None. #}
+
+<div class="ui vertical stripe segment" style="background-color: #EEE; padding-top: 4.5em;">
+  <div class="ui container text">
+    <h1>Preservation Coverage Visualizer</h1>
+    <form class="" role="search" action="/coverage/search" method="get">
+      <div class="ui form">
+        <div class="ui action input huge fluid">
+          <input type="text" placeholder="Query..." name="q" value="{% if query.q %}{{ query.q }}{% endif %}" aria-label="visualize preservation coverage">
+          <button class="ui primary button">Search</button>
+        </div>
+        {# The query is user input placed inside a URL: percent-encode it so that
+           characters such as '&', '#' or '+' stay part of the q parameter. #}
+        <br>Can also search for <b><a href="/release/search?q={{ (query.q or "")|urlencode }}">releases</a></b> (eg, individual papers) or <b><a href="/container/search?q={{ (query.q or "")|urlencode }}">containers</a></b> (eg, journals).
+      </div>
+    </form>
+  </div>
+</div>
+
+<div class="ui container" style="margin-top: 2em;">
+
+{% if coverage_stats is not none %}
+<div class="ui centered grid">
+  <div class="row">
+    <div class="twelve wide column">
+      <div class="ui large horizontal statistic">
+        <div class="value">{{ "{:,}".format(coverage_stats.total) }}</div>
+        <div class="label" style="text-transform: none;">Known Releases</div>
+      </div>
+      {# No bar or table when nothing matched #}
+      {% if coverage_stats.total >= 1 %}
+        {{ entity_macros.preservation_bar(coverage_stats.preservation, extra_class="large") }}
+        {{ entity_macros.preservation_table(coverage_stats.preservation) }}
+      {% endif %}
+    </div>
+  </div>
+</div>
+{% endif %}
+
+{% if year_histogram_svg is not none %}
+  <br><br>
+  <h2>Perpetual Access Coverage by Year</h2>
+
+  <figure style="margin: 0 0 0 0;">
+    <embed type="image/svg+xml" src="{{ year_histogram_svg|safe }}" />
+  </figure>
+
+{% endif %}
+
+{% if coverage_type_preservation is not none %}
+  <br><br>
+  <h2>Perpetual Access Coverage by Release Type</h2>
+  <table class="ui table">
+    <thead>
+      <tr>
+        <th>Release Type
+        <th class="right aligned">Total Count
+        <th>Coverage
+    </thead>
+    <tbody>
+      {% for type_row in coverage_type_preservation %}
+      <tr>
+        <td class="two wide">{{ type_row.release_type }}
+        <td class="two wide right aligned">{{ "{:,}".format(type_row.total) }}
+        <td class="twelve wide">{{ entity_macros.preservation_bar(type_row) }}
+      {% endfor %}
+    </tbody>
+  </table>
+{% endif %}
+
+</div>
+{% endblock %}
+