about | summary | refs | log | tree | commit | diff | stats
path: root/fatcat_covid19
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-04-02 17:37:35 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-04-02 17:37:35 -0700
commit 87f40d0f8d15f16020964773e75d35ea22da049f (patch)
tree bb1bae721bc9249bea7a4f994201c0ef0b730962 /fatcat_covid19
parent 7c5d51190cb8e58dea6231b48dc942fb35ef54a9 (diff)
download fatcat-covid19-87f40d0f8d15f16020964773e75d35ea22da049f.tar.gz
fatcat-covid19-87f40d0f8d15f16020964773e75d35ea22da049f.zip
basic fulltext search highlighting
Diffstat (limited to 'fatcat_covid19')
-rw-r--r-- fatcat_covid19/search.py 32
-rw-r--r-- fatcat_covid19/templates/base.html 4
-rw-r--r-- fatcat_covid19/templates/entity_macros.html 34
-rw-r--r-- fatcat_covid19/templates/fulltext_search.html 4
4 files changed, 58 insertions, 16 deletions
diff --git a/fatcat_covid19/search.py b/fatcat_covid19/search.py
index 2534374..08b6466 100644
--- a/fatcat_covid19/search.py
+++ b/fatcat_covid19/search.py
@@ -1,4 +1,5 @@
+import json
import datetime
import requests
from flask import abort, flash
@@ -32,7 +33,16 @@ def do_search(index, request, limit=30, offset=0, deep_page_limit=2000):
abort(resp.status_code)
content = resp.json()
- results = [h['_source'] for h in content['hits']['hits']]
+ #print(json.dumps(content, indent=2))
+ results = []
+ for h in content['hits']['hits']:
+ r = h['_source']
+ r['_highlights'] = []
+ highlights = h.get('highlight', {})
+ for k in highlights:
+ r['_highlights'] += highlights[k]
+ results.append(r)
+ print(json.dumps(results, indent=2))
for h in results:
# Handle surrogate strings that elasticsearch returns sometimes,
# probably due to mangled data processing in some pipeline.
@@ -66,7 +76,25 @@ def do_fulltext_search(q, limit=30, offset=0):
"default_operator": "AND",
"analyze_wildcard": True,
"lenient": True,
- "fields": ["everything"],
+ "fields": [
+ "everything",
+ "abstract",
+ "fulltext.body",
+ "fulltext.annex",
+ ],
+ },
+ },
+ "highlight" : {
+ "number_of_fragments" : 3,
+ "fragment_size" : 150,
+ "fields" : {
+ "abstract": { },
+ "fulltext.body": { },
+ "fulltext.annex": { },
+ #"everything": { "number_of_fragments" : 3 },
+ #"fulltext.abstract": { "number_of_fragments" : 3 },
+ #"fulltext.body": { "number_of_fragments" : 3 },
+ #"fulltext.annex": { "number_of_fragments" : 3 },
},
},
}
diff --git a/fatcat_covid19/templates/base.html b/fatcat_covid19/templates/base.html
index 07632dc..3193619 100644
--- a/fatcat_covid19/templates/base.html
+++ b/fatcat_covid19/templates/base.html
@@ -17,6 +17,10 @@
@media only screen and (max-width: 479px) {
.mobile-hide{ display: none !important; }
}
+ {# for fulltext search result highlighting #}
+ .search_highlights em {
+ background-color: #FFFFBB; {# yellow #}
+ }
</style>
{% block extra_head %}{% endblock %}
</head>
diff --git a/fatcat_covid19/templates/entity_macros.html b/fatcat_covid19/templates/entity_macros.html
index 66f0eb2..cc3a8a6 100644
--- a/fatcat_covid19/templates/entity_macros.html
+++ b/fatcat_covid19/templates/entity_macros.html
@@ -1,16 +1,7 @@
{% macro fulltext_search_result_row(paper) -%}
<div class="row">
-<div class="four wide right aligned column" style="padding: 1em;">
- {% if paper.fulltext.t_thumbnail_url %}
- <a href="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.pdf_url.replace('//', '/') }}">
- <img src="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.t_thumbnail_url.replace('//', '/') }}" style="border: 1px solid grey; max-height: 12em;">
- </a>
- {% else %}
- No Fulltext
- {% endif %}
-</div>
-<div class="twelve wide column">
+<div class="thirteen wide column">
{# ### TITLE ROW #}
<h4 style="margin-top: 1em; margin-bottom: 0px; font-size: 1.2em;">
@@ -41,14 +32,18 @@
<b style="text-transform: uppercase;">[{{ paper.release_type }}]</b>
{% endif %}
</h4>
+
+ {#
<div style="float: right; padding: 4px;">
&nbsp;&nbsp;<a href="{{ config.ELASTICSEARCH_BACKEND }}/{{ config.ELASTICSEARCH_FULLTEXT_INDEX }}/release/{{ paper.fatcat_ident }}" class="ui grey tag label"><i class="file icon"></i>json</a>
</div>
+
{% if paper.best_pdf_url %}
<div style="float: right; padding: 4px;">
&nbsp;&nbsp;<a href="{{ paper.best_pdf_url }}" class="ui violet tag label"><i class="file icon"></i>fulltext</a>
</div>
{% endif %}
+ #}
{# ### AUTHOR ROW #}
@@ -87,8 +82,14 @@
{% endif %}
{# ### ABSTRACT / QUERY HIGHLIGHT #}
- {% if paper.abstract %}
- <div style="padding: 1em;">
+ {% if paper._highlights %}
+ <div style="padding-top: 1em; padding-left: 0.5em;" class="search_highlights">
+ {% for highlight in paper._highlights %}
+ {{ highlight|safe }} ...
+ {% endfor %}
+ </div>
+ {% elif paper.abstract %}
+ <div style="padding-top: 1em; padding-left: 0.5em;">
{% if paper.abstract[0]|length > 500 %}
{{ paper.abstract[0][:500] }}...
{% else %}
@@ -115,6 +116,15 @@
<a href="/work/{{ paper.work_id }}"><i class="sitemap icon"></i> and 5 other versions of the same work!</a>
{% endif %}
</div>
+<div class="three wide left aligned column" style="padding: 1em;">
+ {% if paper.fulltext.thumbnail_url %}
+ <a href="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.pdf_url.replace('//', '/') }}">
+ <img src="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.thumbnail_url.replace('//', '/') }}" style="border: 1px solid grey; max-height: 12em; max-width: 100%;">
+ </a>
+ {% else %}
+ {# No Fulltext #}
+ {% endif %}
+</div>
</div>
{% endmacro %}
diff --git a/fatcat_covid19/templates/fulltext_search.html b/fatcat_covid19/templates/fulltext_search.html
index f1f2c6b..d2a8e4c 100644
--- a/fatcat_covid19/templates/fulltext_search.html
+++ b/fatcat_covid19/templates/fulltext_search.html
@@ -38,8 +38,8 @@
<div class="ui container text" style="padding: 1em;">
{{ entity_macros.top_results(found) }}
</div>
- <div class="ui container">
- <div class="ui centered grid">
+ <div class="ui container" style="max-width: 950px !important;">
+ <div class="ui centered stackable grid">
{% for paper in found.results %}
{{ entity_macros.fulltext_search_result_row(paper) }}
{% endfor %}