From 87f40d0f8d15f16020964773e75d35ea22da049f Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 2 Apr 2020 17:37:35 -0700 Subject: basic fulltext search highlighting --- fatcat_covid19/search.py | 32 +++++++++++++++++++++++-- fatcat_covid19/templates/base.html | 4 ++++ fatcat_covid19/templates/entity_macros.html | 34 +++++++++++++++++---------- fatcat_covid19/templates/fulltext_search.html | 4 ++-- 4 files changed, 58 insertions(+), 16 deletions(-) diff --git a/fatcat_covid19/search.py b/fatcat_covid19/search.py index 2534374..08b6466 100644 --- a/fatcat_covid19/search.py +++ b/fatcat_covid19/search.py @@ -1,4 +1,5 @@ +import json import datetime import requests from flask import abort, flash @@ -32,7 +33,16 @@ def do_search(index, request, limit=30, offset=0, deep_page_limit=2000): abort(resp.status_code) content = resp.json() - results = [h['_source'] for h in content['hits']['hits']] + #print(json.dumps(content, indent=2)) + results = [] + for h in content['hits']['hits']: + r = h['_source'] + r['_highlights'] = [] + highlights = h.get('highlight', {}) + for k in highlights: + r['_highlights'] += highlights[k] + results.append(r) + print(json.dumps(results, indent=2)) for h in results: # Handle surrogate strings that elasticsearch returns sometimes, # probably due to mangled data processing in some pipeline. @@ -66,7 +76,25 @@ def do_fulltext_search(q, limit=30, offset=0): "default_operator": "AND", "analyze_wildcard": True, "lenient": True, - "fields": ["everything"], + "fields": [ + "everything", + "abstract", + "fulltext.body", + "fulltext.annex", + ], + }, + }, + "highlight" : { + "number_of_fragments" : 3, + "fragment_size" : 150, + "fields" : { + "abstract": { }, + "fulltext.body": { }, + "fulltext.annex": { }, + #"everything": { "number_of_fragments" : 3 }, + #"fulltext.abstract": { "number_of_fragments" : 3 }, + #"fulltext.body": { "number_of_fragments" : 3 }, + #"fulltext.annex": { "number_of_fragments" : 3 }, }, }, } diff --git a/fatcat_covid19/templates/base.html b/fatcat_covid19/templates/base.html index 07632dc..3193619 100644 --- a/fatcat_covid19/templates/base.html +++ b/fatcat_covid19/templates/base.html @@ -17,6 +17,10 @@ @media only screen and (max-width: 479px) { .mobile-hide{ display: none !important; } } + {# for fulltext search result highlighting #} + .search_highlights em { + background-color: #FFFFBB; {# yellow #} + } {% block extra_head %}{% endblock %} diff --git a/fatcat_covid19/templates/entity_macros.html b/fatcat_covid19/templates/entity_macros.html index 66f0eb2..cc3a8a6 100644 --- a/fatcat_covid19/templates/entity_macros.html +++ b/fatcat_covid19/templates/entity_macros.html @@ -1,16 +1,7 @@ {% macro fulltext_search_result_row(paper) -%}
-
- {% if paper.fulltext.t_thumbnail_url %} - - - - {% else %} - No Fulltext - {% endif %} -
-
+
{# ### TITLE ROW #}

@@ -41,14 +32,18 @@ [{{ paper.release_type }}] {% endif %}

+ + {#
  json
+ {% if paper.best_pdf_url %}
  fulltext
{% endif %} + #} {# ### AUTHOR ROW #} @@ -87,8 +82,14 @@ {% endif %} {# ### ABSTRACT / QUERY HIGHLIGHT #} - {% if paper.abstract %} -
+ {% if paper._highlights %} +
+ {% for highlight in paper._highlights %} + {{ highlight|safe }} ... + {% endfor %} +
+ {% elif paper.abstract %} +
{% if paper.abstract[0]|length > 500 %} {{ paper.abstract[0][:500] }}... {% else %} @@ -115,6 +116,15 @@ and 5 other versions of the same work! {% endif %}
+
+ {% if paper.fulltext.thumbnail_url %} + + + + {% else %} + {# No Fulltext #} + {% endif %} +
{% endmacro %} diff --git a/fatcat_covid19/templates/fulltext_search.html b/fatcat_covid19/templates/fulltext_search.html index f1f2c6b..d2a8e4c 100644 --- a/fatcat_covid19/templates/fulltext_search.html +++ b/fatcat_covid19/templates/fulltext_search.html @@ -38,8 +38,8 @@
{{ entity_macros.top_results(found) }}
-
-
+
+
{% for paper in found.results %} {{ entity_macros.fulltext_search_result_row(paper) }} {% endfor %} -- cgit v1.2.3