diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-04-02 17:37:35 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-04-02 17:37:35 -0700 |
commit | 87f40d0f8d15f16020964773e75d35ea22da049f (patch) | |
tree | bb1bae721bc9249bea7a4f994201c0ef0b730962 /fatcat_covid19 | |
parent | 7c5d51190cb8e58dea6231b48dc942fb35ef54a9 (diff) | |
download | fatcat-covid19-87f40d0f8d15f16020964773e75d35ea22da049f.tar.gz fatcat-covid19-87f40d0f8d15f16020964773e75d35ea22da049f.zip |
basic fulltext search highlighting
Diffstat (limited to 'fatcat_covid19')
-rw-r--r-- | fatcat_covid19/search.py | 32 |
-rw-r--r-- | fatcat_covid19/templates/base.html | 4 |
-rw-r--r-- | fatcat_covid19/templates/entity_macros.html | 34 |
-rw-r--r-- | fatcat_covid19/templates/fulltext_search.html | 4 |
4 files changed, 58 insertions, 16 deletions
diff --git a/fatcat_covid19/search.py b/fatcat_covid19/search.py index 2534374..08b6466 100644 --- a/fatcat_covid19/search.py +++ b/fatcat_covid19/search.py @@ -1,4 +1,5 @@ +import json import datetime import requests from flask import abort, flash @@ -32,7 +33,16 @@ def do_search(index, request, limit=30, offset=0, deep_page_limit=2000): abort(resp.status_code) content = resp.json() - results = [h['_source'] for h in content['hits']['hits']] + #print(json.dumps(content, indent=2)) + results = [] + for h in content['hits']['hits']: + r = h['_source'] + r['_highlights'] = [] + highlights = h.get('highlight', {}) + for k in highlights: + r['_highlights'] += highlights[k] + results.append(r) + print(json.dumps(results, indent=2)) for h in results: # Handle surrogate strings that elasticsearch returns sometimes, # probably due to mangled data processing in some pipeline. @@ -66,7 +76,25 @@ def do_fulltext_search(q, limit=30, offset=0): "default_operator": "AND", "analyze_wildcard": True, "lenient": True, - "fields": ["everything"], + "fields": [ + "everything", + "abstract", + "fulltext.body", + "fulltext.annex", + ], + }, + }, + "highlight" : { + "number_of_fragments" : 3, + "fragment_size" : 150, + "fields" : { + "abstract": { }, + "fulltext.body": { }, + "fulltext.annex": { }, + #"everything": { "number_of_fragments" : 3 }, + #"fulltext.abstract": { "number_of_fragments" : 3 }, + #"fulltext.body": { "number_of_fragments" : 3 }, + #"fulltext.annex": { "number_of_fragments" : 3 }, }, }, } diff --git a/fatcat_covid19/templates/base.html b/fatcat_covid19/templates/base.html index 07632dc..3193619 100644 --- a/fatcat_covid19/templates/base.html +++ b/fatcat_covid19/templates/base.html @@ -17,6 +17,10 @@ @media only screen and (max-width: 479px) { .mobile-hide{ display: none !important; } } + {# for fulltext search result highlighting #} + .search_highlights em { + background-color: #FFFFBB; {# yellow #} + } </style> {% block extra_head %}{% endblock %} </head> diff --git 
a/fatcat_covid19/templates/entity_macros.html b/fatcat_covid19/templates/entity_macros.html index 66f0eb2..cc3a8a6 100644 --- a/fatcat_covid19/templates/entity_macros.html +++ b/fatcat_covid19/templates/entity_macros.html @@ -1,16 +1,7 @@ {% macro fulltext_search_result_row(paper) -%} <div class="row"> -<div class="four wide right aligned column" style="padding: 1em;"> - {% if paper.fulltext.t_thumbnail_url %} - <a href="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.pdf_url.replace('//', '/') }}"> - <img src="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.t_thumbnail_url.replace('//', '/') }}" style="border: 1px solid grey; max-height: 12em;"> - </a> - {% else %} - No Fulltext - {% endif %} -</div> -<div class="twelve wide column"> +<div class="thirteen wide column"> {# ### TITLE ROW #} <h4 style="margin-top: 1em; margin-bottom: 0px; font-size: 1.2em;"> @@ -41,14 +32,18 @@ <b style="text-transform: uppercase;">[{{ paper.release_type }}]</b> {% endif %} </h4> + + {# <div style="float: right; padding: 4px;"> <a href="{{ config.ELASTICSEARCH_BACKEND }}/{{ config.ELASTICSEARCH_FULLTEXT_INDEX }}/release/{{ paper.fatcat_ident }}" class="ui grey tag label"><i class="file icon"></i>json</a> </div> + {% if paper.best_pdf_url %} <div style="float: right; padding: 4px;"> <a href="{{ paper.best_pdf_url }}" class="ui violet tag label"><i class="file icon"></i>fulltext</a> </div> {% endif %} + #} {# ### AUTHOR ROW #} @@ -87,8 +82,14 @@ {% endif %} {# ### ABSTRACT / QUERY HIGHLIGHT #} - {% if paper.abstract %} - <div style="padding: 1em;"> + {% if paper._highlights %} + <div style="padding-top: 1em; padding-left: 0.5em;" class="search_highlights"> + {% for highlight in paper._highlights %} + {{ highlight|safe }} ... + {% endfor %} + </div> + {% elif paper.abstract %} + <div style="padding-top: 1em; padding-left: 0.5em;"> {% if paper.abstract[0]|length > 500 %} {{ paper.abstract[0][:500] }}... 
{% else %} @@ -115,6 +116,15 @@ <a href="/work/{{ paper.work_id }}"><i class="sitemap icon"></i> and 5 other versions of the same work!</a> {% endif %} </div> +<div class="three wide left aligned column" style="padding: 1em;"> + {% if paper.fulltext.thumbnail_url %} + <a href="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.pdf_url.replace('//', '/') }}"> + <img src="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.thumbnail_url.replace('//', '/') }}" style="border: 1px solid grey; max-height: 12em; max-width: 100%;"> + </a> + {% else %} + {# No Fulltext #} + {% endif %} +</div> </div> {% endmacro %} diff --git a/fatcat_covid19/templates/fulltext_search.html b/fatcat_covid19/templates/fulltext_search.html index f1f2c6b..d2a8e4c 100644 --- a/fatcat_covid19/templates/fulltext_search.html +++ b/fatcat_covid19/templates/fulltext_search.html @@ -38,8 +38,8 @@ <div class="ui container text" style="padding: 1em;"> {{ entity_macros.top_results(found) }} </div> - <div class="ui container"> - <div class="ui centered grid"> + <div class="ui container" style="max-width: 950px !important;"> + <div class="ui centered stackable grid"> {% for paper in found.results %} {{ entity_macros.fulltext_search_result_row(paper) }} {% endfor %} |