diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-04-02 17:37:35 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-04-02 17:37:35 -0700 | 
| commit | 87f40d0f8d15f16020964773e75d35ea22da049f (patch) | |
| tree | bb1bae721bc9249bea7a4f994201c0ef0b730962 | |
| parent | 7c5d51190cb8e58dea6231b48dc942fb35ef54a9 (diff) | |
| download | fatcat-covid19-87f40d0f8d15f16020964773e75d35ea22da049f.tar.gz fatcat-covid19-87f40d0f8d15f16020964773e75d35ea22da049f.zip  | |
basic fulltext search highlighting
| -rw-r--r-- | fatcat_covid19/search.py | 32 | ||||
| -rw-r--r-- | fatcat_covid19/templates/base.html | 4 | ||||
| -rw-r--r-- | fatcat_covid19/templates/entity_macros.html | 34 | ||||
| -rw-r--r-- | fatcat_covid19/templates/fulltext_search.html | 4 | 
4 files changed, 58 insertions, 16 deletions
diff --git a/fatcat_covid19/search.py b/fatcat_covid19/search.py index 2534374..08b6466 100644 --- a/fatcat_covid19/search.py +++ b/fatcat_covid19/search.py @@ -1,4 +1,5 @@ +import json  import datetime  import requests  from flask import abort, flash @@ -32,7 +33,16 @@ def do_search(index, request, limit=30, offset=0, deep_page_limit=2000):          abort(resp.status_code)      content = resp.json() -    results = [h['_source'] for h in content['hits']['hits']] +    #print(json.dumps(content, indent=2)) +    results = [] +    for h in content['hits']['hits']: +        r = h['_source'] +        r['_highlights'] = [] +        highlights = h.get('highlight', {}) +        for k in highlights: +            r['_highlights'] += highlights[k] +        results.append(r) +    print(json.dumps(results, indent=2))      for h in results:          # Handle surrogate strings that elasticsearch returns sometimes,          # probably due to mangled data processing in some pipeline. @@ -66,7 +76,25 @@ def do_fulltext_search(q, limit=30, offset=0):                  "default_operator": "AND",                  "analyze_wildcard": True,                  "lenient": True, -                "fields": ["everything"], +                "fields": [ +                    "everything", +                    "abstract", +                    "fulltext.body", +                    "fulltext.annex", +                ], +            }, +        }, +        "highlight" : { +            "number_of_fragments" : 3, +            "fragment_size" : 150, +            "fields" : { +                "abstract": { }, +                "fulltext.body": { }, +                "fulltext.annex": { }, +                #"everything": { "number_of_fragments" : 3 }, +                #"fulltext.abstract": { "number_of_fragments" : 3 }, +                #"fulltext.body":     { "number_of_fragments" : 3 }, +                #"fulltext.annex":    { "number_of_fragments" : 3 },              },          },      } diff --git a/fatcat_covid19/templates/base.html b/fatcat_covid19/templates/base.html index 07632dc..3193619 100644 --- a/fatcat_covid19/templates/base.html +++ b/fatcat_covid19/templates/base.html @@ -17,6 +17,10 @@      @media only screen and (max-width: 479px) {        .mobile-hide{ display: none !important; }      } +    {# for fulltext search result highlighting #} +    .search_highlights em { +      background-color: #FFFFBB; {# yellow #} +    }    </style>    {% block extra_head %}{% endblock %}  </head> diff --git a/fatcat_covid19/templates/entity_macros.html b/fatcat_covid19/templates/entity_macros.html index 66f0eb2..cc3a8a6 100644 --- a/fatcat_covid19/templates/entity_macros.html +++ b/fatcat_covid19/templates/entity_macros.html @@ -1,16 +1,7 @@  {% macro fulltext_search_result_row(paper) -%}  <div class="row"> -<div class="four wide right aligned column" style="padding: 1em;"> -  {% if paper.fulltext.t_thumbnail_url %} -    <a href="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.pdf_url.replace('//', '/') }}"> -      <img src="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.t_thumbnail_url.replace('//', '/') }}" style="border: 1px solid grey; max-height: 12em;"> -    </a> -  {% else %} -    No Fulltext -  {% endif %} -</div> -<div class="twelve wide column"> +<div class="thirteen wide column">    {# ### TITLE ROW #}    <h4 style="margin-top: 1em; margin-bottom: 0px; font-size: 1.2em;"> @@ -41,14 +32,18 @@        <b style="text-transform: uppercase;">[{{ paper.release_type }}]</b>      {% endif %}    </h4> + +  {#    <div style="float: right; padding: 4px;">      <a href="{{ config.ELASTICSEARCH_BACKEND }}/{{ config.ELASTICSEARCH_FULLTEXT_INDEX }}/release/{{ paper.fatcat_ident }}" class="ui grey tag label"><i class="file icon"></i>json</a>    </div> +    {% if paper.best_pdf_url %}    <div style="float: right; padding: 4px;">      <a href="{{ paper.best_pdf_url }}" class="ui violet tag label"><i class="file icon"></i>fulltext</a>    </div>    {% endif %} +  #}    {# ### AUTHOR ROW #} @@ -87,8 +82,14 @@    {% endif %}    {# ### ABSTRACT / QUERY HIGHLIGHT #} -  {% if paper.abstract %} -    <div style="padding: 1em;"> +  {% if paper._highlights %} +    <div style="padding-top: 1em; padding-left: 0.5em;" class="search_highlights"> +    {% for highlight in paper._highlights %} +      {{ highlight|safe }} ... +    {% endfor %} +    </div> +  {% elif paper.abstract %} +    <div style="padding-top: 1em; padding-left: 0.5em;">        {% if paper.abstract[0]|length > 500 %}          {{ paper.abstract[0][:500] }}...        {% else %} @@ -115,6 +116,15 @@    <a href="/work/{{ paper.work_id }}"><i class="sitemap icon"></i> and 5 other versions of the same work!</a>    {% endif %}  </div> +<div class="three wide left aligned column" style="padding: 1em;"> +  {% if paper.fulltext.thumbnail_url %} +    <a href="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.pdf_url.replace('//', '/') }}"> +      <img src="{{ config.COVID19_FULLTEXT_HOST }}{{ paper.fulltext.thumbnail_url.replace('//', '/') }}" style="border: 1px solid grey; max-height: 12em; max-width: 100%;"> +    </a> +  {% else %} +    {# No Fulltext #} +  {% endif %} +</div>  </div>  {% endmacro %} diff --git a/fatcat_covid19/templates/fulltext_search.html b/fatcat_covid19/templates/fulltext_search.html index f1f2c6b..d2a8e4c 100644 --- a/fatcat_covid19/templates/fulltext_search.html +++ b/fatcat_covid19/templates/fulltext_search.html @@ -38,8 +38,8 @@      <div class="ui container text" style="padding: 1em;">        {{ entity_macros.top_results(found) }}      </div> -    <div class="ui container"> -      <div class="ui centered grid"> +    <div class="ui container" style="max-width: 950px !important;"> +      <div class="ui centered stackable grid">          {% for paper in found.results %}            {{ entity_macros.fulltext_search_result_row(paper) }}          {% endfor %}  | 
