diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-04 13:18:35 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-04 13:18:35 -0700 |
commit | 198db52d3a93a2b7d7cab0a4140c6402a14eca84 (patch) | |
tree | b34d79b605c0a79e0f875f5b0bd3944e72381cd1 | |
parent | 35ff62b6383ba07f9549edbb652f04fa69fb046c (diff) | |
download | fatcat-scholar-198db52d3a93a2b7d7cab0a4140c6402a14eca84.tar.gz fatcat-scholar-198db52d3a93a2b7d7cab0a4140c6402a14eca84.zip |
collapse pages by SIM issue

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | fatcat_scholar/schema.py | 1 |
| -rw-r--r-- | fatcat_scholar/search.py | 28 |
| -rw-r--r-- | fatcat_scholar/templates/search.html | 2 |
| -rw-r--r-- | fatcat_scholar/templates/search_macros.html | 39 |
| -rw-r--r-- | fatcat_scholar/transform.py | 3 |
| -rw-r--r-- | proposals/work_schema.md | 3 |
| -rw-r--r-- | schema/scholar_fulltext.v01.json | 1 |

7 files changed, 57 insertions, 20 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py index c5f2927..1d2e7a3 100644 --- a/fatcat_scholar/schema.py +++ b/fatcat_scholar/schema.py @@ -165,6 +165,7 @@ class ScholarDoc(BaseModel): key: str doc_type: str # enum: work or page doc_index_ts: datetime.datetime + collapse_key: str work_ident: Optional[str] tags: List[str] = [] diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py index bfc7c6e..ce06fb7 100644 --- a/fatcat_scholar/search.py +++ b/fatcat_scholar/search.py @@ -28,6 +28,7 @@ class FulltextQuery(BaseModel): filter_type: Optional[str] = None filter_availability: Optional[str] = None sort_order: Optional[str] = None + collapse_pages: bool = True time_options: Any = { "label": gettext("Release Date"), "slug": "filter_time", @@ -196,6 +197,13 @@ def do_fulltext_search( number_of_fragments=2, fragment_size=300, ) + if query.collapse_pages: + search = search.extra( + collapse={ + "field": "collapse_key", + "inner_hits": {"name": "more_pages", "size": 0,}, + } + ) # sort order if query.sort_order == "time_asc": @@ -234,12 +242,19 @@ def do_fulltext_search( results = [] for h in resp: r = h._d_ - # print(json.dumps(h.meta._d_, indent=2)) + # print(h.meta._d_) r["_highlights"] = [] if "highlight" in dir(h.meta): highlights = h.meta.highlight._d_ for k in highlights: r["_highlights"] += highlights[k] + r["_collapsed"] = [] + r["_collapsed_count"] = 0 + if "inner_hits" in dir(h.meta): + r["_collapsed_count"] = h.meta.inner_hits.more_pages.hits.total - 1 + for k in h.meta.inner_hits.more_pages: + if k["key"] != r["key"]: + r["_collapsed"].append(k) results.append(r) for h in results: @@ -250,9 +265,16 @@ def do_fulltext_search( if type(h[key]) is str: h[key] = h[key].encode("utf8", "ignore").decode("utf8") + count_found: int = int(resp.hits.total) + count_returned = len(results) + + # if we grouped to less than a page of hits, update returned count + if query.collapse_pages and offset == 0 and (count_returned < limit): + count_found = 
count_returned + return FulltextHits( - count_returned=len(results), - count_found=int(resp.hits.total), + count_returned=count_returned, + count_found=count_found, offset=offset, limit=limit, deep_page_limit=deep_page_limit, diff --git a/fatcat_scholar/templates/search.html b/fatcat_scholar/templates/search.html index 1f7a9b9..13d1aec 100644 --- a/fatcat_scholar/templates/search.html +++ b/fatcat_scholar/templates/search.html @@ -21,7 +21,6 @@ </details> {% if hits %} - {# <h2>{{ "{:,}".format(hits.count_found) }}</h2> #} <span style="font-size: 1.5em;">{{ "{:,}".format(hits.count_found) }}</span> Hits <span style="color: rgba(0,0,0,0.4);">in {{ "{:0.2}".format(hits.query_time_ms/1000.0) }}sec</span> @@ -35,7 +34,6 @@ <div class="ui tablet-hide two wide column"> {% if hits %} <div style="width: 100%; text-align: right;"> - {# <h2>{{ "{:,}".format(hits.count_found) }}</h2> #} <h3 style="font-size: {% if hits.count_found >= 10000000 %}1.0em{% elif hits.count_found >= 1000 %}1.5em{% else %}2.0em{% endif %};">{{ "{:,}".format(hits.count_found) }}</h3> Hits </div> diff --git a/fatcat_scholar/templates/search_macros.html b/fatcat_scholar/templates/search_macros.html index 07a4510..abdb6d3 100644 --- a/fatcat_scholar/templates/search_macros.html +++ b/fatcat_scholar/templates/search_macros.html @@ -67,7 +67,7 @@ {{ paper.biblio.release_year }} {% endif %} {% if paper.biblio.release_year and paper.biblio.container_name %} - + {% endif %} {% if paper.biblio.container_name %} <i> @@ -130,22 +130,33 @@ {# ### TAGS #} <div style="margin-top: 0.2em;"> - {# colors to use: olive, brown, grey, pink, red, etc #} - {# TODO: remove doc for ES 7.x-style lack of type #} - {# TODO: only show 'json' link if from cluster? 
#} - <a target="_blank" href="{{ settings.ELASTICSEARCH_BACKEND }}/{{ settings.ELASTICSEARCH_FULLTEXT_INDEX }}/_doc/{{ paper.key }}"> - <span class="ui label small">json</span> - </a> - {% if paper.biblio.release_ident %} - <a target="_blank" href="https://fatcat.wiki/release/{{ paper.biblio.release_ident }}"> - <span class="ui label small">metadata</span> + {# colors to use: olive, brown, grey, pink, red, etc #} + {# TODO: remove doc for ES 7.x-style lack of type #} + {# TODO: only show 'json' link if from cluster? #} + <a target="_blank" href="{{ settings.ELASTICSEARCH_BACKEND }}/{{ settings.ELASTICSEARCH_FULLTEXT_INDEX }}/_doc/{{ paper.key }}"> + <span class="ui label small">json</span> </a> - {% endif %} - {% for tag in paper.tags %} - <span class="ui label small">{{ _(tag) }}</span> - {% endfor %} + {% if paper.biblio.release_ident %} + <a target="_blank" href="https://fatcat.wiki/release/{{ paper.biblio.release_ident }}"> + <span class="ui label small">metadata</span> + </a> + {% endif %} + + {% for tag in paper.tags %} + <span class="ui label small">{{ _(tag) }}</span> + {% endfor %} </div> + {# ### COLLAPSED HITS #} + {% if paper._collapsed_count > 0 %} + <div style="padding-top: 0.5em;"> + <button class="ui compact basic blue button" form="search_form" type="submit" name="collapse_pages" value="false"> + <i class="ui icon zoom-in"></i> + Show {{ paper._collapsed_count }} additional hits from this issue + </button> + </div> + {% endif %} + </div> <div class="three wide left aligned column" style="padding-top: 0.5em; padding-right: 0.5em;"> {% if paper.fulltext.access_url %} diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py index 3d47fb4..847cc6e 100644 --- a/fatcat_scholar/transform.py +++ b/fatcat_scholar/transform.py @@ -187,6 +187,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]: tags: List[str] = [] work_ident: Optional[str] = None + sim_issue: Optional[str] = None abstracts: List[ScholarAbstract] = [] 
fulltext: Optional[ScholarFulltext] = None primary_release: Optional[ReleaseEntity] = None @@ -199,6 +200,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]: if heavy.doc_type == DocType.sim_page: assert ia_sim is not None key = f"page_{ia_sim.issue_item}_{ia_sim.first_page}" + sim_issue = ia_sim.issue_item biblio = es_biblio_from_sim(heavy.sim_fulltext) fulltext = es_fulltext_from_sim(heavy.sim_fulltext) elif heavy.doc_type == DocType.work: @@ -316,6 +318,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]: return ScholarDoc( key=key, + collapse_key=sim_issue or work_ident, doc_type=heavy.doc_type.value, doc_index_ts=datetime.datetime.utcnow(), work_ident=work_ident, diff --git a/proposals/work_schema.md b/proposals/work_schema.md index 933e750..97d60ac 100644 --- a/proposals/work_schema.md +++ b/proposals/work_schema.md @@ -3,9 +3,10 @@ - type: `_doc` (aka, no type, `include_type_name=false`) - key: keyword (same as `_id`) +- `collapse_key`: work ident, or SIM issue item (for collapsing/grouping search hits) - `doc_type`: keyword (work or page) - `doc_index_ts`: timestamp when document indexed -- `work_id`: fatcat work ident (optional) +- `work_ident`: fatcat work ident (optional) - `biblio`: obj - `fulltext`: obj diff --git a/schema/scholar_fulltext.v01.json b/schema/scholar_fulltext.v01.json index 7653633..1929512 100644 --- a/schema/scholar_fulltext.v01.json +++ b/schema/scholar_fulltext.v01.json @@ -63,6 +63,7 @@ "properties": { "key": { "type": "keyword", "normalizer": "default", "doc_values": false }, + "collapse_key": { "type": "keyword", "normalizer": "default" }, "doc_type": { "type": "keyword", "normalizer": "default" }, "doc_index_ts": { "type": "date" }, "work_ident": { "type": "keyword", "normalizer": "default", "doc_values": false }, |