about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-04 13:18:35 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-04 13:18:35 -0700
commit 198db52d3a93a2b7d7cab0a4140c6402a14eca84 (patch)
tree b34d79b605c0a79e0f875f5b0bd3944e72381cd1
parent 35ff62b6383ba07f9549edbb652f04fa69fb046c (diff)
download fatcat-scholar-198db52d3a93a2b7d7cab0a4140c6402a14eca84.tar.gz
fatcat-scholar-198db52d3a93a2b7d7cab0a4140c6402a14eca84.zip
collapse pages by SIM issue
-rw-r--r-- fatcat_scholar/schema.py 1
-rw-r--r-- fatcat_scholar/search.py 28
-rw-r--r-- fatcat_scholar/templates/search.html 2
-rw-r--r-- fatcat_scholar/templates/search_macros.html 39
-rw-r--r-- fatcat_scholar/transform.py 3
-rw-r--r-- proposals/work_schema.md 3
-rw-r--r-- schema/scholar_fulltext.v01.json 1
7 files changed, 57 insertions, 20 deletions
diff --git a/fatcat_scholar/schema.py b/fatcat_scholar/schema.py
index c5f2927..1d2e7a3 100644
--- a/fatcat_scholar/schema.py
+++ b/fatcat_scholar/schema.py
@@ -165,6 +165,7 @@ class ScholarDoc(BaseModel):
key: str
doc_type: str # enum: work or page
doc_index_ts: datetime.datetime
+ collapse_key: str
work_ident: Optional[str]
tags: List[str] = []
diff --git a/fatcat_scholar/search.py b/fatcat_scholar/search.py
index bfc7c6e..ce06fb7 100644
--- a/fatcat_scholar/search.py
+++ b/fatcat_scholar/search.py
@@ -28,6 +28,7 @@ class FulltextQuery(BaseModel):
filter_type: Optional[str] = None
filter_availability: Optional[str] = None
sort_order: Optional[str] = None
+ collapse_pages: bool = True
time_options: Any = {
"label": gettext("Release Date"),
"slug": "filter_time",
@@ -196,6 +197,13 @@ def do_fulltext_search(
number_of_fragments=2,
fragment_size=300,
)
+ if query.collapse_pages:
+ search = search.extra(
+ collapse={
+ "field": "collapse_key",
+ "inner_hits": {"name": "more_pages", "size": 0,},
+ }
+ )
# sort order
if query.sort_order == "time_asc":
@@ -234,12 +242,19 @@ def do_fulltext_search(
results = []
for h in resp:
r = h._d_
- # print(json.dumps(h.meta._d_, indent=2))
+ # print(h.meta._d_)
r["_highlights"] = []
if "highlight" in dir(h.meta):
highlights = h.meta.highlight._d_
for k in highlights:
r["_highlights"] += highlights[k]
+ r["_collapsed"] = []
+ r["_collapsed_count"] = 0
+ if "inner_hits" in dir(h.meta):
+ r["_collapsed_count"] = h.meta.inner_hits.more_pages.hits.total - 1
+ for k in h.meta.inner_hits.more_pages:
+ if k["key"] != r["key"]:
+ r["_collapsed"].append(k)
results.append(r)
for h in results:
@@ -250,9 +265,16 @@ def do_fulltext_search(
if type(h[key]) is str:
h[key] = h[key].encode("utf8", "ignore").decode("utf8")
+ count_found: int = int(resp.hits.total)
+ count_returned = len(results)
+
+ # if we grouped to less than a page of hits, update returned count
+ if query.collapse_pages and offset == 0 and (count_returned < limit):
+ count_found = count_returned
+
return FulltextHits(
- count_returned=len(results),
- count_found=int(resp.hits.total),
+ count_returned=count_returned,
+ count_found=count_found,
offset=offset,
limit=limit,
deep_page_limit=deep_page_limit,
diff --git a/fatcat_scholar/templates/search.html b/fatcat_scholar/templates/search.html
index 1f7a9b9..13d1aec 100644
--- a/fatcat_scholar/templates/search.html
+++ b/fatcat_scholar/templates/search.html
@@ -21,7 +21,6 @@
</details>
{% if hits %}
- {# <h2>{{ "{:,}".format(hits.count_found) }}</h2> #}
<span style="font-size: 1.5em;">{{ "{:,}".format(hits.count_found) }}</span>
Hits
<span style="color: rgba(0,0,0,0.4);">in {{ "{:0.2}".format(hits.query_time_ms/1000.0) }}sec</span>
@@ -35,7 +34,6 @@
<div class="ui tablet-hide two wide column">
{% if hits %}
<div style="width: 100%; text-align: right;">
- {# <h2>{{ "{:,}".format(hits.count_found) }}</h2> #}
<h3 style="font-size: {% if hits.count_found >= 10000000 %}1.0em{% elif hits.count_found >= 1000 %}1.5em{% else %}2.0em{% endif %};">{{ "{:,}".format(hits.count_found) }}</h3>
Hits
</div>
diff --git a/fatcat_scholar/templates/search_macros.html b/fatcat_scholar/templates/search_macros.html
index 07a4510..abdb6d3 100644
--- a/fatcat_scholar/templates/search_macros.html
+++ b/fatcat_scholar/templates/search_macros.html
@@ -67,7 +67,7 @@
{{ paper.biblio.release_year }}
{% endif %}
{% if paper.biblio.release_year and paper.biblio.container_name %}
-
+
{% endif %}
{% if paper.biblio.container_name %}
<i>
@@ -130,22 +130,33 @@
{# ### TAGS #}
<div style="margin-top: 0.2em;">
- {# colors to use: olive, brown, grey, pink, red, etc #}
- {# TODO: remove doc for ES 7.x-style lack of type #}
- {# TODO: only show 'json' link if from cluster? #}
- <a target="_blank" href="{{ settings.ELASTICSEARCH_BACKEND }}/{{ settings.ELASTICSEARCH_FULLTEXT_INDEX }}/_doc/{{ paper.key }}">
- <span class="ui label small">json</span>
- </a>
- {% if paper.biblio.release_ident %}
- <a target="_blank" href="https://fatcat.wiki/release/{{ paper.biblio.release_ident }}">
- <span class="ui label small">metadata</span>
+ {# colors to use: olive, brown, grey, pink, red, etc #}
+ {# TODO: remove doc for ES 7.x-style lack of type #}
+ {# TODO: only show 'json' link if from cluster? #}
+ <a target="_blank" href="{{ settings.ELASTICSEARCH_BACKEND }}/{{ settings.ELASTICSEARCH_FULLTEXT_INDEX }}/_doc/{{ paper.key }}">
+ <span class="ui label small">json</span>
</a>
- {% endif %}
- {% for tag in paper.tags %}
- <span class="ui label small">{{ _(tag) }}</span>
- {% endfor %}
+ {% if paper.biblio.release_ident %}
+ <a target="_blank" href="https://fatcat.wiki/release/{{ paper.biblio.release_ident }}">
+ <span class="ui label small">metadata</span>
+ </a>
+ {% endif %}
+
+ {% for tag in paper.tags %}
+ <span class="ui label small">{{ _(tag) }}</span>
+ {% endfor %}
</div>
+ {# ### COLLAPSED HITS #}
+ {% if paper._collapsed_count > 0 %}
+ <div style="padding-top: 0.5em;">
+ <button class="ui compact basic blue button" form="search_form" type="submit" name="collapse_pages" value="false">
+ <i class="ui icon zoom-in"></i>
+ Show {{ paper._collapsed_count }} additional hits from this issue
+ </button>
+ </div>
+ {% endif %}
+
</div>
<div class="three wide left aligned column" style="padding-top: 0.5em; padding-right: 0.5em;">
{% if paper.fulltext.access_url %}
diff --git a/fatcat_scholar/transform.py b/fatcat_scholar/transform.py
index 3d47fb4..847cc6e 100644
--- a/fatcat_scholar/transform.py
+++ b/fatcat_scholar/transform.py
@@ -187,6 +187,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
tags: List[str] = []
work_ident: Optional[str] = None
+ sim_issue: Optional[str] = None
abstracts: List[ScholarAbstract] = []
fulltext: Optional[ScholarFulltext] = None
primary_release: Optional[ReleaseEntity] = None
@@ -199,6 +200,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
if heavy.doc_type == DocType.sim_page:
assert ia_sim is not None
key = f"page_{ia_sim.issue_item}_{ia_sim.first_page}"
+ sim_issue = ia_sim.issue_item
biblio = es_biblio_from_sim(heavy.sim_fulltext)
fulltext = es_fulltext_from_sim(heavy.sim_fulltext)
elif heavy.doc_type == DocType.work:
@@ -316,6 +318,7 @@ def transform_heavy(heavy: IntermediateBundle) -> Optional[ScholarDoc]:
return ScholarDoc(
key=key,
+ collapse_key=sim_issue or work_ident,
doc_type=heavy.doc_type.value,
doc_index_ts=datetime.datetime.utcnow(),
work_ident=work_ident,
diff --git a/proposals/work_schema.md b/proposals/work_schema.md
index 933e750..97d60ac 100644
--- a/proposals/work_schema.md
+++ b/proposals/work_schema.md
@@ -3,9 +3,10 @@
- type: `_doc` (aka, no type, `include_type_name=false`)
- key: keyword (same as `_id`)
+- `collapse_key`: work ident, or SIM issue item (for collapsing/grouping search hits)
- `doc_type`: keyword (work or page)
- `doc_index_ts`: timestamp when document indexed
-- `work_id`: fatcat work ident (optional)
+- `work_ident`: fatcat work ident (optional)
- `biblio`: obj
- `fulltext`: obj
diff --git a/schema/scholar_fulltext.v01.json b/schema/scholar_fulltext.v01.json
index 7653633..1929512 100644
--- a/schema/scholar_fulltext.v01.json
+++ b/schema/scholar_fulltext.v01.json
@@ -63,6 +63,7 @@
"properties": {
"key": { "type": "keyword", "normalizer": "default", "doc_values": false },
+ "collapse_key": { "type": "keyword", "normalizer": "default" },
"doc_type": { "type": "keyword", "normalizer": "default" },
"doc_index_ts": { "type": "date" },
"work_ident": { "type": "keyword", "normalizer": "default", "doc_values": false },