aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author: Martin Czygan <martin.czygan@gmail.com> 2019-11-08 23:00:29 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2019-11-08 23:12:12 +0100
commit: ed72027bbf36e933c8db069bd02b0163a84aef83 (patch)
tree: dc0568703c43eec59d4ddb3cefec50f0d38f22c8 /python
parent: 5748f3241117b52f5295dc589374ec0c219534e4 (diff)
download: fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.tar.gz
fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.zip
Add basic pagination to search results
The "deep paging problem" imposes a limit on how far into a result set one can page; that limit is currently a hardcoded default value, `deep_page_limit=2000`, in `do_search`. Elasticsearch enforces its own, configurable limit, too: "Note that from + size can not be more than the index.max_result_window index setting, which defaults to 10,000." -- https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#request-body-search-from-size
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_web/routes.py 9
-rw-r--r-- python/fatcat_web/search.py 27
-rw-r--r-- python/fatcat_web/templates/container_search.html 21
-rw-r--r-- python/fatcat_web/templates/release_search.html 24
4 files changed, 67 insertions, 14 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index 79b594e3..a41f388d 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -673,9 +673,12 @@ def release_search():
if container_id and query:
query += ' container_id:"{}"'.format(container_id)
+ offset = request.args.get('offset', '0')
+ offset = max(0, int(offset)) if offset.isnumeric() else 0
+
if 'q' in request.args.keys():
# always do files for HTML
- found = do_release_search(query, fulltext_only=fulltext_only)
+ found = do_release_search(query, fulltext_only=fulltext_only, offset=offset)
return render_template('release_search.html', found=found, query=query, fulltext_only=fulltext_only)
else:
return render_template('release_search.html', query=query, fulltext_only=fulltext_only)
@@ -684,10 +687,12 @@ def release_search():
def container_search():
query = request.args.get('q')
+ offset = request.args.get('offset', '0')
+ offset = max(0, int(offset)) if offset.isnumeric() else 0
if 'q' in request.args.keys():
# always do files for HTML
- found = do_container_search(query)
+ found = do_container_search(query, offset=offset)
return render_template('container_search.html', found=found, query=query)
else:
return render_template('container_search.html', query=query)
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 52f05f71..7c60a6dd 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -11,15 +11,20 @@ import requests
from flask import abort, flash
from fatcat_web import app
+def do_search(index, request, limit=30, offset=0, deep_page_limit=2000):
-def do_search(index, request, limit=30):
-
+ # Sanity checks
if limit > 100:
- # Sanity check
limit = 100
+ if offset < 0:
+ offset = 0
+ if offset > deep_page_limit:
+ # Avoid deep paging problem.
+ offset = deep_page_limit
request["size"] = int(limit)
- #print(request)
+ request["from"] = int(offset)
+ # print(request)
resp = requests.get("%s/%s/_search" %
(app.config['ELASTICSEARCH_BACKEND'], index),
json=request)
@@ -45,10 +50,12 @@ def do_search(index, request, limit=30):
return {"count_returned": len(results),
"count_found": content['hits']['total'],
- "results": results }
+ "results": results,
+ "offset": offset,
+ "deep_page_limit": deep_page_limit}
-def do_release_search(q, limit=30, fulltext_only=True):
+def do_release_search(q, limit=30, fulltext_only=True, offset=0):
#print("Search hit: " + q)
if limit > 100:
@@ -75,17 +82,18 @@ def do_release_search(q, limit=30, fulltext_only=True):
},
}
- resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request)
+ resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request, offset=offset)
for h in resp['results']:
# Ensure 'contrib_names' is a list, not a single string
if type(h['contrib_names']) is not list:
h['contrib_names'] = [h['contrib_names'], ]
h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']]
resp["query"] = { "q": q }
+ resp["limit"] = limit
return resp
-def do_container_search(q, limit=30):
+def do_container_search(q, limit=30, offset=0):
# Convert raw ISSN-L to ISSN-L query
if len(q.split()) == 1 and len(q) == 9 and q[0:4].isdigit() and q[4] == '-':
@@ -103,8 +111,9 @@ def do_container_search(q, limit=30):
},
}
- resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit)
+ resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit, offset=offset)
resp["query"] = { "q": q }
+ resp["limit"] = limit
return resp
def get_elastic_entity_stats():
diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html
index 7f6799dd..9e2aa10a 100644
--- a/python/fatcat_web/templates/container_search.html
+++ b/python/fatcat_web/templates/container_search.html
@@ -30,7 +30,26 @@
{% if found %}
{% if found.results %}
- <i>Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i>
+ <i>Showing
+ {% if found.offset == 0 %}
+ first
+ {% else %}
+ results {{ found.offset }} &mdash;
+ {% endif %}
+ {{ found.offset + found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i>
+ <br>
+ {% if found.offset > 0 %} {# "Prev" is shown only past the first page; links target this page's own endpoint, container_search #}
+ {% if found.offset - found.limit < 0 %} {# clamp so the previous page never receives a negative offset #}
+ <a href="{{ url_for('container_search', q=query, offset=0) }}">Prev</a>
+ {% else %}
+ <a href="{{ url_for('container_search', q=query, offset=found.offset - found.limit) }}">Prev</a>
+ {% endif %}
+ {% endif %}
+
+ {% if found.offset + found.limit < found.count_found and found.offset + found.limit < found.deep_page_limit %} {# "Next" is shown only while more hits remain and the next offset stays below the deep paging limit #}
+ <a href="{{ url_for('container_search', q=query, offset=found.offset + found.limit) }}">Next</a>
+ {% endif %}
+
{% for entity in found.results %}
<div>
<h4 style="margin-top: 1em; margin-bottom: 4px; font-size: 1.1em;">
diff --git a/python/fatcat_web/templates/release_search.html b/python/fatcat_web/templates/release_search.html
index 7d6b0443..359038dc 100644
--- a/python/fatcat_web/templates/release_search.html
+++ b/python/fatcat_web/templates/release_search.html
@@ -36,8 +36,28 @@
{% if found %}
{% if found.results %}
- <i>Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i>
-{% for paper in found.results %}
+ <i>Showing
+ {% if found.offset == 0 %}
+ first
+ {% else %}
+ results {{ found.offset }} &mdash;
+ {% endif %}
+ {{ found.offset + found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i>
+
+ <br>
+ {% if found.offset > 0 %} {# "Prev" is shown only past the first page #}
+ {% if found.offset - found.limit < 0 %} {# clamp so the previous page never receives a negative offset #}
+ <a href="{{ url_for('release_search', q=query, offset=0) }}">Prev</a>
+ {% else %}
+ <a href="{{ url_for('release_search', q=query, offset=found.offset - found.limit) }}">Prev</a>
+ {% endif %}
+ {% endif %}
+
+ {% if found.offset + found.limit < found.count_found and found.offset + found.limit < found.deep_page_limit %} {# "Next" is shown only while more hits remain and the next offset stays below the deep paging limit #}
+ <a href="{{ url_for('release_search', q=query, offset=found.offset + found.limit) }}">Next</a>
+ {% endif %}
+
+ {% for paper in found.results %}
{{ entity_macros.release_search_result_row(paper) }}
{% endfor %}
{% if found.results|length > 8 %}