From ed72027bbf36e933c8db069bd02b0163a84aef83 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Fri, 8 Nov 2019 23:00:29 +0100 Subject: Add basic pagination to search results The "deep paging problem" imposes some limit, which currently is a hardcoded default value, `deep_page_limit=2000` in `do_search`. Elasticsearch can be configured, too: > Note that from + size can not be more than the index.max_result_window index setting, which defaults to 10,000. -- https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#request-body-search-from-size --- python/fatcat_web/routes.py | 9 ++++++-- python/fatcat_web/search.py | 27 +++++++++++++++-------- python/fatcat_web/templates/container_search.html | 21 +++++++++++++++++- python/fatcat_web/templates/release_search.html | 24 ++++++++++++++++++-- 4 files changed, 67 insertions(+), 14 deletions(-) diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 79b594e3..a41f388d 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -673,9 +673,12 @@ def release_search(): if container_id and query: query += ' container_id:"{}"'.format(container_id) + offset = request.args.get('offset', '0') + offset = max(0, int(offset)) if offset.isnumeric() else 0 + if 'q' in request.args.keys(): # always do files for HTML - found = do_release_search(query, fulltext_only=fulltext_only) + found = do_release_search(query, fulltext_only=fulltext_only, offset=offset) return render_template('release_search.html', found=found, query=query, fulltext_only=fulltext_only) else: return render_template('release_search.html', query=query, fulltext_only=fulltext_only) @@ -684,10 +687,12 @@ def release_search(): def container_search(): query = request.args.get('q') + offset = request.args.get('offset', '0') + offset = max(0, int(offset)) if offset.isnumeric() else 0 if 'q' in request.args.keys(): # always do files for HTML - found = do_container_search(query) + found = do_container_search(query, offset=offset) return render_template('container_search.html', found=found, query=query) else: return render_template('container_search.html', query=query) diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 52f05f71..7c60a6dd 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -11,15 +11,20 @@ import requests from flask import abort, flash from fatcat_web import app +def do_search(index, request, limit=30, offset=0, deep_page_limit=2000): -def do_search(index, request, limit=30): - + # Sanity checks if limit > 100: - # Sanity check limit = 100 + if offset < 0: + offset = 0 + if offset > deep_page_limit: + # Avoid deep paging problem. + offset = deep_page_limit request["size"] = int(limit) - #print(request) + request["from"] = int(offset) + # print(request) resp = requests.get("%s/%s/_search" % (app.config['ELASTICSEARCH_BACKEND'], index), json=request) @@ -45,10 +50,12 @@ def do_search(index, request, limit=30): return {"count_returned": len(results), "count_found": content['hits']['total'], - "results": results } + "results": results, + "offset": offset, + "deep_page_limit": deep_page_limit} -def do_release_search(q, limit=30, fulltext_only=True): +def do_release_search(q, limit=30, fulltext_only=True, offset=0): #print("Search hit: " + q) if limit > 100: @@ -75,17 +82,18 @@ def do_release_search(q, limit=30, fulltext_only=True): }, } - resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request) + resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request, offset=offset) for h in resp['results']: # Ensure 'contrib_names' is a list, not a single string if type(h['contrib_names']) is not list: h['contrib_names'] = [h['contrib_names'], ] h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']] resp["query"] = { "q": q } + resp["limit"] = limit return resp -def do_container_search(q, limit=30): +def do_container_search(q, limit=30, offset=0): # Convert raw ISSN-L to ISSN-L query if len(q.split()) == 1 and len(q) == 9 and q[0:4].isdigit() and q[4] == '-': @@ -103,8 +111,9 @@ def do_container_search(q, limit=30): }, } - resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit) + resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit, offset=offset) resp["query"] = { "q": q } + resp["limit"] = limit return resp def get_elastic_entity_stats(): diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html index 7f6799dd..9e2aa10a 100644 --- a/python/fatcat_web/templates/container_search.html +++ b/python/fatcat_web/templates/container_search.html @@ -30,7 +30,26 @@ {% if found %} {% if found.results %} - Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: {{ found.query.q }} + Showing + {% if found.offset == 0 %} + first + {% else %} + results {{ found.offset }} — + {% endif %} + {{ found.offset + found.count_returned }} out of {{ found.count_found }} results for: {{ found.query.q }} +
+ {% if found.offset > 0 %} + {% if found.offset - found.limit < 0 %} + Prev + {% else %} + Prev + {% endif %} + {% endif %} + + {% if found.offset + found.limit < found.count_found and found.offset + found.limit < found.deep_page_limit %} + Next + {% endif %} + {% for entity in found.results %}

diff --git a/python/fatcat_web/templates/release_search.html b/python/fatcat_web/templates/release_search.html index 7d6b0443..359038dc 100644 --- a/python/fatcat_web/templates/release_search.html +++ b/python/fatcat_web/templates/release_search.html @@ -36,8 +36,28 @@ {% if found %} {% if found.results %} - Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: {{ found.query.q }} -{% for paper in found.results %} + Showing + {% if found.offset == 0 %} + first + {% else %} + results {{ found.offset }} — + {% endif %} + {{ found.offset + found.count_returned }} out of {{ found.count_found }} results for: {{ found.query.q }} + +
+ {% if found.offset > 0 %} + {% if found.offset - found.limit < 0 %} + Prev + {% else %} + Prev + {% endif %} + {% endif %} + + {% if found.offset + found.limit < found.count_found and found.offset + found.limit < found.deep_page_limit %} + Next + {% endif %} + + {% for paper in found.results %} {{ entity_macros.release_search_result_row(paper) }} {% endfor %} {% if found.results|length > 8 %} -- cgit v1.2.3