diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2019-11-08 23:00:29 +0100 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2019-11-08 23:12:12 +0100 |
commit | ed72027bbf36e933c8db069bd02b0163a84aef83 (patch) | |
tree | dc0568703c43eec59d4ddb3cefec50f0d38f22c8 /python | |
parent | 5748f3241117b52f5295dc589374ec0c219534e4 (diff) | |
download | fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.tar.gz fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.zip |
Add basic pagination to search results
The "deep paging problem" imposes a limit on how far into a result set one
can page; that limit is currently a hardcoded default value,
`deep_page_limit=2000`, in `do_search`.
Elasticsearch enforces its own, configurable, limit as well:
> Note that from + size can not be more than the index.max_result_window
> index setting, which defaults to 10,000.
-- https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#request-body-search-from-size
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_web/routes.py | 9 | ||||
-rw-r--r-- | python/fatcat_web/search.py | 27 | ||||
-rw-r--r-- | python/fatcat_web/templates/container_search.html | 21 | ||||
-rw-r--r-- | python/fatcat_web/templates/release_search.html | 24 |
4 files changed, 67 insertions, 14 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index 79b594e3..a41f388d 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -673,9 +673,12 @@ def release_search(): if container_id and query: query += ' container_id:"{}"'.format(container_id) + offset = request.args.get('offset', '0') + offset = max(0, int(offset)) if offset.isnumeric() else 0 + if 'q' in request.args.keys(): # always do files for HTML - found = do_release_search(query, fulltext_only=fulltext_only) + found = do_release_search(query, fulltext_only=fulltext_only, offset=offset) return render_template('release_search.html', found=found, query=query, fulltext_only=fulltext_only) else: return render_template('release_search.html', query=query, fulltext_only=fulltext_only) @@ -684,10 +687,12 @@ def release_search(): def container_search(): query = request.args.get('q') + offset = request.args.get('offset', '0') + offset = max(0, int(offset)) if offset.isnumeric() else 0 if 'q' in request.args.keys(): # always do files for HTML - found = do_container_search(query) + found = do_container_search(query, offset=offset) return render_template('container_search.html', found=found, query=query) else: return render_template('container_search.html', query=query) diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 52f05f71..7c60a6dd 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -11,15 +11,20 @@ import requests from flask import abort, flash from fatcat_web import app +def do_search(index, request, limit=30, offset=0, deep_page_limit=2000): -def do_search(index, request, limit=30): - + # Sanity checks if limit > 100: - # Sanity check limit = 100 + if offset < 0: + offset = 0 + if offset > deep_page_limit: + # Avoid deep paging problem. 
+ offset = deep_page_limit request["size"] = int(limit) - #print(request) + request["from"] = int(offset) + # print(request) resp = requests.get("%s/%s/_search" % (app.config['ELASTICSEARCH_BACKEND'], index), json=request) @@ -45,10 +50,12 @@ def do_search(index, request, limit=30): return {"count_returned": len(results), "count_found": content['hits']['total'], - "results": results } + "results": results, + "offset": offset, + "deep_page_limit": deep_page_limit} -def do_release_search(q, limit=30, fulltext_only=True): +def do_release_search(q, limit=30, fulltext_only=True, offset=0): #print("Search hit: " + q) if limit > 100: @@ -75,17 +82,18 @@ def do_release_search(q, limit=30, fulltext_only=True): }, } - resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request) + resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request, offset=offset) for h in resp['results']: # Ensure 'contrib_names' is a list, not a single string if type(h['contrib_names']) is not list: h['contrib_names'] = [h['contrib_names'], ] h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']] resp["query"] = { "q": q } + resp["limit"] = limit return resp -def do_container_search(q, limit=30): +def do_container_search(q, limit=30, offset=0): # Convert raw ISSN-L to ISSN-L query if len(q.split()) == 1 and len(q) == 9 and q[0:4].isdigit() and q[4] == '-': @@ -103,8 +111,9 @@ def do_container_search(q, limit=30): }, } - resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit) + resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit, offset=offset) resp["query"] = { "q": q } + resp["limit"] = limit return resp def get_elastic_entity_stats(): diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html index 7f6799dd..9e2aa10a 100644 --- a/python/fatcat_web/templates/container_search.html +++ 
b/python/fatcat_web/templates/container_search.html @@ -30,7 +30,26 @@ {% if found %} {% if found.results %} - <i>Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i> + <i>Showing + {% if found.offset == 0 %} + first + {% else %} + results {{ found.offset }} — + {% endif %} + {{ found.offset + found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i> + <br> + {% if found.offset > 0 %} + {% if found.offset - found.limit < 0 %} + <a href="{{ url_for('release_search', q=query, offset=0) }}">Prev</a> + {% else %} + <a href="{{ url_for('release_search', q=query, offset=found.offset - found.limit) }}">Prev</a> + {% endif %} + {% endif %} + + {% if found.offset + found.limit < found.count_found and found.offset + found.limit < found.deep_page_limit %} + <a href="{{ url_for('release_search', q=query, offset=found.offset + found.limit) }}">Next</a> + {% endif %} + {% for entity in found.results %} <div> <h4 style="margin-top: 1em; margin-bottom: 4px; font-size: 1.1em;"> diff --git a/python/fatcat_web/templates/release_search.html b/python/fatcat_web/templates/release_search.html index 7d6b0443..359038dc 100644 --- a/python/fatcat_web/templates/release_search.html +++ b/python/fatcat_web/templates/release_search.html @@ -36,8 +36,28 @@ {% if found %} {% if found.results %} - <i>Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i> -{% for paper in found.results %} + <i>Showing + {% if found.offset == 0 %} + first + {% else %} + results {{ found.offset }} — + {% endif %} + {{ found.offset + found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i> + + <br> + {% if found.offset > 0 %} + {% if found.offset - found.limit < 0 %} + <a href="{{ url_for('release_search', q=query, offset=0) }}">Prev</a> + {% else %} + <a href="{{ url_for('release_search', 
q=query, offset=found.offset - found.limit) }}">Prev</a> + {% endif %} + {% endif %} + + {% if found.offset + found.limit < found.count_found and found.offset + found.limit < found.deep_page_limit %} + <a href="{{ url_for('release_search', q=query, offset=found.offset + found.limit) }}">Next</a> + {% endif %} + + {% for paper in found.results %} {{ entity_macros.release_search_result_row(paper) }} {% endfor %} {% if found.results|length > 8 %} |