author | Martin Czygan <martin.czygan@gmail.com> | 2019-11-08 23:00:29 +0100
---|---|---
committer | Martin Czygan <martin.czygan@gmail.com> | 2019-11-08 23:12:12 +0100
commit | ed72027bbf36e933c8db069bd02b0163a84aef83 (patch) |
tree | dc0568703c43eec59d4ddb3cefec50f0d38f22c8 /python/fatcat_web/search.py |
parent | 5748f3241117b52f5295dc589374ec0c219534e4 (diff) |
download | fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.tar.gz, fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.zip |
Add basic pagination to search results
The "deep paging problem" imposes some limit, which currently is a
hardcoded default value, `deep_page_limit=2000` in `do_search`.
Elasticsearch can be configured, too:
> Note that from + size can not be more than the index.max_result_window
index setting, which defaults to 10,000.
-- https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#request-body-search-from-size
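For illustration only (not part of this commit): the `index.max_result_window` setting mentioned above is a dynamic per-index setting and can be raised through the standard Elasticsearch settings API. The host URL, index name, and new window value below are placeholders, not values taken from this repository's configuration.

```python
import requests

# Sketch: raise index.max_result_window for one index via the
# Elasticsearch settings API. "http://localhost:9200" and
# "fatcat_release" are placeholder values.
resp = requests.put(
    "http://localhost:9200/fatcat_release/_settings",
    json={"index": {"max_result_window": 20000}},
)
resp.raise_for_status()
print(resp.json())  # {"acknowledged": true} on success
```

Raising the window trades memory on the coordinating node for deeper paging, which is why this change also clamps `offset` on the application side instead of relying on the index setting alone.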
Diffstat (limited to 'python/fatcat_web/search.py')
-rw-r--r-- | python/fatcat_web/search.py | 27 |
1 file changed, 18 insertions, 9 deletions
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 52f05f71..7c60a6dd 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -11,15 +11,20 @@ import requests
 from flask import abort, flash
 from fatcat_web import app
 
-def do_search(index, request, limit=30):
-
+def do_search(index, request, limit=30, offset=0, deep_page_limit=2000):
+    # Sanity checks
     if limit > 100:
-        # Sanity check
         limit = 100
+    if offset < 0:
+        offset = 0
+    if offset > deep_page_limit:
+        # Avoid deep paging problem.
+        offset = deep_page_limit
 
     request["size"] = int(limit)
-    #print(request)
+    request["from"] = int(offset)
+    # print(request)
     resp = requests.get("%s/%s/_search" %
             (app.config['ELASTICSEARCH_BACKEND'], index),
         json=request)
@@ -45,10 +50,12 @@ def do_search(index, request, limit=30):
 
     return {"count_returned": len(results),
             "count_found": content['hits']['total'],
-            "results": results }
+            "results": results,
+            "offset": offset,
+            "deep_page_limit": deep_page_limit}
 
-def do_release_search(q, limit=30, fulltext_only=True):
+def do_release_search(q, limit=30, fulltext_only=True, offset=0):
 
     #print("Search hit: " + q)
     if limit > 100:
@@ -75,17 +82,18 @@ def do_release_search(q, limit=30, fulltext_only=True):
         },
     }
 
-    resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request)
+    resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request, offset=offset)
     for h in resp['results']:
         # Ensure 'contrib_names' is a list, not a single string
         if type(h['contrib_names']) is not list:
            h['contrib_names'] = [h['contrib_names'], ]
        h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']]
     resp["query"] = { "q": q }
+    resp["limit"] = limit
     return resp
 
-def do_container_search(q, limit=30):
+def do_container_search(q, limit=30, offset=0):
 
     # Convert raw ISSN-L to ISSN-L query
     if len(q.split()) == 1 and len(q) == 9 and q[0:4].isdigit() and q[4] == '-':
@@ -103,8 +111,9 @@ def do_container_search(q, limit=30):
         },
     }
 
-    resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit)
+    resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit, offset=offset)
     resp["query"] = { "q": q }
+    resp["limit"] = limit
     return resp
 
 def get_elastic_entity_stats():
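For context on how the new `offset` parameter might be consumed, here is a minimal, hypothetical caller: a Flask view that turns a `page` query parameter into an offset. The view name and the `page` parameter are illustrative assumptions; only `do_release_search` and its signature come from the diff above.

```python
from flask import request as flask_request

from fatcat_web.search import do_release_search

def release_search_view():
    # Hypothetical view, not part of this commit: translate ?q= and ?page=
    # into the (q, limit, offset) arguments introduced by this change.
    q = flask_request.args.get("q", "*")
    limit = 30
    try:
        page = int(flask_request.args.get("page", "1"))
    except ValueError:
        page = 1
    offset = max(page - 1, 0) * limit
    # do_search() clamps offset at deep_page_limit (2000 by default), so
    # requests far past the window land on the last permitted page.
    found = do_release_search(q, limit=limit, offset=offset)
    # A real view would render a template with found["results"],
    # found["offset"], and found["limit"]; returning the dict keeps
    # the sketch short.
    return found
```

Because the response now echoes back `offset`, `limit`, and `deep_page_limit`, the template layer has everything it needs to render "previous/next" links without recomputing the clamping logic.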