Add basic pagination to search results

The "deep paging problem" limits how far into a result set we can page; for now that limit is a hardcoded default, `deep_page_limit=2000`, in `do_search`. Elasticsearch enforces its own, configurable, limit as well: > Note that from + size can not be more than the index.max_result_window index setting, which defaults to 10,000. -- https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#request-body-search-from-size
author: Martin Czygan <martin.czygan@gmail.com> 2019-11-08 23:00:29 +0100
committer: Martin Czygan <martin.czygan@gmail.com> 2019-11-08 23:12:12 +0100
commit: ed72027bbf36e933c8db069bd02b0163a84aef83 (patch)
tree: dc0568703c43eec59d4ddb3cefec50f0d38f22c8 /python/fatcat_web/search.py
parent: 5748f3241117b52f5295dc589374ec0c219534e4 (diff)
download: fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.tar.gz
fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.zip
1 files changed, 18 insertions, 9 deletions
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 52f05f71..7c60a6dd 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -11,15 +11,20 @@ import requests
 from flask import abort, flash
 from fatcat_web import app
 
+def do_search(index, request, limit=30, offset=0, deep_page_limit=2000):
 
-def do_search(index, request, limit=30):
-
+    # Sanity checks
     if limit > 100:
-        # Sanity check
         limit = 100
+    if offset < 0:
+        offset = 0
+    if offset > deep_page_limit:
+        # Avoid deep paging problem.
+        offset = deep_page_limit
 
     request["size"] = int(limit)
-    #print(request)
+    request["from"] = int(offset)
+    # print(request)
     resp = requests.get("%s/%s/_search" %
             (app.config['ELASTICSEARCH_BACKEND'], index),
         json=request)
@@ -45,10 +50,12 @@ def do_search(index, request, limit=30):
 
     return {"count_returned": len(results),
             "count_found": content['hits']['total'],
-            "results": results }
+            "results": results,
+            "offset": offset,
+            "deep_page_limit": deep_page_limit}
 
 
-def do_release_search(q, limit=30, fulltext_only=True):
+def do_release_search(q, limit=30, fulltext_only=True, offset=0):
 
     #print("Search hit: " + q)
     if limit > 100:
@@ -75,17 +82,18 @@ def do_release_search(q, limit=30, fulltext_only=True):
         },
     }
 
-    resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request)
+    resp = do_search(app.config['ELASTICSEARCH_RELEASE_INDEX'], search_request, offset=offset)
     for h in resp['results']:
         # Ensure 'contrib_names' is a list, not a single string
         if type(h['contrib_names']) is not list:
             h['contrib_names'] = [h['contrib_names'], ]
         h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']]
     resp["query"] = { "q": q }
+    resp["limit"] = limit
     return resp
 
 
-def do_container_search(q, limit=30):
+def do_container_search(q, limit=30, offset=0):
 
     # Convert raw ISSN-L to ISSN-L query
     if len(q.split()) == 1 and len(q) == 9 and q[0:4].isdigit() and q[4] == '-':
@@ -103,8 +111,9 @@ def do_container_search(q, limit=30):
         },
     }
 
-    resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit)
+    resp = do_search(app.config['ELASTICSEARCH_CONTAINER_INDEX'], search_request, limit=limit, offset=offset)
     resp["query"] = { "q": q }
+    resp["limit"] = limit
     return resp
 
 def get_elastic_entity_stats():
author	Martin Czygan <martin.czygan@gmail.com>	2019-11-08 23:00:29 +0100
committer	Martin Czygan <martin.czygan@gmail.com>	2019-11-08 23:12:12 +0100
commit	ed72027bbf36e933c8db069bd02b0163a84aef83 (patch)
tree	dc0568703c43eec59d4ddb3cefec50f0d38f22c8 /python/fatcat_web/search.py
parent	5748f3241117b52f5295dc589374ec0c219534e4 (diff)
download	fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.tar.gz fatcat-ed72027bbf36e933c8db069bd02b0163a84aef83.zip