add container search

And tweak release search a bit: DOIs aren't auto-replaced unless they are the only word/query. This query code is very duplicative and should be refactored.
author: Bryan Newbold <bnewbold@robocracy.org> 2019-02-20 19:21:19 -0800
committer: Bryan Newbold <bnewbold@robocracy.org> 2019-02-20 19:21:23 -0800
commit: 7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1 (patch)
tree: 96eca26306465d8801d3bbb2eda62e611afb97c6 /python/fatcat_web/search.py
parent: 15ad67e4cd44c54a0f7a06f0eb0448d75c9ad1b6 (diff)
download: fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.tar.gz
fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.zip
1 files changed, 54 insertions, 1 deletions
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 5c278c21..a301fcb5 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -6,9 +6,11 @@ from fatcat_web import app
 """
 Helpers for doing elasticsearch queries (used in the web interface; not part of
 the formal API)
+
+TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded
 """
 
-def do_search(q, limit=50, fulltext_only=True):
+def do_release_search(q, limit=50, fulltext_only=True):
 
     #print("Search hit: " + q)
     if limit > 100:
@@ -65,3 +67,54 @@ def do_search(q, limit=50, fulltext_only=True):
             "count_returned": len(results),
             "count_found": found,
             "results": results }
+
+def do_container_search(q, limit=50):
+
+    #print("Search hit: " + q)
+    if limit > 100:
+        # Sanity check
+        limit = 100
+
+    search_request = {
+        "query": {
+            "query_string": {
+            "query": q,
+            "default_operator": "AND",
+            "analyze_wildcard": True,
+            "lenient": True,
+            "fields": ["name^5", "publisher"]
+            },
+        },
+        "size": int(limit),
+    }
+
+    #print(search_request)
+    resp = requests.get("%s/%s/_search" %
+            (app.config['ELASTICSEARCH_BACKEND'], app.config['ELASTICSEARCH_CONTAINER_INDEX']),
+        json=search_request)
+
+    if resp.status_code == 400:
+        print("elasticsearch 400: " + str(resp.content))
+        flash("Search query failed to parse; you might need to use quotes.<p><code>{}</code>".format(resp.content))
+        abort(resp.status_code)
+    elif resp.status_code != 200:
+        print("elasticsearch non-200 status code: " + str(resp.status_code))
+        print(resp.content)
+        abort(resp.status_code)
+
+    content = resp.json()
+    #print(content)
+    results = [h['_source'] for h in content['hits']['hits']]
+    for h in results:
+        # Handle surrogate strings that elasticsearch returns sometimes,
+        # probably due to mangled data processing in some pipeline.
+        # "Crimes against Unicode"; production workaround
+        for key in h:
+            if type(h[key]) is str:
+                h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
+
+    found = content['hits']['total']
+    return {"query": { "q": q },
+            "count_returned": len(results),
+            "count_found": found,
+            "results": results }
author	Bryan Newbold <bnewbold@robocracy.org>	2019-02-20 19:21:19 -0800
committer	Bryan Newbold <bnewbold@robocracy.org>	2019-02-20 19:21:23 -0800
commit	7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1 (patch)
tree	96eca26306465d8801d3bbb2eda62e611afb97c6 /python/fatcat_web/search.py
parent	15ad67e4cd44c54a0f7a06f0eb0448d75c9ad1b6 (diff)
download	fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.tar.gz fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.zip