diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-02-20 19:21:19 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-02-20 19:21:23 -0800 |
commit | 7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1 (patch) | |
tree | 96eca26306465d8801d3bbb2eda62e611afb97c6 /python/fatcat_web/search.py | |
parent | 15ad67e4cd44c54a0f7a06f0eb0448d75c9ad1b6 (diff) | |
download | fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.tar.gz fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.zip |
add container search
And tweak release search a bit: DOIs aren't auto-replaced unless they
are the only word/query
This query code is very duplicative and should be refactored
Diffstat (limited to 'python/fatcat_web/search.py')
-rw-r--r-- | python/fatcat_web/search.py | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 5c278c21..a301fcb5 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -6,9 +6,11 @@ from fatcat_web import app
 """
 Helpers for doing elasticsearch queries (used in the web interface; not part of
 the formal API)
+
+TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded
 """
 
-def do_search(q, limit=50, fulltext_only=True):
+def do_release_search(q, limit=50, fulltext_only=True):
 
     #print("Search hit: " + q)
     if limit > 100:
@@ -65,3 +67,54 @@ def do_search(q, limit=50, fulltext_only=True):
             "count_returned": len(results),
             "count_found": found,
             "results": results }
+
+def do_container_search(q, limit=50):
+
+    #print("Search hit: " + q)
+    if limit > 100:
+        # Sanity check
+        limit = 100
+
+    search_request = {
+        "query": {
+            "query_string": {
+                "query": q,
+                "default_operator": "AND",
+                "analyze_wildcard": True,
+                "lenient": True,
+                "fields": ["name^5", "publisher"]
+            },
+        },
+        "size": int(limit),
+    }
+
+    #print(search_request)
+    resp = requests.get("%s/%s/_search" %
+            (app.config['ELASTICSEARCH_BACKEND'], app.config['ELASTICSEARCH_CONTAINER_INDEX']),
+        json=search_request)
+
+    if resp.status_code == 400:
+        print("elasticsearch 400: " + str(resp.content))
+        flash("Search query failed to parse; you might need to use quotes.<p><code>{}</code>".format(resp.content))
+        abort(resp.status_code)
+    elif resp.status_code != 200:
+        print("elasticsearch non-200 status code: " + str(resp.status_code))
+        print(resp.content)
+        abort(resp.status_code)
+
+    content = resp.json()
+    #print(content)
+    results = [h['_source'] for h in content['hits']['hits']]
+    for h in results:
+        # Handle surrogate strings that elasticsearch returns sometimes,
+        # probably due to mangled data processing in some pipeline.
+        # "Crimes against Unicode"; production workaround
+        for key in h:
+            if type(h[key]) is str:
+                h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
+
+    found = content['hits']['total']
+    return {"query": { "q": q },
+            "count_returned": len(results),
+            "count_found": found,
+            "results": results }