diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-06-21 18:23:09 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-06-21 18:23:09 -0700 |
commit | 3075f0ab8853fd97c68d3f0b8086dfa5c863c7f2 (patch) | |
tree | 5bb022e5dc1dc57cd663afc9feec33e6b5039bb1 /python/fatcat/search.py | |
parent | 416117af51592b2a60b317427dc034544347b435 (diff) | |
download | fatcat-3075f0ab8853fd97c68d3f0b8086dfa5c863c7f2.tar.gz fatcat-3075f0ab8853fd97c68d3f0b8086dfa5c863c7f2.zip |
copy some of paper-search over
Diffstat (limited to 'python/fatcat/search.py')
-rw-r--r-- | python/fatcat/search.py | 49 |
1 files changed, 49 insertions, 0 deletions
"""Search helper for the fatcat web app, backed by an Elasticsearch index."""

import requests
from flask import abort
from fatcat import app


def do_search(q, limit=20):
    """Run a query-string search against the configured Elasticsearch index.

    The backend URL and index name are read from ``app.config`` under the
    ``ELASTIC_BACKEND`` and ``ELASTIC_INDEX`` keys.

    Args:
        q: Query text, passed to Elasticsearch as a ``query_string`` query.
        limit: Maximum number of hits to return. Coerced with ``int()`` and
            capped at 100.

    Returns:
        A dict with the keys ``query`` (echo of ``q``), ``count_returned``
        (number of hits in this response), ``count_found`` (the total
        reported by Elasticsearch) and ``results`` (the ``_source`` document
        of each hit).

    Raises:
        ValueError / TypeError: if ``limit`` cannot be converted to an int.
        requests.exceptions.RequestException: if the backend cannot be
            reached or does not answer within the timeout.
        werkzeug HTTP exception (via ``flask.abort``): if the backend answers
            with a non-200 status; that status code is propagated.
    """
    print("Search hit: " + q)

    # Coerce first: a limit arriving as a string (e.g. from a URL parameter)
    # cannot be compared against an int on Python 3.
    limit = int(limit)
    if limit > 100:
        # Sanity check
        limit = 100

    search_request = {
        "query": {
            "query_string": {
                "query": q,
                "analyzer": "textIcuSearch",
                "default_operator": "AND",
                "analyze_wildcard": True,
                "lenient": True,
                "auto_generate_phrase_queries": True,
                "default_field": "_all",
            },
        },
        "size": limit,
    }

    # Always bound the wait: without a timeout a stalled backend would block
    # this worker indefinitely.
    resp = requests.get(
        "%s/%s/_search" % (app.config['ELASTIC_BACKEND'],
                           app.config['ELASTIC_INDEX']),
        json=search_request,
        timeout=30,
    )

    if resp.status_code != 200:
        print("elasticsearch non-200 status code: " + str(resp.status_code))
        print(resp.content)
        abort(resp.status_code)

    content = resp.json()
    results = [h['_source'] for h in content['hits']['hits']]
    for h in results:
        # Ensure 'authors' is a list, not a single string. Documents without
        # an 'authors' field are left untouched rather than failing the
        # whole search with a KeyError.
        if 'authors' in h and not isinstance(h['authors'], list):
            h['authors'] = [h['authors']]

    found = content['hits']['total']
    return {
        "query": {"q": q},
        "count_returned": len(results),
        "count_found": found,
        "results": results,
    }