diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-26 22:44:58 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-26 22:44:58 -0700 |
commit | 996603e9bf874f066f7d13f6b91977bd2d34d0c1 (patch) | |
tree | 554e0e654b173eee226328a93df72f0fb58c939f /python/fatcat | |
parent | 25482954f473061ef3d480512a4b26c119036874 (diff) | |
download | fatcat-996603e9bf874f066f7d13f6b91977bd2d34d0c1.tar.gz fatcat-996603e9bf874f066f7d13f6b91977bd2d34d0c1.zip |
switch search to new elastic schema (and endpoint)
Diffstat (limited to 'python/fatcat')
-rw-r--r-- | python/fatcat/routes.py | 4 | ||||
-rw-r--r-- | python/fatcat/search.py | 22 |
2 files changed, 11 insertions, 15 deletions
diff --git a/python/fatcat/routes.py b/python/fatcat/routes.py index 965040f5..801397e8 100644 --- a/python/fatcat/routes.py +++ b/python/fatcat/routes.py @@ -313,10 +313,6 @@ def search(): if word.startswith("10.") and word.count("/") >= 1: query = query.replace(word, 'doi:"{}"'.format(word)) - # Convert "author:" query to "authors:" - if query is not None: - query = query.replace("author:", "authors:") - if 'q' in request.args.keys(): # always do files for HTML found = do_search(query, limit=limit) diff --git a/python/fatcat/search.py b/python/fatcat/search.py index 7ed7a99c..959bb85f 100644 --- a/python/fatcat/search.py +++ b/python/fatcat/search.py @@ -14,14 +14,13 @@ def do_search(q, limit=20): search_request = { "query": { "query_string": { - "query": q, - "analyzer": "textIcuSearch", - "default_operator": "AND", - "analyze_wildcard": True, - "lenient": True, - "auto_generate_phrase_queries": True, - "default_field": "_all", - }, + "query": q, + "analyzer": "textIcuSearch", + "default_operator": "AND", + "analyze_wildcard": True, + "lenient": True, + "fields": ["title^5", "contrib_names^2", "container_title"] + } }, "size": int(limit), } @@ -36,11 +35,12 @@ def do_search(q, limit=20): abort(resp.status_code) content = resp.json() + print(content) results = [h['_source'] for h in content['hits']['hits']] for h in results: - # Ensure 'authors' is a list, not a single string - if type(h['authors']) is not list: - h['authors'] = [h['authors'], ] + # Ensure 'contrib_names' is a list, not a single string + if type(h['contrib_names']) is not list: + h['contrib_names'] = [h['contrib_names'], ] found = content['hits']['total'] return {"query": { "q": q }, |