From 996603e9bf874f066f7d13f6b91977bd2d34d0c1 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 26 Sep 2018 22:44:58 -0700 Subject: switch search to new elastic schema (and endpoint) --- python/config.py | 4 ++-- python/fatcat/routes.py | 4 ---- python/fatcat/search.py | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/python/config.py b/python/config.py index cb2c7bf0..3d6db049 100644 --- a/python/config.py +++ b/python/config.py @@ -11,8 +11,8 @@ class Config(object): GIT_REVISION = subprocess.check_output(["git", "describe", "--always"]).strip() # This is, effectively, the QA/PROD flag FATCAT_DOMAIN = "qa.fatcat.wiki" - ELASTIC_INDEX = "crossref-works" - ELASTIC_BACKEND = "http://search.{}:8088".format(FATCAT_DOMAIN) + ELASTIC_BACKEND = "https://search.fatcat.wiki" + ELASTIC_INDEX = "fatcat" # "Event more verbose" debug options. SECRET_KEY is bogus. #SQLALCHEMY_ECHO = True diff --git a/python/fatcat/routes.py b/python/fatcat/routes.py index 965040f5..801397e8 100644 --- a/python/fatcat/routes.py +++ b/python/fatcat/routes.py @@ -313,10 +313,6 @@ def search(): if word.startswith("10.") and word.count("/") >= 1: query = query.replace(word, 'doi:"{}"'.format(word)) - # Convert "author:" query to "authors:" - if query is not None: - query = query.replace("author:", "authors:") - if 'q' in request.args.keys(): # always do files for HTML found = do_search(query, limit=limit) diff --git a/python/fatcat/search.py b/python/fatcat/search.py index 7ed7a99c..959bb85f 100644 --- a/python/fatcat/search.py +++ b/python/fatcat/search.py @@ -14,14 +14,13 @@ def do_search(q, limit=20): search_request = { "query": { "query_string": { - "query": q, - "analyzer": "textIcuSearch", - "default_operator": "AND", - "analyze_wildcard": True, - "lenient": True, - "auto_generate_phrase_queries": True, - "default_field": "_all", - }, + "query": q, + "analyzer": "textIcuSearch", + "default_operator": "AND", + "analyze_wildcard": True, + 
+ "lenient": True, + "fields": ["title^5", "contrib_names^2", "container_title"] + } }, "size": int(limit), } @@ -36,11 +35,12 @@ def do_search(q, limit=20): abort(resp.status_code) content = resp.json() + print(content) results = [h['_source'] for h in content['hits']['hits']] for h in results: - # Ensure 'authors' is a list, not a single string - if type(h['authors']) is not list: - h['authors'] = [h['authors'], ] + # Ensure 'contrib_names' is a list, not a single string + if type(h['contrib_names']) is not list: + h['contrib_names'] = [h['contrib_names'], ] found = content['hits']['total'] return {"query": { "q": q }, -- cgit v1.2.3