diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-26 22:44:58 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2018-09-26 22:44:58 -0700 |
commit | 996603e9bf874f066f7d13f6b91977bd2d34d0c1 (patch) | |
tree | 554e0e654b173eee226328a93df72f0fb58c939f | |
parent | 25482954f473061ef3d480512a4b26c119036874 (diff) | |
download | fatcat-996603e9bf874f066f7d13f6b91977bd2d34d0c1.tar.gz fatcat-996603e9bf874f066f7d13f6b91977bd2d34d0c1.zip |
switch search to new elastic schema (and endpoint)
-rw-r--r-- | python/config.py | 4 | ||||
-rw-r--r-- | python/fatcat/routes.py | 4 | ||||
-rw-r--r-- | python/fatcat/search.py | 22 |
3 files changed, 13 insertions, 17 deletions
```diff
diff --git a/python/config.py b/python/config.py
index cb2c7bf0..3d6db049 100644
--- a/python/config.py
+++ b/python/config.py
@@ -11,8 +11,8 @@ class Config(object):
     GIT_REVISION = subprocess.check_output(["git", "describe", "--always"]).strip()
     # This is, effectively, the QA/PROD flag
     FATCAT_DOMAIN = "qa.fatcat.wiki"
-    ELASTIC_INDEX = "crossref-works"
-    ELASTIC_BACKEND = "http://search.{}:8088".format(FATCAT_DOMAIN)
+    ELASTIC_BACKEND = "https://search.fatcat.wiki"
+    ELASTIC_INDEX = "fatcat"
 
     # "Event more verbose" debug options. SECRET_KEY is bogus.
     #SQLALCHEMY_ECHO = True
diff --git a/python/fatcat/routes.py b/python/fatcat/routes.py
index 965040f5..801397e8 100644
--- a/python/fatcat/routes.py
+++ b/python/fatcat/routes.py
@@ -313,10 +313,6 @@ def search():
             if word.startswith("10.") and word.count("/") >= 1:
                 query = query.replace(word, 'doi:"{}"'.format(word))
 
-    # Convert "author:" query to "authors:"
-    if query is not None:
-        query = query.replace("author:", "authors:")
-
     if 'q' in request.args.keys():
         # always do files for HTML
         found = do_search(query, limit=limit)
diff --git a/python/fatcat/search.py b/python/fatcat/search.py
index 7ed7a99c..959bb85f 100644
--- a/python/fatcat/search.py
+++ b/python/fatcat/search.py
@@ -14,14 +14,13 @@ def do_search(q, limit=20):
     search_request = {
         "query": {
             "query_string": {
-                "query": q,
-                "analyzer": "textIcuSearch",
-                "default_operator": "AND",
-                "analyze_wildcard": True,
-                "lenient": True,
-                "auto_generate_phrase_queries": True,
-                "default_field": "_all",
-            },
+            "query": q,
+            "analyzer": "textIcuSearch",
+            "default_operator": "AND",
+            "analyze_wildcard": True,
+            "lenient": True,
+            "fields": ["title^5", "contrib_names^2", "container_title"]
+            }
         },
         "size": int(limit),
     }
@@ -36,11 +35,12 @@ def do_search(q, limit=20):
         abort(resp.status_code)
 
     content = resp.json()
+    print(content)
     results = [h['_source'] for h in content['hits']['hits']]
     for h in results:
-        # Ensure 'authors' is a list, not a single string
-        if type(h['authors']) is not list:
-            h['authors'] = [h['authors'], ]
+        # Ensure 'contrib_names' is a list, not a single string
+        if type(h['contrib_names']) is not list:
+            h['contrib_names'] = [h['contrib_names'], ]
 
     found = content['hits']['total']
     return {"query": { "q": q },
```