about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2018-09-26 22:44:58 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2018-09-26 22:44:58 -0700
commit 996603e9bf874f066f7d13f6b91977bd2d34d0c1 (patch)
tree 554e0e654b173eee226328a93df72f0fb58c939f
parent 25482954f473061ef3d480512a4b26c119036874 (diff)
download fatcat-996603e9bf874f066f7d13f6b91977bd2d34d0c1.tar.gz
fatcat-996603e9bf874f066f7d13f6b91977bd2d34d0c1.zip
switch search to new elastic schema (and endpoint)
-rw-r--r-- python/config.py 4
-rw-r--r-- python/fatcat/routes.py 4
-rw-r--r-- python/fatcat/search.py 22
3 files changed, 13 insertions, 17 deletions
diff --git a/python/config.py b/python/config.py
index cb2c7bf0..3d6db049 100644
--- a/python/config.py
+++ b/python/config.py
@@ -11,8 +11,8 @@ class Config(object):
GIT_REVISION = subprocess.check_output(["git", "describe", "--always"]).strip()
# This is, effectively, the QA/PROD flag
FATCAT_DOMAIN = "qa.fatcat.wiki"
- ELASTIC_INDEX = "crossref-works"
- ELASTIC_BACKEND = "http://search.{}:8088".format(FATCAT_DOMAIN)
+ ELASTIC_BACKEND = "https://search.fatcat.wiki"
+ ELASTIC_INDEX = "fatcat"
# "Event more verbose" debug options. SECRET_KEY is bogus.
#SQLALCHEMY_ECHO = True
diff --git a/python/fatcat/routes.py b/python/fatcat/routes.py
index 965040f5..801397e8 100644
--- a/python/fatcat/routes.py
+++ b/python/fatcat/routes.py
@@ -313,10 +313,6 @@ def search():
if word.startswith("10.") and word.count("/") >= 1:
query = query.replace(word, 'doi:"{}"'.format(word))
- # Convert "author:" query to "authors:"
- if query is not None:
- query = query.replace("author:", "authors:")
-
if 'q' in request.args.keys():
# always do files for HTML
found = do_search(query, limit=limit)
diff --git a/python/fatcat/search.py b/python/fatcat/search.py
index 7ed7a99c..959bb85f 100644
--- a/python/fatcat/search.py
+++ b/python/fatcat/search.py
@@ -14,14 +14,13 @@ def do_search(q, limit=20):
search_request = {
"query": {
"query_string": {
- "query": q,
- "analyzer": "textIcuSearch",
- "default_operator": "AND",
- "analyze_wildcard": True,
- "lenient": True,
- "auto_generate_phrase_queries": True,
- "default_field": "_all",
- },
+ "query": q,
+ "analyzer": "textIcuSearch",
+ "default_operator": "AND",
+ "analyze_wildcard": True,
+ "lenient": True,
+ "fields": ["title^5", "contrib_names^2", "container_title"]
+ }
},
"size": int(limit),
}
@@ -36,11 +35,12 @@ def do_search(q, limit=20):
abort(resp.status_code)
content = resp.json()
+ print(content)
results = [h['_source'] for h in content['hits']['hits']]
for h in results:
- # Ensure 'authors' is a list, not a single string
- if type(h['authors']) is not list:
- h['authors'] = [h['authors'], ]
+ # Ensure 'contrib_names' is a list, not a single string
+ if type(h['contrib_names']) is not list:
+ h['contrib_names'] = [h['contrib_names'], ]
found = content['hits']['total']
return {"query": { "q": q },