From 7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 20 Feb 2019 19:21:19 -0800 Subject: add container search And tweak release search a bit: DOIs aren't auto-replaced unless they are the only word/query This query code is very duplicative and should be refactored --- python/fatcat_web/routes.py | 30 ++++++++++--- python/fatcat_web/search.py | 55 ++++++++++++++++++++++- python/fatcat_web/templates/container_search.html | 49 ++++++++++++++++++++ 3 files changed, 126 insertions(+), 8 deletions(-) create mode 100644 python/fatcat_web/templates/container_search.html diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py index f9faf328..7f10ee2b 100644 --- a/python/fatcat_web/routes.py +++ b/python/fatcat_web/routes.py @@ -7,7 +7,7 @@ from flask_login import login_required from fatcat_web import app, api, auth_api, priv_api from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth from fatcat_client.rest import ApiException -from fatcat_web.search import do_search +from fatcat_web.search import do_release_search, do_container_search from fatcat_tools.transforms import * @@ -351,7 +351,7 @@ def changelog_entry_view(index): ### Search ################################################################## @app.route('/release/search', methods=['GET', 'POST']) -def search(): +def release_search(): limit = 20 query = request.args.get('q') @@ -359,18 +359,34 @@ def search(): # Convert raw DOIs to DOI queries if query is not None: - oldquery = query.split() - for word in oldquery: - if word.startswith("10.") and word.count("/") >= 1: - query = query.replace(word, 'doi:"{}"'.format(word)) + if len(query.split()) == 1 and query.startswith("10.") and query.count("/") >= 1: + query = 'doi:"{}"'.format(query) if 'q' in request.args.keys(): # always do files for HTML - found = do_search(query, limit=limit, fulltext_only=fulltext_only) + found = do_release_search(query, limit=limit, 
fulltext_only=fulltext_only) return render_template('release_search.html', found=found, query=query, fulltext_only=fulltext_only) else: return render_template('release_search.html', query=query, fulltext_only=fulltext_only) +@app.route('/container/search', methods=['GET', 'POST']) +def container_search(): + + limit = 20 + query = request.args.get('q') + + # Convert raw ISSN-L to ISSN-L query + if query is not None: + if len(query.split()) == 1 and len(query) == 9 and query[0:4].isdigit() and query[4] == '-': + query = 'issnl:"{}"'.format(query) + + if 'q' in request.args.keys(): + # only query elasticsearch when a 'q' parameter was submitted + found = do_container_search(query, limit=limit) + return render_template('container_search.html', found=found, query=query) + else: + return render_template('container_search.html', query=query) + ### Auth #################################################################### diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py index 5c278c21..a301fcb5 100644 --- a/python/fatcat_web/search.py +++ b/python/fatcat_web/search.py @@ -6,9 +6,11 @@ from fatcat_web import app """ Helpers for doing elasticsearch queries (used in the web interface; not part of the formal API) + +TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded """ -def do_search(q, limit=50, fulltext_only=True): +def do_release_search(q, limit=50, fulltext_only=True): #print("Search hit: " + q) if limit > 100: @@ -65,3 +67,54 @@ def do_search(q, limit=50, fulltext_only=True): "count_returned": len(results), "count_found": found, "results": results } + +def do_container_search(q, limit=50): + + #print("Search hit: " + q) + if limit > 100: + # Sanity check + limit = 100 + + search_request = { + "query": { + "query_string": { + "query": q, + "default_operator": "AND", + "analyze_wildcard": True, + "lenient": True, + "fields": ["name^5", "publisher"] + }, + }, + "size": int(limit), + } + + #print(search_request) + resp = requests.get("%s/%s/_search" % + 
(app.config['ELASTICSEARCH_BACKEND'], app.config['ELASTICSEARCH_CONTAINER_INDEX']), + json=search_request) + + if resp.status_code == 400: + print("elasticsearch 400: " + str(resp.content)) + flash("Search query failed to parse; you might need to use quotes.

{}".format(resp.content)) + abort(resp.status_code) + elif resp.status_code != 200: + print("elasticsearch non-200 status code: " + str(resp.status_code)) + print(resp.content) + abort(resp.status_code) + + content = resp.json() + #print(content) + results = [h['_source'] for h in content['hits']['hits']] + for h in results: + # Handle surrogate strings that elasticsearch returns sometimes, + # probably due to mangled data processing in some pipeline. + # "Crimes against Unicode"; production workaround + for key in h: + if type(h[key]) is str: + h[key] = h[key].encode('utf8', 'ignore').decode('utf8') + + found = content['hits']['total'] + return {"query": { "q": q }, + "count_returned": len(results), + "count_found": found, + "results": results } diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html new file mode 100644 index 00000000..9fc7aa98 --- /dev/null +++ b/python/fatcat_web/templates/container_search.html @@ -0,0 +1,49 @@ +{% extends "base.html" %} +{% block body %} + +

Journal/Conference Search

+
+
+
+ + +
+
+
+ +
+ +{% if found %} +{% if found.results %} + Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: {{ found.query.q }} +{% for entity in found.results %} +
+

+ {{ entity['name'] }} + {% if entity.is_oa %}{% endif %} +

+ {% if entity.publisher %} +
{{ entity.publisher }}
+ {% endif %} + {% if entity.issnl %} + ISSN {{ entity.issnl }} + {% endif %} + {% if entity.container_type %} +  {{ entity.container_type }} + {% endif %} +
+{% endfor %} +{% else %} +
+

No results found!

+ Query was: {{ found.query.q }} +
+

Try:

+ +
+{% endif %} +{% endif %} + +{% endblock %} -- cgit v1.2.3