diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-02-20 19:21:19 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-02-20 19:21:23 -0800 |
commit | 7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1 (patch) | |
tree | 96eca26306465d8801d3bbb2eda62e611afb97c6 | |
parent | 15ad67e4cd44c54a0f7a06f0eb0448d75c9ad1b6 (diff) | |
download | fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.tar.gz fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.zip |
add container search
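Also tweak release search a bit: a raw DOI is now auto-converted to a
`doi:"..."` query only when it is the entire query (a single word), instead of
every DOI-like word inside a longer query being replaced.

The release and container search query code is very duplicative and should be refactored.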
-rw-r--r-- | python/fatcat_web/routes.py | 30 | ||||
-rw-r--r-- | python/fatcat_web/search.py | 55 | ||||
-rw-r--r-- | python/fatcat_web/templates/container_search.html | 49 |
3 files changed, 126 insertions, 8 deletions
```diff
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index f9faf328..7f10ee2b 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -7,7 +7,7 @@ from flask_login import login_required
 from fatcat_web import app, api, auth_api, priv_api
 from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth
 from fatcat_client.rest import ApiException
-from fatcat_web.search import do_search
+from fatcat_web.search import do_release_search, do_container_search
 from fatcat_tools.transforms import *
 
 
@@ -351,7 +351,7 @@ def changelog_entry_view(index):
 ### Search ##################################################################
 
 @app.route('/release/search', methods=['GET', 'POST'])
-def search():
+def release_search():
 
     limit = 20
     query = request.args.get('q')
@@ -359,18 +359,34 @@ def search():
 
     # Convert raw DOIs to DOI queries
     if query is not None:
-        oldquery = query.split()
-        for word in oldquery:
-            if word.startswith("10.") and word.count("/") >= 1:
-                query = query.replace(word, 'doi:"{}"'.format(word))
+        if len(query.split()) == 1 and query.startswith("10.") and query.count("/") >= 1:
+            query = 'doi:"{}"'.format(query)
 
     if 'q' in request.args.keys():
         # always do files for HTML
-        found = do_search(query, limit=limit, fulltext_only=fulltext_only)
+        found = do_release_search(query, limit=limit, fulltext_only=fulltext_only)
         return render_template('release_search.html', found=found, query=query, fulltext_only=fulltext_only)
     else:
         return render_template('release_search.html', query=query, fulltext_only=fulltext_only)
 
+@app.route('/container/search', methods=['GET', 'POST'])
+def container_search():
+
+    limit = 20
+    query = request.args.get('q')
+
+    # Convert raw ISSN-L to ISSN-L query
+    if query is not None:
+        if len(query.split()) == 1 and len(query) == 9 and isdigit(query[0:4]) and query[4] == '-':
+            query = 'issnl:"{}"'.format(query)
+
+    if 'q' in request.args.keys():
+        # always do files for HTML
+        found = do_container_search(query, limit=limit)
+        return render_template('container_search.html', found=found, query=query)
+    else:
+        return render_template('container_search.html', query=query)
+
 
 ### Auth ####################################################################
 
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 5c278c21..a301fcb5 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -6,9 +6,11 @@ from fatcat_web import app
 """
 Helpers for doing elasticsearch queries (used in the web interface; not part of
 the formal API)
+
+TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded
 """
 
-def do_search(q, limit=50, fulltext_only=True):
+def do_release_search(q, limit=50, fulltext_only=True):
 
     #print("Search hit: " + q)
     if limit > 100:
@@ -65,3 +67,54 @@ def do_search(q, limit=50, fulltext_only=True):
         "count_returned": len(results),
         "count_found": found,
         "results": results }
+
+def do_container_search(q, limit=50):
+
+    #print("Search hit: " + q)
+    if limit > 100:
+        # Sanity check
+        limit = 100
+
+    search_request = {
+        "query": {
+            "query_string": {
+                "query": q,
+                "default_operator": "AND",
+                "analyze_wildcard": True,
+                "lenient": True,
+                "fields": ["name^5", "publisher"]
+            },
+        },
+        "size": int(limit),
+    }
+
+    #print(search_request)
+    resp = requests.get("%s/%s/_search" %
+            (app.config['ELASTICSEARCH_BACKEND'], app.config['ELASTICSEARCH_CONTAINER_INDEX']),
+        json=search_request)
+
+    if resp.status_code == 400:
+        print("elasticsearch 400: " + str(resp.content))
+        flash("Search query failed to parse; you might need to use quotes.<p><code>{}</code>".format(resp.content))
+        abort(resp.status_code)
+    elif resp.status_code != 200:
+        print("elasticsearch non-200 status code: " + str(resp.status_code))
+        print(resp.content)
+        abort(resp.status_code)
+
+    content = resp.json()
+    #print(content)
+    results = [h['_source'] for h in content['hits']['hits']]
+    for h in results:
+        # Handle surrogate strings that elasticsearch returns sometimes,
+        # probably due to mangled data processing in some pipeline.
+        # "Crimes against Unicode"; production workaround
+        for key in h:
+            if type(h[key]) is str:
+                h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
+
+    found = content['hits']['total']
+    return {"query": { "q": q },
+        "count_returned": len(results),
+        "count_found": found,
+        "results": results }
diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html
new file mode 100644
index 00000000..9fc7aa98
--- /dev/null
+++ b/python/fatcat_web/templates/container_search.html
@@ -0,0 +1,49 @@
+{% extends "base.html" %}
+{% block body %}
+
+<h1>Journal/Conference Search</h1>
+<form class="" role="search" action="/container/search" method="get">
+  <div class="ui form">
+    <div class="ui action input huge fluid">
+      <input type="text" placeholder="Query..." name="q" value="{% if query %}{{ query }}{% endif %}" aria-label="search container metadata">
+      <button class="ui button">Search</button>
+    </div>
+  </div>
+</form>
+
+<br clear="all" />
+
+{% if found %}
+{% if found.results %}
+  <i>Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i>
+{% for entity in found.results %}
+<div>
+  <h4 style="margin-top: 1em; margin-bottom: 4px; font-size: 1.1em;">
+    <a href="/container/{{ entity.ident }}" style="color: #2224c7;">{{ entity['name'] }}</a>
+    {% if entity.is_oa %}<i class="icon unlock orange small"></i>{% endif %}
+  </h4>
+  {% if entity.publisher %}
+  <h5 style="margin-top: 4px; margin-bottom: 4px; font-size: 1em;">{{ entity.publisher }}</h5>
+  {% endif %}
+  {% if entity.issnl %}
+  <a href="https://issn.org/{{entity.issnl }}" style="color: green;">ISSN {{ entity.issnl }}</a>
+  {% endif %}
+  {% if entity.container_type %}
+  {{ entity.container_type }}
+  {% endif %}
+</div>
+{% endfor %}
+{% else %}
+<div class="featurette-inner text-center" style="padding-top: 15%;">
+  <h3>No results found!</h3>
+  <i>Query was: <code>{{ found.query.q }}</code></i>
+  <br/>
+  <p>Try:</p>
+  <ul>
+    <li>Search <a href="https://scholar.google.com/scholar?q={{ found.query.q | urlencode }}">Google Scholar</a></li>
+  </ul>
+</div>
+{% endif %}
+{% endif %}
+
+{% endblock %}
```