aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-02-20 19:21:19 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-02-20 19:21:23 -0800
commit7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1 (patch)
tree96eca26306465d8801d3bbb2eda62e611afb97c6
parent15ad67e4cd44c54a0f7a06f0eb0448d75c9ad1b6 (diff)
downloadfatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.tar.gz
fatcat-7c04b83a6612b7a0c87afe4a1ed4bbb65568fea1.zip
add container search
And tweak release search a bit: DOIs aren't auto-replaced unless they are the only word/query. This query code is very duplicative and should be refactored.
-rw-r--r-- python/fatcat_web/routes.py 30
-rw-r--r-- python/fatcat_web/search.py 55
-rw-r--r-- python/fatcat_web/templates/container_search.html 49
3 files changed, 126 insertions, 8 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index f9faf328..7f10ee2b 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -7,7 +7,7 @@ from flask_login import login_required
from fatcat_web import app, api, auth_api, priv_api
from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth
from fatcat_client.rest import ApiException
-from fatcat_web.search import do_search
+from fatcat_web.search import do_release_search, do_container_search
from fatcat_tools.transforms import *
@@ -351,7 +351,7 @@ def changelog_entry_view(index):
### Search ##################################################################
@app.route('/release/search', methods=['GET', 'POST'])
-def search():
+def release_search():
limit = 20
query = request.args.get('q')
@@ -359,18 +359,34 @@ def search():
# Convert raw DOIs to DOI queries
if query is not None:
- oldquery = query.split()
- for word in oldquery:
- if word.startswith("10.") and word.count("/") >= 1:
- query = query.replace(word, 'doi:"{}"'.format(word))
+ if len(query.split()) == 1 and query.startswith("10.") and query.count("/") >= 1:
+ query = 'doi:"{}"'.format(query)
if 'q' in request.args.keys():
# always do files for HTML
- found = do_search(query, limit=limit, fulltext_only=fulltext_only)
+ found = do_release_search(query, limit=limit, fulltext_only=fulltext_only)
return render_template('release_search.html', found=found, query=query, fulltext_only=fulltext_only)
else:
return render_template('release_search.html', query=query, fulltext_only=fulltext_only)
+@app.route('/container/search', methods=['GET', 'POST'])
+def container_search():
+    """Render the container search page, running the `q` query when one is given."""
+    limit = 20
+    query = request.args.get('q')
+
+    # Convert a bare ISSN-L (single token shaped like "1234-5678") to an issnl: query
+    if query is not None:
+        if len(query.split()) == 1 and len(query) == 9 and query[0:4].isdigit() and query[4] == '-':
+            query = 'issnl:"{}"'.format(query)
+
+    if 'q' in request.args:
+        # Only call the search backend when a query was actually submitted
+        found = do_container_search(query, limit=limit)
+        return render_template('container_search.html', found=found, query=query)
+    else:
+        return render_template('container_search.html', query=query)
+
### Auth ####################################################################
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 5c278c21..a301fcb5 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -6,9 +6,11 @@ from fatcat_web import app
"""
Helpers for doing elasticsearch queries (used in the web interface; not part of
the formal API)
+
+TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded
"""
-def do_search(q, limit=50, fulltext_only=True):
+def do_release_search(q, limit=50, fulltext_only=True):
#print("Search hit: " + q)
if limit > 100:
@@ -65,3 +67,54 @@ def do_search(q, limit=50, fulltext_only=True):
"count_returned": len(results),
"count_found": found,
"results": results }
+
+def do_container_search(q, limit=50):
+    """Run query string `q` against the Elasticsearch container index and return
+    a dict with "query", "count_returned", "count_found", and "results"."""
+    if limit > 100:
+        # Sanity check
+        limit = 100
+
+    search_request = {
+        "query": {
+            "query_string": {
+                "query": q,
+                "default_operator": "AND",
+                "analyze_wildcard": True,
+                "lenient": True,
+                "fields": ["name^5", "publisher"]
+            },
+        },
+        "size": int(limit),
+    }
+
+    resp = requests.get("%s/%s/_search" %
+            (app.config['ELASTICSEARCH_BACKEND'], app.config['ELASTICSEARCH_CONTAINER_INDEX']),
+        json=search_request)
+
+    if resp.status_code == 400:
+        print("elasticsearch 400: " + str(resp.content))
+        # NOTE(review): raw response body (bytes) is interpolated into HTML here -- confirm how flashes are rendered
+        flash("Search query failed to parse; you might need to use quotes.<p><code>{}</code>".format(resp.content))
+        abort(resp.status_code)
+    elif resp.status_code != 200:
+        print("elasticsearch non-200 status code: " + str(resp.status_code))
+        print(resp.content)
+        abort(resp.status_code)
+
+    content = resp.json()
+    # Keep only the '_source' document of each hit
+    results = [h['_source'] for h in content['hits']['hits']]
+    for h in results:
+        # Handle surrogate strings that elasticsearch returns sometimes,
+        # probably due to mangled data processing in some pipeline.
+        # "Crimes against Unicode"; production workaround
+        for key in h:
+            if isinstance(h[key], str):
+                h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
+
+    found = content['hits']['total']
+    return {"query": { "q": q },
+            "count_returned": len(results),
+            "count_found": found,
+            "results": results }
diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html
new file mode 100644
index 00000000..9fc7aa98
--- /dev/null
+++ b/python/fatcat_web/templates/container_search.html
@@ -0,0 +1,49 @@
+{% extends "base.html" %}
+{% block body %}
+{# Container search page: query form, then hits from `found` or a no-results notice. #}
+<h1>Journal/Conference Search</h1>
+<form class="" role="search" action="/container/search" method="get">
+ <div class="ui form">
+ <div class="ui action input huge fluid">
+ <input type="text" placeholder="Query..." name="q" value="{% if query %}{{ query }}{% endif %}" aria-label="search container metadata">
+ <button class="ui button">Search</button>
+ </div>
+ </div>
+</form>
+
+<br clear="all" />
+
+{% if found %}
+{% if found.results %}
+ <i>Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i>
+{% for entity in found.results %}
+<div>
+ <h4 style="margin-top: 1em; margin-bottom: 4px; font-size: 1.1em;">
+ <a href="/container/{{ entity.ident }}" style="color: #2224c7;">{{ entity['name'] }}</a>
+ {% if entity.is_oa %}<i class="icon unlock orange small"></i>{% endif %}
+ </h4>
+ {% if entity.publisher %}
+ <h5 style="margin-top: 4px; margin-bottom: 4px; font-size: 1em;">{{ entity.publisher }}</h5>
+ {% endif %}
+ {% if entity.issnl %}{# NOTE(review): confirm issn.org/<ISSN-L> resolves; the ISSN Portal may be the intended target #}
+ <a href="https://issn.org/{{entity.issnl }}" style="color: green;">ISSN {{ entity.issnl }}</a>
+ {% endif %}
+ {% if entity.container_type %}
+ &nbsp;{{ entity.container_type }}
+ {% endif %}
+</div>
+{% endfor %}
+{% else %}
+<div class="featurette-inner text-center" style="padding-top: 15%;">
+ <h3>No results found!</h3>
+ <i>Query was: <code>{{ found.query.q }}</code></i>
+ <br/>
+ <p>Try:</p>
+ <ul>
+ <li>Search <a href="https://scholar.google.com/scholar?q={{ found.query.q | urlencode }}">Google Scholar</a></li>
+ </ul>
+</div>
+{% endif %}
+{% endif %}
+
+{% endblock %}