diff options
Diffstat (limited to 'python/fatcat_web')
| -rw-r--r-- | python/fatcat_web/routes.py | 30 | ||||
| -rw-r--r-- | python/fatcat_web/search.py | 55 | ||||
| -rw-r--r-- | python/fatcat_web/templates/container_search.html | 49 | 
3 files changed, 126 insertions, 8 deletions
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index f9faf328..7f10ee2b 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -7,7 +7,7 @@ from flask_login import login_required
 from fatcat_web import app, api, auth_api, priv_api
 from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth
 from fatcat_client.rest import ApiException
-from fatcat_web.search import do_search
+from fatcat_web.search import do_release_search, do_container_search
 from fatcat_tools.transforms import *
 
 
@@ -351,7 +351,7 @@ def changelog_entry_view(index):
 ### Search ##################################################################
 
 @app.route('/release/search', methods=['GET', 'POST'])
-def search():
+def release_search():
 
     limit = 20
     query = request.args.get('q')
@@ -359,18 +359,34 @@ def search():
 
     # Convert raw DOIs to DOI queries
     if query is not None:
-        oldquery = query.split()
-        for word in oldquery:
-            if word.startswith("10.") and word.count("/") >= 1:
-                query = query.replace(word, 'doi:"{}"'.format(word))
+        if len(query.split()) == 1 and query.startswith("10.") and query.count("/") >= 1:
+            query = 'doi:"{}"'.format(query)
 
     if 'q' in request.args.keys():
         # always do files for HTML
-        found = do_search(query, limit=limit, fulltext_only=fulltext_only)
+        found = do_release_search(query, limit=limit, fulltext_only=fulltext_only)
         return render_template('release_search.html', found=found, query=query, fulltext_only=fulltext_only)
     else:
         return render_template('release_search.html', query=query, fulltext_only=fulltext_only)
 
+@app.route('/container/search', methods=['GET', 'POST'])
+def container_search():
+
+    limit = 20
+    query = request.args.get('q')
+
+    # Convert raw ISSN-L to ISSN-L query
+    if query is not None:
+        if len(query.split()) == 1 and len(query) == 9 and query[0:4].isdigit() and query[4] == '-':
+            query = 'issnl:"{}"'.format(query)
+
+    if 'q' in request.args.keys():
+        # always do files for HTML
+        found = do_container_search(query, limit=limit)
+        return render_template('container_search.html', found=found, query=query)
+    else:
+        return render_template('container_search.html', query=query)
+
 
 ### Auth ####################################################################
 
diff --git a/python/fatcat_web/search.py b/python/fatcat_web/search.py
index 5c278c21..a301fcb5 100644
--- a/python/fatcat_web/search.py
+++ b/python/fatcat_web/search.py
@@ -6,9 +6,11 @@ from fatcat_web import app
 """
 Helpers for doing elasticsearch queries (used in the web interface; not part of
 the formal API)
+
+TODO: ELASTICSEARCH_*_INDEX should probably be factored out and just hard-coded
 """
 
-def do_search(q, limit=50, fulltext_only=True):
+def do_release_search(q, limit=50, fulltext_only=True):
 
     #print("Search hit: " + q)
     if limit > 100:
@@ -65,3 +67,54 @@ def do_search(q, limit=50, fulltext_only=True):
             "count_returned": len(results),
             "count_found": found,
             "results": results }
+
+def do_container_search(q, limit=50):
+
+    #print("Search hit: " + q)
+    if limit > 100:
+        # Sanity check
+        limit = 100
+
+    search_request = {
+        "query": {
+            "query_string": {
+            "query": q,
+            "default_operator": "AND",
+            "analyze_wildcard": True,
+            "lenient": True,
+            "fields": ["name^5", "publisher"]
+            },
+        },
+        "size": int(limit),
+    }
+
+    #print(search_request)
+    resp = requests.get("%s/%s/_search" %
+            (app.config['ELASTICSEARCH_BACKEND'], app.config['ELASTICSEARCH_CONTAINER_INDEX']),
+        json=search_request)
+
+    if resp.status_code == 400:
+        print("elasticsearch 400: " + str(resp.content))
+        flash("Search query failed to parse; you might need to use quotes.<p><code>{}</code>".format(resp.content))
+        abort(resp.status_code)
+    elif resp.status_code != 200:
+        print("elasticsearch non-200 status code: " + str(resp.status_code))
+        print(resp.content)
+        abort(resp.status_code)
+
+    content = resp.json()
+    #print(content)
+    results = [h['_source'] for h in content['hits']['hits']]
+    for h in results:
+        # Handle surrogate strings that elasticsearch returns sometimes,
+        # probably due to mangled data processing in some pipeline.
+        # "Crimes against Unicode"; production workaround
+        for key in h:
+            if type(h[key]) is str:
+                h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
+
+    found = content['hits']['total']
+    return {"query": { "q": q },
+            "count_returned": len(results),
+            "count_found": found,
+            "results": results }
diff --git a/python/fatcat_web/templates/container_search.html b/python/fatcat_web/templates/container_search.html
new file mode 100644
index 00000000..9fc7aa98
--- /dev/null
+++ b/python/fatcat_web/templates/container_search.html
@@ -0,0 +1,49 @@
+{% extends "base.html" %}
+{% block body %}
+
+<h1>Journal/Conference Search</h1>
+<form class="" role="search" action="/container/search" method="get">
+  <div class="ui form">
+    <div class="ui action input huge fluid">
+      <input type="text" placeholder="Query..." name="q" value="{% if query %}{{ query }}{% endif %}" aria-label="search container metadata">
+      <button class="ui button">Search</button>
+    </div>
+  </div>
+</form>
+
+<br clear="all" />
+
+{% if found %}
+{% if found.results %}
+  <i>Showing top {{ found.count_returned }} out of {{ found.count_found }} results for: <code>{{ found.query.q }}</code></i>
+{% for entity in found.results %}
+<div>
+  <h4 style="margin-top: 1em; margin-bottom: 4px; font-size: 1.1em;">
+    <a href="/container/{{ entity.ident }}" style="color: #2224c7;">{{ entity['name'] }}</a>
+    {% if entity.is_oa %}<i class="icon unlock orange small"></i>{% endif %}
+  </h4>
+  {% if entity.publisher %}
+    <h5 style="margin-top: 4px; margin-bottom: 4px; font-size: 1em;">{{ entity.publisher }}</h5>
+  {% endif %}
+  {% if entity.issnl %}
+    <a href="https://issn.org/{{entity.issnl }}" style="color: green;">ISSN {{ entity.issnl }}</a>
+  {% endif %}
+  {% if entity.container_type %}
+   {{ entity.container_type }}
+  {% endif %}
+</div>
+{% endfor %}
+{% else %}
+<div class="featurette-inner text-center" style="padding-top: 15%;">
+  <h3>No results found!</h3>
+  <i>Query was: <code>{{ found.query.q }}</code></i>
+  <br/>
+  <p>Try:</p>
+  <ul>
+    <li>Search <a href="https://scholar.google.com/scholar?q={{ found.query.q | urlencode }}">Google Scholar</a></li>
+  </ul>
+</div>
+{% endif %}
+{% endif %}
+
+{% endblock %}
