diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-06-11 10:43:54 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-06-13 14:36:59 -0700 |
commit | 55ebdd8f06e73ef904e8c216a6e8a67f7fded8d2 (patch) | |
tree | 57f12af517a33c41f37de016d555add00bf56c58 /python | |
parent | 016315f69a03473625d4d8ea3c450eb814e26911 (diff) | |
download | fatcat-55ebdd8f06e73ef904e8c216a6e8a67f7fded8d2.tar.gz fatcat-55ebdd8f06e73ef904e8c216a6e8a67f7fded8d2.zip |
start work on 'generic' search box
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/normal.py | 95 | ||||
-rw-r--r-- | python/fatcat_web/routes.py | 30 | ||||
-rw-r--r-- | python/fatcat_web/templates/base.html | 2 | ||||
-rw-r--r-- | python/fatcat_web/templates/home.html | 6 |
4 files changed, 125 insertions, 8 deletions
```diff
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
new file mode 100644
index 00000000..044ab87d
--- /dev/null
+++ b/python/fatcat_tools/normal.py
@@ -0,0 +1,95 @@
+
+"""
+A bunch of helpers to parse and normalize strings: external identifiers,
+free-form input, titles, etc.
+"""
+
+import re
+
+
+def clean_doi(raw):
+    """
+    Removes any:
+    - padding whitespace
+    - 'doi:' prefix
+    - URL prefix
+
+    Does not try to un-URL-encode
+
+    Returns None if not a valid DOI
+    """
+    raw = raw.strip()
+    if len(raw.split()) != 1:
+        return None
+    if raw.startswith("doi:"):
+        raw = raw[4:]
+    if raw.startswith("http://"):
+        raw = raw[7:]
+    if raw.startswith("https://"):
+        raw = raw[8:]
+    if raw.startswith("doi.org/"):
+        raw = raw[8:]
+    if raw.startswith("dx.doi.org/"):
+        raw = raw[11:]
+    if not raw.startswith("10."):
+        return None
+    # TODO: actual regex
+    return raw
+
+def test_clean_doi():
+    assert clean_doi("10.1234/asdf ") == "10.1234/asdf"
+    assert clean_doi("http://doi.org/10.1234/asdf ") == "10.1234/asdf"
+    assert clean_doi("https://dx.doi.org/10.1234/asdf ") == "10.1234/asdf"
+    assert clean_doi("doi:10.1234/asdf ") == "10.1234/asdf"
+    assert clean_doi("doi:10.1234/ asdf ") == None
+
+def clean_arxiv_id(raw):
+    """
+    Removes any:
+    - 'arxiv:' prefix
+
+    Works with versioned or un-versioned arxiv identifiers.
+    """
+    pass
+
+def test_clean_arxiv_id():
+    pass
+
+def clean_pmcid(raw):
+    raw = raw.strip()
+    if len(raw.split()) != 1:
+        return None
+    if raw.startswith("PMC") and raw[3:] and raw[3:].isdigit():
+        return raw
+    return None
+
+def clean_sha1(raw):
+    raw = raw.strip()
+    if len(raw.split()) != 1:
+        return None
+    pass
+
+def clean_issn(raw):
+    raw = raw.strip()
+    if len(raw.split()) != 1:
+        return None
+    if len(raw) == 9 and raw[4] == "-" and raw[0:4].isdigit():
+        return raw
+    return None
+
+def test_clean_issn():
+    assert clean_issn("1234-4567") == "1234-4567"
+    assert clean_issn("134-4567") == None
+    assert clean_issn("123X-4567") == None
+
+def clean_isbn13(raw):
+    raw = raw.strip()
+    if len(raw.split()) != 1:
+        return None
+    return None
+
+def clean_orcid(raw):
+    raw = raw.strip()
+    if len(raw.split()) != 1:
+        return None
+    return None
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index ebb70d90..61ba0029 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -9,6 +9,7 @@ from flask_wtf.csrf import CSRFError
 from fatcat_client import Editgroup, EditgroupAnnotation
 from fatcat_client.rest import ApiException
 from fatcat_tools.transforms import *
+from fatcat_tools.normal import *
 from fatcat_web import app, api, auth_api, priv_api, mwoauth
 from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth, handle_wmoauth
 from fatcat_web.cors import crossdomain
@@ -496,6 +497,31 @@ def reviewable_view():
 
 ### Search ##################################################################
 
+@app.route('/search', methods=['GET', 'POST'])
+def generic_search():
+    if not 'q' in request.args.keys():
+        return redirect('/release/search')
+    query = request.args.get('q').strip()
+
+    if len(query.split()) != 1:
+        # multi-term? must be a real search
+        return redirect(url_for('release_search', q=query))
+
+    if clean_doi(query):
+        return redirect(url_for('release_lookup', doi=clean_doi(query)))
+    if clean_pmcid(query):
+        return redirect(url_for('release_lookup', pmcid=clean_pmcid(query)))
+    if clean_sha1(query):
+        return redirect(url_for('file_lookup', sha1=clean_sha1(query)))
+    if clean_issn(query):
+        return redirect(url_for('container_lookup', issnl=clean_issn(query)))
+    if clean_isbn13(query):
+        return redirect(url_for('release_lookup', isbn13=clean_isbn13(query)))
+    if clean_orcid(query):
+        return redirect(url_for('creator_lookup', orcid=clean_orcid(query)))
+
+    return redirect(url_for('release_search', q=query))
+
 @app.route('/release/search', methods=['GET', 'POST'])
 def release_search():
 
@@ -734,10 +760,6 @@ def page_about():
 def page_rfc():
     return render_template('rfc.html')
 
-@app.route('/search', methods=['GET'])
-def page_search_redirect():
-    return redirect("/release/search")
-
 @app.route('/robots.txt', methods=['GET'])
 def robots():
     return send_from_directory(os.path.join(app.root_path, 'static'),
diff --git a/python/fatcat_web/templates/base.html b/python/fatcat_web/templates/base.html
index 9255c9c3..2ada551d 100644
--- a/python/fatcat_web/templates/base.html
+++ b/python/fatcat_web/templates/base.html
@@ -49,7 +49,7 @@
 <a href="/changelog" class="item">Changelog</a>
 <div class="right menu">
   <div class="item" style="padding: 0;">
-    <form class="" action="/release/search" method="get" role="search" aria-label="Papers">
+    <form class="" action="/search" method="get" role="search" aria-label="Papers">
       <div class="ui transparent inverted icon input">
         <i class="search icon" style="padding-right: 2em;"></i>
         <input type="text" placeholder="Search Papers..." name="q" style="border: 1px solid #777 !important; padding: 5px !important; width: 15em;">
diff --git a/python/fatcat_web/templates/home.html b/python/fatcat_web/templates/home.html
index 26c45f03..afaab559 100644
--- a/python/fatcat_web/templates/home.html
+++ b/python/fatcat_web/templates/home.html
@@ -14,11 +14,11 @@
 <div class="ui text container">
   <h1 class="ui header inverted huge centered">Perpetual Access to Millions of Open Research Publications From Around The World</h1>
   <br>
-  <form class="" action="/release/search" method="get" role="search" aria-label="papers" itemprop="potentialAction" itemscope itemtype="https://schema.org/SearchAction">
-    <meta itemprop="target" content="https://{{ config.FATCAT_DOMAIN }}/release/search?q={q}"/>
+  <form class="" action="/search" method="get" role="search" aria-label="papers" itemprop="potentialAction" itemscope itemtype="https://schema.org/SearchAction">
+    <meta itemprop="target" content="https://{{ config.FATCAT_DOMAIN }}/search?q={q}"/>
     <div class="ui form">
       <div class="ui action input huge fluid">
-        <input type="text" placeholder="Query..." name="q" aria-label="search release metadata" required itemprop="query-input">
+        <input type="text" placeholder="Query..." name="q" aria-label="search metadata" required itemprop="query-input">
         <button class="ui green button">Search</button>
       </div>
     </div>
```