aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2019-06-11 10:43:54 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2019-06-13 14:36:59 -0700
commit 55ebdd8f06e73ef904e8c216a6e8a67f7fded8d2 (patch)
tree 57f12af517a33c41f37de016d555add00bf56c58
parent 016315f69a03473625d4d8ea3c450eb814e26911 (diff)
download fatcat-55ebdd8f06e73ef904e8c216a6e8a67f7fded8d2.tar.gz
fatcat-55ebdd8f06e73ef904e8c216a6e8a67f7fded8d2.zip
start work on 'generic' search box
-rw-r--r-- python/fatcat_tools/normal.py 95
-rw-r--r-- python/fatcat_web/routes.py 30
-rw-r--r-- python/fatcat_web/templates/base.html 2
-rw-r--r-- python/fatcat_web/templates/home.html 6
4 files changed, 125 insertions, 8 deletions
diff --git a/python/fatcat_tools/normal.py b/python/fatcat_tools/normal.py
new file mode 100644
index 00000000..044ab87d
--- /dev/null
+++ b/python/fatcat_tools/normal.py
@@ -0,0 +1,95 @@
+
+"""
+A bunch of helpers to parse and normalize strings: external identifiers,
+free-form input, titles, etc.
+"""
+
+import re
+
+
+def clean_doi(raw):
+    """
+    Normalizes a DOI taken from free-form input.
+
+    Removes any:
+    - padding whitespace
+    - 'doi:' prefix
+    - URL prefix ('http://' or 'https://', then 'doi.org/' or 'dx.doi.org/')
+
+    Does not try to un-URL-encode, and does not change case.
+
+    Returns the bare DOI ('10.<registrant>/<suffix>'), or None if the input
+    is empty, contains internal whitespace, or is not shaped like a DOI.
+    """
+    if not raw:
+        return None
+    raw = raw.strip()
+    if len(raw.split()) != 1:
+        return None
+    # Prefixes are peeled off in this fixed order, so a combined form such
+    # as 'https://dx.doi.org/10.1234/asdf' is reduced step by step.
+    for prefix in ("doi:", "http://", "https://", "doi.org/", "dx.doi.org/"):
+        if raw.startswith(prefix):
+            raw = raw[len(prefix):]
+    # Directory indicator '10.', a numeric registrant code (optionally
+    # dot-subdivided), a slash, and a non-empty suffix.
+    if not re.fullmatch(r"10\.[0-9]+(?:\.[0-9]+)*/\S+", raw):
+        return None
+    return raw
+
+def test_clean_doi():
+    """Checks prefix stripping and rejection of malformed input in clean_doi()."""
+    # whitespace padding and each supported prefix form
+    assert clean_doi("10.1234/asdf ") == "10.1234/asdf"
+    assert clean_doi("http://doi.org/10.1234/asdf ") == "10.1234/asdf"
+    assert clean_doi("https://dx.doi.org/10.1234/asdf ") == "10.1234/asdf"
+    assert clean_doi("doi:10.1234/asdf ") == "10.1234/asdf"
+    # internal whitespace is never valid
+    assert clean_doi("doi:10.1234/ asdf ") is None
+    # a token that does not start with the '10.' directory indicator
+    assert clean_doi("asdf") is None
+
+def clean_arxiv_id(raw):
+    """
+    Removes any:
+    - padding whitespace
+    - 'arxiv:' prefix (matched case-insensitively)
+
+    Works with versioned or un-versioned arxiv identifiers, in both the
+    current scheme ('1234.5678', '1234.56789v2') and the older
+    archive-prefixed scheme ('hep-th/9901001', 'math.CA/0611800v2').
+
+    Returns None if the input is empty or not shaped like an arxiv identifier
+    """
+    if not raw:
+        return None
+    raw = raw.strip()
+    if raw.lower().startswith("arxiv:"):
+        raw = raw[6:]
+    # Either YYMM.NNNN(N), or archive[.SC]/YYMMNNN; then an optional vN.
+    # fullmatch also rejects anything containing internal whitespace.
+    pattern = r"(?:[0-9]{4}\.[0-9]{4,5}|[a-z\-]+(?:\.[A-Z]{2})?/[0-9]{7})(?:v[0-9]+)?"
+    if re.fullmatch(pattern, raw):
+        return raw
+    return None
+
+def test_clean_arxiv_id():
+    """Placeholder test; it contains no assertions yet."""
+    return None
+
+def clean_pmcid(raw):
+    """
+    Validates a PubMed Central identifier.
+
+    Returns the whitespace-stripped input when it is a single token made of
+    'PMC' followed by at least one digit character (per str.isdigit);
+    otherwise returns None.
+    """
+    candidate = raw.strip()
+    if len(candidate.split()) != 1:
+        return None
+    # "".isdigit() is False, so a bare 'PMC' is rejected here as well
+    looks_valid = candidate.startswith("PMC") and candidate[3:].isdigit()
+    return candidate if looks_valid else None
+
+def clean_sha1(raw):
+    """
+    Normalizes a hex-encoded SHA-1 digest.
+
+    Strips padding whitespace and lower-cases the input.
+
+    Returns the 40-character lower-case hex string, or None if the input is
+    empty or is not exactly 40 hexadecimal characters.
+    """
+    if not raw:
+        return None
+    raw = raw.strip().lower()
+    if len(raw) != 40:
+        return None
+    # explicit alphabet check; this also rejects internal whitespace
+    if any(c not in "0123456789abcdef" for c in raw):
+        return None
+    return raw
+
+def clean_issn(raw):
+    """
+    Normalizes an ISSN of the form 'NNNN-NNNC'.
+
+    Strips padding whitespace and upper-cases the input, so a trailing 'x'
+    check character is returned as 'X'.
+
+    Returns the normalized ISSN, or None if the input is empty or is not
+    four digits, a hyphen, three digits, and a final digit or 'X'. The check
+    character's value is not verified, only its shape.
+    """
+    if not raw:
+        return None
+    raw = raw.strip().upper()
+    # All eight positions are validated, not only the first four.
+    if re.fullmatch(r"[0-9]{4}-[0-9]{3}[0-9X]", raw):
+        return raw
+    return None
+
+def test_clean_issn():
+    """Checks that clean_issn() accepts a well-formed ISSN and rejects bad prefixes."""
+    assert clean_issn("1234-4567") == "1234-4567"
+    # padding whitespace is stripped before validation
+    assert clean_issn(" 1234-4567 ") == "1234-4567"
+    # too short, and a non-digit in the first group
+    assert clean_issn("134-4567") is None
+    assert clean_issn("123X-4567") is None
+
+def clean_isbn13(raw):
+    """
+    Placeholder for ISBN-13 normalization.
+
+    No validation is implemented yet: every string input yields None.
+    """
+    candidate = raw.strip()
+    has_single_token = len(candidate.split()) == 1
+    if not has_single_token:
+        return None
+    # TODO: validate `candidate` as an ISBN-13; until then nothing is accepted
+    return None
+
+def clean_orcid(raw):
+    """
+    Validates an ORCID iD of the form 'NNNN-NNNN-NNNN-NNNC'.
+
+    Strips padding whitespace; does not strip any URL prefix.
+
+    Returns the stripped identifier, or None if the input is empty or is not
+    four hyphen-separated groups of four digits (the final character may be
+    'X'). The check character's value is not verified, only its shape.
+    """
+    if not raw:
+        return None
+    raw = raw.strip()
+    if re.fullmatch(r"[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]", raw):
+        return raw
+    return None
diff --git a/python/fatcat_web/routes.py b/python/fatcat_web/routes.py
index ebb70d90..61ba0029 100644
--- a/python/fatcat_web/routes.py
+++ b/python/fatcat_web/routes.py
@@ -9,6 +9,7 @@ from flask_wtf.csrf import CSRFError
from fatcat_client import Editgroup, EditgroupAnnotation
from fatcat_client.rest import ApiException
from fatcat_tools.transforms import *
+from fatcat_tools.normal import *
from fatcat_web import app, api, auth_api, priv_api, mwoauth
from fatcat_web.auth import handle_token_login, handle_logout, load_user, handle_ia_xauth, handle_wmoauth
from fatcat_web.cors import crossdomain
@@ -496,6 +497,31 @@ def reviewable_view():
### Search ##################################################################
+@app.route('/search', methods=['GET', 'POST'])
+def generic_search():
+    """
+    Entry point for the 'generic' search box.
+
+    Reads the 'q' query-string parameter. If it is missing, redirects to
+    '/release/search'. If it is a single token that one of the clean_*()
+    helpers recognizes as an identifier, redirects to the matching lookup
+    endpoint with the normalized identifier. Anything else (multiple terms,
+    an empty query, or an unrecognized token) is redirected to the release
+    search with the query passed through.
+    """
+    if 'q' not in request.args:
+        return redirect('/release/search')
+    query = request.args.get('q').strip()
+
+    if len(query.split()) == 1:
+        # Single token: try each identifier type in priority order. Each row
+        # is (cleaner, lookup endpoint, lookup query parameter).
+        lookups = (
+            (clean_doi, 'release_lookup', 'doi'),
+            (clean_pmcid, 'release_lookup', 'pmcid'),
+            (clean_sha1, 'file_lookup', 'sha1'),
+            (clean_issn, 'container_lookup', 'issnl'),
+            (clean_isbn13, 'release_lookup', 'isbn13'),
+            (clean_orcid, 'creator_lookup', 'orcid'),
+        )
+        for clean, endpoint, param in lookups:
+            # call the cleaner once and reuse its normalized result
+            ident = clean(query)
+            if ident:
+                return redirect(url_for(endpoint, **{param: ident}))
+
+    # multi-term, or not an identifier? must be a real search
+    return redirect(url_for('release_search', q=query))
+
@app.route('/release/search', methods=['GET', 'POST'])
def release_search():
@@ -734,10 +760,6 @@ def page_about():
def page_rfc():
return render_template('rfc.html')
-@app.route('/search', methods=['GET'])
-def page_search_redirect():
- return redirect("/release/search")
-
@app.route('/robots.txt', methods=['GET'])
def robots():
return send_from_directory(os.path.join(app.root_path, 'static'),
diff --git a/python/fatcat_web/templates/base.html b/python/fatcat_web/templates/base.html
index 9255c9c3..2ada551d 100644
--- a/python/fatcat_web/templates/base.html
+++ b/python/fatcat_web/templates/base.html
@@ -49,7 +49,7 @@
<a href="/changelog" class="item">Changelog</a>
<div class="right menu">
<div class="item" style="padding: 0;">
- <form class="" action="/release/search" method="get" role="search" aria-label="Papers">
+ <form class="" action="/search" method="get" role="search" aria-label="Papers">
<div class="ui transparent inverted icon input">
<i class="search icon" style="padding-right: 2em;"></i>
<input type="text" placeholder="Search Papers..." name="q" style="border: 1px solid #777 !important; padding: 5px !important; width: 15em;">
diff --git a/python/fatcat_web/templates/home.html b/python/fatcat_web/templates/home.html
index 26c45f03..afaab559 100644
--- a/python/fatcat_web/templates/home.html
+++ b/python/fatcat_web/templates/home.html
@@ -14,11 +14,11 @@
<div class="ui text container">
<h1 class="ui header inverted huge centered">Perpetual Access to Millions of Open Research Publications From Around The World</h1>
<br>
- <form class="" action="/release/search" method="get" role="search" aria-label="papers" itemprop="potentialAction" itemscope itemtype="https://schema.org/SearchAction">
- <meta itemprop="target" content="https://{{ config.FATCAT_DOMAIN }}/release/search?q={q}"/>
+ <form class="" action="/search" method="get" role="search" aria-label="papers" itemprop="potentialAction" itemscope itemtype="https://schema.org/SearchAction">
+ <meta itemprop="target" content="https://{{ config.FATCAT_DOMAIN }}/search?q={q}"/>
<div class="ui form">
<div class="ui action input huge fluid">
- <input type="text" placeholder="Query..." name="q" aria-label="search release metadata" required itemprop="query-input">
+ <input type="text" placeholder="Query..." name="q" aria-label="search metadata" required itemprop="query-input">
<button class="ui green button">Search</button>
</div>
</div>