aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-04-01 21:29:56 -0700
committerBryan Newbold <bnewbold@archive.org>2020-04-01 21:29:59 -0700
commit986ce7a38029f7fb20a51271f67d943678e17386 (patch)
tree35d0533b5cba7134dce1f626df5f9caeb741f33d
parentbacbad25b60bf585abb03d6d897061a3d872f2db (diff)
downloadfatcat-covid19-986ce7a38029f7fb20a51271f67d943678e17386.tar.gz
fatcat-covid19-986ce7a38029f7fb20a51271f67d943678e17386.zip
first iteration of web interface
Copied and tweaked from fatcat:python/fatcat_web. LICENSE file for this repo is a TODO and will need to match that of fatcat.
-rw-r--r--fatcat_covid19/babel.cfg3
-rw-r--r--fatcat_covid19/search.py82
-rw-r--r--fatcat_covid19/static/ia_logo.pngbin0 -> 8867 bytes
-rw-r--r--fatcat_covid19/static/ia_logo_text.pngbin0 -> 7463 bytes
-rw-r--r--fatcat_covid19/static/robots.txt1
-rw-r--r--fatcat_covid19/templates/400.html13
-rw-r--r--fatcat_covid19/templates/404.html9
-rw-r--r--fatcat_covid19/templates/500.html13
-rw-r--r--fatcat_covid19/templates/about_de.html13
-rw-r--r--fatcat_covid19/templates/about_en.html13
-rw-r--r--fatcat_covid19/templates/base.html106
-rw-r--r--fatcat_covid19/templates/entity_macros.html117
-rw-r--r--fatcat_covid19/templates/fulltext_search.html72
-rw-r--r--fatcat_covid19/templates/home.html94
-rw-r--r--fatcat_covid19/templates/sources.html119
-rw-r--r--fatcat_covid19/webface.py112
16 files changed, 767 insertions, 0 deletions
diff --git a/fatcat_covid19/babel.cfg b/fatcat_covid19/babel.cfg
new file mode 100644
index 0000000..0a5feb3
--- /dev/null
+++ b/fatcat_covid19/babel.cfg
@@ -0,0 +1,3 @@
+[python: **.py]
+[jinja2: **/templates/**.htm]
+extensions=jinja2.ext.autoescape,jinja2.ext.with_
diff --git a/fatcat_covid19/search.py b/fatcat_covid19/search.py
new file mode 100644
index 0000000..e939502
--- /dev/null
+++ b/fatcat_covid19/search.py
@@ -0,0 +1,82 @@
+
+import datetime
+import requests
+from flask import abort, flash
+from fatcat_covid19.webface import app
+
+def do_search(index, request, limit=30, offset=0, deep_page_limit=2000):
+
+ # Sanity checks
+ if limit > 100:
+ limit = 100
+ if offset < 0:
+ offset = 0
+ if offset > deep_page_limit:
+ # Avoid deep paging problem.
+ offset = deep_page_limit
+
+ request["size"] = int(limit)
+ request["from"] = int(offset)
+ # print(request)
+ resp = requests.get("%s/%s/_search" %
+ (app.config['ELASTICSEARCH_BACKEND'], index),
+ json=request)
+
+ if resp.status_code == 400:
+ print("elasticsearch 400: " + str(resp.content))
+ flash("Search query failed to parse; you might need to use quotes.<p><code>{}</code>".format(resp.content))
+ abort(resp.status_code)
+ elif resp.status_code != 200:
+ print("elasticsearch non-200 status code: " + str(resp.status_code))
+ print(resp.content)
+ abort(resp.status_code)
+
+ content = resp.json()
+ results = [h['_source'] for h in content['hits']['hits']]
+ for h in results:
+ # Handle surrogate strings that elasticsearch returns sometimes,
+ # probably due to mangled data processing in some pipeline.
+ # "Crimes against Unicode"; production workaround
+ for key in h:
+ if type(h[key]) is str:
+ h[key] = h[key].encode('utf8', 'ignore').decode('utf8')
+
+ return {"count_returned": len(results),
+ "count_found": content['hits']['total'],
+ "results": results,
+ "offset": offset,
+ "deep_page_limit": deep_page_limit}
+
+def do_fulltext_search(q, limit=30, offset=0):
+
+ #print("Search hit: " + q)
+ if limit > 100:
+ # Sanity check
+ limit = 100
+
+ # Convert raw DOIs to DOI queries
+ if len(q.split()) == 1 and q.startswith("10.") and q.count("/") >= 1:
+ q = 'doi:"{}"'.format(q)
+
+
+ search_request = {
+ "query": {
+ "query_string": {
+ "query": q,
+ "default_operator": "AND",
+ "analyze_wildcard": True,
+ "lenient": True,
+ "fields": ["everything"],
+ },
+ },
+ }
+
+ resp = do_search(app.config['ELASTICSEARCH_FULLTEXT_INDEX'], search_request, offset=offset)
+ for h in resp['results']:
+ # Ensure 'contrib_names' is a list, not a single string
+ if type(h['contrib_names']) is not list:
+ h['contrib_names'] = [h['contrib_names'], ]
+ h['contrib_names'] = [name.encode('utf8', 'ignore').decode('utf8') for name in h['contrib_names']]
+ resp["query"] = { "q": q }
+ resp["limit"] = limit
+ return resp
diff --git a/fatcat_covid19/static/ia_logo.png b/fatcat_covid19/static/ia_logo.png
new file mode 100644
index 0000000..97cc445
--- /dev/null
+++ b/fatcat_covid19/static/ia_logo.png
Binary files differ
diff --git a/fatcat_covid19/static/ia_logo_text.png b/fatcat_covid19/static/ia_logo_text.png
new file mode 100644
index 0000000..ddfc773
--- /dev/null
+++ b/fatcat_covid19/static/ia_logo_text.png
Binary files differ
diff --git a/fatcat_covid19/static/robots.txt b/fatcat_covid19/static/robots.txt
new file mode 100644
index 0000000..a168f11
--- /dev/null
+++ b/fatcat_covid19/static/robots.txt
@@ -0,0 +1 @@
+# Hello friends!
diff --git a/fatcat_covid19/templates/400.html b/fatcat_covid19/templates/400.html
new file mode 100644
index 0000000..f2659ca
--- /dev/null
+++ b/fatcat_covid19/templates/400.html
@@ -0,0 +1,13 @@
+{% extends "base.html" %}
+{% block body %}
+
+<center>
+<div style="font-size: 8em;">400</div>
+<div style="font-size: 3em;">Bad Request</div>
+
+<p>Wasn't able to handle the request, either due to incorrect or unexpected
+input. Usually more context should be available; if you hit this page it means
+you've discovered a new corner case!
+</center>
+
+{% endblock %}
diff --git a/fatcat_covid19/templates/404.html b/fatcat_covid19/templates/404.html
new file mode 100644
index 0000000..653b8ee
--- /dev/null
+++ b/fatcat_covid19/templates/404.html
@@ -0,0 +1,9 @@
+{% extends "base.html" %}
+{% block body %}
+
+<center>
+<div style="font-size: 8em;">404</div>
+<div style="font-size: 3em;">Not Found</div>
+</center>
+
+{% endblock %}
diff --git a/fatcat_covid19/templates/500.html b/fatcat_covid19/templates/500.html
new file mode 100644
index 0000000..a99232c
--- /dev/null
+++ b/fatcat_covid19/templates/500.html
@@ -0,0 +1,13 @@
+{% extends "base.html" %}
+{% block body %}
+
+<center>
+<div style="font-size: 8em;">500</div>
+<div style="font-size: 3em;">Internal Error</div>
+
+<p>Hrm, something unexpected went wrong. You may have found a bug! This request
+should be logged and reported automatically; you could re-try or contact us for
+more info.
+</center>
+
+{% endblock %}
diff --git a/fatcat_covid19/templates/about_de.html b/fatcat_covid19/templates/about_de.html
new file mode 100644
index 0000000..2dd2b5e
--- /dev/null
+++ b/fatcat_covid19/templates/about_de.html
@@ -0,0 +1,13 @@
+{% extends "base.html" %}
+
+{% block title %}About{% endblock %}
+
+{% block body %}
+
+{# <img class="ui fluid bordered image" src="/static/fatcat.jpg" title="CC0 photo of an oversized feline" alt=""> #}
+
+<h1></h1>
+
+TODO
+
+{% endblock %}
diff --git a/fatcat_covid19/templates/about_en.html b/fatcat_covid19/templates/about_en.html
new file mode 100644
index 0000000..2dd2b5e
--- /dev/null
+++ b/fatcat_covid19/templates/about_en.html
@@ -0,0 +1,13 @@
+{% extends "base.html" %}
+
+{% block title %}About{% endblock %}
+
+{% block body %}
+
+{# <img class="ui fluid bordered image" src="/static/fatcat.jpg" title="CC0 photo of an oversized feline" alt=""> #}
+
+<h1></h1>
+
+TODO
+
+{% endblock %}
diff --git a/fatcat_covid19/templates/base.html b/fatcat_covid19/templates/base.html
new file mode 100644
index 0000000..0ca8471
--- /dev/null
+++ b/fatcat_covid19/templates/base.html
@@ -0,0 +1,106 @@
+<!DOCTYPE html>
+<html lang="en" style="position: relative; min-height: 100%; height: auto;">
+<head>
+ <meta charset="utf-8" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+ <meta name="referrer" content="origin-when-cross-origin">
+
+ <title>COVID-19 Research Search</title>
+
+ <link rel="stylesheet"
+ href="https://cdn.jsdelivr.net/npm/semantic-ui@2.4.1/dist/semantic.min.css"
+ crossorigin="anonymous">
+ <style>
+ {# bnewbold: fix light grey bars in header #}
+ .ui.inverted.menu .item:before { background: none; }
+
+ @media only screen and (max-width: 479px) {
+ .mobile-hide{ display: none !important; }
+ }
+ </style>
+ {% block extra_head %}{% endblock %}
+</head>
+<body style="margin-bottom: 130px;">
+
+<header class="ui fixed inverted menu">
+ <div class="ui container">
+ <a href="/" class="header item">
+ <!-- <img class="logo" src="assets/images/logo.png"> -->
+ <span style="color: red;">COVID-19</span>
+ </a>
+ <a href="https://fatcat.wiki/" class="item mobile-hide">Fatcat</a>
+ <a href="/about" class="item mobile-hide">About</a>
+ <div class="right menu">
+ <div class="item" style="padding: 0;">
+ <form class="" action="/fulltext/search" method="get" role="search" aria-label="Papers">
+ <div class="ui transparent inverted icon input">
+ <i class="search icon" style="padding-right: 2em;"></i>
+ <input type="text" placeholder="Search Papers..." name="q" style="border: 1px solid #777 !important; padding: 5px !important; width: 15em;">
+ </div>
+ </form>
+ </div>
+ <div class="ui simple dropdown item">
+ <!-- language/translate SVG icon -->
+ <img src="data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiA/PjxzdmcgaGVpZ2h0PSIxNzkyIiB2aWV3Qm94PSIwIDAgMTc5MiAxNzkyIiB3aWR0aD0iMTc5MiIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNNzgyIDEwNzhxLTEgMy0xMi41LS41dC0zMS41LTExLjVsLTIwLTlxLTQ0LTIwLTg3LTQ5LTctNS00MS0zMS41dC0zOC0yOC41cS02NyAxMDMtMTM0IDE4MS04MSA5NS0xMDUgMTEwLTQgMi0xOS41IDR0LTE4LjUgMHE2LTQgODItOTIgMjEtMjQgODUuNS0xMTV0NzguNS0xMThxMTctMzAgNTEtOTguNXQzNi03Ny41cS04LTEtMTEwIDMzLTggMi0yNy41IDcuNXQtMzQuNSA5LjUtMTcgNXEtMiAyLTIgMTAuNXQtMSA5LjVxLTUgMTAtMzEgMTUtMjMgNy00NyAwLTE4LTQtMjgtMjEtNC02LTUtMjMgNi0yIDI0LjUtNXQyOS41LTZxNTgtMTYgMTA1LTMyIDEwMC0zNSAxMDItMzUgMTAtMiA0My0xOS41dDQ0LTIxLjVxOS0zIDIxLjUtOHQxNC41LTUuNSA2IC41cTIgMTItMSAzMyAwIDItMTIuNSAyN3QtMjYuNSA1My41LTE3IDMzLjVxLTI1IDUwLTc3IDEzMWw2NCAyOHExMiA2IDc0LjUgMzJ0NjcuNSAyOHE0IDEgMTAuNSAyNS41dDQuNSAzMC41em0tMjA1LTQ4NnEzIDE1LTQgMjgtMTIgMjMtNTAgMzgtMzAgMTItNjAgMTItMjYtMy00OS0yNi0xNC0xNS0xOC00MWwxLTNxMyAzIDE5LjUgNXQyNi41IDAgNTgtMTZxMzYtMTIgNTUtMTQgMTcgMCAyMSAxN3ptNjk4IDEyOWw2MyAyMjctMTM5LTQyem0tMTEwOCA4MDBsNjk0LTIzMnYtMTAzMmwtNjk0IDIzM3YxMDMxem0xMjQxLTMxN2wxMDIgMzEtMTgxLTY1Ny0xMDAtMzEtMjE2IDUzNiAxMDIgMzEgNDUtMTEwIDIxMSA2NXptLTUwMy05NjJsNTczIDE4NHYtMzgwem0zMTEgMTMyM2wxNTggMTMtNTQgMTYwLTQwLTY2cS0xMzAgODMtMjc2IDEwOC01OCAxMi05MSAxMmgtODRxLTc5IDAtMTk5LjUtMzl0LTE4My41LTg1cS04LTctOC0xNiAwLTggNS0xMy41dDEzLTUuNXE0IDAgMTggNy41dDMwLjUgMTYuNSAyMC41IDExcTczIDM3IDE1OS41IDYxLjV0MTU3LjUgMjQuNXE5NSAwIDE2Ny0xNC41dDE1Ny01MC41cTE1LTcgMzAuNS0xNS41dDM0LTE5IDI4LjUtMTYuNXptNDQ4LTEwNzl2MTA3OWwtNzc0LTI0NnEtMTQgNi0zNzUgMTI3LjV0LTM2OCAxMjEuNXEtMTMgMC0xOC0xMyAwLTEtMS0zdi0xMDc4cTMtOSA0LTEwIDUtNiAyMC0xMSAxMDYtMzUgMTQ5LTUwdi0zODRsNTU4IDE5OHEyIDAgMTYwLjUtNTV0MzE2LTEwOC41IDE2MS41LTUzLjVxMjAgMCAyMCAyMXY0MTh6Ii8+PC9zdmc+"
+ alt="select language" style="height: 1.5em; filter: invert(100%);">
+ <i class="dropdown icon"></i>
+ <div class="menu">
+ {# TODO #}
+ <a class="item" href="{{ url_for(request.endpoint, lang_code='en') }}">English</a>
+ <a class="item" href="{{ url_for(request.endpoint, lang_code='de') }}">Deutsch</a>
+ </div>
+ </div>
+ </div>
+ </div>
+</header>
+
+{% block fullmain %}
+<!-- 4em top margin is "enough" -->
+<main class="ui main container" style="margin-top: 6em; margin-bottom: 2em;" {% block main_extra_attr %}{% endblock %}>
+{% with messages = get_flashed_messages() %}
+ {% if messages %}
+ <div class="ui message">
+ {# Needs more javascript: <i class="close icon"></i> #}
+ <div class="header">Flash Message!</div>
+ <ul class="list">
+ {% for message in messages %}
+ <li>{{ message|safe }}
+ {% endfor %}
+ </ul>
+ </div>
+ {% endif %}
+{% endwith %}
+{% block fullbody %}
+ <div class="ui container text">
+ {% block body %}Nothing to see here.{% endblock %}
+ </div>
+{% endblock %}
+</main>
+{% endblock %}
+
+
+<footer class="ui inverted vertical footer segment" style="margin-top: 2em; padding-top: 2em; padding-bottom:2em; position: absolute; bottom: 0px; width: 100%;">
+ <div class="ui center aligned container">
+ <div class="ui horizontal inverted small divided link list">
+ <a class="item" href="https://fatcat.wiki/">fatcat</a>
+ <a class="item" href="/about">About</a>
+ <a class="item" href="/sources">Sources</a>
+ <a class="item" href="https://github.com/bnewbold/covid19-fatcat-wiki/">Code</a>
+ <a class="item" href="https://github.com/bnewbold/covid19-fatcat-wiki/tree/{{ config.GIT_REVISION }}"><code>{{ config.GIT_REVISION }}</code></a>
+ </div>
+ </div>
+</footer>
+
+<script
+ src="https://code.jquery.com/jquery-3.1.1.min.js"
+ integrity="sha256-hVVnYaiADRTO2PzUGmuLJr8BLUSjGIZsDYGmIJLv2b8="
+ crossorigin="anonymous">
+</script>
+<script
+ src="https://cdn.jsdelivr.net/npm/semantic-ui@2.3.2/dist/semantic.min.js"
+ crossorigin="anonymous">
+</script>
+{% block postscript %}{% endblock %}
+
+</body>
+</html>
diff --git a/fatcat_covid19/templates/entity_macros.html b/fatcat_covid19/templates/entity_macros.html
new file mode 100644
index 0000000..9cded8a
--- /dev/null
+++ b/fatcat_covid19/templates/entity_macros.html
@@ -0,0 +1,117 @@
+
+{% macro fulltext_search_result_row(paper) -%}
+<div>
+ <h4 style="margin-top: 1em; margin-bottom: 0px; font-size: 1.1em;">
+ <a href="/release/{{ paper.ident }}" style="color: #2224c7;">
+ {% if paper.title %}
+ {{ paper.title[:512] }}
+ {% if paper.title|length > 512 %}...{% endif %}
+ {% else %}
+ [blank]
+ {% endif %}
+ </a>
+ </h4>
+ {% if paper.best_pdf_url %}
+ <div style="float: right; padding: 4px;">
+ &nbsp;&nbsp;<a href="{{ paper.best_pdf_url }}" class="ui violet tag label"><i class="file icon"></i>fulltext</a>
+ </div>
+ {% endif %}
+ {#
+ <h5 style="margin-top: 4px; margin-bottom: 4px; font-size: 1em;">{{ ", ".join(paper.contrib_names[:12]) }}
+ {% if paper.contrib_names|length > 12 %}<i>(+{{ paper.contrib_names|length - 12 }} others)</i>{% endif %}
+ </h5>
+ #}
+ {% if paper.contrib_names %}
+ <div style="margin-top: 0px; margin-bottom: 0px; font-size: 1em;">
+ <b>
+ {{ ", ".join(paper.contrib_names[:12]) }}
+ {% if paper.contrib_names|length > 12 %}<i>(+{{ paper.contrib_names|length - 12 }} others)</i>{% endif %}
+ </b>
+ </div>
+ {% endif %}
+ {% if paper.release_year %}
+ {{ paper.release_year }}
+ {% endif %}
+ {% if paper.release_type %}
+ {% if paper.release_type in ("article-journal", "paper-conference") %}
+ <span class="ui black basic label small">{{ paper.release_type }}</span>
+ {% elif paper.release_type == "book" %}
+ <span class="ui brown basic label small">{{ paper.release_type }}</span>
+ {% else %}
+ <span class="ui grey basic label small">{{ paper.release_type }}</span>
+ {% endif %}
+ {% endif %}
+ {% if paper.withdrawn_status %}
+ <span class="ui red label small">{{ paper.withdrawn_status }}</span>
+ {% endif %}
+ {% if paper.release_stage and paper.release_stage != "published" %}
+ <span class="ui pink basic label small">{{ paper.release_stage }}</span>
+ {% elif not paper.release_stage %}
+ <span class="ui red basic label small">unknown</span>
+ {% endif %}
+ {% if paper.container_name %}
+ {% if paper.container_id %}
+ <a href="/container/{{ paper.container_id }}" style="color: black;">{{ paper.container_name }}</a>
+ {% else %}
+ {{ paper.container_name }}
+ {% endif %}
+ {% if paper.container_is_oa %}<i class="icon unlock orange small"></i>{% endif %}
+ {% endif %}
+ {% if paper.doi or paper.pmid or paper.arxiv_id or paper.jstor_id %}
+ <br>
+ {% endif %}
+ {% if paper.doi %}
+ <a href="https://doi.org/{{paper.doi }}" style="color: green;">doi:{{ paper.doi }}</a> &nbsp;
+ {% endif %}
+ {% if paper.pmid %}
+ <a href="https://www.ncbi.nlm.nih.gov/pubmed/{{paper.pmid }}" style="color: green;">pmid:{{ paper.pmid }}</a> &nbsp;
+ {% endif %}
+ {% if paper.arxiv_id %}
+ <a href="https://arxiv.org/abs/{{paper.arxiv_id }}" style="color: green;">arXiv:{{ paper.arxiv_id }}</a> &nbsp;
+ {% endif %}
+ {% if False %} {# XXX: elastic release work grouping searches #}
+ <br>
+ <a href="/work/{{ paper.work_id }}"><i class="sitemap icon"></i> and 5 other versions of the same work!</a>
+ {% endif %}
+</div>
+{% endmacro %}
+
+
+{% macro top_results(found) -%}
+
+<i>Showing
+ {% if found.offset == 0 %}
+ first
+ {% else %}
+ results {{ found.offset }} &mdash;
+ {% endif %}
+
+ {{ found.offset + found.count_returned }}
+ out of {{ found.count_found }} results
+</i>
+
+{%- endmacro %}
+
+
+{% macro bottom_results(found, endpoint='search.fulltext_search') -%}
+
+{% if found.offset > 0 %}
+ {% if found.offset - found.limit < 0 %}
+ <a href="{{ url_for(endpoint, q=found.query.q, offset=0) }}">&#xab; Previous</a>
+ {% else %}
+ <a href="{{ url_for(endpoint, q=found.query.q, offset=found.offset - found.limit) }}">&#xab; Previous</a>
+ {% endif %}
+{% else %}
+ <span style="color:gray">&#xab; Previous</span>
+{% endif %}
+
+&nbsp;&nbsp;<i>Showing results {{ found.offset }} &mdash; {{ found.offset +
+found.count_returned }} out of {{ found.count_found }} results</i>&nbsp;&nbsp;
+
+{% if found.offset + found.limit < found.count_found and found.offset + found.limit < found.deep_page_limit %}
+ <a href="{{ url_for(endpoint, q=found.query.q, offset=found.offset + found.limit) }}">Next &#xbb;</a>
+ {% else %}
+ <span style="color:gray">Next &#xbb;</span>
+{% endif %}
+
+{%- endmacro %}
diff --git a/fatcat_covid19/templates/fulltext_search.html b/fatcat_covid19/templates/fulltext_search.html
new file mode 100644
index 0000000..dd42f9b
--- /dev/null
+++ b/fatcat_covid19/templates/fulltext_search.html
@@ -0,0 +1,72 @@
+{% import "entity_macros.html" as entity_macros %}
+{% extends "base.html" %}
+
+{% block title %}
+{% if query %}
+ Search: {{ query }}
+{% else %}
+ Fulltext Search
+{% endif %}
+{% endblock %}
+
+
+{% block fullmain %}
+
+<div class="ui vertical stripe segment" style="background-color: #EEE; padding-top: 4.5em;">
+ <div class="ui container text">
+ <h1>Search all COVID-19 Resources</h1>
+ <form class="" role="search" action="/fulltext/search" method="get">
+ <div class="ui form">
+ <div class="ui action input huge fluid">
+ <input type="text" placeholder="Query..." name="q" value="{% if query %}{{ query }}{% endif %}" aria-label="search metadata">
+ <button class="ui primary button">Search</button>
+ </div>
+ <div class="ui checkbox" style="float: right; margin: 1em;">
+ <input type="checkbox" name="fulltext_only" value="true" {% if fulltext_only %}checked{% endif %}>
+ <label>Fulltext Available Only</label>
+ </div>
+ <br>Can also lookup by <b><a href="/release/lookup">identifier</a></b> or search for <b><a href="/container/search?q={{ query or "" }}">containers</a></b> (eg, journals).
+ </div>
+ </form>
+ </div>
+</div>
+
+<div class="ui container text">
+<br>
+
+{% if found %}
+{% if found.results %}
+ {{ entity_macros.top_results(found) }}
+
+ {% for paper in found.results %}
+ {{ entity_macros.fulltext_search_result_row(paper) }}
+{% endfor %}
+{% if found.results|length > 8 %}
+ <div class="ui divider"></div>
+ <div style="text-align: center">
+ {{ entity_macros.bottom_results(found)}}
+ </div>
+{% endif %}
+{% else %}
+
+Raw query was: <i>{{ found.query.q }}</i>
+
+<div class="ui centered stackable grid" style="padding-top: 15%;">
+ <div class="row">
+ <div class="four wide column">
+ <img src="/static/paper_man_confused.gif" alt="confused paper man">
+ </div>
+ <div class="six wide column">
+ <h2>No results found!</h2>
+ <p>You could try elsewhere:</p>
+ <ul>
+ <li>Search <a href="https://dissem.in/search?q={{ found.query.q | urlencode }}">dissem.in</a></li>
+ <li>Search <a href="https://www.base-search.net/Search/Results?lookfor={{ found.query.q | urlencode }}">BASE</a></li>
+ <li>Search <a href="https://scholar.google.com/scholar?q={{ found.query.q | urlencode }}">Google Scholar</a></li>
+ </ul>
+ </div>
+ </div>
+</div>
+{% endif %}
+{% endif %}
+
+</div>
+{% endblock %}
diff --git a/fatcat_covid19/templates/home.html b/fatcat_covid19/templates/home.html
new file mode 100644
index 0000000..dbfb833
--- /dev/null
+++ b/fatcat_covid19/templates/home.html
@@ -0,0 +1,94 @@
+{% extends "base.html" %}
+
+{# no special title for now #}
+{# {% block title %}Perpetual Access to the Scholarly Record{% endblock %} #}
+
+{% block extra_head %}
+ <link rel="canonical" href="https://{{ config.FATCAT_DOMAIN }}/">
+{% endblock %}
+
+{% block fullmain %}
+
+<div class ="ui vertical inverted masthead center aligned segment" style="padding-top: 12em; padding-bottom: 10em;">
+ <div class="ui text container">
+ <h1 class="ui header inverted huge centered">
+ {{ _("Search tens of thousands of COVID-19 research papers and documents") }}
+ </h1>
+ <br>
+ <form class="" action="{{ url_for("search.fulltext_search") }}" method="get" role="search" aria-label="papers" itemprop="potentialAction" itemscope itemtype="https://schema.org/SearchAction">
+ <meta itemprop="target" content="https://{{ config.FATCAT_DOMAIN }}/fulltext/search?q={q}"/>
+ <div class="ui form">
+ <div class="ui action input huge fluid">
+ <input type="text" placeholder="{{ _("by title, authors, identifiers...") }}" name="q" aria-label="search metadata" required itemprop="query-input">
+ <button class="ui green button">{{ _("Search") }}</button>
+ </div>
+ </div>
+ </form>
+ </div>
+</div>
+
+<div class="ui vertical stripe segment" style="background-color: #fffaf3; color: #573a08;">
+ <div class="ui text container">
+ <div class="ui centered grid">
+ <div class="row">
+ <div class="fourteen wide column" style="font-size: 1.1rem;">
+ <b>{{ _("Project Status") }}: {{ _("Prototype") }}</b>
+ &nbsp;
+ {{ _("These resources are not qualified medical advice!") }}
+ </div>
+ </div>
+ </div>
+ </div>
+</div>
+
+{#
+<div class="ui vertical stripe segment" style="padding-top: 2em; padding-bottom: 2em;">
+ <div class="ui text container" style="max-width: 800px!important;">
+ <div class="ui centered grid">
+ <div class="row">
+ <div class="four wide column">
+ <!-- TODO: don't let it scale down -->
+ <img src="/static/paper_man_confused.gif" width="130" alt="confused paper man">
+ </div>
+ <div class="twelve wide column" style="font-size: 1.2rem;">
+ <p><b>Fatcat is a versioned, user-editable catalog of research
+ publications including journal articles, conference proceedings, and
+ datasets</b>
+ <p>Features include archival file-level metadata (verified digests and
+ long-term copies), an
+ <b><a href="https://api.{{ config.FATCAT_DOMAIN }}">open, documented API</a></b>,
+ and work/release indexing (eg, distinguishing between and linking
+ pre-prints, manuscripts, and version-of-record).
+ &nbsp;<a href="/about">Read more...</a>
+ </div>
+ </div>
+ </div>
+ </div>
+</div>
+#}
+
+<div class="ui vertical stripe segment" style="padding-top: 2em; padding-bottom: 2em; background-color: #F5F5F5;">
+ <div class="ui text container" style="max-width: 800px!important;">
+ <div class="ui centered grid">
+ <div class="row">
+ <div class="twelve wide column" style="font-size: 1.2rem;">
+ <p>This service is hosted at <b><a
+ href="https://archive.org">The Internet Archive</a></b>, a US
+ non-profit dedicated to providing Universal Access to All Knowledge.
+ {#
+ <a href="https://archive.org/donate/">Donations welcome!</a>
+ <p>Development funding comes from
+ <b><a href="https://blog.archive.org/2018/03/05/andrew-w-mellon-foundation-awards-grant-to-the-internet-archive-for-long-tail-journal-preservation/">The Andrew Mellon Foundation</a></b>
+ to improve preservation and access to "long-tail" open access works on
+ the public web which might otherwise be lost.
+ #}
+ </div>
+ <div class="four wide column">
+ <img src="/static/ia_logo_text.png" width="140" alt="IA logo">
+ </div>
+ </div>
+ </div>
+ </div>
+</div>
+
+{% endblock %}
diff --git a/fatcat_covid19/templates/sources.html b/fatcat_covid19/templates/sources.html
new file mode 100644
index 0000000..17b0818
--- /dev/null
+++ b/fatcat_covid19/templates/sources.html
@@ -0,0 +1,119 @@
+{% extends "base.html" %}
+
+{% block title %}About{% endblock %}
+
+{% block body %}
+
+{# <img class="ui fluid bordered image" src="/static/fatcat.jpg" title="CC0 photo of an oversized feline" alt=""> #}
+
+<h1></h1>
+
+<p>Fatcat is a versioned, publicly-editable catalog of research publications:
+journal articles, conference proceedings, pre-prints, blog posts, and so forth.
+The goal is to improve the state of preservation and access to these works by
+providing a manifest of full-text content versions and locations.
+
+<p>This service does not directly contain full-text content itself, but
+provides basic access for human and machine readers through links to copies in
+web archives, repositories, and the public web.
+
+<p>Significantly more context and background information can be found in <a
+href="https://guide.{{ config.FATCAT_DOMAIN }}/">The Guide</a>.
+
+<p>Feedback and queries can be directed to
+<b><a href="mailto:webservices@archive.org">webservices@archive.org</a></b>.
+
+<h3>Goals and Features</h3>
+
+<p>A few things set Fatcat apart from similar indexing and discovery services:
+
+<ul>
+ <li>inclusion of archival, <b>file-level metadata (hashes)</b> in addition
+ to URLs, which allows automated verification ("do I have the right copy"),
+ reveals content-drift over time, and enables efficient distribution of
+ content through the ecosystem
+ <li>native support for "post-PDF" digital media, including <b>archival web
+ captures and datasets</b>, as well as content stored on the distributed web
+ <li>data model that captures the <b>work/edition distinction</b>,
+ grouping pre-print, post-review, published, re-published, and updated
+ versions of a work together
+ <li><b>public editing</b> interface, allowing metadata corrections and improvements
+ from individuals and bots in addition to automated imports from authoritative
+ sources
+ <li>focus on providing a stable API and corpus (making integration with
+ diverse user-facing applications simple), while enabling full replication and
+ mirroring of the corpus to <b>reduce the risks of centralized control</b>
+</ul>
+
+<p>This service aspires to be a piece of sustainable, long-term, non-profit,
+free-software, collaborative, open digital infrastructure. It is primarily
+designed to support the <i>archival</i> and <i>dissemination</i> roles of
+scholarly communication. It may also support the <i>registration</i> role
+(establishing precedence and authorship), but explicitly does not aid with
+<i>certification</i> of content, and is not intended to be used for
+<i>evaluation</i> of individuals, institutions, or venues. This service is
+"universal", not curated, and happily includes retracted and "predatory"
+content.
+
+<h3>Sources of Metadata</h3>
+
+The source of all bibliographic information is recorded in edit history
+metadata, which allows the provenance of all records to be reconstructed. A few
+major sources are worth highlighting here:
+
+<ul>
+ <li>Release metadata from <b>Crossref</b>, via their public
+ <a href="https://github.com/CrossRef/rest-api-doc">REST API</a>
+ <li>Release metadata and linked full-text content from NIH <b>Pubmed</b> and <b><a href="https://arxiv.org">arXiv.org</a></b>
+ <li>Release metadata and linked public domain full-text content from the <b>JSTOR</b> Early Journal Content collection
+ <li>Creator names and de-duplication from <b>ORCID</b>, via their annual public data releases
+ <li>Journal title metadata from <b>DOAJ</b>, <b>ISSN ROAD</b>, and <b>SHERPA/RoMEO</b>
+ <li>Full-text URL lists from <b><a href="https://core.ac.uk">CORE</a></b>,
+ <b><a href="http://unpaywall.org">Unpaywall</a></b>,
+ <b><a href="https://www.semanticscholar.org">Semantic Scholar</a></b>,
+ <b><a href="https://citeseerx.ist.psu.edu">CiteseerX</a></b>,
+ and <b><a href="https://www.microsoft.com/en-us/research/project/academic">Microsoft Academic Graph</a></b>.
+ <li><a href="https://guide.{{ config.FATCAT_DOMAIN }}/sources.html">The Guide</a> lists more major sources
+</ul>
+
+Many thanks for the hard work of all these projects, institutions, and
+individuals!
+
+
+<h3>Support and Acknowledgments</h3>
+
+<p>Fatcat is a project of the <b><a href="https://archive.org">Internet Archive</a></b>,
+a US-based non-profit digital library, well known for its
+<a href="https://web.archive.org">Wayback Machine</a> web archive and
+<a href="https://openlibrary.org">Open Library</a> book digitization and
+lending service. All Fatcat databases and services run on Internet Archive
+servers in California, and a copy of most full-text content is stored in the
+Archive's collections and/or web archives.
+
+<p>Development of Fatcat and related web harvesting, indexing, and preservation
+efforts at the Archive have been partially funded (for the 2018-2019 period) by
+a generous grant from the <b>Mellon Foundation</b>
+(<a href="https://blog.archive.org/2018/03/05/andrew-w-mellon-foundation-awards-grant-to-the-internet-archive-for-long-tail-journal-preservation/">"Long-tail Open Access Journal Preservation"</a>).
+Fatcat supports this work by both tracking which open access works are in known
+archives and providing minimum-viable indexing and access mechanisms for
+long-tail works which otherwise would lack them.
+
+<p>The service would not technically be possible without hundreds of Free
+Software components and the efforts of their individual and organizational
+maintainers, more than can be listed here (please see the source code for full
+lists). A few major components include the PostgreSQL database, Elasticsearch
+search engine, Flask python web framework, Rust programming language, Diesel
+database library, Swagger/OpenAPI code generators, Kafka distributed log,
+Ansible configuration management tool, and Ubuntu GNU/Linux operating system
+distribution.
+
+<p>The front-page photo of a large feline with a cup of coffee is by
+<a href="http://www.kampschroer.com/photography.html">Quinn Kampschroer</a>,
+under a CC-0 license. The name "Fatcat" can be interpreted as short for "large
+catalog", as the service aspires to be a <i>complete</i> catalog of the digital
+scholarly record.
+
+<p>A list of technical contributors, including volunteers, is maintained in the
+source code repository (<code>CONTRIBUTORS.md</code>). Thanks everybody!
+
+{% endblock %}
diff --git a/fatcat_covid19/webface.py b/fatcat_covid19/webface.py
new file mode 100644
index 0000000..5476884
--- /dev/null
+++ b/fatcat_covid19/webface.py
@@ -0,0 +1,112 @@
+
+"""
+This is the single-file Flask web application
+"""
+
+import os
+import subprocess
+
+from flask import Flask, Blueprint, g, app, render_template, request
+from flask_babel import Babel, gettext
+from flask.logging import create_logger
+
+import sentry_sdk
+from sentry_sdk.integrations.flask import FlaskIntegration
+
+
+class BaseConfig(object):
+ # Settings object for the Flask application; an instance is passed to
+ # app.config.from_object() further down in this module.
+
+ # Language codes accepted as URL prefix, mapped to their display names
+ SUPPORTED_LANGUAGES = {'en': 'English', 'de': 'Deutsch'}
+ BABEL_DEFAULT_LOCALE = 'en'
+ BABEL_DEFAULT_TIMEZONE = 'UTC'
+ # Evaluated once, when this class body runs: shells out to "git describe".
+ # NOTE(review): presumably fails at import time when run outside a git
+ # checkout or without git installed -- confirm against deployment.
+ GIT_REVISION = subprocess.check_output(["git", "describe", "--always"]).strip().decode('utf-8')
+
+ # Search backend URL and index name; both can be overridden through
+ # environment variables of the same name.
+ ELASTICSEARCH_BACKEND = os.environ.get("ELASTICSEARCH_BACKEND", default="https://search.fatcat.wiki")
+ ELASTICSEARCH_FULLTEXT_INDEX = os.environ.get("ELASTICSEARCH_FULLTEXT_INDEX", default="covid19_fatcat_fulltext")
+
+ # Public hostname, used by templates to build absolute links
+ FATCAT_DOMAIN = "covid19.fatcat.wiki"
+
+ SENTRY_CONFIG = {
+ 'enable-threads': True, # for uWSGI
+ 'release': GIT_REVISION,
+ 'tags': {
+ 'service': 'covid19.fatcat.wiki',
+ },
+ }
+
+sentry_sdk.init(
+ # set DSN via environment variable SENTRY_DSN
+ integrations=[FlaskIntegration()]
+)
+
+app = Flask(__name__, static_url_path='/static')
+app.config.from_object(BaseConfig())
+app.log = create_logger(app)
+babel = Babel(app)
+
+# Imported only after `app` exists: fatcat_covid19.search itself imports
+# `app` from this module. The star import brings the search helpers used by
+# the views below into this namespace.
+from fatcat_covid19.search import *
+
+# All page views are attached to this blueprint, with endpoint names
+# prefixed by "search."
+bp = Blueprint('search', __name__)
+
+@bp.url_defaults
+def add_language_code(endpoint, values):
+ if g.lang_code_set:
+ values.setdefault('lang_code', g.lang_code)
+
+@bp.url_value_preprocessor
+def pull_lang_code(endpoint, values):
+ g.lang_code_set = 'lang_code' in values
+ g.lang_code = values.pop('lang_code', app.config['BABEL_DEFAULT_LOCALE'])
+ if g.lang_code not in app.config['SUPPORTED_LANGUAGES']:
+ abort(404)
+
+@bp.route('/', methods=['GET'])
+def page_home():
+ return render_template('home.html')
+
+@bp.route('/fulltext/search', methods=['GET', 'POST'])
+def fulltext_search():
+
+ query = request.args.get('q')
+
+ offset = request.args.get('offset', '0')
+ offset = max(0, int(offset)) if offset.isnumeric() else 0
+
+ if 'q' in request.args.keys():
+ found = do_fulltext_search(query, offset=offset)
+ return render_template('fulltext_search.html', found=found, query=query)
+ else:
+ return render_template('fulltext_search.html', query=query)
+
+@bp.route('/about', methods=['GET'])
+def page_about():
+ return render_template('about_{}.html'.format(g.lang_code))
+
+@bp.route('/sources', methods=['GET'])
+def page_sources():
+ return render_template('sources.html')
+
+
+@bp.errorhandler(404)
+def page_not_found(e):
+ return render_template('404.html'), 404
+
+@bp.errorhandler(400)
+def page_bad_request(e):
+ return render_template('400.html'), 400
+
+@bp.errorhandler(502)
+@bp.errorhandler(503)
+@bp.errorhandler(504)
+@bp.errorhandler(500)
+def page_server_error(e):
+ return render_template('500.html'), 500
+
+@app.route('/robots.txt', methods=['GET'])
+def robots():
+ return send_from_directory(os.path.join(app.root_path, 'static'),
+ 'robots.txt',
+ mimetype='text/plain')
+
+app.register_blueprint(bp, url_prefix='/<string(length=2):lang_code>/')
+app.register_blueprint(bp, url_prefix='/')