about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2021-11-02 11:32:08 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2021-11-02 11:32:08 -0700
commit fdbfb8dc55df8c3739feca8c52c017c56b006573 (patch)
tree 4a24adf10ea159f889aa7b6ed907624a8bbaf602 /python
parent 641b6eb21f68e9e0a2f82a570031bb15ccd58d6f (diff)
parent 3752237a30db843fb84a4197d7047f1c34eb5df2 (diff)
download fatcat-fdbfb8dc55df8c3739feca8c52c017c56b006573.tar.gz
fatcat-fdbfb8dc55df8c3739feca8c52c017c56b006573.zip
Merge branch 'bnewbold-match-get'
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_tools/transforms/access.py 12
-rw-r--r-- python/fatcat_web/forms.py 12
-rw-r--r-- python/fatcat_web/ref_routes.py 41
-rw-r--r-- python/fatcat_web/templates/reference_match.html 4
4 files changed, 60 insertions, 9 deletions
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index 5ed64c7c..39d4c6d3 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -36,10 +36,16 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
"""
Extracts access options from a release.
- TODO: proper implementation
+ TODO: proper implementation and filtering, instead of just returning first
+ option found
"""
options = []
for f in (release.files or []):
+ thumbnail_url = None
+ if f.mimetype == 'application/pdf' and f.sha1 and f.urls:
+ # NOTE: scholar.archive.org does an actual database check before
+ # generating these URLs, but we skip that for speed
+ thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg"
for u in (f.urls or []):
if '://web.archive.org/' in u.url:
return [AccessOption(
@@ -47,7 +53,7 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
access_url=u.url,
mimetype=f.mimetype,
size_bytes=f.size,
- thumbnail_url=None
+ thumbnail_url=thumbnail_url,
)]
elif '://archive.org/' in u.url:
return [AccessOption(
@@ -55,6 +61,6 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
access_url=u.url,
mimetype=f.mimetype,
size_bytes=f.size,
- thumbnail_url=None
+ thumbnail_url=thumbnail_url,
)]
return options
diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py
index a856ef22..2757ebd2 100644
--- a/python/fatcat_web/forms.py
+++ b/python/fatcat_web/forms.py
@@ -487,6 +487,10 @@ class EntityTomlForm(EntityEditForm):
class ReferenceMatchForm(FlaskForm):
+ class Meta:
+ # this is an API, so disable CSRF
+ csrf = False
+
submit_type = SelectField('submit_type',
[validators.DataRequired()],
choices=['parse', 'match'])
@@ -496,12 +500,20 @@ class ReferenceMatchForm(FlaskForm):
title = StringField("Title")
journal = StringField("Journal or Conference")
first_author = StringField("First Author")
+ #author_names = StringField("Author Names")
#year = IntegerField('Year Released',
# [validators.Optional(True), valid_year])
year = StringField("Year Released")
+ date = StringField("Date Released")
volume = StringField("Volume")
issue = StringField("Issue")
pages = StringField("Pages")
+ publisher = StringField("Publisher")
+ doi = StringField("DOI")
+ pmid = StringField("PubMed Identifier (PMID)")
+ arxiv_id = StringField("arxiv.org Identifier")
+ release_type = StringField("Release Type")
+ release_stage = StringField("Release Stage")
@staticmethod
def from_grobid_parse(parse_dict, raw_citation):
diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py
index d4219012..2d8ed413 100644
--- a/python/fatcat_web/ref_routes.py
+++ b/python/fatcat_web/ref_routes.py
@@ -3,13 +3,16 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references
"inbound" and "outbound" from a specific release or work.
"""
-from flask import render_template, request, Response
+import json
+
+from flask import render_template, request, Response, jsonify
from fatcat_openapi_client import *
from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release
from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches
from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits
from fatcat_tools.transforms.access import release_access_options
+from fatcat_tools.transforms.entities import entity_to_dict
from fatcat_web import app, api
from fatcat_web.cors import crossdomain
from fatcat_web.forms import *
@@ -92,16 +95,18 @@ def wikipedia_view_refs_outbound(wiki_lang: str, wiki_article: str):
hits = _refs_web("out", wikipedia_article=wikipedia_article)
return render_template('wikipedia_view_fuzzy_refs.html', wiki_article=wiki_article, wiki_lang=wiki_lang, wiki_url=wiki_url, direction="out", hits=hits), 200
-
@app.route('/reference/match', methods=['GET', 'POST'])
def reference_match():
- form = ReferenceMatchForm()
grobid_status = None
grobid_dict = None
- if form.is_submitted():
- if form.validate_on_submit():
+ form = ReferenceMatchForm()
+ if not form.is_submitted() and request.args.get('submit_type'):
+ form = ReferenceMatchForm(request.args)
+
+ if form.is_submitted() or request.args.get('title'):
+ if form.validate():
if form.submit_type.data == 'parse':
resp_xml = grobid_api_process_citation(form.raw_citation.data)
if not resp_xml:
@@ -166,3 +171,29 @@ def wikipedia_view_refs_outbound_json(wiki_lang: str, wiki_article: str):
wikipedia_article = wiki_lang + ":" + wiki_article
hits = _refs_web("out", wikipedia_article=wikipedia_article)
return Response(hits.json(exclude_unset=True), mimetype="application/json")
+
+
+@app.route('/reference/match.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def reference_match_json():
+ form = ReferenceMatchForm(request.args)
+ if form.validate():
+ if form.submit_type.data == 'match':
+ matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or []
+ else:
+ raise NotImplementedError()
+ resp = []
+ for m in matches:
+ # expand releases more completely
+ m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs")
+ # hack in access options
+ m.access_options = release_access_options(m.release)
+
+ # and manually convert to dict (for jsonify)
+ info = m.__dict__
+ info['release'] = entity_to_dict(m.release)
+ info['access_options'] = [o.dict() for o in m.access_options]
+ resp.append(info)
+ return jsonify(resp), 200
+ else:
+ return Response(json.dumps(dict(errors=form.errors)), mimetype="application/json", status=400)
diff --git a/python/fatcat_web/templates/reference_match.html b/python/fatcat_web/templates/reference_match.html
index f2335f52..08ab33fc 100644
--- a/python/fatcat_web/templates/reference_match.html
+++ b/python/fatcat_web/templates/reference_match.html
@@ -83,7 +83,9 @@
<td class="">
{% if match.access_options %}
<a href="{{ match.access_options[0].access_url}}" class="ui tiny green active button">{{ match.access_options[0].access_type.name }}</a>
- {% endif %}
+ {% else %}
+ <i class="ui tiny grey inactive button">no fulltext</a>
+ {% endif %}
{% endfor %}
</tbody>
</table>