diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 11:32:08 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 11:32:08 -0700 |
commit | fdbfb8dc55df8c3739feca8c52c017c56b006573 (patch) | |
tree | 4a24adf10ea159f889aa7b6ed907624a8bbaf602 /python | |
parent | 641b6eb21f68e9e0a2f82a570031bb15ccd58d6f (diff) | |
parent | 3752237a30db843fb84a4197d7047f1c34eb5df2 (diff) | |
download | fatcat-fdbfb8dc55df8c3739feca8c52c017c56b006573.tar.gz fatcat-fdbfb8dc55df8c3739feca8c52c017c56b006573.zip |
Merge branch 'bnewbold-match-get'
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/transforms/access.py | 12 | ||||
-rw-r--r-- | python/fatcat_web/forms.py | 12 | ||||
-rw-r--r-- | python/fatcat_web/ref_routes.py | 41 | ||||
-rw-r--r-- | python/fatcat_web/templates/reference_match.html | 4 |
4 files changed, 60 insertions, 9 deletions
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py index 5ed64c7c..39d4c6d3 100644 --- a/python/fatcat_tools/transforms/access.py +++ b/python/fatcat_tools/transforms/access.py @@ -36,10 +36,16 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]: """ Extracts access options from a release. - TODO: proper implementation + TODO: proper implementation and filtering, instead of just returning first + option found """ options = [] for f in (release.files or []): + thumbnail_url = None + if f.mimetype == 'application/pdf' and f.sha1 and f.urls: + # NOTE: scholar.archive.org does an actual database check before + # generating these URLs, but we skip that for speed + thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg" for u in (f.urls or []): if '://web.archive.org/' in u.url: return [AccessOption( @@ -47,7 +53,7 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]: access_url=u.url, mimetype=f.mimetype, size_bytes=f.size, - thumbnail_url=None + thumbnail_url=thumbnail_url, )] elif '://archive.org/' in u.url: return [AccessOption( @@ -55,6 +61,6 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]: access_url=u.url, mimetype=f.mimetype, size_bytes=f.size, - thumbnail_url=None + thumbnail_url=thumbnail_url, )] return options diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index a856ef22..2757ebd2 100644 --- a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -487,6 +487,10 @@ class EntityTomlForm(EntityEditForm): class ReferenceMatchForm(FlaskForm): + class Meta: + # this is an API, so disable CSRF + csrf = False + submit_type = SelectField('submit_type', [validators.DataRequired()], choices=['parse', 'match']) @@ -496,12 +500,20 @@ class ReferenceMatchForm(FlaskForm): title = StringField("Title") journal = StringField("Journal or Conference") first_author = 
StringField("First Author") + #author_names = StringField("Author Names") #year = IntegerField('Year Released', # [validators.Optional(True), valid_year]) year = StringField("Year Released") + date = StringField("Date Released") volume = StringField("Volume") issue = StringField("Issue") pages = StringField("Pages") + publisher = StringField("Publisher") + doi = StringField("DOI") + pmid = StringField("PubMed Identifier (PMID)") + arxiv_id = StringField("arxiv.org Identifier") + release_type = StringField("Release Type") + release_stage = StringField("Release Stage") @staticmethod def from_grobid_parse(parse_dict, raw_citation): diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py index d4219012..2d8ed413 100644 --- a/python/fatcat_web/ref_routes.py +++ b/python/fatcat_web/ref_routes.py @@ -3,13 +3,16 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references "inbound" and "outbound" from a specific release or work. """ -from flask import render_template, request, Response +import json + +from flask import render_template, request, Response, jsonify from fatcat_openapi_client import * from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits from fatcat_tools.transforms.access import release_access_options +from fatcat_tools.transforms.entities import entity_to_dict from fatcat_web import app, api from fatcat_web.cors import crossdomain from fatcat_web.forms import * @@ -92,16 +95,18 @@ def wikipedia_view_refs_outbound(wiki_lang: str, wiki_article: str): hits = _refs_web("out", wikipedia_article=wikipedia_article) return render_template('wikipedia_view_fuzzy_refs.html', wiki_article=wiki_article, wiki_lang=wiki_lang, wiki_url=wiki_url, direction="out", 
hits=hits), 200 - @app.route('/reference/match', methods=['GET', 'POST']) def reference_match(): - form = ReferenceMatchForm() grobid_status = None grobid_dict = None - if form.is_submitted(): - if form.validate_on_submit(): + form = ReferenceMatchForm() + if not form.is_submitted() and request.args.get('submit_type'): + form = ReferenceMatchForm(request.args) + + if form.is_submitted() or request.args.get('title'): + if form.validate(): if form.submit_type.data == 'parse': resp_xml = grobid_api_process_citation(form.raw_citation.data) if not resp_xml: @@ -166,3 +171,29 @@ def wikipedia_view_refs_outbound_json(wiki_lang: str, wiki_article: str): wikipedia_article = wiki_lang + ":" + wiki_article hits = _refs_web("out", wikipedia_article=wikipedia_article) return Response(hits.json(exclude_unset=True), mimetype="application/json") + + +@app.route('/reference/match.json', methods=['GET', 'OPTIONS']) +@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type']) +def reference_match_json(): + form = ReferenceMatchForm(request.args) + if form.validate(): + if form.submit_type.data == 'match': + matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or [] + else: + raise NotImplementedError() + resp = [] + for m in matches: + # expand releases more completely + m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs") + # hack in access options + m.access_options = release_access_options(m.release) + + # and manually convert to dict (for jsonify) + info = m.__dict__ + info['release'] = entity_to_dict(m.release) + info['access_options'] = [o.dict() for o in m.access_options] + resp.append(info) + return jsonify(resp), 200 + else: + return Response(json.dumps(dict(errors=form.errors)), mimetype="application/json", status=400) diff --git a/python/fatcat_web/templates/reference_match.html b/python/fatcat_web/templates/reference_match.html index 
f2335f52..08ab33fc 100644 --- a/python/fatcat_web/templates/reference_match.html +++ b/python/fatcat_web/templates/reference_match.html @@ -83,7 +83,9 @@ <td class=""> {% if match.access_options %} <a href="{{ match.access_options[0].access_url}}" class="ui tiny green active button">{{ match.access_options[0].access_type.name }}</a> - {% endif %} + {% else %} + <i class="ui tiny grey inactive button">no fulltext</i> + {% endif %} {% endfor %} </tbody> </table> |