From 7f85ecea4e5a844ad78d129ed0b32a759ca7c1ad Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@robocracy.org>
Date: Tue, 21 Sep 2021 22:26:07 -0700
Subject: add GET w/ query params to reference match endpoint (and JSON
 version)

---
 python/fatcat_web/forms.py                       | 12 ++++++++
 python/fatcat_web/ref_routes.py                  | 37 ++++++++++++++++++++----
 python/fatcat_web/templates/reference_match.html |  4 ++-
 3 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py
index a856ef22..2757ebd2 100644
--- a/python/fatcat_web/forms.py
+++ b/python/fatcat_web/forms.py
@@ -487,6 +487,10 @@ class EntityTomlForm(EntityEditForm):
 
 class ReferenceMatchForm(FlaskForm):
 
+    class Meta:
+        # this is an API, so disable CSRF
+        csrf = False
+
     submit_type = SelectField('submit_type',
         [validators.DataRequired()],
         choices=['parse', 'match'])
@@ -496,12 +500,20 @@ class ReferenceMatchForm(FlaskForm):
     title = StringField("Title")
     journal = StringField("Journal or Conference")
     first_author = StringField("First Author")
+    #author_names = StringField("Author Names")
     #year = IntegerField('Year Released',
     #    [validators.Optional(True), valid_year])
     year = StringField("Year Released")
+    date = StringField("Date Released")
     volume = StringField("Volume")
     issue = StringField("Issue")
     pages = StringField("Pages")
+    publisher = StringField("Publisher")
+    doi = StringField("DOI")
+    pmid = StringField("PubMed Identifier (PMID)")
+    arxiv_id = StringField("arxiv.org Identifier")
+    release_type = StringField("Release Type")
+    release_stage = StringField("Release Stage")
 
     @staticmethod
     def from_grobid_parse(parse_dict, raw_citation):
diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py
index d4219012..33d2f725 100644
--- a/python/fatcat_web/ref_routes.py
+++ b/python/fatcat_web/ref_routes.py
@@ -3,13 +3,16 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references
 "inbound" and "outbound" from a specific release or work.
 """
 
-from flask import render_template, request, Response
+import json
+
+from flask import render_template, request, Response, jsonify
 from fatcat_openapi_client import *
 from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release
 from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches
 
 from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits
 from fatcat_tools.transforms.access import release_access_options
+from fatcat_tools.transforms.entities import entity_to_dict
 from fatcat_web import app, api
 from fatcat_web.cors import crossdomain
 from fatcat_web.forms import *
@@ -92,16 +95,18 @@ def wikipedia_view_refs_outbound(wiki_lang: str, wiki_article: str):
     hits = _refs_web("out", wikipedia_article=wikipedia_article)
     return render_template('wikipedia_view_fuzzy_refs.html', wiki_article=wiki_article, wiki_lang=wiki_lang, wiki_url=wiki_url, direction="out", hits=hits), 200
 
-
 @app.route('/reference/match', methods=['GET', 'POST'])
 def reference_match():
 
-    form = ReferenceMatchForm()
     grobid_status = None
     grobid_dict = None
 
-    if form.is_submitted():
-        if form.validate_on_submit():
+    form = ReferenceMatchForm()
+    if not form.is_submitted() and request.args.get('submit_type'):
+        form = ReferenceMatchForm(request.args)
+
+    if form.is_submitted() or request.args.get('title'):
+        if form.validate():
             if form.submit_type.data == 'parse':
                 resp_xml = grobid_api_process_citation(form.raw_citation.data)
                 if not resp_xml:
@@ -166,3 +171,25 @@ def wikipedia_view_refs_outbound_json(wiki_lang: str, wiki_article: str):
     wikipedia_article = wiki_lang + ":" + wiki_article
     hits = _refs_web("out", wikipedia_article=wikipedia_article)
     return Response(hits.json(exclude_unset=True), mimetype="application/json")
+
+
+@app.route('/reference/match.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def reference_match_json():
+    form = ReferenceMatchForm(request.args)
+    if form.validate():
+        if form.submit_type.data == 'match':
+            matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or []
+        else:
+            raise NotImplementedError()
+        for m in matches:
+            # expand releases more completely
+            m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs")
+            # hack in access options
+            m.access_options = release_access_options(m.release)
+
+            # and convert to dict (for jsonify)
+            m.release = entity_to_dict(m.release)
+        return jsonify(matches), 200
+    else:
+        return Response(json.dumps(dict(errors=form.errors)), mimetype="application/json", status=400)
diff --git a/python/fatcat_web/templates/reference_match.html b/python/fatcat_web/templates/reference_match.html
index f2335f52..08ab33fc 100644
--- a/python/fatcat_web/templates/reference_match.html
+++ b/python/fatcat_web/templates/reference_match.html
@@ -83,7 +83,9 @@
         <td class="">
           {% if match.access_options %}
             <a href="{{ match.access_options[0].access_url}}" class="ui tiny green active button">{{ match.access_options[0].access_type.name }}</a>
-            {% endif %}
+          {% else %}
+            <i class="ui tiny grey inactive button">no fulltext</i>
+          {% endif %}
   {% endfor %}
   </tbody>
   </table>
-- 
cgit v1.2.3


From 6cbfaaa5e58ae4c0b482e3573e7e99300a857af8 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@robocracy.org>
Date: Mon, 18 Oct 2021 10:42:26 -0700
Subject: access: populate thumbnail_url for PDFs

---
 python/fatcat_tools/transforms/access.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index 5ed64c7c..39d4c6d3 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -36,10 +36,16 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
     """
     Extracts access options from a release.
 
-    TODO: proper implementation
+    TODO: proper implementation and filtering, instead of just returning first
+    option found
     """
     options = []
     for f in (release.files or []):
+        thumbnail_url = None
+        if f.mimetype == 'application/pdf' and f.sha1 and f.urls:
+            # NOTE: scholar.archive.org does an actual database check before
+            # generating these URLs, but we skip that for speed
+            thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg"
         for u in (f.urls or []):
             if '://web.archive.org/' in u.url:
                 return [AccessOption(
@@ -47,7 +53,7 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
                     access_url=u.url,
                     mimetype=f.mimetype,
                     size_bytes=f.size,
-                    thumbnail_url=None
+                    thumbnail_url=thumbnail_url,
                 )]
             elif '://archive.org/' in u.url:
                 return [AccessOption(
@@ -55,6 +61,6 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
                     access_url=u.url,
                     mimetype=f.mimetype,
                     size_bytes=f.size,
-                    thumbnail_url=None
+                    thumbnail_url=thumbnail_url,
                 )]
     return options
-- 
cgit v1.2.3


From 3752237a30db843fb84a4197d7047f1c34eb5df2 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@robocracy.org>
Date: Mon, 18 Oct 2021 10:42:47 -0700
Subject: match: fix access_options in return

---
 python/fatcat_web/ref_routes.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py
index 33d2f725..2d8ed413 100644
--- a/python/fatcat_web/ref_routes.py
+++ b/python/fatcat_web/ref_routes.py
@@ -182,14 +182,18 @@ def reference_match_json():
             matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or []
         else:
             raise NotImplementedError()
+        resp = []
         for m in matches:
             # expand releases more completely
             m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs")
             # hack in access options
             m.access_options = release_access_options(m.release)
 
-            # and convert to dict (for jsonify)
-            m.release = entity_to_dict(m.release)
-        return jsonify(matches), 200
+            # and manually convert to dict (for jsonify)
+            info = m.__dict__
+            info['release'] = entity_to_dict(m.release)
+            info['access_options'] = [o.dict() for o in m.access_options]
+            resp.append(info)
+        return jsonify(resp), 200
     else:
         return Response(json.dumps(dict(errors=form.errors)), mimetype="application/json", status=400)
-- 
cgit v1.2.3