diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 11:32:08 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-11-02 11:32:08 -0700 | 
| commit | fdbfb8dc55df8c3739feca8c52c017c56b006573 (patch) | |
| tree | 4a24adf10ea159f889aa7b6ed907624a8bbaf602 /python | |
| parent | 641b6eb21f68e9e0a2f82a570031bb15ccd58d6f (diff) | |
| parent | 3752237a30db843fb84a4197d7047f1c34eb5df2 (diff) | |
| download | fatcat-fdbfb8dc55df8c3739feca8c52c017c56b006573.tar.gz fatcat-fdbfb8dc55df8c3739feca8c52c017c56b006573.zip | |
Merge branch 'bnewbold-match-get'
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/transforms/access.py | 12 | ||||
| -rw-r--r-- | python/fatcat_web/forms.py | 12 | ||||
| -rw-r--r-- | python/fatcat_web/ref_routes.py | 41 | ||||
| -rw-r--r-- | python/fatcat_web/templates/reference_match.html | 4 | 
4 files changed, 60 insertions, 9 deletions
| diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py index 5ed64c7c..39d4c6d3 100644 --- a/python/fatcat_tools/transforms/access.py +++ b/python/fatcat_tools/transforms/access.py @@ -36,10 +36,16 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:      """      Extracts access options from a release. -    TODO: proper implementation +    TODO: proper implementation and filtering, instead of just returning first +    option found      """      options = []      for f in (release.files or []): +        thumbnail_url = None +        if f.mimetype == 'application/pdf' and f.sha1 and f.urls: +            # NOTE: scholar.archive.org does an actual database check before +            # generating these URLs, but we skip that for speed +            thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg"          for u in (f.urls or []):              if '://web.archive.org/' in u.url:                  return [AccessOption( @@ -47,7 +53,7 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:                      access_url=u.url,                      mimetype=f.mimetype,                      size_bytes=f.size, -                    thumbnail_url=None +                    thumbnail_url=thumbnail_url,                  )]              elif '://archive.org/' in u.url:                  return [AccessOption( @@ -55,6 +61,6 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:                      access_url=u.url,                      mimetype=f.mimetype,                      size_bytes=f.size, -                    thumbnail_url=None +                    thumbnail_url=thumbnail_url,                  )]      return options diff --git a/python/fatcat_web/forms.py b/python/fatcat_web/forms.py index a856ef22..2757ebd2 100644 --- a/python/fatcat_web/forms.py +++ b/python/fatcat_web/forms.py @@ -487,6 +487,10 @@ class EntityTomlForm(EntityEditForm):  class ReferenceMatchForm(FlaskForm): +    class Meta: +        # this is an API, so disable CSRF +        csrf = False +      submit_type = SelectField('submit_type',          [validators.DataRequired()],          choices=['parse', 'match']) @@ -496,12 +500,20 @@ class ReferenceMatchForm(FlaskForm):      title = StringField("Title")      journal = StringField("Journal or Conference")      first_author = StringField("First Author") +    #author_names = StringField("Author Names")      #year = IntegerField('Year Released',      #    [validators.Optional(True), valid_year])      year = StringField("Year Released") +    date = StringField("Date Released")      volume = StringField("Volume")      issue = StringField("Issue")      pages = StringField("Pages") +    publisher = StringField("Publisher") +    doi = StringField("DOI") +    pmid = StringField("PubMed Identifier (PMID)") +    arxiv_id = StringField("arxiv.org Identifier") +    release_type = StringField("Release Type") +    release_stage = StringField("Release Stage")      @staticmethod      def from_grobid_parse(parse_dict, raw_citation): diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py index d4219012..2d8ed413 100644 --- a/python/fatcat_web/ref_routes.py +++ b/python/fatcat_web/ref_routes.py @@ -3,13 +3,16 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references  "inbound" and "outbound" from a specific release or work.  """ -from flask import render_template, request, Response +import json + +from flask import render_template, request, Response, jsonify  from fatcat_openapi_client import *  from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release  from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches  from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits  from fatcat_tools.transforms.access import release_access_options +from fatcat_tools.transforms.entities import entity_to_dict  from fatcat_web import app, api  from fatcat_web.cors import crossdomain  from fatcat_web.forms import * @@ -92,16 +95,18 @@ def wikipedia_view_refs_outbound(wiki_lang: str, wiki_article: str):      hits = _refs_web("out", wikipedia_article=wikipedia_article)      return render_template('wikipedia_view_fuzzy_refs.html', wiki_article=wiki_article, wiki_lang=wiki_lang, wiki_url=wiki_url, direction="out", hits=hits), 200 -  @app.route('/reference/match', methods=['GET', 'POST'])  def reference_match(): -    form = ReferenceMatchForm()      grobid_status = None      grobid_dict = None -    if form.is_submitted(): -        if form.validate_on_submit(): +    form = ReferenceMatchForm() +    if not form.is_submitted() and request.args.get('submit_type'): +        form = ReferenceMatchForm(request.args) + +    if form.is_submitted() or request.args.get('title'): +        if form.validate():              if form.submit_type.data == 'parse':                  resp_xml = grobid_api_process_citation(form.raw_citation.data)                  if not resp_xml: @@ -166,3 +171,29 @@ def wikipedia_view_refs_outbound_json(wiki_lang: str, wiki_article: str):      wikipedia_article = wiki_lang + ":" + wiki_article      hits = _refs_web("out", wikipedia_article=wikipedia_article)      return Response(hits.json(exclude_unset=True), mimetype="application/json") + + +@app.route('/reference/match.json', methods=['GET', 'OPTIONS']) +@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type']) +def reference_match_json(): +    form = ReferenceMatchForm(request.args) +    if form.validate(): +        if form.submit_type.data == 'match': +            matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or [] +        else: +            raise NotImplementedError() +        resp = [] +        for m in matches: +            # expand releases more completely +            m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs") +            # hack in access options +            m.access_options = release_access_options(m.release) + +            # and manually convert to dict (for jsonify) +            info = m.__dict__ +            info['release'] = entity_to_dict(m.release) +            info['access_options'] = [o.dict() for o in m.access_options] +            resp.append(info) +        return jsonify(resp), 200 +    else: +        return Response(json.dumps(dict(errors=form.errors)), mimetype="application/json", status=400) diff --git a/python/fatcat_web/templates/reference_match.html b/python/fatcat_web/templates/reference_match.html index f2335f52..08ab33fc 100644 --- a/python/fatcat_web/templates/reference_match.html +++ b/python/fatcat_web/templates/reference_match.html @@ -83,7 +83,9 @@          <td class="">            {% if match.access_options %}              <a href="{{ match.access_options[0].access_url}}" class="ui tiny green active button">{{ match.access_options[0].access_type.name }}</a> -            {% endif %} +          {% else %} +            <i class="ui tiny grey inactive button">no fulltext</a> +          {% endif %}    {% endfor %}    </tbody>    </table> | 
