aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_web/ref_routes.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2021-08-06 11:58:16 -0700
committerBryan Newbold <bnewbold@robocracy.org>2021-08-06 11:58:16 -0700
commit99885b458ad505ebb63b3e7cf5b1bae3dd2a459e (patch)
treede3fbb3e42b0bb7f6e447d2e13ac3f92a8bb90b2 /python/fatcat_web/ref_routes.py
parent950d3f08bd439aed92d01dbc3cca9747570aa82c (diff)
parent56e4ce2d8347cdfedd492d54fde080772f3d8725 (diff)
downloadfatcat-99885b458ad505ebb63b3e7cf5b1bae3dd2a459e.tar.gz
fatcat-99885b458ad505ebb63b3e7cf5b1bae3dd2a459e.zip
Merge branch 'bnewbold-refs-apis'
Diffstat (limited to 'python/fatcat_web/ref_routes.py')
-rw-r--r--python/fatcat_web/ref_routes.py168
1 files changed, 168 insertions, 0 deletions
diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py
new file mode 100644
index 00000000..d4219012
--- /dev/null
+++ b/python/fatcat_web/ref_routes.py
@@ -0,0 +1,168 @@
+"""
+Flask endpoints for reference (citation) endpoints. Eg, listing references
+"inbound" and "outbound" from a specific release or work.
+"""
+
+from flask import render_template, request, Response
+from fatcat_openapi_client import *
+from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release
+from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches
+
+from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits
+from fatcat_tools.transforms.access import release_access_options
+from fatcat_web import app, api
+from fatcat_web.cors import crossdomain
+from fatcat_web.forms import *
+from fatcat_web.entity_helpers import *
+
+def _refs_web(direction, release_ident=None, work_ident=None, openlibrary_id=None, wikipedia_article=None) -> RefHits:
+ offset = request.args.get('offset', '0')
+ offset = max(0, int(offset)) if offset.isnumeric() else 0
+ limit = request.args.get('limit', '30')
+ limit = min(max(0, int(limit)), 100) if limit.isnumeric() else 30
+ if direction == "in":
+ hits = get_inbound_refs(
+ release_ident=release_ident,
+ work_ident=work_ident,
+ openlibrary_work=openlibrary_id,
+ es_client=app.es_client,
+ offset=offset,
+ limit=limit,
+ )
+ hits.result_refs = enrich_inbound_refs(
+ hits.result_refs,
+ fatcat_api_client=api,
+ expand="container,files,webcaptures",
+ )
+ elif direction == "out":
+ hits = get_outbound_refs(
+ release_ident=release_ident,
+ wikipedia_article=wikipedia_article,
+ work_ident=work_ident,
+ es_client=app.es_client,
+ offset=offset,
+ limit=limit,
+ )
+ hits.result_refs = enrich_outbound_refs(
+ hits.result_refs,
+ fatcat_api_client=api,
+ expand="container,files,webcaptures",
+ )
+ else:
+ raise ValueError()
+ return hits
+
+
+@app.route('/release/<string(length=26):ident>/refs-in', methods=['GET'])
+def release_view_refs_inbound(ident):
+ if request.accept_mimetypes.best == "application/json":
+ return release_view_refs_inbound_json(ident)
+
+ release = generic_get_entity("release", ident)
+ hits = _refs_web("in", release_ident=ident)
+ return render_template('release_view_fuzzy_refs.html', direction="in", entity=release, hits=hits), 200
+
+
+@app.route('/release/<string(length=26):ident>/refs-out', methods=['GET'])
+def release_view_refs_outbound(ident):
+ if request.accept_mimetypes.best == "application/json":
+ return release_view_refs_outbound_json(ident)
+
+ release = generic_get_entity("release", ident)
+ hits = _refs_web("out", release_ident=ident)
+ return render_template('release_view_fuzzy_refs.html', direction="out", entity=release, hits=hits), 200
+
+@app.route('/openlibrary/OL<int:id_num>W/refs-in', methods=['GET'])
+def openlibrary_view_refs_inbound(id_num):
+ if request.accept_mimetypes.best == "application/json":
+ return openlibrary_view_refs_inbound_json(id_num)
+
+ openlibrary_id = f"OL{id_num}W"
+ hits = _refs_web("in", openlibrary_id=openlibrary_id)
+ return render_template('openlibrary_view_fuzzy_refs.html', openlibrary_id=openlibrary_id, direction="in", hits=hits), 200
+
+@app.route('/wikipedia/<string(length=2):wiki_lang>:<string:wiki_article>/refs-out', methods=['GET'])
+def wikipedia_view_refs_outbound(wiki_lang: str, wiki_article: str):
+ if request.accept_mimetypes.best == "application/json":
+ return wikipedia_view_refs_outbound_json(wiki_lang, wiki_article)
+
+ wiki_url = f"https://{wiki_lang}.wikipedia.org/wiki/{wiki_article}"
+ wiki_article = wiki_article.replace('_', ' ')
+ wikipedia_article = wiki_lang + ":" + wiki_article
+ hits = _refs_web("out", wikipedia_article=wikipedia_article)
+ return render_template('wikipedia_view_fuzzy_refs.html', wiki_article=wiki_article, wiki_lang=wiki_lang, wiki_url=wiki_url, direction="out", hits=hits), 200
+
+
+@app.route('/reference/match', methods=['GET', 'POST'])
+def reference_match():
+
+ form = ReferenceMatchForm()
+ grobid_status = None
+ grobid_dict = None
+
+ if form.is_submitted():
+ if form.validate_on_submit():
+ if form.submit_type.data == 'parse':
+ resp_xml = grobid_api_process_citation(form.raw_citation.data)
+ if not resp_xml:
+ grobid_status = "failed"
+ return render_template('reference_match.html', form=form, grobid_status=grobid_status), 400
+ grobid_dict = transform_grobid_ref_xml(resp_xml)
+ if not grobid_dict:
+ grobid_status = "empty"
+ return render_template('reference_match.html', form=form, grobid_status=grobid_status), 200
+ #print(grobid_dict)
+ release_stub = grobid_ref_to_release(grobid_dict)
+ # remove empty values from GROBID parsed dict
+ grobid_dict = {k: v for k, v in grobid_dict.items() if v is not None}
+ form = ReferenceMatchForm.from_grobid_parse(grobid_dict, form.raw_citation.data)
+ grobid_status = "success"
+ matches = close_fuzzy_release_matches(es_client=app.es_client, release=release_stub, match_limit=10) or []
+ elif form.submit_type.data == 'match':
+ matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or []
+ else:
+ raise NotImplementedError()
+
+ for m in matches:
+ # expand releases more completely
+ m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs")
+ # hack in access options
+ m.access_options = release_access_options(m.release)
+
+ return render_template('reference_match.html', form=form, grobid_dict=grobid_dict, grobid_status=grobid_status, matches=matches), 200
+
+ elif form.errors:
+ return render_template('reference_match.html', form=form), 400
+
+ return render_template('reference_match.html', form=form), 200
+
+
+### Pseudo-APIs #############################################################
+
+@app.route('/release/<string(length=26):ident>/refs-out.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def release_view_refs_outbound_json(ident):
+ hits = _refs_web("out", release_ident=ident)
+ return Response(hits.json(exclude_unset=True), mimetype="application/json")
+
+
+@app.route('/release/<string(length=26):ident>/refs-in.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def release_view_refs_inbound_json(ident):
+ hits = _refs_web("in", release_ident=ident)
+ return Response(hits.json(exclude_unset=True), mimetype="application/json")
+
+@app.route('/openlibrary/OL<int:id_num>W/refs-in.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def openlibrary_view_refs_inbound_json(id_num):
+ openlibrary_id = f"OL{id_num}W"
+ hits = _refs_web("in", openlibrary_id=openlibrary_id)
+ return Response(hits.json(exclude_unset=True), mimetype="application/json")
+
+@app.route('/wikipedia/<string(length=2):wiki_lang>:<string:wiki_article>/refs-out.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def wikipedia_view_refs_outbound_json(wiki_lang: str, wiki_article: str):
+ wiki_article = wiki_article.replace('_', ' ')
+ wikipedia_article = wiki_lang + ":" + wiki_article
+ hits = _refs_web("out", wikipedia_article=wikipedia_article)
+ return Response(hits.json(exclude_unset=True), mimetype="application/json")