summaryrefslogtreecommitdiffstats
path: root/python/fatcat_web/ref_routes.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2021-04-15 23:31:07 -0700
committerBryan Newbold <bnewbold@robocracy.org>2021-07-23 10:55:09 -0700
commit314aba35d06eb80be0c5ffc068774bb9bca38e76 (patch)
tree1f780f3e2e1808247cffc167f73eba3272353df7 /python/fatcat_web/ref_routes.py
parent15680e0caae7ff6e24ddca8584b0c590d2b30581 (diff)
downloadfatcat-314aba35d06eb80be0c5ffc068774bb9bca38e76.tar.gz
fatcat-314aba35d06eb80be0c5ffc068774bb9bca38e76.zip
web: initial implementation of fuzzy citation parsing and matching tool
Diffstat (limited to 'python/fatcat_web/ref_routes.py')
-rw-r--r--python/fatcat_web/ref_routes.py46
1 files changed, 46 insertions, 0 deletions
diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py
index a49813c4..dc39299f 100644
--- a/python/fatcat_web/ref_routes.py
+++ b/python/fatcat_web/ref_routes.py
@@ -8,8 +8,11 @@ from typing import Optional
from flask import render_template, abort, redirect, request
from fatcat_openapi_client import *
from fatcat_openapi_client.rest import ApiException
+from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release
+from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches
from fatcat_tools.references import enrich_inbound_refs_fatcat, enrich_outbound_refs_fatcat, get_inbound_refs, get_outbound_refs
+from fatcat_tools.transforms.access import release_access_options
from fatcat_web import app, api, auth_api
from fatcat_web.forms import *
from fatcat_web.entity_helpers import *
@@ -48,3 +51,46 @@ def release_view_refs_outbound(ident):
enriched_refs = enrich_outbound_refs_fatcat(hits.result_refs, fatcat_api_client=api, expand="container,files,webcaptures")
return render_template('release_view_fuzzy_refs.html', direction="outbound", entity=release, hits=hits, enriched_refs=enriched_refs), 200
+
+@app.route('/reference/match', methods=['GET', 'POST'])
+def reference_match():
+
+ form = ReferenceMatchForm()
+ grobid_status = None
+ grobid_dict = None
+
+ if form.is_submitted():
+ if form.validate_on_submit():
+ if form.submit_type.data == 'parse':
+ resp_xml = grobid_api_process_citation(form.raw_citation.data)
+ if not resp_xml:
+ grobid_status = "failed"
+ return render_template('reference_match.html', form=form, grobid_status=grobid_status), 400
+ grobid_dict = transform_grobid_ref_xml(resp_xml)
+ if not grobid_dict:
+ grobid_status = "empty"
+ return render_template('reference_match.html', form=form, grobid_status=grobid_status), 200
+ #print(grobid_dict)
+ release_stub = grobid_ref_to_release(grobid_dict)
+ # remove empty values from GROBID parsed dict
+ grobid_dict = {k: v for k, v in grobid_dict.items() if v is not None}
+ form = ReferenceMatchForm.from_grobid_parse(grobid_dict, form.raw_citation.data)
+ grobid_status = "success"
+ matches = close_fuzzy_release_matches(es_client=app.es_client, release=release_stub, match_limit=10) or []
+ elif form.submit_type.data == 'match':
+ matches = close_fuzzy_biblio_matches(es_client=app.es_client, biblio=form.data, match_limit=10) or []
+ else:
+ raise NotImplementedError()
+
+ for m in matches:
+ # expand releases more completely
+ m.release = api.get_release(m.release.ident, expand="container,files,filesets,webcaptures", hide="abstract,refs")
+ # hack in access options
+ m.access_options = release_access_options(m.release)
+
+ return render_template('reference_match.html', form=form, grobid_dict=grobid_dict, grobid_status=grobid_status, matches=matches), 200
+
+ elif form.errors:
+ return render_template('reference_match.html', form=form), 400
+
+ return render_template('reference_match.html', form=form), 200