diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/fatcat_tools/references.py | 77 | ||||
-rw-r--r-- | python/fatcat_tools/transforms/access.py | 2 | ||||
-rw-r--r-- | python/fatcat_web/ref_routes.py | 99 | ||||
-rw-r--r-- | python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html | 29 |
4 files changed, 166 insertions, 41 deletions
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py index 73f57e18..81b55f41 100644 --- a/python/fatcat_tools/references.py +++ b/python/fatcat_tools/references.py @@ -10,13 +10,14 @@ import datetime import argparse from typing import Optional, List, Any, Dict, Union -from pydantic import BaseModel +from pydantic import BaseModel, validator import elasticsearch from elasticsearch_dsl import Search from fatcat_openapi_client import ReleaseEntity from fatcat_tools import public_api from fatcat_tools.transforms.access import release_access_options, AccessOption +from fatcat_tools.transforms.entities import entity_to_dict class BiblioRef(BaseModel): @@ -97,8 +98,17 @@ class EnrichedBiblioRef(BaseModel): # TODO: openlibrary work? access: List[AccessOption] + @validator('release') + def check_release(cls, v): + if v is not None and not isinstance(v, ReleaseEntity): + raise ValueError("expected a ReleaseEntity") + return v + class Config: arbitrary_types_allowed = True + json_encoders = { + ReleaseEntity: entity_to_dict, + } class RefHits(BaseModel): @@ -110,6 +120,11 @@ class RefHits(BaseModel): query_wall_time_ms: int result_refs: List[Union[BiblioRef,EnrichedBiblioRef]] + class Config: + json_encoders = { + ReleaseEntity: entity_to_dict, + } + def _execute_ref_query(search: Any, limit: int, offset: Optional[int] = None) -> RefHits: """ @@ -268,40 +283,58 @@ def count_inbound_refs( def enrich_inbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]: enriched = [] for ref in refs: + release = None + access = [] if ref.source_release_ident: release = fatcat_api_client.get_release(ref.source_release_ident, hide=hide, expand=expand) - enriched.append(EnrichedBiblioRef( - ref=ref, - #csl=None, - access=release_access_options(release), - release=release, - )) - else: - enriched.append(EnrichedBiblioRef( - ref=ref, - 
#csl=None, - access=[], - release=None, + access = release_access_options(release) + if ref.source_wikipedia_article: + wiki_lang = ref.source_wikipedia.split(':')[0] + wiki_article = ':'.join(ref.source_wikipedia.split(':')[1:]) + access.append(AccessOption( + access_type="wikipedia", + access_url=f"https://{wiki_lang}.wikipedia.org/wiki/{wiki_article}", + mimetype=None, + size_bytes=None, + thumbnail_url=None )) + enriched.append(EnrichedBiblioRef( + ref=ref, + access=access, + release=release, + )) return enriched def enrich_outbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]: enriched = [] for ref in refs: + release = None + access = [] if ref.target_release_ident: release = fatcat_api_client.get_release(ref.target_release_ident, hide=hide, expand=expand) - enriched.append(EnrichedBiblioRef( - ref=ref, - access=release_access_options(release), - release=release, + access = release_access_options(release) + if ref.target_openlibrary_work: + access.append(AccessOption( + access_type="openlibrary", + access_url=f"https://openlibrary.org/works/{ref.target_openlibrary_work}", + mimetype=None, + size_bytes=None, + thumbnail_url=None )) - else: - enriched.append(EnrichedBiblioRef( - ref=ref, - access=[], - release=None, + if ref.target_url and '://web.archive.org/' in ref.target_url: + access.append(AccessOption( + access_type="wayback", + access_url=ref.target_url, + mimetype=None, + size_bytes=None, + thumbnail_url=None )) + enriched.append(EnrichedBiblioRef( + ref=ref, + access=access, + release=release, + )) return enriched diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py index add8ff3b..5ed64c7c 100644 --- a/python/fatcat_tools/transforms/access.py +++ b/python/fatcat_tools/transforms/access.py @@ -13,6 +13,8 @@ class AccessType(str, Enum): ia_file = "ia_file" ia_microfilm = 
"ia_microfilm" repository = "repository" + openlibrary = "openlibrary" + wikipedia = "wikipedia" class AccessOption(BaseModel): diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py index 7c3ba5bd..72f115cf 100644 --- a/python/fatcat_web/ref_routes.py +++ b/python/fatcat_web/ref_routes.py @@ -3,44 +3,82 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references "inbound" and "outbound" from a specific release or work. """ -from flask import render_template, request +from flask import render_template, request, jsonify, Response from fatcat_openapi_client import * from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches -from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs +from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits from fatcat_tools.transforms.access import release_access_options from fatcat_web import app, api +from fatcat_web.cors import crossdomain from fatcat_web.forms import * from fatcat_web.entity_helpers import * - -@app.route('/release/<string(length=26):ident>/refs/in', methods=['GET']) -def release_view_refs_inbound(ident): - - release = generic_get_entity("release", ident) - +def _refs_web(direction, release_ident=None, work_ident=None, openlibrary_id=None) -> RefHits: offset = request.args.get('offset', '0') offset = max(0, int(offset)) if offset.isnumeric() else 0 + limit = request.args.get('offset', '30') + limit = min(max(0, int(limit)), 100) if limit.isnumeric() else 30 + if direction == "in": + hits = get_inbound_refs( + release_ident=release_ident, + work_ident=work_ident, + openlibrary_work=openlibrary_id, + es_client=app.es_client, + offset=offset, + limit=limit, + ) + hits.result_refs = enrich_inbound_refs( + 
hits.result_refs, + fatcat_api_client=api, + expand="container,files,webcaptures", + ) + elif direction == "out": + hits = get_outbound_refs( + release_ident=release_ident, + work_ident=work_ident, + es_client=app.es_client, + offset=offset, + limit=limit, + ) + hits.result_refs = enrich_outbound_refs( + hits.result_refs, + fatcat_api_client=api, + expand="container,files,webcaptures", + ) + else: + raise ValueError() + return hits + + +@app.route('/release/<string(length=26):ident>/refs-in', methods=['GET']) +def release_view_refs_inbound(ident): + if request.accept_mimetypes.best == "application/json": + return release_view_refs_inbound_json(ident) - hits = get_inbound_refs(release_ident=ident, es_client=app.es_client, offset=offset, limit=30) - enriched_refs = enrich_inbound_refs(hits.result_refs, fatcat_api_client=api, expand="container,files,webcaptures") - - return render_template('release_view_fuzzy_refs.html', direction="in", entity=release, hits=hits, enriched_refs=enriched_refs), 200 + release = generic_get_entity("release", ident) + hits = _refs_web("in", release_ident=ident) + return render_template('release_view_fuzzy_refs.html', direction="in", entity=release, hits=hits), 200 -@app.route('/release/<string(length=26):ident>/refs/out', methods=['GET']) +@app.route('/release/<string(length=26):ident>/refs-out', methods=['GET']) def release_view_refs_outbound(ident): + if request.accept_mimetypes.best == "application/json": + return release_view_refs_outbound_json(ident) release = generic_get_entity("release", ident) + hits = _refs_web("out", release_ident=ident) + return render_template('release_view_fuzzy_refs.html', direction="out", entity=release, hits=hits), 200 - offset = request.args.get('offset', '0') - offset = max(0, int(offset)) if offset.isnumeric() else 0 - - hits = get_outbound_refs(release_ident=ident, es_client=app.es_client, offset=offset, limit=30) - enriched_refs = enrich_outbound_refs(hits.result_refs, fatcat_api_client=api, 
expand="container,files,webcaptures") +@app.route('/openlibrary/OL<int:id_num>W/refs-in', methods=['GET']) +def openlibrary_view_refs_inbound(id_num): + if request.accept_mimetypes.best == "application/json": + return openlibrary_view_refs_inbound(id_num) - return render_template('release_view_fuzzy_refs.html', direction="out", entity=release, hits=hits, enriched_refs=enriched_refs), 200 + openlibrary_id = f"OL{id_num}W" + hits = _refs_web("in", openlibrary_id=openlibrary_id) + return render_template('openlibrary_view_fuzzy_refs.html', openlibrary_id=openlibrary_id, direction="in", hits=hits), 200 @app.route('/reference/match', methods=['GET', 'POST']) @@ -85,3 +123,26 @@ def reference_match(): return render_template('reference_match.html', form=form), 400 return render_template('reference_match.html', form=form), 200 + + +### Pseudo-APIs ############################################################# + +@app.route('/release/<string(length=26):ident>/refs-out.json', methods=['GET', 'OPTIONS']) +@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type']) +def release_view_refs_outbound_json(ident): + hits = _refs_web("out", release_ident=ident) + return Response(hits.json(exclude_unset=True), mimetype="application/json") + + +@app.route('/release/<string(length=26):ident>/refs-in.json', methods=['GET', 'OPTIONS']) +@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type']) +def release_view_refs_inbound_json(ident): + hits = _refs_web("in", release_ident=ident) + return Response(hits.json(exclude_unset=True), mimetype="application/json") + +@app.route('/openlibrary/OL<int:id_num>W/refs-in', methods=['GET', 'OPTIONS']) +@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type']) +def openlibrary_view_refs_inbound_json(ident): + openlibrary_id = f"OL{id_num}W" + hits = _refs_web("in", openlibrary_id=openlibrary_id) + return Response(hits.json(exclude_unset=True), mimetype="application/json") diff --git 
a/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html b/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html new file mode 100644 index 00000000..161a7b50 --- /dev/null +++ b/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html @@ -0,0 +1,29 @@ +{% extends "base.html" %} +{% import "refs_macros.html" as refs_macros %} + +{% block title %}Open Library Refs{% endblock %} + +{% block fullbody %} +<h1 class="ui header"> + {% if hits.result_refs and hits.result_refs[0].ref.target_unstructured %} + <i>{{ hits.result_refs[0].ref.target_unstructured }}</i> + {% endif %} + <span class="sub header"><a href="https://openlibrary.org/works/{{ openlibrary_id }}"><code>https://openlibrary.org/works/{{ openlibrary_id }}</code></a></span> +</h1> + +{% if direction == "in" %} + <h3>Cited By</h3> + <p>This page lists references to this book from other works (eg, journal articles). +{% elif direction == "out" %} + <h3>References</h3> + <i>References from this book to other entities.</i> +{% endif %} + +{% if hits.result_refs %} + {{ refs_macros.refs_table(hits, direction) }} +{% else %} + <br><br><p><b>None found</b> +{% endif %} + +{% endblock %} + |