summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2021-07-23 17:51:22 -0700
committerBryan Newbold <bnewbold@robocracy.org>2021-07-23 17:51:41 -0700
commit05665c448e846e4834fa59dfe3cd0f11ac789ac9 (patch)
tree789ed4cb850f805e022e857359ca653060882550
parent5fde5d74738ce3c834248c12bae1860840a1287a (diff)
downloadfatcat-05665c448e846e4834fa59dfe3cd0f11ac789ac9.tar.gz
fatcat-05665c448e846e4834fa59dfe3cd0f11ac789ac9.zip
refs: generalize web endpoints; JSON content negotiation; openlibrary inbound view; etc
-rw-r--r--python/fatcat_tools/references.py77
-rw-r--r--python/fatcat_tools/transforms/access.py2
-rw-r--r--python/fatcat_web/ref_routes.py99
-rw-r--r--python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html29
4 files changed, 166 insertions, 41 deletions
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py
index 73f57e18..81b55f41 100644
--- a/python/fatcat_tools/references.py
+++ b/python/fatcat_tools/references.py
@@ -10,13 +10,14 @@ import datetime
import argparse
from typing import Optional, List, Any, Dict, Union
-from pydantic import BaseModel
+from pydantic import BaseModel, validator
import elasticsearch
from elasticsearch_dsl import Search
from fatcat_openapi_client import ReleaseEntity
from fatcat_tools import public_api
from fatcat_tools.transforms.access import release_access_options, AccessOption
+from fatcat_tools.transforms.entities import entity_to_dict
class BiblioRef(BaseModel):
@@ -97,8 +98,17 @@ class EnrichedBiblioRef(BaseModel):
# TODO: openlibrary work?
access: List[AccessOption]
+ @validator('release')  # pydantic hook: check_release runs when the 'release' field is validated
+ def check_release(cls, v):
+ if v is not None and not isinstance(v, ReleaseEntity):  # None is accepted; any other value must be a ReleaseEntity
+ raise ValueError("expected a ReleaseEntity")
+ return v  # the value is returned unchanged
+
class Config:
arbitrary_types_allowed = True
+ json_encoders = {  # when this model is encoded to JSON, ReleaseEntity values go through entity_to_dict
+ ReleaseEntity: entity_to_dict,
+ }
class RefHits(BaseModel):
@@ -110,6 +120,11 @@ class RefHits(BaseModel):
query_wall_time_ms: int
result_refs: List[Union[BiblioRef,EnrichedBiblioRef]]
+ class Config:  # pydantic model configuration for RefHits
+ json_encoders = {  # when this model is encoded to JSON, ReleaseEntity values go through entity_to_dict
+ ReleaseEntity: entity_to_dict,
+ }
+
def _execute_ref_query(search: Any, limit: int, offset: Optional[int] = None) -> RefHits:
"""
@@ -268,40 +283,58 @@ def count_inbound_refs(
def enrich_inbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]:
enriched = []
for ref in refs:
+ release = None  # default when the ref has no source release ident
+ access = []  # access options accumulated for this ref
if ref.source_release_ident:
release = fatcat_api_client.get_release(ref.source_release_ident, hide=hide, expand=expand)
- enriched.append(EnrichedBiblioRef(
- ref=ref,
- #csl=None,
- access=release_access_options(release),
- release=release,
- ))
- else:
- enriched.append(EnrichedBiblioRef(
- ref=ref,
- #csl=None,
- access=[],
- release=None,
+ access = release_access_options(release)
+ if ref.source_wikipedia_article:  # presumably "<lang>:<article title>", judging by the split below -- confirm against BiblioRef
+ wiki_lang = ref.source_wikipedia_article.split(':')[0]  # fix: read the attribute the guard checks (was ref.source_wikipedia)
+ wiki_article = ':'.join(ref.source_wikipedia_article.split(':')[1:])  # everything after the first ':'; later colons are kept
+ access.append(AccessOption(
+ access_type="wikipedia",
+ access_url=f"https://{wiki_lang}.wikipedia.org/wiki/{wiki_article}",
+ mimetype=None,
+ size_bytes=None,
+ thumbnail_url=None
))
+ enriched.append(EnrichedBiblioRef(  # exactly one enriched entry per input ref, with or without a release
+ ref=ref,
+ access=access,
+ release=release,
+ ))
return enriched
def enrich_outbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]:
enriched = []
for ref in refs:
+ release = None  # default when the ref has no target release ident
+ access = []  # access options accumulated for this ref
if ref.target_release_ident:
release = fatcat_api_client.get_release(ref.target_release_ident, hide=hide, expand=expand)
- enriched.append(EnrichedBiblioRef(
- ref=ref,
- access=release_access_options(release),
- release=release,
+ access = release_access_options(release)
+ if ref.target_openlibrary_work:  # add a link to the Open Library work page
+ access.append(AccessOption(
+ access_type="openlibrary",
+ access_url=f"https://openlibrary.org/works/{ref.target_openlibrary_work}",
+ mimetype=None,
+ size_bytes=None,
+ thumbnail_url=None
))
- else:
- enriched.append(EnrichedBiblioRef(
- ref=ref,
- access=[],
- release=None,
+ if ref.target_url and '://web.archive.org/' in ref.target_url:  # target URL already points at web.archive.org; reuse it as-is
+ access.append(AccessOption(
+ access_type="wayback",
+ access_url=ref.target_url,
+ mimetype=None,
+ size_bytes=None,
+ thumbnail_url=None
))
+ enriched.append(EnrichedBiblioRef(  # exactly one enriched entry per input ref, with or without a release
+ ref=ref,
+ access=access,
+ release=release,
+ ))
return enriched
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index add8ff3b..5ed64c7c 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -13,6 +13,8 @@ class AccessType(str, Enum):
ia_file = "ia_file"
ia_microfilm = "ia_microfilm"
repository = "repository"
+ openlibrary = "openlibrary"
+ wikipedia = "wikipedia"
class AccessOption(BaseModel):
diff --git a/python/fatcat_web/ref_routes.py b/python/fatcat_web/ref_routes.py
index 7c3ba5bd..72f115cf 100644
--- a/python/fatcat_web/ref_routes.py
+++ b/python/fatcat_web/ref_routes.py
@@ -3,44 +3,82 @@ Flask endpoints for reference (citation) endpoints. Eg, listing references
"inbound" and "outbound" from a specific release or work.
"""
-from flask import render_template, request
+from flask import render_template, request, jsonify, Response
from fatcat_openapi_client import *
from fuzzycat.grobid_unstructured import grobid_api_process_citation, transform_grobid_ref_xml, grobid_ref_to_release
from fuzzycat.simple import close_fuzzy_biblio_matches, close_fuzzy_release_matches
-from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs
+from fatcat_tools.references import enrich_inbound_refs, enrich_outbound_refs, get_inbound_refs, get_outbound_refs, RefHits
from fatcat_tools.transforms.access import release_access_options
from fatcat_web import app, api
+from fatcat_web.cors import crossdomain
from fatcat_web.forms import *
from fatcat_web.entity_helpers import *
-
-@app.route('/release/<string(length=26):ident>/refs/in', methods=['GET'])
-def release_view_refs_inbound(ident):
-
- release = generic_get_entity("release", ident)
-
+def _refs_web(direction, release_ident=None, work_ident=None, openlibrary_id=None) -> RefHits:  # shared helper: fetch and enrich refs for the web views, paging from query args
offset = request.args.get('offset', '0')
offset = max(0, int(offset)) if offset.isnumeric() else 0
+ limit = request.args.get('limit', '30')  # fix: read the 'limit' query parameter (was reading 'offset')
+ limit = min(max(0, int(limit)), 100) if limit.isnumeric() else 30  # clamp to 0..100; non-numeric input falls back to 30
+ if direction == "in":  # inbound: refs pointing at the given release / work / openlibrary work
+ hits = get_inbound_refs(
+ release_ident=release_ident,
+ work_ident=work_ident,
+ openlibrary_work=openlibrary_id,
+ es_client=app.es_client,
+ offset=offset,
+ limit=limit,
+ )
+ hits.result_refs = enrich_inbound_refs(  # replace the raw refs with their enriched versions in place
+ hits.result_refs,
+ fatcat_api_client=api,
+ expand="container,files,webcaptures",
+ )
+ elif direction == "out":  # outbound: refs made by the given release / work (openlibrary_id is not used here)
+ hits = get_outbound_refs(
+ release_ident=release_ident,
+ work_ident=work_ident,
+ es_client=app.es_client,
+ offset=offset,
+ limit=limit,
+ )
+ hits.result_refs = enrich_outbound_refs(  # replace the raw refs with their enriched versions in place
+ hits.result_refs,
+ fatcat_api_client=api,
+ expand="container,files,webcaptures",
+ )
+ else:
+ raise ValueError()  # direction must be "in" or "out"
+ return hits
+
+
+@app.route('/release/<string(length=26):ident>/refs-in', methods=['GET'])
+def release_view_refs_inbound(ident):  # HTML view of inbound refs for a release
+ if request.accept_mimetypes.best == "application/json":  # content negotiation: hand JSON-preferring clients to the JSON endpoint
+ return release_view_refs_inbound_json(ident)
- hits = get_inbound_refs(release_ident=ident, es_client=app.es_client, offset=offset, limit=30)
- enriched_refs = enrich_inbound_refs(hits.result_refs, fatcat_api_client=api, expand="container,files,webcaptures")
-
- return render_template('release_view_fuzzy_refs.html', direction="in", entity=release, hits=hits, enriched_refs=enriched_refs), 200
+ release = generic_get_entity("release", ident)
+ hits = _refs_web("in", release_ident=ident)  # hits.result_refs already holds the enriched refs
+ return render_template('release_view_fuzzy_refs.html', direction="in", entity=release, hits=hits), 200
-@app.route('/release/<string(length=26):ident>/refs/out', methods=['GET'])
+@app.route('/release/<string(length=26):ident>/refs-out', methods=['GET'])
def release_view_refs_outbound(ident):
+ if request.accept_mimetypes.best == "application/json":  # content negotiation: hand JSON-preferring clients to the JSON endpoint
+ return release_view_refs_outbound_json(ident)
release = generic_get_entity("release", ident)
+ hits = _refs_web("out", release_ident=ident)  # hits.result_refs already holds the enriched refs
+ return render_template('release_view_fuzzy_refs.html', direction="out", entity=release, hits=hits), 200
- offset = request.args.get('offset', '0')
- offset = max(0, int(offset)) if offset.isnumeric() else 0
-
- hits = get_outbound_refs(release_ident=ident, es_client=app.es_client, offset=offset, limit=30)
- enriched_refs = enrich_outbound_refs(hits.result_refs, fatcat_api_client=api, expand="container,files,webcaptures")
+@app.route('/openlibrary/OL<int:id_num>W/refs-in', methods=['GET'])
+def openlibrary_view_refs_inbound(id_num):  # HTML view of inbound refs for an Open Library work
+ if request.accept_mimetypes.best == "application/json":  # content negotiation: hand JSON-preferring clients to the JSON endpoint
+ return openlibrary_view_refs_inbound_json(id_num)  # fix: was calling this same view again, which recursed without end
- return render_template('release_view_fuzzy_refs.html', direction="out", entity=release, hits=hits, enriched_refs=enriched_refs), 200
+ openlibrary_id = f"OL{id_num}W"  # rebuild the full work identifier from the integer captured by the route
+ hits = _refs_web("in", openlibrary_id=openlibrary_id)
+ return render_template('openlibrary_view_fuzzy_refs.html', openlibrary_id=openlibrary_id, direction="in", hits=hits), 200
@app.route('/reference/match', methods=['GET', 'POST'])
@@ -85,3 +123,26 @@ def reference_match():
return render_template('reference_match.html', form=form), 400
return render_template('reference_match.html', form=form), 200
+
+
+### Pseudo-APIs #############################################################
+
+@app.route('/release/<string(length=26):ident>/refs-out.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def release_view_refs_outbound_json(ident):  # JSON pseudo-API: outbound refs for a release
+ hits = _refs_web("out", release_ident=ident)
+ return Response(hits.json(exclude_unset=True), mimetype="application/json")  # serialize the RefHits model, omitting fields that were never set
+
+
+@app.route('/release/<string(length=26):ident>/refs-in.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def release_view_refs_inbound_json(ident):  # JSON pseudo-API: inbound refs for a release
+ hits = _refs_web("in", release_ident=ident)
+ return Response(hits.json(exclude_unset=True), mimetype="application/json")  # serialize the RefHits model, omitting fields that were never set
+
+@app.route('/openlibrary/OL<int:id_num>W/refs-in.json', methods=['GET', 'OPTIONS'])
+@crossdomain(origin='*',headers=['access-control-allow-origin','Content-Type'])
+def openlibrary_view_refs_inbound_json(id_num):  # JSON pseudo-API: inbound refs for an Open Library work; parameter name must match the <int:id_num> URL variable
+ openlibrary_id = f"OL{id_num}W"  # rebuild the full work identifier from the integer captured by the route
+ hits = _refs_web("in", openlibrary_id=openlibrary_id)
+ return Response(hits.json(exclude_unset=True), mimetype="application/json")  # serialize the RefHits model, omitting fields that were never set
diff --git a/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html b/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html
new file mode 100644
index 00000000..161a7b50
--- /dev/null
+++ b/python/fatcat_web/templates/openlibrary_view_fuzzy_refs.html
@@ -0,0 +1,29 @@
+{% extends "base.html" %}
+{% import "refs_macros.html" as refs_macros %}
+
+{% block title %}Open Library Refs{% endblock %}
+
+{% block fullbody %}
+<h1 class="ui header">
+ {% if hits.result_refs and hits.result_refs[0].ref.target_unstructured %}
+ <i>{{ hits.result_refs[0].ref.target_unstructured }}</i>
+ {% endif %}
+ <span class="sub header"><a href="https://openlibrary.org/works/{{ openlibrary_id }}"><code>https://openlibrary.org/works/{{ openlibrary_id }}</code></a></span>
+</h1>
+
+{% if direction == "in" %}
+ <h3>Cited By</h3>
+ <p>This page lists references to this book from other works (eg, journal articles).
+{% elif direction == "out" %}
+ <h3>References</h3>
+ <i>References from this book to other entities.</i>
+{% endif %}
+
+{% if hits.result_refs %}
+ {{ refs_macros.refs_table(hits, direction) }}
+{% else %}
+ <br><br><p><b>None found</b>
+{% endif %}
+
+{% endblock %}
+