aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@robocracy.org>, 2021-07-23 17:51:22 -0700
committer: Bryan Newbold <bnewbold@robocracy.org>, 2021-07-23 17:51:41 -0700
commit: 05665c448e846e4834fa59dfe3cd0f11ac789ac9 (patch)
tree: 789ed4cb850f805e022e857359ca653060882550 /python/fatcat_tools
parent: 5fde5d74738ce3c834248c12bae1860840a1287a (diff)
download: fatcat-05665c448e846e4834fa59dfe3cd0f11ac789ac9.tar.gz
fatcat-05665c448e846e4834fa59dfe3cd0f11ac789ac9.zip
refs: generalize web endpoints; JSON content negotiation; openlibrary inbound view; etc
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/references.py 77
-rw-r--r-- python/fatcat_tools/transforms/access.py 2
2 files changed, 57 insertions, 22 deletions
diff --git a/python/fatcat_tools/references.py b/python/fatcat_tools/references.py
index 73f57e18..81b55f41 100644
--- a/python/fatcat_tools/references.py
+++ b/python/fatcat_tools/references.py
@@ -10,13 +10,14 @@ import datetime
import argparse
from typing import Optional, List, Any, Dict, Union
-from pydantic import BaseModel
+from pydantic import BaseModel, validator
import elasticsearch
from elasticsearch_dsl import Search
from fatcat_openapi_client import ReleaseEntity
from fatcat_tools import public_api
from fatcat_tools.transforms.access import release_access_options, AccessOption
+from fatcat_tools.transforms.entities import entity_to_dict
class BiblioRef(BaseModel):
@@ -97,8 +98,17 @@ class EnrichedBiblioRef(BaseModel):
# TODO: openlibrary work?
access: List[AccessOption]
+ @validator('release')
+ def check_release(cls, v):
+ if v is not None and not isinstance(v, ReleaseEntity):
+ raise ValueError("expected a ReleaseEntity")
+ return v
+
class Config:
arbitrary_types_allowed = True
+ json_encoders = {
+ ReleaseEntity: entity_to_dict,
+ }
class RefHits(BaseModel):
@@ -110,6 +120,11 @@ class RefHits(BaseModel):
query_wall_time_ms: int
result_refs: List[Union[BiblioRef,EnrichedBiblioRef]]
+ class Config:
+ json_encoders = {
+ ReleaseEntity: entity_to_dict,
+ }
+
def _execute_ref_query(search: Any, limit: int, offset: Optional[int] = None) -> RefHits:
"""
@@ -268,40 +283,58 @@ def count_inbound_refs(
def enrich_inbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]:
# Wraps each inbound ref in an EnrichedBiblioRef carrying the source release (fetched via fatcat_api_client when an ident is present) and its access options.
enriched = []
for ref in refs:
+ release = None
+ access = []
if ref.source_release_ident:
release = fatcat_api_client.get_release(ref.source_release_ident, hide=hide, expand=expand)
- enriched.append(EnrichedBiblioRef(
- ref=ref,
- #csl=None,
- access=release_access_options(release),
- release=release,
- ))
- else:
- enriched.append(EnrichedBiblioRef(
- ref=ref,
- #csl=None,
- access=[],
- release=None,
+ access = release_access_options(release)
+ if ref.source_wikipedia_article:
+ # Read the same field the guard tests. The value is split as "<lang>:<title>":
+ # only the first colon separates the language, so the remainder is re-joined.
+ wiki_lang = ref.source_wikipedia_article.split(':')[0]
+ wiki_article = ':'.join(ref.source_wikipedia_article.split(':')[1:])
+ access.append(AccessOption(
+ access_type="wikipedia",
+ access_url=f"https://{wiki_lang}.wikipedia.org/wiki/{wiki_article}",
+ mimetype=None,
+ size_bytes=None,
+ thumbnail_url=None
))
+ enriched.append(EnrichedBiblioRef(
+ ref=ref,
+ access=access,
+ release=release,
+ ))
return enriched
def enrich_outbound_refs(refs: List[BiblioRef], fatcat_api_client: Any, hide: Optional[str] = "refs", expand: Optional[str] = "container,files,webcaptures,filesets") -> List[EnrichedBiblioRef]:
# Wraps each outbound ref in an EnrichedBiblioRef carrying the target release (fetched via fatcat_api_client when an ident is present) and any access options collected for it.
enriched = []
for ref in refs:
+ release = None  # stays None when the ref has no target_release_ident
+ access = []
if ref.target_release_ident:
release = fatcat_api_client.get_release(ref.target_release_ident, hide=hide, expand=expand)
- enriched.append(EnrichedBiblioRef(
- ref=ref,
- access=release_access_options(release),
- release=release,
+ access = release_access_options(release)
+ if ref.target_openlibrary_work:  # add a link to the openlibrary.org works page for this identifier
+ access.append(AccessOption(
+ access_type="openlibrary",
+ access_url=f"https://openlibrary.org/works/{ref.target_openlibrary_work}",
+ mimetype=None,
+ size_bytes=None,
+ thumbnail_url=None
))
- else:
- enriched.append(EnrichedBiblioRef(
- ref=ref,
- access=[],
- release=None,
+ if ref.target_url and '://web.archive.org/' in ref.target_url:  # target URL already points at web.archive.org; expose it unchanged
+ access.append(AccessOption(
+ access_type="wayback",
+ access_url=ref.target_url,
+ mimetype=None,
+ size_bytes=None,
+ thumbnail_url=None
))
+ enriched.append(EnrichedBiblioRef(
+ ref=ref,
+ access=access,
+ release=release,
+ ))
return enriched
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index add8ff3b..5ed64c7c 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -13,6 +13,8 @@ class AccessType(str, Enum):
ia_file = "ia_file"
ia_microfilm = "ia_microfilm"
repository = "repository"
+ openlibrary = "openlibrary"
+ wikipedia = "wikipedia"
class AccessOption(BaseModel):