summaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools/transforms/access.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2021-08-06 11:58:16 -0700
committerBryan Newbold <bnewbold@robocracy.org>2021-08-06 11:58:16 -0700
commit99885b458ad505ebb63b3e7cf5b1bae3dd2a459e (patch)
treede3fbb3e42b0bb7f6e447d2e13ac3f92a8bb90b2 /python/fatcat_tools/transforms/access.py
parent950d3f08bd439aed92d01dbc3cca9747570aa82c (diff)
parent56e4ce2d8347cdfedd492d54fde080772f3d8725 (diff)
downloadfatcat-99885b458ad505ebb63b3e7cf5b1bae3dd2a459e.tar.gz
fatcat-99885b458ad505ebb63b3e7cf5b1bae3dd2a459e.zip
Merge branch 'bnewbold-refs-apis'
Diffstat (limited to 'python/fatcat_tools/transforms/access.py')
-rw-r--r--python/fatcat_tools/transforms/access.py60
1 files changed, 60 insertions, 0 deletions
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
new file mode 100644
index 00000000..5ed64c7c
--- /dev/null
+++ b/python/fatcat_tools/transforms/access.py
@@ -0,0 +1,60 @@
+
+from enum import Enum
+from typing import Optional, List
+
+from pydantic import BaseModel
+from fatcat_openapi_client import ReleaseEntity
+
+
+class AccessType(str, Enum):
+    """
+    Kinds of access URL that an access option can carry.
+
+    The enum mixes in `str`, so every member compares equal to (and can be
+    used as) its plain string value, e.g. `AccessType.wayback == "wayback"`.
+    """
+
+    wayback = "wayback"
+    ia_file = "ia_file"
+    ia_microfilm = "ia_microfilm"
+    repository = "repository"
+    openlibrary = "openlibrary"
+    wikipedia = "wikipedia"
+
+class AccessOption(BaseModel):
+ """
+ One way of reaching a copy of some content: the kind of access, the URL
+ to use, and optional metadata about what that URL serves.
+
+ Declared as a pydantic model (subclass of `BaseModel`).
+ """
+
+ # which kind of access URL this is; one of the AccessType members
+ access_type: AccessType
+
+ # note: for `target_url` refs, would do a CDX lookup and this URL would be
+ # a valid/HTTP-200 web.archive.org capture URL
+ access_url: str
+
+ # application/pdf, text/html, etc
+ # blank for landing pages
+ mimetype: Optional[str]
+
+ # release_access_options() fills this from the file entity's `size`
+ # NOTE(review): presumably a byte count, going by the field name -- confirm
+ size_bytes: Optional[int]
+ # release_access_options() always passes None for this field
+ thumbnail_url: Optional[str]
+
+
+def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
+    """
+    Returns the first archive-hosted access option found for a release.
+
+    Walks the release's files, and each file's URLs, in order. The first URL
+    containing '://web.archive.org/' yields a single "wayback" option; the
+    first containing '://archive.org/' yields a single "ia_file" option.
+    Whichever matching URL comes first wins, and only that one option is
+    returned. If nothing matches (or the release has no files or URLs), the
+    result is an empty list.
+
+    TODO: proper implementation
+    """
+    for file_entity in (release.files or []):
+        for file_url in (file_entity.urls or []):
+            # classify the URL; anything not hosted on these domains is skipped
+            if '://web.archive.org/' in file_url.url:
+                kind = "wayback"
+            elif '://archive.org/' in file_url.url:
+                kind = "ia_file"
+            else:
+                continue
+            # stop at the first match: at most one option is ever returned
+            return [AccessOption(
+                access_type=kind,
+                access_url=file_url.url,
+                mimetype=file_entity.mimetype,
+                size_bytes=file_entity.size,
+                thumbnail_url=None
+            )]
+    return []