aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@robocracy.org> 2021-10-18 10:42:26 -0700
committer Bryan Newbold <bnewbold@robocracy.org> 2021-10-18 10:42:26 -0700
commit 6cbfaaa5e58ae4c0b482e3573e7e99300a857af8 (patch)
tree 4b2693dd0b125ee57c67419dd1ba8ca5b91918eb /python
parent 7f85ecea4e5a844ad78d129ed0b32a759ca7c1ad (diff)
download fatcat-6cbfaaa5e58ae4c0b482e3573e7e99300a857af8.tar.gz
fatcat-6cbfaaa5e58ae4c0b482e3573e7e99300a857af8.zip
access: populate thumbnail_url for PDFs
Diffstat (limited to 'python')
-rw-r--r-- python/fatcat_tools/transforms/access.py 12
1 files changed, 9 insertions, 3 deletions
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index 5ed64c7c..39d4c6d3 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -36,10 +36,16 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
"""
Extracts access options from a release.
- TODO: proper implementation
+ TODO: proper implementation and filtering, instead of just returning first
+ option found
"""
options = []
for f in (release.files or []):
+ thumbnail_url = None
+ if f.mimetype == 'application/pdf' and f.sha1 and f.urls:
+ # NOTE: scholar.archive.org does an actual database check before
+ # generating these URLs, but we skip that for speed
+ thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg"
for u in (f.urls or []):
if '://web.archive.org/' in u.url:
return [AccessOption(
@@ -47,7 +53,7 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
access_url=u.url,
mimetype=f.mimetype,
size_bytes=f.size,
- thumbnail_url=None
+ thumbnail_url=thumbnail_url,
)]
elif '://archive.org/' in u.url:
return [AccessOption(
@@ -55,6 +61,6 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
access_url=u.url,
mimetype=f.mimetype,
size_bytes=f.size,
- thumbnail_url=None
+ thumbnail_url=thumbnail_url,
)]
return options