diff options
| author | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-18 10:42:26 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-10-18 10:42:26 -0700 | 
| commit | 6cbfaaa5e58ae4c0b482e3573e7e99300a857af8 (patch) | |
| tree | 4b2693dd0b125ee57c67419dd1ba8ca5b91918eb /python | |
| parent | 7f85ecea4e5a844ad78d129ed0b32a759ca7c1ad (diff) | |
| download | fatcat-6cbfaaa5e58ae4c0b482e3573e7e99300a857af8.tar.gz fatcat-6cbfaaa5e58ae4c0b482e3573e7e99300a857af8.zip | |
access: populate thumbnail_url for PDFs
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/transforms/access.py | 12 | 
1 file changed, 9 insertions, 3 deletions
```diff
diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py
index 5ed64c7c..39d4c6d3 100644
--- a/python/fatcat_tools/transforms/access.py
+++ b/python/fatcat_tools/transforms/access.py
@@ -36,10 +36,16 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
     """
     Extracts access options from a release.
 
-    TODO: proper implementation
+    TODO: proper implementation and filtering, instead of just returning first
+    option found
     """
     options = []
     for f in (release.files or []):
+        thumbnail_url = None
+        if f.mimetype == 'application/pdf' and f.sha1 and f.urls:
+            # NOTE: scholar.archive.org does an actual database check before
+            # generating these URLs, but we skip that for speed
+            thumbnail_url = f"https://blobs.fatcat.wiki/thumbnail/pdf/{f.sha1[0:2]}/{f.sha1[2:4]}/{f.sha1}.180px.jpg"
         for u in (f.urls or []):
             if '://web.archive.org/' in u.url:
                 return [AccessOption(
@@ -47,7 +53,7 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
                     access_url=u.url,
                     mimetype=f.mimetype,
                     size_bytes=f.size,
-                    thumbnail_url=None
+                    thumbnail_url=thumbnail_url,
                 )]
             elif '://archive.org/' in u.url:
                 return [AccessOption(
@@ -55,6 +61,6 @@ def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
                     access_url=u.url,
                     mimetype=f.mimetype,
                     size_bytes=f.size,
-                    thumbnail_url=None
+                    thumbnail_url=thumbnail_url,
                 )]
     return options
```
