diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-04-15 23:29:34 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-07-23 10:55:09 -0700 |
commit | bb085c92760d6ccbd6c92e13fcae0af02b5a3d17 (patch) | |
tree | dc63f2fe6050b16e52a0374126cc0e4b8fad1257 /python/fatcat_tools | |
parent | 7186a379f335dd2731d5db79ab85abf3506cee88 (diff) | |
download | fatcat-bb085c92760d6ccbd6c92e13fcae0af02b5a3d17.tar.gz fatcat-bb085c92760d6ccbd6c92e13fcae0af02b5a3d17.zip |
partial access options transform for releases
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- | python/fatcat_tools/transforms/access.py | 58 |
1 files changed, 58 insertions, 0 deletions
# python/fatcat_tools/transforms/access.py (new file in this commit)
"""Transform fatcat release entities into a list of fulltext access options."""

from enum import Enum
from typing import Optional, List, Any, Dict

from pydantic import BaseModel
from fatcat_openapi_client import ReleaseEntity


class AccessType(str, Enum):
    """describes type of access URL"""

    wayback = "wayback"
    ia_file = "ia_file"
    ia_microfilm = "ia_microfilm"
    repository = "repository"


class AccessOption(BaseModel):
    """A single way of accessing a release: a typed URL plus file metadata."""

    access_type: AccessType

    # note: for `target_url` refs, would do a CDX lookup and this URL would be
    # a valid/HTTP-200 web.archive.org capture URL
    access_url: str

    # application/pdf, text/html, etc
    # blank for landing pages
    mimetype: Optional[str] = None

    # explicit `None` defaults keep these fields optional regardless of how
    # the installed pydantic version treats a bare `Optional[...]` annotation
    size_bytes: Optional[int] = None
    thumbnail_url: Optional[str] = None


def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
    """
    Extracts access options from a release.

    Walks the release's files and each file's URLs in order, and returns a
    single-element list for the first URL containing either
    '://web.archive.org/' (typed as "wayback") or '://archive.org/' (typed as
    "ia_file"). The file's `mimetype` and `size` are copied onto the option;
    `thumbnail_url` is always None. Returns an empty list when the release has
    no files, no URLs, or no matching URL.

    TODO: proper implementation
    """
    for f in (release.files or []):
        for u in (f.urls or []):
            # a missing/empty URL can not match either pattern; skipping it
            # avoids a TypeError from the substring test on None
            if not u.url:
                continue
            if '://web.archive.org/' in u.url:
                access_type = AccessType.wayback
            elif '://archive.org/' in u.url:
                access_type = AccessType.ia_file
            else:
                continue
            # only the first match is reported (see TODO above)
            return [AccessOption(
                access_type=access_type,
                access_url=u.url,
                mimetype=f.mimetype,
                size_bytes=f.size,
                thumbnail_url=None,
            )]
    return []