diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2021-08-06 11:58:16 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2021-08-06 11:58:16 -0700 |
commit | 99885b458ad505ebb63b3e7cf5b1bae3dd2a459e (patch) | |
tree | de3fbb3e42b0bb7f6e447d2e13ac3f92a8bb90b2 /python/fatcat_tools/transforms | |
parent | 950d3f08bd439aed92d01dbc3cca9747570aa82c (diff) | |
parent | 56e4ce2d8347cdfedd492d54fde080772f3d8725 (diff) | |
download | fatcat-99885b458ad505ebb63b3e7cf5b1bae3dd2a459e.tar.gz fatcat-99885b458ad505ebb63b3e7cf5b1bae3dd2a459e.zip |
Merge branch 'bnewbold-refs-apis'
Diffstat (limited to 'python/fatcat_tools/transforms')
-rw-r--r-- | python/fatcat_tools/transforms/access.py | 60 |
1 files changed, 60 insertions, 0 deletions
# python/fatcat_tools/transforms/access.py
# (added as a new file, mode 100644, by this commit; blob 5ed64c7c)

from enum import Enum
from typing import Optional, List

from pydantic import BaseModel
from fatcat_openapi_client import ReleaseEntity


class AccessType(str, Enum):
    """Describes the type of an access URL."""

    wayback = "wayback"
    ia_file = "ia_file"
    ia_microfilm = "ia_microfilm"
    repository = "repository"
    openlibrary = "openlibrary"
    wikipedia = "wikipedia"


class AccessOption(BaseModel):
    """A single way of accessing a work: a typed URL plus optional metadata."""

    access_type: AccessType

    # note: for `target_url` refs, would do a CDX lookup and this URL would be
    # a valid/HTTP-200 web.archive.org capture URL
    access_url: str

    # application/pdf, text/html, etc
    # blank for landing pages
    mimetype: Optional[str] = None

    # Explicit `None` defaults so these fields may be omitted by callers.
    size_bytes: Optional[int] = None
    thumbnail_url: Optional[str] = None


def release_access_options(release: ReleaseEntity) -> List[AccessOption]:
    """
    Extracts access options from a release.

    Walks `release.files` in order, and each file's `urls` in order, and
    returns a single-element list for the first URL that contains either
    '://web.archive.org/' (reported as `AccessType.wayback`) or
    '://archive.org/' (reported as `AccessType.ia_file`). The option's
    `mimetype` and `size_bytes` are copied from the file the URL belongs to;
    `thumbnail_url` is always None.

    Returns an empty list when the release has no files, or when no file URL
    matches. URL entries with a missing/empty `url` are skipped.

    TODO: proper implementation (currently only the first match is returned)
    """
    for f in (release.files or []):
        for u in (f.urls or []):
            # A missing URL cannot match anything; skipping it also avoids a
            # TypeError from the substring checks below.
            if not u.url:
                continue
            if '://web.archive.org/' in u.url:
                access_type = AccessType.wayback
            elif '://archive.org/' in u.url:
                access_type = AccessType.ia_file
            else:
                continue
            # First match wins: stop at the first recognized URL.
            return [AccessOption(
                access_type=access_type,
                access_url=u.url,
                mimetype=f.mimetype,
                size_bytes=f.size,
                thumbnail_url=None,
            )]
    return []