diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/fatcat_tools/transforms/access.py | 58 | 
1 files changed, 58 insertions, 0 deletions
| diff --git a/python/fatcat_tools/transforms/access.py b/python/fatcat_tools/transforms/access.py new file mode 100644 index 00000000..231cd2b3 --- /dev/null +++ b/python/fatcat_tools/transforms/access.py @@ -0,0 +1,58 @@ + +from enum import Enum +from typing import Optional, List, Any, Dict + +from pydantic import BaseModel +from fatcat_openapi_client import ReleaseEntity + + +class AccessType(str, Enum): +    """describes type of access URL""" + +    wayback = "wayback" +    ia_file = "ia_file" +    ia_microfilm = "ia_microfilm" +    repository = "repository" + +class AccessOption(BaseModel): + +    access_type: AccessType + +    # note: for `target_url` refs, would do a CDX lookup and this URL would be +    # a valid/HTTP-200 web.archive.org capture URL +    access_url: str + +    # application/pdf, text/html, etc +    # blank for landing pages +    mimetype: Optional[str] + +    size_bytes: Optional[int] +    thumbnail_url: Optional[str] + + +def release_access_options(release: ReleaseEntity) -> List[AccessOption]: +    """ +    Extracts access options from a release. + +    TODO: proper implementation +    """ +    options = [] +    for f in (release.files or []): +        for u in (f.urls or []): +            if '://web.archive.org/' in u.url: +                return [AccessOption( +                    access_type="wayback", +                    access_url=u.url, +                    mimetype=f.mimetype, +                    size_bytes=f.size, +                    thumbnail_url=None +                )] +            elif '://archive.org/' in u.url: +                return [AccessOption( +                    access_type="ia_file", +                    access_url=u.url, +                    mimetype=f.mimetype, +                    size_bytes=f.size, +                    thumbnail_url=None +                )] +    return options | 
