diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-04 13:01:58 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 18:15:20 -0700 |
commit | e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e (patch) | |
tree | bee2b4343f30d59fec463dbdcaafafc11d7cd513 /python/sandcrawler/fileset_types.py | |
parent | 452475df7619f3743eac5ad86e2e1fb8ba9972da (diff) | |
download | sandcrawler-e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e.tar.gz sandcrawler-e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e.zip |
progress on fileset/dataset ingest
Diffstat (limited to 'python/sandcrawler/fileset_types.py')
-rw-r--r-- | python/sandcrawler/fileset_types.py | 43 |
1 files changed, 43 insertions, 0 deletions
# Source: python/sandcrawler/fileset_types.py (added as a new file by this commit)
"""Shared type definitions for fileset/dataset ingest.

Defines the set of ingest strategy identifiers and the pydantic models used
to describe fileset manifests, platform items, and archive results.
"""

from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel


class IngestStrategy(str, Enum):
    """Closed set of ingest strategy identifiers.

    Members also subclass ``str``, so each member compares equal to its
    string value (e.g. ``IngestStrategy.WebFile == "web-file"``).
    """

    WebFile = "web-file"
    WebFileset = "web-fileset"
    WebFilesetBundled = "web-fileset-bundled"
    ArchiveorgFile = "archiveorg-file"
    ArchiveorgFileset = "archiveorg-fileset"
    ArchiveorgFilesetBundled = "archiveorg-fileset-bundled"


class FilesetManifestFile(BaseModel):
    """A single file entry in a fileset manifest.

    Only ``path`` is required. Every other field is optional and defaults to
    ``None``; the defaults are written out explicitly so the fields stay
    optional regardless of how the installed pydantic version treats a bare
    ``Optional[...]`` annotation.
    """

    path: str
    size: Optional[int] = None
    md5: Optional[str] = None
    sha1: Optional[str] = None
    sha256: Optional[str] = None
    mimetype: Optional[str] = None

    # NOTE(review): the fields below look like per-file fetch/crawl results
    # (status, source and final URLs, capture timestamp) -- confirm against
    # the code that populates them.
    status: Optional[str] = None
    platform_url: Optional[str] = None
    terminal_url: Optional[str] = None
    terminal_dt: Optional[str] = None
    extra: Optional[Dict[str, Any]] = None


class DatasetPlatformItem(BaseModel):
    """Description of a dataset item as found on a hosting platform.

    ``platform_name`` and ``platform_status`` are required; the manifest and
    all remaining fields are optional and default to ``None``.
    """

    platform_name: str
    platform_status: str
    manifest: Optional[List[FilesetManifestFile]] = None

    platform_domain: Optional[str] = None
    platform_id: Optional[str] = None
    archiveorg_item_name: Optional[str] = None
    archiveorg_collection: Optional[str] = None
    web_base_url: Optional[str] = None
    web_bundle_url: Optional[str] = None


class ArchiveStrategyResult(BaseModel):
    """Result of an archive strategy; all three fields are required.

    NOTE(review): ``ingest_strategy`` is typed as a plain ``str`` rather than
    ``IngestStrategy``; presumably it carries one of that enum's values --
    confirm with callers before tightening the type.
    """

    ingest_strategy: str
    status: str
    manifest: List[FilesetManifestFile]