aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/fileset_types.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-04 13:01:58 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-15 18:15:20 -0700
commit e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e (patch)
tree bee2b4343f30d59fec463dbdcaafafc11d7cd513 /python/sandcrawler/fileset_types.py
parent 452475df7619f3743eac5ad86e2e1fb8ba9972da (diff)
download sandcrawler-e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e.tar.gz
sandcrawler-e2e0602114ccdf142b3ef0f30c67d2cb7a58ef7e.zip
progress on fileset/dataset ingest
Diffstat (limited to 'python/sandcrawler/fileset_types.py')
-rw-r--r-- python/sandcrawler/fileset_types.py 43
1 files changed, 43 insertions, 0 deletions
diff --git a/python/sandcrawler/fileset_types.py b/python/sandcrawler/fileset_types.py
new file mode 100644
index 0000000..f0f03db
--- /dev/null
+++ b/python/sandcrawler/fileset_types.py
@@ -0,0 +1,43 @@
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel
+
+class IngestStrategy(str, Enum):  # str mixin: every member is also a str equal to its value
+ WebFile = "web-file"  # values are kebab-case: "<web|archiveorg>-<file|fileset>[-bundled]"
+ WebFileset = "web-fileset"
+ WebFilesetBundled = "web-fileset-bundled"  # NOTE(review): "bundled" presumably = one archive file; confirm
+ ArchiveorgFile = "archiveorg-file"  # archiveorg-* members mirror the three web-* members above
+ ArchiveorgFileset = "archiveorg-fileset"
+ ArchiveorgFilesetBundled = "archiveorg-fileset-bundled"
+
+class FilesetManifestFile(BaseModel):  # one manifest entry; explicit None defaults work on pydantic v1 and v2
+    path: str  # the only required field
+    size: Optional[int] = None  # presumably a byte count; unit is not fixed by this model (confirm)
+    md5: Optional[str] = None  # digests are stored as str; encoding (hex?) is not validated here
+    sha1: Optional[str] = None
+    sha256: Optional[str] = None
+    mimetype: Optional[str] = None
+
+    status: Optional[str] = None  # free-form str; not constrained to an enum by this model
+    platform_url: Optional[str] = None
+    terminal_url: Optional[str] = None
+    terminal_dt: Optional[str] = None  # kept as str, not datetime; format is not validated here
+    extra: Optional[Dict[str, Any]] = None  # arbitrary str-keyed extras; None (not {}) when absent
+
+class DatasetPlatformItem(BaseModel):  # platform-side record; explicit None defaults work on pydantic v1 and v2
+    platform_name: str  # required
+    platform_status: str  # required; free-form str, not constrained to an enum by this model
+    manifest: Optional[List[FilesetManifestFile]] = None  # None when absent (distinct from an empty list)
+
+    platform_domain: Optional[str] = None  # everything below is optional and defaults to None
+    platform_id: Optional[str] = None
+    archiveorg_item_name: Optional[str] = None
+    archiveorg_collection: Optional[str] = None
+    web_base_url: Optional[str] = None
+    web_bundle_url: Optional[str] = None
+
+class ArchiveStrategyResult(BaseModel):  # result record with three fields, all required
+ ingest_strategy: str  # plain str, not typed as IngestStrategy; NOTE(review): presumably one of its values
+ status: str  # free-form str; not constrained to an enum by this model
+ manifest: List[FilesetManifestFile]  # required here, unlike the Optional manifest on DatasetPlatformItem