diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-11 11:01:38 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 18:15:29 -0700 |
commit | 2e285e469251125ee70bc4c3408dbbcad8701b2c (patch) | |
tree | 6902a89a29d547cad1e7362ffef2cfe565e52d44 /python/sandcrawler/fileset_strategies.py | |
parent | 7a98b12907cff5f6d4a56898b49703289127df21 (diff) | |
download | sandcrawler-2e285e469251125ee70bc4c3408dbbcad8701b2c.tar.gz sandcrawler-2e285e469251125ee70bc4c3408dbbcad8701b2c.zip |
fileset ingest: improve error handling
Diffstat (limited to 'python/sandcrawler/fileset_strategies.py')
-rw-r--r-- | python/sandcrawler/fileset_strategies.py | 7 |
1 files changed, 3 insertions, 4 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index 6bda9b4..43f1a53 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -12,7 +12,7 @@ import internetarchive
 
 from sandcrawler.html_metadata import BiblioMetadata
 from sandcrawler.ia import ResourceResult, WaybackClient, SavePageNowClient, fix_transfer_encoding
-from sandcrawler.fileset_types import IngestStrategy, FilesetManifestFile, DatasetPlatformItem, ArchiveStrategyResult
+from sandcrawler.fileset_types import IngestStrategy, FilesetManifestFile, DatasetPlatformItem, ArchiveStrategyResult, PlatformScopeError
 from sandcrawler.misc import gen_file_metadata, gen_file_metadata_path
 
 
@@ -83,7 +83,7 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
             return existing
 
         if item.platform_name == 'archiveorg':
-            raise ValueError("should't download archive.org into itself")
+            raise PlatformScopeError("should't download archive.org into itself")
 
         local_dir = self.working_dir + item.archiveorg_item_name
         assert local_dir.startswith('/')
@@ -142,9 +142,8 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
             m.mimetype = file_meta['mimetype']
             m.status = 'verified-local'
 
-        # 2. setup archive.org item metadata
+        # 2. upload all files, with metadata
         assert item.archiveorg_item_meta['collection']
-        # 3. upload all files
         item_files = []
         for m in item.manifest:
             local_path = local_dir + '/' + m.path