about summary refs log tree commit diff stats
path: root/python/sandcrawler/fileset_strategies.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-11 11:01:38 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-15 18:15:29 -0700
commit 2e285e469251125ee70bc4c3408dbbcad8701b2c (patch)
tree 6902a89a29d547cad1e7362ffef2cfe565e52d44 /python/sandcrawler/fileset_strategies.py
parent 7a98b12907cff5f6d4a56898b49703289127df21 (diff)
download sandcrawler-2e285e469251125ee70bc4c3408dbbcad8701b2c.tar.gz
sandcrawler-2e285e469251125ee70bc4c3408dbbcad8701b2c.zip
fileset ingest: improve error handling
Diffstat (limited to 'python/sandcrawler/fileset_strategies.py')
-rw-r--r-- python/sandcrawler/fileset_strategies.py 7
1 files changed, 3 insertions, 4 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index 6bda9b4..43f1a53 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -12,7 +12,7 @@ import internetarchive
from sandcrawler.html_metadata import BiblioMetadata
from sandcrawler.ia import ResourceResult, WaybackClient, SavePageNowClient, fix_transfer_encoding
-from sandcrawler.fileset_types import IngestStrategy, FilesetManifestFile, DatasetPlatformItem, ArchiveStrategyResult
+from sandcrawler.fileset_types import IngestStrategy, FilesetManifestFile, DatasetPlatformItem, ArchiveStrategyResult, PlatformScopeError
from sandcrawler.misc import gen_file_metadata, gen_file_metadata_path
@@ -83,7 +83,7 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
return existing
if item.platform_name == 'archiveorg':
- raise ValueError("should't download archive.org into itself")
+ raise PlatformScopeError("should't download archive.org into itself")
local_dir = self.working_dir + item.archiveorg_item_name
assert local_dir.startswith('/')
@@ -142,9 +142,8 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
m.mimetype = file_meta['mimetype']
m.status = 'verified-local'
- # 2. setup archive.org item metadata
+ # 2. upload all files, with metadata
assert item.archiveorg_item_meta['collection']
- # 3. upload all files
item_files = []
for m in item.manifest:
local_path = local_dir + '/' + m.path