about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/fileset_strategies.py 15
1 files changed, 8 insertions, 7 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index d2a8e99..1d84ce5 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -212,15 +212,16 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
# 2. upload all files, with metadata
assert item.archiveorg_item_meta and item.archiveorg_item_meta["collection"]
- item_files = []
+ item_files = {}
for m in item.manifest:
local_path = local_dir + "/" + m.path
- item_files.append(
- {
- "name": local_path,
- "remote_name": m.path,
- }
- )
+ if m.path == "name":
+ raise NotImplementedError(
+ "fileset file path is 'name', which is a reserved keyword"
+ )
+ item_files[m.path] = local_path
+ if len(item_files) != len(item.manifest):
+ raise NotImplementedError("file/manifest length mismatch: duplicated file paths?")
print(
f" uploading all files to {item.archiveorg_item_name} under {item.archiveorg_item_meta.get('collection')}...",