about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/fileset_platforms.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-10-15 17:12:40 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-10-15 18:15:29 -0700
commit a09396caefe709b521e560add5b01c1a5c94cb53 (patch)
tree 6a4464f569d1d46fbc5e6cc54ba593c0680c499b /python/sandcrawler/fileset_platforms.py
parent ba324ae5a6051c47d4cf7524c28caeda7abd6fc5 (diff)
download sandcrawler-a09396caefe709b521e560add5b01c1a5c94cb53.tar.gz
sandcrawler-a09396caefe709b521e560add5b01c1a5c94cb53.zip
more fileset iteration
Diffstat (limited to 'python/sandcrawler/fileset_platforms.py')
-rw-r--r-- python/sandcrawler/fileset_platforms.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py
index bcf2144..cc07948 100644
--- a/python/sandcrawler/fileset_platforms.py
+++ b/python/sandcrawler/fileset_platforms.py
@@ -674,8 +674,8 @@ class ArchiveOrgHelper(FilesetPlatformHelper):
assert base_url_split[2] == 'archive.org'
assert base_url_split[3] in ['details', 'download']
item_name = base_url_split[4]
- if len(base_url_split) == 6:
- assert not base_url_split[5]
+ if len(base_url_split) == 6 and base_url_split[5]:
+ raise PlatformScopeError("got an archive.org file path, not download/details page; individual files not handled yet")
#print(f" archiveorg processing item={item_name}", file=sys.stderr)
item = self.session.get_item(item_name)