diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 17:12:40 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-15 18:15:29 -0700 |
commit | a09396caefe709b521e560add5b01c1a5c94cb53 (patch) | |
tree | 6a4464f569d1d46fbc5e6cc54ba593c0680c499b /python/sandcrawler/fileset_platforms.py | |
parent | ba324ae5a6051c47d4cf7524c28caeda7abd6fc5 (diff) | |
download | sandcrawler-a09396caefe709b521e560add5b01c1a5c94cb53.tar.gz sandcrawler-a09396caefe709b521e560add5b01c1a5c94cb53.zip |
more fileset iteration
Diffstat (limited to 'python/sandcrawler/fileset_platforms.py')
-rw-r--r-- | python/sandcrawler/fileset_platforms.py | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/python/sandcrawler/fileset_platforms.py b/python/sandcrawler/fileset_platforms.py
index bcf2144..cc07948 100644
--- a/python/sandcrawler/fileset_platforms.py
+++ b/python/sandcrawler/fileset_platforms.py
@@ -674,8 +674,8 @@ class ArchiveOrgHelper(FilesetPlatformHelper):
         assert base_url_split[2] == 'archive.org'
         assert base_url_split[3] in ['details', 'download']
         item_name = base_url_split[4]
-        if len(base_url_split) == 6:
-            assert not base_url_split[5]
+        if len(base_url_split) == 6 and base_url_split[5]:
+            raise PlatformScopeError("got an archive.org file path, not download/details page; individual files not handled yet")
 
         #print(f" archiveorg processing item={item_name}", file=sys.stderr)
         item = self.session.get_item(item_name)
```