about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-12-15 13:15:26 -0800
committer Bryan Newbold <bnewbold@archive.org> 2021-12-15 13:15:26 -0800
commit 100e5348c4203dd346993a185ba3749105e91541 (patch)
tree e28820c2be60de38292ad06d964dd0899bf04f01 /python
parent dd4a80c2ae16bcc7e6318886e68cea7a1a59da7d (diff)
download sandcrawler-100e5348c4203dd346993a185ba3749105e91541.tar.gz
sandcrawler-100e5348c4203dd346993a185ba3749105e91541.zip
fileset ingest: create tmp subdirectories if needed
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/fileset_strategies.py 5
1 files changed, 5 insertions, 0 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index 29b7edb..b0131f4 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -117,8 +117,13 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
local_path = local_dir + "/" + m.path
assert m.platform_url
+ if not os.path.exists(os.path.dirname(local_path)):
+ os.mkdir(os.path.dirname(local_path))
if not os.path.exists(local_path):
print(f" downloading {m.path}", file=sys.stderr)
+ # create any sub-directories for this path, if necessary
+ if not os.path.exists(os.path.dirname(local_path)):
+ os.mkdir(os.path.dirname(local_path))
with self.ia_session.get(
m.platform_url, stream=True, allow_redirects=True
) as r: