diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-12-15 13:15:26 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-12-15 13:15:26 -0800 |
commit | 100e5348c4203dd346993a185ba3749105e91541 (patch) | |
tree | e28820c2be60de38292ad06d964dd0899bf04f01 | |
parent | dd4a80c2ae16bcc7e6318886e68cea7a1a59da7d (diff) | |
download | sandcrawler-100e5348c4203dd346993a185ba3749105e91541.tar.gz sandcrawler-100e5348c4203dd346993a185ba3749105e91541.zip |
fileset ingest: create tmp subdirectories if needed
-rw-r--r-- | python/sandcrawler/fileset_strategies.py | 5 |
1 files changed, 5 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index 29b7edb..b0131f4 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -117,8 +117,13 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
             local_path = local_dir + "/" + m.path
             assert m.platform_url
 
+            if not os.path.exists(os.path.dirname(local_path)):
+                os.mkdir(os.path.dirname(local_path))
             if not os.path.exists(local_path):
                 print(f" downloading {m.path}", file=sys.stderr)
+                # create any sub-directories for this path, if necessary
+                if not os.path.exists(os.path.dirname(local_path)):
+                    os.mkdir(os.path.dirname(local_path))
                 with self.ia_session.get(
                     m.platform_url, stream=True, allow_redirects=True
                 ) as r:
```