diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 18:50:17 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 18:50:17 -0700 |
commit | 826c7538e091fac14d987a3cd654975da964e240 (patch) | |
tree | 90345b4cabb461c624ca5a218c2fc01dce3055cd /python/scripts/cdx_collection.py | |
parent | 020037d4714e7ba2ab172c7278494aed0b2148ad (diff) | |
download | sandcrawler-826c7538e091fac14d987a3cd654975da964e240.tar.gz sandcrawler-826c7538e091fac14d987a3cd654975da964e240.zip |
make fmt (black 21.9b0)
Diffstat (limited to 'python/scripts/cdx_collection.py')
-rwxr-xr-x | python/scripts/cdx_collection.py | 26 |
1 file changed, 14 insertions, 12 deletions
diff --git a/python/scripts/cdx_collection.py b/python/scripts/cdx_collection.py
index aa78aec..0b60da3 100755
--- a/python/scripts/cdx_collection.py
+++ b/python/scripts/cdx_collection.py
@@ -29,7 +29,7 @@ def run():
     collection = sys.argv[1]
 
     # Check collection name is clean
-    assert collection.replace('_', '').replace('-', '').replace('.', '').isalnum()
+    assert collection.replace("_", "").replace("-", "").replace(".", "").isalnum()
 
     tempdir = tempfile.mkdtemp()
     print("Looking up collection: {}".format(collection))
@@ -45,15 +45,17 @@ def run():
     status = True
     errors = []
     for item in item_list:
-        item = item['identifier']
+        item = item["identifier"]
         # TODO: error handling
         try:
-            ret = ia.download(item,
-                              files=[item + '.cdx.gz'],
-                              verbose=True,
-                              destdir=tempdir,
-                              no_directory=True,
-                              retries=1000)
+            ret = ia.download(
+                item,
+                files=[item + ".cdx.gz"],
+                verbose=True,
+                destdir=tempdir,
+                no_directory=True,
+                retries=1000,
+            )
             status = ret and status
         except requests.exceptions.ReadTimeout as rt:
             print(str(rt), file=sys.stderr)
@@ -67,14 +69,14 @@ def run():
 
     # Combine files
     print("Merging and re-compressing all CDX files...")
-    #subprocess.run('zcat {0}/*.cdx.gz | pigz > {0}/combined.gz'.format(tempdir),
-    subprocess.run('zcat {0}/*.cdx.gz | gzip > {0}/combined.gz'.format(tempdir), shell=True)
+    # subprocess.run('zcat {0}/*.cdx.gz | pigz > {0}/combined.gz'.format(tempdir),
+    subprocess.run("zcat {0}/*.cdx.gz | gzip > {0}/combined.gz".format(tempdir), shell=True)
 
     # Move and cleanup
-    shutil.move('{}/combined.gz'.format(tempdir), '{}.cdx.gz'.format(collection))
+    shutil.move("{}/combined.gz".format(tempdir), "{}.cdx.gz".format(collection))
 
     print("Done!")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     run()