diff options
Diffstat (limited to 'python/scripts/cdx_collection.py')
-rwxr-xr-x | python/scripts/cdx_collection.py | 26 |
1 file changed, 14 insertions, 12 deletions
diff --git a/python/scripts/cdx_collection.py b/python/scripts/cdx_collection.py
index aa78aec..0b60da3 100755
--- a/python/scripts/cdx_collection.py
+++ b/python/scripts/cdx_collection.py
@@ -29,7 +29,7 @@ def run():
     collection = sys.argv[1]
 
     # Check collection name is clean
-    assert collection.replace('_', '').replace('-', '').replace('.', '').isalnum()
+    assert collection.replace("_", "").replace("-", "").replace(".", "").isalnum()
 
     tempdir = tempfile.mkdtemp()
     print("Looking up collection: {}".format(collection))
@@ -45,15 +45,17 @@ def run():
     status = True
     errors = []
     for item in item_list:
-        item = item['identifier']
+        item = item["identifier"]
         # TODO: error handling
         try:
-            ret = ia.download(item,
-                files=[item + '.cdx.gz'],
-                verbose=True,
-                destdir=tempdir,
-                no_directory=True,
-                retries=1000)
+            ret = ia.download(
+                item,
+                files=[item + ".cdx.gz"],
+                verbose=True,
+                destdir=tempdir,
+                no_directory=True,
+                retries=1000,
+            )
             status = ret and status
         except requests.exceptions.ReadTimeout as rt:
             print(str(rt), file=sys.stderr)
@@ -67,14 +69,14 @@ def run():
 
     # Combine files
     print("Merging and re-compressing all CDX files...")
-    #subprocess.run('zcat {0}/*.cdx.gz | pigz > {0}/combined.gz'.format(tempdir),
-    subprocess.run('zcat {0}/*.cdx.gz | gzip > {0}/combined.gz'.format(tempdir), shell=True)
+    # subprocess.run('zcat {0}/*.cdx.gz | pigz > {0}/combined.gz'.format(tempdir),
+    subprocess.run("zcat {0}/*.cdx.gz | gzip > {0}/combined.gz".format(tempdir), shell=True)
 
     # Move and cleanup
-    shutil.move('{}/combined.gz'.format(tempdir), '{}.cdx.gz'.format(collection))
+    shutil.move("{}/combined.gz".format(tempdir), "{}.cdx.gz".format(collection))
 
     print("Done!")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     run()