about summary refs log tree commit diff stats
path: root/python/scripts/cdx_collection.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/scripts/cdx_collection.py')
-rwxr-xr-x python/scripts/cdx_collection.py 26
1 files changed, 14 insertions, 12 deletions
diff --git a/python/scripts/cdx_collection.py b/python/scripts/cdx_collection.py
index aa78aec..0b60da3 100755
--- a/python/scripts/cdx_collection.py
+++ b/python/scripts/cdx_collection.py
@@ -29,7 +29,7 @@ def run():
collection = sys.argv[1]
# Check collection name is clean
- assert collection.replace('_', '').replace('-', '').replace('.', '').isalnum()
+ assert collection.replace("_", "").replace("-", "").replace(".", "").isalnum()
tempdir = tempfile.mkdtemp()
print("Looking up collection: {}".format(collection))
@@ -45,15 +45,17 @@ def run():
status = True
errors = []
for item in item_list:
- item = item['identifier']
+ item = item["identifier"]
# TODO: error handling
try:
- ret = ia.download(item,
- files=[item + '.cdx.gz'],
- verbose=True,
- destdir=tempdir,
- no_directory=True,
- retries=1000)
+ ret = ia.download(
+ item,
+ files=[item + ".cdx.gz"],
+ verbose=True,
+ destdir=tempdir,
+ no_directory=True,
+ retries=1000,
+ )
status = ret and status
except requests.exceptions.ReadTimeout as rt:
print(str(rt), file=sys.stderr)
@@ -67,14 +69,14 @@ def run():
# Combine files
print("Merging and re-compressing all CDX files...")
- #subprocess.run('zcat {0}/*.cdx.gz | pigz > {0}/combined.gz'.format(tempdir),
- subprocess.run('zcat {0}/*.cdx.gz | gzip > {0}/combined.gz'.format(tempdir), shell=True)
+ # subprocess.run('zcat {0}/*.cdx.gz | pigz > {0}/combined.gz'.format(tempdir),
+ subprocess.run("zcat {0}/*.cdx.gz | gzip > {0}/combined.gz".format(tempdir), shell=True)
# Move and cleanup
- shutil.move('{}/combined.gz'.format(tempdir), '{}.cdx.gz'.format(collection))
+ shutil.move("{}/combined.gz".format(tempdir), "{}.cdx.gz".format(collection))
print("Done!")
-if __name__ == '__main__':
+if __name__ == "__main__":
run()