about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-05-09 17:47:44 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-05-09 17:47:44 -0700
commit 27d149734439ee68738957df76cfb6f687b3f19b (patch)
tree f8e9d48a773da90c0b1722f9a94bc7e75abd72ab /python
parent 800f7c71b4d106c74b4bb76f2ee513ba4fcdae87 (diff)
download sandcrawler-27d149734439ee68738957df76cfb6f687b3f19b.tar.gz
sandcrawler-27d149734439ee68738957df76cfb6f687b3f19b.zip
deliver_dumpgrobid_to_s3: storage class config
Diffstat (limited to 'python')
-rwxr-xr-x python/deliver_dumpgrobid_to_s3.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/python/deliver_dumpgrobid_to_s3.py b/python/deliver_dumpgrobid_to_s3.py
index 7fb0f08..6a9aea6 100755
--- a/python/deliver_dumpgrobid_to_s3.py
+++ b/python/deliver_dumpgrobid_to_s3.py
@@ -46,6 +46,7 @@ class DeliverDumpGrobidS3():
self.s3_bucket = s3_bucket
self.s3_prefix = kwargs.get('s3_prefix', 'grobid/')
self.s3_suffix = kwargs.get('s3_suffix', '.tei.xml')
+ self.s3_storage_class = kwargs.get('s3_storage_class', 'STANDARD')
self.s3 = boto3.resource('s3')
self.bucket = self.s3.Bucket(self.s3_bucket)
@@ -71,7 +72,8 @@ class DeliverDumpGrobidS3():
self.s3_prefix,
sha1_hex[0:4],
sha1_hex,
- self.s3_suffix),
+ self.s3_suffix,
+ StorageClass=self.s3_storage_class),
Body=tei_xml)
print("{}\tsuccess\t{}\t{}".format(sha1_hex, obj.key, len(tei_xml)))
self.count['success-s3'] += 1
@@ -93,6 +95,10 @@ def main():
type=str,
default=".tei.xml",
help='file suffix for created objects')
+ parser.add_argument('--s3-storage-class',
+ type=str,
+ default="STANDARD",
+ help='AWS S3 storage class (redundancy) to use')
parser.add_argument('dump_file',
help="TSV/JSON dump file",
default=sys.stdin,