diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-05-09 17:47:44 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-05-09 17:47:44 -0700 |
commit | 27d149734439ee68738957df76cfb6f687b3f19b (patch) | |
tree | f8e9d48a773da90c0b1722f9a94bc7e75abd72ab | |
parent | 800f7c71b4d106c74b4bb76f2ee513ba4fcdae87 (diff) | |
download | sandcrawler-27d149734439ee68738957df76cfb6f687b3f19b.tar.gz sandcrawler-27d149734439ee68738957df76cfb6f687b3f19b.zip |
deliver_dumpgrobid_to_s3: storage class config
-rwxr-xr-x | python/deliver_dumpgrobid_to_s3.py | 8 |
1 files changed, 7 insertions, 1 deletions
```diff
diff --git a/python/deliver_dumpgrobid_to_s3.py b/python/deliver_dumpgrobid_to_s3.py
index 7fb0f08..6a9aea6 100755
--- a/python/deliver_dumpgrobid_to_s3.py
+++ b/python/deliver_dumpgrobid_to_s3.py
@@ -46,6 +46,7 @@ class DeliverDumpGrobidS3():
         self.s3_bucket = s3_bucket
         self.s3_prefix = kwargs.get('s3_prefix', 'grobid/')
         self.s3_suffix = kwargs.get('s3_suffix', '.tei.xml')
+        self.s3_storage_class = kwargs.get('s3_storage_class', 'STANDARD')
         self.s3 = boto3.resource('s3')
         self.bucket = self.s3.Bucket(self.s3_bucket)
 
@@ -71,7 +72,8 @@ class DeliverDumpGrobidS3():
                     self.s3_prefix,
                     sha1_hex[0:4],
                     sha1_hex,
-                    self.s3_suffix),
+                    self.s3_suffix,
+                    StorageClass=self.s3_storage_class),
                 Body=tei_xml)
             print("{}\tsuccess\t{}\t{}".format(sha1_hex, obj.key, len(tei_xml)))
             self.count['success-s3'] += 1
@@ -93,6 +95,10 @@ def main():
                         type=str,
                         default=".tei.xml",
                         help='file suffix for created objects')
+    parser.add_argument('--s3-storage-class',
+                        type=str,
+                        default="STANDARD",
+                        help='AWS S3 storage class (redundancy) to use')
     parser.add_argument('dump_file',
                         help="TSV/JSON dump file",
                         default=sys.stdin,
```