diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-02-21 11:49:02 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-02-21 11:49:02 -0800 |
commit | f59e9895d3c9d198538b40e36d3b0cc3b4bb5b92 (patch) | |
tree | 407c6ca0f8245d1ef4bb6713dc0afd2679af36df /python | |
parent | 84459a91317c799c78ec94005606160e80843159 (diff) | |
download | sandcrawler-f59e9895d3c9d198538b40e36d3b0cc3b4bb5b92.tar.gz sandcrawler-f59e9895d3c9d198538b40e36d3b0cc3b4bb5b92.zip |
don't print secret, and MRO pylint skip
Diffstat (limited to 'python')
-rwxr-xr-x | python/deliver_gwb_to_s3.py | 10 |
1 file changed, 6 insertions, 4 deletions
```diff
diff --git a/python/deliver_gwb_to_s3.py b/python/deliver_gwb_to_s3.py
index 8a52382..39ac000 100755
--- a/python/deliver_gwb_to_s3.py
+++ b/python/deliver_gwb_to_s3.py
@@ -29,6 +29,10 @@ Requires:
 - wayback/GWB libraries
 """
 
+# XXX: some broken MRO thing going on in here due to python3 object wrangling
+# in `wayback` library. Means we can't run pylint.
+# pylint: skip-file
+
 import os
 import sys
 import json
@@ -40,8 +44,7 @@ from collections import Counter
 import boto3
 import raven
 import wayback.exception
-from wayback.resource import Resource
-from wayback.resource import ArcResource
+from http.client import IncompleteRead
 from wayback.resourcestore import ResourceStore
 from gwb.loader import CDXLoaderFactory
 
@@ -49,7 +52,7 @@ from gwb.loader import CDXLoaderFactory
 sentry_client = raven.Client()
 
 
-class DeliverGwbS3():
+class DeliverGwbS3:
 
     def __init__(self, s3_bucket, **kwargs):
         self.warc_uri_prefix = kwargs.get('warc_uri_prefix')
@@ -59,7 +62,6 @@ class DeliverGwbS3():
         self.petabox_base_url = kwargs.get('petabox_base_url', 'http://archive.org/serve/')
         # gwb library will fall back to reading from /opt/.petabox/webdata.secret
         self.petabox_webdata_secret = kwargs.get('petabox_webdata_secret', os.environ.get('PETABOX_WEBDATA_SECRET'))
-        print("petabox_webdata_secret: {}".format(self.petabox_webdata_secret))
         self.s3_bucket = s3_bucket
         self.s3_prefix = kwargs.get('s3_prefix', 'pdf/')
         self.s3_suffix = kwargs.get('s3_suffix', '.pdf')
```