diff options
-rw-r--r-- | python/sandcrawler/ia.py | 3 |
1 file changed, 2 insertions, 1 deletion
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 74cd978..198c8aa 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -206,7 +206,8 @@ class WaybackClient:
         else:
             self.cdx_client = CdxApiClient()
         # /serve/ instead of /download/ doesn't record view count
-        self.petabox_base_url = kwargs.get('petabox_base_url', 'https://archive.org/serve/')
+        # this *does* want to be http://, not https://
+        self.petabox_base_url = kwargs.get('petabox_base_url', 'http://archive.org/serve/')
         # gwb library will fall back to reading from /opt/.petabox/webdata.secret
         self.petabox_webdata_secret = kwargs.get('petabox_webdata_secret', os.environ.get('PETABOX_WEBDATA_SECRET'))
         self.warc_uri_prefix = kwargs.get('warc_uri_prefix', 'https://archive.org/serve/')