diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-09 12:19:44 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-09 16:32:51 -0800 |
commit | 55ef20102eaf8123dfc41e1d7ae80c50607c99f4 (patch) | |
tree | 429efd771bd1e1d8548a254a7f3ac5450bd3c163 /python | |
parent | d8689bce0a3a69254554dbdfa929e712e9bbc02c (diff) | |
download | sandcrawler-55ef20102eaf8123dfc41e1d7ae80c50607c99f4.tar.gz sandcrawler-55ef20102eaf8123dfc41e1d7ae80c50607c99f4.zip |
fix http/https issue with GlobalWayback library
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/ia.py | 3 |
1 files changed, 2 insertions, 1 deletions
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 74cd978..198c8aa 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -206,7 +206,8 @@ class WaybackClient:
         else:
             self.cdx_client = CdxApiClient()
         # /serve/ instead of /download/ doesn't record view count
-        self.petabox_base_url = kwargs.get('petabox_base_url', 'https://archive.org/serve/')
+        # this *does* want to be http://, not https://
+        self.petabox_base_url = kwargs.get('petabox_base_url', 'http://archive.org/serve/')
         # gwb library will fall back to reading from /opt/.petabox/webdata.secret
         self.petabox_webdata_secret = kwargs.get('petabox_webdata_secret', os.environ.get('PETABOX_WEBDATA_SECRET'))
         self.warc_uri_prefix = kwargs.get('warc_uri_prefix', 'https://archive.org/serve/')
```