about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-09 12:19:44 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-09 16:32:51 -0800
commit 55ef20102eaf8123dfc41e1d7ae80c50607c99f4 (patch)
tree 429efd771bd1e1d8548a254a7f3ac5450bd3c163
parent d8689bce0a3a69254554dbdfa929e712e9bbc02c (diff)
download sandcrawler-55ef20102eaf8123dfc41e1d7ae80c50607c99f4.tar.gz
sandcrawler-55ef20102eaf8123dfc41e1d7ae80c50607c99f4.zip
fix http/https issue with GlobalWayback library
-rw-r--r-- python/sandcrawler/ia.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 74cd978..198c8aa 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -206,7 +206,8 @@ class WaybackClient:
else:
self.cdx_client = CdxApiClient()
# /serve/ instead of /download/ doesn't record view count
- self.petabox_base_url = kwargs.get('petabox_base_url', 'https://archive.org/serve/')
+ # this *does* want to be http://, not https://
+ self.petabox_base_url = kwargs.get('petabox_base_url', 'http://archive.org/serve/')
# gwb library will fall back to reading from /opt/.petabox/webdata.secret
self.petabox_webdata_secret = kwargs.get('petabox_webdata_secret', os.environ.get('PETABOX_WEBDATA_SECRET'))
self.warc_uri_prefix = kwargs.get('warc_uri_prefix', 'https://archive.org/serve/')