From 55ef20102eaf8123dfc41e1d7ae80c50607c99f4 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@archive.org>
Date: Thu, 9 Jan 2020 12:19:44 -0800
Subject: fix http/https issue with GlobalWayback library

---
 python/sandcrawler/ia.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 74cd978..198c8aa 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -206,7 +206,8 @@ class WaybackClient:
         else:
             self.cdx_client = CdxApiClient()
         # /serve/ instead of /download/ doesn't record view count
-        self.petabox_base_url = kwargs.get('petabox_base_url', 'https://archive.org/serve/')
+        # NOTE: must stay http://, not https:// (works around an http/https issue with the GlobalWayback library)
+        self.petabox_base_url = kwargs.get('petabox_base_url', 'http://archive.org/serve/')
         # gwb library will fall back to reading from /opt/.petabox/webdata.secret
         self.petabox_webdata_secret = kwargs.get('petabox_webdata_secret', os.environ.get('PETABOX_WEBDATA_SECRET'))
         self.warc_uri_prefix = kwargs.get('warc_uri_prefix', 'https://archive.org/serve/')
-- 
cgit v1.2.3