From 3d663242e2dc4128bd4613657870e8dd42cac570 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 18 Feb 2020 22:42:56 -0800 Subject: X-Archive-Src more robust than X-Archive-Redirect-Reason --- python/sandcrawler/ia.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'python') diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 9dad7a0..9a1b8c8 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -484,8 +484,9 @@ class WaybackClient: #print(resp.url, file=sys.stderr) # defensively check that this is actually correct replay based on headers - if not "X-Archive-Redirect-Reason" in resp.headers: - raise WaybackError("redirect replay fetch didn't return X-Archive-Redirect-Reason in headers") + # previously check for "X-Archive-Redirect-Reason" here + if not "X-Archive-Src" in resp.headers: + raise WaybackError("redirect replay fetch didn't return X-Archive-Src in headers") if not datetime in resp.url: raise WaybackError("didn't get exact reply (redirect?) datetime:{} got:{}".format(datetime, resp.url)) -- cgit v1.2.3