about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-02-18 22:42:56 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-02-18 22:42:56 -0800
commit 3d663242e2dc4128bd4613657870e8dd42cac570 (patch)
tree 692d67fa0511abe8097ec278690c183115fae078
parent 418fcb1b24edd149be69ecf1081a4d7f44a3cd43 (diff)
download sandcrawler-3d663242e2dc4128bd4613657870e8dd42cac570.tar.gz
sandcrawler-3d663242e2dc4128bd4613657870e8dd42cac570.zip
X-Archive-Src more robust than X-Archive-Redirect-Reason
-rw-r--r-- python/sandcrawler/ia.py 5
1 files changed, 3 insertions, 2 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 9dad7a0..9a1b8c8 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -484,8 +484,9 @@ class WaybackClient:
#print(resp.url, file=sys.stderr)
# defensively check that this is actually correct replay based on headers
- if not "X-Archive-Redirect-Reason" in resp.headers:
- raise WaybackError("redirect replay fetch didn't return X-Archive-Redirect-Reason in headers")
+ # previously check for "X-Archive-Redirect-Reason" here
+ if not "X-Archive-Src" in resp.headers:
+ raise WaybackError("redirect replay fetch didn't return X-Archive-Src in headers")
if not datetime in resp.url:
raise WaybackError("didn't get exact reply (redirect?) datetime:{} got:{}".format(datetime, resp.url))