diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-02-18 22:42:56 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-02-18 22:42:56 -0800 |
commit | 3d663242e2dc4128bd4613657870e8dd42cac570 (patch) | |
tree | 692d67fa0511abe8097ec278690c183115fae078 | |
parent | 418fcb1b24edd149be69ecf1081a4d7f44a3cd43 (diff) | |
download | sandcrawler-3d663242e2dc4128bd4613657870e8dd42cac570.tar.gz sandcrawler-3d663242e2dc4128bd4613657870e8dd42cac570.zip |
X-Archive-Src more robust than X-Archive-Redirect-Reason
-rw-r--r-- | python/sandcrawler/ia.py | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 9dad7a0..9a1b8c8 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -484,8 +484,9 @@ class WaybackClient:
         #print(resp.url, file=sys.stderr)
 
         # defensively check that this is actually correct replay based on headers
-        if not "X-Archive-Redirect-Reason" in resp.headers:
-            raise WaybackError("redirect replay fetch didn't return X-Archive-Redirect-Reason in headers")
+        # previously check for "X-Archive-Redirect-Reason" here
+        if not "X-Archive-Src" in resp.headers:
+            raise WaybackError("redirect replay fetch didn't return X-Archive-Src in headers")
         if not datetime in resp.url:
             raise WaybackError("didn't get exact reply (redirect?) datetime:{} got:{}".format(datetime, resp.url))
 