aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler')
-rw-r--r-- python/sandcrawler/ia.py 7
1 files changed, 5 insertions, 2 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 8462da1..7b9427e 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -699,13 +699,16 @@ class WaybackClient:
except Exception as e:
if resp is not None and "X-Archive-Src" in resp.headers:
raise WaybackContentError(
- f"expected redirect record but got HTTP status {resp.status_code}"
+ f"expected redirect record but got captured HTTP status: {resp.status_code}"
)
raise WaybackError(str(e))
# defensively check that this is actually correct replay based on headers
# previously check for "X-Archive-Redirect-Reason" here
- if "X-Archive-Src" not in resp.headers:
+ if (
+ "X-Archive-Src" not in resp.headers
+ and "X-Archive-Redirect-Reason" not in resp.headers
+ ):
raise WaybackError("redirect replay fetch didn't return X-Archive-Src in headers")
if datetime not in resp.url:
raise WaybackError(