From 2e112935a59993cf930558278362835056897c49 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 9 Jan 2020 17:54:12 -0800
Subject: wayback: datetime mismatch as an error

---
 python/sandcrawler/ia.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 6468743..2940d89 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -348,7 +348,8 @@ class WaybackClient:
 
         # defensively check that this is actually correct replay based on headers
         assert "X-Archive-Src" in resp.headers
-        assert datetime in resp.url
+        if not datetime in resp.url:
+            raise WaybackError("didn't get exact reply (redirect?) datetime:{} got:{}".format(datetime, resp.url))
 
         if cdx_sha1hex:
             # verify that body matches CDX hash
-- 
cgit v1.2.3