aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-09 17:54:12 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-09 17:54:12 -0800
commit 2e112935a59993cf930558278362835056897c49 (patch)
tree 721b9c39c3182c90c334cc52a8875868370e1277
parent d76e287a3b40370bcdd020c0560b14769f8bd009 (diff)
download sandcrawler-2e112935a59993cf930558278362835056897c49.tar.gz
sandcrawler-2e112935a59993cf930558278362835056897c49.zip
wayback: datetime mismatch as an error
-rw-r--r-- python/sandcrawler/ia.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 6468743..2940d89 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -348,7 +348,8 @@ class WaybackClient:
# defensively check that this is actually correct replay based on headers
assert "X-Archive-Src" in resp.headers
- assert datetime in resp.url
+ if not datetime in resp.url:
+ raise WaybackError("didn't get exact reply (redirect?) datetime:{} got:{}".format(datetime, resp.url))
if cdx_sha1hex:
# verify that body matches CDX hash