diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-09 17:54:12 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-09 17:54:12 -0800 |
commit | 2e112935a59993cf930558278362835056897c49 (patch) | |
tree | 721b9c39c3182c90c334cc52a8875868370e1277 | |
parent | d76e287a3b40370bcdd020c0560b14769f8bd009 (diff) | |
download | sandcrawler-2e112935a59993cf930558278362835056897c49.tar.gz sandcrawler-2e112935a59993cf930558278362835056897c49.zip |
wayback: datetime mismatch as an error
-rw-r--r-- | python/sandcrawler/ia.py | 3 |
1 files changed, 2 insertions, 1 deletions
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 6468743..2940d89 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -348,7 +348,8 @@ class WaybackClient:

         # defensively check that this is actually correct replay based on headers
         assert "X-Archive-Src" in resp.headers
-        assert datetime in resp.url
+        if not datetime in resp.url:
+            raise WaybackError("didn't get exact reply (redirect?) datetime:{} got:{}".format(datetime, resp.url))

         if cdx_sha1hex:
             # verify that body matches CDX hash
```