From ab0ee1bb7aae902f0e0b3812e8f328f44189fdc5 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 13 Nov 2019 20:28:30 -0800
Subject: handle wayback client return status correctly

---
 python/sandcrawler/ingest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'python')

diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 2566973..6e96763 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -73,8 +73,8 @@ class IngestFileWorker(SandcrawlerWorker):
             return self.spn_client.save_url_now_v1(url)
 
         resp = requests.get(WAYBACK_ENDPOINT + cdx['datetime'] + "id_/" + cdx['url'])
-        if resp.status_code != 200:
-            raise WaybackError(resp.text)
+        if resp.status_code != cdx['http_status']:
+            raise WaybackError("Got unexpected wayback status (expected {} from CDX, got {})".format(cdx['http_status'], resp.status_code))
         body = resp.content
         return (cdx, body)
 
-- 
cgit v1.2.3