aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-11-13 20:28:30 -0800
committer Bryan Newbold <bnewbold@archive.org> 2019-11-13 20:28:30 -0800
commit ab0ee1bb7aae902f0e0b3812e8f328f44189fdc5 (patch)
tree 74562e08464d0758056512dc6331d855f4a19d46
parent b9ad3a9468086940aaba15ce105c092f19e57092 (diff)
download sandcrawler-ab0ee1bb7aae902f0e0b3812e8f328f44189fdc5.tar.gz
sandcrawler-ab0ee1bb7aae902f0e0b3812e8f328f44189fdc5.zip
handle wayback client return status correctly
-rw-r--r-- python/sandcrawler/ingest.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 2566973..6e96763 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -73,8 +73,8 @@ class IngestFileWorker(SandcrawlerWorker):
return self.spn_client.save_url_now_v1(url)
resp = requests.get(WAYBACK_ENDPOINT + cdx['datetime'] + "id_/" + cdx['url'])
- if resp.status_code != 200:
- raise WaybackError(resp.text)
+ if resp.status_code != cdx['http_status']:
+ raise WaybackError("Got unexpected wayback status (expected {} from CDX, got {})".format(cdx['http_status'], resp.status_code))
body = resp.content
return (cdx, body)