diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-11-13 20:28:30 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-11-13 20:28:30 -0800 |
commit | ab0ee1bb7aae902f0e0b3812e8f328f44189fdc5 (patch) | |
tree | 74562e08464d0758056512dc6331d855f4a19d46 | |
parent | b9ad3a9468086940aaba15ce105c092f19e57092 (diff) | |
download | sandcrawler-ab0ee1bb7aae902f0e0b3812e8f328f44189fdc5.tar.gz sandcrawler-ab0ee1bb7aae902f0e0b3812e8f328f44189fdc5.zip |
handle wayback client return status correctly
-rw-r--r-- | python/sandcrawler/ingest.py | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 2566973..6e96763 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -73,8 +73,8 @@ class IngestFileWorker(SandcrawlerWorker):
                 return self.spn_client.save_url_now_v1(url)
 
         resp = requests.get(WAYBACK_ENDPOINT + cdx['datetime'] + "id_/" + cdx['url'])
-        if resp.status_code != 200:
-            raise WaybackError(resp.text)
+        if resp.status_code != cdx['http_status']:
+            raise WaybackError("Got unexpected wayback status (expected {} from CDX, got {})".format(cdx['http_status'], resp.status_code))
         body = resp.content
         return (cdx, body)
 
```