aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-11-13 21:24:22 -0800
committer Bryan Newbold <bnewbold@archive.org> 2019-11-13 21:24:22 -0800
commit 31672b40c358b6dfbf29520838c8064ed2891cee (patch)
tree be779020ccad945f81eba9356e2a4528f3345b8a
parent 8a58ccc381534db6bbcc1275cd561ccf3a2af23a (diff)
download sandcrawler-31672b40c358b6dfbf29520838c8064ed2891cee.tar.gz
sandcrawler-31672b40c358b6dfbf29520838c8064ed2891cee.zip
treat failure to get terminal capture as a SavePageNowError
-rw-r--r-- python/sandcrawler/ingest.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 6e96763..29bb78e 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -68,7 +68,7 @@ class IngestFileWorker(SandcrawlerWorker):
cdx = self.cdx_client.lookup_latest(url, follow_redirects=True)
if not cdx:
sys.stderr.write("{}\n".format(cdx_list))
- raise Exception("Failed to crawl PDF URL")
+ raise SavePageNowError("Failed to find terminal capture from SPNv2")
else:
return self.spn_client.save_url_now_v1(url)