diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-11-13 21:24:22 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-11-13 21:24:22 -0800 |
commit | 31672b40c358b6dfbf29520838c8064ed2891cee (patch) | |
tree | be779020ccad945f81eba9356e2a4528f3345b8a | |
parent | 8a58ccc381534db6bbcc1275cd561ccf3a2af23a (diff) | |
download | sandcrawler-31672b40c358b6dfbf29520838c8064ed2891cee.tar.gz sandcrawler-31672b40c358b6dfbf29520838c8064ed2891cee.zip |
treat failure to get terminal capture as a SavePageNowError
-rw-r--r-- | python/sandcrawler/ingest.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 6e96763..29bb78e 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -68,7 +68,7 @@ class IngestFileWorker(SandcrawlerWorker):
             cdx = self.cdx_client.lookup_latest(url, follow_redirects=True)
             if not cdx:
                 sys.stderr.write("{}\n".format(cdx_list))
-                raise Exception("Failed to crawl PDF URL")
+                raise SavePageNowError("Failed to find terminal capture from SPNv2")
         else:
             return self.spn_client.save_url_now_v1(url)