From c216a9cdecd85db8296e6499593244686c430d8c Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Sat, 27 Jun 2020 10:51:12 -0700
Subject: handle empty fetched blob

---
 python/sandcrawler/workers.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 632cb0a..f6693bb 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -178,7 +178,12 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
             blob = resp.content
         else:
             raise ValueError("not a CDX (wayback) or petabox (archive.org) dict; not sure how to proceed")
-        assert blob
+        if not blob:
+            return dict(
+                key=default_key,
+                source=record,
+                status="empty-blob",
+            )
         return dict(
             key=default_key,
             status="success",
-- 
cgit v1.2.3