aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/workers.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-27 10:51:12 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-27 10:51:15 -0700
commit c216a9cdecd85db8296e6499593244686c430d8c (patch)
tree f8f6f32dcbde36e7138c8a0475dd828266f5f382 /python/sandcrawler/workers.py
parent 2d70b8629139663e2e3a137d38788103809f84ae (diff)
download sandcrawler-c216a9cdecd85db8296e6499593244686c430d8c.tar.gz
sandcrawler-c216a9cdecd85db8296e6499593244686c430d8c.zip
handle empty fetched blob
Diffstat (limited to 'python/sandcrawler/workers.py')
-rw-r--r-- python/sandcrawler/workers.py 7
1 files changed, 6 insertions, 1 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 632cb0a..f6693bb 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -178,7 +178,12 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
blob = resp.content
else:
raise ValueError("not a CDX (wayback) or petabox (archive.org) dict; not sure how to proceed")
- assert blob
+ if not blob:
+ return dict(
+ key=default_key,
+ source=record,
+ status="empty-blob",
+ )
return dict(
key=default_key,
status="success",