diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-27 10:51:12 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-27 10:51:15 -0700 |
commit | c216a9cdecd85db8296e6499593244686c430d8c (patch) | |
tree | f8f6f32dcbde36e7138c8a0475dd828266f5f382 | |
parent | 2d70b8629139663e2e3a137d38788103809f84ae (diff) | |
download | sandcrawler-c216a9cdecd85db8296e6499593244686c430d8c.tar.gz sandcrawler-c216a9cdecd85db8296e6499593244686c430d8c.zip |
handle empty fetched blob
-rw-r--r-- | python/sandcrawler/workers.py | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 632cb0a..f6693bb 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -178,7 +178,12 @@ class SandcrawlerFetchWorker(SandcrawlerWorker):
             blob = resp.content
         else:
             raise ValueError("not a CDX (wayback) or petabox (archive.org) dict; not sure how to proceed")
-        assert blob
+        if not blob:
+            return dict(
+                key=default_key,
+                source=record,
+                status="empty-blob",
+            )
         return dict(
             key=default_key,
             status="success",