about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/ingest.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-11-08 11:00:11 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-11-08 11:00:13 -0800
commit: 19962108e0fd23fe6af24f170da8c47149e531dc (patch)
tree: aa4faf7ed02ae1266e2d9d6b942ceba0e749ca7c /python/sandcrawler/ingest.py
parent: f74dbcd98c52cb5aa042c4f1a0daa7eddd42820b (diff)
download: sandcrawler-19962108e0fd23fe6af24f170da8c47149e531dc.tar.gz
sandcrawler-19962108e0fd23fe6af24f170da8c47149e531dc.zip
ingest: fix null-body case
Broke this in earlier refactor.
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- python/sandcrawler/ingest.py 4
1 files changed, 4 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 0c8eee6..2f9c523 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -531,6 +531,10 @@ class IngestFileWorker(SandcrawlerWorker):
result['status'] = 'blocked-cookie'
return result
+ if not resource.body:
+ result['status'] = 'null-body'
+ return result
+
file_meta = gen_file_metadata(resource.body)
try:
file_meta, resource = fix_transfer_encoding(file_meta, resource)