about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/fileset_strategies.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2022-01-13 15:38:26 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2022-01-13 15:38:26 -0800
commit: 97339426c4d0022c3fdf5948ef94b99bb1e120ee (patch)
tree: 53aa78c92f9dba9899ef37a1375433833e4646e5 /python/sandcrawler/fileset_strategies.py
parent: ff6894043576d3d51c9ab16623053f91780edc89 (diff)
download: sandcrawler-97339426c4d0022c3fdf5948ef94b99bb1e120ee.tar.gz
download: sandcrawler-97339426c4d0022c3fdf5948ef94b99bb1e120ee.zip
null-body -> empty-blob
Diffstat (limited to 'python/sandcrawler/fileset_strategies.py')
-rw-r--r-- python/sandcrawler/fileset_strategies.py 4
1 files changed, 4 insertions, 0 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index 9696f3c..c3374f8 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -317,6 +317,10 @@ class WebFilesetStrategy(FilesetIngestStrategy):
else:
assert resource.terminal_status_code == 200
+ if not resource.body:
+ m.status = "empty-blob"
+ continue
+
file_meta = gen_file_metadata(resource.body)
file_meta, html_resource = fix_transfer_encoding(file_meta, resource)