From 97339426c4d0022c3fdf5948ef94b99bb1e120ee Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 13 Jan 2022 15:38:26 -0800 Subject: null-body -> empty-blob --- python/sandcrawler/fileset_strategies.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'python/sandcrawler/fileset_strategies.py') diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py index 9696f3c..c3374f8 100644 --- a/python/sandcrawler/fileset_strategies.py +++ b/python/sandcrawler/fileset_strategies.py @@ -317,6 +317,10 @@ class WebFilesetStrategy(FilesetIngestStrategy): else: assert resource.terminal_status_code == 200 + if not resource.body: + m.status = "empty-blob" + continue + file_meta = gen_file_metadata(resource.body) file_meta, html_resource = fix_transfer_encoding(file_meta, resource) -- cgit v1.2.3