diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-08 11:00:11 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-08 11:00:13 -0800 |
commit | 19962108e0fd23fe6af24f170da8c47149e531dc (patch) | |
tree | aa4faf7ed02ae1266e2d9d6b942ceba0e749ca7c /python/sandcrawler/ia.py | |
parent | f74dbcd98c52cb5aa042c4f1a0daa7eddd42820b (diff) | |
download | sandcrawler-19962108e0fd23fe6af24f170da8c47149e531dc.tar.gz sandcrawler-19962108e0fd23fe6af24f170da8c47149e531dc.zip |
ingest: fix null-body case
Broke this in earlier refactor.
Diffstat (limited to 'python/sandcrawler/ia.py')
-rw-r--r-- | python/sandcrawler/ia.py | 2 |
1 file changed, 2 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 0b58f3b..da667b6 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -1076,6 +1076,8 @@ def fix_transfer_encoding(file_meta: dict, resource: ResourceResult) -> Tuple[di
     if resource.body and file_meta['mimetype'] == 'application/gzip' and resource.cdx and resource.cdx.mimetype != 'application/gzip':
         print(" transfer encoding not stripped: {}".format(resource.cdx.mimetype), file=sys.stderr)
         inner_body = gzip.decompress(resource.body)
+        if not inner_body:
+            raise Exception("null body inside transfer encoding")
         inner_resource = ResourceResult(
             body=inner_body,
             # copy all other fields
```