From b182e84b5f3e1ce5732bed657dc0d12fd3257537 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 3 Mar 2020 21:11:05 -0800 Subject: ingest: make content-decoding more robust --- python/sandcrawler/ingest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'python') diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py index e2ef47a..7211ee0 100644 --- a/python/sandcrawler/ingest.py +++ b/python/sandcrawler/ingest.py @@ -299,8 +299,9 @@ class IngestFileWorker(SandcrawlerWorker): print("transfer encoding not stripped: {}".format(resource.cdx.mimetype), file=sys.stderr) try: inner_body = gzip.decompress(resource.body) - except EOFError: + except Exception as e: result['status'] = 'bad-gzip-encoding' + result['error_message'] = str(e) return result if not inner_body: result['status'] = 'null-body' -- cgit v1.2.3