From ab95b4b3ae669424581a55668a819eff03098dae Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 19 Feb 2019 18:38:57 -0800 Subject: fix empty blob errors --- python/deliver_gwb_to_s3.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'python') diff --git a/python/deliver_gwb_to_s3.py b/python/deliver_gwb_to_s3.py index 02f0c03..b842f97 100755 --- a/python/deliver_gwb_to_s3.py +++ b/python/deliver_gwb_to_s3.py @@ -115,10 +115,14 @@ class DeliverGwbS3(): continue # fetch from GWB/petabox via HTTP range-request blob, status = self.fetch_warc_content(file_cdx['warc'], file_cdx['offset'], file_cdx['c_size']) - if not blob: + if blob is None and status: print("{}\terror petabox\t{}\t{}".format(sha1_hex, file_cdx['warc'], status['reason'])) self.count['err-petabox'] += 1 continue + elif not blob: + print("{}\tskip-empty-blob".format(sha1_hex)) + self.count['skip-empty-blob'] += 1 + continue # verify sha1 if sha1_hex != hashlib.sha1(blob).hexdigest(): #assert sha1_hex == hashlib.sha1(blob).hexdigest() -- cgit v1.2.3