From 46b93836752a6fb221df44111de43c435174926d Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 23 Apr 2019 17:19:29 -0700
Subject: ignore empty files on backwards import

---
 arabesque.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arabesque.py b/arabesque.py
index 5b8e209..1c5663b 100755
--- a/arabesque.py
+++ b/arabesque.py
@@ -416,7 +416,11 @@ def backward(log_file, map_db, output_db, hit_mimetypes=FULLTEXT_MIMETYPES):
             continue
 
         if line.mimetype == "application/octet-stream" and int(line.size_bytes) < 1000:
-            counts['skip-tiny-octetstream-'] += 1
+            counts['skip-tiny-octetstream'] += 1
+            continue
+
+        if int(line.size_bytes) == 0 or line.sha1 == "3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ":
+            counts['skip-empty-file'] += 1
             continue
 
         #print(time.time())
-- 
cgit v1.2.3