diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-04-23 17:19:29 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-04-23 17:19:29 -0700 |
commit | 46b93836752a6fb221df44111de43c435174926d (patch) | |
tree | 82e58170e0690843412a7c2d9c7e6080139c8099 | |
parent | 26e46b05eafa964ec442d9f957fc26247597a591 (diff) | |
download | arabesque-46b93836752a6fb221df44111de43c435174926d.tar.gz arabesque-46b93836752a6fb221df44111de43c435174926d.zip |
ignore empty files on backwards import
-rwxr-xr-x | arabesque.py | 6 |
1 files changed, 5 insertions, 1 deletions
```diff
diff --git a/arabesque.py b/arabesque.py
index 5b8e209..1c5663b 100755
--- a/arabesque.py
+++ b/arabesque.py
@@ -416,7 +416,11 @@ def backward(log_file, map_db, output_db, hit_mimetypes=FULLTEXT_MIMETYPES):
             continue
 
         if line.mimetype == "application/octet-stream" and int(line.size_bytes) < 1000:
-            counts['skip-tiny-octetstream-'] += 1
+            counts['skip-tiny-octetstream'] += 1
+            continue
+
+        if int(line.size_bytes) == 0 or line.sha1 == "3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ":
+            counts['skip-empty-file'] += 1
             continue
 
         #print(time.time())
```