aboutsummaryrefslogtreecommitdiffstats
path: root/arabesque.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-04-23 17:19:29 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-04-23 17:19:29 -0700
commit 46b93836752a6fb221df44111de43c435174926d (patch)
tree 82e58170e0690843412a7c2d9c7e6080139c8099 /arabesque.py
parent 26e46b05eafa964ec442d9f957fc26247597a591 (diff)
download arabesque-46b93836752a6fb221df44111de43c435174926d.tar.gz
arabesque-46b93836752a6fb221df44111de43c435174926d.zip
ignore empty files on backwards import
Diffstat (limited to 'arabesque.py')
-rwxr-xr-x arabesque.py 6
1 files changed, 5 insertions, 1 deletions
diff --git a/arabesque.py b/arabesque.py
index 5b8e209..1c5663b 100755
--- a/arabesque.py
+++ b/arabesque.py
@@ -416,7 +416,11 @@ def backward(log_file, map_db, output_db, hit_mimetypes=FULLTEXT_MIMETYPES):
continue
if line.mimetype == "application/octet-stream" and int(line.size_bytes) < 1000:
- counts['skip-tiny-octetstream-'] += 1
+ counts['skip-tiny-octetstream'] += 1
+ continue
+
+ if int(line.size_bytes) == 0 or line.sha1 == "3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ":
+ counts['skip-empty-file'] += 1
continue
#print(time.time())