about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-09-26 15:21:59 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2019-09-26 15:21:59 -0700
commit: 7eb019dcc158029a86c66b6035abb9f0076b9e45 (patch)
tree: 7ac5002b63502e1212281ed3db4f27ee552503f5
parent: 71756038bc376568f7bcf124b6f8a23fc9221594 (diff)
download: sandcrawler-7eb019dcc158029a86c66b6035abb9f0076b9e45.tar.gz
sandcrawler-7eb019dcc158029a86c66b6035abb9f0076b9e45.zip
off-by-one error in batch sizes
-rw-r--r-- python/sandcrawler/workers.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 81813a2..e6f5d4b 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -212,7 +212,7 @@ class JsonLinePusher(RecordPusher):
record = json.loads(line)
if self.batch_size:
batch.append(record)
- if len(batch) > self.batch_size:
+ if len(batch) >= self.batch_size:
self.worker.push_batch(batch)
self.counts['pushed'] += len(batch)
batch = []