From 7eb019dcc158029a86c66b6035abb9f0076b9e45 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 26 Sep 2019 15:21:59 -0700
Subject: off-by-one error in batch sizes

---
 python/sandcrawler/workers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 81813a2..e6f5d4b 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -212,7 +212,7 @@ class JsonLinePusher(RecordPusher):
             record = json.loads(line)
             if self.batch_size:
                 batch.append(record)
-                if len(batch) > self.batch_size:
+                if len(batch) >= self.batch_size:
                     self.worker.push_batch(batch)
                     self.counts['pushed'] += len(batch)
                     batch = []
-- 
cgit v1.2.3