diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-09-26 15:21:59 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-09-26 15:21:59 -0700 |
commit | 7eb019dcc158029a86c66b6035abb9f0076b9e45 (patch) | |
tree | 7ac5002b63502e1212281ed3db4f27ee552503f5 | |
parent | 71756038bc376568f7bcf124b6f8a23fc9221594 (diff) | |
download | sandcrawler-7eb019dcc158029a86c66b6035abb9f0076b9e45.tar.gz sandcrawler-7eb019dcc158029a86c66b6035abb9f0076b9e45.zip |
off-by-one error in batch sizes
-rw-r--r-- | python/sandcrawler/workers.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 81813a2..e6f5d4b 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -212,7 +212,7 @@ class JsonLinePusher(RecordPusher):
             record = json.loads(line)
             if self.batch_size:
                 batch.append(record)
-                if len(batch) > self.batch_size:
+                if len(batch) >= self.batch_size:
                     self.worker.push_batch(batch)
                     self.counts['pushed'] += len(batch)
                     batch = []