From 85a3355abe23fe9bf1cec480ddc9ab7c1f79322a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 3 Oct 2019 18:32:11 -0700 Subject: workers: better generic batch-size arg handling --- python/sandcrawler/workers.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py index e86d400..3b46cb7 100644 --- a/python/sandcrawler/workers.py +++ b/python/sandcrawler/workers.py @@ -208,6 +208,8 @@ class JsonLinePusher(RecordPusher): self.worker = worker self.json_file = json_file self.batch_size = kwargs.get('batch_size', None) + if self.batch_size in (0, 1): + self.batch_size = None def run(self): batch = [] @@ -244,6 +246,8 @@ class CdxLinePusher(RecordPusher): self.filter_mimetypes = kwargs.get('filter_mimetypes', None) self.allow_octet_stream = kwargs.get('allow_octet_stream', False) self.batch_size = kwargs.get('batch_size', None) + if self.batch_size in (0, 1): + self.batch_size = None def run(self): batch = [] @@ -317,6 +321,8 @@ class KafkaJsonPusher(RecordPusher): ) self.poll_interval = kwargs.get('poll_interval', 5.0) self.batch_size = kwargs.get('batch_size', 100) + if self.batch_size in (0, 1): + self.batch_size = 1 self.batch_worker = kwargs.get('batch_worker', False) def run(self): -- cgit v1.2.3