aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2019-10-03 18:32:11 -0700
committerBryan Newbold <bnewbold@archive.org>2019-10-03 18:32:11 -0700
commit85a3355abe23fe9bf1cec480ddc9ab7c1f79322a (patch)
tree3d6f36d53ecb51f59545005ce454c14a4a04492f
parent4294c40a378c386b8158f563168a29e65553395c (diff)
downloadsandcrawler-85a3355abe23fe9bf1cec480ddc9ab7c1f79322a.tar.gz
sandcrawler-85a3355abe23fe9bf1cec480ddc9ab7c1f79322a.zip
workers: better generic batch-size arg handling
-rw-r--r--python/sandcrawler/workers.py6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index e86d400..3b46cb7 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -208,6 +208,8 @@ class JsonLinePusher(RecordPusher):
self.worker = worker
self.json_file = json_file
self.batch_size = kwargs.get('batch_size', None)
+ if self.batch_size in (0, 1):
+ self.batch_size = None
def run(self):
batch = []
@@ -244,6 +246,8 @@ class CdxLinePusher(RecordPusher):
self.filter_mimetypes = kwargs.get('filter_mimetypes', None)
self.allow_octet_stream = kwargs.get('allow_octet_stream', False)
self.batch_size = kwargs.get('batch_size', None)
+ if self.batch_size in (0, 1):
+ self.batch_size = None
def run(self):
batch = []
@@ -317,6 +321,8 @@ class KafkaJsonPusher(RecordPusher):
)
self.poll_interval = kwargs.get('poll_interval', 5.0)
self.batch_size = kwargs.get('batch_size', 100)
+ if self.batch_size in (0, 1):
+ self.batch_size = 1
self.batch_worker = kwargs.get('batch_worker', False)
def run(self):