diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-10-03 18:32:11 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-10-03 18:32:11 -0700 |
commit | 85a3355abe23fe9bf1cec480ddc9ab7c1f79322a (patch) | |
tree | 3d6f36d53ecb51f59545005ce454c14a4a04492f | |
parent | 4294c40a378c386b8158f563168a29e65553395c (diff) | |
download | sandcrawler-85a3355abe23fe9bf1cec480ddc9ab7c1f79322a.tar.gz sandcrawler-85a3355abe23fe9bf1cec480ddc9ab7c1f79322a.zip |
workers: better generic batch-size arg handling
-rw-r--r-- | python/sandcrawler/workers.py | 6 |
1 files changed, 6 insertions, 0 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py index e86d400..3b46cb7 100644 --- a/python/sandcrawler/workers.py +++ b/python/sandcrawler/workers.py @@ -208,6 +208,8 @@ class JsonLinePusher(RecordPusher): self.worker = worker self.json_file = json_file self.batch_size = kwargs.get('batch_size', None) + if self.batch_size in (0, 1): + self.batch_size = None def run(self): batch = [] @@ -244,6 +246,8 @@ class CdxLinePusher(RecordPusher): self.filter_mimetypes = kwargs.get('filter_mimetypes', None) self.allow_octet_stream = kwargs.get('allow_octet_stream', False) self.batch_size = kwargs.get('batch_size', None) + if self.batch_size in (0, 1): + self.batch_size = None def run(self): batch = [] @@ -317,6 +321,8 @@ class KafkaJsonPusher(RecordPusher): ) self.poll_interval = kwargs.get('poll_interval', 5.0) self.batch_size = kwargs.get('batch_size', 100) + if self.batch_size in (0, 1): + self.batch_size = 1 self.batch_worker = kwargs.get('batch_worker', False) def run(self): |