From 6d6bf1c448246c6534d7087eb1db5b9341796b61 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 28 Jan 2020 19:19:36 -0800
Subject: worker kafka setting tweaks

These are all attempts to get kafka workers operating more smoothly.
---
 python/sandcrawler/workers.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'python')

diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 9f7c913..1e54a28 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -107,7 +107,7 @@ class KafkaSink(SandcrawlerWorker):
 
         config = self.producer_config({
             'bootstrap.servers': kafka_hosts,
-            'message.max.bytes': 20000000, # ~20 MBytes; broker is ~50 MBytes
+            'message.max.bytes': 30000000, # ~30 MBytes; broker is ~50 MBytes
             'api.version.request': True,
             'api.version.fallback.ms': 0,
         })
@@ -127,6 +127,7 @@ class KafkaSink(SandcrawlerWorker):
         config.update({
             'delivery.report.only.error': True,
             'default.topic.config': {
+                'message.timeout.ms': 30000,
                 'request.required.acks': -1, # all brokers must confirm
             }
         })
@@ -171,10 +172,11 @@ class KafkaGrobidSink(KafkaSink):
         config.update({
             'compression.codec': 'gzip',
             'retry.backoff.ms': 250,
-            'linger.ms': 5000,
+            'linger.ms': 1000,
             'batch.num.messages': 50,
             'delivery.report.only.error': True,
             'default.topic.config': {
+                'message.timeout.ms': 30000,
                 'request.required.acks': -1, # all brokers must confirm
             }
         })
-- 
cgit v1.2.3