diff options
Diffstat (limited to 'python/sandcrawler/workers.py')
-rw-r--r-- | python/sandcrawler/workers.py | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 9f7c913..1e54a28 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -107,7 +107,7 @@ class KafkaSink(SandcrawlerWorker):
 
         config = self.producer_config({
             'bootstrap.servers': kafka_hosts,
-            'message.max.bytes': 20000000, # ~20 MBytes; broker is ~50 MBytes
+            'message.max.bytes': 30000000, # ~30 MBytes; broker is ~50 MBytes
             'api.version.request': True,
             'api.version.fallback.ms': 0,
         })
@@ -127,6 +127,7 @@ class KafkaSink(SandcrawlerWorker):
         config.update({
             'delivery.report.only.error': True,
             'default.topic.config': {
+                'message.timeout.ms': 30000,
                 'request.required.acks': -1, # all brokers must confirm
             }
         })
@@ -171,10 +172,11 @@ class KafkaGrobidSink(KafkaSink):
         config.update({
             'compression.codec': 'gzip',
             'retry.backoff.ms': 250,
-            'linger.ms': 5000,
+            'linger.ms': 1000,
             'batch.num.messages': 50,
             'delivery.report.only.error': True,
             'default.topic.config': {
+                'message.timeout.ms': 30000,
                 'request.required.acks': -1, # all brokers must confirm
             }
         })