diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-28 19:19:36 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-28 19:19:38 -0800 |
commit | 6d6bf1c448246c6534d7087eb1db5b9341796b61 (patch) | |
tree | 6c24689f8dea71eeea83afc9396cf0ddca081edd | |
parent | fd8b7f47a00ab364f6609a9c499996859d25e6a0 (diff) | |
download | sandcrawler-6d6bf1c448246c6534d7087eb1db5b9341796b61.tar.gz sandcrawler-6d6bf1c448246c6534d7087eb1db5b9341796b61.zip |
worker kafka setting tweaks
These are all attempts to get kafka workers operating more smoothly.
-rw-r--r-- | python/sandcrawler/workers.py | 6 |
1 file changed, 4 insertions, 2 deletions
```diff
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 9f7c913..1e54a28 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -107,7 +107,7 @@ class KafkaSink(SandcrawlerWorker):
 
         config = self.producer_config({
             'bootstrap.servers': kafka_hosts,
-            'message.max.bytes': 20000000, # ~20 MBytes; broker is ~50 MBytes
+            'message.max.bytes': 30000000, # ~30 MBytes; broker is ~50 MBytes
             'api.version.request': True,
             'api.version.fallback.ms': 0,
         })
@@ -127,6 +127,7 @@ class KafkaSink(SandcrawlerWorker):
         config.update({
             'delivery.report.only.error': True,
             'default.topic.config': {
+                'message.timeout.ms': 30000,
                 'request.required.acks': -1, # all brokers must confirm
             }
         })
@@ -171,10 +172,11 @@ class KafkaGrobidSink(KafkaSink):
         config.update({
             'compression.codec': 'gzip',
             'retry.backoff.ms': 250,
-            'linger.ms': 5000,
+            'linger.ms': 1000,
             'batch.num.messages': 50,
             'delivery.report.only.error': True,
             'default.topic.config': {
+                'message.timeout.ms': 30000,
                 'request.required.acks': -1, # all brokers must confirm
             }
         })
```