aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-28 19:19:36 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-28 19:19:38 -0800
commit 6d6bf1c448246c6534d7087eb1db5b9341796b61 (patch)
tree 6c24689f8dea71eeea83afc9396cf0ddca081edd /python
parent fd8b7f47a00ab364f6609a9c499996859d25e6a0 (diff)
download sandcrawler-6d6bf1c448246c6534d7087eb1db5b9341796b61.tar.gz
sandcrawler-6d6bf1c448246c6534d7087eb1db5b9341796b61.zip
worker kafka setting tweaks
These are all attempts to get kafka workers operating more smoothly.
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/workers.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 9f7c913..1e54a28 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -107,7 +107,7 @@ class KafkaSink(SandcrawlerWorker):
config = self.producer_config({
'bootstrap.servers': kafka_hosts,
- 'message.max.bytes': 20000000, # ~20 MBytes; broker is ~50 MBytes
+ 'message.max.bytes': 30000000, # ~30 MBytes; broker is ~50 MBytes
'api.version.request': True,
'api.version.fallback.ms': 0,
})
@@ -127,6 +127,7 @@ class KafkaSink(SandcrawlerWorker):
config.update({
'delivery.report.only.error': True,
'default.topic.config': {
+ 'message.timeout.ms': 30000,
'request.required.acks': -1, # all brokers must confirm
}
})
@@ -171,10 +172,11 @@ class KafkaGrobidSink(KafkaSink):
config.update({
'compression.codec': 'gzip',
'retry.backoff.ms': 250,
- 'linger.ms': 5000,
+ 'linger.ms': 1000,
'batch.num.messages': 50,
'delivery.report.only.error': True,
'default.topic.config': {
+ 'message.timeout.ms': 30000,
'request.required.acks': -1, # all brokers must confirm
}
})