From 7c93fa7ce5d60efe4f1694c1dedc051245017009 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 6 Mar 2019 11:13:55 -0800 Subject: 10 MByte default Kafka produce (workers) --- python/fatcat_tools/workers/changelog.py | 8 ++++++-- python/fatcat_tools/workers/worker_common.py | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py index 39a84f18..6319d55a 100644 --- a/python/fatcat_tools/workers/changelog.py +++ b/python/fatcat_tools/workers/changelog.py @@ -33,7 +33,9 @@ class ChangelogWorker(FatcatWorker): else: self.offset = 1 - with topic.get_producer() as producer: + with topic.get_producer( + max_request_size=self.produce_max_request_size, + ) as producer: while True: latest = int(self.api.get_changelog(limit=1)[0].index) if latest > self.offset: @@ -85,7 +87,9 @@ class EntityUpdatesWorker(FatcatWorker): # using a sync producer to try and avoid racey loss of delivery (aka, # if consumer group updated but produce didn't stick) - with release_topic.get_sync_producer() as producer: + with release_topic.get_sync_producer( + max_request_size=self.produce_max_request_size, + ) as producer: for msg in consumer: cle = json.loads(msg.value.decode('utf-8')) #print(cle) diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py index 57fb710c..cb4e5dab 100644 --- a/python/fatcat_tools/workers/worker_common.py +++ b/python/fatcat_tools/workers/worker_common.py @@ -52,3 +52,6 @@ class FatcatWorker: self.kafka = KafkaClient(hosts=kafka_hosts, broker_version="1.0.0") self.produce_topic = produce_topic self.consume_topic = consume_topic + + # Kafka producer batch size tuning; also limit on size of single document + self.produce_max_request_size = 10000000 # 10 MByte-ish -- cgit v1.2.3