about summary refs log tree commit diff stats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2019-03-06 11:13:55 -0800
committerBryan Newbold <bnewbold@robocracy.org>2019-03-06 11:13:55 -0800
commit7c93fa7ce5d60efe4f1694c1dedc051245017009 (patch)
tree3103ada7a449a3473a6f9ab8eb315cd9688beca5 /python/fatcat_tools
parentd8f022d6d183607860b6d1013af0127de2b6c1ba (diff)
downloadfatcat-7c93fa7ce5d60efe4f1694c1dedc051245017009.tar.gz
fatcat-7c93fa7ce5d60efe4f1694c1dedc051245017009.zip
10 MByte default Kafka produce (workers)
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/workers/changelog.py     | 8
-rw-r--r-- python/fatcat_tools/workers/worker_common.py | 3
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/python/fatcat_tools/workers/changelog.py b/python/fatcat_tools/workers/changelog.py
index 39a84f18..6319d55a 100644
--- a/python/fatcat_tools/workers/changelog.py
+++ b/python/fatcat_tools/workers/changelog.py
@@ -33,7 +33,9 @@ class ChangelogWorker(FatcatWorker):
else:
self.offset = 1
- with topic.get_producer() as producer:
+ with topic.get_producer(
+ max_request_size=self.produce_max_request_size,
+ ) as producer:
while True:
latest = int(self.api.get_changelog(limit=1)[0].index)
if latest > self.offset:
@@ -85,7 +87,9 @@ class EntityUpdatesWorker(FatcatWorker):
# using a sync producer to try and avoid racey loss of delivery (aka,
# if consumer group updated but produce didn't stick)
- with release_topic.get_sync_producer() as producer:
+ with release_topic.get_sync_producer(
+ max_request_size=self.produce_max_request_size,
+ ) as producer:
for msg in consumer:
cle = json.loads(msg.value.decode('utf-8'))
#print(cle)
diff --git a/python/fatcat_tools/workers/worker_common.py b/python/fatcat_tools/workers/worker_common.py
index 57fb710c..cb4e5dab 100644
--- a/python/fatcat_tools/workers/worker_common.py
+++ b/python/fatcat_tools/workers/worker_common.py
@@ -52,3 +52,6 @@ class FatcatWorker:
self.kafka = KafkaClient(hosts=kafka_hosts, broker_version="1.0.0")
self.produce_topic = produce_topic
self.consume_topic = consume_topic
+
+ # Kafka producer batch size tuning; also limit on size of single document
+ self.produce_max_request_size = 10000000 # 10 MByte-ish