aboutsummaryrefslogtreecommitdiffstats
path: root/python/kafka_grobid.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-12-03 18:40:32 -0800
committer Bryan Newbold <bnewbold@archive.org> 2018-12-03 18:40:34 -0800
commit 5a9d93c81e0671038fe94ddac7151117ad9b1720 (patch)
tree b58c7dc9053318055c6f1ab291ab4305a31e104e /python/kafka_grobid.py
parent d3901d21037ee740c0273cadf67d9783b420b029 (diff)
download sandcrawler-5a9d93c81e0671038fe94ddac7151117ad9b1720.tar.gz
sandcrawler-5a9d93c81e0671038fe94ddac7151117ad9b1720.zip
tweak grobid worker producer settings
Python CPU utilization shot way up; this is an attempt to bring it back down.
Diffstat (limited to 'python/kafka_grobid.py')
-rwxr-xr-x python/kafka_grobid.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index fc4e461..df222d8 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -252,8 +252,8 @@ class KafkaGrobidWorker:
compression=pykafka.common.CompressionType.GZIP,
retry_backoff_ms=250,
max_queued_messages=20,
- min_queued_messages=1,
- linger_ms=0,
+ min_queued_messages=3,
+ linger_ms=2000,
max_request_size=self.produce_max_request_size) as producer:
print("Producing to: {}".format(self.produce_topic))
consumer = consume_topic.get_balanced_consumer(