author      Bryan Newbold <bnewbold@archive.org>    2018-12-03 18:40:32 -0800
committer   Bryan Newbold <bnewbold@archive.org>    2018-12-03 18:40:34 -0800
commit      5a9d93c81e0671038fe94ddac7151117ad9b1720 (patch)
tree        b58c7dc9053318055c6f1ab291ab4305a31e104e
parent      d3901d21037ee740c0273cadf67d9783b420b029 (diff)
tweak grobid worker producer settings
Python CPU utilization shot way up; this is an attempt to bring it back
down.
-rwxr-xr-x   python/kafka_grobid.py   4
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index fc4e461..df222d8 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -252,8 +252,8 @@ class KafkaGrobidWorker:
                 compression=pykafka.common.CompressionType.GZIP,
                 retry_backoff_ms=250,
                 max_queued_messages=20,
-                min_queued_messages=1,
-                linger_ms=0,
+                min_queued_messages=3,
+                linger_ms=2000,
                 max_request_size=self.produce_max_request_size) as producer:
             print("Producing to: {}".format(self.produce_topic))
             consumer = consume_topic.get_balanced_consumer(
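For context on what the two changed settings do: with min_queued_messages=1 and linger_ms=0, the pykafka producer flushes on every single message; raising them to 3 and 2000 lets it buffer a small batch (or wait up to 2 seconds) before sending, which reduces per-message overhead in the Python worker thread. Below is a minimal sketch of a producer configured this way; the broker address, topic name, and message payload are assumptions for illustration, not taken from kafka_grobid.py.

# Sketch: pykafka producer with the batching settings from this commit.
# Broker address and topic name below are hypothetical.
import pykafka

client = pykafka.KafkaClient(hosts="localhost:9092")
produce_topic = client.topics[b"sandcrawler-grobid-output"]

with produce_topic.get_producer(
        compression=pykafka.common.CompressionType.GZIP,
        retry_backoff_ms=250,
        max_queued_messages=20,
        min_queued_messages=3,       # buffer a few messages before flushing a batch
        linger_ms=2000) as producer: # ...or flush after 2s, whichever comes first
    # Messages are queued and delivered in batches by a background thread,
    # rather than being sent one at a time.
    producer.produce(b"example message")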