diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-12-10 13:01:00 +0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-12-10 13:01:00 +0800 |
commit | 4736db1b1caca50a83bf7fb0d45e2e8d48d4e233 (patch) | |
tree | bb6afcf6176f67b76759590dd92404d04813025a | |
parent | 70b2dcb9cd27225b8259e9d16eecd55fab3bea3d (diff) | |
download | sandcrawler-4736db1b1caca50a83bf7fb0d45e2e8d48d4e233.tar.gz sandcrawler-4736db1b1caca50a83bf7fb0d45e2e8d48d4e233.zip |
crank hbase GROBID worker memory usage down
-rwxr-xr-x | python/kafka_grobid_hbase.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/python/kafka_grobid_hbase.py b/python/kafka_grobid_hbase.py
index b6219eb..b52c386 100755
--- a/python/kafka_grobid_hbase.py
+++ b/python/kafka_grobid_hbase.py
@@ -146,7 +146,7 @@ class KafkaGrobidHbaseWorker:
             managed=True,
             auto_commit_enable=True,
             # needed to avoid MessageSet decode errors
-            fetch_message_max_bytes=32*1024*1024,
+            fetch_message_max_bytes=4*1024*1024,
             # LATEST because best to miss processing than waste time re-process
             auto_offset_reset=pykafka.common.OffsetType.LATEST,
             compacted_topic=True)
```