aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-12-10 13:01:00 +0800
committer Bryan Newbold <bnewbold@archive.org> 2018-12-10 13:01:00 +0800
commit 4736db1b1caca50a83bf7fb0d45e2e8d48d4e233 (patch)
tree bb6afcf6176f67b76759590dd92404d04813025a
parent 70b2dcb9cd27225b8259e9d16eecd55fab3bea3d (diff)
download sandcrawler-4736db1b1caca50a83bf7fb0d45e2e8d48d4e233.tar.gz
sandcrawler-4736db1b1caca50a83bf7fb0d45e2e8d48d4e233.zip
crank hbase GROBID worker memory usage down
-rwxr-xr-x python/kafka_grobid_hbase.py 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/kafka_grobid_hbase.py b/python/kafka_grobid_hbase.py
index b6219eb..b52c386 100755
--- a/python/kafka_grobid_hbase.py
+++ b/python/kafka_grobid_hbase.py
@@ -146,7 +146,7 @@ class KafkaGrobidHbaseWorker:
managed=True,
auto_commit_enable=True,
# needed to avoid MessageSet decode errors
- fetch_message_max_bytes=32*1024*1024,
+ fetch_message_max_bytes=4*1024*1024,
# LATEST because best to miss processing than waste time re-process
auto_offset_reset=pykafka.common.OffsetType.LATEST,
compacted_topic=True)