aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2018-12-10 12:12:45 +0800
committer: Bryan Newbold <bnewbold@archive.org> 2018-12-10 12:12:45 +0800
commit: 70b2dcb9cd27225b8259e9d16eecd55fab3bea3d (patch)
tree: 4dfe9a93c9173965d278cab65abd6f894d900704
parent: e8ea9ad1370247fba573f4a4aa85edffa82a7494 (diff)
downloadsandcrawler-70b2dcb9cd27225b8259e9d16eecd55fab3bea3d.tar.gz
sandcrawler-70b2dcb9cd27225b8259e9d16eecd55fab3bea3d.zip
increase message size (kafka-grobid-hbase)
-rwxr-xr-x python/kafka_grobid_hbase.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/kafka_grobid_hbase.py b/python/kafka_grobid_hbase.py
index 466ccb6..b6219eb 100755
--- a/python/kafka_grobid_hbase.py
+++ b/python/kafka_grobid_hbase.py
@@ -145,6 +145,8 @@ class KafkaGrobidHbaseWorker:
consumer_group=self.consumer_group,
managed=True,
auto_commit_enable=True,
+ # needed to avoid MessageSet decode errors
+ fetch_message_max_bytes=32*1024*1024,
# LATEST because best to miss processing than waste time re-process
auto_offset_reset=pykafka.common.OffsetType.LATEST,
compacted_topic=True)