diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-12-10 12:12:45 +0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-12-10 12:12:45 +0800 |
commit | 70b2dcb9cd27225b8259e9d16eecd55fab3bea3d (patch) | |
tree | 4dfe9a93c9173965d278cab65abd6f894d900704 | |
parent | e8ea9ad1370247fba573f4a4aa85edffa82a7494 (diff) | |
download | sandcrawler-70b2dcb9cd27225b8259e9d16eecd55fab3bea3d.tar.gz sandcrawler-70b2dcb9cd27225b8259e9d16eecd55fab3bea3d.zip |
increase message size (kafka-grobid-hbase)
-rwxr-xr-x | python/kafka_grobid_hbase.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/python/kafka_grobid_hbase.py b/python/kafka_grobid_hbase.py
index 466ccb6..b6219eb 100755
--- a/python/kafka_grobid_hbase.py
+++ b/python/kafka_grobid_hbase.py
@@ -145,6 +145,8 @@ class KafkaGrobidHbaseWorker:
     consumer_group=self.consumer_group,
     managed=True,
     auto_commit_enable=True,
+    # needed to avoid MessageSet decode errors
+    fetch_message_max_bytes=32*1024*1024,
     # LATEST because best to miss processing than waste time re-process
     auto_offset_reset=pykafka.common.OffsetType.LATEST,
     compacted_topic=True)