diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-12-03 18:15:29 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-12-03 18:15:29 -0800 |
commit | d3901d21037ee740c0273cadf67d9783b420b029 (patch) | |
tree | a7017246b84f90964c1c9e38fe996f9c251ce618 /python/kafka_grobid_hbase.py | |
parent | 255f76dcbe15eaa9f032f26c19a6f28b4690d204 (diff) | |
download | sandcrawler-d3901d21037ee740c0273cadf67d9783b420b029.tar.gz sandcrawler-d3901d21037ee740c0273cadf67d9783b420b029.zip |
tweak kafka config significantly
Diffstat (limited to 'python/kafka_grobid_hbase.py')
-rwxr-xr-x | python/kafka_grobid_hbase.py | 2 |
1 files changed, 2 insertions, 0 deletions
```diff
diff --git a/python/kafka_grobid_hbase.py b/python/kafka_grobid_hbase.py
index 7c5c4a2..466ccb6 100755
--- a/python/kafka_grobid_hbase.py
+++ b/python/kafka_grobid_hbase.py
@@ -145,6 +145,8 @@ class KafkaGrobidHbaseWorker:
             consumer_group=self.consumer_group,
             managed=True,
             auto_commit_enable=True,
+            # LATEST because best to miss processing than waste time re-process
+            auto_offset_reset=pykafka.common.OffsetType.LATEST,
             compacted_topic=True)
         print("Kafka consuming {} in group {}".format(
             self.consume_topic,
```