aboutsummaryrefslogtreecommitdiffstats
path: root/python/kafka_grobid_hbase.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-12-03 18:15:29 -0800
committer Bryan Newbold <bnewbold@archive.org> 2018-12-03 18:15:29 -0800
commit d3901d21037ee740c0273cadf67d9783b420b029 (patch)
tree a7017246b84f90964c1c9e38fe996f9c251ce618 /python/kafka_grobid_hbase.py
parent 255f76dcbe15eaa9f032f26c19a6f28b4690d204 (diff)
download sandcrawler-d3901d21037ee740c0273cadf67d9783b420b029.tar.gz
sandcrawler-d3901d21037ee740c0273cadf67d9783b420b029.zip
tweak kafka config significantly
Diffstat (limited to 'python/kafka_grobid_hbase.py')
-rwxr-xr-x python/kafka_grobid_hbase.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/kafka_grobid_hbase.py b/python/kafka_grobid_hbase.py
index 7c5c4a2..466ccb6 100755
--- a/python/kafka_grobid_hbase.py
+++ b/python/kafka_grobid_hbase.py
@@ -145,6 +145,8 @@ class KafkaGrobidHbaseWorker:
consumer_group=self.consumer_group,
managed=True,
auto_commit_enable=True,
+ # LATEST because it is better to miss processing than to waste time re-processing
+ auto_offset_reset=pykafka.common.OffsetType.LATEST,
compacted_topic=True)
print("Kafka consuming {} in group {}".format(
self.consume_topic,