diff options
author | Bryan Newbold <bnewbold@archive.org> | 2018-11-21 16:51:29 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2018-11-21 16:51:29 -0800 |
commit | 9e7c651806645b1e59b07f354ce7cdece17d76b7 (patch) | |
tree | f4e8a8b0b601a0a617e3498f42a96087899aa5f5 | |
parent | a15b5d17b342a2e05fe23be6a7f731cdb21ce0da (diff) | |
download | sandcrawler-9e7c651806645b1e59b07f354ce7cdece17d76b7.tar.gz sandcrawler-9e7c651806645b1e59b07f354ce7cdece17d76b7.zip |
small kafka_grobid tweaks
-rwxr-xr-x | python/kafka_grobid.py | 3 |
1 files changed, 2 insertions, 1 deletions
```diff
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index b12b492..e57ace6 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -32,6 +32,7 @@ Requires:
 # in `wayback` library. Means we can't run pylint.
 # pylint: skip-file
 
+import sys
 import xml
 import json
 import raven
@@ -227,7 +228,7 @@ class KafkaGrobidWorker:
         consumer = consume_topic.get_balanced_consumer(
             consumer_group=self.consumer_group,
             managed=True,
-            fetch_message_max_bytes=10000, # only ~10kbytes at a time
+            #fetch_message_max_bytes=100000, # only ~100kbytes at a time
             auto_commit_enable=True,
             auto_commit_interval_ms=60000, # 60 seconds
             compacted_topic=True)
```