aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2018-11-21 16:51:29 -0800
committer Bryan Newbold <bnewbold@archive.org> 2018-11-21 16:51:29 -0800
commit 9e7c651806645b1e59b07f354ce7cdece17d76b7 (patch)
tree f4e8a8b0b601a0a617e3498f42a96087899aa5f5 /python
parent a15b5d17b342a2e05fe23be6a7f731cdb21ce0da (diff)
download sandcrawler-9e7c651806645b1e59b07f354ce7cdece17d76b7.tar.gz
sandcrawler-9e7c651806645b1e59b07f354ce7cdece17d76b7.zip
small kafka_grobid tweaks
Diffstat (limited to 'python')
-rwxr-xr-x python/kafka_grobid.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/kafka_grobid.py b/python/kafka_grobid.py
index b12b492..e57ace6 100755
--- a/python/kafka_grobid.py
+++ b/python/kafka_grobid.py
@@ -32,6 +32,7 @@ Requires:
# in `wayback` library. Means we can't run pylint.
# pylint: skip-file
+import sys
import xml
import json
import raven
@@ -227,7 +228,7 @@ class KafkaGrobidWorker:
consumer = consume_topic.get_balanced_consumer(
consumer_group=self.consumer_group,
managed=True,
- fetch_message_max_bytes=10000, # only ~10kbytes at a time
+ #fetch_message_max_bytes=100000, # only ~100kbytes at a time
auto_commit_enable=True,
auto_commit_interval_ms=60000, # 60 seconds
compacted_topic=True)