aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler_worker.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org>, 2020-01-28 19:13:22 -0800
committer: Bryan Newbold <bnewbold@archive.org>, 2020-01-28 19:13:24 -0800
commit: fd8b7f47a00ab364f6609a9c499996859d25e6a0 (patch)
tree: 5eadb7ba5e22ad2c0701b15e779baa5152409043 /python/sandcrawler_worker.py
parent: 9f53880c746b9fd84261e3ab7dbbee81501df394 (diff)
download: sandcrawler-fd8b7f47a00ab364f6609a9c499996859d25e6a0.tar.gz
download: sandcrawler-fd8b7f47a00ab364f6609a9c499996859d25e6a0.zip
make grobid-extract worker batch size 1
This is part of attempts to fix Kafka errors that look like they might be timeouts.
Diffstat (limited to 'python/sandcrawler_worker.py')
-rwxr-xr-x python/sandcrawler_worker.py | 1
1 files changed, 1 insertions, 0 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index 12b8bb9..be3ced7 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -45,6 +45,7 @@ def run_grobid_extract(args):
kafka_hosts=args.kafka_hosts,
consume_topic=consume_topic,
group="grobid-extract",
+ batch_size=1,
)
pusher.run()