diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-28 19:13:22 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-28 19:13:24 -0800 |
commit | fd8b7f47a00ab364f6609a9c499996859d25e6a0 (patch) | |
tree | 5eadb7ba5e22ad2c0701b15e779baa5152409043 /python | |
parent | 9f53880c746b9fd84261e3ab7dbbee81501df394 (diff) | |
download | sandcrawler-fd8b7f47a00ab364f6609a9c499996859d25e6a0.tar.gz sandcrawler-fd8b7f47a00ab364f6609a9c499996859d25e6a0.zip |
make grobid-extract worker batch size 1
This is part of attempts to fix Kafka errors that look like they might
be timeouts.
Diffstat (limited to 'python')
-rwxr-xr-x | python/sandcrawler_worker.py | 1 |
1 file changed, 1 insertion, 0 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index 12b8bb9..be3ced7 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -45,6 +45,7 @@ def run_grobid_extract(args):
         kafka_hosts=args.kafka_hosts,
         consume_topic=consume_topic,
         group="grobid-extract",
+        batch_size=1,
     )
     pusher.run()
 