aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-01-29 17:37:49 -0800
committerBryan Newbold <bnewbold@archive.org>2020-01-29 17:37:52 -0800
commit6a29e396da2289e6524097ef906b63358eb3cfec (patch)
treeada89e174742f1dfa899b938ca3452a87d5cc686 /python
parent41d25749b46d9ed0bfcb57de8c6cd5399ea54de7 (diff)
downloadsandcrawler-6a29e396da2289e6524097ef906b63358eb3cfec.tar.gz
sandcrawler-6a29e396da2289e6524097ef906b63358eb3cfec.zip
sandcrawler_worker: ingest worker distinct consumer groups
I'm in the process of resetting these consumer groups, so might as well take the opportunity to split by topic and use the new canonical naming format.
Diffstat (limited to 'python')
-rwxr-xr-xpython/sandcrawler_worker.py4
1 files changed, 3 insertions, 1 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index be3ced7..f13116a 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -71,8 +71,10 @@ def run_persist_grobid(args):
def run_ingest_file(args):
if args.bulk:
+ consume_group = "sandcrawler-{}-ingest-file-bulk".format(args.env)
consume_topic = "sandcrawler-{}.ingest-file-requests-bulk".format(args.env)
else:
+ consume_group = "sandcrawler-{}-ingest-file".format(args.env)
consume_topic = "sandcrawler-{}.ingest-file-requests".format(args.env)
produce_topic = "sandcrawler-{}.ingest-file-results".format(args.env)
grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.env)
@@ -96,7 +98,7 @@ def run_ingest_file(args):
worker=worker,
kafka_hosts=args.kafka_hosts,
consume_topic=consume_topic,
- group="ingest-file",
+ group=consume_group,
batch_size=1,
)
pusher.run()