From 6a29e396da2289e6524097ef906b63358eb3cfec Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 29 Jan 2020 17:37:49 -0800 Subject: sandcrawler_worker: ingest worker distinct consumer groups I'm in the process of resetting these consumer groups, so might as well take the opportunity to split by topic and use the new canonical naming format. --- python/sandcrawler_worker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py index be3ced7..f13116a 100755 --- a/python/sandcrawler_worker.py +++ b/python/sandcrawler_worker.py @@ -71,8 +71,10 @@ def run_persist_grobid(args): def run_ingest_file(args): if args.bulk: + consume_group = "sandcrawler-{}-ingest-file-bulk".format(args.env) consume_topic = "sandcrawler-{}.ingest-file-requests-bulk".format(args.env) else: + consume_group = "sandcrawler-{}-ingest-file".format(args.env) consume_topic = "sandcrawler-{}.ingest-file-requests".format(args.env) produce_topic = "sandcrawler-{}.ingest-file-results".format(args.env) grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.env) @@ -96,7 +98,7 @@ def run_ingest_file(args): worker=worker, kafka_hosts=args.kafka_hosts, consume_topic=consume_topic, - group="ingest-file", + group=consume_group, batch_size=1, ) pusher.run() -- cgit v1.2.3