From be5a45fdf0622e6b65663d08f577aa41e0e89be0 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 25 Jun 2020 13:30:31 -0700
Subject: s3-only mode persist workers use different consumer group

---
 python/sandcrawler_worker.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index 3b49cf2..638dc35 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -89,11 +89,14 @@ def run_persist_grobid(args):
         s3_only=args.s3_only,
         db_only=args.db_only,
     )
+    kafka_group = "persist-grobid"
+    if args.s3_only:
+        kafka_group += "-s3"
     pusher = KafkaJsonPusher(
         worker=worker,
         kafka_hosts=args.kafka_hosts,
         consume_topic=consume_topic,
-        group="persist-grobid",
+        group=kafka_group,
         push_batches=True,
         batch_size=25,
     )
@@ -110,11 +113,14 @@ def run_persist_pdftext(args):
         s3_only=args.s3_only,
         db_only=args.db_only,
     )
+    kafka_group = "persist-pdf-text"
+    if args.s3_only:
+        kafka_group += "-s3"
     pusher = KafkaJsonPusher(
         worker=worker,
         kafka_hosts=args.kafka_hosts,
         consume_topic=consume_topic,
-        group="persist-pdf-text",
+        group=kafka_group,
         push_batches=True,
         batch_size=25,
     )
-- 
cgit v1.2.3