From e4800fc4d0d0467d0e34a4059b941d001916e232 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Thu, 30 Sep 2021 15:24:22 -0700 Subject: new 'daily' and 'priority' ingest request topics The old ingest request queue kept getting lopsided. I suspect this is because it was scaled up (additional partitions were added) at some point in the past; hopefully the new topics will fix this. The new '-priority' queue is like '-bulk', but for smaller-volume SPN-like requests, e.g., interactive mode. --- python/sandcrawler_worker.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'python/sandcrawler_worker.py') diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py index 6be8bac..bd4ff67 100755 --- a/python/sandcrawler_worker.py +++ b/python/sandcrawler_worker.py @@ -204,9 +204,12 @@ def run_ingest_file(args): if args.bulk: consume_group = "sandcrawler-{}-ingest-file-bulk".format(args.env) consume_topic = "sandcrawler-{}.ingest-file-requests-bulk".format(args.env) + elif args.priority: + consume_group = "sandcrawler-{}-ingest-file-priority".format(args.env) + consume_topic = "sandcrawler-{}.ingest-file-requests-priority".format(args.env) else: consume_group = "sandcrawler-{}-ingest-file".format(args.env) - consume_topic = "sandcrawler-{}.ingest-file-requests".format(args.env) + consume_topic = "sandcrawler-{}.ingest-file-requests-daily".format(args.env) produce_topic = "sandcrawler-{}.ingest-file-results".format(args.env) grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.env) pdftext_topic = "sandcrawler-{}.pdf-text".format(args.env) @@ -353,6 +356,9 @@ def main(): sub_ingest_file.add_argument('--bulk', action='store_true', help="consume from bulk kafka topic (eg, for ingest backfill)") + sub_ingest_file.add_argument('--priority', + action='store_true', + help="consume from priority kafka topic (eg, for SPN requests)") sub_ingest_file.set_defaults(func=run_ingest_file) sub_persist_ingest_file = subparsers.add_parser('persist-ingest-file', -- cgit v1.2.3