diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-09-30 15:24:22 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-09-30 15:24:24 -0700 |
commit | e4800fc4d0d0467d0e34a4059b941d001916e232 (patch) | |
tree | e789c6bfe7dfa95bc497b0329c9f9939864b1b71 /python | |
parent | 1c43b0d2a663815c7cb43c918933588f5184c714 (diff) | |
download | sandcrawler-e4800fc4d0d0467d0e34a4059b941d001916e232.tar.gz sandcrawler-e4800fc4d0d0467d0e34a4059b941d001916e232.zip |
new 'daily' and 'priority' ingest request topics
The old ingest request queue was always getting lopsided, suspect
because it was scaled up (additional partitions) at some point in the
past, hoping new topics will fix this.
New '-priority' queue is like '-bulk', but for smaller-volume SPN-like
requests. Eg, interactive mode.
Diffstat (limited to 'python')
-rwxr-xr-x | python/sandcrawler_worker.py | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py index 6be8bac..bd4ff67 100755 --- a/python/sandcrawler_worker.py +++ b/python/sandcrawler_worker.py @@ -204,9 +204,12 @@ def run_ingest_file(args): if args.bulk: consume_group = "sandcrawler-{}-ingest-file-bulk".format(args.env) consume_topic = "sandcrawler-{}.ingest-file-requests-bulk".format(args.env) + elif args.priority: + consume_group = "sandcrawler-{}-ingest-file-priority".format(args.env) + consume_topic = "sandcrawler-{}.ingest-file-requests-priority".format(args.env) else: consume_group = "sandcrawler-{}-ingest-file".format(args.env) - consume_topic = "sandcrawler-{}.ingest-file-requests".format(args.env) + consume_topic = "sandcrawler-{}.ingest-file-requests-daily".format(args.env) produce_topic = "sandcrawler-{}.ingest-file-results".format(args.env) grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.env) pdftext_topic = "sandcrawler-{}.pdf-text".format(args.env) @@ -353,6 +356,9 @@ def main(): sub_ingest_file.add_argument('--bulk', action='store_true', help="consume from bulk kafka topic (eg, for ingest backfill)") + sub_ingest_file.add_argument('--priority', + action='store_true', + help="consume from priority kafka topic (eg, for SPN requests)") sub_ingest_file.set_defaults(func=run_ingest_file) sub_persist_ingest_file = subparsers.add_parser('persist-ingest-file', |