aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler_worker.py
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2021-09-30 15:24:22 -0700
committerBryan Newbold <bnewbold@archive.org>2021-09-30 15:24:24 -0700
commite4800fc4d0d0467d0e34a4059b941d001916e232 (patch)
treee789c6bfe7dfa95bc497b0329c9f9939864b1b71 /python/sandcrawler_worker.py
parent1c43b0d2a663815c7cb43c918933588f5184c714 (diff)
downloadsandcrawler-e4800fc4d0d0467d0e34a4059b941d001916e232.tar.gz
sandcrawler-e4800fc4d0d0467d0e34a4059b941d001916e232.zip
new 'daily' and 'priority' ingest request topics
The old ingest request queue was always getting lopsided, suspect because it was scaled up (additional partitions) at some point in the past, hoping new topics will fix this. New '-priority' queue is like '-bulk', but for smaller-volume SPN-like requests. Eg, interactive mode.
Diffstat (limited to 'python/sandcrawler_worker.py')
-rwxr-xr-xpython/sandcrawler_worker.py8
1 files changed, 7 insertions, 1 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index 6be8bac..bd4ff67 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -204,9 +204,12 @@ def run_ingest_file(args):
if args.bulk:
consume_group = "sandcrawler-{}-ingest-file-bulk".format(args.env)
consume_topic = "sandcrawler-{}.ingest-file-requests-bulk".format(args.env)
+ elif args.priority:
+ consume_group = "sandcrawler-{}-ingest-file-priority".format(args.env)
+ consume_topic = "sandcrawler-{}.ingest-file-requests-priority".format(args.env)
else:
consume_group = "sandcrawler-{}-ingest-file".format(args.env)
- consume_topic = "sandcrawler-{}.ingest-file-requests".format(args.env)
+ consume_topic = "sandcrawler-{}.ingest-file-requests-daily".format(args.env)
produce_topic = "sandcrawler-{}.ingest-file-results".format(args.env)
grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.env)
pdftext_topic = "sandcrawler-{}.pdf-text".format(args.env)
@@ -353,6 +356,9 @@ def main():
sub_ingest_file.add_argument('--bulk',
action='store_true',
help="consume from bulk kafka topic (eg, for ingest backfill)")
+ sub_ingest_file.add_argument('--priority',
+ action='store_true',
+ help="consume from priority kafka topic (eg, for SPN requests)")
sub_ingest_file.set_defaults(func=run_ingest_file)
sub_persist_ingest_file = subparsers.add_parser('persist-ingest-file',