diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-29 17:37:49 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-29 17:37:52 -0800 |
commit | 6a29e396da2289e6524097ef906b63358eb3cfec (patch) | |
tree | ada89e174742f1dfa899b938ca3452a87d5cc686 /python | |
parent | 41d25749b46d9ed0bfcb57de8c6cd5399ea54de7 (diff) | |
download | sandcrawler-6a29e396da2289e6524097ef906b63358eb3cfec.tar.gz sandcrawler-6a29e396da2289e6524097ef906b63358eb3cfec.zip |
sandcrawler_worker: ingest worker distinct consumer groups
I'm in the process of resetting these consumer groups, so might as well
take the opportunity to split by topic and use the new canonical naming
format.
Diffstat (limited to 'python')
-rwxr-xr-x | python/sandcrawler_worker.py | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py index be3ced7..f13116a 100755 --- a/python/sandcrawler_worker.py +++ b/python/sandcrawler_worker.py @@ -71,8 +71,10 @@ def run_persist_grobid(args): def run_ingest_file(args): if args.bulk: + consume_group = "sandcrawler-{}-ingest-file-bulk".format(args.env) consume_topic = "sandcrawler-{}.ingest-file-requests-bulk".format(args.env) else: + consume_group = "sandcrawler-{}-ingest-file".format(args.env) consume_topic = "sandcrawler-{}.ingest-file-requests".format(args.env) produce_topic = "sandcrawler-{}.ingest-file-results".format(args.env) grobid_topic = "sandcrawler-{}.grobid-output-pg".format(args.env) @@ -96,7 +98,7 @@ def run_ingest_file(args): worker=worker, kafka_hosts=args.kafka_hosts, consume_topic=consume_topic, - group="ingest-file", + group=consume_group, batch_size=1, ) pusher.run() |