diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-09-30 17:17:30 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-09-30 17:17:30 -0700 |
commit | 8e26ab264190b998e9035f0883f00340ca220822 (patch) | |
tree | 89ad479146b482e798b0adfd533177926345dc3a /python/sandcrawler_worker.py | |
parent | f6125848f627ae9bfd3a36d807d2349e1c66bfe3 (diff) | |
download | sandcrawler-8e26ab264190b998e9035f0883f00340ca220822.tar.gz sandcrawler-8e26ab264190b998e9035f0883f00340ca220822.zip |
tune SPN CDX retry/wait depending on mode (priority vs daily)
Diffstat (limited to 'python/sandcrawler_worker.py')
-rwxr-xr-x | python/sandcrawler_worker.py | 4 |
1 file changed, 4 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index bd4ff67..8e275cf 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -201,13 +201,16 @@ def run_persist_pdftrio(args):
     pusher.run()
 
 def run_ingest_file(args):
+    spn_cdx_retry_sec = 9.0
     if args.bulk:
         consume_group = "sandcrawler-{}-ingest-file-bulk".format(args.env)
         consume_topic = "sandcrawler-{}.ingest-file-requests-bulk".format(args.env)
     elif args.priority:
+        spn_cdx_retry_sec = 45.0
         consume_group = "sandcrawler-{}-ingest-file-priority".format(args.env)
         consume_topic = "sandcrawler-{}.ingest-file-requests-priority".format(args.env)
     else:
+        spn_cdx_retry_sec = 1.0
         consume_group = "sandcrawler-{}-ingest-file".format(args.env)
         consume_topic = "sandcrawler-{}.ingest-file-requests-daily".format(args.env)
     produce_topic = "sandcrawler-{}.ingest-file-results".format(args.env)
@@ -253,6 +256,7 @@ def run_ingest_file(args):
         htmlteixml_sink=htmlteixml_sink,
         # don't SPNv2 for --bulk backfill
         try_spn2=not args.bulk,
+        spn_cdx_retry_sec=spn_cdx_retry_sec,
     )
     pusher = KafkaJsonPusher(
         worker=worker,
```