From 8993f5208a811b9f79013789d4e5150b7366421f Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 8 Feb 2022 17:51:55 -0800 Subject: sandcrawler_worker: add --skip-spn flag --- python/sandcrawler_worker.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py index 482dc33..dd7b07f 100755 --- a/python/sandcrawler_worker.py +++ b/python/sandcrawler_worker.py @@ -278,8 +278,8 @@ def run_ingest_file(args): pdftext_sink=pdftext_sink, xmldoc_sink=xmldoc_sink, htmlteixml_sink=htmlteixml_sink, - # don't SPNv2 for --bulk backfill - try_spn2=not args.bulk, + # don't SPNv2 for --bulk or --skip-spn + try_spn2=not (args.bulk or args.skip_spn), spn_cdx_retry_sec=spn_cdx_retry_sec, ) pusher = KafkaJsonPusher( @@ -447,6 +447,11 @@ def main(): action="store_true", help="consume from bulk kafka topic (eg, for ingest backfill)", ) + sub_ingest_file.add_argument( + "--skip-spn", + action="store_true", + help="don't do SPN lookups", + ) sub_ingest_file.add_argument( "--priority", action="store_true", -- cgit v1.2.3