diff options
-rwxr-xr-x | python/sandcrawler_worker.py | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index 482dc33..dd7b07f 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -278,8 +278,8 @@ def run_ingest_file(args):
         pdftext_sink=pdftext_sink,
         xmldoc_sink=xmldoc_sink,
         htmlteixml_sink=htmlteixml_sink,
-        # don't SPNv2 for --bulk backfill
-        try_spn2=not args.bulk,
+        # don't SPNv2 for --bulk or --skip-spn
+        try_spn2=not (args.bulk or args.skip_spn),
         spn_cdx_retry_sec=spn_cdx_retry_sec,
     )
     pusher = KafkaJsonPusher(
@@ -448,6 +448,11 @@ def main():
         help="consume from bulk kafka topic (eg, for ingest backfill)",
     )
     sub_ingest_file.add_argument(
+        "--skip-spn",
+        action="store_true",
+        help="don't do SPN lookups",
+    )
+    sub_ingest_file.add_argument(
         "--priority",
         action="store_true",
         help="consume from priority kafka topic (eg, for SPN requests)",