diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-02-08 17:51:55 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-02-08 17:51:55 -0800 |
commit | 8993f5208a811b9f79013789d4e5150b7366421f (patch) | |
tree | 69919ef00f00e4930ad2c4c5fcafa7b975acce5a | |
parent | dac74c04ac064dcfc8e28ab9fb659e8a09bdcba3 (diff) | |
download | sandcrawler-8993f5208a811b9f79013789d4e5150b7366421f.tar.gz sandcrawler-8993f5208a811b9f79013789d4e5150b7366421f.zip |
sandcrawler_worker: add --skip-spn flag
-rwxr-xr-x | python/sandcrawler_worker.py | 9 |
1 file changed, 7 insertions, 2 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py index 482dc33..dd7b07f 100755 --- a/python/sandcrawler_worker.py +++ b/python/sandcrawler_worker.py @@ -278,8 +278,8 @@ def run_ingest_file(args): pdftext_sink=pdftext_sink, xmldoc_sink=xmldoc_sink, htmlteixml_sink=htmlteixml_sink, - # don't SPNv2 for --bulk backfill - try_spn2=not args.bulk, + # don't SPNv2 for --bulk or --skip-spn + try_spn2=not (args.bulk or args.skip_spn), spn_cdx_retry_sec=spn_cdx_retry_sec, ) pusher = KafkaJsonPusher( @@ -448,6 +448,11 @@ def main(): help="consume from bulk kafka topic (eg, for ingest backfill)", ) sub_ingest_file.add_argument( + "--skip-spn", + action="store_true", + help="don't do SPN lookups", + ) + sub_ingest_file.add_argument( "--priority", action="store_true", help="consume from priority kafka topic (eg, for SPN requests)", |