about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2022-02-08 17:51:55 -0800
committer Bryan Newbold <bnewbold@archive.org> 2022-02-08 17:51:55 -0800
commit 8993f5208a811b9f79013789d4e5150b7366421f (patch)
tree 69919ef00f00e4930ad2c4c5fcafa7b975acce5a
parent dac74c04ac064dcfc8e28ab9fb659e8a09bdcba3 (diff)
download sandcrawler-8993f5208a811b9f79013789d4e5150b7366421f.tar.gz
sandcrawler-8993f5208a811b9f79013789d4e5150b7366421f.zip
sandcrawler_worker: add --skip-spn flag
-rwxr-xr-x python/sandcrawler_worker.py 9
1 files changed, 7 insertions, 2 deletions
diff --git a/python/sandcrawler_worker.py b/python/sandcrawler_worker.py
index 482dc33..dd7b07f 100755
--- a/python/sandcrawler_worker.py
+++ b/python/sandcrawler_worker.py
@@ -278,8 +278,8 @@ def run_ingest_file(args):
pdftext_sink=pdftext_sink,
xmldoc_sink=xmldoc_sink,
htmlteixml_sink=htmlteixml_sink,
- # don't SPNv2 for --bulk backfill
- try_spn2=not args.bulk,
+ # don't SPNv2 for --bulk or --skip-spn
+ try_spn2=not (args.bulk or args.skip_spn),
spn_cdx_retry_sec=spn_cdx_retry_sec,
)
pusher = KafkaJsonPusher(
@@ -448,6 +448,11 @@ def main():
help="consume from bulk kafka topic (eg, for ingest backfill)",
)
sub_ingest_file.add_argument(
+ "--skip-spn",
+ action="store_true",
+ help="don't do SPN lookups",
+ )
+ sub_ingest_file.add_argument(
"--priority",
action="store_true",
help="consume from priority kafka topic (eg, for SPN requests)",