aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/ia.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-09-30 17:17:30 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-09-30 17:17:30 -0700
commit: 8e26ab264190b998e9035f0883f00340ca220822 (patch)
tree: 89ad479146b482e798b0adfd533177926345dc3a /python/sandcrawler/ia.py
parent: f6125848f627ae9bfd3a36d807d2349e1c66bfe3 (diff)
download: sandcrawler-8e26ab264190b998e9035f0883f00340ca220822.tar.gz
sandcrawler-8e26ab264190b998e9035f0883f00340ca220822.zip
tune SPN CDX retry/wait depending on mode (priority vs daily)
Diffstat (limited to 'python/sandcrawler/ia.py')
-rw-r--r-- python/sandcrawler/ia.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 68b3466..a5d19cd 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -805,6 +805,8 @@ class SavePageNowClient:
self.poll_count = 60
self.poll_seconds = 3.0
+ self.spn_cdx_retry_sec = kwargs.get('spn_cdx_retry_sec', 9.0)
+
def save_url_now_v2(self, request_url, force_simple_get=0, capture_outlinks=0):
"""
Returns a "SavePageNowResult" (namedtuple) if SPN request was processed
@@ -1023,7 +1025,7 @@ class SavePageNowClient:
url=spn_result.terminal_url,
datetime=spn_result.terminal_dt,
filter_status_code=filter_status_code,
- retry_sleep=9.0,
+ retry_sleep=self.spn_cdx_retry_sec,
)
# sometimes there are fuzzy http/https self-redirects with the
# same SURT; try to work around that
@@ -1032,7 +1034,7 @@ class SavePageNowClient:
url=spn_result.terminal_url,
datetime=spn_result.terminal_dt,
filter_status_code=200,
- retry_sleep=9.0,
+ retry_sleep=self.spn_cdx_retry_sec,
)
except KeyError as ke:
print(" CDX KeyError: {}".format(ke), file=sys.stderr)