about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-08-05 13:06:08 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-08-05 13:06:10 -0700
commit ae531a3314742deb1bdd2560ffbcaa2d1f8d829b (patch)
tree 7237acdce27b7c42690731aa01f9675a88067085
parent 576b52831d9f17adaee9839db20b4145ba141d96 (diff)
download sandcrawler-ae531a3314742deb1bdd2560ffbcaa2d1f8d829b.tar.gz
sandcrawler-ae531a3314742deb1bdd2560ffbcaa2d1f8d829b.zip
spn2: skip js behavior (experiment)
Hoping this will increase crawling throughput with little-to-no impact on fidelity.
-rw-r--r-- python/sandcrawler/ia.py 1
1 files changed, 1 insertions, 0 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 63abb45..150de53 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -814,6 +814,7 @@ class SavePageNowClient:
'force_get': force_get,
'skip_first_archive': 1,
'outlinks_availability': 0,
+ 'js_behavior_timeout': 0,
},
)
if resp.status_code == 429: