diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-08-05 13:06:08 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-08-05 13:06:10 -0700 |
commit | ae531a3314742deb1bdd2560ffbcaa2d1f8d829b (patch) | |
tree | 7237acdce27b7c42690731aa01f9675a88067085 | |
parent | 576b52831d9f17adaee9839db20b4145ba141d96 (diff) | |
download | sandcrawler-ae531a3314742deb1bdd2560ffbcaa2d1f8d829b.tar.gz sandcrawler-ae531a3314742deb1bdd2560ffbcaa2d1f8d829b.zip |
spn2: skip js behavior (experiment)
Hoping this will increase crawling throughput with little-to-no impact
on fidelity.
-rw-r--r-- | python/sandcrawler/ia.py | 1 |
1 file changed, 1 insertion, 0 deletions
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 63abb45..150de53 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -814,6 +814,7 @@ class SavePageNowClient:
                 'force_get': force_get,
                 'skip_first_archive': 1,
                 'outlinks_availability': 0,
+                'js_behavior_timeout': 0,
             },
         )
         if resp.status_code == 429:
```