aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-10-19 15:52:07 -0700
committerBryan Newbold <bnewbold@archive.org>2020-10-19 15:52:11 -0700
commit9c39bb6469843825efe67f5e4849cab5ea1eea9d (patch)
treeb6a5ee57f843b9a54ddb9156ce3cf8adfd0f5d84
parentc500720cd09fb0e11e19025f8803957435491c40 (diff)
downloadsandcrawler-9c39bb6469843825efe67f5e4849cab5ea1eea9d.tar.gz
sandcrawler-9c39bb6469843825efe67f5e4849cab5ea1eea9d.zip
SPN: look for non-200 CDX responses
Suspect that this (filtering the CDX lookup to status 200 only) has been the source of many `spn2-cdx-lookup-failure` results.
-rw-r--r--python/sandcrawler/ia.py2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 936ee97..0067b7e 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -977,7 +977,7 @@ class SavePageNowClient:
if not cdx_row:
# lookup exact
try:
- filter_status_code = 200
+ filter_status_code = None
if spn_result.terminal_url.startswith("ftp://"):
filter_status_code = 226
cdx_row = wayback_client.cdx_client.fetch(