diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 15:52:07 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 15:52:11 -0700 |
commit | 9c39bb6469843825efe67f5e4849cab5ea1eea9d (patch) | |
tree | b6a5ee57f843b9a54ddb9156ce3cf8adfd0f5d84 | |
parent | c500720cd09fb0e11e19025f8803957435491c40 (diff) | |
download | sandcrawler-9c39bb6469843825efe67f5e4849cab5ea1eea9d.tar.gz sandcrawler-9c39bb6469843825efe67f5e4849cab5ea1eea9d.zip |
SPN: look for non-200 CDX responses
Suspect that this has been the source of many `spn2-cdx-lookup-failure` errors.
-rw-r--r-- | python/sandcrawler/ia.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 936ee97..0067b7e 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -977,7 +977,7 @@ class SavePageNowClient:
         if not cdx_row:
             # lookup exact
             try:
-                filter_status_code = 200
+                filter_status_code = None
                 if spn_result.terminal_url.startswith("ftp://"):
                     filter_status_code = 226
                 cdx_row = wayback_client.cdx_client.fetch(