aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-10-19 15:51:32 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-10-19 15:51:32 -0700
commit c500720cd09fb0e11e19025f8803957435491c40 (patch)
tree ac13787524ac803150dc140e4b71cc7f63c6f5ea
parent b672a6fe5b0e51f9d2844443bf9f7e82e1fd41b1 (diff)
download sandcrawler-c500720cd09fb0e11e19025f8803957435491c40.tar.gz
sandcrawler-c500720cd09fb0e11e19025f8803957435491c40.zip
SPN: better check for partial URLs returned
-rw-r--r-- python/sandcrawler/ia.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 426307a..936ee97 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -932,8 +932,8 @@ class SavePageNowClient:
)
#print(spn_result, file=sys.stderr)
- # detect partial URL response (aka, missing full URL)
- if spn_result.terminal_url.startswith('/'):
+ # detect partial URL response (aka, success, but missing full URL)
+ if not "://" in spn_result.terminal_url or spn_result.terminal_url.startswith('/'):
return ResourceResult(
start_url=start_url,
hit=False,