diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 15:51:32 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 15:51:32 -0700 |
commit | c500720cd09fb0e11e19025f8803957435491c40 (patch) | |
tree | ac13787524ac803150dc140e4b71cc7f63c6f5ea /python | |
parent | b672a6fe5b0e51f9d2844443bf9f7e82e1fd41b1 (diff) | |
download | sandcrawler-c500720cd09fb0e11e19025f8803957435491c40.tar.gz sandcrawler-c500720cd09fb0e11e19025f8803957435491c40.zip |
SPN: better check for partial URLs returned
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/ia.py | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 426307a..936ee97 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -932,8 +932,8 @@ class SavePageNowClient:
             )
         #print(spn_result, file=sys.stderr)
 
-        # detect partial URL response (aka, missing full URL)
-        if spn_result.terminal_url.startswith('/'):
+        # detect partial URL response (aka, success, but missing full URL)
+        if not "://" in spn_result.terminal_url or spn_result.terminal_url.startswith('/'):
             return ResourceResult(
                 start_url=start_url,
                 hit=False,
```