diff options
| author | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 15:51:32 -0700 | 
|---|---|---|
| committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 15:51:32 -0700 | 
| commit | c500720cd09fb0e11e19025f8803957435491c40 (patch) | |
| tree | ac13787524ac803150dc140e4b71cc7f63c6f5ea | |
| parent | b672a6fe5b0e51f9d2844443bf9f7e82e1fd41b1 (diff) | |
| download | sandcrawler-c500720cd09fb0e11e19025f8803957435491c40.tar.gz sandcrawler-c500720cd09fb0e11e19025f8803957435491c40.zip | |
SPN: better check for partial URLs returned
| -rw-r--r-- | python/sandcrawler/ia.py | 4 | 
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 426307a..936ee97 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -932,8 +932,8 @@ class SavePageNowClient:
             )
         #print(spn_result, file=sys.stderr)
 
-        # detect partial URL response (aka, missing full URL)
-        if spn_result.terminal_url.startswith('/'):
+        # detect partial URL response (aka, success, but missing full URL)
+        if not "://" in spn_result.terminal_url or spn_result.terminal_url.startswith('/'):
             return ResourceResult(
                 start_url=start_url,
                 hit=False,
```
