From c500720cd09fb0e11e19025f8803957435491c40 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 19 Oct 2020 15:51:32 -0700 Subject: SPN: better check for partial URLs returned --- python/sandcrawler/ia.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'python/sandcrawler/ia.py') diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 426307a..936ee97 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -932,8 +932,8 @@ class SavePageNowClient: ) #print(spn_result, file=sys.stderr) - # detect partial URL response (aka, missing full URL) - if spn_result.terminal_url.startswith('/'): + # detect partial URL response (aka, success, but missing full URL) + if not "://" in spn_result.terminal_url or spn_result.terminal_url.startswith('/'): return ResourceResult( start_url=start_url, hit=False, -- cgit v1.2.3