about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/ia.py 7
1 files changed, 6 insertions, 1 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 99e92be..e31ff30 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -659,7 +659,12 @@ class WaybackClient:
cdx=cdx_row,
revisit_cdx=None,
)
- next_url = resource.location
+ if resource.location.startswith('/'):
+ # redirect location does not include hostname
+ domain_prefix = '/'.join(next_url.split('/')[:3])
+ next_url = domain_prefix + resource.location
+ else:
+ next_url = resource.location
else:
next_url = self.fetch_replay_redirect(
url=cdx_row.url,