diff options
| -rw-r--r-- | python/sandcrawler/ia.py | 5 | 
1 file changed, 4 insertions, 1 deletion
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 52ff4d5..39227ca 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -68,7 +68,10 @@ class CdxApiClient:
             sha1hex=b32_hex(cdx[5]),
         )
         if follow_redirects and cdx['http_status'] in (301, 302):
-            resp = requests.get(self.wayback_endpoint + cdx['datetime'] + "id_/" + cdx['url'])
+            try:
+                resp = requests.get(self.wayback_endpoint + cdx['datetime'] + "id_/" + cdx['url'])
+            except requests.exceptions.TooManyRedirects:
+                raise CdxApiError("redirect loop (wayback fetch)")
             next_url = '/'.join(resp.url.split('/')[5:])
             if next_url == url:
                 raise CdxApiError("redirect loop (by url)")
