aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-10-26 15:17:53 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-10-26 15:18:04 -0700
commit: 730103121e72ab515979a00341c8a44e362edc71 (patch)
tree: 69dc6d6b133743df6e3744426ff57442625b3e18
parent: 8046e127c7de43f162bdc58f2a5c68c0848aef27 (diff)
download: sandcrawler-730103121e72ab515979a00341c8a44e362edc71.tar.gz
sandcrawler-730103121e72ab515979a00341c8a44e362edc71.zip
ia helpers: enforce max_redirects count correctly
That is, the fetch should still run even if max_redirects = 0, because the first loop iteration is the initial request, not a redirect.
-rw-r--r-- python/sandcrawler/ia.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index ad685aa..1148de2 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -681,7 +681,7 @@ class WaybackClient:
"""
next_url = start_url
urls_seen = [start_url]
- for i in range(self.max_redirects):
+ for i in range(self.max_redirects + 1):
print(" URL: {}".format(next_url), file=sys.stderr)
cdx_row = self.cdx_client.lookup_best(next_url,
best_mimetype=best_mimetype,