diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 15:17:53 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 15:18:04 -0700 |
commit | 730103121e72ab515979a00341c8a44e362edc71 (patch) | |
tree | 69dc6d6b133743df6e3744426ff57442625b3e18 | |
parent | 8046e127c7de43f162bdc58f2a5c68c0848aef27 (diff) | |
download | sandcrawler-730103121e72ab515979a00341c8a44e362edc71.tar.gz sandcrawler-730103121e72ab515979a00341c8a44e362edc71.zip |
ia helpers: enforce max_redirects count correctly
In other words, the fetch should still run when max_redirects = 0: the first
loop iteration is the initial request, not a redirect.
-rw-r--r-- | python/sandcrawler/ia.py | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index ad685aa..1148de2 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -681,7 +681,7 @@ class WaybackClient: """ next_url = start_url urls_seen = [start_url] - for i in range(self.max_redirects): + for i in range(self.max_redirects + 1): print(" URL: {}".format(next_url), file=sys.stderr) cdx_row = self.cdx_client.lookup_best(next_url, best_mimetype=best_mimetype, |