aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-03-18 18:50:44 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-03-18 18:50:44 -0700
commit e21fac21cc5a4267357a499f75f048ee5fd38ddb (patch)
tree e8968a42939e6d615e5582df9e04f4d507b17560
parent cb16d18137c936a634b75bf0eb6acb43c77d9290 (diff)
download sandcrawler-e21fac21cc5a4267357a499f75f048ee5fd38ddb.tar.gz
sandcrawler-e21fac21cc5a4267357a499f75f048ee5fd38ddb.zip
ingest: log every URL (from ia code side)
-rw-r--r-- python/sandcrawler/ia.py 1
1 files changed, 1 insertions, 0 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 0a0e0ae..25697be 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -575,6 +575,7 @@ class WaybackClient:
next_url = start_url
urls_seen = [start_url]
for i in range(self.max_redirects):
+ print(" URL: {}".format(next_url), file=sys.stderr)
cdx_row = self.cdx_client.lookup_best(next_url, best_mimetype=best_mimetype)
#print(cdx_row, file=sys.stderr)
if not cdx_row: