From e21fac21cc5a4267357a499f75f048ee5fd38ddb Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 18 Mar 2020 18:50:44 -0700
Subject: ingest: log every URL (from ia code side)

---
 python/sandcrawler/ia.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'python')

diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 0a0e0ae..25697be 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -575,6 +575,7 @@ class WaybackClient:
         next_url = start_url
         urls_seen = [start_url]
         for i in range(self.max_redirects):
+            print(" URL: {}".format(next_url), file=sys.stderr)
             cdx_row = self.cdx_client.lookup_best(next_url, best_mimetype=best_mimetype)
             #print(cdx_row, file=sys.stderr)
             if not cdx_row:
-- 
cgit v1.2.3