diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-03-18 18:50:44 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-03-18 18:50:44 -0700 |
commit | e21fac21cc5a4267357a499f75f048ee5fd38ddb (patch) | |
tree | e8968a42939e6d615e5582df9e04f4d507b17560 | |
parent | cb16d18137c936a634b75bf0eb6acb43c77d9290 (diff) | |
download | sandcrawler-e21fac21cc5a4267357a499f75f048ee5fd38ddb.tar.gz sandcrawler-e21fac21cc5a4267357a499f75f048ee5fd38ddb.zip |
ingest: log every URL (from ia code side)
-rw-r--r-- | python/sandcrawler/ia.py | 1 |
1 files changed, 1 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 0a0e0ae..25697be 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -575,6 +575,7 @@ class WaybackClient:
         next_url = start_url
         urls_seen = [start_url]
         for i in range(self.max_redirects):
+            print(" URL: {}".format(next_url), file=sys.stderr)
             cdx_row = self.cdx_client.lookup_best(next_url, best_mimetype=best_mimetype)
             #print(cdx_row, file=sys.stderr)
             if not cdx_row:
```