diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 16:14:33 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-03 16:14:33 -0800 |
commit | a1a4e96e44bfb851003e578defd6f33008be6871 (patch) | |
tree | 7696c8450a591faecc704827ec933653c737e26b /python/sandcrawler/ingest.py | |
parent | 4c46d822463573b824e9dbb2a1acca99f58b6853 (diff) | |
download | sandcrawler-a1a4e96e44bfb851003e578defd6f33008be6871.tar.gz sandcrawler-a1a4e96e44bfb851003e578defd6f33008be6871.zip |
ingest: tweak debug printing alignment
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- | python/sandcrawler/ingest.py | 6 |
1 file changed, 3 insertions, 3 deletions
```diff
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 35b37fc..a39d9ea 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -193,7 +193,7 @@ class IngestFileWorker(SandcrawlerWorker):
                     force_simple_get = 1
                     break
             resource = self.spn_client.crawl_resource(url, self.wayback_client, force_simple_get=force_simple_get)
-        print("[FETCH {}\t] {}\t{}".format(
+        print("[FETCH {:>6}] {} {}".format(
                 via,
                 resource.status,
                 resource.terminal_url or url),
@@ -331,10 +331,10 @@ class IngestFileWorker(SandcrawlerWorker):
 
         for block in self.base_url_blocklist:
             if block in base_url:
-                print("[SKIP {}\t] {}".format(ingest_type, base_url), file=sys.stderr)
+                print("[SKIP {:>6}] {}".format(ingest_type, base_url), file=sys.stderr)
                 return dict(request=request, hit=False, status="skip-url-blocklist")
 
-        print("[INGEST {}\t] {}".format(ingest_type, base_url), file=sys.stderr)
+        print("[INGEST {:>6}] {}".format(ingest_type, base_url), file=sys.stderr)
 
         best_mimetype = None
         if ingest_type == "pdf":
```