aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/ingest.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-11-03 16:14:33 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-11-03 16:14:33 -0800
commit: a1a4e96e44bfb851003e578defd6f33008be6871 (patch)
tree: 7696c8450a591faecc704827ec933653c737e26b /python/sandcrawler/ingest.py
parent: 4c46d822463573b824e9dbb2a1acca99f58b6853 (diff)
download: sandcrawler-a1a4e96e44bfb851003e578defd6f33008be6871.tar.gz
sandcrawler-a1a4e96e44bfb851003e578defd6f33008be6871.zip
ingest: tweak debug printing alignment
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- python/sandcrawler/ingest.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 35b37fc..a39d9ea 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -193,7 +193,7 @@ class IngestFileWorker(SandcrawlerWorker):
force_simple_get = 1
break
resource = self.spn_client.crawl_resource(url, self.wayback_client, force_simple_get=force_simple_get)
- print("[FETCH {}\t] {}\t{}".format(
+ print("[FETCH {:>6}] {} {}".format(
via,
resource.status,
resource.terminal_url or url),
@@ -331,10 +331,10 @@ class IngestFileWorker(SandcrawlerWorker):
for block in self.base_url_blocklist:
if block in base_url:
- print("[SKIP {}\t] {}".format(ingest_type, base_url), file=sys.stderr)
+ print("[SKIP {:>6}] {}".format(ingest_type, base_url), file=sys.stderr)
return dict(request=request, hit=False, status="skip-url-blocklist")
- print("[INGEST {}\t] {}".format(ingest_type, base_url), file=sys.stderr)
+ print("[INGEST {:>6}] {}".format(ingest_type, base_url), file=sys.stderr)
best_mimetype = None
if ingest_type == "pdf":