aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/fileset_strategies.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2022-07-12 15:03:29 -0700
committer Bryan Newbold <bnewbold@archive.org> 2022-07-12 15:03:29 -0700
commit c15432c0ce52c48efabcd7e3221a5d625ef3e9d0 (patch)
tree 948d8e72a7235d07a5ea5d778e8ee388003418e6 /python/sandcrawler/fileset_strategies.py
parent 8f85ab294eae50e31efa9e31bb0bca1bca76cf8b (diff)
download sandcrawler-bnewbold-refactor-loggging.tar.gz
sandcrawler-bnewbold-refactor-loggging.zip
WIP: refactor logging calls in ingest pipelines [branch: bnewbold-refactor-loggging]
Diffstat (limited to 'python/sandcrawler/fileset_strategies.py')
-rw-r--r-- python/sandcrawler/fileset_strategies.py 21
1 files changed, 10 insertions, 11 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index 1d84ce5..d49a5ff 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -1,3 +1,4 @@
+import logging
import os
import shutil
import sys
@@ -192,9 +193,8 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
):
# these 'tab-separated-values' from dataverse are just noise, don't log them
if m.mimetype != "text/tab-separated-values":
- print(
- f" WARN: mimetype mismatch: expected {m.mimetype}, found {file_meta['mimetype']}",
- file=sys.stderr,
+ logging.warn(
+ f"mimetype mismatch expected={m.mimetype} found={file_meta['mimetype']}"
)
m.mimetype = file_meta["mimetype"]
else:
@@ -314,14 +314,13 @@ class WebFilesetStrategy(FilesetIngestStrategy):
fetch_url, self.wayback_client, force_simple_get=True
)
- print(
- "[FETCH {:>6}] {} {}".format(
- via,
- (resource and resource.status),
- (resource and resource.terminal_url) or fetch_url,
- ),
- file=sys.stderr,
- )
+ if resource:
+ print(
+ f"fetch {via=} {fetch_url=} {resource.status=} {resource.terminal_url=}",
+ file=sys.stderr,
+ )
+ else:
+ print(f"fetch {via=} {fetch_url=} status=", file=sys.stderr)
m.terminal_url = resource.terminal_url
m.terminal_dt = resource.terminal_dt