diff options
author | Bryan Newbold <bnewbold@archive.org> | 2022-07-12 15:03:29 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2022-07-12 15:03:29 -0700 |
commit | c15432c0ce52c48efabcd7e3221a5d625ef3e9d0 (patch) | |
tree | 948d8e72a7235d07a5ea5d778e8ee388003418e6 /python/sandcrawler/fileset_strategies.py | |
parent | 8f85ab294eae50e31efa9e31bb0bca1bca76cf8b (diff) | |
download | sandcrawler-bnewbold-refactor-loggging.tar.gz sandcrawler-bnewbold-refactor-loggging.zip |
WIP: refactor logging calls in ingest pipelines (branch: bnewbold-refactor-loggging)
Diffstat (limited to 'python/sandcrawler/fileset_strategies.py')
-rw-r--r-- | python/sandcrawler/fileset_strategies.py | 21 |
1 files changed, 10 insertions, 11 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py
index 1d84ce5..d49a5ff 100644
--- a/python/sandcrawler/fileset_strategies.py
+++ b/python/sandcrawler/fileset_strategies.py
@@ -1,3 +1,4 @@
+import logging
 import os
 import shutil
 import sys
@@ -192,9 +193,8 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy):
                 ):
                     # these 'tab-separated-values' from dataverse are just noise, don't log them
                     if m.mimetype != "text/tab-separated-values":
-                        print(
-                            f" WARN: mimetype mismatch: expected {m.mimetype}, found {file_meta['mimetype']}",
-                            file=sys.stderr,
+                        logging.warn(
+                            f"mimetype mismatch expected={m.mimetype} found={file_meta['mimetype']}"
                         )
                     m.mimetype = file_meta["mimetype"]
                 else:
@@ -314,14 +314,13 @@ class WebFilesetStrategy(FilesetIngestStrategy):
                     fetch_url, self.wayback_client, force_simple_get=True
                 )
 
-            print(
-                "[FETCH {:>6}] {} {}".format(
-                    via,
-                    (resource and resource.status),
-                    (resource and resource.terminal_url) or fetch_url,
-                ),
-                file=sys.stderr,
-            )
+            if resource:
+                print(
+                    f"fetch {via=} {fetch_url=} {resource.status=} {resource.terminal_url=}",
+                    file=sys.stderr,
+                )
+            else:
+                print(f"fetch {via=} {fetch_url=} status=", file=sys.stderr)
 
             m.terminal_url = resource.terminal_url
             m.terminal_dt = resource.terminal_dt