diff options
Diffstat (limited to 'python/sandcrawler/fileset_strategies.py')
-rw-r--r-- | python/sandcrawler/fileset_strategies.py | 21 |
1 files changed, 10 insertions, 11 deletions
diff --git a/python/sandcrawler/fileset_strategies.py b/python/sandcrawler/fileset_strategies.py index 1d84ce5..d49a5ff 100644 --- a/python/sandcrawler/fileset_strategies.py +++ b/python/sandcrawler/fileset_strategies.py @@ -1,3 +1,4 @@ +import logging import os import shutil import sys @@ -192,9 +193,8 @@ class ArchiveorgFilesetStrategy(FilesetIngestStrategy): ): # these 'tab-separated-values' from dataverse are just noise, don't log them if m.mimetype != "text/tab-separated-values": - print( - f" WARN: mimetype mismatch: expected {m.mimetype}, found {file_meta['mimetype']}", - file=sys.stderr, + logging.warn( + f"mimetype mismatch expected={m.mimetype} found={file_meta['mimetype']}" ) m.mimetype = file_meta["mimetype"] else: @@ -314,14 +314,13 @@ class WebFilesetStrategy(FilesetIngestStrategy): fetch_url, self.wayback_client, force_simple_get=True ) - print( - "[FETCH {:>6}] {} {}".format( - via, - (resource and resource.status), - (resource and resource.terminal_url) or fetch_url, - ), - file=sys.stderr, - ) + if resource: + print( + f"fetch {via=} {fetch_url=} {resource.status=} {resource.terminal_url=}", + file=sys.stderr, + ) + else: + print(f"fetch {via=} {fetch_url=} status=", file=sys.stderr) m.terminal_url = resource.terminal_url m.terminal_dt = resource.terminal_dt |