diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-20 11:04:31 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-20 11:04:31 -0800 |
commit | 2d338ab3649642affbedeb28470a96a6a5ba7597 (patch) | |
tree | de50eb94070b97b6c93a73746a343cbba0a5c220 | |
parent | ed5ef9801fc94ab20defc77ade53b585f695aa6c (diff) | |
download | sandcrawler-2d338ab3649642affbedeb28470a96a6a5ba7597.tar.gz sandcrawler-2d338ab3649642affbedeb28470a96a6a5ba7597.zip |
handle more wayback error conditions
-rw-r--r-- | python/sandcrawler/ia.py | 6 |
1 file changed, 6 insertions, 0 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 4da07af..cb4fdf3 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -11,6 +11,7 @@ import json import requests import datetime import urllib.parse +import urllib3.exceptions from typing import Tuple from collections import namedtuple @@ -373,6 +374,11 @@ class WaybackClient: except wayback.exception.ResourceUnavailable: print(" Failed to fetch from warc_path:{}".format(warc_path), file=sys.stderr) raise PetaboxError("failed to load file contents from wayback/petabox (ResourceUnavailable)") + except wayback.exception.InvalidResource: + print(" Failed to fetch from warc_path:{}".format(warc_path), file=sys.stderr) + raise WaybackContentError("failed to load file contents from wayback/petabox (InvalidResource)") + except urllib3.exceptions.ReadTimeoutError as rte: + raise PetaboxError("failed to load file contents from wayback/petabox (ReadTimeoutError: {})".format(rte)) except ValueError as ve: raise PetaboxError("failed to load file contents from wayback/petabox (ValueError: {})".format(ve)) except EOFError as eofe: |