about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-11-20 11:04:31 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-11-20 11:04:31 -0800
commit 2d338ab3649642affbedeb28470a96a6a5ba7597 (patch)
tree de50eb94070b97b6c93a73746a343cbba0a5c220
parent ed5ef9801fc94ab20defc77ade53b585f695aa6c (diff)
download sandcrawler-2d338ab3649642affbedeb28470a96a6a5ba7597.tar.gz
sandcrawler-2d338ab3649642affbedeb28470a96a6a5ba7597.zip
handle more wayback error conditions
-rw-r--r-- python/sandcrawler/ia.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 4da07af..cb4fdf3 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -11,6 +11,7 @@ import json
import requests
import datetime
import urllib.parse
+import urllib3.exceptions
from typing import Tuple
from collections import namedtuple
@@ -373,6 +374,11 @@ class WaybackClient:
except wayback.exception.ResourceUnavailable:
print(" Failed to fetch from warc_path:{}".format(warc_path), file=sys.stderr)
raise PetaboxError("failed to load file contents from wayback/petabox (ResourceUnavailable)")
+ except wayback.exception.InvalidResource:
+ print(" Failed to fetch from warc_path:{}".format(warc_path), file=sys.stderr)
+ raise WaybackContentError("failed to load file contents from wayback/petabox (InvalidResource)")
+ except urllib3.exceptions.ReadTimeoutError as rte:
+ raise PetaboxError("failed to load file contents from wayback/petabox (ReadTimeoutError: {})".format(rte))
except ValueError as ve:
raise PetaboxError("failed to load file contents from wayback/petabox (ValueError: {})".format(ve))
except EOFError as eofe: