aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-06-26 09:55:37 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-06-26 09:55:37 -0700
commit: d47a51c927a57208bea22900b5b5c9f519140a3c (patch)
tree: eb4e1cef8f2014857bec35331e032e0a7b28b2fc
parent: 19524304c40d8bbddc6252f33a592b5713d7799f (diff)
download: sandcrawler-d47a51c927a57208bea22900b5b5c9f519140a3c.tar.gz
download: sandcrawler-d47a51c927a57208bea22900b5b5c9f519140a3c.zip
report revisit non-200 as a WaybackError
-rw-r--r-- python/sandcrawler/ia.py 14
1 files changed, 7 insertions, 7 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 49f5ad4..ba9d6a8 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -389,15 +389,15 @@ class WaybackClient:
assert len(revisit_dt) == 14
try:
revisit_cdx = self.cdx_client.fetch(revisit_uri, revisit_dt)
+ body = self.fetch_petabox_body(
+ csize=revisit_cdx.warc_csize,
+ offset=revisit_cdx.warc_offset,
+ warc_path=revisit_cdx.warc_path,
+ resolve_revisit=False,
+ expected_status_code=revisit_cdx.status_code,
+ )
except KeyError as ke:
raise WaybackError("Revist resolution failed: {}".format(ke))
- body = self.fetch_petabox_body(
- csize=revisit_cdx.warc_csize,
- offset=revisit_cdx.warc_offset,
- warc_path=revisit_cdx.warc_path,
- resolve_revisit=False,
- expected_status_code=revisit_cdx.status_code,
- )
elif status_code in (200, 226):
try:
body = gwb_record.open_raw_content().read()