From d47a51c927a57208bea22900b5b5c9f519140a3c Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 26 Jun 2020 09:55:37 -0700 Subject: report revisit non-200 as a WaybackError --- python/sandcrawler/ia.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 49f5ad4..ba9d6a8 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -389,15 +389,15 @@ class WaybackClient: assert len(revisit_dt) == 14 try: revisit_cdx = self.cdx_client.fetch(revisit_uri, revisit_dt) + body = self.fetch_petabox_body( + csize=revisit_cdx.warc_csize, + offset=revisit_cdx.warc_offset, + warc_path=revisit_cdx.warc_path, + resolve_revisit=False, + expected_status_code=revisit_cdx.status_code, + ) except KeyError as ke: raise WaybackError("Revist resolution failed: {}".format(ke)) - body = self.fetch_petabox_body( - csize=revisit_cdx.warc_csize, - offset=revisit_cdx.warc_offset, - warc_path=revisit_cdx.warc_path, - resolve_revisit=False, - expected_status_code=revisit_cdx.status_code, - ) elif status_code in (200, 226): try: body = gwb_record.open_raw_content().read() -- cgit v1.2.3