diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-26 09:55:37 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-26 09:55:37 -0700 |
commit | d47a51c927a57208bea22900b5b5c9f519140a3c (patch) | |
tree | eb4e1cef8f2014857bec35331e032e0a7b28b2fc | |
parent | 19524304c40d8bbddc6252f33a592b5713d7799f (diff) | |
download | sandcrawler-d47a51c927a57208bea22900b5b5c9f519140a3c.tar.gz sandcrawler-d47a51c927a57208bea22900b5b5c9f519140a3c.zip |
report revisit non-200 as a WaybackError
-rw-r--r-- | python/sandcrawler/ia.py | 14 |
1 file changed, 7 insertions, 7 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index 49f5ad4..ba9d6a8 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -389,15 +389,15 @@ class WaybackClient: assert len(revisit_dt) == 14 try: revisit_cdx = self.cdx_client.fetch(revisit_uri, revisit_dt) + body = self.fetch_petabox_body( + csize=revisit_cdx.warc_csize, + offset=revisit_cdx.warc_offset, + warc_path=revisit_cdx.warc_path, + resolve_revisit=False, + expected_status_code=revisit_cdx.status_code, + ) except KeyError as ke: raise WaybackError("Revist resolution failed: {}".format(ke)) - body = self.fetch_petabox_body( - csize=revisit_cdx.warc_csize, - offset=revisit_cdx.warc_offset, - warc_path=revisit_cdx.warc_path, - resolve_revisit=False, - expected_status_code=revisit_cdx.status_code, - ) elif status_code in (200, 226): try: body = gwb_record.open_raw_content().read() |