about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-02-24 16:50:50 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-02-24 16:50:50 -0800
commit 6a9c1fe64941b4975fa5e52fc3df58b38bf9a0f0 (patch)
tree bf47238cbf964a0894ba6989d9b4aafd3c64d98c /python
parent 266b0a2d5928921d3b3f992fa249b22f7a5edb16 (diff)
download sandcrawler-6a9c1fe64941b4975fa5e52fc3df58b38bf9a0f0.tar.gz
sandcrawler-6a9c1fe64941b4975fa5e52fc3df58b38bf9a0f0.zip
ingest: treat CDX lookup error as a wayback-error
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/ia.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index c3ca80f..7230ee0 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -381,7 +381,10 @@ class WaybackClient:
revisit_uri = revisit_uri.decode('utf-8')
revisit_dt = revisit_dt.decode('utf-8').replace('-', '').replace(':', '').replace('T', '').replace('Z', '')
assert len(revisit_dt) == 14
- revisit_cdx = self.cdx_client.fetch(revisit_uri, revisit_dt)
+ try:
+ revisit_cdx = self.cdx_client.fetch(revisit_uri, revisit_dt)
+ except KeyError as ke:
+ raise WaybackError("Revist resolution failed: {}".format(ke))
body = self.fetch_petabox_body(
csize=revisit_cdx.warc_csize,
offset=revisit_cdx.warc_offset,