about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/ia.py 5
1 files changed, 4 insertions, 1 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 7230ee0..07e46c3 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -373,8 +373,11 @@ class WaybackClient:
revisit_cdx = None
if gwb_record.is_revisit():
if not resolve_revisit:
- raise WaybackError( "found revisit record, but won't resolve (loop?)")
+ raise WaybackError("found revisit record, but won't resolve (loop?)")
revisit_uri, revisit_dt = gwb_record.refers_to
+ if not (revisit_uri and revisit_dt):
+ raise WaybackError("revisit record missing URI and/or DT: warc:{} offset:{}".format(
+ warc_path, warc_offset))
# convert revisit_dt
# len("2018-07-24T11:56:49"), or with "Z"
assert len(revisit_dt) in (19, 20)