about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/ia.py 6
1 files changed, 4 insertions, 2 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 2a334cc..e71f1e8 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -341,9 +341,11 @@ class WaybackClient:
raise WaybackError( "found revisit record, but won't resolve (loop?)")
revisit_uri, revisit_dt = gwb_record.refers_to
# convert revisit_dt
- assert len(revisit_dt) == 19 # len("2018-07-24T11:56:49")
+ # len("2018-07-24T11:56:49"), or with "Z"
+ assert len(revisit_dt) in (19, 20)
revisit_uri = revisit_uri.decode('utf-8')
- revisit_dt = revisit_dt.decode('utf-8').replace('-', '').replace(':', '').replace('T', '')
+ revisit_dt = revisit_dt.decode('utf-8').replace('-', '').replace(':', '').replace('T', '').replace('Z', '')
+ assert len(revisit_dt) == 14
revisit_cdx = self.cdx_client.fetch(revisit_uri, revisit_dt)
body = self.fetch_petabox_body(
csize=revisit_cdx.warc_csize,