aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-11-14 11:03:20 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2019-11-14 11:03:20 -0800
commit: 7d754db1b5809d0cb4c18c4432b4347914757196 (patch)
tree: d3434d687f5c429657a3e77a879a5cf80e8225ec /python
parent: 8774b17dbb9c0be8ca44846188f77403fae3e867 (diff)
download: sandcrawler-7d754db1b5809d0cb4c18c4432b4347914757196.tar.gz
download: sandcrawler-7d754db1b5809d0cb4c18c4432b4347914757196.zip
handle SPNv2 polling timeout
Diffstat (limited to 'python')
-rw-r--r-- python/sandcrawler/ia.py 16
1 file changed, 10 insertions, 6 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index 31ea84e..08b92e2 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -192,7 +192,7 @@ class SavePageNowClient:
def save_url_now_v2(self, url):
"""
- Returns a list of cdx objects, or raises an error on non-success.
+ Returns a list of URLs, or raises an error on non-success.
"""
if not (self.ia_access_key and self.ia_secret_key):
raise Exception("SPNv2 requires authentication (IA_ACCESS_KEY/IA_SECRET_KEY)")
@@ -210,20 +210,24 @@ class SavePageNowClient:
assert resp_json
# poll until complete
+ final_json = None
for i in range(90):
resp = self.v2_session.get("{}/status/{}".format(self.v2endpoint, resp_json['job_id']))
resp.raise_for_status()
status = resp.json()['status']
if status == 'success':
- resp = resp.json()
- if resp.get('message', '').startswith('The same snapshot had been made'):
- raise SavePageNowError("SPN2 re-snapshot withing short time window")
+ final_json = resp.json()
+ if final_json.get('message', '').startswith('The same snapshot had been made'):
+ raise SavePageNowError("SPN2 re-snapshot within short time window")
break
elif status == 'pending':
time.sleep(1.0)
else:
raise SavePageNowError("SPN2 status:{} url:{}".format(status, url))
- #print(resp)
- return resp['resources']
+ if not final_json:
+ raise SavePageNowError("SPN2 timed out (polling count exceeded)")
+
+ #print(final_json)
+ return final_json['resources']