diff options
-rw-r--r-- | python/sandcrawler/ia.py | 1 | ||||
-rw-r--r-- | python/sandcrawler/ingest_file.py | 7 |
2 files changed, 8 insertions, 0 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index d123c9f..641aa52 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -1057,6 +1057,7 @@ class SavePageNowClient: job_id = resp_json["job_id"] print(f" SPNv2 running: job_id={job_id} url={request_url}", file=sys.stderr) + time.sleep(0.1) # poll until complete final_json = None diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py index 857a212..4ec37c1 100644 --- a/python/sandcrawler/ingest_file.py +++ b/python/sandcrawler/ingest_file.py @@ -20,6 +20,7 @@ from sandcrawler.ia import ( NoCaptureError, PetaboxError, ResourceResult, + SavePageNowBackoffError, SavePageNowClient, SavePageNowError, WaybackClient, @@ -632,6 +633,12 @@ class IngestFileWorker(SandcrawlerWorker): result["status"] = "spn2-error" result["error_message"] = str(e)[:1600] return result + except SavePageNowBackoffError as e: + result["status"] = "spn2-backoff" + result["error_message"] = str(e)[:1600] + # small sleep as a slow-down + time.sleep(2.0) + return result except PetaboxError as e: result["status"] = "petabox-error" result["error_message"] = str(e)[:1600] |