aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--python/sandcrawler/ia.py1
-rw-r--r--python/sandcrawler/ingest_file.py7
2 files changed, 8 insertions, 0 deletions
diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py
index d123c9f..641aa52 100644
--- a/python/sandcrawler/ia.py
+++ b/python/sandcrawler/ia.py
@@ -1057,6 +1057,7 @@ class SavePageNowClient:
job_id = resp_json["job_id"]
print(f" SPNv2 running: job_id={job_id} url={request_url}", file=sys.stderr)
+ time.sleep(0.1)
# poll until complete
final_json = None
diff --git a/python/sandcrawler/ingest_file.py b/python/sandcrawler/ingest_file.py
index 857a212..4ec37c1 100644
--- a/python/sandcrawler/ingest_file.py
+++ b/python/sandcrawler/ingest_file.py
@@ -20,6 +20,7 @@ from sandcrawler.ia import (
NoCaptureError,
PetaboxError,
ResourceResult,
+ SavePageNowBackoffError,
SavePageNowClient,
SavePageNowError,
WaybackClient,
@@ -632,6 +633,12 @@ class IngestFileWorker(SandcrawlerWorker):
result["status"] = "spn2-error"
result["error_message"] = str(e)[:1600]
return result
+ except SavePageNowBackoffError as e:
+ result["status"] = "spn2-backoff"
+ result["error_message"] = str(e)[:1600]
+ # small sleep as a slow-down
+ time.sleep(2.0)
+ return result
except PetaboxError as e:
result["status"] = "petabox-error"
result["error_message"] = str(e)[:1600]