aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-10-19 17:23:21 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-10-19 17:23:21 -0700
commit: 36577de5bd84fbc9311d8938b8d5642cf856b1f8 (patch)
tree: 6da62cd2abd264c42c9d9c4fa1080237d7c028c3
parent: d407bc1149bf6ba2b20ce049660f7b9d3c1d7f29 (diff)
download: sandcrawler-36577de5bd84fbc9311d8938b8d5642cf856b1f8.tar.gz
download: sandcrawler-36577de5bd84fbc9311d8938b8d5642cf856b1f8.zip
ingest: add a cdx-error slowdown delay
-rw-r--r-- python/sandcrawler/ingest.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 6f9da86..322859a 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -2,6 +2,7 @@
import sys
import json
import gzip
+import time
import base64
import requests
from http.server import BaseHTTPRequestHandler, HTTPServer
@@ -380,6 +381,8 @@ class IngestFileWorker(SandcrawlerWorker):
except CdxApiError as e:
result['status'] = 'cdx-error'
result['error_message'] = str(e)[:1600]
+ # add a sleep in cdx-error path as a slow-down
+ time.sleep(2.0)
return result
except WaybackError as e:
result['status'] = 'wayback-error'