diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 17:23:21 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-10-19 17:23:21 -0700 |
commit | 36577de5bd84fbc9311d8938b8d5642cf856b1f8 (patch) | |
tree | 6da62cd2abd264c42c9d9c4fa1080237d7c028c3 | |
parent | d407bc1149bf6ba2b20ce049660f7b9d3c1d7f29 (diff) | |
download | sandcrawler-36577de5bd84fbc9311d8938b8d5642cf856b1f8.tar.gz sandcrawler-36577de5bd84fbc9311d8938b8d5642cf856b1f8.zip |
ingest: add a cdx-error slowdown delay
-rw-r--r-- | python/sandcrawler/ingest.py | 3 |
1 files changed, 3 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 6f9da86..322859a 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -2,6 +2,7 @@
 import sys
 import json
 import gzip
+import time
 import base64
 import requests
 from http.server import BaseHTTPRequestHandler, HTTPServer
@@ -380,6 +381,8 @@ class IngestFileWorker(SandcrawlerWorker):
         except CdxApiError as e:
             result['status'] = 'cdx-error'
             result['error_message'] = str(e)[:1600]
+            # add a sleep in cdx-error path as a slow-down
+            time.sleep(2.0)
             return result
         except WaybackError as e:
             result['status'] = 'wayback-error'