diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-10-22 21:34:40 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-10-22 21:35:00 -0700 |
commit | 2e3611f0e66615ae007d4e46bb5905e2220fb690 (patch) | |
tree | 9d2fa6f8d62145a5ab31f37f26b6c293a2163acd /python/sandcrawler/misc.py | |
parent | b11fe8c8f444756ae246250cbbfe44e7dc62eac3 (diff) | |
download | sandcrawler-2e3611f0e66615ae007d4e46bb5905e2220fb690.tar.gz sandcrawler-2e3611f0e66615ae007d4e46bb5905e2220fb690.zip |
much progress on file ingest path
Diffstat (limited to 'python/sandcrawler/misc.py')
-rw-r--r-- | python/sandcrawler/misc.py | 24 |
1 files changed, 24 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/misc.py b/python/sandcrawler/misc.py
index 4ffc5d7..5713199 100644
--- a/python/sandcrawler/misc.py
+++ b/python/sandcrawler/misc.py
@@ -3,6 +3,10 @@ import base64
 import magic
 import hashlib
 import datetime
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry # pylint: disable=import-error
+
 
 def gen_file_metadata(blob):
     """
@@ -131,3 +135,23 @@ def parse_cdx_datetime(dt_str):
         return datetime.strptime(dt_str, "%Y%m%d%H%M%S")
     except Exception:
         return None
+
+
+def requests_retry_session(retries=10, backoff_factor=3,
+        status_forcelist=(500, 502, 504), session=None):
+    """
+    From: https://www.peterbe.com/plog/best-practice-with-retries-with-requests
+    """
+    session = session or requests.Session()
+    retry = Retry(
+        total=retries,
+        read=retries,
+        connect=retries,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+    )
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('http://', adapter)
+    session.mount('https://', adapter)
+    return session
+
```