diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-03-06 11:39:36 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-03-06 11:39:36 -0800 |
commit | 5dc322c93eb3b92324c4f947697d2a2c69749040 (patch) | |
tree | 5b70e74cd5436b24378ac1aec4854b2fc8161c80 /python/fatcat_tools/harvest/harvest_common.py | |
parent | 10af3b5ab23b8df76b08ef1173f7547db3df4125 (diff) | |
download | fatcat-5dc322c93eb3b92324c4f947697d2a2c69749040.tar.gz fatcat-5dc322c93eb3b92324c4f947697d2a2c69749040.zip |
retry/backoff for Crossref harvester
Diffstat (limited to 'python/fatcat_tools/harvest/harvest_common.py')
-rw-r--r-- | python/fatcat_tools/harvest/harvest_common.py | 22 |
1 files changed, 22 insertions, 0 deletions
```diff
diff --git a/python/fatcat_tools/harvest/harvest_common.py b/python/fatcat_tools/harvest/harvest_common.py
index f4d74be2..11fd5fe8 100644
--- a/python/fatcat_tools/harvest/harvest_common.py
+++ b/python/fatcat_tools/harvest/harvest_common.py
@@ -3,10 +3,32 @@ import sys
 import json
 import time
 import datetime
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
 
 # Used for parsing ISO date format (YYYY-MM-DD)
 DATE_FMT = "%Y-%m-%d"
 
+def requests_retry_session(retries=10, backoff_factor=3,
+        status_forcelist=(500, 502, 504), session=None):
+    """
+    From: https://www.peterbe.com/plog/best-practice-with-retries-with-requests
+    """
+    session = session or requests.Session()
+    retry = Retry(
+        total=retries,
+        read=retries,
+        connect=retries,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+    )
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('http://', adapter)
+    session.mount('https://', adapter)
+    return session
+
 class HarvestState:
     """
     First version of this works with full days (dates)
```