diff options
author | Bryan Newbold <bnewbold@robocracy.org> | 2019-03-06 11:47:21 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@robocracy.org> | 2019-03-06 11:47:21 -0800 |
commit | 7da130abec5ce9a8edbe6d2a4ef4c8be60e16a0d (patch) | |
tree | d124dd8a2a72a8bd82124e853e5ebd2fc3de1df4 | |
parent | 5dc322c93eb3b92324c4f947697d2a2c69749040 (diff) | |
download | fatcat-7da130abec5ce9a8edbe6d2a4ef4c8be60e16a0d.tar.gz fatcat-7da130abec5ce9a8edbe6d2a4ef4c8be60e16a0d.zip |
fix harvester session.get() params
-rw-r--r-- | python/fatcat_tools/harvest/doi_registrars.py | 13 |
1 files changed, 8 insertions, 5 deletions
```diff
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 1aaad56a..802e0e22 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -87,15 +87,18 @@ class HarvestCrossrefWorker:
 
         date_str = date.isoformat()
         params = self.params(date_str)
-        headers = {
-            'User-Agent': 'fatcat_tools/0.1.0 (https://fatcat.wiki; mailto:{}) python-requests'.format(self.contact_email),
-        }
+        http_session = requests_retry_session()
+        http_session.headers.update({
+            'User-Agent': 'fatcat_tools/0.1.0 (https://fatcat.wiki; mailto:{}) python-requests'.format(
+                self.contact_email),
+        })
         count = 0
         with produce_topic.get_producer() as producer:
             while True:
-                http_resp = requests_retry_session().get(self.api_host_url, params, headers=headers)
+                http_resp = http_session.get(self.api_host_url, params=params)
                 if http_resp.status_code == 503:
-                    # crude backoff
+                    # crude backoff; now redundant with session exponential
+                    # backoff, but allows for longer backoff/downtime on remote end
                     print("got HTTP {}, pausing for 30 seconds".format(http_resp.status_code))
                     time.sleep(30.0)
                     continue
```