From fd50b9492b5fdf3c94f11dea909d63b4b60866b2 Mon Sep 17 00:00:00 2001
From: Martin Czygan
Date: Fri, 6 Dec 2019 15:12:32 +0100
Subject: Datacite API v2 throws HTTP 400 errors that we currently cannot recover from.

As a first iteration, just mark the daily batch complete and continue.

The occasional HTTP 400 issue has been reported as
https://github.com/datacite/datacite/issues/897. A possible improvement would
be to shrink the window, so that losses will be smaller.
---
 python/fatcat_tools/harvest/doi_registrars.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'python/fatcat_tools/harvest')

diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 5af5395e..19b32e18 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -122,6 +122,10 @@ class HarvestCrossrefWorker:
                 self.producer.poll(0)
                 time.sleep(30.0)
                 continue
+            if http_resp.status_code == 400:
+                # https://is.gd/0nsEll, https://github.com/datacite/datacite/issues/897
+                print("skipping batch for {}, due to HTTP 400. Marking complete. Related: https://git.io/JeylE".format(date_str))
+                break
             http_resp.raise_for_status()
             resp = http_resp.json()
             items = self.extract_items(resp)
-- 
cgit v1.2.3