From 34a64b5d8c470ae2627458d791239cfc4d66d6b3 Mon Sep 17 00:00:00 2001 From: Martin Czygan Date: Fri, 29 May 2020 19:00:30 +0200 Subject: harvest: fail on HTTP 400 In the past, harvesting datacite occasionally resulted in HTTP 400 responses. Since then, various API bugs have been fixed (most recently: https://github.com/datacite/lupo/pull/537, https://github.com/datacite/datacite/issues/1038). The downside of ignoring this error was that state lives in kafka, which has limited support for deleting arbitrary messages from a topic. --- python/fatcat_tools/harvest/doi_registrars.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py index f84acb24..37628f09 100644 --- a/python/fatcat_tools/harvest/doi_registrars.py +++ b/python/fatcat_tools/harvest/doi_registrars.py @@ -142,10 +142,6 @@ class HarvestCrossrefWorker: self.producer.poll(0) time.sleep(30.0) continue - if http_resp.status_code == 400: - print("skipping batch for {}, due to HTTP 400. Marking complete. Related: https://github.com/datacite/datacite/issues/897".format(date_str), - file=sys.stderr) - break http_resp.raise_for_status() resp = http_resp.json() items = self.extract_items(resp) -- cgit v1.2.3