diff options
author | Martin Czygan <martin.czygan@gmail.com> | 2020-08-10 19:55:14 +0200 |
---|---|---|
committer | Martin Czygan <martin.czygan@gmail.com> | 2020-08-10 19:58:12 +0200 |
commit | e18d48642cecb55d9f2270f9048953a7b543472e (patch) | |
tree | fa38a037143ae65d86bc91ffc3aba8d4ff97272e | |
parent | de0fb59f0e36d8079649feefb7592189d8f7c6ed (diff) | |
download | fatcat-e18d48642cecb55d9f2270f9048953a7b543472e.tar.gz fatcat-e18d48642cecb55d9f2270f9048953a7b543472e.zip |
harvest: datacite API yields HTTP 200 with broken JSON
As a first step: log response body for debugging.
-rw-r--r-- | python/fatcat_tools/harvest/doi_registrars.py | 9 |
1 file changed, 8 insertions, 1 deletion
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py index 2554fe96..03773c15 100644 --- a/python/fatcat_tools/harvest/doi_registrars.py +++ b/python/fatcat_tools/harvest/doi_registrars.py @@ -136,7 +136,14 @@ class HarvestCrossrefWorker: time.sleep(30.0) continue http_resp.raise_for_status() - resp = http_resp.json() + try: + resp_body = http_resp.text + resp = json.loads(resp_body) + except json.JSONDecodeError as exc: + # Datacite API returned HTTP 200, but JSON seemed unparseable. + # It might be a glitch, so we retry. + print("failed to decode body from {}: {}".format(http_resp.url, resp_body), file=sys.stderr) + raise items = self.extract_items(resp) count += len(items) print("... got {} ({} of {}), HTTP fetch took {}".format(len(items), count, |