aboutsummaryrefslogtreecommitdiffstats
path: root/python/fatcat_tools
diff options
context:
space:
mode:
author bnewbold <bnewbold@archive.org> 2020-08-10 21:30:37 +0000
committer bnewbold <bnewbold@archive.org> 2020-08-10 21:30:37 +0000
commit 1f75aa4cd10947f725eb3db2a51377579a09eb01 (patch)
tree fa38a037143ae65d86bc91ffc3aba8d4ff97272e /python/fatcat_tools
parent de0fb59f0e36d8079649feefb7592189d8f7c6ed (diff)
parent e18d48642cecb55d9f2270f9048953a7b543472e (diff)
download fatcat-1f75aa4cd10947f725eb3db2a51377579a09eb01.tar.gz
fatcat-1f75aa4cd10947f725eb3db2a51377579a09eb01.zip
Merge branch 'martin-datacite-json-decode-err-sentry-38625' into 'master'
harvest: datacite API yields HTTP 200 with broken JSON. See merge request webgroup/fatcat!77
Diffstat (limited to 'python/fatcat_tools')
-rw-r--r-- python/fatcat_tools/harvest/doi_registrars.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/python/fatcat_tools/harvest/doi_registrars.py b/python/fatcat_tools/harvest/doi_registrars.py
index 2554fe96..03773c15 100644
--- a/python/fatcat_tools/harvest/doi_registrars.py
+++ b/python/fatcat_tools/harvest/doi_registrars.py
@@ -136,7 +136,14 @@ class HarvestCrossrefWorker:
time.sleep(30.0)
continue
http_resp.raise_for_status()
- resp = http_resp.json()
+ try:
+ resp_body = http_resp.text
+ resp = json.loads(resp_body)
+ except json.JSONDecodeError as exc:
+ # Datacite API returned HTTP 200, but JSON seemed unparseable.
+ # It might be a glitch, so we retry.
+ print("failed to decode body from {}: {}".format(http_resp.url, resp_body), file=sys.stderr)
+ raise
items = self.extract_items(resp)
count += len(items)
print("... got {} ({} of {}), HTTP fetch took {}".format(len(items), count,