about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-21 10:57:13 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-21 10:57:13 -0800
commit fb7717ae410f72ff33017c176f64dff556b86f5b (patch)
tree e2ad5374c81c8c79861106fc07beeaca4b80a396
parent 37e505333b1418ed270820cc2c347ab0c99c933b (diff)
download sandcrawler-fb7717ae410f72ff33017c176f64dff556b86f5b.tar.gz
sandcrawler-fb7717ae410f72ff33017c176f64dff556b86f5b.zip
grobid: fix error_msg typo; set status_code for timeouts
-rw-r--r-- python/sandcrawler/grobid.py 3
1 files changed, 2 insertions, 1 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index 6c8450b..8c3aec1 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -41,7 +41,8 @@ class GrobidClient(object):
except requests.Timeout:
return {
'status': 'error-timeout',
- 'status': 'GROBID request (HTTP POST) timeout',
+ 'status_code': -4, # heritrix3 "HTTP timeout" code
+ 'error_msg': 'GROBID request (HTTP POST) timeout',
}
info = dict(