aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-01-17 22:27:25 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-01-17 22:27:25 -0800
commit 37e505333b1418ed270820cc2c347ab0c99c933b (patch)
tree 517827f6f669d1277a65106bf1da8d935bc7f367
parent 8f5c2ed46b809faad22c8f911ad77be4f8a156f8 (diff)
download sandcrawler-37e505333b1418ed270820cc2c347ab0c99c933b.tar.gz
sandcrawler-37e505333b1418ed270820cc2c347ab0c99c933b.zip
add 180 second timeout to GROBID requests
-rw-r--r-- python/sandcrawler/grobid.py 23
1 file changed, 15 insertions, 8 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index b989591..6c8450b 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -27,15 +27,22 @@ class GrobidClient(object):
if consolidate_mode == None:
consolidate_mode = self.consolidate_mode
- grobid_response = requests.post(
- self.host_url + "/api/processFulltextDocument",
- files={
- 'input': blob,
- 'consolidateHeader': self.consolidate_mode,
- 'consolidateCitations': 0, # too expensive for now
- 'includeRawCitations': 1,
+ try:
+ grobid_response = requests.post(
+ self.host_url + "/api/processFulltextDocument",
+ files={
+ 'input': blob,
+ 'consolidateHeader': self.consolidate_mode,
+ 'consolidateCitations': 0, # too expensive for now
+ 'includeRawCitations': 1,
+ },
+ timeout=180.0,
+ )
+ except requests.Timeout:
+ return {
+ 'status': 'error-timeout',
+ 'status': 'GROBID request (HTTP POST) timeout',
}
- )
info = dict(
status_code=grobid_response.status_code,