diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-01-17 22:27:25 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-01-17 22:27:25 -0800 |
commit | 37e505333b1418ed270820cc2c347ab0c99c933b (patch) | |
tree | 517827f6f669d1277a65106bf1da8d935bc7f367 /python | |
parent | 8f5c2ed46b809faad22c8f911ad77be4f8a156f8 (diff) | |
download | sandcrawler-37e505333b1418ed270820cc2c347ab0c99c933b.tar.gz sandcrawler-37e505333b1418ed270820cc2c347ab0c99c933b.zip |
add 180 second timeout to GROBID requests
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/grobid.py | 23 |
1 files changed, 15 insertions, 8 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index b989591..6c8450b 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -27,15 +27,22 @@ class GrobidClient(object): if consolidate_mode == None: consolidate_mode = self.consolidate_mode - grobid_response = requests.post( - self.host_url + "/api/processFulltextDocument", - files={ - 'input': blob, - 'consolidateHeader': self.consolidate_mode, - 'consolidateCitations': 0, # too expensive for now - 'includeRawCitations': 1, + try: + grobid_response = requests.post( + self.host_url + "/api/processFulltextDocument", + files={ + 'input': blob, + 'consolidateHeader': self.consolidate_mode, + 'consolidateCitations': 0, # too expensive for now + 'includeRawCitations': 1, + }, + timeout=180.0, + ) + except requests.Timeout: + return { + 'status': 'error-timeout', + 'status': 'GROBID request (HTTP POST) timeout', } - ) info = dict( status_code=grobid_response.status_code, |