From 37e505333b1418ed270820cc2c347ab0c99c933b Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Fri, 17 Jan 2020 22:27:25 -0800 Subject: add 180 second timeout to GROBID requests --- python/sandcrawler/grobid.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'python/sandcrawler/grobid.py') diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index b989591..6c8450b 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -27,15 +27,22 @@ class GrobidClient(object): if consolidate_mode == None: consolidate_mode = self.consolidate_mode - grobid_response = requests.post( - self.host_url + "/api/processFulltextDocument", - files={ - 'input': blob, - 'consolidateHeader': self.consolidate_mode, - 'consolidateCitations': 0, # too expensive for now - 'includeRawCitations': 1, + try: + grobid_response = requests.post( + self.host_url + "/api/processFulltextDocument", + files={ + 'input': blob, + 'consolidateHeader': self.consolidate_mode, + 'consolidateCitations': 0, # too expensive for now + 'includeRawCitations': 1, + }, + timeout=180.0, + ) + except requests.Timeout: + return { + 'status': 'error-timeout', + 'status': 'GROBID request (HTTP POST) timeout', } - ) info = dict( status_code=grobid_response.status_code, -- cgit v1.2.3