From f2177d5e30190dfc1e55f1b08fd21c2ce917ee86 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 27 Apr 2020 17:41:45 -0700 Subject: timeout message implementation for GROBID and ingest workers --- python/sandcrawler/grobid.py | 9 +++++++++ python/sandcrawler/ingest.py | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'python') diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index 08e3a96..f329a73 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -87,6 +87,15 @@ class GrobidWorker(SandcrawlerWorker): self.sink = sink self.consolidate_mode = 2 + def timeout_response(self, task): + default_key = task['sha1hex'] + return dict( + status="error-timeout", + error_msg="internal GROBID worker timeout", + source=task, + key=default_key, + ) + def process(self, record): default_key = record['sha1hex'] if record.get('warc_path') and record.get('warc_offset'): diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py index 5cb3ef8..0be7653 100644 --- a/python/sandcrawler/ingest.py +++ b/python/sandcrawler/ingest.py @@ -229,6 +229,15 @@ class IngestFileWorker(SandcrawlerWorker): result.pop('key', None) return result + def timeout_response(self, task): + print("[TIMEOUT]", file=sys.stderr) + return dict( + request=task, + hit=False, + status="timeout", + error_message="ingest worker internal timeout", + ) + def process(self, request): # backwards compatibility -- cgit v1.2.3