diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-04-27 17:41:45 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-04-27 17:51:23 -0700 |
commit | f2177d5e30190dfc1e55f1b08fd21c2ce917ee86 (patch) | |
tree | cf326ea7006f423cdc5877bba93a90d57ab3345e /python/sandcrawler/grobid.py | |
parent | 060f86888c8638e3b2be1bb005c29718842ab2e1 (diff) | |
download | sandcrawler-f2177d5e30190dfc1e55f1b08fd21c2ce917ee86.tar.gz sandcrawler-f2177d5e30190dfc1e55f1b08fd21c2ce917ee86.zip |
timeout message implementation for GROBID and ingest workers
Diffstat (limited to 'python/sandcrawler/grobid.py')
-rw-r--r-- | python/sandcrawler/grobid.py | 9 |
1 files changed, 9 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index 08e3a96..f329a73 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -87,6 +87,15 @@ class GrobidWorker(SandcrawlerWorker):
         self.sink = sink
         self.consolidate_mode = 2
 
+    def timeout_response(self, task):
+        default_key = task['sha1hex']
+        return dict(
+            status="error-timeout",
+            error_msg="internal GROBID worker timeout",
+            source=task,
+            key=default_key,
+        )
+
     def process(self, record):
         default_key = record['sha1hex']
         if record.get('warc_path') and record.get('warc_offset'):
```