diff options
Diffstat (limited to 'python/sandcrawler/ingest.py')
-rw-r--r-- | python/sandcrawler/ingest.py | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 5cb3ef8..82b43fe 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -229,6 +229,20 @@ class IngestFileWorker(SandcrawlerWorker):
         result.pop('key', None)
         return result
 
+    def timeout_response(self, task):
+        print("[TIMEOUT]", file=sys.stderr)
+        return dict(
+            request=task,
+            hit=False,
+            status="timeout",
+            error_message="ingest worker internal timeout",
+        )
+
+    def want(self, request):
+        if not request.get('ingest_type') in ('file', 'pdf'):
+            return False
+        return True
+
     def process(self, request):
 
         # backwards compatibility