From abaa0c53c8f2aaff3c533747c2f310d8f60839c9 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Thu, 30 Apr 2020 16:22:59 -0700
Subject: ingest: don't 'want' non-PDF ingest

---
 python/sandcrawler/ingest.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'python')

diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 0be7653..82b43fe 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -238,6 +238,11 @@ class IngestFileWorker(SandcrawlerWorker):
             error_message="ingest worker internal timeout",
         )
 
+    def want(self, request):
+        if not request.get('ingest_type') in ('file', 'pdf'):
+            return False
+        return True
+
     def process(self, request):
 
         # backwards compatibility
-- 
cgit v1.2.3