diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-04-30 16:22:59 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-04-30 16:22:59 -0700 |
commit | abaa0c53c8f2aaff3c533747c2f310d8f60839c9 (patch) | |
tree | 315be87a3b15847e487f774d225cc3fd4fe0b240 | |
parent | 54dd46e58bbea58d4ba5d71ffcee0770b1f2e25b (diff) | |
download | sandcrawler-abaa0c53c8f2aaff3c533747c2f310d8f60839c9.tar.gz sandcrawler-abaa0c53c8f2aaff3c533747c2f310d8f60839c9.zip |
ingest: don't 'want' non-PDF ingest
-rw-r--r-- | python/sandcrawler/ingest.py | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 0be7653..82b43fe 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -238,6 +238,11 @@ class IngestFileWorker(SandcrawlerWorker):
             error_message="ingest worker internal timeout",
         )
 
+    def want(self, request):
+        if not request.get('ingest_type') in ('file', 'pdf'):
+            return False
+        return True
+
     def process(self, request):
 
         # backwards compatibility