about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-04-30 16:22:59 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-04-30 16:22:59 -0700
commit abaa0c53c8f2aaff3c533747c2f310d8f60839c9 (patch)
tree 315be87a3b15847e487f774d225cc3fd4fe0b240
parent 54dd46e58bbea58d4ba5d71ffcee0770b1f2e25b (diff)
download sandcrawler-abaa0c53c8f2aaff3c533747c2f310d8f60839c9.tar.gz
sandcrawler-abaa0c53c8f2aaff3c533747c2f310d8f60839c9.zip
ingest: don't 'want' non-PDF ingest
-rw-r--r-- python/sandcrawler/ingest.py 5
1 file changed, 5 insertions, 0 deletions
diff --git a/python/sandcrawler/ingest.py b/python/sandcrawler/ingest.py
index 0be7653..82b43fe 100644
--- a/python/sandcrawler/ingest.py
+++ b/python/sandcrawler/ingest.py
@@ -238,6 +238,11 @@ class IngestFileWorker(SandcrawlerWorker):
error_message="ingest worker internal timeout",
)
+    def want(self, request):
+        """Return True only when the request's 'ingest_type' is 'file' or 'pdf'."""
+        # A missing 'ingest_type' yields None from .get(), which is not wanted.
+        return request.get('ingest_type') in ('file', 'pdf')
+
def process(self, request):
# backwards compatibility