aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/sandcrawler/workers.py9
1 files changed, 9 insertions, 0 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 96aef3f..6425e99 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -27,6 +27,9 @@ class SandcrawlerWorker(object):
def push_record(self, task):
self.counts['total'] += 1
+ if not self.want(task):
+ self.counts['skip'] += 1
+ return
result = self.process(task)
if not result:
self.counts['failed'] += 1
@@ -90,6 +93,12 @@ class SandcrawlerWorker(object):
print("Worker: {}".format(self.counts), file=sys.stderr)
return self.counts
+ def want(self, task):
+ """
+ Optionally override this as a filter in implementations.
+ """
+ return True
+
def process(self, task):
"""
Derived workers need to implement business logic here.