diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-04-30 16:46:44 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-04-30 16:46:44 -0700 |
commit | 2d85d55ed1af4220f1469f4cf0cddd7e0888b3b3 (patch) | |
tree | d19a42fac5bf922c364b859498cb35d66519fc40 /python | |
parent | abaa0c53c8f2aaff3c533747c2f310d8f60839c9 (diff) | |
download | sandcrawler-2d85d55ed1af4220f1469f4cf0cddd7e0888b3b3.tar.gz sandcrawler-2d85d55ed1af4220f1469f4cf0cddd7e0888b3b3.zip |
workers: add missing want() dataflow path
Diffstat (limited to 'python')
-rw-r--r-- | python/sandcrawler/workers.py | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index 96aef3f..6425e99 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -27,6 +27,9 @@ class SandcrawlerWorker(object):
 
     def push_record(self, task):
         self.counts['total'] += 1
+        if not self.want(task):
+            self.counts['skip'] += 1
+            return
         result = self.process(task)
         if not result:
             self.counts['failed'] += 1
@@ -90,6 +93,12 @@ class SandcrawlerWorker(object):
         print("Worker: {}".format(self.counts), file=sys.stderr)
         return self.counts
 
+    def want(self, task):
+        """
+        Optionally override this as a filter in implementations.
+        """
+        return True
+
     def process(self, task):
         """
         Derived workers need to implement business logic here.