diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-17 11:10:36 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-17 11:10:36 -0700 |
commit | 3287f08a788107815f366019060a7cbcfe9505d2 (patch) | |
tree | 29a867d2cf84d116b26be37508d4ea6462dede88 /python/sandcrawler/pdftrio.py | |
parent | 5a6bf449ac78586bf150216fe2310be178eeb6c3 (diff) | |
download | sandcrawler-3287f08a788107815f366019060a7cbcfe9505d2.tar.gz sandcrawler-3287f08a788107815f366019060a7cbcfe9505d2.zip |
workers: refactor to pass key to process()
Diffstat (limited to 'python/sandcrawler/pdftrio.py')
-rw-r--r-- | python/sandcrawler/pdftrio.py | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/python/sandcrawler/pdftrio.py b/python/sandcrawler/pdftrio.py
index 14d8d04..c65b6c8 100644
--- a/python/sandcrawler/pdftrio.py
+++ b/python/sandcrawler/pdftrio.py
@@ -78,7 +78,7 @@ class PdfTrioWorker(SandcrawlerFetchWorker):
         self.pdftrio_client = pdftrio_client
         self.sink = sink
 
-    def process(self, record):
+    def process(self, record, key=None):
         start_process = time.time()
         default_key = record['sha1hex']
         fetch_sec = None
@@ -115,7 +115,7 @@ class PdfTrioBlobWorker(SandcrawlerWorker):
         self.sink = sink
         self.mode = mode
 
-    def process(self, blob):
+    def process(self, blob, key=None):
         start_process = time.time()
         if not blob:
             return None
```