aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/pdftrio.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-17 11:10:36 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-17 11:10:36 -0700
commit 3287f08a788107815f366019060a7cbcfe9505d2 (patch)
tree 29a867d2cf84d116b26be37508d4ea6462dede88 /python/sandcrawler/pdftrio.py
parent 5a6bf449ac78586bf150216fe2310be178eeb6c3 (diff)
download sandcrawler-3287f08a788107815f366019060a7cbcfe9505d2.tar.gz
sandcrawler-3287f08a788107815f366019060a7cbcfe9505d2.zip
workers: refactor to pass key to process()
Diffstat (limited to 'python/sandcrawler/pdftrio.py')
-rw-r--r-- python/sandcrawler/pdftrio.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/pdftrio.py b/python/sandcrawler/pdftrio.py
index 14d8d04..c65b6c8 100644
--- a/python/sandcrawler/pdftrio.py
+++ b/python/sandcrawler/pdftrio.py
@@ -78,7 +78,7 @@ class PdfTrioWorker(SandcrawlerFetchWorker):
self.pdftrio_client = pdftrio_client
self.sink = sink
- def process(self, record):
+ def process(self, record, key=None):
start_process = time.time()
default_key = record['sha1hex']
fetch_sec = None
@@ -115,7 +115,7 @@ class PdfTrioBlobWorker(SandcrawlerWorker):
self.sink = sink
self.mode = mode
- def process(self, blob):
+ def process(self, blob, key=None):
start_process = time.time()
if not blob:
return None