about summary refs log tree commit diff stats
path: root/python/sandcrawler/grobid.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-17 11:10:36 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-17 11:10:36 -0700
commit 3287f08a788107815f366019060a7cbcfe9505d2 (patch)
tree 29a867d2cf84d116b26be37508d4ea6462dede88 /python/sandcrawler/grobid.py
parent 5a6bf449ac78586bf150216fe2310be178eeb6c3 (diff)
download sandcrawler-3287f08a788107815f366019060a7cbcfe9505d2.tar.gz
sandcrawler-3287f08a788107815f366019060a7cbcfe9505d2.zip
workers: refactor to pass key to process()
Diffstat (limited to 'python/sandcrawler/grobid.py')
-rw-r--r-- python/sandcrawler/grobid.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index d9db6c3..11623c5 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -95,7 +95,7 @@ class GrobidWorker(SandcrawlerFetchWorker):
key=default_key,
)
- def process(self, record):
+ def process(self, record, key=None):
default_key = record['sha1hex']
fetch_result = self.fetch_blob(record)
@@ -121,7 +121,7 @@ class GrobidBlobWorker(SandcrawlerWorker):
self.sink = sink
self.consolidate_mode = 2
- def process(self, blob):
+ def process(self, blob, key=None):
if not blob:
return None
result = self.grobid_client.process_fulltext(blob, consolidate_mode=self.consolidate_mode)