about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2021-04-07 13:49:27 -0700
committer Bryan Newbold <bnewbold@archive.org> 2021-04-07 13:49:27 -0700
commit dbb9f062a00eb4e9e95a9d2d75113341e5294d0d (patch)
tree 93f9c390fe95e6c9c946c05ad2f9c6a2865abf1b
parent 8f8ce8cc7585ededd2fb6d66abc13134d3182f0d (diff)
download sandcrawler-dbb9f062a00eb4e9e95a9d2d75113341e5294d0d.tar.gz
sandcrawler-dbb9f062a00eb4e9e95a9d2d75113341e5294d0d.zip
grobid: disable biblio-glutton consolidation
-rw-r--r-- python/sandcrawler/grobid.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index b010b2c..b4215dc 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -9,7 +9,7 @@ class GrobidClient(object):
def __init__(self, host_url="http://grobid.qa.fatcat.wiki", **kwargs):
self.host_url = host_url
- self.consolidate_mode = int(kwargs.get('consolidate_mode', 2))
+ self.consolidate_mode = int(kwargs.get('consolidate_mode', 0))
def process_fulltext(self, blob, consolidate_mode=None):
"""
@@ -83,7 +83,7 @@ class GrobidWorker(SandcrawlerFetchWorker):
super().__init__(wayback_client=wayback_client)
self.grobid_client = grobid_client
self.sink = sink
- self.consolidate_mode = 2
+ self.consolidate_mode = 0
def timeout_response(self, task):
default_key = task['sha1hex']
@@ -118,7 +118,7 @@ class GrobidBlobWorker(SandcrawlerWorker):
super().__init__()
self.grobid_client = grobid_client
self.sink = sink
- self.consolidate_mode = 2
+ self.consolidate_mode = 0
def process(self, blob, key=None):
if not blob: