From dbb9f062a00eb4e9e95a9d2d75113341e5294d0d Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Wed, 7 Apr 2021 13:49:27 -0700
Subject: grobid: disable biblio-glutton consolidation

---
 python/sandcrawler/grobid.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index b010b2c..b4215dc 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -9,7 +9,7 @@ class GrobidClient(object):
 
     def __init__(self, host_url="http://grobid.qa.fatcat.wiki", **kwargs):
         self.host_url = host_url
-        self.consolidate_mode = int(kwargs.get('consolidate_mode', 2))
+        self.consolidate_mode = int(kwargs.get('consolidate_mode', 0))
 
     def process_fulltext(self, blob, consolidate_mode=None):
         """
@@ -83,7 +83,7 @@ class GrobidWorker(SandcrawlerFetchWorker):
         super().__init__(wayback_client=wayback_client)
         self.grobid_client = grobid_client
         self.sink = sink
-        self.consolidate_mode = 2
+        self.consolidate_mode = 0
 
     def timeout_response(self, task):
         default_key = task['sha1hex']
@@ -118,7 +118,7 @@ class GrobidBlobWorker(SandcrawlerWorker):
         super().__init__()
         self.grobid_client = grobid_client
         self.sink = sink
-        self.consolidate_mode = 2
+        self.consolidate_mode = 0
 
     def process(self, blob, key=None):
         if not blob:
-- 
cgit v1.2.3