diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-04-07 13:49:27 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-04-07 13:49:27 -0700 |
commit | dbb9f062a00eb4e9e95a9d2d75113341e5294d0d (patch) | |
tree | 93f9c390fe95e6c9c946c05ad2f9c6a2865abf1b | |
parent | 8f8ce8cc7585ededd2fb6d66abc13134d3182f0d (diff) | |
download | sandcrawler-dbb9f062a00eb4e9e95a9d2d75113341e5294d0d.tar.gz sandcrawler-dbb9f062a00eb4e9e95a9d2d75113341e5294d0d.zip |
grobid: disable biblio-glutton consolidation
-rw-r--r-- | python/sandcrawler/grobid.py | 6 |
1 file changed, 3 insertions, 3 deletions
```diff
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index b010b2c..b4215dc 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -9,7 +9,7 @@ class GrobidClient(object):
 
     def __init__(self, host_url="http://grobid.qa.fatcat.wiki", **kwargs):
         self.host_url = host_url
-        self.consolidate_mode = int(kwargs.get('consolidate_mode', 2))
+        self.consolidate_mode = int(kwargs.get('consolidate_mode', 0))
 
     def process_fulltext(self, blob, consolidate_mode=None):
         """
@@ -83,7 +83,7 @@ class GrobidWorker(SandcrawlerFetchWorker):
         super().__init__(wayback_client=wayback_client)
         self.grobid_client = grobid_client
         self.sink = sink
-        self.consolidate_mode = 2
+        self.consolidate_mode = 0
 
     def timeout_response(self, task):
         default_key = task['sha1hex']
@@ -118,7 +118,7 @@ class GrobidBlobWorker(SandcrawlerWorker):
         super().__init__()
         self.grobid_client = grobid_client
         self.sink = sink
-        self.consolidate_mode = 2
+        self.consolidate_mode = 0
 
     def process(self, blob, key=None):
         if not blob:
```