diff options
Diffstat (limited to 'python/sandcrawler/grobid.py')
-rw-r--r-- | python/sandcrawler/grobid.py | 4 |
1 files changed, 0 insertions, 4 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index 32addca..a610404 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -57,7 +57,6 @@ class GrobidWorker(SandcrawlerWorker): self.consolidate_mode = 1 def process(self, record): - self.counts['total'] += 1 if record.get('warc_path') and record.get('warc_offset'): # it's a full CDX dict. fetch using WaybackClient if not self.wayback_client: @@ -82,7 +81,6 @@ class GrobidWorker(SandcrawlerWorker): result['file_meta'] = gen_file_metadata(blob) result['source'] = record result['key'] = result['file_meta']['sha1hex'] - self.counts[result['status']] += 1 return result class GrobidBlobWorker(SandcrawlerWorker): @@ -98,11 +96,9 @@ class GrobidBlobWorker(SandcrawlerWorker): self.consolidate_mode = 1 def process(self, blob): - self.counts['total'] += 1 assert blob result = self.grobid_client.process_fulltext(blob, consolidate_mode=self.consolidate_mode) result['file_meta'] = gen_file_metadata(blob) result['key'] = result['file_meta']['sha1hex'] - self.counts[result['status']] += 1 return result |