diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-09-26 23:59:41 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-09-26 23:59:41 -0700 |
commit | 0da57660b034e051a45e84b18bd142f8dd6be927 (patch) | |
tree | ba31263259d4b85cb727999a61bc43a888825254 /python/sandcrawler/workers.py | |
parent | 7eb019dcc158029a86c66b6035abb9f0076b9e45 (diff) | |
download | sandcrawler-0da57660b034e051a45e84b18bd142f8dd6be927.tar.gz sandcrawler-0da57660b034e051a45e84b18bd142f8dd6be927.zip |
more counts and bugfixes in grobid_tool
Diffstat (limited to 'python/sandcrawler/workers.py')
-rw-r--r-- | python/sandcrawler/workers.py | 6 |
1 files changed, 6 insertions, 0 deletions
```diff
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index e6f5d4b..e86d400 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -28,6 +28,9 @@ class SandcrawlerWorker(object):
         if not result:
             self.counts['failed'] += 1
             return
+        elif type(result) == dict and 'status' in result and len(result['status']) < 32:
+            self.counts[result['status']] += 1
+
         if self.sink:
             self.sink.push_record(result)
             self.counts['pushed'] += 1
@@ -63,6 +66,9 @@ class MultiprocessWrapper(SandcrawlerWorker):
         if not result:
             self.counts['failed'] += 1
             return
+        elif type(result) == dict and 'status' in result and len(result['status']) < 32:
+            self.counts[result['status']] += 1
+
         if self.sink:
             self.sink.push_record(result)
             self.counts['pushed'] += 1
```