about | summary | refs | log | tree | commit | diff | stats
path: root/python/sandcrawler/workers.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2019-09-26 23:59:41 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2019-09-26 23:59:41 -0700
commit: 0da57660b034e051a45e84b18bd142f8dd6be927 (patch)
tree: ba31263259d4b85cb727999a61bc43a888825254 /python/sandcrawler/workers.py
parent: 7eb019dcc158029a86c66b6035abb9f0076b9e45 (diff)
download: sandcrawler-0da57660b034e051a45e84b18bd142f8dd6be927.tar.gz
          sandcrawler-0da57660b034e051a45e84b18bd142f8dd6be927.zip
more counts and bugfixes in grobid_tool
Diffstat (limited to 'python/sandcrawler/workers.py')
-rw-r--r-- python/sandcrawler/workers.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/python/sandcrawler/workers.py b/python/sandcrawler/workers.py
index e6f5d4b..e86d400 100644
--- a/python/sandcrawler/workers.py
+++ b/python/sandcrawler/workers.py
@@ -28,6 +28,9 @@ class SandcrawlerWorker(object):
if not result:
self.counts['failed'] += 1
return
+ elif type(result) == dict and 'status' in result and len(result['status']) < 32:
+ self.counts[result['status']] += 1
+
if self.sink:
self.sink.push_record(result)
self.counts['pushed'] += 1
@@ -63,6 +66,9 @@ class MultiprocessWrapper(SandcrawlerWorker):
if not result:
self.counts['failed'] += 1
return
+ elif type(result) == dict and 'status' in result and len(result['status']) < 32:
+ self.counts[result['status']] += 1
+
if self.sink:
self.sink.push_record(result)
self.counts['pushed'] += 1