about summary refs log tree commit diff stats
path: root/python/sandcrawler/persist.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/sandcrawler/persist.py')
-rw-r--r-- python/sandcrawler/persist.py 6
1 files changed, 0 insertions, 6 deletions
diff --git a/python/sandcrawler/persist.py b/python/sandcrawler/persist.py
index 9ef3e93..77a1a82 100644
--- a/python/sandcrawler/persist.py
+++ b/python/sandcrawler/persist.py
@@ -222,12 +222,6 @@ class PersistGrobidWorker(SandcrawlerWorker):
def push_batch(self, batch):
self.counts['total'] += len(batch)
- # filter out bad "missing status_code" timeout rows
- missing = [r for r in batch if not r.get('status_code')]
- if missing:
- self.counts['skip-missing-status'] += len(missing)
- batch = [r for r in batch if r.get('status_code')]
-
for r in batch:
if r['status_code'] != 200 or not r.get('tei_xml'):
self.counts['s3-skip-status'] += 1