From 6f4f375529e99cbb9c06e49805a8925ffeda269a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sun, 1 Dec 2019 15:49:41 -0800 Subject: count empty blobs as 'failed' instead of crashing Might be better to record an artificial kafka response instead? --- python/sandcrawler/grobid.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index 06cba3e..63ca73a 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -127,7 +127,8 @@ class GrobidBlobWorker(SandcrawlerWorker): self.consolidate_mode = 2 def process(self, blob): - assert blob + if not blob: + return None result = self.grobid_client.process_fulltext(blob, consolidate_mode=self.consolidate_mode) result['file_meta'] = gen_file_metadata(blob) result['key'] = result['file_meta']['sha1hex'] -- cgit v1.2.3