diff options
-rw-r--r-- | python/sandcrawler/grobid.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index 63ca73a..70f7b16 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -43,11 +43,11 @@ class GrobidClient(object): if grobid_response.status_code == 200: info['status'] = 'success' info['tei_xml'] = grobid_response.text - if len(info['tei_xml']) > 19500000: + if len(info['tei_xml']) > 12000000: # XML is larger than Kafka message size, and much larger than # an article in general; bail out info['status'] = 'error' - info['error_msg'] = "response XML too large: {} bytes".format(len(len(info['tei_xml']))) + info['error_msg'] = "response XML too large: {} bytes".format(len(info['tei_xml'])) info.pop('tei_xml') else: # response.text is .content decoded as utf-8 |