diff options
| -rw-r--r-- | python/sandcrawler/grobid.py | 4 | 
1 files changed, 2 insertions, 2 deletions
| diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py index 63ca73a..70f7b16 100644 --- a/python/sandcrawler/grobid.py +++ b/python/sandcrawler/grobid.py @@ -43,11 +43,11 @@ class GrobidClient(object):          if grobid_response.status_code == 200:              info['status'] = 'success'              info['tei_xml'] = grobid_response.text -            if len(info['tei_xml']) > 19500000: +            if len(info['tei_xml']) > 12000000:                  # XML is larger than Kafka message size, and much larger than                  # an article in general; bail out                  info['status'] = 'error' -                info['error_msg'] = "response XML too large: {} bytes".format(len(len(info['tei_xml']))) +                info['error_msg'] = "response XML too large: {} bytes".format(len(info['tei_xml']))                  info.pop('tei_xml')          else:              # response.text is .content decoded as utf-8 | 
