about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/grobid.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/grobid.py b/python/sandcrawler/grobid.py
index 63ca73a..70f7b16 100644
--- a/python/sandcrawler/grobid.py
+++ b/python/sandcrawler/grobid.py
@@ -43,11 +43,11 @@ class GrobidClient(object):
if grobid_response.status_code == 200:
info['status'] = 'success'
info['tei_xml'] = grobid_response.text
- if len(info['tei_xml']) > 19500000:
+ if len(info['tei_xml']) > 12000000:
# XML is larger than Kafka message size, and much larger than
# an article in general; bail out
info['status'] = 'error'
- info['error_msg'] = "response XML too large: {} bytes".format(len(len(info['tei_xml'])))
+ info['error_msg'] = "response XML too large: {} bytes".format(len(info['tei_xml']))
info.pop('tei_xml')
else:
# response.text is .content decoded as utf-8