about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/pdftrio.py 10
1 files changed, 9 insertions, 1 deletions
diff --git a/python/sandcrawler/pdftrio.py b/python/sandcrawler/pdftrio.py
index 12be9eb..41eed19 100644
--- a/python/sandcrawler/pdftrio.py
+++ b/python/sandcrawler/pdftrio.py
@@ -34,7 +34,7 @@ class PdfTrioClient(object):
files={
'pdf_content': blob,
},
- timeout=30.0,
+ timeout=60.0,
)
except requests.Timeout:
return {
@@ -42,6 +42,14 @@ class PdfTrioClient(object):
'status_code': -4, # heritrix3 "HTTP timeout" code
'error_msg': 'pdftrio request (HTTP POST) timeout',
}
+ except requests.exceptions.ConnectionError:
+ # crude back-off
+ time.sleep(2.0)
+ return {
+ 'status': 'error-connect',
+ 'status_code': -2, # heritrix3 "HTTP connect" code
+ 'error_msg': 'pdftrio request connection timout',
+ }
info = dict(
status_code=pdftrio_response.status_code,