aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/pdfextract.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org>, 2020-06-29 13:19:17 -0700
committer: Bryan Newbold <bnewbold@archive.org>, 2020-06-29 13:19:22 -0700
commit: 800860ecd25346ff4a638e9d42fa905396b8fa1b (patch)
tree: 736e032c1d412d2cfcc0b9807e2ce55bde35ba8c /python/sandcrawler/pdfextract.py
parent: c216a9cdecd85db8296e6499593244686c430d8c (diff)
download: sandcrawler-800860ecd25346ff4a638e9d42fa905396b8fa1b.tar.gz
sandcrawler-800860ecd25346ff4a638e9d42fa905396b8fa1b.zip
customize timeout per worker; 120sec for pdf-extract
This is a stab-in-the-dark attempt to resolve long timeouts with this worker in prod.
Diffstat (limited to 'python/sandcrawler/pdfextract.py')
-rw-r--r-- python/sandcrawler/pdfextract.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 5f9b898..ac5f6ac 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -278,7 +278,7 @@ class PdfExtractWorker(SandcrawlerFetchWorker):
default_key = task['sha1hex']
return dict(
status="error-timeout",
- error_msg="internal GROBID worker timeout",
+ error_msg="internal pdf-extract worker timeout",
source=task,
sha1hex=default_key,
)