aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-06-25 21:14:25 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-06-25 21:14:25 -0700
commit fe29f997b43d502fda3353b6c59894e962de48f9 (patch)
tree 8f069d5ac8da00a2e2a8426f28dc6fcc528650d7
parent b8db6fb555b8d29900015d9829bce1c372b8b1b9 (diff)
download sandcrawler-fe29f997b43d502fda3353b6c59894e962de48f9.tar.gz
download sandcrawler-fe29f997b43d502fda3353b6c59894e962de48f9.zip
pdfextract: catch poppler.LockedDocumentError
-rw-r--r-- python/sandcrawler/pdfextract.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index 97c2f3b..e7bfa43 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -167,7 +167,7 @@ def process_pdf(blob: bytes, thumb_size=(180,300), thumb_type="JPEG") -> PdfExtr
)
# this call sometimes fails and raises an AttributeError
page0rect = page0.page_rect()
- except AttributeError as e:
+ except (AttributeError, poppler.LockedDocumentError) as e:
# may need to expand the set of exceptions caught here over time, but
# starting with a narrow set
return PdfExtractResult(