diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 21:20:01 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-06-25 21:20:24 -0700 |
commit | bfb8aeb387e1f3583b7ef295124b2637b2c368e0 (patch) | |
tree | 3494a3b6a8a929e4dd8428b58737192d5e47b810 /python/sandcrawler/pdfextract.py | |
parent | fe29f997b43d502fda3353b6c59894e962de48f9 (diff) | |
download | sandcrawler-bfb8aeb387e1f3583b7ef295124b2637b2c368e0.tar.gz sandcrawler-bfb8aeb387e1f3583b7ef295124b2637b2c368e0.zip |
another bad/non-PDF test; catch correct error
This test doesn't actually catch the error. I'm not sure why type checks
don't discover the "LockedDocumentError not part of poppler" issue
though.
Diffstat (limited to 'python/sandcrawler/pdfextract.py')
-rw-r--r-- | python/sandcrawler/pdfextract.py | 2 |
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py index e7bfa43..a6c25c1 100644 --- a/python/sandcrawler/pdfextract.py +++ b/python/sandcrawler/pdfextract.py @@ -167,7 +167,7 @@ def process_pdf(blob: bytes, thumb_size=(180,300), thumb_type="JPEG") -> PdfExtr ) # this call sometimes fails an returns an AttributeError page0rect = page0.page_rect() - except (AttributeError, poppler.LockedDocumentError) as e: + except (AttributeError, poppler.document.LockedDocumentError) as e: # may need to expand the set of exceptions caught here over time, but # starting with a narrow set return PdfExtractResult( |