diff options
-rw-r--r-- | python/sandcrawler/pdfextract.py | 2 | ||||
-rw-r--r-- | python/tests/test_pdfextract.py | 5 |
2 files changed, 6 insertions, 1 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py index e7bfa43..a6c25c1 100644 --- a/python/sandcrawler/pdfextract.py +++ b/python/sandcrawler/pdfextract.py @@ -167,7 +167,7 @@ def process_pdf(blob: bytes, thumb_size=(180,300), thumb_type="JPEG") -> PdfExtr ) # this call sometimes fails an returns an AttributeError page0rect = page0.page_rect() - except (AttributeError, poppler.LockedDocumentError) as e: + except (AttributeError, poppler.document.LockedDocumentError) as e: # may need to expand the set of exceptions caught here over time, but # starting with a narrow set return PdfExtractResult( diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py index 2c54c85..ed93341 100644 --- a/python/tests/test_pdfextract.py +++ b/python/tests/test_pdfextract.py @@ -15,6 +15,11 @@ def test_process_fake_pdf(): print(resp) assert resp.status == "not-pdf" + with open('tests/files/dummy_zip.zip', 'rb') as f: + pdf_bytes = f.read() + resp = process_pdf(pdf_bytes) + assert resp.status == 'not-pdf' + def test_process_dummy_pdf(): with open('tests/files/dummy.pdf', 'rb') as f: pdf_bytes = f.read() |