aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@archive.org>2020-06-25 21:20:01 -0700
committerBryan Newbold <bnewbold@archive.org>2020-06-25 21:20:24 -0700
commitbfb8aeb387e1f3583b7ef295124b2637b2c368e0 (patch)
tree3494a3b6a8a929e4dd8428b58737192d5e47b810 /python
parentfe29f997b43d502fda3353b6c59894e962de48f9 (diff)
downloadsandcrawler-bfb8aeb387e1f3583b7ef295124b2637b2c368e0.tar.gz
sandcrawler-bfb8aeb387e1f3583b7ef295124b2637b2c368e0.zip
another bad/non PDF test; catch correct error
This test doesn't actually catch the error. I'm not sure why type checks don't discover the "LockedDocumentError not part of poppler" issue though.
Diffstat (limited to 'python')
-rw-r--r--python/sandcrawler/pdfextract.py2
-rw-r--r--python/tests/test_pdfextract.py5
2 files changed, 6 insertions, 1 deletions
diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index e7bfa43..a6c25c1 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -167,7 +167,7 @@ def process_pdf(blob: bytes, thumb_size=(180,300), thumb_type="JPEG") -> PdfExtr
)
# this call sometimes fails and raises an AttributeError
page0rect = page0.page_rect()
- except (AttributeError, poppler.LockedDocumentError) as e:
+ except (AttributeError, poppler.document.LockedDocumentError) as e:
# may need to expand the set of exceptions caught here over time, but
# starting with a narrow set
return PdfExtractResult(
diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py
index 2c54c85..ed93341 100644
--- a/python/tests/test_pdfextract.py
+++ b/python/tests/test_pdfextract.py
@@ -15,6 +15,11 @@ def test_process_fake_pdf():
print(resp)
assert resp.status == "not-pdf"
+ with open('tests/files/dummy_zip.zip', 'rb') as f:
+ pdf_bytes = f.read()
+ resp = process_pdf(pdf_bytes)
+ assert resp.status == 'not-pdf'
+
def test_process_dummy_pdf():
with open('tests/files/dummy.pdf', 'rb') as f:
pdf_bytes = f.read()