From bfb8aeb387e1f3583b7ef295124b2637b2c368e0 Mon Sep 17 00:00:00 2001
From: Bryan Newbold <bnewbold@archive.org>
Date: Thu, 25 Jun 2020 21:20:01 -0700
Subject: another bad/non PDF test; catch correct error

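The new test doesn't actually exercise the error path fixed here. I'm
not sure why type checks didn't flag that `LockedDocumentError` is not
an attribute of the top-level `poppler` module (it is referenced as
`poppler.document.LockedDocumentError`), though.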
---
 python/sandcrawler/pdfextract.py | 2 +-
 python/tests/test_pdfextract.py  | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/sandcrawler/pdfextract.py b/python/sandcrawler/pdfextract.py
index e7bfa43..a6c25c1 100644
--- a/python/sandcrawler/pdfextract.py
+++ b/python/sandcrawler/pdfextract.py
@@ -167,7 +167,7 @@ def process_pdf(blob: bytes, thumb_size=(180,300), thumb_type="JPEG") -> PdfExtr
             )
         # this call sometimes fails an returns an AttributeError
         page0rect = page0.page_rect()
-    except (AttributeError, poppler.LockedDocumentError) as e:
+    except (AttributeError, poppler.document.LockedDocumentError) as e:
         # may need to expand the set of exceptions caught here over time, but
         # starting with a narrow set
         return PdfExtractResult(
diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py
index 2c54c85..ed93341 100644
--- a/python/tests/test_pdfextract.py
+++ b/python/tests/test_pdfextract.py
@@ -15,6 +15,11 @@ def test_process_fake_pdf():
     print(resp)
     assert resp.status == "not-pdf"
 
+    with open('tests/files/dummy_zip.zip', 'rb') as f:
+        pdf_bytes = f.read()
+    resp = process_pdf(pdf_bytes)
+    assert resp.status == 'not-pdf'
+
 def test_process_dummy_pdf():
     with open('tests/files/dummy.pdf', 'rb') as f:
         pdf_bytes = f.read()
-- 
cgit v1.2.3