aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests/test_pdfextract.py
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-11-06 18:32:35 -0800
committer Bryan Newbold <bnewbold@archive.org> 2020-11-06 18:32:35 -0800
commit 175019c96fced3e21d0f60ea1a4a37da6b8872ac (patch)
tree f42fbbe9c8ac06ae9eb06373ab9eec96d2b3a177 /python/tests/test_pdfextract.py
parent b0b66c20c6ffb9d8acc626068964d7dfd5d3bcdc (diff)
parent 47ca1a273912c8836630b0930b71a4e66fd2c85b (diff)
download sandcrawler-175019c96fced3e21d0f60ea1a4a37da6b8872ac.tar.gz
sandcrawler-175019c96fced3e21d0f60ea1a4a37da6b8872ac.zip
Merge branch 'bnewbold-html-ingest'
Diffstat (limited to 'python/tests/test_pdfextract.py')
-rw-r--r-- python/tests/test_pdfextract.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py
index ed93341..255e3fb 100644
--- a/python/tests/test_pdfextract.py
+++ b/python/tests/test_pdfextract.py
@@ -2,6 +2,7 @@
import pytest
import struct
import responses
+import poppler
from sandcrawler import PdfExtractWorker, PdfExtractBlobWorker, CdxLinePusher, BlackholeSink, WaybackClient
from sandcrawler.pdfextract import process_pdf
@@ -20,6 +21,7 @@ def test_process_fake_pdf():
resp = process_pdf(pdf_bytes)
assert resp.status == 'not-pdf'
+@pytest.mark.skipif(poppler.version_string() == '0.71.0', reason="unsupported version of poppler")
def test_process_dummy_pdf():
with open('tests/files/dummy.pdf', 'rb') as f:
pdf_bytes = f.read()