about summary refs log tree commit diff stats
path: root/python/tests/test_pdfextract.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/tests/test_pdfextract.py')
-rw-r--r-- python/tests/test_pdfextract.py 12
1 files changed, 8 insertions, 4 deletions
diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py
index 1d334d6..146b138 100644
--- a/python/tests/test_pdfextract.py
+++ b/python/tests/test_pdfextract.py
@@ -1,4 +1,3 @@
-
import struct
import poppler
@@ -6,11 +5,13 @@ import pytest
import responses
from test_wayback import cdx_client, wayback_client
-from sandcrawler import BlackholeSink, CdxLinePusher, PdfExtractBlobWorker, PdfExtractWorker, WaybackClient
+from sandcrawler import (BlackholeSink, CdxLinePusher, PdfExtractBlobWorker, PdfExtractWorker,
+ WaybackClient)
from sandcrawler.pdfextract import process_pdf
FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843)
+
def test_process_fake_pdf():
resp = process_pdf(FAKE_PDF_BYTES)
print(resp)
@@ -21,7 +22,9 @@ def test_process_fake_pdf():
resp = process_pdf(pdf_bytes)
assert resp.status == 'not-pdf'
-@pytest.mark.skipif(poppler.version_string() == '0.71.0', reason="unsupported version of poppler")
+
+@pytest.mark.skipif(poppler.version_string() == '0.71.0',
+ reason="unsupported version of poppler")
def test_process_dummy_pdf():
with open('tests/files/dummy.pdf', 'rb') as f:
pdf_bytes = f.read()
@@ -39,6 +42,7 @@ def test_process_dummy_pdf():
assert resp.pdf_extra['page0_width'] == 595
assert resp.pdf_extra['page_count'] == 1
+
def test_pdfextract_worker_cdx(wayback_client):
sink = BlackholeSink()
@@ -56,6 +60,7 @@ def test_pdfextract_worker_cdx(wayback_client):
assert pusher_counts['pushed'] == 7
assert pusher_counts['pushed'] == worker.counts['total']
+
def test_pdfextract_blob_worker():
sink = BlackholeSink()
@@ -65,4 +70,3 @@ def test_pdfextract_blob_worker():
pdf_bytes = f.read()
worker.process(pdf_bytes)
-