diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 18:50:17 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-27 18:50:17 -0700 |
commit | 826c7538e091fac14d987a3cd654975da964e240 (patch) | |
tree | 90345b4cabb461c624ca5a218c2fc01dce3055cd /python/tests/test_pdfextract.py | |
parent | 020037d4714e7ba2ab172c7278494aed0b2148ad (diff) | |
download | sandcrawler-826c7538e091fac14d987a3cd654975da964e240.tar.gz sandcrawler-826c7538e091fac14d987a3cd654975da964e240.zip |
make fmt (black 21.9b0)
Diffstat (limited to 'python/tests/test_pdfextract.py')
-rw-r--r-- | python/tests/test_pdfextract.py | 35 |
1 files changed, 18 insertions, 17 deletions
```diff
diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py
index 086243a..9d75655 100644
--- a/python/tests/test_pdfextract.py
+++ b/python/tests/test_pdfextract.py
@@ -15,30 +15,31 @@ def test_process_fake_pdf():
     print(resp)
     assert resp.status == "not-pdf"
 
-    with open('tests/files/dummy_zip.zip', 'rb') as f:
+    with open("tests/files/dummy_zip.zip", "rb") as f:
         pdf_bytes = f.read()
     resp = process_pdf(pdf_bytes)
-    assert resp.status == 'not-pdf'
+    assert resp.status == "not-pdf"
 
 
-@pytest.mark.skipif(poppler.version_string() == '0.71.0',
-                    reason="unsupported version of poppler")
+@pytest.mark.skipif(
+    poppler.version_string() == "0.71.0", reason="unsupported version of poppler"
+)
 def test_process_dummy_pdf():
-    with open('tests/files/dummy.pdf', 'rb') as f:
+    with open("tests/files/dummy.pdf", "rb") as f:
         pdf_bytes = f.read()
     resp = process_pdf(pdf_bytes)
-    assert resp.status == 'success'
+    assert resp.status == "success"
     assert resp.page0_thumbnail is not None
     assert len(resp.text) > 10
     assert resp.meta_xml is None
-    assert resp.file_meta['mimetype'] == 'application/pdf'
+    assert resp.file_meta["mimetype"] == "application/pdf"
     print(resp.pdf_info)
     print(resp.pdf_extra)
-    assert resp.pdf_info['Author'] == "Evangelos Vlachogiannis"
+    assert resp.pdf_info["Author"] == "Evangelos Vlachogiannis"
     # 595 x 842
-    assert resp.pdf_extra['page0_height'] == 842
-    assert resp.pdf_extra['page0_width'] == 595
-    assert resp.pdf_extra['page_count'] == 1
+    assert resp.pdf_extra["page0_height"] == 842
+    assert resp.pdf_extra["page0_width"] == 595
+    assert resp.pdf_extra["page_count"] == 1
 
 
 def test_pdfextract_worker_cdx(wayback_client):  # noqa: F811
@@ -46,17 +47,17 @@ def test_pdfextract_worker_cdx(wayback_client):  # noqa: F811
     sink = BlackholeSink()
     worker = PdfExtractWorker(wayback_client, sink=sink, thumbnail_sink=sink)
 
-    with open('tests/files/example.cdx', 'r') as cdx_file:
+    with open("tests/files/example.cdx", "r") as cdx_file:
         pusher = CdxLinePusher(
             worker,
             cdx_file,
             filter_http_statuses=[200, 226],
-            filter_mimetypes=['application/pdf'],
+            filter_mimetypes=["application/pdf"],
         )
         pusher_counts = pusher.run()
-        assert pusher_counts['total']
-        assert pusher_counts['pushed'] == 7
-        assert pusher_counts['pushed'] == worker.counts['total']
+        assert pusher_counts["total"]
+        assert pusher_counts["pushed"] == 7
+        assert pusher_counts["pushed"] == worker.counts["total"]
 
 
 def test_pdfextract_blob_worker():
@@ -64,7 +65,7 @@ def test_pdfextract_blob_worker():
     sink = BlackholeSink()
     worker = PdfExtractBlobWorker(sink=sink, thumbnail_sink=sink)
 
-    with open('tests/files/dummy.pdf', 'rb') as f:
+    with open("tests/files/dummy.pdf", "rb") as f:
         pdf_bytes = f.read()
 
     worker.process(pdf_bytes)
```