diff options
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/test_grobid.py | 8 | ||||
-rw-r--r-- | python/tests/test_grobid2json.py | 4 | ||||
-rw-r--r-- | python/tests/test_html.py | 2 | ||||
-rw-r--r-- | python/tests/test_html_ingest.py | 1 | ||||
-rw-r--r-- | python/tests/test_html_metadata.py | 1 | ||||
-rw-r--r-- | python/tests/test_ingest.py | 7 | ||||
-rw-r--r-- | python/tests/test_live_wayback.py | 4 | ||||
-rw-r--r-- | python/tests/test_misc.py | 3 | ||||
-rw-r--r-- | python/tests/test_pdfextract.py | 10 | ||||
-rw-r--r-- | python/tests/test_pushers.py | 2 | ||||
-rw-r--r-- | python/tests/test_savepagenow.py | 4 | ||||
-rw-r--r-- | python/tests/test_wayback.py | 4 |
12 files changed, 30 insertions, 20 deletions
diff --git a/python/tests/test_grobid.py b/python/tests/test_grobid.py index 36d90ef..7d950df 100644 --- a/python/tests/test_grobid.py +++ b/python/tests/test_grobid.py @@ -1,11 +1,11 @@ -import pytest import struct -import responses -from sandcrawler import GrobidClient, GrobidWorker, CdxLinePusher, BlackholeSink, WaybackClient -from test_wayback import wayback_client, cdx_client +import pytest +import responses +from test_wayback import cdx_client, wayback_client +from sandcrawler import BlackholeSink, CdxLinePusher, GrobidClient, GrobidWorker, WaybackClient FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843) diff --git a/python/tests/test_grobid2json.py b/python/tests/test_grobid2json.py index 8497b10..b8999b1 100644 --- a/python/tests/test_grobid2json.py +++ b/python/tests/test_grobid2json.py @@ -1,7 +1,9 @@ -import xml import json +import xml + import pytest + from grobid2json import * diff --git a/python/tests/test_html.py b/python/tests/test_html.py index 9a81852..d4bffc1 100644 --- a/python/tests/test_html.py +++ b/python/tests/test_html.py @@ -1,10 +1,12 @@ import json + import pytest import responses from sandcrawler.html import extract_fulltext_url + def test_extract_fulltext_url(): resp = extract_fulltext_url("asdf", b"asdf") diff --git a/python/tests/test_html_ingest.py b/python/tests/test_html_ingest.py index efd1ddf..943e5da 100644 --- a/python/tests/test_html_ingest.py +++ b/python/tests/test_html_ingest.py @@ -1,5 +1,6 @@ import datetime + import pytest from sandcrawler.ingest_html import * diff --git a/python/tests/test_html_metadata.py b/python/tests/test_html_metadata.py index bf26a98..7f35d55 100644 --- a/python/tests/test_html_metadata.py +++ b/python/tests/test_html_metadata.py @@ -1,5 +1,6 @@ import datetime + import pytest from sandcrawler.html_metadata import * diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py index b51f721..0965fcb 100644 --- a/python/tests/test_ingest.py +++ b/python/tests/test_ingest.py @@ -1,12 +1,13 @@ import json + import pytest import responses +from test_grobid import REAL_TEI_XML +from test_savepagenow import * +from test_wayback import * from sandcrawler import * -from test_wayback import * -from test_savepagenow import * -from test_grobid import REAL_TEI_XML @pytest.fixture diff --git a/python/tests/test_live_wayback.py b/python/tests/test_live_wayback.py index 429c6b0..b501dc3 100644 --- a/python/tests/test_live_wayback.py +++ b/python/tests/test_live_wayback.py @@ -8,9 +8,11 @@ Simply uncomment lines to run. """ import json + import pytest -from sandcrawler import CdxApiClient, CdxApiError, WaybackClient, WaybackError, PetaboxError, SavePageNowClient, SavePageNowError, CdxPartial, gen_file_metadata +from sandcrawler import (CdxApiClient, CdxApiError, CdxPartial, PetaboxError, SavePageNowClient, SavePageNowError, + WaybackClient, WaybackError, gen_file_metadata) @pytest.fixture diff --git a/python/tests/test_misc.py b/python/tests/test_misc.py index bd18e5c..0788c38 100644 --- a/python/tests/test_misc.py +++ b/python/tests/test_misc.py @@ -1,7 +1,8 @@ import pytest -from sandcrawler import gen_file_metadata, gen_file_metadata_path, b32_hex, parse_cdx_line, clean_url +from sandcrawler import b32_hex, clean_url, gen_file_metadata, gen_file_metadata_path, parse_cdx_line + def test_gen_file_metadata(): diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py index 255e3fb..1d334d6 100644 --- a/python/tests/test_pdfextract.py +++ b/python/tests/test_pdfextract.py @@ -1,13 +1,13 @@ -import pytest import struct -import responses + import poppler +import pytest +import responses +from test_wayback import cdx_client, wayback_client -from sandcrawler import PdfExtractWorker, PdfExtractBlobWorker, CdxLinePusher, BlackholeSink, WaybackClient +from sandcrawler import BlackholeSink, CdxLinePusher, PdfExtractBlobWorker, PdfExtractWorker, WaybackClient from sandcrawler.pdfextract import process_pdf -from test_wayback import wayback_client, cdx_client - FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843) diff --git a/python/tests/test_pushers.py b/python/tests/test_pushers.py index 52f26c0..62fa515 100644 --- a/python/tests/test_pushers.py +++ b/python/tests/test_pushers.py @@ -1,7 +1,7 @@ import pytest -from sandcrawler.workers import CdxLinePusher, BlackholeSink +from sandcrawler.workers import BlackholeSink, CdxLinePusher def test_cdx_line_pusher(): diff --git a/python/tests/test_savepagenow.py b/python/tests/test_savepagenow.py index 63dd887..f3fbfda 100644 --- a/python/tests/test_savepagenow.py +++ b/python/tests/test_savepagenow.py @@ -1,11 +1,11 @@ import json + import pytest import responses - -from sandcrawler import SavePageNowClient, SavePageNowError, CdxPartial from test_wayback import * +from sandcrawler import CdxPartial, SavePageNowClient, SavePageNowError TARGET = "http://dummy-target.dummy" JOB_ID = "e70f33c7-9eca-4c88-826d-26930564d7c8" diff --git a/python/tests/test_wayback.py b/python/tests/test_wayback.py index 6bc1ca4..83311b9 100644 --- a/python/tests/test_wayback.py +++ b/python/tests/test_wayback.py @@ -1,10 +1,10 @@ import json + import pytest import responses -from sandcrawler import CdxApiClient, CdxApiError, WaybackClient, WaybackError, PetaboxError - +from sandcrawler import CdxApiClient, CdxApiError, PetaboxError, WaybackClient, WaybackError CDX_TARGET = "http://fatcat.wiki/" CDX_DT = "20180812220054" |