about summary refs log tree commit diff stats
path: root/python/tests
diff options
context:
space:
mode:
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/test_grobid.py 8
-rw-r--r-- python/tests/test_grobid2json.py 4
-rw-r--r-- python/tests/test_html.py 2
-rw-r--r-- python/tests/test_html_ingest.py 1
-rw-r--r-- python/tests/test_html_metadata.py 1
-rw-r--r-- python/tests/test_ingest.py 7
-rw-r--r-- python/tests/test_live_wayback.py 4
-rw-r--r-- python/tests/test_misc.py 3
-rw-r--r-- python/tests/test_pdfextract.py 10
-rw-r--r-- python/tests/test_pushers.py 2
-rw-r--r-- python/tests/test_savepagenow.py 4
-rw-r--r-- python/tests/test_wayback.py 4
12 files changed, 30 insertions, 20 deletions
diff --git a/python/tests/test_grobid.py b/python/tests/test_grobid.py
index 36d90ef..7d950df 100644
--- a/python/tests/test_grobid.py
+++ b/python/tests/test_grobid.py
@@ -1,11 +1,11 @@
-import pytest
import struct
-import responses
-from sandcrawler import GrobidClient, GrobidWorker, CdxLinePusher, BlackholeSink, WaybackClient
-from test_wayback import wayback_client, cdx_client
+import pytest
+import responses
+from test_wayback import cdx_client, wayback_client
+from sandcrawler import BlackholeSink, CdxLinePusher, GrobidClient, GrobidWorker, WaybackClient
FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843)
diff --git a/python/tests/test_grobid2json.py b/python/tests/test_grobid2json.py
index 8497b10..b8999b1 100644
--- a/python/tests/test_grobid2json.py
+++ b/python/tests/test_grobid2json.py
@@ -1,7 +1,9 @@
-import xml
import json
+import xml
+
import pytest
+
from grobid2json import *
diff --git a/python/tests/test_html.py b/python/tests/test_html.py
index 9a81852..d4bffc1 100644
--- a/python/tests/test_html.py
+++ b/python/tests/test_html.py
@@ -1,10 +1,12 @@
import json
+
import pytest
import responses
from sandcrawler.html import extract_fulltext_url
+
def test_extract_fulltext_url():
resp = extract_fulltext_url("asdf", b"asdf")
diff --git a/python/tests/test_html_ingest.py b/python/tests/test_html_ingest.py
index efd1ddf..943e5da 100644
--- a/python/tests/test_html_ingest.py
+++ b/python/tests/test_html_ingest.py
@@ -1,5 +1,6 @@
import datetime
+
import pytest
from sandcrawler.ingest_html import *
diff --git a/python/tests/test_html_metadata.py b/python/tests/test_html_metadata.py
index bf26a98..7f35d55 100644
--- a/python/tests/test_html_metadata.py
+++ b/python/tests/test_html_metadata.py
@@ -1,5 +1,6 @@
import datetime
+
import pytest
from sandcrawler.html_metadata import *
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index b51f721..0965fcb 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -1,12 +1,13 @@
import json
+
import pytest
import responses
+from test_grobid import REAL_TEI_XML
+from test_savepagenow import *
+from test_wayback import *
from sandcrawler import *
-from test_wayback import *
-from test_savepagenow import *
-from test_grobid import REAL_TEI_XML
@pytest.fixture
diff --git a/python/tests/test_live_wayback.py b/python/tests/test_live_wayback.py
index 429c6b0..b501dc3 100644
--- a/python/tests/test_live_wayback.py
+++ b/python/tests/test_live_wayback.py
@@ -8,9 +8,11 @@ Simply uncomment lines to run.
"""
import json
+
import pytest
-from sandcrawler import CdxApiClient, CdxApiError, WaybackClient, WaybackError, PetaboxError, SavePageNowClient, SavePageNowError, CdxPartial, gen_file_metadata
+from sandcrawler import (CdxApiClient, CdxApiError, CdxPartial, PetaboxError, SavePageNowClient, SavePageNowError,
+ WaybackClient, WaybackError, gen_file_metadata)
@pytest.fixture
diff --git a/python/tests/test_misc.py b/python/tests/test_misc.py
index bd18e5c..0788c38 100644
--- a/python/tests/test_misc.py
+++ b/python/tests/test_misc.py
@@ -1,7 +1,8 @@
import pytest
-from sandcrawler import gen_file_metadata, gen_file_metadata_path, b32_hex, parse_cdx_line, clean_url
+from sandcrawler import b32_hex, clean_url, gen_file_metadata, gen_file_metadata_path, parse_cdx_line
+
def test_gen_file_metadata():
diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py
index 255e3fb..1d334d6 100644
--- a/python/tests/test_pdfextract.py
+++ b/python/tests/test_pdfextract.py
@@ -1,13 +1,13 @@
-import pytest
import struct
-import responses
+
import poppler
+import pytest
+import responses
+from test_wayback import cdx_client, wayback_client
-from sandcrawler import PdfExtractWorker, PdfExtractBlobWorker, CdxLinePusher, BlackholeSink, WaybackClient
+from sandcrawler import BlackholeSink, CdxLinePusher, PdfExtractBlobWorker, PdfExtractWorker, WaybackClient
from sandcrawler.pdfextract import process_pdf
-from test_wayback import wayback_client, cdx_client
-
FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843)
diff --git a/python/tests/test_pushers.py b/python/tests/test_pushers.py
index 52f26c0..62fa515 100644
--- a/python/tests/test_pushers.py
+++ b/python/tests/test_pushers.py
@@ -1,7 +1,7 @@
import pytest
-from sandcrawler.workers import CdxLinePusher, BlackholeSink
+from sandcrawler.workers import BlackholeSink, CdxLinePusher
def test_cdx_line_pusher():
diff --git a/python/tests/test_savepagenow.py b/python/tests/test_savepagenow.py
index 63dd887..f3fbfda 100644
--- a/python/tests/test_savepagenow.py
+++ b/python/tests/test_savepagenow.py
@@ -1,11 +1,11 @@
import json
+
import pytest
import responses
-
-from sandcrawler import SavePageNowClient, SavePageNowError, CdxPartial
from test_wayback import *
+from sandcrawler import CdxPartial, SavePageNowClient, SavePageNowError
TARGET = "http://dummy-target.dummy"
JOB_ID = "e70f33c7-9eca-4c88-826d-26930564d7c8"
diff --git a/python/tests/test_wayback.py b/python/tests/test_wayback.py
index 6bc1ca4..83311b9 100644
--- a/python/tests/test_wayback.py
+++ b/python/tests/test_wayback.py
@@ -1,10 +1,10 @@
import json
+
import pytest
import responses
-from sandcrawler import CdxApiClient, CdxApiError, WaybackClient, WaybackError, PetaboxError
-
+from sandcrawler import CdxApiClient, CdxApiError, PetaboxError, WaybackClient, WaybackError
CDX_TARGET = "http://fatcat.wiki/"
CDX_DT = "20180812220054"