13 files changed, 294 insertions, 194 deletions
diff --git a/python/tests/test_grobid.py b/python/tests/test_grobid.py
index 7d950df..55636dc 100644
--- a/python/tests/test_grobid.py
+++ b/python/tests/test_grobid.py
@@ -1,4 +1,3 @@
-
 import struct
 
 import pytest
@@ -12,20 +11,21 @@ FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843)
 with open('tests/files/23b29ea36382680716be08fc71aa81bd226e8a85.xml', 'rb') as f:
     REAL_TEI_XML = f.read()
 
+
 @pytest.fixture
 def grobid_client():
-    client = GrobidClient(
-        host_url="http://dummy-grobid",
-    )
+    client = GrobidClient(host_url="http://dummy-grobid", )
     return client
 
+
 @responses.activate
 def test_grobid_503(grobid_client):
 
     status = b'{"status": "done broke due to 503"}'
     responses.add(responses.POST,
-        'http://dummy-grobid/api/processFulltextDocument', status=503,
-        body=status)
+                  'http://dummy-grobid/api/processFulltextDocument',
+                  status=503,
+                  body=status)
 
     resp = grobid_client.process_fulltext(FAKE_PDF_BYTES)
 
@@ -35,12 +35,15 @@ def test_grobid_503(grobid_client):
     assert resp['status_code'] == 503
     assert resp['status'] == "error"
 
+
 @responses.activate
 def test_grobid_success(grobid_client):
 
     responses.add(responses.POST,
-        'http://dummy-grobid/api/processFulltextDocument', status=200,
-        body=REAL_TEI_XML, content_type='text/xml')
+                  'http://dummy-grobid/api/processFulltextDocument',
+                  status=200,
+                  body=REAL_TEI_XML,
+                  content_type='text/xml')
 
     resp = grobid_client.process_fulltext(FAKE_PDF_BYTES)
 
@@ -53,6 +56,7 @@ def test_grobid_success(grobid_client):
     #print(type(REAL_TEI_XML))
     assert resp['tei_xml'] == REAL_TEI_XML.decode('ISO-8859-1')
 
+
 @responses.activate
 def test_grobid_worker_cdx(grobid_client, wayback_client):
 
@@ -60,8 +64,10 @@ def test_grobid_worker_cdx(grobid_client, wayback_client):
     worker = GrobidWorker(grobid_client, wayback_client, sink=sink)
 
     responses.add(responses.POST,
-        'http://dummy-grobid/api/processFulltextDocument', status=200,
-        body=REAL_TEI_XML, content_type='text/xml')
+                  'http://dummy-grobid/api/processFulltextDocument',
+                  status=200,
+                  body=REAL_TEI_XML,
+                  content_type='text/xml')
 
     with open('tests/files/example.cdx', 'r') as cdx_file:
         pusher = CdxLinePusher(
@@ -76,4 +82,3 @@ def test_grobid_worker_cdx(grobid_client, wayback_client):
         assert pusher_counts['pushed'] == worker.counts['total']
 
     assert len(responses.calls) == worker.counts['total']
-
diff --git a/python/tests/test_grobid2json.py b/python/tests/test_grobid2json.py
index b8999b1..7637871 100644
--- a/python/tests/test_grobid2json.py
+++ b/python/tests/test_grobid2json.py
@@ -1,4 +1,3 @@
-
 import json
 import xml
 
@@ -8,14 +7,15 @@ from grobid2json import *
 
 
 def test_small_xml():
-    
+
     with open('tests/files/small.xml', 'r') as f:
         tei_xml = f.read()
     with open('tests/files/small.json', 'r') as f:
-        json_form  = json.loads(f.read())
+        json_form = json.loads(f.read())
 
     assert teixml2json(tei_xml) == json_form
 
+
 def test_invalid_xml():
 
     with pytest.raises(xml.etree.ElementTree.ParseError):
diff --git a/python/tests/test_html.py b/python/tests/test_html.py
index d4bffc1..c5f422e 100644
--- a/python/tests/test_html.py
+++ b/python/tests/test_html.py
@@ -1,4 +1,3 @@
-
 import json
 
 import pytest
@@ -13,8 +12,7 @@ def test_extract_fulltext_url():
     assert resp == {}
 
     resp = extract_fulltext_url(
-        "http://dummy-site/",
-        b"""<html>
+        "http://dummy-site/", b"""<html>
         <head>
           <meta name="citation_pdf_url" content="http://www.example.com/content/271/20/11761.full.pdf">
         </head>
@@ -22,8 +20,7 @@ def test_extract_fulltext_url():
         <h1>my big article here</h1>
         blah
         </body>
-        </html>"""
-    )
+        </html>""")
     assert resp['pdf_url'] == "http://www.example.com/content/271/20/11761.full.pdf"
     assert resp['technique'] == "citation_pdf_url"
 
@@ -32,4 +29,5 @@ def test_extract_fulltext_url():
             "https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0213978",
             f.read(),
         )
-    assert resp['pdf_url'] == "https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0213978&type=printable"
+    assert resp[
+        'pdf_url'] == "https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0213978&type=printable"
diff --git a/python/tests/test_html_ingest.py b/python/tests/test_html_ingest.py
index 943e5da..3bf94e2 100644
--- a/python/tests/test_html_ingest.py
+++ b/python/tests/test_html_ingest.py
@@ -1,4 +1,3 @@
-
 import datetime
 
 import pytest
diff --git a/python/tests/test_html_metadata.py b/python/tests/test_html_metadata.py
index 7f35d55..a4c1e41 100644
--- a/python/tests/test_html_metadata.py
+++ b/python/tests/test_html_metadata.py
@@ -1,4 +1,3 @@
-
 import datetime
 
 import pytest
@@ -44,11 +43,12 @@ def test_html_metadata_plos() -> None:
 
 
 def test_html_metadata_elife() -> None:
-    
+
     with open('tests/files/elife_article.html', 'r') as f:
         elife_html = f.read()
 
-    meta = html_extract_biblio("https://elifesciences.org/articles/44753", HTMLParser(elife_html))
+    meta = html_extract_biblio("https://elifesciences.org/articles/44753",
+                               HTMLParser(elife_html))
     assert meta is not None
     assert meta.title == "Parallel visual circuitry in a basal chordate"
     assert meta.doi == "10.7554/eLife.44753"
@@ -69,7 +69,7 @@ def test_html_metadata_elife() -> None:
 
 
 def test_html_metadata_peerj() -> None:
- 
+
     with open('tests/files/peerj_oa_article.html', 'r') as f:
         peerj_html = f.read()
 
@@ -78,15 +78,15 @@ def test_html_metadata_peerj() -> None:
     assert meta.title == "The state of OA: a large-scale analysis of the prevalence and impact of Open Access articles"
     assert meta.doi == "10.7717/peerj.4375"
     assert meta.contrib_names == [
-            "Heather Piwowar",
-      "Jason Priem",
-      "Vincent Larivière",
-      "Juan Pablo Alperin",
-      "Lisa Matthias",
-      "Bree Norlander",
-      "Ashley Farley",
-      "Jevin West",
-      "Stefanie Haustein",
+        "Heather Piwowar",
+        "Jason Priem",
+        "Vincent Larivière",
+        "Juan Pablo Alperin",
+        "Lisa Matthias",
+        "Bree Norlander",
+        "Ashley Farley",
+        "Jevin West",
+        "Stefanie Haustein",
     ]
     assert meta.container_name == "PeerJ"
     # "2018-02-13"
@@ -129,7 +129,7 @@ def test_html_metadata_ojs3() -> None:
         "Os Keyes",
     ]
     assert meta.container_name == "First Monday"
-    assert meta.container_abbrev == "1" # NOTE: bad source metadata
+    assert meta.container_abbrev == "1"  # NOTE: bad source metadata
     assert meta.container_issn == "1396-0466"
     # "2020/09/10"
     assert meta.release_date == datetime.date(year=2020, month=9, day=10)
@@ -150,6 +150,7 @@ def test_html_metadata_dlib() -> None:
     # "2017-05-15"
     assert meta.release_date == datetime.date(year=2017, month=5, day=15)
 
+
 def test_html_metadata_dc_case() -> None:
     """
     This tests that CSS selector <meta name=""> attribute lookups are not case-sensitive.
@@ -167,10 +168,12 @@ def test_html_metadata_dc_case() -> None:
     assert meta is not None
     assert meta.issue == "123"
 
+
 @pytest.fixture
 def adblock() -> Any:
     return load_adblock_rules()
 
+
 def test_html_resources(adblock) -> None:
 
     with open('tests/files/dlib_05vanhyning.html', 'r') as f:
@@ -227,4 +230,3 @@ def test_html_resources(adblock) -> None:
         HTMLParser(nature_html),
         adblock,
     )
-
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index 0965fcb..79f50f4 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -1,4 +1,3 @@
-
 import json
 
 import pytest
@@ -12,9 +11,7 @@ from sandcrawler import *
 
 @pytest.fixture
 def ingest_worker(wayback_client, spn_client):
-    grobid_client = GrobidClient(
-        host_url="http://dummy-grobid",
-    )
+    grobid_client = GrobidClient(host_url="http://dummy-grobid", )
     worker = IngestFileWorker(
         wayback_client=wayback_client,
         spn_client=spn_client,
@@ -22,14 +19,11 @@ def ingest_worker(wayback_client, spn_client):
     )
     return worker
 
+
 @pytest.fixture
 def ingest_worker_pdf(wayback_client_pdf, spn_client):
-    grobid_client = GrobidClient(
-        host_url="http://dummy-grobid",
-    )
-    pgrest_client = SandcrawlerPostgrestClient(
-        api_url="http://dummy-postgrest",
-    )
+    grobid_client = GrobidClient(host_url="http://dummy-grobid", )
+    pgrest_client = SandcrawlerPostgrestClient(api_url="http://dummy-postgrest", )
     worker = IngestFileWorker(
         wayback_client=wayback_client_pdf,
         spn_client=spn_client,
@@ -50,37 +44,45 @@ def test_ingest_success(ingest_worker_pdf):
         'base_url': "http://dummy-host/",
     }
     responses.add(responses.POST,
-        'http://dummy-spnv2/save',
-        status=200,
-        body=json.dumps({"url": TARGET, "job_id": JOB_ID}))
+                  'http://dummy-spnv2/save',
+                  status=200,
+                  body=json.dumps({
+                      "url": TARGET,
+                      "job_id": JOB_ID
+                  }))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(PENDING_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(PENDING_BODY))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(SUCCESS_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(SUCCESS_BODY))
     responses.add(responses.GET,
-        'http://dummy-cdx/cdx',
-        status=200,
-        body=json.dumps(CDX_SPN_HIT))
+                  'http://dummy-cdx/cdx',
+                  status=200,
+                  body=json.dumps(CDX_SPN_HIT))
     responses.add(responses.GET,
-        'https://web.archive.org/web/{}id_/{}'.format("20180326070330", TARGET + "/redirect"),
-        status=200,
-        headers={"X-Archive-Src": "liveweb-whatever.warc.gz"},
-        body=pdf_bytes)
+                  'https://web.archive.org/web/{}id_/{}'.format("20180326070330",
+                                                                TARGET + "/redirect"),
+                  status=200,
+                  headers={"X-Archive-Src": "liveweb-whatever.warc.gz"},
+                  body=pdf_bytes)
     responses.add(responses.GET,
-        'http://dummy-postgrest/grobid?sha1hex=eq.{}'.format("90ffd2359008d82298821d16b21778c5c39aec36"),
-        status=200,
-        body=json.dumps([]))
+                  'http://dummy-postgrest/grobid?sha1hex=eq.{}'.format(
+                      "90ffd2359008d82298821d16b21778c5c39aec36"),
+                  status=200,
+                  body=json.dumps([]))
     responses.add(responses.GET,
-        'http://dummy-postgrest/pdf_meta?sha1hex=eq.{}'.format("90ffd2359008d82298821d16b21778c5c39aec36"),
-        status=200,
-        body=json.dumps([]))
+                  'http://dummy-postgrest/pdf_meta?sha1hex=eq.{}'.format(
+                      "90ffd2359008d82298821d16b21778c5c39aec36"),
+                  status=200,
+                  body=json.dumps([]))
     responses.add(responses.POST,
-        'http://dummy-grobid/api/processFulltextDocument', status=200,
-        body=REAL_TEI_XML, content_type='text/xml')
+                  'http://dummy-grobid/api/processFulltextDocument',
+                  status=200,
+                  body=REAL_TEI_XML,
+                  content_type='text/xml')
 
     resp = ingest_worker_pdf.process(request)
 
@@ -108,6 +110,7 @@ def test_ingest_success(ingest_worker_pdf):
     assert resp['pdf_meta']['pdf_extra']['page_count'] == 1
     assert resp['pdf_meta'].get('text') is None
 
+
 @responses.activate
 def test_ingest_landing(ingest_worker):
 
@@ -116,34 +119,39 @@ def test_ingest_landing(ingest_worker):
         'base_url': "http://dummy-host/",
     }
     responses.add(responses.POST,
-        'http://dummy-spnv2/save',
-        status=200,
-        body=json.dumps({"url": TARGET, "job_id": JOB_ID}))
+                  'http://dummy-spnv2/save',
+                  status=200,
+                  body=json.dumps({
+                      "url": TARGET,
+                      "job_id": JOB_ID
+                  }))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(PENDING_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(PENDING_BODY))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(SUCCESS_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(SUCCESS_BODY))
     responses.add(responses.GET,
-        'http://dummy-cdx/cdx',
-        status=200,
-        body=json.dumps(CDX_SPN_HIT))
+                  'http://dummy-cdx/cdx',
+                  status=200,
+                  body=json.dumps(CDX_SPN_HIT))
     responses.add(responses.GET,
-        'https://web.archive.org/web/{}id_/{}'.format("20180326070330", TARGET + "/redirect"),
-        status=200,
-        headers={"X-Archive-Src": "liveweb-whatever.warc.gz"},
-        body=WARC_BODY)
+                  'https://web.archive.org/web/{}id_/{}'.format("20180326070330",
+                                                                TARGET + "/redirect"),
+                  status=200,
+                  headers={"X-Archive-Src": "liveweb-whatever.warc.gz"},
+                  body=WARC_BODY)
 
     # this is for second time around; don't want to fetch same landing page
     # HTML again and result in a loop
     responses.add(responses.GET,
-        'https://web.archive.org/web/{}id_/{}'.format("20180326070330", TARGET + "/redirect"),
-        status=200,
-        headers={"X-Archive-Src": "liveweb-whatever.warc.gz"},
-        body="<html></html>")
+                  'https://web.archive.org/web/{}id_/{}'.format("20180326070330",
+                                                                TARGET + "/redirect"),
+                  status=200,
+                  headers={"X-Archive-Src": "liveweb-whatever.warc.gz"},
+                  body="<html></html>")
 
     resp = ingest_worker.process(request)
 
@@ -157,6 +165,7 @@ def test_ingest_landing(ingest_worker):
     assert 'revisit_cdx' not in resp
     assert 'grobid' not in resp
 
+
 @responses.activate
 def test_ingest_blocklist(ingest_worker):
 
@@ -192,6 +201,7 @@ def test_ingest_wall_blocklist(ingest_worker):
     assert resp['status'] == "skip-wall"
     assert resp['request'] == request
 
+
 @responses.activate
 def test_ingest_cookie_blocklist(ingest_worker):
 
@@ -205,4 +215,3 @@ def test_ingest_cookie_blocklist(ingest_worker):
     assert resp['hit'] == False
     assert resp['status'] == "blocked-cookie"
     assert resp['request'] == request
-
diff --git a/python/tests/test_live_wayback.py b/python/tests/test_live_wayback.py
index b501dc3..0ff4902 100644
--- a/python/tests/test_live_wayback.py
+++ b/python/tests/test_live_wayback.py
@@ -1,4 +1,3 @@
-
 """
 This file contains tests to run against "live" wayback services. They default
 to "skip" because you need authentication, and we shouldn't hit these services
@@ -11,8 +10,8 @@ import json
 
 import pytest
 
-from sandcrawler import (CdxApiClient, CdxApiError, CdxPartial, PetaboxError, SavePageNowClient, SavePageNowError,
-                         WaybackClient, WaybackError, gen_file_metadata)
+from sandcrawler import (CdxApiClient, CdxApiError, CdxPartial, PetaboxError, SavePageNowClient,
+                         SavePageNowError, WaybackClient, WaybackError, gen_file_metadata)
 
 
 @pytest.fixture
@@ -20,16 +19,19 @@ def cdx_client():
     client = CdxApiClient()
     return client
 
+
 @pytest.fixture
 def wayback_client():
     client = WaybackClient()
     return client
 
+
 @pytest.fixture
 def spn_client():
     client = SavePageNowClient()
     return client
 
+
 @pytest.mark.skip(reason="hits prod services, requires auth")
 def test_cdx_fetch(cdx_client):
 
@@ -50,6 +52,7 @@ def test_cdx_fetch(cdx_client):
     with pytest.raises(KeyError):
         resp = cdx_client.fetch(url, "12345678123456")
 
+
 @pytest.mark.skip(reason="hits prod services, requires auth")
 def test_cdx_lookup_best(cdx_client):
 
@@ -68,13 +71,18 @@ def test_cdx_lookup_best(cdx_client):
     assert resp.mimetype == "text/html"
     assert resp.status_code == 200
 
+
 @pytest.mark.skip(reason="hits prod services, requires auth")
 def test_wayback_fetch(wayback_client):
 
-    resp = wayback_client.fetch_petabox(25683, 2676464871, "archiveteam_archivebot_go_20171205210002/arstechnica.co.uk-inf-20171201-061309-bb65j-00021.warc.gz")
+    resp = wayback_client.fetch_petabox(
+        25683, 2676464871,
+        "archiveteam_archivebot_go_20171205210002/arstechnica.co.uk-inf-20171201-061309-bb65j-00021.warc.gz"
+    )
 
     assert resp.body
 
+
 @pytest.mark.skip(reason="hits prod services, requires auth")
 def test_lookup_resource_success(wayback_client):
 
@@ -86,6 +94,7 @@ def test_lookup_resource_success(wayback_client):
     assert resp.terminal_url in (url, url.replace("https://", "http://"))
     assert resp.cdx.url in (url, url.replace("https://", "http://"))
 
+
 @pytest.mark.skip(reason="hits prod services, requires auth")
 def test_cdx_fetch_spn2(cdx_client):
 
@@ -107,8 +116,8 @@ def test_cdx_fetch_spn2(cdx_client):
     # https://onlinelibrary.wiley.com/doi/pdf/10.1002/lrh2.10209 20200110222410
 
     #com,wiley,onlinelibrary)/doi/pdf/10.1002/lrh2.10209 20200110222410 https://onlinelibrary.wiley.com/doi/pdf/10.1002/lrh2.10209 text/html 200 VYW7JXFK6EC2KC537N5B7PHYZC4B6MZL - - 9006 815069841 liveweb-20200110214015-wwwb-spn18.us.archive.org-8002.warc.gz
-#com,wiley,onlinelibrary)/doi/pdf/10.1002/lrh2.10209 20200110222410 https://onlinelibrary.wiley.com/doi/pdf/10.1002/lrh2.10209 text/html 302 AFI55BZE23HDTTEERUFKRP6WQVO3LOLS - - 1096 815066572 liveweb-20200110214015-wwwb-spn18.us.archive.org-8002.warc.gz
-#com,wiley,onlinelibrary)/doi/pdf/10.1002/lrh2.10209 20200110222422 https://onlinelibrary.wiley.com/doi/pdf/10.1002/lrh2.10209 text/html 302 AFI55BZE23HDTTEERUFKRP6WQVO3LOLS - - 1094 307563475 liveweb-20200110214449-wwwb-spn18.us.archive.org-8003.warc.gz
+    #com,wiley,onlinelibrary)/doi/pdf/10.1002/lrh2.10209 20200110222410 https://onlinelibrary.wiley.com/doi/pdf/10.1002/lrh2.10209 text/html 302 AFI55BZE23HDTTEERUFKRP6WQVO3LOLS - - 1096 815066572 liveweb-20200110214015-wwwb-spn18.us.archive.org-8002.warc.gz
+    #com,wiley,onlinelibrary)/doi/pdf/10.1002/lrh2.10209 20200110222422 https://onlinelibrary.wiley.com/doi/pdf/10.1002/lrh2.10209 text/html 302 AFI55BZE23HDTTEERUFKRP6WQVO3LOLS - - 1094 307563475 liveweb-20200110214449-wwwb-spn18.us.archive.org-8003.warc.gz
 
     url = "https://onlinelibrary.wiley.com/doi/pdf/10.1002/lrh2.10209"
     datetime = "20200110222410"
@@ -119,6 +128,7 @@ def test_cdx_fetch_spn2(cdx_client):
     assert resp.sha1b32 == "VYW7JXFK6EC2KC537N5B7PHYZC4B6MZL"
     assert resp.status_code == 200
 
+
 @pytest.mark.skip(reason="hits prod services, requires auth")
 def test_lookup_ftp(wayback_client):
     # ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_pdf/80/23/10.1177_1559827617708562.PMC6236633.pdf
@@ -153,6 +163,7 @@ def test_lookup_ftp(wayback_client):
     file_meta = gen_file_metadata(resp.body)
     assert file_meta['sha1hex'] == resp.cdx.sha1hex
 
+
 @pytest.mark.skip(reason="hits prod services, requires auth")
 def test_crawl_ftp(spn_client, wayback_client):
 
diff --git a/python/tests/test_misc.py b/python/tests/test_misc.py
index 0788c38..dcc1202 100644
--- a/python/tests/test_misc.py
+++ b/python/tests/test_misc.py
@@ -1,11 +1,11 @@
-
 import pytest
 
-from sandcrawler import b32_hex, clean_url, gen_file_metadata, gen_file_metadata_path, parse_cdx_line
+from sandcrawler import (b32_hex, clean_url, gen_file_metadata, gen_file_metadata_path,
+                         parse_cdx_line)
 
 
 def test_gen_file_metadata():
-    
+
     # valid (but very small) PDF file
     with open('tests/files/dummy.pdf', 'rb') as f:
         file_meta = gen_file_metadata(f.read())
@@ -27,8 +27,9 @@ def test_gen_file_metadata():
     assert fm['mimetype'] == 'text/plain'
     assert fm['size_bytes'] == 8
 
+
 def test_gen_file_metadata_path():
-    
+
     # valid (but very small) PDF file
     file_meta = gen_file_metadata_path('tests/files/dummy.pdf')
     assert file_meta == {
@@ -39,11 +40,14 @@ def test_gen_file_metadata_path():
         'size_bytes': 13264,
     }
 
+
 def test_b32_hex():
 
     # valid b32
-    assert b32_hex('sha1:TZCYZ2ULEHYGESS4L3RNH75I23KKFSMC') == '9e458cea8b21f0624a5c5ee2d3ffa8d6d4a2c982'
-    assert b32_hex('TZCYZ2ULEHYGESS4L3RNH75I23KKFSMC') == '9e458cea8b21f0624a5c5ee2d3ffa8d6d4a2c982'
+    assert b32_hex(
+        'sha1:TZCYZ2ULEHYGESS4L3RNH75I23KKFSMC') == '9e458cea8b21f0624a5c5ee2d3ffa8d6d4a2c982'
+    assert b32_hex(
+        'TZCYZ2ULEHYGESS4L3RNH75I23KKFSMC') == '9e458cea8b21f0624a5c5ee2d3ffa8d6d4a2c982'
 
     # sha1hex pass-through
     s = 'bda3c1017d52e826bbd1da51efad877272d300f9'
@@ -53,6 +57,7 @@ def test_b32_hex():
     with pytest.raises(ValueError):
         assert b32_hex('blah') == 'blah'
 
+
 def test_parse_cdx_line():
 
     raw = "edu,upenn,ldc)/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf 20170828233154 https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf application/pdf 200 WL3FEA62TEU4F52Y5DOVQ62VET4QJW7G - - 210251 931661233 SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828231135742-00000-00009-wbgrp-svc284/SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828232253025-00005-3480~wbgrp-svc284.us.archive.org~8443.warc.gz"
@@ -73,6 +78,7 @@ def test_parse_cdx_line():
     assert parse_cdx_line(raw + "\n") == correct
     assert parse_cdx_line(raw + " extra_field") == correct
 
+
 def test_invalid_cdx():
 
     print("missing warc")
@@ -80,11 +86,11 @@ def test_invalid_cdx():
     assert parse_cdx_line(raw) == None
 
     print("bad datetime")
-    raw = "edu,upenn,ldc)/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf 2070828233154 https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf application/pdf 200 WL3FEA62TEU4F52Y5DOVQ62VET4QJW7G - - 210251 931661233i SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828231135742-00000-00009-wbgrp-svc284/SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828232253025-00005-3480~wbgrp-svc284.us.archive.org~8443.warc.gz" 
+    raw = "edu,upenn,ldc)/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf 2070828233154 https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/medar2009-large-arabic-broadcast-collection.pdf application/pdf 200 WL3FEA62TEU4F52Y5DOVQ62VET4QJW7G - - 210251 931661233i SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828231135742-00000-00009-wbgrp-svc284/SEMSCHOLAR-PDF-CRAWL-2017-08-04-20170828232253025-00005-3480~wbgrp-svc284.us.archive.org~8443.warc.gz"
     assert parse_cdx_line(raw) == None
 
+
 def test_clean_url():
     assert clean_url("http://BLAH.COM/file.pdf") == "http://blah.com/file.pdf"
     assert clean_url("https://opensky.ucar.edu:/islandora/object/articles%3A10809/datastream/PDF/view") == \
         "https://opensky.ucar.edu/islandora/object/articles%3A10809/datastream/PDF/view"
-
diff --git a/python/tests/test_pdfextract.py b/python/tests/test_pdfextract.py
index 1d334d6..146b138 100644
--- a/python/tests/test_pdfextract.py
+++ b/python/tests/test_pdfextract.py
@@ -1,4 +1,3 @@
-
 import struct
 
 import poppler
@@ -6,11 +5,13 @@ import pytest
 import responses
 from test_wayback import cdx_client, wayback_client
 
-from sandcrawler import BlackholeSink, CdxLinePusher, PdfExtractBlobWorker, PdfExtractWorker, WaybackClient
+from sandcrawler import (BlackholeSink, CdxLinePusher, PdfExtractBlobWorker, PdfExtractWorker,
+                         WaybackClient)
 from sandcrawler.pdfextract import process_pdf
 
 FAKE_PDF_BYTES = b"%PDF SOME JUNK" + struct.pack("!q", 112853843)
 
+
 def test_process_fake_pdf():
     resp = process_pdf(FAKE_PDF_BYTES)
     print(resp)
@@ -21,7 +22,9 @@ def test_process_fake_pdf():
     resp = process_pdf(pdf_bytes)
     assert resp.status == 'not-pdf'
 
-@pytest.mark.skipif(poppler.version_string() == '0.71.0', reason="unsupported version of poppler")
+
+@pytest.mark.skipif(poppler.version_string() == '0.71.0',
+                    reason="unsupported version of poppler")
 def test_process_dummy_pdf():
     with open('tests/files/dummy.pdf', 'rb') as f:
         pdf_bytes = f.read()
@@ -39,6 +42,7 @@ def test_process_dummy_pdf():
     assert resp.pdf_extra['page0_width'] == 595
     assert resp.pdf_extra['page_count'] == 1
 
+
 def test_pdfextract_worker_cdx(wayback_client):
 
     sink = BlackholeSink()
@@ -56,6 +60,7 @@ def test_pdfextract_worker_cdx(wayback_client):
         assert pusher_counts['pushed'] == 7
         assert pusher_counts['pushed'] == worker.counts['total']
 
+
 def test_pdfextract_blob_worker():
 
     sink = BlackholeSink()
@@ -65,4 +70,3 @@ def test_pdfextract_blob_worker():
         pdf_bytes = f.read()
 
     worker.process(pdf_bytes)
-
diff --git a/python/tests/test_pushers.py b/python/tests/test_pushers.py
index 62fa515..63f90d3 100644
--- a/python/tests/test_pushers.py
+++ b/python/tests/test_pushers.py
@@ -1,4 +1,3 @@
-
 import pytest
 
 from sandcrawler.workers import BlackholeSink, CdxLinePusher
@@ -18,8 +17,10 @@ def test_cdx_line_pusher():
 
     # HTTP 200 and application/pdf
     with open('tests/files/example.cdx', 'r') as cdx_file:
-        pusher = CdxLinePusher(sink, cdx_file,
-            filter_mimetypes=['application/pdf'], filter_http_statuses=[200, 226])
+        pusher = CdxLinePusher(sink,
+                               cdx_file,
+                               filter_mimetypes=['application/pdf'],
+                               filter_http_statuses=[200, 226])
         counts = pusher.run()
     assert counts['total'] == 20
     assert counts['skip-parse'] == 1
diff --git a/python/tests/test_savepagenow.py b/python/tests/test_savepagenow.py
index f3fbfda..80334d9 100644
--- a/python/tests/test_savepagenow.py
+++ b/python/tests/test_savepagenow.py
@@ -1,4 +1,3 @@
-
 import json
 
 import pytest
@@ -26,9 +25,7 @@ SUCCESS_BODY = {
     "timestamp": "20180326070330",
     "duration_sec": 6.203,
     "resources": [
-        TARGET,
-        TARGET + "/redirect",
-        "http://brewster.kahle.org/",
+        TARGET, TARGET + "/redirect", "http://brewster.kahle.org/",
         "http://brewster.kahle.org/favicon.ico",
         "http://brewster.kahle.org/files/2011/07/bkheader-follow.jpg",
         "http://brewster.kahle.org/files/2016/12/amazon-unhappy.jpg",
@@ -43,8 +40,7 @@ SUCCESS_BODY = {
         "http://brewster.kahle.org/wp-content/themes/twentyten/style.css",
         "http://brewster.kahle.org/wp-includes/js/wp-embed.min.js?ver=4.9.4",
         "http://brewster.kahle.org/wp-includes/js/wp-emoji-release.min.js?ver=4.9.4",
-        "http://platform.twitter.com/widgets.js",
-        "https://archive-it.org/piwik.js",
+        "http://platform.twitter.com/widgets.js", "https://archive-it.org/piwik.js",
         "https://platform.twitter.com/jot.html",
         "https://platform.twitter.com/js/button.556f0ea0e4da4e66cfdc182016dbd6db.js",
         "https://platform.twitter.com/widgets/follow_button.f47a2e0b4471326b6fa0f163bda46011.en.html",
@@ -60,7 +56,7 @@ SUCCESS_BODY = {
         "https://www.syndikat.org/wp-includes/js/jquery/jquery.js?ver=1.12.4",
         "https://www.syndikat.org/wp-includes/js/wp-emoji-release.min.js?ver=4.9.4"
     ],
-    "outlinks":{
+    "outlinks": {
         "https://archive.org/": "xxxxxx89b-f3ca-48d0-9ea6-1d1225e98695",
         "https://other.com": "yyyy89b-f3ca-48d0-9ea6-1d1225e98695"
     }
@@ -74,10 +70,18 @@ ERROR_BODY = {
     "resources": []
 }
 CDX_SPN_HIT = [
- ["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","robotflags","length","offset","filename"],
- ["wiki,fatcat)/", "20180326070330", TARGET + "/redirect", "application/pdf", "200", CDX_BEST_SHA1B32, "-", "-", "8445", "108062304", "liveweb-20200108215212-wwwb-spn04.us.archive.org-kols1pud.warc.gz"],
+    [
+        "urlkey", "timestamp", "original", "mimetype", "statuscode", "digest", "redirect",
+        "robotflags", "length", "offset", "filename"
+    ],
+    [
+        "wiki,fatcat)/", "20180326070330", TARGET + "/redirect", "application/pdf", "200",
+        CDX_BEST_SHA1B32, "-", "-", "8445", "108062304",
+        "liveweb-20200108215212-wwwb-spn04.us.archive.org-kols1pud.warc.gz"
+    ],
 ]
 
+
 @pytest.fixture
 def spn_client():
     client = SavePageNowClient(
@@ -88,25 +92,29 @@ def spn_client():
     client.poll_seconds = 0.0
     return client
 
+
 @responses.activate
 def test_savepagenow_success(spn_client):
 
     responses.add(responses.POST,
-        'http://dummy-spnv2/save',
-        status=200,
-        body=json.dumps({"url": TARGET, "job_id": JOB_ID}))
+                  'http://dummy-spnv2/save',
+                  status=200,
+                  body=json.dumps({
+                      "url": TARGET,
+                      "job_id": JOB_ID
+                  }))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(PENDING_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(PENDING_BODY))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(PENDING_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(PENDING_BODY))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(SUCCESS_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(SUCCESS_BODY))
 
     resp = spn_client.save_url_now_v2(TARGET)
 
@@ -119,21 +127,25 @@ def test_savepagenow_success(spn_client):
     assert resp.terminal_dt == SUCCESS_BODY['timestamp']
     assert resp.resources == SUCCESS_BODY['resources']
 
+
 @responses.activate
 def test_savepagenow_remote_error(spn_client):
 
     responses.add(responses.POST,
-        'http://dummy-spnv2/save',
-        status=200,
-        body=json.dumps({"url": TARGET, "job_id": JOB_ID}))
+                  'http://dummy-spnv2/save',
+                  status=200,
+                  body=json.dumps({
+                      "url": TARGET,
+                      "job_id": JOB_ID
+                  }))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(PENDING_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(PENDING_BODY))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(ERROR_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(ERROR_BODY))
 
     resp = spn_client.save_url_now_v2(TARGET)
 
@@ -146,47 +158,56 @@ def test_savepagenow_remote_error(spn_client):
     assert resp.terminal_dt == None
     assert resp.resources == None
 
+
 @responses.activate
 def test_savepagenow_500(spn_client):
 
     responses.add(responses.POST,
-        'http://dummy-spnv2/save',
-        status=200,
-        body=json.dumps({"url": TARGET, "job_id": JOB_ID}))
+                  'http://dummy-spnv2/save',
+                  status=200,
+                  body=json.dumps({
+                      "url": TARGET,
+                      "job_id": JOB_ID
+                  }))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=500,
-        body=json.dumps(ERROR_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=500,
+                  body=json.dumps(ERROR_BODY))
 
     with pytest.raises(SavePageNowError):
         resp = spn_client.save_url_now_v2(TARGET)
 
     assert len(responses.calls) == 2
 
+
 @responses.activate
 def test_crawl_resource(spn_client, wayback_client):
 
     responses.add(responses.POST,
-        'http://dummy-spnv2/save',
-        status=200,
-        body=json.dumps({"url": TARGET, "job_id": JOB_ID}))
+                  'http://dummy-spnv2/save',
+                  status=200,
+                  body=json.dumps({
+                      "url": TARGET,
+                      "job_id": JOB_ID
+                  }))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(PENDING_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(PENDING_BODY))
     responses.add(responses.GET,
-        'http://dummy-spnv2/save/status/' + JOB_ID,
-        status=200,
-        body=json.dumps(SUCCESS_BODY))
+                  'http://dummy-spnv2/save/status/' + JOB_ID,
+                  status=200,
+                  body=json.dumps(SUCCESS_BODY))
     responses.add(responses.GET,
-        'http://dummy-cdx/cdx',
-        status=200,
-        body=json.dumps(CDX_SPN_HIT))
+                  'http://dummy-cdx/cdx',
+                  status=200,
+                  body=json.dumps(CDX_SPN_HIT))
     responses.add(responses.GET,
-        'https://web.archive.org/web/{}id_/{}'.format("20180326070330", TARGET + "/redirect"),
-        status=200,
-        headers={"X-Archive-Src": "liveweb-whatever.warc.gz"},
-        body=WARC_BODY)
+                  'https://web.archive.org/web/{}id_/{}'.format("20180326070330",
+                                                                TARGET + "/redirect"),
+                  status=200,
+                  headers={"X-Archive-Src": "liveweb-whatever.warc.gz"},
+                  body=WARC_BODY)
 
     print('https://web.archive.org/web/{}id_/{}'.format("20180326070330", TARGET + "/redirect"))
     resp = spn_client.crawl_resource(TARGET, wayback_client)
@@ -201,4 +222,3 @@ def test_crawl_resource(spn_client, wayback_client):
     assert type(resp.cdx) == CdxPartial
     with pytest.raises(AttributeError):
         print(resp.cdx.warc_path)
-
diff --git a/python/tests/test_wayback.py b/python/tests/test_wayback.py
index 83311b9..6ccf775 100644
--- a/python/tests/test_wayback.py
+++ b/python/tests/test_wayback.py
@@ -1,4 +1,3 @@
-
 import json
 
 import pytest
@@ -10,27 +9,66 @@ CDX_TARGET = "http://fatcat.wiki/"
 CDX_DT = "20180812220054"
 # cdx -m exact -p output=json -p from=20180812220054 -p to=20180812220054 http://fatcat.wiki/
 CDX_SINGLE_HIT = [
- ["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","robotflags","length","offset","filename"],
- ["wiki,fatcat)/", CDX_DT, CDX_TARGET, "text/html", "200", "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304", "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"],
+    [
+        "urlkey", "timestamp", "original", "mimetype", "statuscode", "digest", "redirect",
+        "robotflags", "length", "offset", "filename"
+    ],
+    [
+        "wiki,fatcat)/", CDX_DT, CDX_TARGET, "text/html", "200",
+        "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304",
+        "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
+    ],
 ]
 
 CDX_BEST_SHA1B32 = "AAAAAAAAASIHDJIEP7ZW53DLRX5NFIJR"
 # cdx -m exact -p output=json -p from=20180812220054 -p to=20180812220054 http://fatcat.wiki/
 CDX_MULTI_HIT = [
- ["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","robotflags","length","offset","filename"],
- ["wiki,fatcat)/", CDX_DT, CDX_TARGET, "text/html", "200", "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304", "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"],
- # sooner, but not right mimetype
- ["wiki,fatcat)/", "20180912220054", CDX_TARGET, "text/html", "200", "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304", "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"],
- # sooner and mimetype, but wrong status code
- ["wiki,fatcat)/", "20180912220054", CDX_TARGET, "application/pdf", "400", "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304", "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"],
- ["wiki,fatcat)/", "20180912220054", CDX_TARGET, "application/pdf", "500", "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304", "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"],
- ["wiki,fatcat)/", "20180912220054", CDX_TARGET, "application/pdf", "150", "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304", "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"],
- # "best"
- ["wiki,fatcat)/", CDX_DT, CDX_TARGET, "application/pdf", "200", CDX_BEST_SHA1B32, "-", "-", "8445", "108062304", "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"],
- # older
- ["wiki,fatcat)/", "20180712220054", CDX_TARGET, "application/pdf", "200", "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304", "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"],
+    [
+        "urlkey", "timestamp", "original", "mimetype", "statuscode", "digest", "redirect",
+        "robotflags", "length", "offset", "filename"
+    ],
+    [
+        "wiki,fatcat)/", CDX_DT, CDX_TARGET, "text/html", "200",
+        "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304",
+        "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
+    ],
+    # sooner, but not right mimetype
+    [
+        "wiki,fatcat)/", "20180912220054", CDX_TARGET, "text/html", "200",
+        "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304",
+        "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
+    ],
+    # sooner and mimetype, but wrong status code
+    [
+        "wiki,fatcat)/", "20180912220054", CDX_TARGET, "application/pdf", "400",
+        "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304",
+        "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
+    ],
+    [
+        "wiki,fatcat)/", "20180912220054", CDX_TARGET, "application/pdf", "500",
+        "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304",
+        "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
+    ],
+    [
+        "wiki,fatcat)/", "20180912220054", CDX_TARGET, "application/pdf", "150",
+        "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304",
+        "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
+    ],
+    # "best"
+    [
+        "wiki,fatcat)/", CDX_DT, CDX_TARGET, "application/pdf", "200", CDX_BEST_SHA1B32, "-",
+        "-", "8445", "108062304",
+        "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
+    ],
+    # older
+    [
+        "wiki,fatcat)/", "20180712220054", CDX_TARGET, "application/pdf", "200",
+        "O5RHV6OQ7SIHDJIEP7ZW53DLRX5NFIJR", "-", "-", "8445", "108062304",
+        "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
+    ],
 ]
 
+
 @pytest.fixture
 def cdx_client():
     client = CdxApiClient(
@@ -39,13 +77,14 @@ def cdx_client():
     )
     return client
 
+
 @responses.activate
 def test_cdx_fetch(cdx_client):
 
     responses.add(responses.GET,
-        'http://dummy-cdx/cdx',
-        status=200,
-        body=json.dumps(CDX_SINGLE_HIT))
+                  'http://dummy-cdx/cdx',
+                  status=200,
+                  body=json.dumps(CDX_SINGLE_HIT))
 
     resp = cdx_client.fetch(CDX_TARGET, CDX_DT)
 
@@ -58,6 +97,7 @@ def test_cdx_fetch(cdx_client):
     assert resp.warc_offset == 108062304
     assert resp.warc_path == "WIDE-20180810142205-crawl802/WIDE-20180812131623-00059.warc.gz"
 
+
 @responses.activate
 def test_cdx_fetch_errors(cdx_client):
 
@@ -65,9 +105,9 @@ def test_cdx_fetch_errors(cdx_client):
         resp = cdx_client.fetch(CDX_TARGET, "2019")
 
     responses.add(responses.GET,
-        'http://dummy-cdx/cdx',
-        status=200,
-        body=json.dumps(CDX_SINGLE_HIT))
+                  'http://dummy-cdx/cdx',
+                  status=200,
+                  body=json.dumps(CDX_SINGLE_HIT))
 
     with pytest.raises(KeyError):
         resp = cdx_client.fetch(CDX_TARGET, "20180812220055")
@@ -78,13 +118,14 @@ def test_cdx_fetch_errors(cdx_client):
     resp = cdx_client.fetch(CDX_TARGET, CDX_DT)
     assert len(responses.calls) == 3
 
+
 @responses.activate
 def test_cdx_lookup_best(cdx_client):
 
     responses.add(responses.GET,
-        'http://dummy-cdx/cdx',
-        status=200,
-        body=json.dumps(CDX_MULTI_HIT))
+                  'http://dummy-cdx/cdx',
+                  status=200,
+                  body=json.dumps(CDX_MULTI_HIT))
 
     resp = cdx_client.lookup_best(CDX_TARGET, best_mimetype="application/pdf")
 
@@ -95,6 +136,7 @@ def test_cdx_lookup_best(cdx_client):
     assert resp.sha1b32 == CDX_BEST_SHA1B32
     assert resp.warc_path == CDX_SINGLE_HIT[1][-1]
 
+
 WARC_TARGET = "http://fatcat.wiki/"
 WARC_BODY = b"""
 <html>
@@ -108,6 +150,7 @@ WARC_BODY = b"""
 </html>
 """
 
+
 @pytest.fixture
 def wayback_client(cdx_client, mocker):
     client = WaybackClient(
@@ -127,6 +170,7 @@ def wayback_client(cdx_client, mocker):
 
     return client
 
+
 @pytest.fixture
 def wayback_client_pdf(cdx_client, mocker):
 
@@ -150,6 +194,7 @@ def wayback_client_pdf(cdx_client, mocker):
 
     return client
 
+
 @responses.activate
 def test_wayback_fetch(wayback_client):
     resp = wayback_client.fetch_petabox(123, 456789, "here/there.warc.gz")
@@ -159,13 +204,14 @@ def test_wayback_fetch(wayback_client):
     resp = wayback_client.fetch_petabox_body(123, 456789, "here/there.warc.gz")
     assert resp == WARC_BODY
 
+
 @responses.activate
 def test_lookup_resource_success(wayback_client):
 
     responses.add(responses.GET,
-        'http://dummy-cdx/cdx',
-        status=200,
-        body=json.dumps(CDX_MULTI_HIT))
+                  'http://dummy-cdx/cdx',
+                  status=200,
+                  body=json.dumps(CDX_MULTI_HIT))
 
     resp = wayback_client.lookup_resource(CDX_TARGET)
 
diff --git a/python/tests/test_xml.py b/python/tests/test_xml.py
index a996c56..1742f3a 100644
--- a/python/tests/test_xml.py
+++ b/python/tests/test_xml.py
@@ -1,11 +1,10 @@
-
 import pytest
 
 from sandcrawler.xml import xml_reserialize
 
 
 def test_xml_reserialize() -> None:
-    
+
     with open('tests/files/scielo_article.jats.xml', 'rb') as f:
         raw_xml = f.read()