From 600ad67925a748200ddf21d5aeabd157d2bb3664 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Tue, 26 Oct 2021 13:35:36 -0700 Subject: start handling trivial lint cleanups: unused imports, 'is None', etc --- python/sandcrawler/ia.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'python/sandcrawler/ia.py') diff --git a/python/sandcrawler/ia.py b/python/sandcrawler/ia.py index a8ce193..fe739bb 100644 --- a/python/sandcrawler/ia.py +++ b/python/sandcrawler/ia.py @@ -11,15 +11,14 @@ import sys import time import urllib.parse from collections import namedtuple +from http.client import IncompleteRead from typing import Tuple import requests import urllib3.exceptions # not sure this will really work. Should go before wayback imports. -http.client._MAXHEADERS = 1000 # type: ignore - -from http.client import IncompleteRead +http.client._MAXHEADERS = 1000 # noqa import wayback.exception from gwb.loader import CDXLoaderFactory3 @@ -128,18 +127,18 @@ def fuzzy_match_url(left, right): def test_fuzzy_match_url(): - assert fuzzy_match_url("http://thing.com", "http://thing.com") == True - assert fuzzy_match_url("http://thing.com", "https://thing.com") == True - assert fuzzy_match_url("http://thing.com", "ftp://thing.com") == True - assert fuzzy_match_url("http://thing.com", "http://thing.com/") == True - assert fuzzy_match_url("https://thing.com", "http://thing.com/") == True - assert fuzzy_match_url("https://thing.com/", "http://thing.com") == True - assert fuzzy_match_url("http://thing.com", "http://thing.com/blue") == False + assert fuzzy_match_url("http://thing.com", "http://thing.com") is True + assert fuzzy_match_url("http://thing.com", "https://thing.com") is True + assert fuzzy_match_url("http://thing.com", "ftp://thing.com") is True + assert fuzzy_match_url("http://thing.com", "http://thing.com/") is True + assert fuzzy_match_url("https://thing.com", "http://thing.com/") is True + assert fuzzy_match_url("https://thing.com/", "http://thing.com") is True + assert fuzzy_match_url("http://thing.com", "http://thing.com/blue") is False # should probably handle these? - assert fuzzy_match_url("http://thing.com", "http://www.thing.com") == False - assert fuzzy_match_url("http://www.thing.com", "http://www2.thing.com") == False - assert fuzzy_match_url("http://www.thing.com", "https://www2.thing.com") == False + assert fuzzy_match_url("http://thing.com", "http://www.thing.com") is False + assert fuzzy_match_url("http://www.thing.com", "http://www2.thing.com") is False + assert fuzzy_match_url("http://www.thing.com", "https://www2.thing.com") is False class CdxApiError(Exception): @@ -951,7 +950,7 @@ class SavePageNowClient: resp = self.v2_session.get("{}/status/{}".format(self.v2endpoint, job_id)) try: resp.raise_for_status() - except: + except Exception: raise SavePageNowError(resp.content) status = resp.json()['status'] if status == 'pending': @@ -975,7 +974,7 @@ class SavePageNowClient: final_json['original_job_id'])) try: resp.raise_for_status() - except: + except Exception: raise SavePageNowError(resp.content) final_json = resp.json() -- cgit v1.2.3