diff options
author | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 13:35:36 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2021-10-26 13:35:36 -0700 |
commit | 600ad67925a748200ddf21d5aeabd157d2bb3664 (patch) | |
tree | 89ae6bc24e6eb3821c03efd7d781430345c68aa0 /python/sandcrawler/html_metadata.py | |
parent | 05bd7cbcc62588e431c5efd533189e246b2a997e (diff) | |
download | sandcrawler-600ad67925a748200ddf21d5aeabd157d2bb3664.tar.gz sandcrawler-600ad67925a748200ddf21d5aeabd157d2bb3664.zip |
start handling trivial lint cleanups: unused imports, 'is None', etc
Diffstat (limited to 'python/sandcrawler/html_metadata.py')
-rw-r--r-- | python/sandcrawler/html_metadata.py | 6 |
1 file changed, 3 insertions, 3 deletions
```diff
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index 6d27a3a..15a9f2b 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -1,7 +1,7 @@
 import datetime
 import sys
 import urllib.parse
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, List, Optional, Tuple
 
 import braveblock
 import dateparser
@@ -687,7 +687,7 @@ def html_extract_fulltext_url(doc_url: str, doc: HTMLParser,
             continue
         return (val, pattern.get('technique', 'unknown'))
     if self_doc_url:
-        print(f" WARN: returning fulltext URL pointing to self", file=sys.stderr)
+        print(" WARN: returning fulltext URL pointing to self", file=sys.stderr)
         return self_doc_url
     return None
 
@@ -864,7 +864,7 @@ def html_extract_resources(doc_url: str, doc: HTMLParser,
     # filter using adblocker
     resources = [
         r for r in resources if adblock.check_network_urls(
-            r['url'], source_url=doc_url, request_type=r['type']) == False
+            r['url'], source_url=doc_url, request_type=r['type']) is False
     ]
 
     # remove duplicates
```