diff options
-rw-r--r-- | python/sandcrawler/html.py | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/python/sandcrawler/html.py b/python/sandcrawler/html.py
index 3eadc7b..88ea41b 100644
--- a/python/sandcrawler/html.py
+++ b/python/sandcrawler/html.py
@@ -42,7 +42,10 @@ def extract_fulltext_url(html_url, html_body):
     try:
         soup = BeautifulSoup(html_body, 'html.parser')
     except TypeError as te:
-        print("{} (url={})".format(te, html_url, file=sys.stderr))
+        print(f"{te} (url={html_url})", file=sys.stderr)
+        return dict()
+    except UnboundLocalError as ule:
+        print(f"{ule} (url={html_url})", file=sys.stderr)
         return dict()
 
     ### General Tricks ###