From db97d3dcfa497c6865399351fa8702b92a940459 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 19 May 2020 10:58:43 -0700
Subject: handle UnboundLocalError in HTML parsing

---
 python/sandcrawler/html.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sandcrawler/html.py b/python/sandcrawler/html.py
index 3eadc7b..88ea41b 100644
--- a/python/sandcrawler/html.py
+++ b/python/sandcrawler/html.py
@@ -42,7 +42,10 @@ def extract_fulltext_url(html_url, html_body):
     try:
         soup = BeautifulSoup(html_body, 'html.parser')
     except TypeError as te:
-        print("{} (url={})".format(te, html_url, file=sys.stderr))
+        print(f"{te} (url={html_url})", file=sys.stderr)
+        return dict()
+    except UnboundLocalError as ule:
+        print(f"{ule} (url={html_url})", file=sys.stderr)
         return dict()
 
     ### General Tricks ###
-- 
cgit v1.2.3