about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/html_ingest.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/python/sandcrawler/html_ingest.py b/python/sandcrawler/html_ingest.py
index 50b193c..7594365 100644
--- a/python/sandcrawler/html_ingest.py
+++ b/python/sandcrawler/html_ingest.py
@@ -25,10 +25,10 @@ def html_extract_body_teixml(doc: bytes) -> dict:
include_comments=False,
include_formatting=True,
)
- except ValueError as ve:
+ except (ValueError, TypeError) as e:
return dict(
status="parse-error",
- error_msg=str(ve)[:1000],
+ error_msg=str(e)[:1000],
)
if tei_xml:
body_txt = teixml_body_text(tei_xml)