about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2021-09-03 10:37:18 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2021-09-03 10:37:18 -0700
commit: d963a61ea3e4bf278fd62047b258722967cd20c9 (patch)
tree: 52a219e09480b78d866abf62c68309fc356a320a
parent: f074a6aafd9af06866829d35555afe10286126fb (diff)
download: sandcrawler-d963a61ea3e4bf278fd62047b258722967cd20c9.tar.gz
download: sandcrawler-d963a61ea3e4bf278fd62047b258722967cd20c9.zip
HTML ingest: skip noisy print() statement
-rw-r--r-- python/sandcrawler/html_metadata.py | 2
1 files changed, 1 insertions, 1 deletions
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index a1938e9..abcc428 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -661,7 +661,7 @@ def _extract_generic(doc: HTMLParser, selector: str, attrs: List[str], type_name
if skip:
continue
if url:
- print(url, file=sys.stderr)
+ #print(url, file=sys.stderr)
resources.append(dict(url=url.strip(), type=type_name))
return resources