aboutsummaryrefslogtreecommitdiffstats
path: root/python/sandcrawler/html_metadata.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-11-08 21:55:12 -0800
committer: Bryan Newbold <bnewbold@archive.org> 2020-11-08 21:55:12 -0800
commit: b6911f63a277007523e0dc265a339a80be80946e (patch)
tree: 56469e3cfdf146e235cdda50a5be68deb1406c18 /python/sandcrawler/html_metadata.py
parent: a68aadc4107fc68dc2748c52dab8a4bd92cca022 (diff)
download: sandcrawler-b6911f63a277007523e0dc265a339a80be80946e.tar.gz
sandcrawler-b6911f63a277007523e0dc265a339a80be80946e.zip
move fuzzy URL match method to misc
Diffstat (limited to 'python/sandcrawler/html_metadata.py')
-rw-r--r-- python/sandcrawler/html_metadata.py 2
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index 15f44f4..a52d339 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -9,6 +9,8 @@ from selectolax.parser import HTMLParser
import pydantic
import braveblock
+from sandcrawler.misc import url_fuzzy_equal
+
# this is a map of metadata keys to CSS selectors
# sources for this list include: