diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-11-08 21:55:12 -0800 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-11-08 21:55:12 -0800 |
commit | b6911f63a277007523e0dc265a339a80be80946e (patch) | |
tree | 56469e3cfdf146e235cdda50a5be68deb1406c18 /python/sandcrawler/html_metadata.py | |
parent | a68aadc4107fc68dc2748c52dab8a4bd92cca022 (diff) | |
download | sandcrawler-b6911f63a277007523e0dc265a339a80be80946e.tar.gz sandcrawler-b6911f63a277007523e0dc265a339a80be80946e.zip |
move fuzzy URL match method to misc
Diffstat (limited to 'python/sandcrawler/html_metadata.py')
-rw-r--r-- | python/sandcrawler/html_metadata.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index 15f44f4..a52d339 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -9,6 +9,8 @@
 from selectolax.parser import HTMLParser
 import pydantic
 import braveblock
 
+from sandcrawler.misc import url_fuzzy_equal
+
 # this is a map of metadata keys to CSS selectors
 # sources for this list include: