aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- python/sandcrawler/html_metadata.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/python/sandcrawler/html_metadata.py b/python/sandcrawler/html_metadata.py
index a52d339..367fce4 100644
--- a/python/sandcrawler/html_metadata.py
+++ b/python/sandcrawler/html_metadata.py
@@ -560,6 +560,7 @@ def load_adblock_rules() -> braveblock.Adblocker:
"||www.mendeley.com^",
"||pbs.twimg.com^",
"||badge.dimensions.ai^",
+ "||recaptcha.net^",
# not sure about these CC badges (usually via a redirect)
#"||licensebuttons.net^",
@@ -570,9 +571,10 @@ def load_adblock_rules() -> braveblock.Adblocker:
#"||ajax.googleapis.com^",
#"||cdnjs.cloudflare.com^",
- # badges, "share" buttons, etc
+ # badges, "share" buttons, tracking, etc
"apis.google.com/js/plusone",
"www.google.com/recaptcha/",
+ "js/_getUACode.js",
# PLOS images
"/resource/img/icon.*.16.png^",