about | summary | refs | log | tree | commit | diff | stats
path: root/python/tests
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2020-08-11 17:22:10 -0700
committer Bryan Newbold <bnewbold@archive.org> 2020-08-11 17:22:10 -0700
commit 7e8ff96fb90ddd1c853418a6c405d97afbc45355 (patch)
tree 8efd62adf4a92f44bdb71384af955400102b0f34 /python/tests
parent d5f0602e80847adf3d359a7fd06cc131c07cb6dd (diff)
download sandcrawler-7e8ff96fb90ddd1c853418a6c405d97afbc45355.tar.gz
sandcrawler-7e8ff96fb90ddd1c853418a6c405d97afbc45355.zip
check for simple URL patterns that are usually paywalls or loginwalls
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/test_ingest.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index c2d6266..46346b7 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -173,3 +173,21 @@ def test_ingest_blocklist(ingest_worker):
assert resp['status'] == "skip-url-blocklist"
assert resp['request'] == request
+
+@responses.activate
+def test_ingest_wall_blocklist(ingest_worker):
+ # A request whose base_url contains a wall_blocklist pattern should be skipped with status "skip-wall".
+ ingest_worker.wall_blocklist = [
+ '://test.fatcat.wiki/',
+ ]
+ request = {
+ 'ingest_type': 'pdf',
+ 'base_url': "https://test.fatcat.wiki/asdfasdf.pdf",
+ }
+ # NOTE(review): no mock responses are registered here, so presumably the worker must return before any HTTP fetch -- confirm.
+ resp = ingest_worker.process(request)
+ # The result must be a miss ('hit' false), carry the "skip-wall" status, and echo the request dict back.
+ assert resp['hit'] == False
+ assert resp['status'] == "skip-wall"
+ assert resp['request'] == request
+