From 7e8ff96fb90ddd1c853418a6c405d97afbc45355 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Tue, 11 Aug 2020 17:22:10 -0700
Subject: check for simple URL patterns that are usually paywalls or loginwalls

---
 python/tests/test_ingest.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'python/tests')

diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index c2d6266..46346b7 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -173,3 +173,21 @@ def test_ingest_blocklist(ingest_worker):
     assert resp['status'] == "skip-url-blocklist"
     assert resp['request'] == request
 
+
+@responses.activate
+def test_ingest_wall_blocklist(ingest_worker):
+
+    ingest_worker.wall_blocklist = [
+        '://test.fatcat.wiki/',
+    ]
+    request = {
+        'ingest_type': 'pdf',
+        'base_url': "https://test.fatcat.wiki/asdfasdf.pdf",
+    }
+
+    resp = ingest_worker.process(request)
+
+    assert resp['hit'] == False
+    assert resp['status'] == "skip-wall"
+    assert resp['request'] == request
+
-- 
cgit v1.2.3