From 58f744e97c8f3f1a3472aa821f4518d7d139e850 Mon Sep 17 00:00:00 2001
From: Bryan Newbold
Date: Fri, 17 Jan 2020 12:12:05 -0800
Subject: ingest: add URL blocklist feature

And, temporarily, block zenodo and figshare.
---
 python/tests/test_ingest.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'python/tests')

diff --git a/python/tests/test_ingest.py b/python/tests/test_ingest.py
index 050e2ea..33de35d 100644
--- a/python/tests/test_ingest.py
+++ b/python/tests/test_ingest.py
@@ -149,3 +149,20 @@ def test_ingest_landing(ingest_worker):
     assert 'revisit_cdx' not in resp
     assert 'grobid' not in resp
 
+@responses.activate
+def test_ingest_blocklist(ingest_worker):
+    """A base_url matching base_url_blocklist is skipped ('skip-url-blocklist')."""
+    ingest_worker.base_url_blocklist = [
+        '://test.fatcat.wiki/',
+    ]
+    request = {
+        'ingest_type': 'pdf',
+        'base_url': "https://test.fatcat.wiki/asdfasdf.pdf",
+    }
+
+    resp = ingest_worker.process(request)
+
+    assert resp['hit'] is False
+    assert resp['status'] == "skip-url-blocklist"
+    assert resp['request'] == request
+
-- 
cgit v1.2.3