diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-10-02 18:02:40 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-10-02 18:02:40 -0700 |
commit | 910e59e9011935fecaae62520eb3fc30cbd65800 (patch) | |
tree | 0008974f236c2c7d88e8cd614bf07c42973087e2 /python/tests/test_pushers.py | |
parent | d2cd959dd19a10e03ab9e8bbd11787266dbca309 (diff) | |
download | sandcrawler-910e59e9011935fecaae62520eb3fc30cbd65800.tar.gz sandcrawler-910e59e9011935fecaae62520eb3fc30cbd65800.zip |
python tests for pusher classes
Diffstat (limited to 'python/tests/test_pushers.py')
-rw-r--r-- | python/tests/test_pushers.py | 28 |
1 files changed, 28 insertions, 0 deletions
```diff
diff --git a/python/tests/test_pushers.py b/python/tests/test_pushers.py
new file mode 100644
index 0000000..ed9c0bb
--- /dev/null
+++ b/python/tests/test_pushers.py
@@ -0,0 +1,28 @@
+
+import pytest
+
+from sandcrawler.workers import CdxLinePusher, BlackholeSink
+
+
+def test_cdx_line_pusher():
+
+    sink = BlackholeSink()
+
+    # vanilla (only default filters)
+    with open('tests/files/example.cdx', 'r') as cdx_file:
+        pusher = CdxLinePusher(sink, cdx_file)
+        counts = pusher.run()
+    assert counts['total'] == 20
+    assert counts['skip-parse'] == 1
+    assert counts['pushed'] == 19
+
+    # HTTP 200 and application/pdf
+    with open('tests/files/example.cdx', 'r') as cdx_file:
+        pusher = CdxLinePusher(sink, cdx_file,
+            filter_mimetypes=['application/pdf'], filter_http_statuses=[200])
+        counts = pusher.run()
+    assert counts['total'] == 20
+    assert counts['skip-parse'] == 1
+    assert counts['skip-http_status'] == 10
+    assert counts['skip-mimetype'] == 2
+    assert counts['pushed'] == 7
```