aboutsummaryrefslogtreecommitdiffstats
path: root/python/tests
diff options
context:
space:
mode:
author Bryan Newbold <bnewbold@archive.org> 2019-10-02 18:02:40 -0700
committer Bryan Newbold <bnewbold@archive.org> 2019-10-02 18:02:40 -0700
commit 910e59e9011935fecaae62520eb3fc30cbd65800 (patch)
tree 0008974f236c2c7d88e8cd614bf07c42973087e2 /python/tests
parent d2cd959dd19a10e03ab9e8bbd11787266dbca309 (diff)
download sandcrawler-910e59e9011935fecaae62520eb3fc30cbd65800.tar.gz
sandcrawler-910e59e9011935fecaae62520eb3fc30cbd65800.zip
python tests for pusher classes
Diffstat (limited to 'python/tests')
-rw-r--r-- python/tests/files/dummy_zip.zip bin 0 -> 37760 bytes
-rw-r--r-- python/tests/test_pushers.py 28
2 files changed, 28 insertions, 0 deletions
diff --git a/python/tests/files/dummy_zip.zip b/python/tests/files/dummy_zip.zip
new file mode 100644
index 0000000..cb72dc8
--- /dev/null
+++ b/python/tests/files/dummy_zip.zip
Binary files differ
diff --git a/python/tests/test_pushers.py b/python/tests/test_pushers.py
new file mode 100644
index 0000000..ed9c0bb
--- /dev/null
+++ b/python/tests/test_pushers.py
@@ -0,0 +1,28 @@
+
+import pytest
+
+from sandcrawler.workers import CdxLinePusher, BlackholeSink
+
+
+def test_cdx_line_pusher():
+ # Runs CdxLinePusher over tests/files/example.cdx into a BlackholeSink and checks the counts returned by run().
+ sink = BlackholeSink()
+
+ # vanilla (only default filters): expects 20 total, 1 'skip-parse', 19 'pushed'
+ with open('tests/files/example.cdx', 'r') as cdx_file:
+ pusher = CdxLinePusher(sink, cdx_file)
+ counts = pusher.run()
+ assert counts['total'] == 20
+ assert counts['skip-parse'] == 1
+ assert counts['pushed'] == 19
+
+ # HTTP 200 and application/pdf: same file re-read with both filters; expects 10 + 2 extra skips, 7 'pushed'
+ with open('tests/files/example.cdx', 'r') as cdx_file:
+ pusher = CdxLinePusher(sink, cdx_file,
+ filter_mimetypes=['application/pdf'], filter_http_statuses=[200])
+ counts = pusher.run()
+ assert counts['total'] == 20
+ assert counts['skip-parse'] == 1
+ assert counts['skip-http_status'] == 10
+ assert counts['skip-mimetype'] == 2
+ assert counts['pushed'] == 7