about summary refs log tree commit diff stats
path: root/pig/tests/test_filter_cdx_paper_pdfs.py
diff options
context:
space:
mode:
Diffstat (limited to 'pig/tests/test_filter_cdx_paper_pdfs.py')
-rw-r--r-- pig/tests/test_filter_cdx_paper_pdfs.py 26
1 files changed, 26 insertions, 0 deletions
diff --git a/pig/tests/test_filter_cdx_paper_pdfs.py b/pig/tests/test_filter_cdx_paper_pdfs.py
new file mode 100644
index 0000000..a8ebd9f
--- /dev/null
+++ b/pig/tests/test_filter_cdx_paper_pdfs.py
@@ -0,0 +1,26 @@
+
+import os
+import unittest
+from pighelper import PigTestHelper
+
def count_lines(s):
    """Return the number of non-empty lines in the string *s*.

    Leading and trailing whitespace of the whole string is stripped first,
    so surrounding blank lines never contribute to the count. Lines are
    separated by ``'\\n'``; a trailing ``'\\r'`` (CRLF line endings) is
    ignored when deciding whether a line is empty, so ``"a\\r\\n\\r\\nb"``
    counts as two lines rather than three.

    Args:
        s: Text to inspect, e.g. the captured output of a script run.

    Returns:
        The count of lines that contain at least one character other than
        a trailing carriage return.
    """
    # rstrip('\r') keeps CRLF-terminated blank lines ("\r") from being
    # mistaken for content; every other character still counts.
    return sum(1 for line in s.strip().split('\n') if line.rstrip('\r'))
+
class TestFilterCDXPaperPdfs(PigTestHelper):
    """Row-count checks for the ``filter-cdx-paper-pdfs.pig`` script.

    Each test passes one CDX fixture file to ``self.run_pig`` together with
    the script name and asserts how many non-empty lines ``count_lines``
    finds in the returned value.
    NOTE(review): ``run_pig`` is inherited from ``PigTestHelper``; it
    presumably returns the script's output as a string -- confirm there.
    """

    def test_papers_domain_words(self):
        """The ``papers_domain_words.cdx`` fixture yields 4 output lines."""
        fixture = "tests/files/papers_domain_words.cdx"
        output = self.run_pig("filter-cdx-paper-pdfs.pig", fixture)
        n_rows = count_lines(output)
        assert n_rows == 4

    def test_papers_edu_tilde(self):
        """The ``papers_edu_tilde.cdx`` fixture yields 6 output lines."""
        fixture = "tests/files/papers_edu_tilde.cdx"
        output = self.run_pig("filter-cdx-paper-pdfs.pig", fixture)
        n_rows = count_lines(output)
        assert n_rows == 6

    def test_papers_url_doi(self):
        """The ``papers_url_doi.cdx`` fixture yields 2 output lines."""
        fixture = "tests/files/papers_url_doi.cdx"
        output = self.run_pig("filter-cdx-paper-pdfs.pig", fixture)
        n_rows = count_lines(output)
        assert n_rows == 2

    def test_papers_url_words(self):
        """The ``papers_url_words.cdx`` fixture yields 12 output lines."""
        fixture = "tests/files/papers_url_words.cdx"
        output = self.run_pig("filter-cdx-paper-pdfs.pig", fixture)
        n_rows = count_lines(output)
        assert n_rows == 12
+