blob: a8ebd9ff5421d8457368411f87401fb62dd46780 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
import os
import unittest
from pighelper import PigTestHelper
def count_lines(s):
    """Return the number of non-empty lines in the string *s*.

    Leading and trailing whitespace (newlines included) is stripped from
    *s* first, then the remainder is split on newline characters. Only
    zero-length pieces are skipped, so an interior line that contains
    nothing but spaces or tabs still counts as a line.
    """
    # Count lazily instead of building a list only to take its length.
    return sum(1 for line in s.strip().split('\n') if line)
class TestFilterCDXPaperPdfs(PigTestHelper):
    """Line-count checks for ``filter-cdx-paper-pdfs.pig``.

    Every test hands the script name and one fixture path to
    ``self.run_pig`` (supplied by ``PigTestHelper``, not shown in this
    file) and compares the number of non-empty lines in the returned
    value with the count expected for that fixture.
    """

    def _output_line_count(self, cdx_path):
        """Return ``count_lines`` of the ``run_pig`` result for *cdx_path*.

        NOTE(review): presumably ``run_pig`` returns the script's output as
        a string -- confirm against ``pighelper``.
        """
        output = self.run_pig("filter-cdx-paper-pdfs.pig", cdx_path)
        return count_lines(output)

    def test_papers_domain_words(self):
        found = self._output_line_count("tests/files/papers_domain_words.cdx")
        assert found == 4

    def test_papers_edu_tilde(self):
        found = self._output_line_count("tests/files/papers_edu_tilde.cdx")
        assert found == 6

    def test_papers_url_doi(self):
        found = self._output_line_count("tests/files/papers_url_doi.cdx")
        assert found == 2

    def test_papers_url_words(self):
        found = self._output_line_count("tests/files/papers_url_words.cdx")
        assert found == 12
|