blob: c2d2e6b28e8bd154e2063eb79d7cb5ffbe10641a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
import os
import unittest
from pighelper import PigTestHelper, count_lines
class TestFilterCDXPaperPdfs(PigTestHelper):
    """Tests that run ``filter-cdx-paper-pdfs.pig`` against CDX fixture files.

    Each test passes the script and one fixture under ``tests/files/`` to
    ``run_pig`` (presumably provided by ``PigTestHelper`` -- confirm there)
    and checks the value ``count_lines`` reports for the result.
    """

    def test_papers_domain_words(self):
        """Expect a line count of 4 for the ``papers_domain_words`` fixture."""
        output = self.run_pig(
            "filter-cdx-paper-pdfs.pig",
            "tests/files/papers_domain_words.cdx",
        )
        line_count = count_lines(output)
        assert line_count == 4

    def test_papers_edu_tilde(self):
        """Expect a line count of 6 for the ``papers_edu_tilde`` fixture."""
        output = self.run_pig(
            "filter-cdx-paper-pdfs.pig",
            "tests/files/papers_edu_tilde.cdx",
        )
        line_count = count_lines(output)
        assert line_count == 6

    def test_papers_url_doi(self):
        """Expect a line count of 2 for the ``papers_url_doi`` fixture."""
        output = self.run_pig(
            "filter-cdx-paper-pdfs.pig",
            "tests/files/papers_url_doi.cdx",
        )
        line_count = count_lines(output)
        assert line_count == 2

    def test_papers_url_words(self):
        """Expect a line count of 12 for the ``papers_url_words`` fixture."""
        output = self.run_pig(
            "filter-cdx-paper-pdfs.pig",
            "tests/files/papers_url_words.cdx",
        )
        line_count = count_lines(output)
        assert line_count == 12
|