# path: root/pig/tests/test_filter_cdx_paper_pdfs.py
# blob: c2d2e6b28e8bd154e2063eb79d7cb5ffbe10641a

import os
import unittest
from pighelper import PigTestHelper, count_lines


class TestFilterCDXPaperPdfs(PigTestHelper):
    """Fixture-driven checks for the ``filter-cdx-paper-pdfs.pig`` script.

    Each test hands one CDX fixture file to the script through ``run_pig``
    and compares what ``count_lines`` reports for the result against a
    fixed, per-fixture number.
    """

    # Pig script exercised by every test in this class.
    _PIG_SCRIPT = "filter-cdx-paper-pdfs.pig"

    def _result_line_count(self, cdx_fixture):
        """Run the script on ``cdx_fixture``; return ``count_lines`` of the result."""
        pig_result = self.run_pig(self._PIG_SCRIPT, cdx_fixture)
        return count_lines(pig_result)

    def test_papers_domain_words(self):
        # Fixture: papers_domain_words.cdx -> expected count 4.
        found = self._result_line_count("tests/files/papers_domain_words.cdx")
        assert found == 4

    def test_papers_edu_tilde(self):
        # Fixture: papers_edu_tilde.cdx -> expected count 6.
        found = self._result_line_count("tests/files/papers_edu_tilde.cdx")
        assert found == 6

    def test_papers_url_doi(self):
        # Fixture: papers_url_doi.cdx -> expected count 2.
        found = self._result_line_count("tests/files/papers_url_doi.cdx")
        assert found == 2

    def test_papers_url_words(self):
        # Fixture: papers_url_words.cdx -> expected count 12.
        found = self._result_line_count("tests/files/papers_url_words.cdx")
        assert found == 12