summaryrefslogtreecommitdiffstats
path: root/tests/test_djvu_parse.py
blob: 66c2804e7cf1fa2312ce2bb56b6c3fc7e65fc657 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

from io import StringIO
from fatcat_scholar.djvu import djvu_extract_leaf_texts


def test_djvu_extract_leaf_texts():
    """Check that extraction returns exactly the requested leaves.

    Reads the ERIC_ED441501 DjVu XML fixture, asks for leaves 3 and 6, and
    asserts that both come back, that leaf 3 contains a known snippet of
    text, and that no other leaves are included in the result.
    """

    # https://archive.org/details/ERIC_ED441501
    # Pin the encoding so the test does not depend on the platform's default
    # locale (assumes the fixture is UTF-8 -- TODO confirm against the file).
    with open('tests/files/ERIC_ED441501_djvu.xml', 'r', encoding='utf-8') as f:
        blob = f.read()

    leaves = djvu_extract_leaf_texts(StringIO(blob), [3, 6])

    # Both requested leaves must be present ...
    assert 3 in leaves
    assert 6 in leaves
    # ... leaf 3 must carry the expected page text ...
    assert "2. Original cataloging tools" in leaves[3]
    # ... and nothing beyond the two requested leaves may be returned.
    assert len(leaves) == 2