"""Tests for DjVu XML leaf-text extraction (``fatcat_scholar.djvu``).

Reconstructed from a single-line rendering of git commit
f1f7842dd2ed110e9958f56d79ec504ae5d2bcd6 ("crude djvu XML parsing",
Bryan Newbold, Sat, 16 May 2020 18:40:26 -0700), which created this
module as ``tests/test_djvu_parse.py``.
"""

from io import StringIO

from fatcat_scholar.djvu import djvu_extract_leaf_texts


def test_djvu_extract_leaf_texts():
    """Extract two requested leaves from a DjVu XML fixture and check them.

    The fixture path is relative to the current working directory, so the
    test must be run from the directory that contains ``tests/``.
    """
    # Fixture source: https://archive.org/details/ERIC_ED441501
    # NOTE(review): the fixture is assumed to be UTF-8 encoded; an explicit
    # encoding keeps decoding independent of the platform locale -- confirm
    # against the XML declaration in the fixture file.
    with open('tests/files/ERIC_ED441501_djvu.xml', 'r', encoding='utf-8') as f:
        blob = f.read()

    # Request leaves 3 and 6 only; the extractor is handed a text stream.
    leaves = djvu_extract_leaf_texts(StringIO(blob), [3, 6])

    # Both requested leaves must be present, and nothing else.
    assert 3 in leaves
    assert 6 in leaves
    assert "2. Original cataloging tools" in leaves[3]
    assert len(leaves) == 2