diff options
author | Bryan Newbold <bnewbold@archive.org> | 2020-05-16 18:40:26 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2020-05-16 18:40:26 -0700 |
commit | f1f7842dd2ed110e9958f56d79ec504ae5d2bcd6 (patch) | |
tree | f9404c524531c60ce96859ac851842d0b940783f /tests/test_djvu_parse.py | |
parent | f5cb3637ccc0914a027a452a214c845deb36494b (diff) | |
download | fatcat-scholar-f1f7842dd2ed110e9958f56d79ec504ae5d2bcd6.tar.gz fatcat-scholar-f1f7842dd2ed110e9958f56d79ec504ae5d2bcd6.zip |
crude djvu XML parsing
Diffstat (limited to 'tests/test_djvu_parse.py')
-rw-r--r-- | tests/test_djvu_parse.py | 16 |
1 files changed, 16 insertions, 0 deletions
```diff
diff --git a/tests/test_djvu_parse.py b/tests/test_djvu_parse.py
new file mode 100644
index 0000000..66c2804
--- /dev/null
+++ b/tests/test_djvu_parse.py
@@ -0,0 +1,16 @@
+
+from io import StringIO
+from fatcat_scholar.djvu import djvu_extract_leaf_texts
+
+
+def test_djvu_extract_leaf_texts():
+
+    # https://archive.org/details/ERIC_ED441501
+    with open('tests/files/ERIC_ED441501_djvu.xml', 'r') as f:
+        blob = f.read()
+
+    leaves = djvu_extract_leaf_texts(StringIO(blob), [3,6])
+    assert 3 in leaves
+    assert 6 in leaves
+    assert "2. Original cataloging tools" in leaves[3]
+    assert len(leaves) == 2
```