aboutsummaryrefslogtreecommitdiffstats
path: root/tests/test_djvu_parse.py
diff options
context:
space:
mode:
author: Bryan Newbold <bnewbold@archive.org> 2020-05-16 18:40:26 -0700
committer: Bryan Newbold <bnewbold@archive.org> 2020-05-16 18:40:26 -0700
commit: f1f7842dd2ed110e9958f56d79ec504ae5d2bcd6 (patch)
tree: f9404c524531c60ce96859ac851842d0b940783f /tests/test_djvu_parse.py
parent: f5cb3637ccc0914a027a452a214c845deb36494b (diff)
download: fatcat-scholar-f1f7842dd2ed110e9958f56d79ec504ae5d2bcd6.tar.gz
fatcat-scholar-f1f7842dd2ed110e9958f56d79ec504ae5d2bcd6.zip
crude djvu XML parsing
Diffstat (limited to 'tests/test_djvu_parse.py')
-rw-r--r-- tests/test_djvu_parse.py 16
1 files changed, 16 insertions, 0 deletions
diff --git a/tests/test_djvu_parse.py b/tests/test_djvu_parse.py
new file mode 100644
index 0000000..66c2804
--- /dev/null
+++ b/tests/test_djvu_parse.py
@@ -0,0 +1,16 @@
+
+from io import StringIO
+from fatcat_scholar.djvu import djvu_extract_leaf_texts
+
+
+def test_djvu_extract_leaf_texts():
+    """Extract leaves 3 and 6 from the ERIC_ED441501 fixture and check the result."""
+    # https://archive.org/details/ERIC_ED441501
+    with open('tests/files/ERIC_ED441501_djvu.xml', 'r') as f:
+        blob = f.read()
+
+    leaves = djvu_extract_leaf_texts(StringIO(blob), [3,6])
+    assert 3 in leaves
+    assert 6 in leaves
+    assert "2. Original cataloging tools" in leaves[3]
+    assert len(leaves) == 2