diff options
author | Bryan Newbold <bnewbold@archive.org> | 2019-09-23 22:58:55 -0700 |
---|---|---|
committer | Bryan Newbold <bnewbold@archive.org> | 2019-09-23 22:58:55 -0700 |
commit | b438f52dbb7578c9a5c2153bc4ba50e33fdae7c3 (patch) | |
tree | a47d7c07aebd01e3d26c86e664fa0f59b0786bcc /python/tests | |
parent | e2508ee3da64b46f47aec25361839f29de5e73c0 (diff) | |
download | sandcrawler-b438f52dbb7578c9a5c2153bc4ba50e33fdae7c3.tar.gz sandcrawler-b438f52dbb7578c9a5c2153bc4ba50e33fdae7c3.zip |
start refactoring sandcrawler python common code
Diffstat (limited to 'python/tests')
-rw-r--r-- | python/tests/files/dummy.pdf | bin | 0 -> 13264 bytes | |||
-rw-r--r-- | python/tests/test_misc.py | 41 |
2 files changed, 41 insertions, 0 deletions
```diff
diff --git a/python/tests/files/dummy.pdf b/python/tests/files/dummy.pdf
Binary files differ
new file mode 100644
index 0000000..774c2ea
--- /dev/null
+++ b/python/tests/files/dummy.pdf
diff --git a/python/tests/test_misc.py b/python/tests/test_misc.py
new file mode 100644
index 0000000..a7879c8
--- /dev/null
+++ b/python/tests/test_misc.py
@@ -0,0 +1,41 @@
+
+import pytest
+
+from sandcrawler import gen_file_metadata, b32_hex
+
+def test_gen_file_metadata():
+
+    # valid (but very small) PDF file
+    with open('tests/files/dummy.pdf', 'rb') as f:
+        file_meta = gen_file_metadata(f.read())
+    assert file_meta == {
+        'mimetype': 'application/pdf',
+        'md5hex': '2942bfabb3d05332b66eb128e0842cff',
+        'sha1hex': '90ffd2359008d82298821d16b21778c5c39aec36',
+        'sha256hex': '3df79d34abbca99308e79cb94461c1893582604d68329a41fd4bec1885e6adb4',
+        'size_bytes': 13264,
+    }
+
+    # valid HTML
+    fm = gen_file_metadata(
+        b"""<html><head><title>dummy</title></head><body>html document</body></html>""")
+    assert fm['mimetype'] == 'text/html'
+
+    # bogus text
+    fm = gen_file_metadata(b"asdf1234")
+    assert fm['mimetype'] == 'text/plain'
+    assert fm['size_bytes'] == 8
+
+def test_b32_hex():
+
+    # valid b32
+    assert b32_hex('sha1:TZCYZ2ULEHYGESS4L3RNH75I23KKFSMC') == '9e458cea8b21f0624a5c5ee2d3ffa8d6d4a2c982'
+    assert b32_hex('TZCYZ2ULEHYGESS4L3RNH75I23KKFSMC') == '9e458cea8b21f0624a5c5ee2d3ffa8d6d4a2c982'
+
+    # sha1hex pass-through
+    s = 'bda3c1017d52e826bbd1da51efad877272d300f9'
+    assert b32_hex(s) == s
+
+    # invalid
+    with pytest.raises(ValueError):
+        assert b32_hex('blah') == 'blah'
```