import pytest
from sandcrawler import gen_file_metadata, b32_hex
def test_gen_file_metadata():
# valid (but very small) PDF file
with open('tests/files/dummy.pdf', 'rb') as f:
file_meta = gen_file_metadata(f.read())
assert file_meta == {
'mimetype': 'application/pdf',
'md5hex': '2942bfabb3d05332b66eb128e0842cff',
'sha1hex': '90ffd2359008d82298821d16b21778c5c39aec36',
'sha256hex': '3df79d34abbca99308e79cb94461c1893582604d68329a41fd4bec1885e6adb4',
'size_bytes': 13264,
}
# valid HTML
fm = gen_file_metadata(
b"""
dummyhtml document""")
assert fm['mimetype'] == 'text/html'
# bogus text
fm = gen_file_metadata(b"asdf1234")
assert fm['mimetype'] == 'text/plain'
assert fm['size_bytes'] == 8
def test_b32_hex():
# valid b32
assert b32_hex('sha1:TZCYZ2ULEHYGESS4L3RNH75I23KKFSMC') == '9e458cea8b21f0624a5c5ee2d3ffa8d6d4a2c982'
assert b32_hex('TZCYZ2ULEHYGESS4L3RNH75I23KKFSMC') == '9e458cea8b21f0624a5c5ee2d3ffa8d6d4a2c982'
# sha1hex pass-through
s = 'bda3c1017d52e826bbd1da51efad877272d300f9'
assert b32_hex(s) == s
# invalid
with pytest.raises(ValueError):
assert b32_hex('blah') == 'blah'