diff options
author | bryan newbold <bnewbold@robocracy.org> | 2023-02-15 16:35:26 -0800 |
---|---|---|
committer | bryan newbold <bnewbold@robocracy.org> | 2023-03-09 11:13:43 -0800 |
commit | 1e2be0623d968f69c01b8120245fc5f687c3ec14 (patch) | |
tree | 2e92b0eeecdd3dcbe33181a4b85856c3c0330c49 /adenosine/src/mst.rs | |
parent | ba33465034537251ca7c9881a537f321564bba53 (diff) | |
download | adenosine-1e2be0623d968f69c01b8120245fc5f687c3ec14.tar.gz adenosine-1e2be0623d968f69c01b8120245fc5f687c3ec14.zip |
pds: refactor MST internals to use bytes, not String, for keys
Diffstat (limited to 'adenosine/src/mst.rs')
-rw-r--r-- | adenosine/src/mst.rs | 91 |
1 files changed, 47 insertions, 44 deletions
diff --git a/adenosine/src/mst.rs b/adenosine/src/mst.rs index 0d66d5a..3ec4e25 100644 --- a/adenosine/src/mst.rs +++ b/adenosine/src/mst.rs @@ -47,7 +47,7 @@ pub struct MetadataNode { #[derive(Debug, DagCbor, PartialEq, Eq)] struct MstEntry { p: u32, - k: String, + k: Vec<u8>, v: Cid, t: Option<Cid>, } @@ -60,7 +60,7 @@ struct MstNode { struct WipEntry { height: u8, - key: String, + key: Vec<u8>, val: Cid, right: Option<Box<WipNode>>, } @@ -87,10 +87,10 @@ pub fn print_mst_keys(db: &mut BlockStore<libipld::DefaultParams>, cid: &Cid) -> if let Some(ref left) = node.l { print_mst_keys(db, left)?; } - let mut key: String = "".to_string(); + let mut key: Vec<u8> = vec![]; for entry in node.e.iter() { - key = format!("{}{}", &key[0..entry.p as usize], entry.k); - println!("\t{}\t-> {}", key, entry.v); + key = [&key[0..entry.p as usize], &entry.v.to_bytes()].concat(); + println!("\t{}\t-> {}", String::from_utf8_lossy(&key), entry.v); if let Some(ref right) = entry.t { print_mst_keys(db, right)?; } @@ -162,7 +162,11 @@ pub fn collect_mst_keys( } let mut key: String = "".to_string(); for entry in node.e.iter() { - key = format!("{}{}", &key[0..entry.p as usize], entry.k); + key = format!( + "{}{}", + &key[0..entry.p as usize], + String::from_utf8_lossy(&entry.k) + ); map.insert(key.clone(), entry.v); if let Some(ref right) = entry.t { collect_mst_keys(db, right, map)?; @@ -171,7 +175,7 @@ pub fn collect_mst_keys( Ok(()) } -fn leading_zeros(key: &str) -> u8 { +fn leading_zeros(key: &[u8]) -> u8 { let digest = sha256::digest(key); let digest = digest.as_bytes(); for (i, c) in digest.iter().enumerate() { @@ -193,14 +197,14 @@ fn leading_zeros(key: &str) -> u8 { #[test] fn test_leading_zeros() { - assert_eq!(leading_zeros(""), 0); - assert_eq!(leading_zeros("asdf"), 0); - assert_eq!(leading_zeros("2653ae71"), 0); - assert_eq!(leading_zeros("88bfafc7"), 1); - assert_eq!(leading_zeros("2a92d355"), 2); - assert_eq!(leading_zeros("884976f5"), 3); - assert_eq!(leading_zeros("app.bsky.feed.post/454397e440ec"), 2); - assert_eq!(leading_zeros("app.bsky.feed.post/9adeb165882c"), 4); + assert_eq!(leading_zeros(b""), 0); + assert_eq!(leading_zeros(b"asdf"), 0); + assert_eq!(leading_zeros(b"2653ae71"), 0); + assert_eq!(leading_zeros(b"88bfafc7"), 1); + assert_eq!(leading_zeros(b"2a92d355"), 2); + assert_eq!(leading_zeros(b"884976f5"), 3); + assert_eq!(leading_zeros(b"app.bsky.feed.post/454397e440ec"), 2); + assert_eq!(leading_zeros(b"app.bsky.feed.post/9adeb165882c"), 4); } pub fn generate_mst( @@ -210,10 +214,11 @@ pub fn generate_mst( // construct a "WIP" tree let mut root: Option<WipNode> = None; for (key, val) in map { + let key = key.as_bytes(); let height = leading_zeros(key); let entry = WipEntry { height, - key: key.clone(), + key: key.to_vec(), val: *val, right: None, }; @@ -293,9 +298,7 @@ fn insert_entry(mut node: WipNode, entry: WipEntry) -> WipNode { /// returns the length of common characters between the two strings. Strings must be simple ASCII, /// which should hold for current ATP MST keys (collection plus TID) -fn common_prefix_len(a: &str, b: &str) -> usize { - let a = a.as_bytes(); - let b = b.as_bytes(); +fn common_prefix_len(a: &[u8], b: &[u8]) -> usize { for i in 0..std::cmp::min(a.len(), b.len()) { if a[i] != b[i] { return i; @@ -307,34 +310,34 @@ fn common_prefix_len(a: &str, b: &str) -> usize { #[test] fn test_common_prefix_len() { - assert_eq!(common_prefix_len("abc", "abc"), 3); - assert_eq!(common_prefix_len("", "abc"), 0); - assert_eq!(common_prefix_len("abc", ""), 0); - assert_eq!(common_prefix_len("ab", "abc"), 2); - assert_eq!(common_prefix_len("abc", "ab"), 2); - assert_eq!(common_prefix_len("abcde", "abc"), 3); - assert_eq!(common_prefix_len("abc", "abcde"), 3); - assert_eq!(common_prefix_len("abcde", "abc1"), 3); - assert_eq!(common_prefix_len("abcde", "abb"), 2); - assert_eq!(common_prefix_len("abcde", "qbb"), 0); - assert_eq!(common_prefix_len("abc", "abc\x00"), 3); - assert_eq!(common_prefix_len("abc\x00", "abc"), 3); + assert_eq!(common_prefix_len(b"abc", b"abc"), 3); + assert_eq!(common_prefix_len(b"", b"abc"), 0); + assert_eq!(common_prefix_len(b"abc", b""), 0); + assert_eq!(common_prefix_len(b"ab", b"abc"), 2); + assert_eq!(common_prefix_len(b"abc", b"ab"), 2); + assert_eq!(common_prefix_len(b"abcde", b"abc"), 3); + assert_eq!(common_prefix_len(b"abc", b"abcde"), 3); + assert_eq!(common_prefix_len(b"abcde", b"abc1"), 3); + assert_eq!(common_prefix_len(b"abcde", b"abb"), 2); + assert_eq!(common_prefix_len(b"abcde", b"qbb"), 0); + assert_eq!(common_prefix_len(b"abc", b"abc\x00"), 3); + assert_eq!(common_prefix_len(b"abc\x00", b"abc"), 3); } #[test] fn test_common_prefix_len_wide() { // TODO: these are not cross-language consistent! - assert_eq!("jalapeño".len(), 9); // 8 in javascript - assert_eq!("💩".len(), 4); // 2 in javascript - assert_eq!("👩👧👧".len(), 18); // 8 in javascript + assert_eq!("jalapeño".as_bytes().len(), 9); // 8 in javascript + assert_eq!("💩".as_bytes().len(), 4); // 2 in javascript + assert_eq!("👩👧👧".as_bytes().len(), 18); // 8 in javascript // many of the below are different in JS; in Rust we *must* cast down to bytes to count - assert_eq!(common_prefix_len("jalapeño", "jalapeno"), 6); - assert_eq!(common_prefix_len("jalapeñoA", "jalapeñoB"), 9); - assert_eq!(common_prefix_len("coöperative", "coüperative"), 3); - assert_eq!(common_prefix_len("abc💩abc", "abcabc"), 3); - assert_eq!(common_prefix_len("💩abc", "💩ab"), 6); - assert_eq!(common_prefix_len("abc👩👦👦de", "abc👩👧👧de"), 13); + assert_eq!(common_prefix_len("jalapeño".as_bytes(), "jalapeno".as_bytes()), 6); + assert_eq!(common_prefix_len("jalapeñoA".as_bytes(), "jalapeñoB".as_bytes()), 9); + assert_eq!(common_prefix_len("coöperative".as_bytes(), "coüperative".as_bytes()), 3); + assert_eq!(common_prefix_len("abc💩abc".as_bytes(), "abcabc".as_bytes()), 3); + assert_eq!(common_prefix_len("💩abc".as_bytes(), "💩ab".as_bytes()), 6); + assert_eq!(common_prefix_len("abc👩👦👦de".as_bytes(), "abc👩👧👧de".as_bytes()), 13); } fn serialize_wip_tree( @@ -348,7 +351,7 @@ fn serialize_wip_tree( }; let mut entries: Vec<MstEntry> = vec![]; - let mut last_key = "".to_string(); + let mut last_key: Vec<u8> = vec![]; for wip_entry in wip_node.entries { let right: Option<Cid> = if let Some(right) = wip_entry.right { Some(serialize_wip_tree(db, *right)?) @@ -357,7 +360,7 @@ fn serialize_wip_tree( }; let prefix_len = common_prefix_len(&last_key, &wip_entry.key); entries.push(MstEntry { - k: wip_entry.key[prefix_len..].to_string(), + k: wip_entry.key[prefix_len..].to_vec(), p: prefix_len as u32, v: wip_entry.val, t: right, @@ -382,7 +385,7 @@ fn test_mst_node_cbor() { let node = MstNode { l: None, e: vec![MstEntry { - k: "asdf".to_string(), + k: "asdf".as_bytes().to_vec(), p: 0, v: cid1, t: None, @@ -394,6 +397,6 @@ fn test_mst_node_cbor() { let cid = *block.cid(); assert_eq!( cid.to_string(), - "bafyreidaftbr35xhh4lzmv5jcoeufqjh75ohzmz6u56v7n2ippbtxdgqqe" + "bafyreihwovqdtuzyymasojncipiplf3xgxaqamkld7oz7ct2my3be7zxae" ); } |