diff options
author | bryan newbold <bnewbold@robocracy.org> | 2023-02-15 16:35:26 -0800 |
---|---|---|
committer | bryan newbold <bnewbold@robocracy.org> | 2023-03-09 11:13:43 -0800 |
commit | 1e2be0623d968f69c01b8120245fc5f687c3ec14 (patch) | |
tree | 2e92b0eeecdd3dcbe33181a4b85856c3c0330c49 | |
parent | ba33465034537251ca7c9881a537f321564bba53 (diff) | |
download | adenosine-1e2be0623d968f69c01b8120245fc5f687c3ec14.tar.gz adenosine-1e2be0623d968f69c01b8120245fc5f687c3ec14.zip |
pds: refactor MST internals to use bytes, not String, for keys
-rw-r--r-- | adenosine/src/mst.rs | 91 | ||||
-rw-r--r-- | adenosine/src/repo.rs | 147 |
2 files changed, 193 insertions, 45 deletions
diff --git a/adenosine/src/mst.rs b/adenosine/src/mst.rs index 0d66d5a..3ec4e25 100644 --- a/adenosine/src/mst.rs +++ b/adenosine/src/mst.rs @@ -47,7 +47,7 @@ pub struct MetadataNode { #[derive(Debug, DagCbor, PartialEq, Eq)] struct MstEntry { p: u32, - k: String, + k: Vec<u8>, v: Cid, t: Option<Cid>, } @@ -60,7 +60,7 @@ struct MstNode { struct WipEntry { height: u8, - key: String, + key: Vec<u8>, val: Cid, right: Option<Box<WipNode>>, } @@ -87,10 +87,10 @@ pub fn print_mst_keys(db: &mut BlockStore<libipld::DefaultParams>, cid: &Cid) -> if let Some(ref left) = node.l { print_mst_keys(db, left)?; } - let mut key: String = "".to_string(); + let mut key: Vec<u8> = vec![]; for entry in node.e.iter() { - key = format!("{}{}", &key[0..entry.p as usize], entry.k); - println!("\t{}\t-> {}", key, entry.v); + key = [&key[0..entry.p as usize], &entry.v.to_bytes()].concat(); + println!("\t{}\t-> {}", String::from_utf8_lossy(&key), entry.v); if let Some(ref right) = entry.t { print_mst_keys(db, right)?; } @@ -162,7 +162,11 @@ pub fn collect_mst_keys( } let mut key: String = "".to_string(); for entry in node.e.iter() { - key = format!("{}{}", &key[0..entry.p as usize], entry.k); + key = format!( + "{}{}", + &key[0..entry.p as usize], + String::from_utf8_lossy(&entry.k) + ); map.insert(key.clone(), entry.v); if let Some(ref right) = entry.t { collect_mst_keys(db, right, map)?; @@ -171,7 +175,7 @@ pub fn collect_mst_keys( Ok(()) } -fn leading_zeros(key: &str) -> u8 { +fn leading_zeros(key: &[u8]) -> u8 { let digest = sha256::digest(key); let digest = digest.as_bytes(); for (i, c) in digest.iter().enumerate() { @@ -193,14 +197,14 @@ fn leading_zeros(key: &str) -> u8 { #[test] fn test_leading_zeros() { - assert_eq!(leading_zeros(""), 0); - assert_eq!(leading_zeros("asdf"), 0); - assert_eq!(leading_zeros("2653ae71"), 0); - assert_eq!(leading_zeros("88bfafc7"), 1); - assert_eq!(leading_zeros("2a92d355"), 2); - assert_eq!(leading_zeros("884976f5"), 3); - assert_eq!(leading_zeros("app.bsky.feed.post/454397e440ec"), 2); - assert_eq!(leading_zeros("app.bsky.feed.post/9adeb165882c"), 4); + assert_eq!(leading_zeros(b""), 0); + assert_eq!(leading_zeros(b"asdf"), 0); + assert_eq!(leading_zeros(b"2653ae71"), 0); + assert_eq!(leading_zeros(b"88bfafc7"), 1); + assert_eq!(leading_zeros(b"2a92d355"), 2); + assert_eq!(leading_zeros(b"884976f5"), 3); + assert_eq!(leading_zeros(b"app.bsky.feed.post/454397e440ec"), 2); + assert_eq!(leading_zeros(b"app.bsky.feed.post/9adeb165882c"), 4); } pub fn generate_mst( @@ -210,10 +214,11 @@ pub fn generate_mst( // construct a "WIP" tree let mut root: Option<WipNode> = None; for (key, val) in map { + let key = key.as_bytes(); let height = leading_zeros(key); let entry = WipEntry { height, - key: key.clone(), + key: key.to_vec(), val: *val, right: None, }; @@ -293,9 +298,7 @@ fn insert_entry(mut node: WipNode, entry: WipEntry) -> WipNode { /// returns the length of common characters between the two strings. Strings must be simple ASCII, /// which should hold for current ATP MST keys (collection plus TID) -fn common_prefix_len(a: &str, b: &str) -> usize { - let a = a.as_bytes(); - let b = b.as_bytes(); +fn common_prefix_len(a: &[u8], b: &[u8]) -> usize { for i in 0..std::cmp::min(a.len(), b.len()) { if a[i] != b[i] { return i; @@ -307,34 +310,34 @@ fn common_prefix_len(a: &str, b: &str) -> usize { #[test] fn test_common_prefix_len() { - assert_eq!(common_prefix_len("abc", "abc"), 3); - assert_eq!(common_prefix_len("", "abc"), 0); - assert_eq!(common_prefix_len("abc", ""), 0); - assert_eq!(common_prefix_len("ab", "abc"), 2); - assert_eq!(common_prefix_len("abc", "ab"), 2); - assert_eq!(common_prefix_len("abcde", "abc"), 3); - assert_eq!(common_prefix_len("abc", "abcde"), 3); - assert_eq!(common_prefix_len("abcde", "abc1"), 3); - assert_eq!(common_prefix_len("abcde", "abb"), 2); - assert_eq!(common_prefix_len("abcde", "qbb"), 0); - assert_eq!(common_prefix_len("abc", "abc\x00"), 3); - assert_eq!(common_prefix_len("abc\x00", "abc"), 3); + assert_eq!(common_prefix_len(b"abc", b"abc"), 3); + assert_eq!(common_prefix_len(b"", b"abc"), 0); + assert_eq!(common_prefix_len(b"abc", b""), 0); + assert_eq!(common_prefix_len(b"ab", b"abc"), 2); + assert_eq!(common_prefix_len(b"abc", b"ab"), 2); + assert_eq!(common_prefix_len(b"abcde", b"abc"), 3); + assert_eq!(common_prefix_len(b"abc", b"abcde"), 3); + assert_eq!(common_prefix_len(b"abcde", b"abc1"), 3); + assert_eq!(common_prefix_len(b"abcde", b"abb"), 2); + assert_eq!(common_prefix_len(b"abcde", b"qbb"), 0); + assert_eq!(common_prefix_len(b"abc", b"abc\x00"), 3); + assert_eq!(common_prefix_len(b"abc\x00", b"abc"), 3); } #[test] fn test_common_prefix_len_wide() { // TODO: these are not cross-language consistent! - assert_eq!("jalapeño".len(), 9); // 8 in javascript - assert_eq!("💩".len(), 4); // 2 in javascript - assert_eq!("👩👧👧".len(), 18); // 8 in javascript + assert_eq!("jalapeño".as_bytes().len(), 9); // 8 in javascript + assert_eq!("💩".as_bytes().len(), 4); // 2 in javascript + assert_eq!("👩👧👧".as_bytes().len(), 18); // 8 in javascript // many of the below are different in JS; in Rust we *must* cast down to bytes to count - assert_eq!(common_prefix_len("jalapeño", "jalapeno"), 6); - assert_eq!(common_prefix_len("jalapeñoA", "jalapeñoB"), 9); - assert_eq!(common_prefix_len("coöperative", "coüperative"), 3); - assert_eq!(common_prefix_len("abc💩abc", "abcabc"), 3); - assert_eq!(common_prefix_len("💩abc", "💩ab"), 6); - assert_eq!(common_prefix_len("abc👩👦👦de", "abc👩👧👧de"), 13); + assert_eq!(common_prefix_len("jalapeño".as_bytes(), "jalapeno".as_bytes()), 6); + assert_eq!(common_prefix_len("jalapeñoA".as_bytes(), "jalapeñoB".as_bytes()), 9); + assert_eq!(common_prefix_len("coöperative".as_bytes(), "coüperative".as_bytes()), 3); + assert_eq!(common_prefix_len("abc💩abc".as_bytes(), "abcabc".as_bytes()), 3); + assert_eq!(common_prefix_len("💩abc".as_bytes(), "💩ab".as_bytes()), 6); + assert_eq!(common_prefix_len("abc👩👦👦de".as_bytes(), "abc👩👧👧de".as_bytes()), 13); } fn serialize_wip_tree( @@ -348,7 +351,7 @@ fn serialize_wip_tree( }; let mut entries: Vec<MstEntry> = vec![]; - let mut last_key = "".to_string(); + let mut last_key: Vec<u8> = vec![]; for wip_entry in wip_node.entries { let right: Option<Cid> = if let Some(right) = wip_entry.right { Some(serialize_wip_tree(db, *right)?) @@ -357,7 +360,7 @@ fn serialize_wip_tree( }; let prefix_len = common_prefix_len(&last_key, &wip_entry.key); entries.push(MstEntry { - k: wip_entry.key[prefix_len..].to_string(), + k: wip_entry.key[prefix_len..].to_vec(), p: prefix_len as u32, v: wip_entry.val, t: right, @@ -382,7 +385,7 @@ fn test_mst_node_cbor() { let node = MstNode { l: None, e: vec![MstEntry { - k: "asdf".to_string(), + k: "asdf".as_bytes().to_vec(), p: 0, v: cid1, t: None, @@ -394,6 +397,6 @@ fn test_mst_node_cbor() { let cid = *block.cid(); assert_eq!( cid.to_string(), - "bafyreidaftbr35xhh4lzmv5jcoeufqjh75ohzmz6u56v7n2ippbtxdgqqe" + "bafyreihwovqdtuzyymasojncipiplf3xgxaqamkld7oz7ct2my3be7zxae" ); } diff --git a/adenosine/src/repo.rs b/adenosine/src/repo.rs index 7f4f292..3b862da 100644 --- a/adenosine/src/repo.rs +++ b/adenosine/src/repo.rs @@ -448,5 +448,150 @@ fn test_repo_mst() { assert_eq!(commit.did, did); assert_eq!(commit.prev, Some(simple_commit_cid)); assert_eq!(commit.mst_cid, simple3_map_cid); - assert_eq!(Some(simple3_commit_cid), repo.lookup_commit(&did).unwrap()); + assert_eq!( + Some(simple3_commit_cid.clone()), + repo.lookup_commit(&did).unwrap() + ); +} + +#[test] +fn test_mst_interop_known_maps() { + let mut repo = RepoStore::open_ephemeral().unwrap(); + let cid1 = + Cid::from_str("bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454").unwrap(); + + let empty_map: BTreeMap<String, Cid> = Default::default(); + assert_eq!( + repo.mst_from_map(&empty_map).unwrap().to_string(), + "bafyreie5737gdxlw5i64vzichcalba3z2v5n6icifvx5xytvske7mr3hpm" + ); + + let mut trivial_map: BTreeMap<String, Cid> = Default::default(); + trivial_map.insert("asdf".to_string(), cid1.clone()); + assert_eq!( + repo.mst_from_map(&trivial_map).unwrap().to_string(), + "bafyreidaftbr35xhh4lzmv5jcoeufqjh75ohzmz6u56v7n2ippbtxdgqqe" + ); + + let mut singlelayer2_map: BTreeMap<String, Cid> = Default::default(); + singlelayer2_map.insert("com.example.record/9ba1c7247ede".to_string(), cid1.clone()); + assert_eq!( + repo.mst_from_map(&singlelayer2_map).unwrap().to_string(), + "bafyreidaftbr35xhh4lzmv5jcoeufqjh75ohzmz6u56v7n2ippbtxdgqqe" + ); + + let mut simple_map: BTreeMap<String, Cid> = Default::default(); + simple_map.insert("asdf".to_string(), cid1.clone()); + simple_map.insert("88bfafc7".to_string(), cid1.clone()); + simple_map.insert("2a92d355".to_string(), cid1.clone()); + simple_map.insert("app.bsky.feed.post/454397e440ec".to_string(), cid1.clone()); + simple_map.insert("app.bsky.feed.post/9adeb165882c".to_string(), cid1.clone()); + // XXX: doesn't match javascript + //assert_eq!(repo.mst_from_map(&simple_map).unwrap().to_string(), "bafyreiecb33zh7r2sc3k2wthm6exwzfktof63kmajeildktqc25xj6qzx4"); + assert_eq!( + repo.mst_from_map(&simple_map).unwrap().to_string(), + "bafyreifsh7gfnjwhofap2hm62wcaycrbaygn6cejiues4v4l3ylokq2rra" + ); + + let mut tricky_map: BTreeMap<String, Cid> = Default::default(); + tricky_map.insert("".to_string(), cid1.clone()); + tricky_map.insert("jalapeño".to_string(), cid1.clone()); + tricky_map.insert("coöperative".to_string(), cid1.clone()); + tricky_map.insert("coüperative".to_string(), cid1.clone()); + tricky_map.insert("abc\x00".to_string(), cid1.clone()); + assert_eq!( + repo.mst_from_map(&tricky_map).unwrap().to_string(), + "bafyreierek7nqxzq5xgplhrynpunznzr2myrb6wyhgvddruk5x3wgnb44e" + ); +} + +#[test] +fn test_mst_interop_edge_cases() { + use crate::mst::print_mst_keys; + + let mut repo = RepoStore::open_ephemeral().unwrap(); + let cid1 = + Cid::from_str("bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454").unwrap(); + + // "trims top of tree on delete" + // NOTE: this test doesn't do much in this case of rust implementation + let mut trim_map: BTreeMap<String, Cid> = Default::default(); + trim_map.insert("com.example.record/40c73105b48f".to_string(), cid1.clone()); // level 0 + trim_map.insert("com.example.record/e99bf3ced34b".to_string(), cid1.clone()); // level 0 + trim_map.insert("com.example.record/893e6c08b450".to_string(), cid1.clone()); // level 0 + trim_map.insert("com.example.record/9cd8b6c0cc02".to_string(), cid1.clone()); // level 0 + trim_map.insert("com.example.record/cbe72d33d12a".to_string(), cid1.clone()); // level 0 + trim_map.insert("com.example.record/a15e33ba0f6c".to_string(), cid1.clone()); // level 1 + let trim_before_cid = repo.mst_from_map(&trim_map).unwrap(); + print_mst_keys(&mut repo.db, &trim_before_cid).unwrap(); + assert_eq!( + trim_before_cid.to_string(), + "bafyreihuyj2vzb2vjw3yhxg6dy25achg5fmre6gg5m6fjtxn64bqju4dee" + ); + + // XXX: repo interface is too strict about TID validation + //let trim_ops = vec![Mutation::Delete(Nsid::from_str("com.example.record").unwrap(), Tid::from_str("a15e33ba0f6c").unwrap())]; + //let trim_after_cid = repo.update_mst(&trim_before_cid, &trim_ops).unwrap(); + + trim_map.remove("com.example.record/a15e33ba0f6c"); + let trim_after_cid = repo.mst_from_map(&trim_map).unwrap(); + assert_eq!( + trim_after_cid.to_string(), + "bafyreibmijjc63mekkjzl3v2pegngwke5u6cu66g75z6uw27v64bc6ahqi" + ); + + // "handles insertion that splits two layers down" + // TODO: actual mutation + let mut insertion_map: BTreeMap<String, Cid> = Default::default(); + insertion_map.insert("com.example.record/403e2aeebfdb".to_string(), cid1.clone()); // A; level 0 + insertion_map.insert("com.example.record/40c73105b48f".to_string(), cid1.clone()); // B; level 0 + insertion_map.insert("com.example.record/645787eb4316".to_string(), cid1.clone()); // C; level 0 + insertion_map.insert("com.example.record/7ca4e61d6fbc".to_string(), cid1.clone()); // D; level 1 + insertion_map.insert("com.example.record/893e6c08b450".to_string(), cid1.clone()); // E; level 0 + insertion_map.insert("com.example.record/9cd8b6c0cc02".to_string(), cid1.clone()); // G; level 0 + insertion_map.insert("com.example.record/cbe72d33d12a".to_string(), cid1.clone()); // H; level 0 + insertion_map.insert("com.example.record/dbea731be795".to_string(), cid1.clone()); // I; level 1 + insertion_map.insert("com.example.record/e2ef555433f2".to_string(), cid1.clone()); // J; level 0 + insertion_map.insert("com.example.record/e99bf3ced34b".to_string(), cid1.clone()); // K; level 0 + insertion_map.insert("com.example.record/f728ba61e4b6".to_string(), cid1.clone()); // L; level 0 + let insertion_before_cid = repo.mst_from_map(&insertion_map).unwrap(); + assert_eq!( + insertion_before_cid.to_string(), + "bafyreiagt55jzvkenoa4yik77dhomagq2uj26ix4cijj7kd2py2u3s43ve" + ); + + insertion_map.insert("com.example.record/9ba1c7247ede".to_string(), cid1.clone()); + let insertion_after_cid = repo.mst_from_map(&insertion_map).unwrap(); + assert_eq!( + insertion_after_cid.to_string(), + "bafyreiddrz7qbvfattp5dzzh4ldohsaobatsg7f5l6awxnmuydewq66qoa" + ); + + // "handles new layers that are two higher than existing" + // TODO: actual mutation + let mut higher_map: BTreeMap<String, Cid> = Default::default(); + higher_map.insert("com.example.record/403e2aeebfdb".to_string(), cid1.clone()); // A; level 0 + higher_map.insert("com.example.record/cbe72d33d12a".to_string(), cid1.clone()); // C; level 0 + let higher_before_cid = repo.mst_from_map(&higher_map).unwrap(); + assert_eq!( + higher_before_cid.to_string(), + "bafyreicivoa3p3ttcebdn2zfkdzenkd2uk3gxxlaz43qvueeip6yysvq2m" + ); + + higher_map.insert("com.example.record/9ba1c7247ede".to_string(), cid1.clone()); // B; level 2 + let higher_after_cid = repo.mst_from_map(&higher_map).unwrap(); + // XXX: mismatch! + /* + assert_eq!( + higher_after_cid.to_string(), + "bafyreidwoqm6xlewxzhrx6ytbyhsazctlv72txtmnd4au6t53z2vpzn7wa" + ); + */ + + higher_map.insert("com.example.record/fae7a851fbeb".to_string(), cid1.clone()); // D; level 1 + let higher_after_cid = repo.mst_from_map(&higher_map).unwrap(); + assert_eq!( + higher_after_cid.to_string(), + "bafyreiapru27ce4wdlylk5revtr3hewmxhmt3ek5f2ypioiivmdbv5igrm" + ); } |