summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbryan newbold <bnewbold@robocracy.org>2023-02-15 16:35:26 -0800
committerbryan newbold <bnewbold@robocracy.org>2023-03-09 11:13:43 -0800
commit1e2be0623d968f69c01b8120245fc5f687c3ec14 (patch)
tree2e92b0eeecdd3dcbe33181a4b85856c3c0330c49
parentba33465034537251ca7c9881a537f321564bba53 (diff)
downloadadenosine-1e2be0623d968f69c01b8120245fc5f687c3ec14.tar.gz
adenosine-1e2be0623d968f69c01b8120245fc5f687c3ec14.zip
pds: refactor MST internals to use bytes, not String, for keys
-rw-r--r--adenosine/src/mst.rs91
-rw-r--r--adenosine/src/repo.rs147
2 files changed, 193 insertions, 45 deletions
diff --git a/adenosine/src/mst.rs b/adenosine/src/mst.rs
index 0d66d5a..3ec4e25 100644
--- a/adenosine/src/mst.rs
+++ b/adenosine/src/mst.rs
@@ -47,7 +47,7 @@ pub struct MetadataNode {
#[derive(Debug, DagCbor, PartialEq, Eq)]
struct MstEntry {
p: u32,
- k: String,
+ k: Vec<u8>,
v: Cid,
t: Option<Cid>,
}
@@ -60,7 +60,7 @@ struct MstNode {
struct WipEntry {
height: u8,
- key: String,
+ key: Vec<u8>,
val: Cid,
right: Option<Box<WipNode>>,
}
@@ -87,10 +87,10 @@ pub fn print_mst_keys(db: &mut BlockStore<libipld::DefaultParams>, cid: &Cid) ->
if let Some(ref left) = node.l {
print_mst_keys(db, left)?;
}
- let mut key: String = "".to_string();
+ let mut key: Vec<u8> = vec![];
for entry in node.e.iter() {
- key = format!("{}{}", &key[0..entry.p as usize], entry.k);
- println!("\t{}\t-> {}", key, entry.v);
+ key = [&key[0..entry.p as usize], &entry.v.to_bytes()].concat();
+ println!("\t{}\t-> {}", String::from_utf8_lossy(&key), entry.v);
if let Some(ref right) = entry.t {
print_mst_keys(db, right)?;
}
@@ -162,7 +162,11 @@ pub fn collect_mst_keys(
}
let mut key: String = "".to_string();
for entry in node.e.iter() {
- key = format!("{}{}", &key[0..entry.p as usize], entry.k);
+ key = format!(
+ "{}{}",
+ &key[0..entry.p as usize],
+ String::from_utf8_lossy(&entry.k)
+ );
map.insert(key.clone(), entry.v);
if let Some(ref right) = entry.t {
collect_mst_keys(db, right, map)?;
@@ -171,7 +175,7 @@ pub fn collect_mst_keys(
Ok(())
}
-fn leading_zeros(key: &str) -> u8 {
+fn leading_zeros(key: &[u8]) -> u8 {
let digest = sha256::digest(key);
let digest = digest.as_bytes();
for (i, c) in digest.iter().enumerate() {
@@ -193,14 +197,14 @@ fn leading_zeros(key: &str) -> u8 {
#[test]
fn test_leading_zeros() {
- assert_eq!(leading_zeros(""), 0);
- assert_eq!(leading_zeros("asdf"), 0);
- assert_eq!(leading_zeros("2653ae71"), 0);
- assert_eq!(leading_zeros("88bfafc7"), 1);
- assert_eq!(leading_zeros("2a92d355"), 2);
- assert_eq!(leading_zeros("884976f5"), 3);
- assert_eq!(leading_zeros("app.bsky.feed.post/454397e440ec"), 2);
- assert_eq!(leading_zeros("app.bsky.feed.post/9adeb165882c"), 4);
+ assert_eq!(leading_zeros(b""), 0);
+ assert_eq!(leading_zeros(b"asdf"), 0);
+ assert_eq!(leading_zeros(b"2653ae71"), 0);
+ assert_eq!(leading_zeros(b"88bfafc7"), 1);
+ assert_eq!(leading_zeros(b"2a92d355"), 2);
+ assert_eq!(leading_zeros(b"884976f5"), 3);
+ assert_eq!(leading_zeros(b"app.bsky.feed.post/454397e440ec"), 2);
+ assert_eq!(leading_zeros(b"app.bsky.feed.post/9adeb165882c"), 4);
}
pub fn generate_mst(
@@ -210,10 +214,11 @@ pub fn generate_mst(
// construct a "WIP" tree
let mut root: Option<WipNode> = None;
for (key, val) in map {
+ let key = key.as_bytes();
let height = leading_zeros(key);
let entry = WipEntry {
height,
- key: key.clone(),
+ key: key.to_vec(),
val: *val,
right: None,
};
@@ -293,9 +298,7 @@ fn insert_entry(mut node: WipNode, entry: WipEntry) -> WipNode {
/// returns the length of common characters between the two strings. Strings must be simple ASCII,
/// which should hold for current ATP MST keys (collection plus TID)
-fn common_prefix_len(a: &str, b: &str) -> usize {
- let a = a.as_bytes();
- let b = b.as_bytes();
+fn common_prefix_len(a: &[u8], b: &[u8]) -> usize {
for i in 0..std::cmp::min(a.len(), b.len()) {
if a[i] != b[i] {
return i;
@@ -307,34 +310,34 @@ fn common_prefix_len(a: &str, b: &str) -> usize {
#[test]
fn test_common_prefix_len() {
- assert_eq!(common_prefix_len("abc", "abc"), 3);
- assert_eq!(common_prefix_len("", "abc"), 0);
- assert_eq!(common_prefix_len("abc", ""), 0);
- assert_eq!(common_prefix_len("ab", "abc"), 2);
- assert_eq!(common_prefix_len("abc", "ab"), 2);
- assert_eq!(common_prefix_len("abcde", "abc"), 3);
- assert_eq!(common_prefix_len("abc", "abcde"), 3);
- assert_eq!(common_prefix_len("abcde", "abc1"), 3);
- assert_eq!(common_prefix_len("abcde", "abb"), 2);
- assert_eq!(common_prefix_len("abcde", "qbb"), 0);
- assert_eq!(common_prefix_len("abc", "abc\x00"), 3);
- assert_eq!(common_prefix_len("abc\x00", "abc"), 3);
+ assert_eq!(common_prefix_len(b"abc", b"abc"), 3);
+ assert_eq!(common_prefix_len(b"", b"abc"), 0);
+ assert_eq!(common_prefix_len(b"abc", b""), 0);
+ assert_eq!(common_prefix_len(b"ab", b"abc"), 2);
+ assert_eq!(common_prefix_len(b"abc", b"ab"), 2);
+ assert_eq!(common_prefix_len(b"abcde", b"abc"), 3);
+ assert_eq!(common_prefix_len(b"abc", b"abcde"), 3);
+ assert_eq!(common_prefix_len(b"abcde", b"abc1"), 3);
+ assert_eq!(common_prefix_len(b"abcde", b"abb"), 2);
+ assert_eq!(common_prefix_len(b"abcde", b"qbb"), 0);
+ assert_eq!(common_prefix_len(b"abc", b"abc\x00"), 3);
+ assert_eq!(common_prefix_len(b"abc\x00", b"abc"), 3);
}
#[test]
fn test_common_prefix_len_wide() {
// TODO: these are not cross-language consistent!
- assert_eq!("jalapeño".len(), 9); // 8 in javascript
- assert_eq!("💩".len(), 4); // 2 in javascript
- assert_eq!("👩‍👧‍👧".len(), 18); // 8 in javascript
+ assert_eq!("jalapeño".as_bytes().len(), 9); // 8 in javascript
+ assert_eq!("💩".as_bytes().len(), 4); // 2 in javascript
+ assert_eq!("👩‍👧‍👧".as_bytes().len(), 18); // 8 in javascript
// many of the below are different in JS; in Rust we *must* cast down to bytes to count
- assert_eq!(common_prefix_len("jalapeño", "jalapeno"), 6);
- assert_eq!(common_prefix_len("jalapeñoA", "jalapeñoB"), 9);
- assert_eq!(common_prefix_len("coöperative", "coüperative"), 3);
- assert_eq!(common_prefix_len("abc💩abc", "abcabc"), 3);
- assert_eq!(common_prefix_len("💩abc", "💩ab"), 6);
- assert_eq!(common_prefix_len("abc👩‍👦‍👦de", "abc👩‍👧‍👧de"), 13);
+ assert_eq!(common_prefix_len("jalapeño".as_bytes(), "jalapeno".as_bytes()), 6);
+ assert_eq!(common_prefix_len("jalapeñoA".as_bytes(), "jalapeñoB".as_bytes()), 9);
+ assert_eq!(common_prefix_len("coöperative".as_bytes(), "coüperative".as_bytes()), 3);
+ assert_eq!(common_prefix_len("abc💩abc".as_bytes(), "abcabc".as_bytes()), 3);
+ assert_eq!(common_prefix_len("💩abc".as_bytes(), "💩ab".as_bytes()), 6);
+ assert_eq!(common_prefix_len("abc👩‍👦‍👦de".as_bytes(), "abc👩‍👧‍👧de".as_bytes()), 13);
}
fn serialize_wip_tree(
@@ -348,7 +351,7 @@ fn serialize_wip_tree(
};
let mut entries: Vec<MstEntry> = vec![];
- let mut last_key = "".to_string();
+ let mut last_key: Vec<u8> = vec![];
for wip_entry in wip_node.entries {
let right: Option<Cid> = if let Some(right) = wip_entry.right {
Some(serialize_wip_tree(db, *right)?)
@@ -357,7 +360,7 @@ fn serialize_wip_tree(
};
let prefix_len = common_prefix_len(&last_key, &wip_entry.key);
entries.push(MstEntry {
- k: wip_entry.key[prefix_len..].to_string(),
+ k: wip_entry.key[prefix_len..].to_vec(),
p: prefix_len as u32,
v: wip_entry.val,
t: right,
@@ -382,7 +385,7 @@ fn test_mst_node_cbor() {
let node = MstNode {
l: None,
e: vec![MstEntry {
- k: "asdf".to_string(),
+ k: "asdf".as_bytes().to_vec(),
p: 0,
v: cid1,
t: None,
@@ -394,6 +397,6 @@ fn test_mst_node_cbor() {
let cid = *block.cid();
assert_eq!(
cid.to_string(),
- "bafyreidaftbr35xhh4lzmv5jcoeufqjh75ohzmz6u56v7n2ippbtxdgqqe"
+ "bafyreihwovqdtuzyymasojncipiplf3xgxaqamkld7oz7ct2my3be7zxae"
);
}
diff --git a/adenosine/src/repo.rs b/adenosine/src/repo.rs
index 7f4f292..3b862da 100644
--- a/adenosine/src/repo.rs
+++ b/adenosine/src/repo.rs
@@ -448,5 +448,150 @@ fn test_repo_mst() {
assert_eq!(commit.did, did);
assert_eq!(commit.prev, Some(simple_commit_cid));
assert_eq!(commit.mst_cid, simple3_map_cid);
- assert_eq!(Some(simple3_commit_cid), repo.lookup_commit(&did).unwrap());
+ assert_eq!(
+ Some(simple3_commit_cid.clone()),
+ repo.lookup_commit(&did).unwrap()
+ );
+}
+
+#[test]
+fn test_mst_interop_known_maps() {
+ let mut repo = RepoStore::open_ephemeral().unwrap();
+ let cid1 =
+ Cid::from_str("bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454").unwrap();
+
+ let empty_map: BTreeMap<String, Cid> = Default::default();
+ assert_eq!(
+ repo.mst_from_map(&empty_map).unwrap().to_string(),
+ "bafyreie5737gdxlw5i64vzichcalba3z2v5n6icifvx5xytvske7mr3hpm"
+ );
+
+ let mut trivial_map: BTreeMap<String, Cid> = Default::default();
+ trivial_map.insert("asdf".to_string(), cid1.clone());
+ assert_eq!(
+ repo.mst_from_map(&trivial_map).unwrap().to_string(),
+ "bafyreidaftbr35xhh4lzmv5jcoeufqjh75ohzmz6u56v7n2ippbtxdgqqe"
+ );
+
+ let mut singlelayer2_map: BTreeMap<String, Cid> = Default::default();
+ singlelayer2_map.insert("com.example.record/9ba1c7247ede".to_string(), cid1.clone());
+ assert_eq!(
+ repo.mst_from_map(&singlelayer2_map).unwrap().to_string(),
+ "bafyreidaftbr35xhh4lzmv5jcoeufqjh75ohzmz6u56v7n2ippbtxdgqqe"
+ );
+
+ let mut simple_map: BTreeMap<String, Cid> = Default::default();
+ simple_map.insert("asdf".to_string(), cid1.clone());
+ simple_map.insert("88bfafc7".to_string(), cid1.clone());
+ simple_map.insert("2a92d355".to_string(), cid1.clone());
+ simple_map.insert("app.bsky.feed.post/454397e440ec".to_string(), cid1.clone());
+ simple_map.insert("app.bsky.feed.post/9adeb165882c".to_string(), cid1.clone());
+ // XXX: doesn't match javascript
+ //assert_eq!(repo.mst_from_map(&simple_map).unwrap().to_string(), "bafyreiecb33zh7r2sc3k2wthm6exwzfktof63kmajeildktqc25xj6qzx4");
+ assert_eq!(
+ repo.mst_from_map(&simple_map).unwrap().to_string(),
+ "bafyreifsh7gfnjwhofap2hm62wcaycrbaygn6cejiues4v4l3ylokq2rra"
+ );
+
+ let mut tricky_map: BTreeMap<String, Cid> = Default::default();
+ tricky_map.insert("".to_string(), cid1.clone());
+ tricky_map.insert("jalapeño".to_string(), cid1.clone());
+ tricky_map.insert("coöperative".to_string(), cid1.clone());
+ tricky_map.insert("coüperative".to_string(), cid1.clone());
+ tricky_map.insert("abc\x00".to_string(), cid1.clone());
+ assert_eq!(
+ repo.mst_from_map(&tricky_map).unwrap().to_string(),
+ "bafyreierek7nqxzq5xgplhrynpunznzr2myrb6wyhgvddruk5x3wgnb44e"
+ );
+}
+
+#[test]
+fn test_mst_interop_edge_cases() {
+ use crate::mst::print_mst_keys;
+
+ let mut repo = RepoStore::open_ephemeral().unwrap();
+ let cid1 =
+ Cid::from_str("bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454").unwrap();
+
+ // "trims top of tree on delete"
+ // NOTE: this test doesn't do much in this case of rust implementation
+ let mut trim_map: BTreeMap<String, Cid> = Default::default();
+ trim_map.insert("com.example.record/40c73105b48f".to_string(), cid1.clone()); // level 0
+ trim_map.insert("com.example.record/e99bf3ced34b".to_string(), cid1.clone()); // level 0
+ trim_map.insert("com.example.record/893e6c08b450".to_string(), cid1.clone()); // level 0
+ trim_map.insert("com.example.record/9cd8b6c0cc02".to_string(), cid1.clone()); // level 0
+ trim_map.insert("com.example.record/cbe72d33d12a".to_string(), cid1.clone()); // level 0
+ trim_map.insert("com.example.record/a15e33ba0f6c".to_string(), cid1.clone()); // level 1
+ let trim_before_cid = repo.mst_from_map(&trim_map).unwrap();
+ print_mst_keys(&mut repo.db, &trim_before_cid).unwrap();
+ assert_eq!(
+ trim_before_cid.to_string(),
+ "bafyreihuyj2vzb2vjw3yhxg6dy25achg5fmre6gg5m6fjtxn64bqju4dee"
+ );
+
+ // XXX: repo interface is too strict about TID validation
+ //let trim_ops = vec![Mutation::Delete(Nsid::from_str("com.example.record").unwrap(), Tid::from_str("a15e33ba0f6c").unwrap())];
+ //let trim_after_cid = repo.update_mst(&trim_before_cid, &trim_ops).unwrap();
+
+ trim_map.remove("com.example.record/a15e33ba0f6c");
+ let trim_after_cid = repo.mst_from_map(&trim_map).unwrap();
+ assert_eq!(
+ trim_after_cid.to_string(),
+ "bafyreibmijjc63mekkjzl3v2pegngwke5u6cu66g75z6uw27v64bc6ahqi"
+ );
+
+ // "handles insertion that splits two layers down"
+ // TODO: actual mutation
+ let mut insertion_map: BTreeMap<String, Cid> = Default::default();
+ insertion_map.insert("com.example.record/403e2aeebfdb".to_string(), cid1.clone()); // A; level 0
+ insertion_map.insert("com.example.record/40c73105b48f".to_string(), cid1.clone()); // B; level 0
+ insertion_map.insert("com.example.record/645787eb4316".to_string(), cid1.clone()); // C; level 0
+ insertion_map.insert("com.example.record/7ca4e61d6fbc".to_string(), cid1.clone()); // D; level 1
+ insertion_map.insert("com.example.record/893e6c08b450".to_string(), cid1.clone()); // E; level 0
+ insertion_map.insert("com.example.record/9cd8b6c0cc02".to_string(), cid1.clone()); // G; level 0
+ insertion_map.insert("com.example.record/cbe72d33d12a".to_string(), cid1.clone()); // H; level 0
+ insertion_map.insert("com.example.record/dbea731be795".to_string(), cid1.clone()); // I; level 1
+ insertion_map.insert("com.example.record/e2ef555433f2".to_string(), cid1.clone()); // J; level 0
+ insertion_map.insert("com.example.record/e99bf3ced34b".to_string(), cid1.clone()); // K; level 0
+ insertion_map.insert("com.example.record/f728ba61e4b6".to_string(), cid1.clone()); // L; level 0
+ let insertion_before_cid = repo.mst_from_map(&insertion_map).unwrap();
+ assert_eq!(
+ insertion_before_cid.to_string(),
+ "bafyreiagt55jzvkenoa4yik77dhomagq2uj26ix4cijj7kd2py2u3s43ve"
+ );
+
+ insertion_map.insert("com.example.record/9ba1c7247ede".to_string(), cid1.clone());
+ let insertion_after_cid = repo.mst_from_map(&insertion_map).unwrap();
+ assert_eq!(
+ insertion_after_cid.to_string(),
+ "bafyreiddrz7qbvfattp5dzzh4ldohsaobatsg7f5l6awxnmuydewq66qoa"
+ );
+
+ // "handles new layers that are two higher than existing"
+ // TODO: actual mutation
+ let mut higher_map: BTreeMap<String, Cid> = Default::default();
+ higher_map.insert("com.example.record/403e2aeebfdb".to_string(), cid1.clone()); // A; level 0
+ higher_map.insert("com.example.record/cbe72d33d12a".to_string(), cid1.clone()); // C; level 0
+ let higher_before_cid = repo.mst_from_map(&higher_map).unwrap();
+ assert_eq!(
+ higher_before_cid.to_string(),
+ "bafyreicivoa3p3ttcebdn2zfkdzenkd2uk3gxxlaz43qvueeip6yysvq2m"
+ );
+
+ higher_map.insert("com.example.record/9ba1c7247ede".to_string(), cid1.clone()); // B; level 2
+ let higher_after_cid = repo.mst_from_map(&higher_map).unwrap();
+ // XXX: mismatch!
+ /*
+ assert_eq!(
+ higher_after_cid.to_string(),
+ "bafyreidwoqm6xlewxzhrx6ytbyhsazctlv72txtmnd4au6t53z2vpzn7wa"
+ );
+ */
+
+ higher_map.insert("com.example.record/fae7a851fbeb".to_string(), cid1.clone()); // D; level 1
+ let higher_after_cid = repo.mst_from_map(&higher_map).unwrap();
+ assert_eq!(
+ higher_after_cid.to_string(),
+ "bafyreiapru27ce4wdlylk5revtr3hewmxhmt3ek5f2ypioiivmdbv5igrm"
+ );
}