From ba8f10afd51a6f629291abdcfe25a70b7b517f1c Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Sat, 29 Oct 2022 01:17:11 -0700 Subject: PDS: crude progress on MST parsing/serialization --- Cargo.lock | 88 +++++++- adenosine-pds/Cargo.toml | 6 + adenosine-pds/plan.txt | 17 +- adenosine-pds/src/bin/adenosine-pds-dump-mst.rs | 129 +++++++++++ adenosine-pds/src/bin/adenosine-pds-import.rs | 53 +++++ adenosine-pds/src/bin/adenosine-pds-repro.rs | 286 ++++++++++++++++++++++++ 6 files changed, 565 insertions(+), 14 deletions(-) create mode 100644 adenosine-pds/src/bin/adenosine-pds-dump-mst.rs create mode 100644 adenosine-pds/src/bin/adenosine-pds-import.rs create mode 100644 adenosine-pds/src/bin/adenosine-pds-repro.rs diff --git a/Cargo.lock b/Cargo.lock index 1ecce7f..8043d4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -34,14 +34,20 @@ dependencies = [ name = "adenosine-pds" version = "0.1.0-dev.0" dependencies = [ + "anyhow", + "futures", "ipfs-sqlite-block-store", + "iroh-car", "jsonschema", + "libipld", "log", "pretty_env_logger", "rusqlite", "schemafy", "serde_json", + "sha256", "structopt", + "tokio", "warp", ] @@ -284,6 +290,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + [[package]] name = "block-buffer" version = "0.10.3" @@ -834,13 +849,22 @@ dependencies = [ "syn", ] +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + [[package]] name = "digest" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adfbc57365a37acbd2ebf2b64d7e69bb766e2fea813521ed536f5d0520dcf86c" dependencies = [ - "block-buffer", + "block-buffer 0.10.3", "crypto-common", ] @@ -1519,6 +1543,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "html5ever" version = "0.25.2" @@ -2209,9 +2239,9 @@ dependencies = [ "blake2s_simd", "blake3", "core2", - "digest", + "digest 0.10.5", "multihash-derive", - "sha2", + "sha2 0.10.6", "sha3", "unsigned-varint", ] @@ -2510,6 +2540,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + [[package]] name = "open" version = "3.0.3" @@ -3560,7 +3596,7 @@ checksum = "028f48d513f9678cda28f6e4064755b3fbb2af6acd672f2c209b62323f7aea0f" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.5", ] [[package]] @@ -3571,7 +3607,20 @@ checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.5", +] + +[[package]] +name = "sha2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" +dependencies = [ + "block-buffer 0.9.0", + "cfg-if", + "cpufeatures", + "digest 0.9.0", + "opaque-debug", ] [[package]] @@ -3582,7 +3631,17 @@ checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.5", +] + +[[package]] +name = "sha256" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e334db67871c14c18fc066ad14af13f9fdf5f9a91c61af432d1e3a39c8c6a141" +dependencies = [ + "hex", + "sha2 0.9.9", ] [[package]] @@ -3591,7 +3650,7 @@ version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bdf0c33fae925bdc080598b84bc15c55e7b9a4a43b3c704da051f977469691c9" dependencies = [ - "digest", + "digest 0.10.5", "keccak", ] @@ -3614,6 +3673,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "signal-hook-registry" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +dependencies = [ + "libc", +] + [[package]] name = "siphasher" version = "0.3.10" @@ -3964,7 +4032,7 @@ dependencies = [ "semver 1.0.14", "serde", "serde_json", - "sha2", + "sha2 0.10.6", "tauri-utils", "thiserror", "time", @@ -4199,7 +4267,9 @@ dependencies = [ "memchr", "mio", "num_cpus", + "parking_lot 0.12.1", "pin-project-lite", + "signal-hook-registry", "socket2", "tokio-macros", "winapi", @@ -5101,7 +5171,7 @@ dependencies = [ "once_cell", "serde", "serde_json", - "sha2", + "sha2 0.10.6", "tao", "thiserror", "url", diff --git a/adenosine-pds/Cargo.toml b/adenosine-pds/Cargo.toml index 5a80b6f..f08d999 100644 --- a/adenosine-pds/Cargo.toml +++ b/adenosine-pds/Cargo.toml @@ -13,15 +13,21 @@ readme = "../README.md" repository = "https://gitlab.com/bnewbold/adenosine" [dependencies] +anyhow = "*" structopt = "*" serde_json = "*" log = "*" pretty_env_logger = "*" +libipld = "*" ipfs-sqlite-block-store = "*" rusqlite = { version = "*", features = ["bundled"] } jsonschema = "*" schemafy = "*" warp = "*" +iroh-car = { version = "0.1.0-vendored.0", path = "../iroh-car" } +tokio = { version = "1", features = ["full"] } +futures = "0.3" +sha256 = "*" [package.metadata.deb] maintainer = "Bryan Newbold " diff --git a/adenosine-pds/plan.txt b/adenosine-pds/plan.txt index 1b55bb7..d615d77 100644 --- a/adenosine-pds/plan.txt +++ b/adenosine-pds/plan.txt @@ -1,12 +1,19 @@ PDS proof of concept: -- ipld sqlite driver importing CAR file -- MST code to read and mutate state -- sqlite schema -- JSON schema type generation (separate crate?) +x ipld sqlite driver importing CAR file + => simple binary, two args +- MST code to read and mutate tree state + => with tests +- implement basic non-authenticated CRUD on repository +? python test script +- sqlite schema (for application) - write wrapper which updates MST *and* updates other tables in a transaction -- did:web thingie? +- JSON schema type generation (separate crate?) - HTTP API handler implementing most endpoints +- did:web handler? + +other utils/helpers: +- pack/unpack a repo CAR into JSON files in a directory tree (plus a commit.json with sig?) libraries: - `jsonschema` to validate requests and records (rich validation) diff --git a/adenosine-pds/src/bin/adenosine-pds-dump-mst.rs b/adenosine-pds/src/bin/adenosine-pds-dump-mst.rs new file mode 100644 index 0000000..1f1b947 --- /dev/null +++ b/adenosine-pds/src/bin/adenosine-pds-dump-mst.rs @@ -0,0 +1,129 @@ +/// Helper program to print MST keys/docs from a sqlite repo +use anyhow::{anyhow, Result}; +use ipfs_sqlite_block_store::BlockStore; +use libipld::cbor::DagCborCodec; +use libipld::prelude::Codec; +use libipld::{Cid, DagCbor}; + +use std::env; + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct CommitNode { + root: Cid, + sig: Box<[u8]>, +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct RootNode { + auth_token: Option, + prev: Option, + // TODO: not 'metadata'? + meta: Cid, + data: Cid, +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct MetadataNode { + datastore: String, // "mst" + did: String, + version: u8, // 1 +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct MstEntry { + k: String, + p: u32, + v: Cid, + t: Option, +} + +#[derive(Debug, DagCbor, PartialEq)] +struct MstNode { + l: Option, + e: Vec, +} + +fn get_mst_node(db: &mut BlockStore, cid: &Cid) -> Result { + let mst_node: MstNode = DagCborCodec.decode( + &db.get_block(cid)? + .ok_or(anyhow!("expected block in store"))?, + )?; + Ok(mst_node) +} + +fn print_mst_keys(db: &mut BlockStore, cid: &Cid) -> Result<()> { + let node = get_mst_node(db, cid)?; + if let Some(ref left) = node.l { + print_mst_keys(db, left)?; + } + let mut key: String = "".to_string(); + for entry in node.e.iter() { + key = format!("{}{}", &key[0..entry.p as usize], entry.k); + println!("{}\t-> {}", key, entry.v); + if let Some(ref right) = entry.t { + print_mst_keys(db, right)?; + } + } + Ok(()) +} + +async fn dump_mst_keys(db_path: &str) -> Result<()> { + let mut db: BlockStore = { + let path = std::path::PathBuf::from(db_path); + let path = ipfs_sqlite_block_store::DbPath::File(path); + BlockStore::open_path(path, Default::default())? + }; + + let all_aliases: Vec<(Vec, Cid)> = db.aliases()?; + if all_aliases.is_empty() { + println!("expected at least one alias in block store"); + std::process::exit(-1); + } + let (alias, commit_cid) = all_aliases[0].clone(); + println!( + "starting from {} [{}]", + commit_cid, + String::from_utf8_lossy(&alias) + ); + + // NOTE: the faster way to develop would have been to decode to libipld::ipld::Ipld first? meh + + //println!("raw commit: {:?}", &db.get_block(&commit_cid)?.ok_or(anyhow!("expected commit block in store"))?); + let commit: CommitNode = DagCborCodec.decode( + &db.get_block(&commit_cid)? + .ok_or(anyhow!("expected commit block in store"))?, + )?; + println!("Commit: {:?}", commit); + //println!("raw root: {:?}", &db.get_block(&commit.root)?.ok_or(anyhow!("expected commit block in store"))?); + let root: RootNode = DagCborCodec.decode( + &db.get_block(&commit.root)? + .ok_or(anyhow!("expected root block in store"))?, + )?; + println!("Root: {:?}", root); + let metadata: MetadataNode = DagCborCodec.decode( + &db.get_block(&root.meta)? + .ok_or(anyhow!("expected metadata block in store"))?, + )?; + println!("Metadata: {:?}", metadata); + let mst_node: MstNode = DagCborCodec.decode( + &db.get_block(&root.data)? + .ok_or(anyhow!("expected block in store"))?, + )?; + println!("MST root node: {:?}", mst_node); + + println!("============"); + + print_mst_keys(&mut db, &root.data)?; + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<()> { + let args: Vec = env::args().collect(); + if args.len() != 2 { + println!("expected 1 args: "); + std::process::exit(-1); + } + let db_path = &args[1]; + dump_mst_keys(db_path).await +} diff --git a/adenosine-pds/src/bin/adenosine-pds-import.rs b/adenosine-pds/src/bin/adenosine-pds-import.rs new file mode 100644 index 0000000..37ea6eb --- /dev/null +++ b/adenosine-pds/src/bin/adenosine-pds-import.rs @@ -0,0 +1,53 @@ +/// Helper program to import an IPLD CARv1 file in to sqlite data store +use anyhow::{anyhow, Result}; +use futures::TryStreamExt; +use ipfs_sqlite_block_store::BlockStore; +use iroh_car::CarReader; +use libipld::block::Block; +use tokio::fs::File; +use tokio::io::BufReader; + +use std::env; + +async fn load_car_to_sqlite(db_path: &str, car_path: &str) -> Result<()> { + let car_reader = { + let file = File::open(car_path).await?; + let buf_reader = BufReader::new(file); + CarReader::new(buf_reader).await? + }; + let car_header = car_reader.header().clone(); + let mut db: BlockStore = { + let path = std::path::PathBuf::from(db_path); + let path = ipfs_sqlite_block_store::DbPath::File(path); + BlockStore::open_path(path, Default::default())? + }; + + car_reader + .stream() + .try_for_each(|(cid, raw)| { + // TODO: error handling here instead of unwrap (?) + let block = Block::new(cid, raw).unwrap(); + db.put_block(block, None).unwrap(); + futures::future::ready(Ok(())) + }) + .await?; + + // pin the header + if car_header.roots().len() >= 1 { + db.alias(b"import".to_vec(), Some(&car_header.roots()[0]))?; + } + + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<()> { + let args: Vec = env::args().collect(); + if args.len() != 3 { + println!("expected 2 args: "); + std::process::exit(-1); + } + let db_path = &args[1]; + let car_path = &args[2]; + load_car_to_sqlite(db_path, car_path).await +} diff --git a/adenosine-pds/src/bin/adenosine-pds-repro.rs b/adenosine-pds/src/bin/adenosine-pds-repro.rs new file mode 100644 index 0000000..05048ce --- /dev/null +++ b/adenosine-pds/src/bin/adenosine-pds-repro.rs @@ -0,0 +1,286 @@ +/// Development helper which loads MST keys and CIDs, re-generates MST structure, then compares +/// root node with what was originally found. +use anyhow::{anyhow, Result}; +use ipfs_sqlite_block_store::BlockStore; +use libipld::cbor::DagCborCodec; +use libipld::multihash::Code; +use libipld::prelude::Codec; +use libipld::store::DefaultParams; +use libipld::Block; +use libipld::{Cid, DagCbor}; +use std::collections::BTreeMap; + +use std::env; + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct CommitNode { + root: Cid, + sig: Box<[u8]>, +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct RootNode { + auth_token: Option, + prev: Option, + // TODO: not 'metadata'? + meta: Cid, + data: Cid, +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct MetadataNode { + datastore: String, // "mst" + did: String, + version: u8, // 1 +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct MstEntry { + k: String, + p: u32, + v: Cid, + t: Option, +} + +#[derive(Debug, DagCbor, PartialEq)] +struct MstNode { + l: Option, + e: Vec, +} + +struct WipEntry { + height: u8, + key: String, + val: Cid, + right: Option>, +} + +struct WipNode { + height: u8, + left: Option>, + entries: Vec, +} + +fn get_mst_node(db: &mut BlockStore, cid: &Cid) -> Result { + let mst_node: MstNode = DagCborCodec.decode( + &db.get_block(cid)? + .ok_or(anyhow!("expected block in store"))?, + )?; + Ok(mst_node) +} + +fn collect_mst_keys( + db: &mut BlockStore, + cid: &Cid, + map: &mut BTreeMap, +) -> Result<()> { + let node = get_mst_node(db, cid)?; + if let Some(ref left) = node.l { + collect_mst_keys(db, left, map)?; + } + let mut key: String = "".to_string(); + for entry in node.e.iter() { + key = format!("{}{}", &key[0..entry.p as usize], entry.k); + map.insert(key.clone(), entry.v); + if let Some(ref right) = entry.t { + collect_mst_keys(db, right, map)?; + } + } + Ok(()) +} + +fn leading_zeros(key: &str) -> u8 { + let digest = sha256::digest(key); + let digest = digest.as_bytes(); + for i in 0..digest.len() { + if digest[i] != '0' as u8 { + return i as u8; + } + } + digest.len() as u8 +} + +fn generate_mst( + db: &mut BlockStore, + map: &mut BTreeMap, +) -> Result { + // construct a "WIP" tree + let mut root: Option = None; + for (key, val) in map { + let height = leading_zeros(key); + let entry = WipEntry { + height, + key: key.clone(), + val: val.clone(), + right: None, + }; + if let Some(node) = root { + root = Some(insert_entry(node, entry)); + } else { + root = Some(WipNode { + height: entry.height, + left: None, + entries: vec![entry], + }); + } + } + serialize_wip_tree(db, root.expect("non-empty MST tree")) +} + +fn insert_entry(mut node: WipNode, entry: WipEntry) -> WipNode { + // if we are higher on tree than existing node, replace it with new layer first + if entry.height > node.height { + node = WipNode { + height: entry.height, + left: Some(Box::new(node)), + entries: vec![], + } + }; + // if we are lower on tree, then need to descend first + if entry.height < node.height { + // we should never be lower down the left than an existing node, and always to the right + let mut last = node.entries.pop().expect("hit empty existing entry list"); + assert!(entry.key > last.key); + if last.right.is_some() { + last.right = Some(Box::new(insert_entry(*last.right.unwrap(), entry))); + } else { + last.right = Some(Box::new(WipNode { + height: entry.height, + left: None, + entries: vec![entry], + })); + } + node.entries.push(last); + return node; + } + // same height, simply append to end (but verify first) + assert!(node.height == entry.height); + if !node.entries.is_empty() { + let last = &node.entries.last().unwrap(); + assert!(entry.key > last.key); + } + node.entries.push(entry); + node +} + +/// returns the length of common characters between the two strings. Strings must be simple ASCII, +/// which should hold for current ATP MST keys (collection plus TID) +fn common_prefix_len(a: &str, b: &str) -> usize { + let a = a.as_bytes(); + let b = b.as_bytes(); + for i in 0..std::cmp::min(a.len(), b.len()) { + if a[i] != b[i] { + return i; + } + } + // strings are the same, up to common length + a.len() +} + +#[test] +fn test_common_prefix_len() { + assert_eq!(common_prefix_len("abc", "abc"), 3); + assert_eq!(common_prefix_len("abcde", "abc"), 3); + assert_eq!(common_prefix_len("abcde", "abb"), 2); + assert_eq!(common_prefix_len("", "asdf"), 0); +} + +fn serialize_wip_tree( + db: &mut BlockStore, + wip_node: WipNode, +) -> Result { + let left: Option = if let Some(left) = wip_node.left { + Some(serialize_wip_tree(db, *left)?) + } else { + None + }; + + let mut entries: Vec = vec![]; + let mut last_key = "".to_string(); + for wip_entry in wip_node.entries { + let right: Option = if let Some(right) = wip_entry.right { + Some(serialize_wip_tree(db, *right)?) + } else { + None + }; + let prefix_len = common_prefix_len(&last_key, &wip_entry.key); + entries.push(MstEntry { + k: wip_entry.key[prefix_len..].to_string(), + p: prefix_len as u32, + v: wip_entry.val, + t: right, + }); + last_key = wip_entry.key; + } + let mst_node = MstNode { + l: left, + e: entries, + }; + let block = Block::::encode(DagCborCodec, Code::Sha2_256, &mst_node)?; + let cid = block.cid().clone(); + db.put_block(block, None)?; + Ok(cid) +} + +async fn repro_mst(db_path: &str) -> Result<()> { + let mut db: BlockStore = { + let path = std::path::PathBuf::from(db_path); + let path = ipfs_sqlite_block_store::DbPath::File(path); + BlockStore::open_path(path, Default::default())? + }; + + let all_aliases: Vec<(Vec, Cid)> = db.aliases()?; + if all_aliases.is_empty() { + println!("expected at least one alias in block store"); + std::process::exit(-1); + } + let (_alias, commit_cid) = all_aliases[0].clone(); + + let commit_node: CommitNode = DagCborCodec.decode( + &db.get_block(&commit_cid)? + .ok_or(anyhow!("expected commit block in store"))?, + )?; + let root_node: RootNode = DagCborCodec.decode( + &db.get_block(&commit_node.root)? + .ok_or(anyhow!("expected root block in store"))?, + )?; + let _metadata_node: MetadataNode = DagCborCodec.decode( + &db.get_block(&root_node.meta)? + .ok_or(anyhow!("expected metadata block in store"))?, + )?; + + // collect key/value sorted map of string/cid, as BTree + let mut repo_map: BTreeMap = BTreeMap::new(); + collect_mst_keys(&mut db, &root_node.data, &mut repo_map)?; + + for (k, v) in repo_map.iter() { + println!("{}\t-> {}", k, v); + } + + // now re-generate nodes + let updated = generate_mst(&mut db, &mut repo_map)?; + + println!("original root: {}", root_node.data); + println!("regenerated : {}", updated); + if root_node.data == updated { + println!("SUCCESS! (amazing)"); + } else { + println!("FAILED"); + let a = get_mst_node(&mut db, &root_node.data)?; + let b = get_mst_node(&mut db, &updated)?; + println!("A: {:?}", a); + println!("B: {:?}", b); + }; + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<()> { + let args: Vec = env::args().collect(); + if args.len() != 2 { + println!("expected 1 args: "); + std::process::exit(-1); + } + let db_path = &args[1]; + repro_mst(db_path).await +} -- cgit v1.2.3