From 7f43d097d84c4b3f9a63981c3f6a67db82046bd3 Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Mon, 31 Oct 2022 17:19:29 -0700 Subject: pds: move earlier commands to lib and tests --- .gitignore | 1 + adenosine-pds/.gitignore | 1 - adenosine-pds/src/bin/adenosine-pds-dump-mst.rs | 129 --------- adenosine-pds/src/bin/adenosine-pds-import.rs | 53 ---- adenosine-pds/src/bin/adenosine-pds-repro.rs | 286 -------------------- adenosine-pds/src/bin/adenosine-pds.rs | 33 +-- adenosine-pds/src/lib.rs | 71 ++++- adenosine-pds/src/mst.rs | 336 ++++++++++++++++++++++++ adenosine-pds/tests/bigger.car | Bin 0 -> 60050 bytes adenosine-pds/tests/example_repo.car | Bin 0 -> 7390 bytes adenosine-pds/tests/test_repro_mst.rs | 10 + 11 files changed, 428 insertions(+), 492 deletions(-) delete mode 100644 adenosine-pds/.gitignore delete mode 100644 adenosine-pds/src/bin/adenosine-pds-dump-mst.rs delete mode 100644 adenosine-pds/src/bin/adenosine-pds-import.rs delete mode 100644 adenosine-pds/src/bin/adenosine-pds-repro.rs create mode 100644 adenosine-pds/src/mst.rs create mode 100644 adenosine-pds/tests/bigger.car create mode 100644 adenosine-pds/tests/example_repo.car create mode 100644 adenosine-pds/tests/test_repro_mst.rs diff --git a/.gitignore b/.gitignore index 6543366..d54b016 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ _build/ src/build/ *.log target/ +*.sqlite # Don't ignore this file itself !.gitignore diff --git a/adenosine-pds/.gitignore b/adenosine-pds/.gitignore deleted file mode 100644 index ea8c4bf..0000000 --- a/adenosine-pds/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target diff --git a/adenosine-pds/src/bin/adenosine-pds-dump-mst.rs b/adenosine-pds/src/bin/adenosine-pds-dump-mst.rs deleted file mode 100644 index 1f1b947..0000000 --- a/adenosine-pds/src/bin/adenosine-pds-dump-mst.rs +++ /dev/null @@ -1,129 +0,0 @@ -/// Helper program to print MST keys/docs from a sqlite repo -use anyhow::{anyhow, Result}; -use ipfs_sqlite_block_store::BlockStore; -use libipld::cbor::DagCborCodec; -use libipld::prelude::Codec; -use libipld::{Cid, DagCbor}; - -use std::env; - -#[derive(Debug, DagCbor, PartialEq, Eq)] -struct CommitNode { - root: Cid, - sig: Box<[u8]>, -} - -#[derive(Debug, DagCbor, PartialEq, Eq)] -struct RootNode { - auth_token: Option, - prev: Option, - // TODO: not 'metadata'? - meta: Cid, - data: Cid, -} - -#[derive(Debug, DagCbor, PartialEq, Eq)] -struct MetadataNode { - datastore: String, // "mst" - did: String, - version: u8, // 1 -} - -#[derive(Debug, DagCbor, PartialEq, Eq)] -struct MstEntry { - k: String, - p: u32, - v: Cid, - t: Option, -} - -#[derive(Debug, DagCbor, PartialEq)] -struct MstNode { - l: Option, - e: Vec, -} - -fn get_mst_node(db: &mut BlockStore, cid: &Cid) -> Result { - let mst_node: MstNode = DagCborCodec.decode( - &db.get_block(cid)? - .ok_or(anyhow!("expected block in store"))?, - )?; - Ok(mst_node) -} - -fn print_mst_keys(db: &mut BlockStore, cid: &Cid) -> Result<()> { - let node = get_mst_node(db, cid)?; - if let Some(ref left) = node.l { - print_mst_keys(db, left)?; - } - let mut key: String = "".to_string(); - for entry in node.e.iter() { - key = format!("{}{}", &key[0..entry.p as usize], entry.k); - println!("{}\t-> {}", key, entry.v); - if let Some(ref right) = entry.t { - print_mst_keys(db, right)?; - } - } - Ok(()) -} - -async fn dump_mst_keys(db_path: &str) -> Result<()> { - let mut db: BlockStore = { - let path = std::path::PathBuf::from(db_path); - let path = ipfs_sqlite_block_store::DbPath::File(path); - BlockStore::open_path(path, Default::default())? - }; - - let all_aliases: Vec<(Vec, Cid)> = db.aliases()?; - if all_aliases.is_empty() { - println!("expected at least one alias in block store"); - std::process::exit(-1); - } - let (alias, commit_cid) = all_aliases[0].clone(); - println!( - "starting from {} [{}]", - commit_cid, - String::from_utf8_lossy(&alias) - ); - - // NOTE: the faster way to develop would have been to decode to libipld::ipld::Ipld first? meh - - //println!("raw commit: {:?}", &db.get_block(&commit_cid)?.ok_or(anyhow!("expected commit block in store"))?); - let commit: CommitNode = DagCborCodec.decode( - &db.get_block(&commit_cid)? - .ok_or(anyhow!("expected commit block in store"))?, - )?; - println!("Commit: {:?}", commit); - //println!("raw root: {:?}", &db.get_block(&commit.root)?.ok_or(anyhow!("expected commit block in store"))?); - let root: RootNode = DagCborCodec.decode( - &db.get_block(&commit.root)? - .ok_or(anyhow!("expected root block in store"))?, - )?; - println!("Root: {:?}", root); - let metadata: MetadataNode = DagCborCodec.decode( - &db.get_block(&root.meta)? - .ok_or(anyhow!("expected metadata block in store"))?, - )?; - println!("Metadata: {:?}", metadata); - let mst_node: MstNode = DagCborCodec.decode( - &db.get_block(&root.data)? - .ok_or(anyhow!("expected block in store"))?, - )?; - println!("MST root node: {:?}", mst_node); - - println!("============"); - - print_mst_keys(&mut db, &root.data)?; - Ok(()) -} - -#[tokio::main] -async fn main() -> Result<()> { - let args: Vec = env::args().collect(); - if args.len() != 2 { - println!("expected 1 args: "); - std::process::exit(-1); - } - let db_path = &args[1]; - dump_mst_keys(db_path).await -} diff --git a/adenosine-pds/src/bin/adenosine-pds-import.rs b/adenosine-pds/src/bin/adenosine-pds-import.rs deleted file mode 100644 index 37ea6eb..0000000 --- a/adenosine-pds/src/bin/adenosine-pds-import.rs +++ /dev/null @@ -1,53 +0,0 @@ -/// Helper program to import an IPLD CARv1 file in to sqlite data store -use anyhow::{anyhow, Result}; -use futures::TryStreamExt; -use ipfs_sqlite_block_store::BlockStore; -use iroh_car::CarReader; -use libipld::block::Block; -use tokio::fs::File; -use tokio::io::BufReader; - -use std::env; - -async fn load_car_to_sqlite(db_path: &str, car_path: &str) -> Result<()> { - let car_reader = { - let file = File::open(car_path).await?; - let buf_reader = BufReader::new(file); - CarReader::new(buf_reader).await? - }; - let car_header = car_reader.header().clone(); - let mut db: BlockStore = { - let path = std::path::PathBuf::from(db_path); - let path = ipfs_sqlite_block_store::DbPath::File(path); - BlockStore::open_path(path, Default::default())? - }; - - car_reader - .stream() - .try_for_each(|(cid, raw)| { - // TODO: error handling here instead of unwrap (?) - let block = Block::new(cid, raw).unwrap(); - db.put_block(block, None).unwrap(); - futures::future::ready(Ok(())) - }) - .await?; - - // pin the header - if car_header.roots().len() >= 1 { - db.alias(b"import".to_vec(), Some(&car_header.roots()[0]))?; - } - - Ok(()) -} - -#[tokio::main] -async fn main() -> Result<()> { - let args: Vec = env::args().collect(); - if args.len() != 3 { - println!("expected 2 args: "); - std::process::exit(-1); - } - let db_path = &args[1]; - let car_path = &args[2]; - load_car_to_sqlite(db_path, car_path).await -} diff --git a/adenosine-pds/src/bin/adenosine-pds-repro.rs b/adenosine-pds/src/bin/adenosine-pds-repro.rs deleted file mode 100644 index 05048ce..0000000 --- a/adenosine-pds/src/bin/adenosine-pds-repro.rs +++ /dev/null @@ -1,286 +0,0 @@ -/// Development helper which loads MST keys and CIDs, re-generates MST structure, then compares -/// root node with what was originally found. -use anyhow::{anyhow, Result}; -use ipfs_sqlite_block_store::BlockStore; -use libipld::cbor::DagCborCodec; -use libipld::multihash::Code; -use libipld::prelude::Codec; -use libipld::store::DefaultParams; -use libipld::Block; -use libipld::{Cid, DagCbor}; -use std::collections::BTreeMap; - -use std::env; - -#[derive(Debug, DagCbor, PartialEq, Eq)] -struct CommitNode { - root: Cid, - sig: Box<[u8]>, -} - -#[derive(Debug, DagCbor, PartialEq, Eq)] -struct RootNode { - auth_token: Option, - prev: Option, - // TODO: not 'metadata'? - meta: Cid, - data: Cid, -} - -#[derive(Debug, DagCbor, PartialEq, Eq)] -struct MetadataNode { - datastore: String, // "mst" - did: String, - version: u8, // 1 -} - -#[derive(Debug, DagCbor, PartialEq, Eq)] -struct MstEntry { - k: String, - p: u32, - v: Cid, - t: Option, -} - -#[derive(Debug, DagCbor, PartialEq)] -struct MstNode { - l: Option, - e: Vec, -} - -struct WipEntry { - height: u8, - key: String, - val: Cid, - right: Option>, -} - -struct WipNode { - height: u8, - left: Option>, - entries: Vec, -} - -fn get_mst_node(db: &mut BlockStore, cid: &Cid) -> Result { - let mst_node: MstNode = DagCborCodec.decode( - &db.get_block(cid)? - .ok_or(anyhow!("expected block in store"))?, - )?; - Ok(mst_node) -} - -fn collect_mst_keys( - db: &mut BlockStore, - cid: &Cid, - map: &mut BTreeMap, -) -> Result<()> { - let node = get_mst_node(db, cid)?; - if let Some(ref left) = node.l { - collect_mst_keys(db, left, map)?; - } - let mut key: String = "".to_string(); - for entry in node.e.iter() { - key = format!("{}{}", &key[0..entry.p as usize], entry.k); - map.insert(key.clone(), entry.v); - if let Some(ref right) = entry.t { - collect_mst_keys(db, right, map)?; - } - } - Ok(()) -} - -fn leading_zeros(key: &str) -> u8 { - let digest = sha256::digest(key); - let digest = digest.as_bytes(); - for i in 0..digest.len() { - if digest[i] != '0' as u8 { - return i as u8; - } - } - digest.len() as u8 -} - -fn generate_mst( - db: &mut BlockStore, - map: &mut BTreeMap, -) -> Result { - // construct a "WIP" tree - let mut root: Option = None; - for (key, val) in map { - let height = leading_zeros(key); - let entry = WipEntry { - height, - key: key.clone(), - val: val.clone(), - right: None, - }; - if let Some(node) = root { - root = Some(insert_entry(node, entry)); - } else { - root = Some(WipNode { - height: entry.height, - left: None, - entries: vec![entry], - }); - } - } - serialize_wip_tree(db, root.expect("non-empty MST tree")) -} - -fn insert_entry(mut node: WipNode, entry: WipEntry) -> WipNode { - // if we are higher on tree than existing node, replace it with new layer first - if entry.height > node.height { - node = WipNode { - height: entry.height, - left: Some(Box::new(node)), - entries: vec![], - } - }; - // if we are lower on tree, then need to descend first - if entry.height < node.height { - // we should never be lower down the left than an existing node, and always to the right - let mut last = node.entries.pop().expect("hit empty existing entry list"); - assert!(entry.key > last.key); - if last.right.is_some() { - last.right = Some(Box::new(insert_entry(*last.right.unwrap(), entry))); - } else { - last.right = Some(Box::new(WipNode { - height: entry.height, - left: None, - entries: vec![entry], - })); - } - node.entries.push(last); - return node; - } - // same height, simply append to end (but verify first) - assert!(node.height == entry.height); - if !node.entries.is_empty() { - let last = &node.entries.last().unwrap(); - assert!(entry.key > last.key); - } - node.entries.push(entry); - node -} - -/// returns the length of common characters between the two strings. Strings must be simple ASCII, -/// which should hold for current ATP MST keys (collection plus TID) -fn common_prefix_len(a: &str, b: &str) -> usize { - let a = a.as_bytes(); - let b = b.as_bytes(); - for i in 0..std::cmp::min(a.len(), b.len()) { - if a[i] != b[i] { - return i; - } - } - // strings are the same, up to common length - a.len() -} - -#[test] -fn test_common_prefix_len() { - assert_eq!(common_prefix_len("abc", "abc"), 3); - assert_eq!(common_prefix_len("abcde", "abc"), 3); - assert_eq!(common_prefix_len("abcde", "abb"), 2); - assert_eq!(common_prefix_len("", "asdf"), 0); -} - -fn serialize_wip_tree( - db: &mut BlockStore, - wip_node: WipNode, -) -> Result { - let left: Option = if let Some(left) = wip_node.left { - Some(serialize_wip_tree(db, *left)?) - } else { - None - }; - - let mut entries: Vec = vec![]; - let mut last_key = "".to_string(); - for wip_entry in wip_node.entries { - let right: Option = if let Some(right) = wip_entry.right { - Some(serialize_wip_tree(db, *right)?) - } else { - None - }; - let prefix_len = common_prefix_len(&last_key, &wip_entry.key); - entries.push(MstEntry { - k: wip_entry.key[prefix_len..].to_string(), - p: prefix_len as u32, - v: wip_entry.val, - t: right, - }); - last_key = wip_entry.key; - } - let mst_node = MstNode { - l: left, - e: entries, - }; - let block = Block::::encode(DagCborCodec, Code::Sha2_256, &mst_node)?; - let cid = block.cid().clone(); - db.put_block(block, None)?; - Ok(cid) -} - -async fn repro_mst(db_path: &str) -> Result<()> { - let mut db: BlockStore = { - let path = std::path::PathBuf::from(db_path); - let path = ipfs_sqlite_block_store::DbPath::File(path); - BlockStore::open_path(path, Default::default())? - }; - - let all_aliases: Vec<(Vec, Cid)> = db.aliases()?; - if all_aliases.is_empty() { - println!("expected at least one alias in block store"); - std::process::exit(-1); - } - let (_alias, commit_cid) = all_aliases[0].clone(); - - let commit_node: CommitNode = DagCborCodec.decode( - &db.get_block(&commit_cid)? - .ok_or(anyhow!("expected commit block in store"))?, - )?; - let root_node: RootNode = DagCborCodec.decode( - &db.get_block(&commit_node.root)? - .ok_or(anyhow!("expected root block in store"))?, - )?; - let _metadata_node: MetadataNode = DagCborCodec.decode( - &db.get_block(&root_node.meta)? - .ok_or(anyhow!("expected metadata block in store"))?, - )?; - - // collect key/value sorted map of string/cid, as BTree - let mut repo_map: BTreeMap = BTreeMap::new(); - collect_mst_keys(&mut db, &root_node.data, &mut repo_map)?; - - for (k, v) in repo_map.iter() { - println!("{}\t-> {}", k, v); - } - - // now re-generate nodes - let updated = generate_mst(&mut db, &mut repo_map)?; - - println!("original root: {}", root_node.data); - println!("regenerated : {}", updated); - if root_node.data == updated { - println!("SUCCESS! (amazing)"); - } else { - println!("FAILED"); - let a = get_mst_node(&mut db, &root_node.data)?; - let b = get_mst_node(&mut db, &updated)?; - println!("A: {:?}", a); - println!("B: {:?}", b); - }; - Ok(()) -} - -#[tokio::main] -async fn main() -> Result<()> { - let args: Vec = env::args().collect(); - if args.len() != 2 { - println!("expected 1 args: "); - std::process::exit(-1); - } - let db_path = &args[1]; - repro_mst(db_path).await -} diff --git a/adenosine-pds/src/bin/adenosine-pds.rs b/adenosine-pds/src/bin/adenosine-pds.rs index 5d0e638..44c4cef 100644 --- a/adenosine-pds/src/bin/adenosine-pds.rs +++ b/adenosine-pds/src/bin/adenosine-pds.rs @@ -4,7 +4,6 @@ use anyhow::Result; use log::{self, debug}; use structopt::StructOpt; - #[derive(StructOpt)] #[structopt( rename_all = "kebab-case", @@ -12,7 +11,6 @@ use structopt::StructOpt; )] struct Opt { // TODO: different path type for structopt? - /// File path of sqlite database for storing IPLD blocks (aka, repository content) #[structopt( parse(from_os_str), @@ -44,12 +42,18 @@ struct Opt { #[derive(StructOpt)] enum Command { /// Start ATP server as a foreground process - Serve, + Serve { + #[structopt(long, default_value = "3030")] + port: u16, + }, - /// Import a CAR file (TODO) - Import, + /// Helper to import an IPLD CARv1 file in to sqlite data store + Import { + /// CARv1 file path to import from + car_path: std::path::PathBuf, + }, - /// Dump info from databases (TODO) + /// Helper to print MST keys/docs from a sqlite repo Inspect, } @@ -74,16 +78,13 @@ fn main() -> Result<()> { debug!("config parsed, starting up"); match opt.cmd { - Command::Serve {} => { + Command::Serve { port } => { // TODO: log some config stuff? - run_server() - }, - Command::Import {} => { - unimplemented!() - }, - Command::Inspect {} => { - unimplemented!() - }, + run_server(port) + } + Command::Import { car_path } => { + load_car_to_sqlite(&opt.blockstore_db_path, &car_path) + } + Command::Inspect {} => dump_mst_keys(&opt.blockstore_db_path), } } - diff --git a/adenosine-pds/src/lib.rs b/adenosine-pds/src/lib.rs index f321d3f..1785640 100644 --- a/adenosine-pds/src/lib.rs +++ b/adenosine-pds/src/lib.rs @@ -1,20 +1,35 @@ - use anyhow::Result; -use log::{info, error}; -use rouille::{Request, Response, router}; +use log::{error, info}; +use rouille::{router, Request, Response}; + +use futures::TryStreamExt; +use ipfs_sqlite_block_store::BlockStore; +use iroh_car::CarReader; +use libipld::block::Block; +use std::path::PathBuf; +use tokio::fs::File; +use tokio::io::BufReader; -pub fn run_server() -> Result<()> { +mod mst; +pub use mst::{dump_mst_keys, repro_mst}; + +pub fn run_server(port: u16) -> Result<()> { // TODO: log access requests // TODO: some static files? https://github.com/tomaka/rouille/blob/master/examples/static-files.rs - let log_ok = |req: &Request, resp: &Response, elap: std::time::Duration| { + let log_ok = |req: &Request, _resp: &Response, elap: std::time::Duration| { info!("{} {} ({:?})", req.method(), req.raw_url(), elap); }; let log_err = |req: &Request, elap: std::time::Duration| { - error!("HTTP handler panicked: {} {} ({:?})", req.method(), req.raw_url(), elap); + error!( + "HTTP handler panicked: {} {} ({:?})", + req.method(), + req.raw_url(), + elap + ); }; - rouille::start_server("localhost:3030", move |request| { + rouille::start_server(format!("localhost:{}", port), move |request| { rouille::log_custom(request, log_ok, log_err, || { router!(request, (GET) ["/"] => { @@ -33,3 +48,45 @@ pub fn run_server() -> Result<()> { }) }); } + +pub fn load_car_to_sqlite(db_path: &PathBuf, car_path: &PathBuf) -> Result<()> { + let mut db: BlockStore = + { BlockStore::open(db_path, Default::default())? }; + + load_car_to_blockstore(&mut db, car_path) +} + +pub fn load_car_to_blockstore(db: &mut BlockStore, car_path: &PathBuf) -> Result<()> { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; + rt.block_on(inner_car_loader(db, car_path)) +} + +// this async function is wrapped in the sync version above +async fn inner_car_loader(db: &mut BlockStore, car_path: &PathBuf) -> Result<()> { + println!("{} - {}", std::env::current_dir()?.display(), car_path.display()); + let car_reader = { + let file = File::open(car_path).await?; + let buf_reader = BufReader::new(file); + CarReader::new(buf_reader).await? + }; + let car_header = car_reader.header().clone(); + + car_reader + .stream() + .try_for_each(|(cid, raw)| { + // TODO: error handling here instead of unwrap (?) + let block = Block::new(cid, raw).unwrap(); + db.put_block(block, None).unwrap(); + futures::future::ready(Ok(())) + }) + .await?; + + // pin the header + if car_header.roots().len() >= 1 { + db.alias(b"import".to_vec(), Some(&car_header.roots()[0]))?; + } + + Ok(()) +} diff --git a/adenosine-pds/src/mst.rs b/adenosine-pds/src/mst.rs new file mode 100644 index 0000000..a2a394f --- /dev/null +++ b/adenosine-pds/src/mst.rs @@ -0,0 +1,336 @@ +use anyhow::{anyhow, Result}; +use ipfs_sqlite_block_store::BlockStore; +use libipld::cbor::DagCborCodec; +use libipld::multihash::Code; +use libipld::prelude::Codec; +use libipld::store::DefaultParams; +use libipld::Block; +use libipld::{Cid, DagCbor}; +use log::{debug, error, info}; +use std::collections::BTreeMap; +use std::path::PathBuf; +use crate::load_car_to_blockstore; + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct CommitNode { + root: Cid, + sig: Box<[u8]>, +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct RootNode { + auth_token: Option, + prev: Option, + // TODO: not 'metadata'? + meta: Cid, + data: Cid, +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct MetadataNode { + datastore: String, // "mst" + did: String, + version: u8, // 1 +} + +#[derive(Debug, DagCbor, PartialEq, Eq)] +struct MstEntry { + k: String, + p: u32, + v: Cid, + t: Option, +} + +#[derive(Debug, DagCbor, PartialEq)] +struct MstNode { + l: Option, + e: Vec, +} + +struct WipEntry { + height: u8, + key: String, + val: Cid, + right: Option>, +} + +struct WipNode { + height: u8, + left: Option>, + entries: Vec, +} + +fn get_mst_node(db: &mut BlockStore, cid: &Cid) -> Result { + let mst_node: MstNode = DagCborCodec.decode( + &db.get_block(cid)? + .ok_or(anyhow!("expected block in store"))?, + )?; + Ok(mst_node) +} + +fn print_mst_keys(db: &mut BlockStore, cid: &Cid) -> Result<()> { + let node = get_mst_node(db, cid)?; + if let Some(ref left) = node.l { + print_mst_keys(db, left)?; + } + let mut key: String = "".to_string(); + for entry in node.e.iter() { + key = format!("{}{}", &key[0..entry.p as usize], entry.k); + println!("{}\t-> {}", key, entry.v); + if let Some(ref right) = entry.t { + print_mst_keys(db, right)?; + } + } + Ok(()) +} + +pub fn dump_mst_keys(db_path: &PathBuf) -> Result<()> { + let mut db: BlockStore = + { BlockStore::open(db_path, Default::default())? }; + + let all_aliases: Vec<(Vec, Cid)> = db.aliases()?; + if all_aliases.is_empty() { + error!("expected at least one alias in block store"); + std::process::exit(-1); + } + let (alias, commit_cid) = all_aliases[0].clone(); + info!( + "starting from {} [{}]", + commit_cid, + String::from_utf8_lossy(&alias) + ); + + // NOTE: the faster way to develop would have been to decode to libipld::ipld::Ipld first? meh + + debug!( + "raw commit: {:?}", + &db.get_block(&commit_cid)? + .ok_or(anyhow!("expected commit block in store"))? + ); + let commit: CommitNode = DagCborCodec.decode( + &db.get_block(&commit_cid)? + .ok_or(anyhow!("expected commit block in store"))?, + )?; + debug!("Commit: {:?}", commit); + let root: RootNode = DagCborCodec.decode( + &db.get_block(&commit.root)? + .ok_or(anyhow!("expected root block in store"))?, + )?; + debug!("Root: {:?}", root); + let metadata: MetadataNode = DagCborCodec.decode( + &db.get_block(&root.meta)? + .ok_or(anyhow!("expected metadata block in store"))?, + )?; + debug!("Metadata: {:?}", metadata); + let mst_node: MstNode = DagCborCodec.decode( + &db.get_block(&root.data)? + .ok_or(anyhow!("expected block in store"))?, + )?; + debug!("MST root node: {:?}", mst_node); + debug!("============"); + + print_mst_keys(&mut db, &root.data)?; + Ok(()) +} + +fn collect_mst_keys( + db: &mut BlockStore, + cid: &Cid, + map: &mut BTreeMap, +) -> Result<()> { + let node = get_mst_node(db, cid)?; + if let Some(ref left) = node.l { + collect_mst_keys(db, left, map)?; + } + let mut key: String = "".to_string(); + for entry in node.e.iter() { + key = format!("{}{}", &key[0..entry.p as usize], entry.k); + map.insert(key.clone(), entry.v); + if let Some(ref right) = entry.t { + collect_mst_keys(db, right, map)?; + } + } + Ok(()) +} + +fn leading_zeros(key: &str) -> u8 { + let digest = sha256::digest(key); + let digest = digest.as_bytes(); + for i in 0..digest.len() { + if digest[i] != '0' as u8 { + return i as u8; + } + } + digest.len() as u8 +} + +fn generate_mst( + db: &mut BlockStore, + map: &mut BTreeMap, +) -> Result { + // construct a "WIP" tree + let mut root: Option = None; + for (key, val) in map { + let height = leading_zeros(key); + let entry = WipEntry { + height, + key: key.clone(), + val: val.clone(), + right: None, + }; + if let Some(node) = root { + root = Some(insert_entry(node, entry)); + } else { + root = Some(WipNode { + height: entry.height, + left: None, + entries: vec![entry], + }); + } + } + serialize_wip_tree(db, root.expect("non-empty MST tree")) +} + +fn insert_entry(mut node: WipNode, entry: WipEntry) -> WipNode { + // if we are higher on tree than existing node, replace it with new layer first + if entry.height > node.height { + node = WipNode { + height: entry.height, + left: Some(Box::new(node)), + entries: vec![], + } + }; + // if we are lower on tree, then need to descend first + if entry.height < node.height { + // we should never be lower down the left than an existing node, and always to the right + let mut last = node.entries.pop().expect("hit empty existing entry list"); + assert!(entry.key > last.key); + if last.right.is_some() { + last.right = Some(Box::new(insert_entry(*last.right.unwrap(), entry))); + } else { + last.right = Some(Box::new(WipNode { + height: entry.height, + left: None, + entries: vec![entry], + })); + } + node.entries.push(last); + return node; + } + // same height, simply append to end (but verify first) + assert!(node.height == entry.height); + if !node.entries.is_empty() { + let last = &node.entries.last().unwrap(); + assert!(entry.key > last.key); + } + node.entries.push(entry); + node +} + +/// returns the length of common characters between the two strings. Strings must be simple ASCII, +/// which should hold for current ATP MST keys (collection plus TID) +fn common_prefix_len(a: &str, b: &str) -> usize { + let a = a.as_bytes(); + let b = b.as_bytes(); + for i in 0..std::cmp::min(a.len(), b.len()) { + if a[i] != b[i] { + return i; + } + } + // strings are the same, up to common length + std::cmp::min(a.len(), b.len()) +} + +#[test] +fn test_common_prefix_len() { + assert_eq!(common_prefix_len("abc", "abc"), 3); + assert_eq!(common_prefix_len("abcde", "abc"), 3); + assert_eq!(common_prefix_len("abcde", "abb"), 2); + assert_eq!(common_prefix_len("", "asdf"), 0); +} + +fn serialize_wip_tree( + db: &mut BlockStore, + wip_node: WipNode, +) -> Result { + let left: Option = if let Some(left) = wip_node.left { + Some(serialize_wip_tree(db, *left)?) + } else { + None + }; + + let mut entries: Vec = vec![]; + let mut last_key = "".to_string(); + for wip_entry in wip_node.entries { + let right: Option = if let Some(right) = wip_entry.right { + Some(serialize_wip_tree(db, *right)?) + } else { + None + }; + let prefix_len = common_prefix_len(&last_key, &wip_entry.key); + entries.push(MstEntry { + k: wip_entry.key[prefix_len..].to_string(), + p: prefix_len as u32, + v: wip_entry.val, + t: right, + }); + last_key = wip_entry.key; + } + let mst_node = MstNode { + l: left, + e: entries, + }; + let block = Block::::encode(DagCborCodec, Code::Sha2_256, &mst_node)?; + let cid = block.cid().clone(); + db.put_block(block, None)?; + Ok(cid) +} + +pub fn repro_mst(car_path: &PathBuf) -> Result<()> { + + // open a temp block store + let mut db: BlockStore = { + BlockStore::open_path(ipfs_sqlite_block_store::DbPath::Memory, Default::default())? + }; + + // load CAR contents from file + load_car_to_blockstore(&mut db, car_path)?; + + let all_aliases: Vec<(Vec, Cid)> = db.aliases()?; + if all_aliases.is_empty() { + error!("expected at least one alias in block store"); + std::process::exit(-1); + } + let (_alias, commit_cid) = all_aliases[0].clone(); + + let commit_node: CommitNode = DagCborCodec.decode( + &db.get_block(&commit_cid)? + .ok_or(anyhow!("expected commit block in store"))?, + )?; + let root_node: RootNode = DagCborCodec.decode( + &db.get_block(&commit_node.root)? + .ok_or(anyhow!("expected root block in store"))?, + )?; + let _metadata_node: MetadataNode = DagCborCodec.decode( + &db.get_block(&root_node.meta)? + .ok_or(anyhow!("expected metadata block in store"))?, + )?; + + // collect key/value sorted map of string/cid, as BTree + let mut repo_map: BTreeMap = BTreeMap::new(); + collect_mst_keys(&mut db, &root_node.data, &mut repo_map)?; + + // now re-generate nodes + let updated = generate_mst(&mut db, &mut repo_map)?; + + info!("original root: {}", root_node.data); + info!("regenerated : {}", updated); + if root_node.data == updated { + Ok(()) + } else { + println!("FAILED"); + let a = get_mst_node(&mut db, &root_node.data)?; + let b = get_mst_node(&mut db, &updated)?; + Err(anyhow!("FAILED to reproduce MST: {:?} != {:?}", a, b)) + } +} diff --git a/adenosine-pds/tests/bigger.car b/adenosine-pds/tests/bigger.car new file mode 100644 index 0000000..7169013 Binary files /dev/null and b/adenosine-pds/tests/bigger.car differ diff --git a/adenosine-pds/tests/example_repo.car b/adenosine-pds/tests/example_repo.car new file mode 100644 index 0000000..b2ae723 Binary files /dev/null and b/adenosine-pds/tests/example_repo.car differ diff --git a/adenosine-pds/tests/test_repro_mst.rs b/adenosine-pds/tests/test_repro_mst.rs new file mode 100644 index 0000000..4dde364 --- /dev/null +++ b/adenosine-pds/tests/test_repro_mst.rs @@ -0,0 +1,10 @@ + +use adenosine_pds::repro_mst; +use std::path::PathBuf; +use std::str::FromStr; + +#[test] +fn test_repro_mst() { + repro_mst(&PathBuf::from_str("./tests/example_repo.car").unwrap()).unwrap(); + repro_mst(&PathBuf::from_str("./tests/bigger.car").unwrap()).unwrap(); +} -- cgit v1.2.3