aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2017-11-05 16:16:24 -0800
committerBryan Newbold <bnewbold@robocracy.org>2017-11-05 16:16:24 -0800
commitfcdc7a6d413f5e5ba213f6d421e8e117696bf0d0 (patch)
tree016e5e242d781168a5b6113aab40e2b2e7001026
parent12b3a405268ced4e55babb2dbbd7fe2d48c2c39e (diff)
downloadgeniza-fcdc7a6d413f5e5ba213f6d421e8e117696bf0d0.tar.gz
geniza-fcdc7a6d413f5e5ba213f6d421e8e117696bf0d0.zip
progress on drive implementation
-rw-r--r--src/drive.rs317
1 files changed, 261 insertions, 56 deletions
diff --git a/src/drive.rs b/src/drive.rs
index 622e0ed..14c55b0 100644
--- a/src/drive.rs
+++ b/src/drive.rs
@@ -3,6 +3,8 @@ use std::io::Read;
use std::path::{Path, PathBuf};
use std::os::unix::fs::MetadataExt;
use std::fs::File;
+use std::cmp::min;
+use std::ffi::OsStr;
use protobuf::Message;
use protobuf::parse_from_bytes;
use integer_encoding::VarInt;
@@ -51,9 +53,18 @@ impl DatDrive {
}
}
-fn decode_children(raw: &[u8]) -> Result<Vec<Vec<u64>>> {
+/// Inflates a binary-encoded child index table. `current` is the entry index number that this
+/// child index is associated with.
+fn decode_children(raw: &[u8], current: u64) -> Result<Vec<Vec<u64>>> {
let mut children = vec![];
let mut offset = 0; // byte offset that we have read up to
+ if raw.len() < 1 {
+ bail!("Expected (binary-encoded) children to have len>=1");
+ }
+ let (header, inc): (u64, usize) = VarInt::decode_var(&raw[offset..]);
+ offset += inc;
+ let append_current = (header & 0x01) == 0x01;
+ trace!("append_current: {} header: {}", append_current, header);
while offset < raw.len() {
trace!("offset={} len={}", offset, raw.len());
let mut sub = vec![];
@@ -70,18 +81,39 @@ fn decode_children(raw: &[u8]) -> Result<Vec<Vec<u64>>> {
offset += inc;
sub.push(run);
}
+ if append_current {
+ sub.push(current);
+ }
children.push(sub);
}
trace!("decoded children: {:?}", children);
Ok(children)
}
-fn encode_children(children: &Vec<Vec<u64>>) -> Result<Vec<u8>> {
+/// Binary encodes a child index table. `current` is the entry index number that this child index
+/// is associated with.
+fn encode_children(children: &Vec<Vec<u64>>, current: u64) -> Result<Vec<u8>> {
// Use of encode_var_vec() instead of encode_var() here is sort of lazy
let mut buf = vec![];
+
+ // Check if the "all arrays end with current index" flag is set
+ let mut current_appended = true;
+ for sub in children {
+ if sub.len() == 0 || sub[sub.len() - 1] != current {
+ current_appended = false;
+ break;
+ }
+ }
+
+ let header: u64 = if current_appended { 0x01 } else { 0x00 };
+ buf.append(&mut header.encode_var_vec());
+
for subvec in children {
let mut subvec = subvec.clone();
+ if current_appended {
+ subvec.pop();
+ }
buf.append(&mut subvec.len().encode_var_vec());
subvec.sort_unstable();
let mut last = 0;
@@ -94,10 +126,59 @@ fn encode_children(children: &Vec<Vec<u64>>) -> Result<Vec<u8>> {
Ok(buf)
}
+/// Returns the count of path components
+///
+/// NB: does not distinguish between paths ending in a directory ("/thing/") and those ending with
+/// a file ("/thing")
+fn longest_common_prefix<P: AsRef<Path>, Q: AsRef<Path>>(a: P, b: Q) -> u64 {
+
+ let a: Vec<&OsStr> = a.as_ref().iter().collect();
+ let b: Vec<&OsStr> = b.as_ref().iter().collect();
+
+ let mut common = 0;
+ for i in 0..min(a.len(), b.len()) {
+ if a[i] != b[i] {
+ break;
+ }
+ common = i + 1; // +1 for count
+ }
+ common as u64
+}
+
+#[test]
+fn test_longest_common_prefix() {
+
+ assert_eq!(longest_common_prefix(
+ "a",
+ "b"),
+ 0);
+ assert_eq!(longest_common_prefix(
+ "a",
+ "a"),
+ 1);
+ assert_eq!(longest_common_prefix(
+ "/hello/world",
+ "/hello/goodbye"),
+ 2);
+ assert_eq!(longest_common_prefix(
+ "/hello/my/friend/",
+ "/hello/my/friend"),
+ 4);
+ assert_eq!(longest_common_prefix(
+ "/hello/goodbye",
+ "/hello/my/friend"),
+ 2);
+ assert_eq!(longest_common_prefix(
+ "/ein/zwei",
+ "/one/two/three"),
+ 1);
+}
+
impl<'a> DatDrive {
fn broken_find_file(&mut self, path: &Path) -> Result<Option<DriveEntry>> {
- for i in (0..self.entry_count()?).rev() {
+ // pubkey increment-by-one
+ for i in (1..(self.entry_count()? + 1)).rev() {
let de = self.get_dir_entry(i)?;
if de.path == path {
if de.stat.is_none() {
@@ -119,15 +200,18 @@ impl<'a> DatDrive {
/// Entry index is counted by drive entries (not including the first register entry, which is
/// the content register public key)
fn get_dir_entry(&mut self, entry_index: u64) -> Result<DriveEntry> {
+ if entry_index == 0 {
+ bail!("First entry in a drive is pubkey metadata, not a DriveEntry");
+ }
trace!("fetching drive entry {} (of {})", entry_index, self.entry_count()?);
- let data = self.metadata.get_data_entry(entry_index+1)?;
+ let data = self.metadata.get_data_entry(entry_index)?;
let node = parse_from_bytes::<Node>(&data)?;
let stat = match node.has_value() {
true => Some(parse_from_bytes::<Stat>(&node.get_value())?),
false => None,
};
- let children = decode_children(node.get_paths())?;
+ let children = decode_children(node.get_paths(), entry_index)?;
Ok(DriveEntry {
index: entry_index,
@@ -137,20 +221,70 @@ impl<'a> DatDrive {
})
}
- fn get_nearest<P: AsRef<Path>>(&mut self, _path: P) -> Result<DriveEntry> {
- // 0. if register is empty, bail out early
- let len = self.entry_count()?;
- if len == 0 {
- bail!("Expected at least one entry, but drive is empty")
+ /// Returns the drive entry which: 1) has the longest common path prefix to the given path 2)
+ /// is the most recent
+ fn get_nearest<P: AsRef<Path>>(&mut self, path: P) -> Result<Option<DriveEntry>> {
+
+ let path = path.as_ref();
+
+ // If register is empty, bail early
+ let reg_len = self.entry_count()?;
+ if reg_len == 0 {
+ return Ok(None);
}
// 1. get most recent entry (tail of register)
- return self.get_dir_entry(len - 1);
+ let mut current = self.get_dir_entry(reg_len)?; // "len + 1 - 1"
+
+ // 1.1 If either path didn't start with '/', bail early
+ if !path.has_root() {
+ bail!("Passed a path with no root prefix: {}", path.display());
+ }
+ if !current.path.has_root() {
+ bail!("Passed a path with no root prefix: {}", current.path.display());
+ }
+ // If paths match, return current
+ if current.path.starts_with(path) {
+ return Ok(Some(current));
+ }
+
+ // 2. find longest common prefix; take all entries from that level
+ let mut common_components = longest_common_prefix(path, &current.path);
+ assert!(common_components >= 1);
+ let mut entries = current.children[(common_components-1) as usize].clone();
- // XXX: actual implementation...
+ // 3. for each of those entries (going in recent-first order):
+ // - if a full prefix match, return entry
+ // - if a closer (longer) match, clear entries and recurse
+ // - if not longer match, continue
+ // - if end of list, return current entry
+ loop {
+ if entries.len() == 0 {
+ break;
+ }
+ for e in entries.clone().iter().rev() {
+ let entry = self.get_dir_entry(*e)?;
+ if entry.path.starts_with(path) {
+ return Ok(Some(entry));
+ }
+ let this_common = longest_common_prefix(path, &entry.path);
+ if this_common > common_components {
+ common_components = this_common;
+ current = entry;
+ entries = current.children[(common_components-1) as usize].clone();
+ break;
+ } else {
+ continue;
+ }
+ }
+ }
+ Ok(Some(current))
}
+ /// 'start' is the drive metadata register entry index. Zero is skipped automatically.
pub fn history<'b>(&'b mut self, start: u64) -> DriveHistory<'b> {
+ // skip pubkey entry
+ let start = if start == 0 { 1 } else { start };
DriveHistory {
drive: self,
current: start,
@@ -183,8 +317,8 @@ impl<'a> DatDrive {
}
pub fn add_file<P: AsRef<Path>, R: Read>(&mut self, path: P, stat: &mut Stat, mut source: R) -> Result<()> {
+ // TODO: canonicalize path
// TODO: check if file already exists
- // XXX: verify stat size against passed size
let mut total_size: u64 = 0;
let mut data_entries: u64 = 0;
let mut buf = [0; 65536];
@@ -210,9 +344,8 @@ impl<'a> DatDrive {
stat.set_blocks(data_entries);
stat.set_offset(data_offset);
stat.set_byteOffset(data_byte_offset);
- // XXX: actual child implementation
- let children = vec![];
- let children = encode_children(&children)?;
+ let children = self.new_child_index(&path, data_offset)?;
+ let children = encode_children(&children, data_offset)?;
let mut node = Node::new();
node.set_name(path.as_ref().to_string_lossy().into_owned());
node.set_value(stat.write_to_bytes()?);
@@ -222,10 +355,42 @@ impl<'a> DatDrive {
Ok(())
}
+ fn new_child_index<P: AsRef<Path>>(&mut self, path: P, index: u64) -> Result<Vec<Vec<u64>>> {
+
+ let path = path.as_ref();
+ let path_len = path.iter().count() as u64;
+ let mut depth: u64 = 0;
+ let mut children: Vec<Vec<u64>> = vec![];
+ while depth < path_len {
+ // 1. get nearest at every level of path (starting at "/")
+ let prefix: Vec<String> = path.iter().take(depth as usize).map(|s| s.to_string_lossy().into_owned()).collect();
+ let prefix = Path::new("/").join(prefix.join("/"));
+ let nearest = match self.get_nearest(prefix)? {
+ None => {
+ children.push(vec![index]);
+ depth += 1;
+ continue;
+ },
+ Some(de) => de,
+ };
+ // 2. consider up to common components
+ let common = longest_common_prefix(path, &nearest.path);
+ for i in depth..common {
+ let mut component_entries = nearest.children[i as usize].clone();
+ // 3. add this entry to each component
+ component_entries.push(index);
+ children.push(component_entries);
+ }
+ // 4. loop for remaining components
+ assert!(common + 1 > depth);
+ depth = common + 1;
+ }
+ Ok(children)
+ }
+
/// Copies Stat metadata and all content from a file in the "real" filesystem into the
/// DatDrive.
pub fn import_file<P: AsRef<Path>, Q: AsRef<Path>>(&mut self, source: P, dest: Q) -> Result<()> {
- // XXX: check if file already exists
let in_file = File::open(source)?;
let in_metadata = in_file.metadata()?;
let mut stat = Stat::new();
@@ -284,16 +449,16 @@ fn test_dd_open() {
// verified from dat log
assert_eq!(dd.history(0).count(), 2);
- //XXX:assert_eq!(dd.read_dir("/").count(), 1);
- //XXX:assert_eq!(dd.read_dir_recursive("/").count(), 1);
+ assert_eq!(dd.read_dir("/").count(), 1);
+ assert_eq!(dd.read_dir_recursive("/").count(), 1);
let mut dd =
DatDrive::open(Path::new("test-data/dat/tree/.dat/"), false).unwrap();
// verified from dat log
assert_eq!(dd.history(0).count(), 8);
- // XXX: assert_eq!(dd.read_dir("/").count(), 2);
- // XXX: assert_eq!(dd.read_dir_recursive("/").count(), 6);
+ assert_eq!(dd.read_dir("/").count(), 2);
+ assert_eq!(dd.read_dir_recursive("/").count(), 6);
}
#[test]
@@ -307,6 +472,7 @@ fn test_dd_create() {
assert_eq!(dd.read_dir_recursive("/").count(), 0);
}
+#[cfg(test)]
fn make_test_stat() -> Stat {
let mut stat = Stat::new();
stat.set_mode(0o777);
@@ -329,19 +495,18 @@ fn test_dd_add() {
stat.set_size(123);
dd.add_file_bytes("/bytes.bin", &mut stat, &data).unwrap();
assert_eq!(dd.history(0).count(), 1);
- //XXX:assert_eq!(dd.read_dir("/").count(), 1);
- //XXX:assert_eq!(dd.read_dir_recursive("/").count(), 1);
+ assert_eq!(dd.read_dir("/").count(), 1);
+ assert_eq!(dd.read_dir_recursive("/").count(), 1);
assert_eq!(dd.content.len_bytes().unwrap(), 123);
stat.set_size(65);
dd.add_file("/bytes_read.bin", &mut stat, &data[0..65]).unwrap();
assert_eq!(dd.history(0).count(), 2);
- //XXX:assert_eq!(dd.read_dir("/").count(), 2);
- //XXX:assert_eq!(dd.read_dir_recursive("/").count(), 2);
+ assert_eq!(dd.read_dir("/").count(), 2);
+ assert_eq!(dd.read_dir_recursive("/").count(), 2);
assert_eq!(dd.content.len_bytes().unwrap(), 123+65);
}
-// TODO: unpack Node into a pub struct
#[derive(Debug)]
pub struct DriveEntry {
pub index: u64,
@@ -359,7 +524,9 @@ pub struct DriveHistory<'a> {
impl<'a> Iterator for DriveHistory<'a> {
type Item = Result<DriveEntry>;
fn next(&mut self) -> Option<Result<DriveEntry>> {
- if self.current >= self.drive.entry_count().unwrap() {
+ // pubkey increment-by-one logic here
+ // XXX: unwrap. on error, return Some(err), then None?
+ if self.current > self.drive.entry_count().unwrap() {
return None;
}
let de = self.drive.get_dir_entry(self.current);
@@ -372,45 +539,35 @@ impl<'a> Iterator for DriveHistory<'a> {
pub struct ReadDriveDir<'a> {
drive: &'a mut DatDrive,
recursive: bool,
+ path: PathBuf,
- // Entries to recurse over
- entries: Vec<u64>,
+ // Entries to iterate over. Tuple of (depth, entry_index), where depth is the path prefix count
+ // where this entry was encountered and added to the list.
+ entries: Vec<(u64, u64)>,
}
impl<'a> ReadDriveDir<'a> {
fn init<P: AsRef<Path>>(drive: &mut DatDrive, path: P, recursive: bool) -> Result<ReadDriveDir> {
+ let path = path.as_ref();
+
// first entry is content pub key
let entries = if drive.entry_count()? == 0 {
vec![]
} else {
-/* XXX: actual implementation
- let nearest = drive.get_nearest(path)?;
- // TODO: starting from the last data entry, recurse up to nearest directory, then recurse
- // down to base path
- let mut entries = vec![];
- /* XXX:
- if nearest.stat.is_some() {
- // XXX: mapping fixer
- entries.push(nearest.index - 1);
- }
- */
- // XXX: flatten entries, not really the right thing to do
- for mut sub in nearest.children {
- entries.append(&mut sub);
- }
-*/
- let mut entries = vec![];
- for i in 0..drive.entry_count()? {
- let e = drive.get_dir_entry(i)?;
- if e.stat.is_some() {
- entries.push(i);
- }
+ // start at the latest entry with the same path prefix
+ match drive.get_nearest(path)? {
+ Some(nearest) => {
+ let common_components = longest_common_prefix(path, nearest.path);
+ let list = nearest.children[(common_components - 1) as usize].clone();
+ list.iter().map(|e| (common_components, *e)).collect()
+ },
+ None => vec![],
}
- entries
};
Ok(ReadDriveDir {
drive,
+ path: path.to_path_buf(),
recursive,
entries: entries,
})
@@ -419,11 +576,59 @@ impl<'a> ReadDriveDir<'a> {
impl<'a> Iterator for ReadDriveDir<'a> {
type Item = Result<DriveEntry>;
+
fn next(&mut self) -> Option<Result<DriveEntry>> {
- // TODO: actually recurse
- match self.entries.pop() {
- None => None,
- Some(this_index) => Some(self.drive.get_dir_entry(this_index))
+ debug!("ReadDriveDir: {:?}", self.entries);
+ let (depth, entry) = match self.entries.pop() {
+ None => { return None },
+ Some((depth, this_index)) => (depth, self.drive.get_dir_entry(this_index))
+ };
+ let entry = match entry {
+ Err(_) => return Some(entry),
+ Ok(e) => e,
+ };
+
+ // defensive programming... shouldn't ever have entries that aren't children of path
+ if !entry.path.starts_with(&self.path) {
+ warn!("unexpected non-child path entry in ReadDriveDir iterator: {}",
+ entry.path.display());
+ return self.next();
+ }
+
+ if entry.path.iter().count() <= self.path.iter().count() + 1 {
+ // direct child of the path; always return
+ if entry.stat.is_some() {
+ return Some(Ok(entry));
+ } else {
+ return self.next();
+ }
+ } else {
+ // subdirectory entry; depends on recursion
+ if !self.recursive {
+ return self.next();
+ } else {
+ // if entry was added as a child of this depth, just return it...
+ if entry.children.len() as u64 <= depth + 1 {
+ if entry.stat.is_some() {
+ return Some(Ok(entry));
+ } else {
+ return self.next();
+ }
+ }
+ // ... else add child path entries and recurse
+ for subdir in ((depth+1) as usize)..entry.children.len() {
+ let mut new_children: Vec<(u64, u64)> = entry.children[subdir].iter()
+ .filter(|&e| (*e != entry.index || subdir == entry.children.len()))
+ .map(|&e| (subdir as u64, e))
+ .collect();
+ self.entries.append(&mut new_children);
+ }
+ if entry.stat.is_some() {
+ return Some(Ok(entry));
+ } else {
+ return self.next();
+ }
+ }
}
}
}