From 5c59ba4e6f3ca12877d79859b16d1e63e559bb8a Mon Sep 17 00:00:00 2001 From: Bryan Newbold Date: Wed, 18 Oct 2017 17:18:23 -0700 Subject: commit a bunch of meta files --- .gitignore | 1 + LICENSE | 7 +++++ TODO | 47 +++++++++++++++++++++++++++++++ notes/process.md | 34 ++++++++++++++++++++++ notes/spec_todo.txt | 15 ++++++++++ src/dat.proto | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 185 insertions(+) create mode 100644 LICENSE create mode 100644 TODO create mode 100644 notes/process.md create mode 100644 notes/spec_todo.txt create mode 100644 src/dat.proto diff --git a/.gitignore b/.gitignore index dc7276f..ee427b0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +dat-paper.pdf target/ **/*.rs.bk *.o diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..45a9903 --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ + +protobuf schema is: + + The MIT License (MIT) + Copyright (c) 2016 Mathias Buus + +from: https://github.com/mafintosh/hypercore-protocol/blob/master/schema.proto diff --git a/TODO b/TODO new file mode 100644 index 0000000..36ae6ea --- /dev/null +++ b/TODO @@ -0,0 +1,47 @@ + +next: +- appending to register +- verifying register +- reading/writing secret key for register +- bitfield stuff + https://docs.rs/bit_field/0.8.0/bit_field/trait.BitArray +- protobuf infra +- basic drive trait +- 'drive log' command (shows actions) +- network infra +- 'geniza clone' command +- 'geniza checkout' command +- 'geniza init', 'geniza commit' commands +- 'geniza broadcast' command + +meta: +- rustfmt +- github +- travis tests (linux, osx) + +specific tests: +- sleep info a variety of bad sleep files, should error +- sleep create a known file, should be same as empty (also a "Rot13" algo file) +- create a register, binary compare against known-good + +sleep: SLEEP file I/O +register: hypercore registers/journal streams, signing +drive: hyperdrive +sync: dat network protocol, discover +geniza: command line, swarm + + + 
+Backburner: +- refactor to something pwrite based for file I/O + https://doc.rust-lang.org/std/os/unix/fs/trait.FileExt.html + https://github.com/vasi/positioned-io +- mmap + https://docs.rs/scroll/0.7.0/scroll/ +- switch to byteorder for endian reads/writes +- --json args to most CLI commands +- simplify APIs +- SleepFile from existing File +- refactor key generation into generic/re-usable function +- turn magic numbers in tree nodes into declared (public?) constants +- root_nodes as an iterator? (vs returning a Vec) diff --git a/notes/process.md b/notes/process.md new file mode 100644 index 0000000..bd3a516 --- /dev/null +++ b/notes/process.md @@ -0,0 +1,34 @@ + +This is an ordered list of testable steps to get to a minimal dat client in +just about any language. + +sleep + read/write headers + read/write individual elements as raw bytes + read/write contiguous batches + +registers + read elements by index + verify signature by index (not leaves) + append elements + +drive + data register to a single file + single file to a data register + print metadata tree ("ls") + create metadata tree for a directory + directory to metadata+data registers + registers to directory + +sync + send/receive messages to a known host + pull register from a known host + wrapper command + + + + +Shortcuts: + key/value store instead of SLEEP files + pull/read/to-file only + don't verify any hashes (!) diff --git a/notes/spec_todo.txt b/notes/spec_todo.txt new file mode 100644 index 0000000..b361f70 --- /dev/null +++ b/notes/spec_todo.txt @@ -0,0 +1,15 @@ + +Things that were underspecified in the spec/whitepaper: + +- endianness of magic (big-endian) +- uppercase/lowercase of algorithm name (inconsistent in one place) +- what if there is only a single chunk/entry in a register tree? then a leaf + acts as a root? + +"You can use the byteOffset property in the Stat metadata object to seek into +the right position in the content for the start of this chunk." 
=> unnecessary, +node.js specific? + +Clarify: appending to tree SLEEP results in writes into middle of file (for +root nodes). This seems not-great for performance (can't bulk-write). Better to +cache or work in RAM then batch commit? diff --git a/src/dat.proto b/src/dat.proto new file mode 100644 index 0000000..1b7bc2c --- /dev/null +++ b/src/dat.proto @@ -0,0 +1,81 @@ +// wire format is (
) +// header is a varint, channel << 4 | <4-bit-type> + +// type=0, should be the first message sent on a channel +message Feed { + required bytes discoveryKey = 1; + optional bytes nonce = 2; +} + +// type=1, overall connection handshake. should be sent just after the feed message on the first channel only +message Handshake { + optional bytes id = 1; + optional bool live = 2; // keep the connection open forever? both ends have to agree + optional bytes userData = 3; + repeated string extensions = 4; +} + +// type=2, message indicating state changes etc. +// initial state for uploading/downloading is true +// if both ends are not downloading and not live it is safe to consider the stream ended +message Info { + optional bool uploading = 1; + optional bool downloading = 2; +} + +// type=3, what do we have? +message Have { + required uint64 start = 1; + optional uint64 length = 2 [default = 1]; // defaults to 1 + optional bytes bitfield = 3; +} + +// type=4, what did we lose? +message Unhave { + required uint64 start = 1; + optional uint64 length = 2 [default = 1]; // defaults to 1 +} + +// type=5, what do we want? remote should start sending have messages in this range +message Want { + required uint64 start = 1; + optional uint64 length = 2; // defaults to Infinity or feed.length (if not live) +} + +// type=6, what don't we want anymore? 
+message Unwant { + required uint64 start = 1; + optional uint64 length = 2; // defaults to Infinity or feed.length (if not live) +} + +// type=7, ask for data +message Request { + required uint64 index = 1; + optional uint64 bytes = 2; + optional bool hash = 3; + optional uint64 nodes = 4; +} + +// type=8, cancel a request +message Cancel { + required uint64 index = 1; + optional uint64 bytes = 2; + optional bool hash = 3; +} + +// type=9, get some data +message Data { + message Node { + required uint64 index = 1; + required bytes hash = 2; + required uint64 size = 3; + } + + required uint64 index = 1; + optional bytes value = 2; + repeated Node nodes = 3; + optional bytes signature = 4; +} + +// type=15 (last message) is an extension message +// that is encoded like this -- cgit v1.2.3