aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2017-10-18 17:18:23 -0700
committerBryan Newbold <bnewbold@robocracy.org>2017-10-18 17:18:23 -0700
commit5c59ba4e6f3ca12877d79859b16d1e63e559bb8a (patch)
treeb6842d9a76e6832642ca1a78f596ec1abe2a9094
parente46bfcad874c4cb0e100f8cae15d23501a376dc0 (diff)
downloadgeniza-5c59ba4e6f3ca12877d79859b16d1e63e559bb8a.tar.gz
geniza-5c59ba4e6f3ca12877d79859b16d1e63e559bb8a.zip
commit a bunch of meta files
-rw-r--r--.gitignore1
-rw-r--r--LICENSE7
-rw-r--r--TODO47
-rw-r--r--notes/process.md34
-rw-r--r--notes/spec_todo.txt15
-rw-r--r--src/dat.proto81
6 files changed, 185 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index dc7276f..ee427b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+dat-paper.pdf
target/
**/*.rs.bk
*.o
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..45a9903
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,7 @@
+
+protobuf schema is:
+
+ The MIT License (MIT)
+ Copyright (c) 2016 Mathias Buus
+
+from: https://github.com/mafintosh/hypercore-protocol/blob/master/schema.proto
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..36ae6ea
--- /dev/null
+++ b/TODO
@@ -0,0 +1,47 @@
+
+next:
+- appending to register
+- verifying register
+- reading/writing secret key for register
+- bitfield stuff
+ https://docs.rs/bit_field/0.8.0/bit_field/trait.BitArray
+- protobuf infra
+- basic drive trait
+- 'drive log' command (shows actions)
+- network infra
+- 'geniza clone' command
+- 'geniza checkout' command
+- 'geniza init', 'geniza commit' commands
+- 'geniza broadcast' command
+
+meta:
+- rustfmt
+- github
+- travis tests (linux, osx)
+
+specific tests:
+- sleep info a variety of bad sleep files, should error
+- sleep create a known file, should be same as empty (also a "Rot13" algo file)
+- create a register, binary compare against known-good
+
+sleep: SLEEP file I/O
+register: hypercore registers/journal streams, signing
+drive: hyperdrive
+sync: dat network protocol, discover
+geniza: command line, swarm
+
+
+
+Backburner:
+- refactor to something pwrite based for file I/O
+ https://doc.rust-lang.org/std/os/unix/fs/trait.FileExt.html
+ https://github.com/vasi/positioned-io
+- mmap
+ https://docs.rs/scroll/0.7.0/scroll/
+- switch to byteorder for endian reads/writes
+- --json args to most CLI commands
+- simplify APIs
+- SleepFile from existing File
+- refactor key generation into generic/re-usable function
+- turn magic numbers in tree nodes into declared (public?) constants
+- root_nodes as an iterator? (vs returning a Vec)
diff --git a/notes/process.md b/notes/process.md
new file mode 100644
index 0000000..bd3a516
--- /dev/null
+++ b/notes/process.md
@@ -0,0 +1,34 @@
+
+This is an ordered list of testable steps to get to a minimal dat client in
+just about any language.
+
+sleep
+ read/write headers
+ read/write individual elements as raw bytes
+ read/write contiguous batches
+
+registers
+ read elements by index
+ verify signature by index (not leaves)
+ append elements
+
+drive
+ data register to a single file
+ single file to a data register
+ print metadata tree ("ls")
+ create metadata tree for a directory
+ directory to metadata+data registers
+ registers to directory
+
+sync
+ send/receive messages to a known host
+ pull register from a known host
+ wrapper command
+
+
+
+
+Shortcuts:
+ key/value store instead of SLEEP files
+ pull/read/to-file only
+ don't verify any hashes (!)
diff --git a/notes/spec_todo.txt b/notes/spec_todo.txt
new file mode 100644
index 0000000..b361f70
--- /dev/null
+++ b/notes/spec_todo.txt
@@ -0,0 +1,15 @@
+
+Things that were underspecified in the spec/whitepaper:
+
+- endianness of magic (big-endian)
+- uppercase/lowercase of algorithm name (inconsistent in one place)
+- what if there is only a single chunk/entry in a register tree? then a leaf
+ acts as a root?
+
+"You can use the byteOffset property in the Stat metadata object to seek into
+the right position in the content for the start of this chunk." => unnecessary,
+node.js specific?
+
+Clarify: appending to tree SLEEP results in writes into middle of file (for
+root nodes). This seems not-great for performance (can't bulk-write). Better to
+cache or work in RAM then batch commit?
diff --git a/src/dat.proto b/src/dat.proto
new file mode 100644
index 0000000..1b7bc2c
--- /dev/null
+++ b/src/dat.proto
@@ -0,0 +1,81 @@
+// wire format is <len>(<header><message>)
+// header is a varint, channel << 4 | <4-bit-type>
+
+// type=0 (the 4-bit type in the header varint); should be the first message sent on a channel
+message Feed {
+ required bytes discoveryKey = 1; // the only field a sender must always set
+ optional bytes nonce = 2;
+}
+
+// type=1, overall connection handshake. should be sent just after the feed message on the first channel only
+message Handshake { // no required fields: every field is optional or repeated
+ optional bytes id = 1;
+ optional bool live = 2; // keep the connection open forever? both ends have to agree
+ optional bytes userData = 3;
+ repeated string extensions = 4;
+}
+
+// type=2, message indicating state changes etc.
+// initial state for uploading/downloading is true (a protocol convention; no [default = true] is declared below)
+// if both ends are not downloading and not live it is safe to consider the stream ended
+message Info {
+ optional bool uploading = 1; // no schema default declared; see the note above about the initial state
+ optional bool downloading = 2; // no schema default declared; see the note above about the initial state
+}
+
+// type=3, what do we have?
+message Have {
+ required uint64 start = 1;
+ optional uint64 length = 2 [default = 1]; // schema default: reads as 1 when the sender omits it
+ optional bytes bitfield = 3; // NOTE(review): encoding and relation to start/length not stated here - confirm
+}
+
+// type=4, what did we lose? (same start/length fields and tags as Have, without the bitfield)
+message Unhave {
+ required uint64 start = 1;
+ optional uint64 length = 2 [default = 1]; // schema default: reads as 1 when the sender omits it
+}
+
+// type=5, what do we want? remote should start sending have messages in this range
+message Want {
+ required uint64 start = 1;
+ optional uint64 length = 2; // no schema default declared; when absent, treat as Infinity or feed.length (if not live)
+}
+
+// type=6, what don't we want anymore? (same start/length fields and tags as Want)
+message Unwant {
+ required uint64 start = 1;
+ optional uint64 length = 2; // no schema default declared; when absent, treat as Infinity or feed.length (if not live)
+}
+
+// type=7, ask for data; only `index` is required
+message Request {
+ required uint64 index = 1;
+ optional uint64 bytes = 2; // NOTE(review): semantics of bytes/hash/nodes are not documented in this file - confirm
+ optional bool hash = 3;
+ optional uint64 nodes = 4;
+}
+
+// type=8, cancel a request; has Request's index/bytes/hash fields (same tags) but no `nodes`
+message Cancel {
+ required uint64 index = 1;
+ optional uint64 bytes = 2;
+ optional bool hash = 3;
+}
+
+// type=9, get some data
+message Data {
+ message Node { // nested type used by `nodes` below; all three fields are required
+ required uint64 index = 1;
+ required bytes hash = 2;
+ required uint64 size = 3;
+ }
+
+ required uint64 index = 1; // the only required top-level field
+ optional bytes value = 2;
+ repeated Node nodes = 3; // zero or more Node entries
+ optional bytes signature = 4;
+}
+
+// type=15 (last message type) is an extension message
+// that is encoded like this <varint user-type><payload>