aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan Newbold <bnewbold@robocracy.org>2021-10-16 20:08:42 -0700
committerBryan Newbold <bnewbold@robocracy.org>2021-10-16 20:08:42 -0700
commitd797df5488bf7ed84b6a53cc503bec15262a31bf (patch)
treec2bf42c784d4dc2e080576e6d28fe214f80c2e52
parentfcc026dec1b6193c187a917e310b52c47b918029 (diff)
downloadcasual-d797df5488bf7ed84b6a53cc503bec15262a31bf.tar.gz
casual-d797df5488bf7ed84b6a53cc503bec15262a31bf.zip
initial copy of hand-written sexpr parsing
-rw-r--r--.gitignore1
-rw-r--r--Cargo.lock7
-rw-r--r--Cargo.toml12
-rw-r--r--src/main.rs115
-rw-r--r--src/sexpr.rs284
5 files changed, 419 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index 81a4762..6543366 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ build/
_build/
src/build/
*.log
+target/
# Don't ignore this file itself
!.gitignore
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..33515ef
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "casual"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..8d86d4e
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "casual"
+version = "0.1.0"
+edition = "2018"
+authors = ["Bryan Newbold <bnewbold@robocracy.org>"]
+license = "MIT"
+readme = "README.md"
+keywords = ["computer-algebra"]
+
+[dependencies]
+
+[dev-dependencies]
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..6b2bc57
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,115 @@
+
+use std::io;
+use std::env;
+use std::io::Write;
+use std::path::Path;
+
+mod sexpr;
+
+fn repl(verbose: bool) {
+
+ let stdin = io::stdin();
+ let mut stdout = io::stdout();
+
+ loop {
+ let raw_input = &mut String::new();
+ stdout.write(b"\ncasual> ").unwrap();
+ stdout.flush().unwrap();
+ stdin.read_line(raw_input).unwrap();
+ let raw_input = raw_input; // mutable to immutable reference
+ if raw_input.len() == 0 {
+ // end-of-line, aka Ctrl-D. Blank line will still have newline char
+ stdout.write(b"\nCiao!\n").unwrap();
+ return;
+ }
+ // TODO: use debug or something instead of "verbose"?
+ let tokens = match sexpr::sexpr_tokenize(&raw_input) {
+ Ok(tokens) => {
+ if verbose { println!("Tokens: {}", tokens.join(", ")); };
+ tokens
+ },
+ Err(e) => {
+ println!("couldn't tokenize: {}", e);
+ continue;
+ }
+ };
+ let ast = match sexpr::sexpr_parse(&tokens, 0) {
+ Ok((mut ast_list, _)) => {
+ if verbose {
+ for ast in &ast_list {
+ println!("AST: {}", sexpr::sexpr_repr(ast).unwrap());
+ };
+ };
+ // We're a REPL, so only one expression at a time
+ if ast_list.len() > 1 {
+ println!("one expression at a time on the REPL, please!");
+ continue;
+ } else if ast_list.len() == 0 {
+ sexpr::SExpr::SNull
+ } else {
+ let ast = ast_list.pop().unwrap();
+ ast
+ }
+ },
+ Err(e) => {
+ println!("couldn't parse: {}", e);
+ continue;
+ }
+ };
+ println!("{}", sexpr::sexpr_repr(&ast).unwrap());
+ }
+}
+
+fn usage() {
+ println!("usage:\tcasual [-h] [-v] [--no-repl] [<files>]");
+ println!("");
+ println!("Files will be loaded in order, then drop to REPL (unless \"--no-repl\" is passed).");
+ println!("Verbose flag (\"-v\") will result in lexed tokens and parsed AST \
+ being dumped to stdout (when on REPL).");
+}
+
+fn main() {
+
+ let mut verbose: bool = false;
+ let mut no_repl: bool = false;
+
+ let mut file_list = Vec::<String>::new();
+
+ for arg in env::args().skip(1) {
+ match &*arg {
+ "-v" | "--verbose" => { verbose = true; },
+ "--no-repl" => { no_repl = true; },
+ "-h" | "--help" => { usage(); return; },
+ _ if arg.starts_with("-") => {
+ println!("Unknown option: {}", arg);
+ println!("");
+ usage();
+ return;
+ },
+ _ => {
+ file_list.push(arg.clone());
+ }
+ }
+ }
+
+ for fname in file_list {
+ let fpath = Path::new(&fname);
+ if !fpath.is_file() {
+ println!("File not found (or not file): {}", fname);
+ return;
+ }
+ println!("Loading {}...", fname);
+ match sexpr::sexpr_parse_file(&fpath) {
+ Err(e) => {
+ println!("Error loading file: {}\n {}", fname, e);
+ return;
+ },
+ Ok(_) => ()
+ }
+ }
+
+ if !no_repl {
+ repl(verbose);
+ }
+}
+
diff --git a/src/sexpr.rs b/src/sexpr.rs
new file mode 100644
index 0000000..bc13bd4
--- /dev/null
+++ b/src/sexpr.rs
@@ -0,0 +1,284 @@
+/*
+ * Simple hand-rolled "S-Expression" parser with no dependencies.
+ *
+ * Copied and tweaked from my 'spectrum' rust scheme implementation.
+ *
+ * Not performant: allocates all over the place, takes a full string to parse as input, etc.
+ *
+ * string interning (or something like `smartstring`) would be better for symbols, but would pull
+ * in a dependency.
+ */
+
+use std::str;
+use std::io::Read;
+use std::fs::File;
+use std::path::Path;
+
+//////////// Types and Constants
+
+const SEXPR_BUILTINS: [&'static str; 14] = [
+ "=", ">", ">=", "<", "<=",
+ "+", "-", "*", "/",
+ "exp", "log", "sin", "cos", "tan",
+ ];
+
+#[derive(Clone, PartialEq)]
+pub enum SExpr {
+ SNull,
+ STrue,
+ SFalse,
+ SInteger(i64),
+ SFloat(f64),
+ SBuiltin(String),
+ SSymbol(String),
+ SIdentifier(String),
+ SString(String),
+ SList(Vec<SExpr>),
+ SVector(Vec<SExpr>),
+ //SQuote(Vec<SExpr>),
+}
+
+//////////// Lexing, Parsing, and Printing
+
+fn is_whitespace(c: char) -> bool{
+ " \t\r\n".find(c) != None
+}
+
+fn is_seperator(c: char) -> bool {
+ "()#".find(c) != None
+}
+
+fn is_valid_identifier(s: &str) -> bool {
+ // Valid Scheme (R7RS-small) identifier characters: ! $ % & * + - . / : < = > ? @ ^ _ ~
+ // Note: this is not complete or compliant
+ if s.len() == 0 {
+ return false;
+ }
+ if s == "." {
+ return false;
+ }
+ if s.starts_with("-") || s.starts_with("'") {
+ return false;
+ }
+ for (i, c) in s.chars().enumerate() {
+ if !( c.is_alphabetic() || "!$%&*+-./:<=>?@^_~".find(c) != None || (c.is_numeric() && i > 0) ) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/*
+ * This function takes a raw string and splits it up into a flat sequence of string tokens.
+ * It should handle basic quotes (double quotes only) and comments (';' character to end-of-line).
+ */
+pub fn sexpr_tokenize<'a>(raw_str: &'a str) -> Result<Vec<&'a str>, String> {
+ let mut ret = Vec::<&str>::new();
+ let mut food: usize = 0; // "how many chars of current token have we read?"
+ let mut quoted: bool = false;
+ let mut commented: bool = false;
+ for (i, c) in raw_str.chars().enumerate() {
+ if quoted {
+ // Safe to look-back a character here because quoted can't be true for first char
+ if c == '"' && raw_str.chars().collect::<Vec<char>>()[i-1] != '\\' {
+ ret.push(&raw_str[i-food-1..i+1]);
+ quoted = false;
+ food = 0;
+ } else if raw_str.len() == i+1 {
+ return Err(format!("unmatched quote char"));
+ } else {
+ food += 1;
+ }
+ } else if commented {
+ if c == '\n' {
+ commented = false;
+ }
+ } else if c == ';' {
+ if food > 0 {
+ ret.push(&raw_str[i-food..i]);
+ }
+ commented = true;
+ food = 0;
+ } else if c == '"' {
+ if food > 0 {
+ return Err(format!("unexpected quote char"));
+ }
+ quoted = true;
+ } else if is_whitespace(c) || is_seperator(c) {
+ if food > 0 {
+ ret.push(&raw_str[i-food..i]);
+ }
+ if is_seperator(c) {
+ ret.push(&raw_str[i..i+1]);
+ }
+ food = 0;
+ } else if raw_str.len() == i+1 {
+ // end of input
+ ret.push(&raw_str[i-food..]);
+ } else {
+ food += 1;
+ }
+ }
+ if quoted {
+ return Err(format!("unmatched (trailing) quote char"));
+ }
+ return Ok(ret);
+}
+
+/*
+ * This function takes a token (still a string) and parses it into a single SExpression
+ */
+fn sexpr_parse_token(token: &str) -> Result<SExpr, String> {
+
+ // Is it a constant?
+ match token {
+ "#t" => return Ok(SExpr::STrue),
+ "#f" => return Ok(SExpr::SFalse),
+ _ => ()
+ }
+
+ // Is it a builtin?
+ if SEXPR_BUILTINS.contains(&token) {
+ return Ok(SExpr::SBuiltin(token.to_string()));
+ }
+
+ // Try to parse as an integer
+ match token.parse::<i64>() {
+ Ok(x) => return Ok(SExpr::SInteger(x)),
+ Err(_) => ()
+ }
+
+ // Try to parse as a number
+ match token.parse::<f64>() {
+ Ok(x) => return Ok(SExpr::SFloat(x)),
+ Err(_) => ()
+ }
+
+ // Is it a string?
+ if token.starts_with("\"") && token.ends_with("\"") {
+ return Ok(SExpr::SString(token.to_string()));
+ }
+
+ // Is it a quoted literal?
+ if token.starts_with("'") && token.len() > 1 {
+ match sexpr_parse_token(&token[1..]) {
+ Ok(SExpr::SIdentifier(t)) => return Ok(SExpr::SSymbol(t)),
+ Ok(e) => return Ok(e),
+ _ => {},
+ }
+ }
+
+ // Else, we'll treat it as an identifier
+ if is_valid_identifier(token) {
+ return Ok(SExpr::SIdentifier(token.to_string()));
+ }
+
+ return Err(format!("unparsable token: \"{}\"", token));
+}
+
+/*
+ * This function takes a flat sequence of string tokens (as output by sexpr_tokenize) and parses
+ * into a SExpression (eg, a nested list of expressions).
+ */
+pub fn sexpr_parse(tokens: &Vec<&str>, depth: u32) -> Result<(Vec<SExpr>, usize), String> {
+ let mut i: usize = 0;
+ if tokens.len() == 0 {
+ return Ok((vec![SExpr::SNull], 0));
+ } else if tokens.len() == 1 {
+ let expr = sexpr_parse_token(tokens[0])?;
+ return Ok((vec![expr], 1));
+ }
+ let mut parsed: usize = 0;
+ let mut ret = Vec::<SExpr>::new();
+ while i < tokens.len() {
+ parsed += 1;
+ match tokens[i] {
+ "(" | "#(" => {
+ // "Read ahead" to check for empty tuple
+ if i+1 < tokens.len() && tokens[i+1] == ")" {
+ ret.push(SExpr::SNull);
+ i += 1;
+ parsed += 1;
+ } else {
+ let (expr_list, skip) = sexpr_parse(&tokens[i+1..].to_vec(), depth+1)?;
+ i += skip;
+ parsed += skip;
+ match tokens[i-skip] {
+ "(" => ret.push(SExpr::SList(expr_list)),
+ "#(" => ret.push(SExpr::SVector(expr_list)),
+ _ => unreachable!(),
+ };
+ }
+ },
+ ")" => {
+ if depth == 0 {
+ return Err(format!("missing an open bracket"));
+ }
+ return Ok((ret, parsed));
+ },
+ "'" => {
+ },
+ token => {
+ let expr = sexpr_parse_token(token)?;
+ ret.push(expr);
+ }
+ }
+ i += 1;
+ }
+ if depth > 0 {
+ return Err(format!("missing a close bracket"));
+ }
+ return Ok((ret, parsed));
+}
+
+/*
+ * This function takes an arbitary SExpression and returns a string representation.
+ * It's basically the inverse of sexpr_tokenize and sexpr_parse; the output representation is
+ * just plain old LISP/Scheme s-expr syntax.
+ */
+pub fn sexpr_repr(ast: &SExpr) -> Result<String, String> {
+ return match ast {
+ &SExpr::STrue => Ok("#t".to_string()),
+ &SExpr::SFalse => Ok("#f".to_string()),
+ &SExpr::SNull => Ok("'()".to_string()), // TODO: just () ?
+ &SExpr::SInteger(num) => Ok(format!("{}", num).to_string()),
+ &SExpr::SFloat(num) => Ok(format!("{}", num).to_string()),
+ &SExpr::SBuiltin(ref b)=> Ok(b.clone()),
+ &SExpr::SString(ref s)=> Ok(s.clone()),
+ &SExpr::SSymbol(ref s)=> Ok(s.clone()),
+ &SExpr::SIdentifier(ref s)=> Ok(s.to_string()),
+ &SExpr::SList(ref list) => {
+ let elements: Vec<String> = list.iter().map(|ref el| sexpr_repr(&el).unwrap()).collect();
+ Ok(format!("({})", elements.join(" ")))
+ },
+ &SExpr::SVector(ref list) => {
+ let elements: Vec<String> = list.iter().map(|ref el| sexpr_repr(&el).unwrap()).collect();
+ Ok(format!("#({})", elements.join(" ")))
+ },
+ /*
+ &SExpr::SQuote(ref list) => {
+ let mut ret: String =
+ list.iter().fold("(quote ".to_string(),
+ |acc, ref el| acc + " " + &sexpr_repr(&el).unwrap());
+ ret.push_str(" )");
+ Ok(ret)
+ },
+ */
+ }
+}
+
+pub fn sexpr_parse_file(fpath: &Path) -> Result<(), String> {
+
+ let mut raw_bytes: Vec<u8> = Vec::new();
+
+ let mut f = File::open(fpath)
+ .expect(&format!("couldn't open file: {}", &fpath.to_str().unwrap()));
+ f.read_to_end(&mut raw_bytes)
+ .expect(&format!("couldn't read file: {}", &fpath.to_str().unwrap()));
+ let contents = String::from_utf8(raw_bytes)
+ .expect(&format!("UTF-8 decode error reading file: {}", &fpath.to_str().unwrap()));
+
+ let tokens = sexpr_tokenize(&contents)?;
+ let (_ast_list, _) = sexpr_parse(&tokens, 0)?;
+ Ok(())
+}