diff options
author | bnewbold <bnewbold@robocracy.org> | 2016-04-18 12:50:50 -0400 |
---|---|---|
committer | bnewbold <bnewbold@robocracy.org> | 2016-04-18 12:50:50 -0400 |
commit | 4d5770f0f41ccfc84f3e29701151ea29660539e0 (patch) | |
tree | 477f837fc333addd0d9f839b5f1018f16ecde4ca | |
parent | 277854dda63c5f6c4dfb30499e16fd7d9e78c575 (diff) | |
download | spectrum-4d5770f0f41ccfc84f3e29701151ea29660539e0.tar.gz spectrum-4d5770f0f41ccfc84f3e29701151ea29660539e0.zip |
commit WIP on python and julia s-expressions
-rw-r--r-- | sexpr.jl | 119 | ||||
-rw-r--r-- | sexpr.py | 40 |
2 files changed, 159 insertions, 0 deletions
# diff --git a/sexpr.jl b/sexpr.jl new file mode 100644 index 0000000..1a0ba3c --- /dev/null +++ b/sexpr.jl @@ -0,0 +1,119 @@

"""
    trynum(s::AbstractString)

Convert `s` to an `Int` or a `Float64` if it looks like a plain decimal number;
otherwise return `s` unchanged.

A token counts as a number when it consists only of the digits 0-9, at most one
decimal point, and optionally a leading '-'. A token made up solely of the sign
and/or the decimal point (".", "-", "-.") is not a number and is returned as-is.
Exponents, a leading '+', hex literals and the like are not recognised.

An integer token that does not fit in an `Int` makes `Base.parse` throw.
"""
function trynum(s::AbstractString)
    decimal_count = 0
    dash_count = 0
    for (i, c) in enumerate(s)
        if c == '.'
            decimal_count += 1
        elseif c == '-' && i == 1 && length(s) > 1
            # A dash only counts as a sign in first position, and only when
            # something follows it.
            dash_count += 1
        elseif !isdigit(c)
            return s
        end
    end
    # Reject more than one decimal point, and tokens with no digits at all
    # (this also covers the empty string).
    if decimal_count > 1 || decimal_count + dash_count >= length(s)
        return s
    end
    # Haven't written our own string-to-number function; use Julia's.
    # The calls are qualified with `Base.` because this file defines its own
    # one-argument `parse`, which shadows the `Base` function of the same name.
    if decimal_count > 0
        return Base.parse(Float64, s)
    else
        return Base.parse(Int, s)
    end
end

"""
    tokenize(s::AbstractString; sep="()", ws=<space, tab, newline>)

Split `s` into a flat tuple of string tokens.

Every character found in `sep` becomes a one-character token of its own; every
character found in `ws` only separates tokens and is dropped; any other run of
characters becomes a single token.

# Examples
```julia
tokenize("(a (b))")   # ("(", "a", "(", "b", ")", ")")
```
"""
function tokenize(s::AbstractString; sep="()", ws=" \t\n")
    tokens = AbstractString[]
    # Characters of the token currently being read. Accumulating characters
    # (rather than slicing `s` by position) keeps this correct for non-ASCII
    # input, where character counts and string indices differ.
    pending = Char[]
    for c in s
        if c in sep || c in ws
            if !isempty(pending)
                push!(tokens, String(pending))
                empty!(pending)
            end
            if c in sep
                push!(tokens, string(c))
            end
        else
            push!(pending, c)
        end
    end
    # Input may end in the middle of a token.
    if !isempty(pending)
        push!(tokens, String(pending))
    end
    # Convert Array of strings to an (immutable) Tuple
    return tuple(tokens...)
end

"""
    _parse_tokens(tokens, depth=0)

Helper for `parse()`: turn a flat sequence of tokens into nested tuples.

Returns two values: a tuple of the parsed elements, and the index (within
`tokens`) of the first token that was not consumed by this call.

Note that this function always returns a Tuple, even if only a single token is
passed: calling code must unwrap.

A "(" starts a nested tuple, which is parsed by a recursive call with
`depth + 1`; a ")" ends the current one. Throws an `AssertionError` when a ")"
is met at depth 0 ("Missing open bracket...") or when the tokens run out while
depth is non-zero ("Missing close bracket...").
"""
function _parse_tokens(tokens, depth=0)
    L = []
    i = 1
    while i <= length(tokens)
        el = tokens[i]
        if el == "("
            # `skip` is an index into the slice that starts at i + 1, so adding
            # it to `i` lands just past the matching ")".
            (expr, skip) = _parse_tokens(tokens[(i+1):end], depth + 1)
            push!(L, expr)
            i += skip
        elseif el == ")"
            # Thrown explicitly rather than via @assert so that the check can
            # never be compiled out.
            depth > 0 || throw(AssertionError("Missing open bracket..."))
            return (tuple(L...), i + 1)
        else
            push!(L, el)
            i += 1
        end
    end
    depth == 0 || throw(AssertionError("Missing close bracket..."))
    return (tuple(L...), i)
end

"""
    parse(s::AbstractString)

Take a string and return a tuple-based pseudo AST.

Only the first top-level expression is returned; any further top-level elements
are parsed and then discarded. Input without any tokens (empty or all
whitespace) fails with a `BoundsError`.

Notes: numeric tokens are converted by `trynum` (to `Int`, or to `Float64` when
they contain a decimal point); all other tokens stay strings. No weird special
characters are parsed specially (eg, comma, pipe, hash, etc).

# Examples
```julia
parse("1")                        # 1
parse("(a 134 ( 4 5 6) 2 ( ))")   # ("a", 134, (4, 5, 6), 2, ())
```
"""
function parse(s::AbstractString)
    # First split into a flat list, then convert any numbers ...
    tokens = map(trynum, tokenize(s))

    # ... then parse into nested tuples.
    (expr, _) = _parse_tokens(tokens)

    # Unwrap the first element and return that.
    return expr[1]
end

# parse("1")
# parse("((()) ())")
# parse("(a 134 ( 4 5 6) 2 ( ))")
# parse("(asdf 134 ( 4 5 6) 2 ( ))")
# parse("(1 2 3 -4567 123.25624 . -.)")
# parse("(1 2 3 -4567 123.25624 .1 1. -.1 -1. - .
# Remainder of the sexpr.jl hunk (Julia example comments):
# -.)")
# parse("(first (list 1 (+ 2 3) 9))")
# diff --git a/sexpr.py b/sexpr.py new file mode 100644 index 0000000..61bb09c --- /dev/null +++ b/sexpr.py @@ -0,0 +1,40 @@


def tokenize(s, sep="()", ws=" \t\n"):
    """Split the string `s` into a flat list of string tokens.

    Every character found in `sep` becomes a one-character token of its own;
    every character found in `ws` only separates tokens and is dropped; any
    other run of characters becomes a single token.
    """
    tokens = []
    pending = []  # characters of the token currently being read
    for c in s:
        if c in sep or c in ws:
            if pending:
                tokens.append("".join(pending))
                pending = []
            if c in sep:
                tokens.append(c)
        else:
            pending.append(c)
    # Input may end in the middle of a token.
    if pending:
        tokens.append("".join(pending))
    return tokens


def _parse_tokens(tokens, depth=0):
    """Turn a flat list of tokens into nested lists.

    Returns a pair: the list of parsed elements, and the index (within
    `tokens`) of the first token that was not consumed by this call.

    A '(' starts a nested list, parsed by a recursive call with `depth + 1`;
    a ')' ends the current one.

    Raises AssertionError when a ')' is met at depth 0, or when the tokens run
    out while depth is non-zero.
    """
    L = []
    i = 0
    while i < len(tokens):
        el = tokens[i]
        if el == '(':
            # `skip` is an index into the slice that starts at i + 1, so the
            # extra + 1 accounts for the '(' itself.
            expr, skip = _parse_tokens(tokens[i+1:], depth+1)
            L.append(expr)
            i += skip + 1
        elif el == ')':
            # Raised explicitly (not via `assert`) so that the check still
            # runs under `python -O`.
            if depth <= 0:
                raise AssertionError("Missing open bracket...")
            return L, i+1
        else:
            L.append(el)
            i += 1
    if depth != 0:
        raise AssertionError("Missing close bracket...")
    return L, i


def parse(s):
    """Parse the string `s` and return its first top-level expression.

    Atoms are returned as strings and bracketed groups as (nested) lists.
    Further top-level elements are parsed and then discarded. Input without
    any tokens raises IndexError.
    """
    tokens = tokenize(s)
    expr, size = _parse_tokens(tokens)
    return expr[0]