commit WIP on python and julia s-expressions

author: bnewbold <bnewbold@robocracy.org> 2016-04-18 12:50:50 -0400
committer: bnewbold <bnewbold@robocracy.org> 2016-04-18 12:50:50 -0400
commit: 4d5770f0f41ccfc84f3e29701151ea29660539e0 (patch)
tree: 477f837fc333addd0d9f839b5f1018f16ecde4ca
parent: 277854dda63c5f6c4dfb30499e16fd7d9e78c575 (diff)
download: spectrum-4d5770f0f41ccfc84f3e29701151ea29660539e0.tar.gz
spectrum-4d5770f0f41ccfc84f3e29701151ea29660539e0.zip
2 files changed, 159 insertions, 0 deletions
diff --git a/sexpr.jl b/sexpr.jl
new file mode 100644
index 0000000..1a0ba3c
--- /dev/null
+++ b/sexpr.jl
@@ -0,0 +1,119 @@
+
+"""
+Takes a string 's' and will convert it to an Int64 or Float64 (if possible), or
+return the string if not.
+"""
+function trynum(s::AbstractString)
+    # Test for: only number chars, and optionally a single decimal (period),
+    # but not just a single decimal on it's own ("." is not a number).
+    # Also allow a '-' in the first position.
+    # Could have just try/caught around Julia's float(), used below.
+    decimal_count = 0
+    dash_count = 0
+    for (i, c) in enumerate(s)
+        if c == '.'
+            decimal_count += 1
+        elseif c == '-' && i == 1 && length(s) > 1
+            dash_count += 1
+        elseif !(c in "1234567890")
+            return s
+        end
+    end
+    if decimal_count > 1 || decimal_count + dash_count >= length(s)
+        return s
+    end
+    # Haven't written our own string-to-number function; use Julia's
+    if decimal_count > 0
+        return float(s)
+    else
+        # Julia 0.4.3 complains with "use parse(Int,s) instead" of int(s)
+        return parse(Int, s)
+    end
+end
+
+"""
+Take a string 's' and splits it into elements (based on 'ws' white space
+characters) and tuples (based on 'sep' separators).
+"""
+function tokenize(s::AbstractString; sep="()", ws=" \t\n")
+    L = AbstractString[]
+    food = 0  # num of yummy non-whitespace characters we have just eaten
+    for (i, c) in enumerate(s)
+        if c in sep || c in ws
+            if food > 0
+                push!(L, s[i-food:i-1])
+            end
+            if c in sep
+                push!(L, string(c))
+            end
+            food = 0
+        elseif i == length(s)
+            push!(L, s[i-food:end])
+            # will break next iteration
+        else
+            food += 1
+        end
+    end
+    # Convert Array of strings to an (immutable) Tuple
+    return tuple(L...)
+end
+
+"""
+Helper for `parse()`.
+
+Returns two values: the parsed expression, and the number of tokens consumed.
+
+Note that this function always returns a Tuple, even if only a single token is
+passed: calling code must unwrap. Also, this will only parse out the first
+complete expression, silently discarding trailing elements.
+
+Should probably use value exceptions instead of @assert on failure
+"""
+function _parse_tokens(tokens, depth=0)
+    L = []
+    i = 1
+    while i <= length(tokens)
+        el = tokens[i]
+        if el == "("
+            (expr, skip) = _parse_tokens(tokens[(i+1):end], depth + 1)
+            push!(L, expr)
+            i += skip
+        elseif el == ")"
+            @assert depth > 0 "Missing open bracket..."
+            return (tuple(L...), i+1)
+        else
+            push!(L, el)
+            i += 1
+        end
+    end
+    @assert depth == 0 "Missing close bracket..."
+    return (tuple(L...), i)
+end
+
+"""
+Takes a string and returns a tuple-based pseudo AST.
+
+Notes: all numbers are converted to Float64. No weird special characters are
+parsed specially (eg, comma, pipe, hash, etc).
+"""
+function parse(s::AbstractString)
+    # First split into a flat list...
+    tokens = tokenize(s)
+
+    # ... then convert any numbers ...
+    tokens = map(trynum, tokens)
+
+    # ... then parse into nested tuples.
+    (expr, sz) = _parse_tokens(tokens)
+
+    # Unwrap the first element and return that.
+    return expr[1]
+end
+
+# parse("1")
+# parse("((()) ())")
+# parse("(a 134 (   4 5 6) 2 ( ))")
+# parse("(asdf 134 (   4 5 6) 2 ( ))")
+# parse("(1 2 3 -4567 123.25624 . -.)")
+# parse("(1 2 3 -4567 123.25624 .1 1. -.1 -1. - . -.)")
+# parse("(first (list 1 (+ 2 3) 9))")
diff --git a/sexpr.py b/sexpr.py
new file mode 100644
index 0000000..61bb09c
--- /dev/null
+++ b/sexpr.py
@@ -0,0 +1,40 @@
+
+def tokenize(s, sep="()", ws=" \t\n"):
+    L = []
+    food = 0
+    for i, c in enumerate(s):
+        if c in sep or c in ws:
+            if food > 0:
+                L.append(s[i-food:i])
+            if c in sep:
+                L.append(c)
+            food = 0
+        elif i+1 == len(s):
+            L.append(s[i-food:])
+        else:
+            food += 1
+    return L
+
+def _parse_tokens(tokens, depth=0):
+    L = []
+    i = 0
+    while i < len(tokens):
+        el = tokens[i]
+        if el == '(':
+            expr, skip = _parse_tokens(tokens[i+1:], depth+1)
+            L.append(expr)
+            i += skip + 1
+        elif el == ')':
+            assert depth > 0, "Missing open bracket..."
+            return L, i+1
+        else:
+            L.append(el)
+            i += 1
+    assert depth == 0, "Missing close bracket..."
+    return L, i
+
+def parse(s):
+    tokens = tokenize(s)
+    expr, size = _parse_tokens(tokens)
+    return expr[0]
+
author	bnewbold <bnewbold@robocracy.org>	2016-04-18 12:50:50 -0400
committer	bnewbold <bnewbold@robocracy.org>	2016-04-18 12:50:50 -0400
commit	4d5770f0f41ccfc84f3e29701151ea29660539e0 (patch)
tree	477f837fc333addd0d9f839b5f1018f16ecde4ca
parent	277854dda63c5f6c4dfb30499e16fd7d9e78c575 (diff)
download	spectrum-4d5770f0f41ccfc84f3e29701151ea29660539e0.tar.gz spectrum-4d5770f0f41ccfc84f3e29701151ea29660539e0.zip