aboutsummaryrefslogtreecommitdiffstats
path: root/julia/sexpr.jl
blob: 1a0ba3c8e1e0e5789debb909102db2004606cbdf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
"""
Takes a string 's' and will convert it to an Int64 or Float64 (if possible), or
return the string if not.
"""
function trynum(s::AbstractString)
    # Test for: only number chars, and optionally a single decimal (period),
    # but not just a single decimal on it's own ("." is not a number).
    # Also allow a '-' in the first position.
    # Could have just try/caught around Julia's float(), used below.
    decimal_count = 0
    dash_count = 0
    for (i, c) in enumerate(s)
        if c == '.'
            decimal_count += 1
        elseif c == '-' && i == 1 && length(s) > 1
            dash_count += 1
        elseif !(c in "1234567890")
            return s
        end
    end
    if decimal_count > 1 || decimal_count + dash_count >= length(s)
        return s
    end
    # Haven't written our own string-to-number function; use Julia's
    if decimal_count > 0
        return float(s)
    else
        # Julia 0.4.3 complains with "use parse(Int,s) instead" of int(s)
        return parse(Int, s)
    end
end

"""
Take a string 's' and splits it into elements (based on 'ws' white space
characters) and tuples (based on 'sep' separators).
"""
function tokenize(s::AbstractString; sep="()", ws=" \t\n")
    L = AbstractString[]
    food = 0  # num of yummy non-whitespace characters we have just eaten
    for (i, c) in enumerate(s)
        if c in sep || c in ws
            if food > 0
                push!(L, s[i-food:i-1])
            end
            if c in sep
                push!(L, string(c))
            end
            food = 0
        elseif i == length(s)
            push!(L, s[i-food:end])
            # will break next iteration
        else
            food += 1
        end
    end
    # Convert Array of strings to an (immutable) Tuple
    return tuple(L...)
end

"""
Helper for `parse()`.

Returns two values: the parsed expression, and the number of tokens consumed.

Note that this function always returns a Tuple, even if only a single token is
passed: calling code must unwrap. Also, this will only parse out the first
complete expression, silently discarding trailing elements.

Should probably use value exceptions instead of @assert on failure
"""
function _parse_tokens(tokens, depth=0)
    L = []
    i = 1
    while i <= length(tokens)
        el = tokens[i]
        if el == "("
            (expr, skip) = _parse_tokens(tokens[(i+1):end], depth + 1)
            push!(L, expr)
            i += skip
        elseif el == ")"
            @assert depth > 0 "Missing open bracket..."
            return (tuple(L...), i+1)
        else
            push!(L, el)
            i += 1
        end
    end
    @assert depth == 0 "Missing close bracket..."
    return (tuple(L...), i)
end

"""
Takes a string and returns a tuple-based pseudo AST.

Notes: all numbers are converted to Float64. No weird special characters are
parsed specially (eg, comma, pipe, hash, etc).
"""
function parse(s::AbstractString)
    # First split into a flat list...
    tokens = tokenize(s)

    # ... then convert any numbers ...
    tokens = map(trynum, tokens)

    # ... then parse into nested tuples.
    (expr, sz) = _parse_tokens(tokens)

    # Unwrap the first element and return that.
    return expr[1]
end

# parse("1")
# parse("((()) ())")
# parse("(a 134 (   4 5 6) 2 ( ))")
# parse("(asdf 134 (   4 5 6) 2 ( ))")
# parse("(1 2 3 -4567 123.25624 . -.)")
# parse("(1 2 3 -4567 123.25624 .1 1. -.1 -1. - . -.)")
# parse("(first (list 1 (+ 2 3) 9))")