1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
"""
Takes a string 's' and will convert it to an Int64 or Float64 (if possible), or
return the string if not.
"""
function trynum(s::AbstractString)
# Test for: only number chars, and optionally a single decimal (period),
# but not just a single decimal on it's own ("." is not a number).
# Also allow a '-' in the first position.
# Could have just try/caught around Julia's float(), used below.
decimal_count = 0
dash_count = 0
for (i, c) in enumerate(s)
if c == '.'
decimal_count += 1
elseif c == '-' && i == 1 && length(s) > 1
dash_count += 1
elseif !(c in "1234567890")
return s
end
end
if decimal_count > 1 || decimal_count + dash_count >= length(s)
return s
end
# Haven't written our own string-to-number function; use Julia's
if decimal_count > 0
return float(s)
else
# Julia 0.4.3 complains with "use parse(Int,s) instead" of int(s)
return parse(Int, s)
end
end
"""
Take a string 's' and splits it into elements (based on 'ws' white space
characters) and tuples (based on 'sep' separators).
"""
function tokenize(s::AbstractString; sep="()", ws=" \t\n")
L = AbstractString[]
food = 0 # num of yummy non-whitespace characters we have just eaten
for (i, c) in enumerate(s)
if c in sep || c in ws
if food > 0
push!(L, s[i-food:i-1])
end
if c in sep
push!(L, string(c))
end
food = 0
elseif i == length(s)
push!(L, s[i-food:end])
# will break next iteration
else
food += 1
end
end
# Convert Array of strings to an (immutable) Tuple
return tuple(L...)
end
"""
Helper for `parse()`.
Returns two values: the parsed expression, and the number of tokens consumed.
Note that this function always returns a Tuple, even if only a single token is
passed: calling code must unwrap. Also, this will only parse out the first
complete expression, silently discarding trailing elements.
Should probably use value exceptions instead of @assert on failure
"""
function _parse_tokens(tokens, depth=0)
L = []
i = 1
while i <= length(tokens)
el = tokens[i]
if el == "("
(expr, skip) = _parse_tokens(tokens[(i+1):end], depth + 1)
push!(L, expr)
i += skip
elseif el == ")"
@assert depth > 0 "Missing open bracket..."
return (tuple(L...), i+1)
else
push!(L, el)
i += 1
end
end
@assert depth == 0 "Missing close bracket..."
return (tuple(L...), i)
end
"""
Takes a string and returns a tuple-based pseudo AST.
Notes: all numbers are converted to Float64. No weird special characters are
parsed specially (eg, comma, pipe, hash, etc).
"""
function parse(s::AbstractString)
# First split into a flat list...
tokens = tokenize(s)
# ... then convert any numbers ...
tokens = map(trynum, tokens)
# ... then parse into nested tuples.
(expr, sz) = _parse_tokens(tokens)
# Unwrap the first element and return that.
return expr[1]
end
# parse("1")
# parse("((()) ())")
# parse("(a 134 ( 4 5 6) 2 ( ))")
# parse("(asdf 134 ( 4 5 6) 2 ( ))")
# parse("(1 2 3 -4567 123.25624 . -.)")
# parse("(1 2 3 -4567 123.25624 .1 1. -.1 -1. - . -.)")
# parse("(first (list 1 (+ 2 3) 9))")
|