add warning comment
[lisp-parkour.git] / parser / init.lua
blob181e210f4f60950a30296cd963cf258673e23a36
1 require'lpeg'
2 local l = lpeg
3 local P, S, R, V, C, Cb, Cc, Cg, Cmt, Cp, Ct = l.P, l.S, l.R, l.V, l.C, l.Cb, l.Cc, l.Cg, l.Cmt, l.Cp, l.Ct
5 local M = {}
7 local cwd = ...
9 local hspace = S" \t"
10 local newline = S"\r\n"
12 local function incr(index, offset) return index + offset - 1 end
13 local function m1(pos) return pos - 1 end
14 local function neg(pos) return -pos end
15 local function past(_, position, pos) return position <= pos end
16 local function startof(element) return element.start end
17 local function _start(patt) return Cg(patt, "start") end
18 local function _finish(patt) return Cg(patt, "finish") end
19 local function _string(patt) return Cg(patt / function() return true end, "is_string") end
20 local function _char(patt) return Cg(patt / function() return true end, "is_char") end
21 local function _comment(patt) return Cg(patt / function() return true end, "is_comment") end
22 local function _line_comment(patt) return Cg(patt / function() return true end, "is_line_comment") end
23 local function delimited_range(d)
24 return Cg(d, "d") * (P(1) - "\\" - d + P"\\" * 1)^0 * d
25 end
26 local function nested_pair(d1, d2)
27 return P{Cg(d1, "d") * (P(1) - d1 - d2 + V(1))^0 * d2}
28 end
30 local function purge(base, sexps)
31 if not base then return sexps end
32 for _, list in pairs(sexps) do
33 for i = #list, 1, -1 do
34 if list[i].start >= base then
35 table.remove(list)
36 else
37 break
38 end
39 end
40 end
41 return sexps
42 end
44 local function append(old, new)
45 for name, list in pairs(new) do
46 for _, element in ipairs(list) do
47 table.insert(old[name], element)
48 end
49 if old[name].is_root then
50 -- XXX: check the length first and then access [1].
51 -- Otherwise the autovivification of [1] in a file with only blanks
52 -- will trigger an infinite recursion.
53 -- TODO: get rid of this trick altogether and instead make the parser set [1].indent.
54 local first = #old[name] > 0 and old[name][1]
55 -- XXX: this assumes that the first sexp starts at column 0.
56 -- if not true, _only_ the first sexp will have wrong indent.
57 if first and not first.indent then first.indent = 0 end
58 end
59 end
60 local last = #old.tree ~= 0 and old.tree[#old.tree]
61 old.tree.first_invalid = last and (last.finish or last.start) + 1 or 0
62 return old
63 end
65 local function make_driver(read, parse, before, sexps)
66 return function(advance)
67 local base
68 if sexps.tree.first_invalid then
69 local last_parsed = #sexps.tree ~= 0 and sexps.tree[#sexps.tree].finish + 1 or 0
70 if sexps.tree.first_invalid < last_parsed then
71 local nearest, n = before(sexps.tree, sexps.tree.first_invalid, startof)
72 local second_nearest = n and sexps.tree[n - 1]
73 local has_eol = second_nearest and second_nearest.is_line_comment
74 local nearest_finish = second_nearest and second_nearest.finish + (has_eol and 0 or 1)
75 local nearest_start = nearest and nearest.start
76 base = nearest_finish or nearest_start or sexps.tree.first_invalid
77 else
78 local last = sexps.tree[#sexps.tree]
79 local has_eol = last and last.is_line_comment
80 base = has_eol and last.finish or sexps.tree.first_invalid
81 end
82 end
83 local new
84 local chunk_size = 4 * 1024
85 local tries = 1
86 local content = ""
87 local from = base
88 local len = advance > 0 and advance - base + chunk_size or chunk_size
89 repeat
90 local chunk, further = read(from, len)
91 if not chunk then break end
92 content = content .. chunk
93 new = parse(content, advance, base)
94 from = from + len
95 tries = tries + 1
96 len = chunk_size * 2^(tries - 1)
97 until not new.tree.unbalanced and (advance > 0 or advance < 0 and #new.tree >= math.abs(advance))
98 or not further
99 return append(purge(base, sexps), new or {})
103 local function tag_next_node(t, i, pos)
104 local j = i
105 repeat
106 j = j + 1
107 until type(t[j]) == "table" or not t[j]
108 local nxt = t[j]
109 if pos >= 0 then
110 if nxt and not nxt.indent then
111 nxt.indent = nxt.start - pos - 1
113 else
114 nxt.section = true
118 local function make_parser(prefix, weak_prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
119 local function atom_methodify(t)
120 return setmetatable(t, (t.d and not t.is_list) and quasilist_node or atom_node)
122 local function extract_breaks(t)
123 for i = #t, 1, -1 do
124 local node = t[i]
125 if type(node) == "number" then
126 tag_next_node(t, i, node)
127 table.remove(t, i)
128 elseif node.is_line_comment then
129 tag_next_node(t, i, node[1])
130 table.remove(node)
133 return t
136 return function(content, advance, offset)
137 local tree, esc = nil, {}
138 local I = Cp() * Cc(offset) / incr
139 local I1 = I / m1
140 local opening = _start(I) * Cg(prefix^1, "p")^-1 * d1
141 local closing = _finish(I) * d2
142 local lone_prefix = _start(I) * Cg(prefix^1, "p") * -#P(d1 + "|") * _finish(I1)
143 local unbalanced
144 local function debalance(t) unbalanced = true return t end
145 local function collect_escaped(sexp)
146 if sexp.is_string or sexp.is_comment or sexp.is_char then
147 table.insert(esc, sexp)
149 return sexp
151 local char = ("\\" * (P"alarm" + "backspace" + "delete" + "escape"
152 + "newline" + "null" + "return" + "space" + "tab")
153 + "\\x" * R("09", "AF", "af")^1
154 + "\\" * P(1)) * _char(P(true))
155 local dq_string, multi_esc = delimited_range('"'), delimited_range("|")
156 local stringy = (opposite["|"] and (multi_esc + dq_string) or dq_string) * _string(P(true))
157 local nline = I * newline
158 local blank = nline + hspace + "\f"
159 local lcd = opposite["\n"]
160 local lc_level = lcd * #-P(lcd) + P(lcd)^1 * hspace^-1
161 local line_comment = Cg(lc_level, "d") * (1 - nline)^0 * nline * _comment(P(true)) * _line_comment(P(true))
162 local bcd1, bcd2
163 for o, c in pairs(opposite) do
164 -- XXX: here I assume that #| is the only multi-char delimiter,
165 -- and that it means block comment
166 if #o > 1 then
167 bcd1, bcd2 = o, c
168 break
171 local block_comment = bcd1 and bcd2 and nested_pair(bcd1, bcd2) * _comment(P(true))
172 local comment = block_comment and (block_comment + line_comment) or line_comment
173 local incomplete_comment = bcd1 and (lc_level + bcd1) or lc_level
174 local symbol = weak_prefix
175 and (1 - D1 - D2 - blank - char - stringy - comment - prefix + weak_prefix)^1
176 or (1 - D1 - D2 - blank - char - stringy - comment - prefix)^1
177 local atom = Ct(_start(I) * (
178 comment
179 + Cg(prefix^1, "p")^-1 * (
180 stringy
181 + char
182 + symbol
183 )) * _finish(I1)) / atom_methodify / collect_escaped
184 + Ct(lone_prefix)
185 local list = P{Ct(opening * blank^0 * (atom + blank^1 + V(1))^0 * closing) *
186 Cc(list_node) / setmetatable / extract_breaks}
187 local lone_delimiter = Ct(_start(I) * (D1 + D2 + incomplete_comment) * _finish(I1)) / debalance
188 local section_break = I * "\f" / neg
189 local sexp = (section_break + blank)^0 * (list + atom + lone_delimiter)
190 if advance ~= 0 then
191 local top_level = Ct(advance < 0
192 and sexp^advance
193 or (Cmt(Cc(advance - offset), past) * sexp)^0)
194 tree = P{top_level / extract_breaks}:match(content)
195 tree.unbalanced = unbalanced
197 return {tree = tree, escaped = esc}
201 local opposite_fallback = {
202 __index = function(t, delimiter)
203 local opening = t["\n"]
204 return delimiter and delimiter:find("^" .. opening) and t[opening]
208 local function catchup(tree, range)
209 if range.finish and range.finish >= tree.first_invalid then
210 tree.parse_to(range.finish + 1)
214 function M.new(syntax, node, read)
215 local L = require(cwd.."."..syntax)
216 local opposite = setmetatable(L.opposite, opposite_fallback)
217 local sexps = {
218 tree = {
219 first_invalid = 0,
220 is_root = true,
221 }, -- (sorted) tree of sexps
222 escaped = {}, -- sorted list of strings and comments (redundant, for faster search)
224 local opening, closing, reverse = {}, {}, {}
225 for o, c in pairs(opposite) do
226 if S"([{":match(o) then
227 table.insert(opening, o)
228 table.insert(closing, c)
230 if c ~= o and #c == 1 then -- XXX: don't reverse block comments and strings
231 reverse[c] = o
234 for c, o in pairs(reverse) do
235 opposite[c] = o
237 local D1 = S(table.concat(opening))
238 local d1 = Cg(D1, "d")
239 local d2 = Cmt(Cb("d") * C(1), function(_, _, o, c) return opposite[o] == c end)
240 local D2 = S(table.concat(closing))
241 local atom_node, list_node, quasilist_node = node.atom, node.list, node.quasilist(opposite)
242 local parse = make_parser(L.prefix, L.weak_prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
243 local drive = make_driver(read, parse, node._before, sexps)
244 local root_methods = node.root(opposite)
245 setmetatable(sexps.tree, {
246 __index = function(t, key)
247 if type(key) == "number" then
248 if #t < key then t.parse_to(#t - key) end
249 return rawget(t, key)
250 elseif root_methods[key] then
251 return root_methods[key](t)
252 elseif key == "parse_to" then
253 return drive
257 setmetatable(sexps.escaped, {
258 __index = {
259 around = function(range)
260 catchup(sexps.tree, range)
261 return node._around(sexps.escaped, range)
265 return {tree = sexps.tree, escaped = sexps.escaped, opposite = opposite}, d1, D2
268 return M