use an internal list of newlines
[lisp-parkour.git] / parser / init.lua
blobd4da70f0d4fd52ff8f9889823dd322adc1ec7922
1 require'lpeg'
2 local l = lpeg
3 local P, S, R, V, C, Cb, Cc, Cg, Cmt, Cp, Ct = l.P, l.S, l.R, l.V, l.C, l.Cb, l.Cc, l.Cg, l.Cmt, l.Cp, l.Ct
5 local M = {}
7 local cwd = ...
9 local hspace = S" \t"
10 local newline = S"\r\n"
12 local function incr(index, offset) return index + offset - 1 end
13 local function m1(pos) return pos - 1 end
14 local function neg(pos) return -pos end
15 local function past(_, position, pos) return position <= pos end
16 local function startof(element) return element.start end
17 local function _start(patt) return Cg(patt, "start") end
18 local function _finish(patt) return Cg(patt, "finish") end
19 local function _string(patt) return Cg(patt / function() return true end, "is_string") end
20 local function _char(patt) return Cg(patt / function() return true end, "is_char") end
21 local function _comment(patt) return Cg(patt / function() return true end, "is_comment") end
22 local function _line_comment(patt) return Cg(patt / function() return true end, "is_line_comment") end
23 local function delimited_range(d)
24 return Cg(d, "d") * (P(1) - "\\" - d + P"\\" * 1)^0 * d
25 end
26 local function nested_pair(d1, d2)
27 return P{Cg(d1, "d") * (P(1) - d1 - d2 + V(1))^0 * d2}
28 end
30 local function purge(base, sexps)
31 if not base then return sexps end
32 for _, list in pairs(sexps) do
33 -- XXX: check if list is not empty first, because otherwise the
34 -- autovivification of [1] triggers a recursion and a stack overflow.
35 local tbl = #list > 0 and type(list[1]) == "table"
36 for i = #list, 1, -1 do
37 if (tbl and list[i].start or list[i]) >= base then
38 table.remove(list)
39 else
40 break
41 end
42 end
43 end
44 return sexps
45 end
47 local function append(old, new)
48 for name, list in pairs(new) do
49 for _, element in ipairs(list) do
50 table.insert(old[name], element)
51 end
52 if old[name].is_root then
53 -- XXX: check the length first and then access [1].
54 -- Otherwise the autovivification of [1] in a file with only blanks
55 -- will trigger an infinite recursion.
56 -- TODO: get rid of this trick altogether and instead make the parser set [1].indent.
57 local first = #old[name] > 0 and old[name][1]
58 -- XXX: this assumes that the first sexp starts at column 0.
59 -- if not true, _only_ the first sexp will have wrong indent.
60 if first and not first.indent then first.indent = 0 end
61 end
62 end
63 local last = #old.tree ~= 0 and old.tree[#old.tree]
64 old.tree.first_invalid = last and (last.finish or last.start) + 1 or 0
65 return old
66 end
68 local function make_driver(read, parse, before, sexps)
69 return function(advance)
70 local base
71 if sexps.tree.first_invalid then
72 local last_parsed = #sexps.tree ~= 0 and sexps.tree[#sexps.tree].finish + 1 or 0
73 if sexps.tree.first_invalid < last_parsed then
74 local nearest, n = before(sexps.tree, sexps.tree.first_invalid, startof)
75 local second_nearest = n and sexps.tree[n - 1]
76 local has_eol = second_nearest and second_nearest.is_line_comment
77 local nearest_finish = second_nearest and second_nearest.finish + (has_eol and 0 or 1)
78 local nearest_start = nearest and nearest.start
79 base = nearest_finish or nearest_start or sexps.tree.first_invalid
80 else
81 local last = sexps.tree[#sexps.tree]
82 local has_eol = last and last.is_line_comment
83 base = has_eol and last.finish or sexps.tree.first_invalid
84 end
85 end
86 local new
87 local chunk_size = 4 * 1024
88 local tries = 1
89 local content = ""
90 local from = base
91 local len = advance > 0 and advance - base + chunk_size or chunk_size
92 repeat
93 local chunk, further = read(from, len)
94 if not chunk then break end
95 content = content .. chunk
96 new = parse(content, advance, base)
97 from = from + len
98 tries = tries + 1
99 len = chunk_size * 2^(tries - 1)
100 until not new.tree.unbalanced and (advance > 0 or advance < 0 and #new.tree >= math.abs(advance))
101 or not further
102 return append(purge(base, sexps), new or {})
106 local function tag_next_node(t, i, pos)
107 local j = i
108 repeat
109 j = j + 1
110 until type(t[j]) == "table" or not t[j]
111 local nxt = t[j]
112 if pos >= 0 then
113 if nxt and not nxt.indent then
114 nxt.indent = nxt.start - pos - 1
116 else
117 nxt.section = true
121 local function make_parser(prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
122 local function atom_methodify(t)
123 return setmetatable(t, (t.d and not t.is_list) and quasilist_node or atom_node)
125 local function extract_breaks(t)
126 for i = #t, 1, -1 do
127 local node = t[i]
128 if type(node) == "number" then
129 tag_next_node(t, i, node)
130 table.remove(t, i)
131 elseif node.is_line_comment then
132 tag_next_node(t, i, node[1])
133 table.remove(node)
136 return t
139 return function(content, advance, offset)
140 local tree, esc, newlines = nil, {}, {}
141 local I = Cp() * Cc(offset) / incr
142 local I1 = I / m1
143 local opening = _start(I) * Cg(prefix^1, "p")^-1 * d1
144 local closing = _finish(I) * d2
145 local lone_prefix = _start(I) * Cg(prefix^1, "p") * -#P(d1 + "|") * _finish(I1)
146 local unbalanced
147 local function debalance(t) unbalanced = true return t end
148 local function collect_escaped(sexp)
149 if sexp.is_string or sexp.is_comment or sexp.is_char then
150 table.insert(esc, sexp)
152 return sexp
154 local char = ("\\" * (P"alarm" + "backspace" + "delete" + "escape"
155 + "newline" + "null" + "return" + "space" + "tab")
156 + "\\x" * R("09", "AF", "af")^1
157 + "\\" * P(1)) * _char(P(true))
158 local dq_string, multi_esc = delimited_range('"'), delimited_range("|")
159 local stringy = (opposite["|"] and (multi_esc + dq_string) or dq_string) * _string(P(true))
160 local nline = I * (Cc(newlines) * I * newline / table.insert)
161 local blank = nline + hspace + "\f"
162 local lcd = opposite["\n"]
163 local lc_level = lcd * #-P(lcd) + P(lcd)^1 * hspace^-1
164 local line_comment = Cg(lc_level, "d") * (1 - nline)^0 * nline * _comment(P(true)) * _line_comment(P(true))
165 local bcd1, bcd2
166 for o, c in pairs(opposite) do
167 if #c > 1 then
168 bcd1, bcd2 = o, c
169 break
172 local block_comment = bcd1 and bcd2 and nested_pair(bcd1, bcd2) * _comment(P(true))
173 local comment = block_comment and (block_comment + line_comment) or line_comment
174 local incomplete_comment = bcd1 and (lc_level + bcd1) or lc_level
175 local atom = Ct(_start(I) * (
176 comment
177 + Cg(prefix^1, "p")^-1 * (
178 stringy
179 + char
180 + (1 - D1 - D2 - blank - char - stringy - comment - prefix)^1
181 )) * _finish(I1)) / atom_methodify / collect_escaped
182 + Ct(lone_prefix)
183 local list = P{Ct(opening * blank^0 * (atom + blank^1 + V(1))^0 * closing) *
184 Cc(list_node) / setmetatable / extract_breaks}
185 local lone_delimiter = Ct(_start(I) * (D1 + D2 + incomplete_comment) * _finish(I1)) / debalance
186 local section_break = I * "\f" / neg
187 local sexp = (section_break + blank)^0 * (list + atom + lone_delimiter)
188 if advance ~= 0 then
189 local top_level = Ct(advance < 0
190 and sexp^advance
191 or (Cmt(Cc(advance - offset), past) * sexp)^0)
192 tree = P{top_level / extract_breaks}:match(content)
193 tree.unbalanced = unbalanced
195 return {tree = tree, escaped = esc, newlines = newlines}
199 local opposite_fallback = {
200 __index = function(t, delimiter)
201 local opening = t["\n"]
202 return delimiter and delimiter:find("^" .. opening) and t[opening]
206 local function catchup(tree, range)
207 if range.finish and range.finish >= tree.first_invalid then
208 tree.parse_to(range.finish + 1)
212 function M.new(syntax, node, read)
213 local L = require(cwd.."."..syntax)
214 local opposite = setmetatable(L.opposite, opposite_fallback)
215 local sexps = {
216 tree = {
217 first_invalid = 0,
218 is_root = true,
219 }, -- (sorted) tree of sexps
220 escaped = {}, -- sorted list of strings and comments (redundant, for faster search)
221 newlines = {}, -- sorted list of newlines
223 local opening, closing, reverse = {}, {}, {}
224 for o, c in pairs(opposite) do
225 if S"([{":match(o) then
226 table.insert(opening, o)
227 table.insert(closing, c)
229 if c ~= o and #c == 1 then -- XXX: don't reverse block comments and strings
230 reverse[c] = o
233 for c, o in pairs(reverse) do
234 opposite[c] = o
236 local D1 = S(table.concat(opening))
237 local d1 = Cg(D1, "d")
238 local d2 = Cmt(Cb("d") * C(1), function(_, _, o, c) return opposite[o] == c end)
239 local D2 = S(table.concat(closing))
240 local atom_node, list_node, quasilist_node = node.atom, node.list, node.quasilist(opposite)
241 local parse = make_parser(L.prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
242 local drive = make_driver(read, parse, node._before, sexps)
243 local root_methods = node.root(opposite)
244 setmetatable(sexps.tree, {
245 __index = function(t, key)
246 if type(key) == "number" then
247 if #t < key then t.parse_to(#t - key) end
248 return rawget(t, key)
249 elseif root_methods[key] then
250 return root_methods[key](t)
251 elseif key == "parse_to" then
252 return drive
256 setmetatable(sexps.escaped, {
257 __index = {
258 around = function(range)
259 catchup(sexps.tree, range)
260 return node._around(sexps.escaped, range)
264 local function eol_at(pos)
265 return node._after(sexps.newlines, pos, function(n) return n end)
267 local function bol_at(pos)
268 local ret = node._before(sexps.newlines, pos, function(n) return n end)
269 return ret and ret + 1 or 0
271 return {tree = sexps.tree, escaped = sexps.escaped, opposite = opposite,
272 eol_at = eol_at, bol_at = bol_at}, d1, D2
275 return M