move the common Ct outside
[lisp-parkour.git] / parser / init.lua
blob34ae2a3873204530be1f1b367df7eb391f1e9889
1 require'lpeg'
2 local l = lpeg
3 local P, S, R, V, C, Cb, Cc, Cg, Cmt, Cp, Ct = l.P, l.S, l.R, l.V, l.C, l.Cb, l.Cc, l.Cg, l.Cmt, l.Cp, l.Ct
5 local M = {}
7 local cwd = ...
9 local hspace = S" \t"
10 local newline = S"\r\n"
12 local function incr(index, offset) return index + offset - 1 end
13 local function m1(pos) return pos - 1 end
14 local function neg(pos) return -pos end
15 local function past(_, position, pos) return position <= pos end
16 local function startof(element) return element.start end
17 local function _start(patt) return Cg(patt, "start") end
18 local function _finish(patt) return Cg(patt, "finish") end
19 local function _string(patt) return Cg(patt / function() return true end, "is_string") end
20 local function _char(patt) return Cg(patt / function() return true end, "is_char") end
21 local function _comment(patt) return Cg(patt / function() return true end, "is_comment") end
22 local function _line_comment(patt) return Cg(patt / function() return true end, "is_line_comment") end
23 local function delimited_range(d)
24 return Cg(d, "d") * (P(1) - "\\" - d + P"\\" * 1)^0 * d
25 end
26 local function nested_pair(d1, d2)
27 return P{Cg(d1, "d") * (P(1) - d1 - d2 + V(1))^0 * d2}
28 end
30 local function purge(base, sexps)
31 if not base then return sexps end
32 for _, list in pairs(sexps) do
33 for i = #list, 1, -1 do
34 if list[i].start >= base then
35 table.remove(list)
36 else
37 break
38 end
39 end
40 end
41 return sexps
42 end
44 local function append(old, new)
45 for name, list in pairs(new) do
46 for _, element in ipairs(list) do
47 table.insert(old[name], element)
48 end
49 if old[name].is_root then
50 -- XXX: check the length first and then access [1].
51 -- Otherwise the autovivification of [1] in a file with only blanks
52 -- will trigger an infinite recursion.
53 -- TODO: get rid of this trick altogether and instead make the parser set [1].indent.
54 local first = #old[name] > 0 and old[name][1]
55 -- XXX: this assumes that the first sexp starts at column 0.
56 -- if not true, _only_ the first sexp will have wrong indent.
57 if first and not first.indent then first.indent = 0 end
58 end
59 end
60 local last = #old.tree ~= 0 and old.tree[#old.tree]
61 old.tree.first_invalid = last and (last.finish or last.start) + 1 or 0
62 return old
63 end
65 local function make_driver(read, parse, before, sexps)
66 return function(advance)
67 local base
68 if sexps.tree.first_invalid then
69 local last_parsed = #sexps.tree ~= 0 and sexps.tree[#sexps.tree].finish + 1 or 0
70 if sexps.tree.first_invalid < last_parsed then
71 local nearest, n = before(sexps.tree, sexps.tree.first_invalid, startof)
72 local second_nearest = n and sexps.tree[n - 1]
73 local has_eol = second_nearest and second_nearest.is_line_comment
74 local nearest_finish = second_nearest and second_nearest.finish + (has_eol and 0 or 1)
75 local nearest_start = nearest and nearest.start
76 base = nearest_finish or nearest_start or sexps.tree.first_invalid
77 else
78 local last = sexps.tree[#sexps.tree]
79 local has_eol = last and last.is_line_comment
80 base = has_eol and last.finish or sexps.tree.first_invalid
81 end
82 end
83 local new
84 local chunk_size = 4 * 1024
85 local tries = 1
86 local content = ""
87 local from = base
88 local len = advance > 0 and advance - base + chunk_size or chunk_size
89 repeat
90 local chunk, further = read(from, len)
91 if not chunk then break end
92 content = content .. chunk
93 new = parse(content, advance, base)
94 from = from + len
95 tries = tries + 1
96 len = chunk_size * 2^(tries - 1)
97 until not new.tree.unbalanced and (advance > 0 or advance < 0 and #new.tree >= math.abs(advance))
98 or not further
99 return append(purge(base, sexps), new or {})
103 local function tag_next_node(t, i, pos)
104 local j = i
105 repeat
106 j = j + 1
107 until type(t[j]) == "table" or not t[j]
108 local nxt = t[j]
109 if pos >= 0 then
110 if nxt and not nxt.indent then
111 nxt.indent = nxt.start - pos - 1
113 else
114 nxt.section = true
118 local function make_parser(prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
119 local function atom_methodify(t)
120 return setmetatable(t, (t.d and not t.is_list) and quasilist_node or atom_node)
122 local function extract_breaks(t)
123 for i = #t, 1, -1 do
124 local node = t[i]
125 if type(node) == "number" then
126 tag_next_node(t, i, node)
127 table.remove(t, i)
128 elseif node.is_line_comment then
129 tag_next_node(t, i, node[1])
130 table.remove(node)
133 return t
136 return function(content, advance, offset)
137 local tree, esc = nil, {}
138 local I = Cp() * Cc(offset) / incr
139 local I1 = I / m1
140 local opening = _start(I) * Cg(prefix^1, "p")^-1 * d1
141 local closing = _finish(I) * d2
142 local lone_prefix = _start(I) * Cg(prefix^1, "p") * -#P(d1 + "|") * _finish(I1)
143 local unbalanced
144 local function debalance(t) unbalanced = true return t end
145 local function collect_escaped(sexp)
146 if sexp.is_string or sexp.is_comment or sexp.is_char then
147 table.insert(esc, sexp)
149 return sexp
151 local char = ("\\" * (P"alarm" + "backspace" + "delete" + "escape"
152 + "newline" + "null" + "return" + "space" + "tab")
153 + "\\x" * R("09", "AF", "af")^1
154 + "\\" * P(1)) * _char(P(true))
155 local dq_string, multi_esc = delimited_range('"'), delimited_range("|")
156 local stringy = (opposite["|"] and (multi_esc + dq_string) or dq_string) * _string(P(true))
157 local nline = I * newline
158 local blank = nline + hspace + "\f"
159 local lcd = opposite["\n"]
160 local lc_level = lcd * #-P(lcd) + P(lcd)^1 * hspace^-1
161 local line_comment = Cg(lc_level, "d") * (1 - nline)^0 * nline * _comment(P(true)) * _line_comment(P(true))
162 local bcd1, bcd2
163 for o, c in pairs(opposite) do
164 if #c > 1 then
165 bcd1, bcd2 = o, c
166 break
169 local block_comment = bcd1 and bcd2 and nested_pair(bcd1, bcd2) * _comment(P(true))
170 local comment = block_comment and (block_comment + line_comment) or line_comment
171 local incomplete_comment = bcd1 and (lc_level + bcd1) or lc_level
172 local atom = Ct(_start(I) * (
173 comment
174 + Cg(prefix^1, "p")^-1 * (
175 stringy
176 + char
177 + (1 - D1 - D2 - blank - char - stringy - comment - prefix)^1
178 )) * _finish(I1)) / atom_methodify / collect_escaped
179 + Ct(lone_prefix)
180 local list = P{Ct(opening * blank^0 * (atom + blank^1 + V(1))^0 * closing) *
181 Cc(list_node) / setmetatable / extract_breaks}
182 local lone_delimiter = Ct(_start(I) * (D1 + D2 + incomplete_comment) * _finish(I1)) / debalance
183 local section_break = I * "\f" / neg
184 local sexp = (section_break + blank)^0 * (list + atom + lone_delimiter)
185 if advance ~= 0 then
186 local top_level = Ct(advance < 0
187 and sexp^advance
188 or (Cmt(Cc(advance - offset), past) * sexp)^0)
189 tree = P{top_level / extract_breaks}:match(content)
190 tree.unbalanced = unbalanced
192 return {tree = tree, escaped = esc}
196 local opposite_fallback = {
197 __index = function(t, delimiter)
198 local opening = t["\n"]
199 return delimiter and delimiter:find("^" .. opening) and t[opening]
203 local function catchup(tree, range)
204 if range.finish and range.finish >= tree.first_invalid then
205 tree.parse_to(range.finish + 1)
209 function M.new(syntax, node, read)
210 local L = require(cwd.."."..syntax)
211 local opposite = setmetatable(L.opposite, opposite_fallback)
212 local sexps = {
213 tree = {
214 first_invalid = 0,
215 is_root = true,
216 }, -- (sorted) tree of sexps
217 escaped = {}, -- sorted list of strings and comments (redundant, for faster search)
219 local opening, closing, reverse = {}, {}, {}
220 for o, c in pairs(opposite) do
221 if S"([{":match(o) then
222 table.insert(opening, o)
223 table.insert(closing, c)
225 if c ~= o and #c == 1 then -- XXX: don't reverse block comments and strings
226 reverse[c] = o
229 for c, o in pairs(reverse) do
230 opposite[c] = o
232 local D1 = S(table.concat(opening))
233 local d1 = Cg(D1, "d")
234 local d2 = Cmt(Cb("d") * C(1), function(_, _, o, c) return opposite[o] == c end)
235 local D2 = S(table.concat(closing))
236 local atom_node, list_node, quasilist_node = node.atom, node.list, node.quasilist(opposite)
237 local parse = make_parser(L.prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
238 local drive = make_driver(read, parse, node._before, sexps)
239 local root_methods = node.root(opposite)
240 setmetatable(sexps.tree, {
241 __index = function(t, key)
242 if type(key) == "number" then
243 if #t < key then t.parse_to(#t - key) end
244 return rawget(t, key)
245 elseif root_methods[key] then
246 return root_methods[key](t)
247 elseif key == "parse_to" then
248 return drive
252 setmetatable(sexps.escaped, {
253 __index = {
254 around = function(range)
255 catchup(sexps.tree, range)
256 return node._around(sexps.escaped, range)
260 return {tree = sexps.tree, escaped = sexps.escaped, opposite = opposite}, d1, D2
263 return M