allow other block comment delimiters
[lisp-parkour.git] / parser / init.lua
blobfecfe537203fd1be7f882a4662e7c1cdfce1fa68
1 require'lpeg'
2 local l = lpeg
3 local P, S, R, V, C, Cb, Cc, Cg, Cmt, Cp, Ct = l.P, l.S, l.R, l.V, l.C, l.Cb, l.Cc, l.Cg, l.Cmt, l.Cp, l.Ct
5 local M = {}
7 local cwd = ...
9 local hspace = S" \t"
10 local newline = S"\r\n"
12 local function incr(index, offset) return index + offset - 1 end
13 local function m1(pos) return pos - 1 end
14 local function neg(pos) return -pos end
15 local function past(_, position, pos) return position <= pos end
16 local function startof(element) return element.start end
17 local function _start(patt) return Cg(patt, "start") end
18 local function _finish(patt) return Cg(patt, "finish") end
19 local function _string(patt) return Cg(patt / function() return true end, "is_string") end
20 local function _char(patt) return Cg(patt / function() return true end, "is_char") end
21 local function _comment(patt) return Cg(patt / function() return true end, "is_comment") end
22 local function _line_comment(patt) return Cg(patt / function() return true end, "is_line_comment") end
23 local function delimited_range(d)
24 return Cg(d, "d") * (P(1) - "\\" - d + P"\\" * 1)^0 * d
25 end
26 local function nested_pair(d1, d2)
27 return P{Cg(d1, "d") * (P(1) - d1 - d2 + V(1))^0 * d2}
28 end
30 local function purge(base, sexps)
31 if not base then return sexps end
32 for _, list in pairs(sexps) do
33 for i = #list, 1, -1 do
34 if list[i].start >= base then
35 table.remove(list)
36 else
37 break
38 end
39 end
40 end
41 return sexps
42 end
44 local function append(old, new)
45 for name, list in pairs(new) do
46 for _, element in ipairs(list) do
47 table.insert(old[name], element)
48 end
49 if old[name].is_root then
50 -- XXX: check the length first and then access [1].
51 -- Otherwise the autovivification of [1] in a file with only blanks
52 -- will trigger an infinite recursion.
53 -- TODO: get rid of this trick altoghether and instead make the parser set [1].indent.
54 local first = #old[name] > 0 and old[name][1]
55 -- XXX: this assumes that the first sexp starts at column 0.
56 -- if not true, _only_ the first sexp will have wrong indent.
57 if first and not first.indent then first.indent = 0 end
58 end
59 end
60 local last = #old.tree ~= 0 and old.tree[#old.tree]
61 old.tree.first_invalid = last and (last.finish or last.start) + 1 or 0
62 return old
63 end
65 local function make_driver(read, parse, before, sexps)
66 return function(advance)
67 local base
68 if sexps.tree.first_invalid then
69 local last_parsed = #sexps.tree ~= 0 and sexps.tree[#sexps.tree].finish + 1 or 0
70 if sexps.tree.first_invalid < last_parsed then
71 local nearest, n = before(sexps.tree, sexps.tree.first_invalid, startof)
72 local second_nearest = n and sexps.tree[n - 1]
73 local has_eol = second_nearest and second_nearest.is_line_comment
74 local nearest_finish = second_nearest and second_nearest.finish + (has_eol and 0 or 1)
75 local nearest_start = nearest and nearest.start
76 base = nearest_finish or nearest_start or sexps.tree.first_invalid
77 else
78 local last = sexps.tree[#sexps.tree]
79 local has_eol = last and last.is_line_comment
80 base = has_eol and last.finish or sexps.tree.first_invalid
81 end
82 end
83 local new
84 local chunk_size = 4 * 1024
85 local tries = 1
86 local content = ""
87 local from = base
88 local len = advance > 0 and advance - base + chunk_size or chunk_size
89 repeat
90 local chunk, further = read(from, len)
91 if not chunk then break end
92 content = content .. chunk
93 new = parse(content, advance, base)
94 from = from + len
95 tries = tries + 1
96 len = chunk_size * 2^(tries - 1)
97 until not new.tree.unbalanced and (advance > 0 or advance < 0 and #new.tree >= math.abs(advance))
98 or not further
99 return append(purge(base, sexps), new or {})
103 local function tag_next_node(t, i, pos)
104 local j = i
105 repeat
106 j = j + 1
107 until type(t[j]) == "table" or not t[j]
108 local nxt = t[j]
109 if pos >= 0 then
110 if nxt and not nxt.indent then
111 nxt.indent = nxt.start - pos - 1
113 else
114 nxt.section = true
118 local function make_parser(prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
119 local function atom_methodize(t)
120 return setmetatable(t, (t.d and not t.is_list) and quasilist_node or atom_node)
122 local function extract_breaks(t)
123 for i = #t, 1, -1 do
124 local node = t[i]
125 if type(node) == "number" then
126 tag_next_node(t, i, node)
127 table.remove(t, i)
128 elseif node.is_line_comment then
129 tag_next_node(t, i, node[1])
130 table.remove(node)
133 return t
136 return function(content, advance, offset)
137 local tree, esc = nil, {}
138 local I = Cp() * Cc(offset) / incr
139 local I1 = I / m1
140 local opening = _start(I) * Cg(prefix^1, "p")^-1 * d1
141 local closing = _finish(I) * d2
142 local lone_prefix = _start(I) * Cg(prefix^1, "p") * -#P(d1 + "|") * _finish(I1)
143 local unbalanced
144 local function debalance(t) unbalanced = true return t end
145 local function collect_escaped(sexp)
146 if sexp.is_string or sexp.is_comment or sexp.is_char then
147 table.insert(esc, sexp)
149 return sexp
151 local char = ("\\" * (P"alarm" + "backspace" + "delete" + "escape"
152 + "newline" + "null" + "return" + "space" + "tab")
153 + "\\x" * R("09", "AF", "af")^1
154 + "\\" * P(1)) * _char(P(true))
155 local dq_string = delimited_range('"') * _string(P(true))
156 local multi_esc = delimited_range("|") * _string(P(true))
157 local nline = I * newline
158 local blank = nline + hspace + "\f"
159 local lcd = opposite["\n"]
160 local lc_level = lcd * #-P(lcd) + P(lcd)^1 * hspace^-1
161 local line_comment = Cg(lc_level, "d") * (1 - nline)^0 * nline * _comment(P(true)) * _line_comment(P(true))
162 local bcd1, bcd2
163 for o, c in pairs(opposite) do
164 if #c > 1 then
165 bcd1, bcd2 = o, c
166 break
169 local block_comment = bcd1 and bcd2 and nested_pair(bcd1, bcd2) * _comment(P(true))
170 local comment = block_comment and (line_comment + block_comment) or line_comment
171 local incomplete_comment = bcd1 and (lc_level + bcd1) or lc_level
172 local atom = Ct(_start(I) * (
173 comment
174 + Cg(prefix^1, "p")^-1 * (
175 dq_string
176 + char
177 + multi_esc
178 + (1 - D1 - D2 - blank - multi_esc - char - dq_string - comment - prefix)^1
179 )) * _finish(I1)) / atom_methodize / collect_escaped
180 + Ct(lone_prefix)
181 local list = P{Ct(opening * blank^0 * (atom + blank^1 + V(1))^0 * closing) *
182 Cc(list_node) / setmetatable / extract_breaks}
183 local lone_delimiter = Ct(_start(I) * (D1 + D2 + incomplete_comment) * _finish(I1)) / debalance
184 local section_break = I * "\f" / neg
185 local sexp = (section_break + blank)^0 * (list + atom + lone_delimiter)
186 if advance ~= 0 then
187 local top_level = advance < 0
188 and Ct(sexp^advance)
189 or Ct((Cmt(Cc(advance - offset), past) * sexp)^0)
190 tree = P{top_level / extract_breaks}:match(content)
191 tree.unbalanced = unbalanced
193 return {tree = tree, escaped = esc}
197 local opposite_fallback = {
198 __index = function(t, delimiter)
199 local opening = t["\n"]
200 return delimiter and delimiter:find("^" .. opening) and t[opening]
204 local function catchup(tree, range)
205 if range.finish and range.finish >= tree.first_invalid then
206 tree.parse_to(range.finish + 1)
210 function M.new(syntax, node, read)
211 local L = require(cwd.."."..syntax)
212 local opposite = setmetatable(L.opposite, opposite_fallback)
213 local sexps = {
214 tree = {
215 first_invalid = 0,
216 is_root = true,
217 }, -- (sorted) tree of sexps
218 escaped = {}, -- sorted list of strings and comments (redundant, for faster search)
220 local opening, closing = {}, {}
221 for o, c in pairs(opposite) do
222 if S"([{":match(o) then
223 table.insert(opening, o)
224 table.insert(closing, c)
227 local D1 = S(table.concat(opening))
228 local d1 = Cg(D1, "d")
229 local d2 = Cmt(Cb("d") * C(1), function(_, _, o, c) return opposite[o] == c end)
230 local D2 = S(table.concat(closing))
231 local atom_node, list_node, quasilist_node = node.atom, node.list, node.quasilist(opposite)
232 local parse = make_parser(L.prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
233 local drive = make_driver(read, parse, node._before, sexps)
234 local root_methods = node.root(opposite)
235 setmetatable(sexps.tree, {
236 __index = function(t, key)
237 if type(key) == "number" then
238 if #t < key then t.parse_to(#t - key) end
239 return rawget(t, key)
240 elseif root_methods[key] then
241 return root_methods[key](t)
242 elseif key == "parse_to" then
243 return drive
247 setmetatable(sexps.escaped, {
248 __index = {
249 around = function(range)
250 catchup(sexps.tree, range)
251 return node._around(sexps.escaped, range)
255 return {tree = sexps.tree, escaped = sexps.escaped, opposite = opposite}, d1, D2
258 return M