allow other line comment opening delimiters
[lisp-parkour.git] / parser / init.lua
blob2b5133287cc20c40dceaf2b68d4c25fae3799d4a
1 require'lpeg'
2 local l = lpeg
3 local P, S, R, V, C, Cb, Cc, Cg, Cmt, Cp, Ct = l.P, l.S, l.R, l.V, l.C, l.Cb, l.Cc, l.Cg, l.Cmt, l.Cp, l.Ct
5 local M = {}
7 local cwd = ...
9 local hspace = S" \t"
10 local newline = S"\r\n"
12 local function incr(index, offset) return index + offset - 1 end
13 local function m1(pos) return pos - 1 end
14 local function neg(pos) return -pos end
15 local function past(_, position, pos) return position <= pos end
16 local function startof(element) return element.start end
17 local function _start(patt) return Cg(patt, "start") end
18 local function _finish(patt) return Cg(patt, "finish") end
19 local function _string(patt) return Cg(patt / function() return true end, "is_string") end
20 local function _char(patt) return Cg(patt / function() return true end, "is_char") end
21 local function _comment(patt) return Cg(patt / function() return true end, "is_comment") end
22 local function _line_comment(patt) return Cg(patt / function() return true end, "is_line_comment") end
23 local function delimited_range(d)
24 return Cg(d, "d") * (P(1) - "\\" - d + P"\\" * 1)^0 * d
25 end
26 local function nested_pair(d1, d2)
27 return P{Cg(d1, "d") * (P(1) - d1 - d2 + V(1))^0 * d2}
28 end
30 local function purge(base, sexps)
31 if not base then return sexps end
32 for _, list in pairs(sexps) do
33 for i = #list, 1, -1 do
34 if list[i].start >= base then
35 table.remove(list)
36 else
37 break
38 end
39 end
40 end
41 return sexps
42 end
44 local function append(old, new)
45 for name, list in pairs(new) do
46 for _, element in ipairs(list) do
47 table.insert(old[name], element)
48 end
49 if old[name].is_root then
50 -- XXX: check the length first and then access [1].
51 -- Otherwise the autovivification of [1] in a file with only blanks
52 -- will trigger an infinite recursion.
53 -- TODO: get rid of this trick altoghether and instead make the parser set [1].indent.
54 local first = #old[name] > 0 and old[name][1]
55 -- XXX: this assumes that the first sexp starts at column 0.
56 -- if not true, _only_ the first sexp will have wrong indent.
57 if first and not first.indent then first.indent = 0 end
58 end
59 end
60 local last = #old.tree ~= 0 and old.tree[#old.tree]
61 old.tree.first_invalid = last and (last.finish or last.start) + 1 or 0
62 return old
63 end
65 local function make_driver(read, parse, before, sexps)
66 return function(advance)
67 local base
68 if sexps.tree.first_invalid then
69 local last_parsed = #sexps.tree ~= 0 and sexps.tree[#sexps.tree].finish + 1 or 0
70 if sexps.tree.first_invalid < last_parsed then
71 local nearest, n = before(sexps.tree, sexps.tree.first_invalid, startof)
72 local second_nearest = n and sexps.tree[n - 1]
73 local has_eol = second_nearest and second_nearest.is_line_comment
74 local nearest_finish = second_nearest and second_nearest.finish + (has_eol and 0 or 1)
75 local nearest_start = nearest and nearest.start
76 base = nearest_finish or nearest_start or sexps.tree.first_invalid
77 else
78 local last = sexps.tree[#sexps.tree]
79 local has_eol = last and last.is_line_comment
80 base = has_eol and last.finish or sexps.tree.first_invalid
81 end
82 end
83 local new
84 local chunk_size = 4 * 1024
85 local tries = 1
86 local content = ""
87 local from = base
88 local len = advance > 0 and advance - base + chunk_size or chunk_size
89 repeat
90 local chunk, further = read(from, len)
91 if not chunk then break end
92 content = content .. chunk
93 new = parse(content, advance, base)
94 from = from + len
95 tries = tries + 1
96 len = chunk_size * 2^(tries - 1)
97 until not new.tree.unbalanced and (advance > 0 or advance < 0 and #new.tree >= math.abs(advance))
98 or not further
99 return append(purge(base, sexps), new or {})
103 local function tag_next_node(t, i, pos)
104 local j = i
105 repeat
106 j = j + 1
107 until type(t[j]) == "table" or not t[j]
108 local nxt = t[j]
109 if pos >= 0 then
110 if nxt and not nxt.indent then
111 nxt.indent = nxt.start - pos - 1
113 else
114 nxt.section = true
118 local function make_parser(prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
119 local function atom_methodize(t)
120 return setmetatable(t, (t.d and not t.is_list) and quasilist_node or atom_node)
122 local function extract_breaks(t)
123 for i = #t, 1, -1 do
124 local node = t[i]
125 if type(node) == "number" then
126 tag_next_node(t, i, node)
127 table.remove(t, i)
128 elseif node.is_line_comment then
129 tag_next_node(t, i, node[1])
130 table.remove(node)
133 return t
136 return function(content, advance, offset)
137 local tree, esc = nil, {}
138 local I = Cp() * Cc(offset) / incr
139 local I1 = I / m1
140 local opening = _start(I) * Cg(prefix^1, "p")^-1 * d1
141 local closing = _finish(I) * d2
142 local lone_prefix = _start(I) * Cg(prefix^1, "p") * -#P(d1 + "|") * _finish(I1)
143 local unbalanced
144 local function debalance(t) unbalanced = true return t end
145 local function collect_escaped(sexp)
146 if sexp.is_string or sexp.is_comment or sexp.is_char then
147 table.insert(esc, sexp)
149 return sexp
151 local char = ("\\" * (P"alarm" + "backspace" + "delete" + "escape"
152 + "newline" + "null" + "return" + "space" + "tab")
153 + "\\x" * R("09", "AF", "af")^1
154 + "\\" * P(1)) * _char(P(true))
155 local dq_string = delimited_range('"') * _string(P(true))
156 local multi_esc = delimited_range("|") * _string(P(true))
157 local nline = I * newline
158 local blank = nline + hspace + "\f"
159 local lcd = opposite["\n"]
160 local lc_level = lcd * #-P(lcd) + P(lcd)^1 * hspace^-1
161 local line_comment = Cg(lc_level, "d") * (1 - nline)^0 * nline * _comment(P(true)) * _line_comment(P(true))
162 local block_comment = nested_pair("#|", "|#") * _comment(P(true))
163 local comment = opposite["#|"] and (line_comment + block_comment) or line_comment
164 local incomplete_comment = opposite["#|"] and (lc_level + "#|") or lc_level
165 local atom = Ct(_start(I) * (
166 comment
167 + Cg(prefix^1, "p")^-1 * (
168 dq_string
169 + char
170 + multi_esc
171 + (1 - D1 - D2 - blank - multi_esc - char - dq_string - comment - prefix)^1
172 )) * _finish(I1)) / atom_methodize / collect_escaped
173 + Ct(lone_prefix)
174 local list = P{Ct(opening * blank^0 * (atom + blank^1 + V(1))^0 * closing) *
175 Cc(list_node) / setmetatable / extract_breaks}
176 local lone_delimiter = Ct(_start(I) * (D1 + D2 + incomplete_comment) * _finish(I1)) / debalance
177 local section_break = I * "\f" / neg
178 local sexp = (section_break + blank)^0 * (list + atom + lone_delimiter)
179 if advance ~= 0 then
180 local top_level = advance < 0
181 and Ct(sexp^advance)
182 or Ct((Cmt(Cc(advance - offset), past) * sexp)^0)
183 tree = P{top_level / extract_breaks}:match(content)
184 tree.unbalanced = unbalanced
186 return {tree = tree, escaped = esc}
190 local opposite_fallback = {
191 __index = function(t, delimiter)
192 local opening = t["\n"]
193 return delimiter and delimiter:find("^" .. opening) and t[opening]
197 local function catchup(tree, range)
198 if range.finish and range.finish >= tree.first_invalid then
199 tree.parse_to(range.finish + 1)
203 function M.new(syntax, node, read)
204 local L = require(cwd.."."..syntax)
205 local opposite = setmetatable(L.opposite, opposite_fallback)
206 local sexps = {
207 tree = {
208 first_invalid = 0,
209 is_root = true,
210 }, -- (sorted) tree of sexps
211 escaped = {}, -- sorted list of strings and comments (redundant, for faster search)
213 local opening, closing = {}, {}
214 for o, c in pairs(opposite) do
215 if S"([{":match(o) then
216 table.insert(opening, o)
217 table.insert(closing, c)
220 local D1 = S(table.concat(opening))
221 local d1 = Cg(D1, "d")
222 local d2 = Cmt(Cb("d") * C(1), function(_, _, o, c) return opposite[o] == c end)
223 local D2 = S(table.concat(closing))
224 local atom_node, list_node, quasilist_node = node.atom, node.list, node.quasilist(opposite)
225 local parse = make_parser(L.prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
226 local drive = make_driver(read, parse, node._before, sexps)
227 local root_methods = node.root(opposite)
228 setmetatable(sexps.tree, {
229 __index = function(t, key)
230 if type(key) == "number" then
231 if #t < key then t.parse_to(#t - key) end
232 return rawget(t, key)
233 elseif root_methods[key] then
234 return root_methods[key](t)
235 elseif key == "parse_to" then
236 return drive
240 setmetatable(sexps.escaped, {
241 __index = {
242 around = function(range)
243 catchup(sexps.tree, range)
244 return node._around(sexps.escaped, range)
248 return {tree = sexps.tree, escaped = sexps.escaped, opposite = opposite}, d1, D2
251 return M