fennel: add single-quote prefix
[lisp-parkour.git] / parser / init.lua
blob3e41a35f40a1dca0e5865a8fc1d90e8c3f9af910
1 require'lpeg'
2 local l = lpeg
3 local P, S, R, V, C, Cb, Cc, Cg, Cmt, Cp, Ct = l.P, l.S, l.R, l.V, l.C, l.Cb, l.Cc, l.Cg, l.Cmt, l.Cp, l.Ct
5 local M = {}
7 local cwd = ...
9 local hspace = S" \t"
10 local newline = S('\r\n')
12 local function incr(index, offset) return index + offset - 1 end
13 local function m1(pos) return pos - 1 end
14 local function neg(pos) return -pos end
15 local function past(_, position, pos) return position <= pos end
16 local function startof(element) return element.start end
17 local function _start(patt) return Cg(patt, "start") end
18 local function _finish(patt) return Cg(patt, "finish") end
19 local function _string(patt) return Cg(patt / function() return true end, "is_string") end
20 local function _char(patt) return Cg(patt / function() return true end, "is_char") end
21 local function _comment(patt) return Cg(patt / function() return true end, "is_comment") end
22 local function delimited_range(d)
23 return Cg(d, "d") * (P(1) - "\\" - d + P"\\" * 1)^0 * d
24 end
25 local function nested_pair(d1, d2)
26 return P{Cg(d1, "d") * (P(1) - d1 - d2 + V(1))^0 * d2}
27 end
29 local function purge(base, sexps)
30 if not base then return sexps end
31 for _, list in pairs(sexps) do
32 for i = #list, 1, -1 do
33 if list[i].start >= base then
34 table.remove(list)
35 else
36 break
37 end
38 end
39 end
40 return sexps
41 end
43 local function append(old, new)
44 for name, list in pairs(new) do
45 for _, element in ipairs(list) do
46 table.insert(old[name], element)
47 end
48 if old[name].is_root then
49 -- XXX: check the length first and then access [1].
50 -- Otherwise the autovivification of [1] in a file with only blanks
51 -- will trigger an infinite recursion.
52 -- TODO: get rid of this trick altoghether and instead make the parser set [1].indent.
53 local first = #old[name] > 0 and old[name][1]
54 -- XXX: this assumes that the first sexp starts at column 0.
55 -- if not true, _only_ the first sexp will have wrong indent.
56 if first and not first.indent then first.indent = 0 end
57 end
58 end
59 local last = #old.tree ~= 0 and old.tree[#old.tree]
60 old.tree.first_invalid = last and (last.finish or last.start) + 1 or 0
61 return old
62 end
64 local function make_driver(read, parse, before, sexps)
65 return function(advance)
66 local base
67 if sexps.tree.first_invalid then
68 local last_parsed = #sexps.tree ~= 0 and sexps.tree[#sexps.tree].finish + 1 or 0
69 if sexps.tree.first_invalid < last_parsed then
70 local nearest, n = before(sexps.tree, sexps.tree.first_invalid, startof)
71 local second_nearest = n and sexps.tree[n - 1]
72 local has_eol = second_nearest and second_nearest.is_comment and second_nearest.d:find"^;"
73 local nearest_finish = second_nearest and second_nearest.finish + (has_eol and 0 or 1)
74 local nearest_start = nearest and nearest.start
75 base = nearest_finish or nearest_start or sexps.tree.first_invalid
76 else
77 local last = sexps.tree[#sexps.tree]
78 local has_eol = last and last.is_comment and last.d:find"^;"
79 base = has_eol and last.finish or sexps.tree.first_invalid
80 end
81 end
82 local new
83 local chunk_size = 4 * 1024
84 local tries = 1
85 local content = ""
86 local from = base
87 local len = advance > 0 and advance - base + chunk_size or chunk_size
88 repeat
89 local chunk, further = read(from, len)
90 if not chunk then break end
91 content = content .. chunk
92 new = parse(content, advance, base)
93 from = from + len
94 tries = tries + 1
95 len = chunk_size * 2^(tries - 1)
96 until not new.tree.unbalanced and (advance > 0 or advance < 0 and #new.tree >= math.abs(advance))
97 or not further
98 return append(purge(base, sexps), new or {})
99 end
102 local function tag_next_node(t, i, pos)
103 local j = i
104 repeat
105 j = j + 1
106 until type(t[j]) == "table" or not t[j]
107 local nxt = t[j]
108 if pos >= 0 then
109 if nxt and not nxt.indent then
110 nxt.indent = nxt.start - pos - 1
112 else
113 nxt.section = true
117 local function make_parser(prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
118 local function atom_methodize(t)
119 return setmetatable(t, (t.d and not t.is_list) and quasilist_node or atom_node)
121 local function extract_breaks(t)
122 for i = #t, 1, -1 do
123 local node = t[i]
124 if type(node) == "number" then
125 tag_next_node(t, i, node)
126 table.remove(t, i)
127 elseif node.is_comment and opposite[node.d] == "\n" then
128 tag_next_node(t, i, node[1])
129 table.remove(node)
132 return t
135 return function(content, advance, offset)
136 local tree, esc = nil, {}
137 local I = Cp() * Cc(offset) / incr
138 local I1 = I / m1
139 local opening = _start(I) * Cg(prefix^1, "p")^-1 * d1
140 local closing = _finish(I) * d2
141 local lone_prefix = _start(I) * Cg(prefix^1, "p") * -#P(d1 + "|") * _finish(I1)
142 local unbalanced
143 local function debalance(t) unbalanced = true return t end
144 local function collect_escaped(sexp)
145 if sexp.is_string or sexp.is_comment or sexp.is_char then
146 table.insert(esc, sexp)
148 return sexp
150 local char = ('\\' * (P'alarm' + 'backspace' + 'delete' + 'escape'
151 + 'newline' + 'null' + 'return' + 'space' + 'tab')
152 + '\\x' * R('09', 'AF', 'af')^1
153 + '\\' * P(1)) * _char(P(true))
154 local dq_string = delimited_range('"') * _string(P(true))
155 local multi_esc = delimited_range("|") * _string(P(true))
156 local nline = I * newline
157 local blank = nline + hspace + "\f"
158 local semicolons = ';' * #-P';' + P';'^1 * hspace^-1
159 local line_comment = Cg(semicolons, "d") * (1 - nline)^0 * nline * _comment(P(true))
160 local block_comment = nested_pair("#|", "|#") * _comment(P(true))
161 local comment = opposite["#|"] and (line_comment + block_comment) or line_comment
162 local incomplete_comment = opposite["#|"] and (semicolons + "#|") or semicolons
163 local atom = Ct(_start(I) * (
164 comment
165 + Cg(prefix^1, "p")^-1 * (
166 dq_string
167 + char
168 + multi_esc
169 + (1 - D1 - D2 - blank - multi_esc - char - dq_string - comment - prefix)^1
170 )) * _finish(I1)) / atom_methodize / collect_escaped
171 + Ct(lone_prefix)
172 local list = P{Ct(opening * blank^0 * (atom + blank^1 + V(1))^0 * closing) *
173 Cc(list_node) / setmetatable / extract_breaks}
174 local lone_delimiter = Ct(_start(I) * (D1 + D2 + incomplete_comment) * _finish(I1)) / debalance
175 local section_break = I * "\f" / neg
176 local sexp = (section_break + blank)^0 * (list + atom + lone_delimiter)
177 if advance ~= 0 then
178 local top_level = advance < 0
179 and Ct(sexp^advance)
180 or Ct((Cmt(Cc(advance - offset), past) * sexp)^0)
181 tree = P{top_level / extract_breaks}:match(content)
182 tree.unbalanced = unbalanced
184 return {tree = tree, escaped = esc}
188 local opposite_fallback = {
189 __index = function(t, delimiter)
190 return delimiter and delimiter:find("^;") and t[";"]
194 local function catchup(tree, range)
195 if range.finish and range.finish >= tree.first_invalid then
196 tree.parse_to(range.finish + 1)
200 function M.new(syntax, node, read)
201 local L = require(cwd..'.'..syntax)
202 local opposite = setmetatable(L.opposite, opposite_fallback)
203 local sexps = {
204 tree = {
205 first_invalid = 0,
206 is_root = true,
207 }, -- (sorted) tree of sexps
208 escaped = {}, -- sorted list of strings and comments (redundant, for faster search)
210 local opening, closing = {}, {}
211 for o, c in pairs(opposite) do
212 if S"([{":match(o) then
213 table.insert(opening, o)
214 table.insert(closing, c)
217 local D1 = S(table.concat(opening))
218 local d1 = Cg(D1, "d")
219 local d2 = Cmt(Cb("d") * C(1), function(_, _, o, c) return opposite[o] == c end)
220 local D2 = S(table.concat(closing))
221 local atom_node, list_node, quasilist_node = node.atom, node.list, node.quasilist(opposite)
222 local parse = make_parser(L.prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
223 local drive = make_driver(read, parse, node._before, sexps)
224 local root_methods = node.root(opposite)
225 setmetatable(sexps.tree, {
226 __index = function(t, key)
227 if type(key) == "number" then
228 if #t < key then t.parse_to(#t - key) end
229 return rawget(t, key)
230 elseif root_methods[key] then
231 return root_methods[key](t)
232 elseif key == "parse_to" then
233 return drive
237 setmetatable(sexps.escaped, {
238 __index = {
239 around = function(range)
240 catchup(sexps.tree, range)
241 return node._around(sexps.escaped, range)
245 return {tree = sexps.tree, escaped = sexps.escaped, opposite = opposite}, d1, D2
248 return M