add copyright notice
[lisp-parkour.git] / parser / init.lua
blob0cadb90aabc1202e127a0ccd7ca20538328dada8
1 -- SPDX-License-Identifier: GPL-3.0-or-later
2 -- © 2020 Georgi Kirilov
4 require'lpeg'
5 local l = lpeg
6 local P, S, R, V, C, Cb, Cc, Cg, Cmt, Cp, Ct = l.P, l.S, l.R, l.V, l.C, l.Cb, l.Cc, l.Cg, l.Cmt, l.Cp, l.Ct
8 local M = {}
10 local cwd = ...
12 local hspace = S" \t"
13 local newline = S"\r\n"
15 local function incr(index, offset) return index + offset - 1 end
16 local function m1(pos) return pos - 1 end
17 local function neg(pos) return -pos end
18 local function past(_, position, pos) return position <= pos end
19 local function startof(element) return element.start end
20 local function _start(patt) return Cg(patt, "start") end
21 local function _finish(patt) return Cg(patt, "finish") end
22 local function _string(patt) return Cg(patt / function() return true end, "is_string") end
23 local function _char(patt) return Cg(patt / function() return true end, "is_char") end
24 local function _comment(patt) return Cg(patt / function() return true end, "is_comment") end
25 local function _line_comment(patt) return Cg(patt / function() return true end, "is_line_comment") end
26 local function delimited_range(d)
27 return Cg(d, "d") * (P(1) - "\\" - d + P"\\" * 1)^0 * d
28 end
29 local function nested_pair(d1, d2)
30 return P{Cg(d1, "d") * (P(1) - d1 - d2 + V(1))^0 * d2}
31 end
33 local function purge(base, sexps)
34 if not base then return sexps end
35 for _, list in pairs(sexps) do
36 for i = #list, 1, -1 do
37 if list[i].start >= base then
38 table.remove(list)
39 else
40 break
41 end
42 end
43 end
44 return sexps
45 end
47 local function append(old, new)
48 for name, list in pairs(new) do
49 for _, element in ipairs(list) do
50 table.insert(old[name], element)
51 end
52 if old[name].is_root then
53 -- XXX: check the length first and then access [1].
54 -- Otherwise the autovivification of [1] in a file with only blanks
55 -- will trigger an infinite recursion.
56 -- TODO: get rid of this trick altogether and instead make the parser set [1].indent.
57 local first = #old[name] > 0 and old[name][1]
58 -- XXX: this assumes that the first sexp starts at column 0.
59 -- if not true, _only_ the first sexp will have wrong indent.
60 if first and not first.indent then first.indent = 0 end
61 end
62 end
63 local last = #old.tree ~= 0 and old.tree[#old.tree]
64 old.tree.first_invalid = last and (last.finish or last.start) + 1 or 0
65 return old
66 end
68 local function make_driver(read, parse, before, sexps)
69 return function(advance)
70 local base
71 if sexps.tree.first_invalid then
72 local last_parsed = #sexps.tree ~= 0 and sexps.tree[#sexps.tree].finish + 1 or 0
73 if sexps.tree.first_invalid < last_parsed then
74 local nearest, n = before(sexps.tree, sexps.tree.first_invalid, startof)
75 local second_nearest = n and sexps.tree[n - 1]
76 local has_eol = second_nearest and second_nearest.is_line_comment
77 local nearest_finish = second_nearest and second_nearest.finish + (has_eol and 0 or 1)
78 local nearest_start = nearest and nearest.start
79 base = nearest_finish or nearest_start or sexps.tree.first_invalid
80 else
81 local last = sexps.tree[#sexps.tree]
82 local has_eol = last and last.is_line_comment
83 base = has_eol and last.finish or sexps.tree.first_invalid
84 end
85 end
86 local new
87 local chunk_size = 4 * 1024
88 local tries = 1
89 local content = ""
90 local from = base
91 local len = advance > 0 and advance - base + chunk_size or chunk_size
92 repeat
93 local chunk, further = read(from, len)
94 if not chunk then break end
95 content = content .. chunk
96 new = parse(content, advance, base)
97 from = from + len
98 tries = tries + 1
99 len = chunk_size * 2^(tries - 1)
100 until not new.tree.unbalanced and (advance > 0 or advance < 0 and #new.tree >= math.abs(advance))
101 or not further
102 return append(purge(base, sexps), new or {})
106 local function tag_next_node(t, i, pos)
107 local j = i
108 repeat
109 j = j + 1
110 until type(t[j]) == "table" or not t[j]
111 local nxt = t[j]
112 if pos >= 0 then
113 if nxt and not nxt.indent then
114 nxt.indent = nxt.start - pos - 1
116 else
117 nxt.section = true
121 local function make_parser(prefix, weak_prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
122 local function atom_methodify(t)
123 return setmetatable(t, (t.d and not t.is_list) and quasilist_node or atom_node)
125 local function extract_breaks(t)
126 for i = #t, 1, -1 do
127 local node = t[i]
128 if type(node) == "number" then
129 tag_next_node(t, i, node)
130 table.remove(t, i)
131 elseif node.is_line_comment then
132 tag_next_node(t, i, node[1])
133 table.remove(node)
136 return t
139 return function(content, advance, offset)
140 local tree, esc = nil, {}
141 local I = Cp() * Cc(offset) / incr
142 local I1 = I / m1
143 local opening = _start(I) * Cg(prefix^1, "p")^-1 * d1
144 local closing = _finish(I) * d2
145 local lone_prefix = _start(I) * Cg(prefix^1, "p") * -#P(d1 + "|") * _finish(I1)
146 local unbalanced
147 local function debalance(t) unbalanced = true return t end
148 local function collect_escaped(sexp)
149 if sexp.is_string or sexp.is_comment or sexp.is_char then
150 table.insert(esc, sexp)
152 return sexp
154 local char = ("\\" * (P"alarm" + "backspace" + "delete" + "escape"
155 + "newline" + "null" + "return" + "space" + "tab")
156 + "\\x" * R("09", "AF", "af")^1
157 + "\\" * P(1)) * _char(P(true))
158 local dq_string, multi_esc = delimited_range('"'), delimited_range("|")
159 local stringy = (opposite["|"] and (multi_esc + dq_string) or dq_string) * _string(P(true))
160 local nline = I * newline
161 local blank = nline + hspace + "\f"
162 local lcd = opposite["\n"]
163 local lc_level = lcd * #-P(lcd) + P(lcd)^1 * hspace^-1
164 local line_comment = Cg(lc_level, "d") * (1 - nline)^0 * nline * _comment(P(true)) * _line_comment(P(true))
165 local bcd1, bcd2
166 for o, c in pairs(opposite) do
167 -- XXX: here I assume that #| is the only multi-char delimiter,
168 -- and that it means block comment
169 if #o > 1 then
170 bcd1, bcd2 = o, c
171 break
174 local block_comment = bcd1 and bcd2 and nested_pair(bcd1, bcd2) * _comment(P(true))
175 local comment = block_comment and (block_comment + line_comment) or line_comment
176 local incomplete_comment = bcd1 and (lc_level + bcd1) or lc_level
177 local symbol = weak_prefix
178 and (1 - D1 - D2 - blank - char - stringy - comment - prefix + weak_prefix)^1
179 or (1 - D1 - D2 - blank - char - stringy - comment - prefix)^1
180 local atom = Ct(_start(I) * (
181 comment
182 + Cg(prefix^1, "p")^-1 * (
183 stringy
184 + char
185 + symbol
186 )) * _finish(I1)) / atom_methodify / collect_escaped
187 + Ct(lone_prefix)
188 local list = P{Ct(opening * blank^0 * (atom + blank^1 + V(1))^0 * closing) *
189 Cc(list_node) / setmetatable / extract_breaks}
190 local lone_delimiter = Ct(_start(I) * (D1 + D2 + incomplete_comment) * _finish(I1)) / debalance
191 local section_break = I * "\f" / neg
192 local sexp = (section_break + blank)^0 * (list + atom + lone_delimiter)
193 if advance ~= 0 then
194 local top_level = Ct(advance < 0
195 and sexp^advance
196 or (Cmt(Cc(advance - offset), past) * sexp)^0)
197 tree = P{top_level / extract_breaks}:match(content)
198 tree.unbalanced = unbalanced
200 return {tree = tree, escaped = esc}
204 local opposite_fallback = {
205 __index = function(t, delimiter)
206 local opening = t["\n"]
207 return delimiter and delimiter:find("^" .. opening) and t[opening]
211 local function catchup(tree, range)
212 if range.finish and range.finish >= tree.first_invalid then
213 tree.parse_to(range.finish + 1)
217 function M.new(syntax, node, read)
218 local L = require(cwd.."."..syntax)
219 local opposite = setmetatable(L.opposite, opposite_fallback)
220 local sexps = {
221 tree = {
222 first_invalid = 0,
223 is_root = true,
224 }, -- (sorted) tree of sexps
225 escaped = {}, -- sorted list of strings and comments (redundant, for faster search)
227 local opening, closing, reverse = {}, {}, {}
228 for o, c in pairs(opposite) do
229 if S"([{":match(o) then
230 table.insert(opening, o)
231 table.insert(closing, c)
233 if c ~= o and #c == 1 then -- XXX: don't reverse block comments and strings
234 reverse[c] = o
237 for c, o in pairs(reverse) do
238 opposite[c] = o
240 local D1 = S(table.concat(opening))
241 local d1 = Cg(D1, "d")
242 local d2 = Cmt(Cb("d") * C(1), function(_, _, o, c) return opposite[o] == c end)
243 local D2 = S(table.concat(closing))
244 local atom_node, list_node, quasilist_node = node.atom, node.list, node.quasilist(opposite)
245 local parse = make_parser(L.prefix, L.weak_prefix, opposite, d1, d2, D1, D2, atom_node, list_node, quasilist_node)
246 local drive = make_driver(read, parse, node._before, sexps)
247 local root_methods = node.root(opposite)
248 setmetatable(sexps.tree, {
249 __index = function(t, key)
250 if type(key) == "number" then
251 if #t < key then t.parse_to(#t - key) end
252 return rawget(t, key)
253 elseif root_methods[key] then
254 return root_methods[key](t)
255 elseif key == "parse_to" then
256 return drive
260 setmetatable(sexps.escaped, {
261 __index = {
262 around = function(range)
263 catchup(sexps.tree, range)
264 return node._around(sexps.escaped, range)
268 return {tree = sexps.tree, escaped = sexps.escaped, opposite = opposite}, d1, D2
271 return M