minimize diffs between keythemes
[lisp-parkour.git] / parser.lua
blobe35a0db556345aa31bfed8b927b56972d2fbd13a
1 require'lpeg'
2 local l = lpeg
3 local P, S, R, V, C, Cb, Cc, Cg, Cmt, Cp, Ct = l.P, l.S, l.R, l.V, l.C, l.Cb, l.Cc, l.Cg, l.Cmt, l.Cp, l.Ct
5 local M = {}
7 local hspace = S" \t"
8 local blank = S"\n\v\f" + hspace
9 local newline = S('\r\n\f')^1
11 local function gt(pos, val)
12 return val and pos > val
13 end
15 local function le(pos, val)
16 return val and pos <= val
17 end
19 -- binary search a list for the nearest node before pos
20 local function before(t, pos, key, skip)
21 local left, right = 1, #t
22 while left <= right do
23 local m = math.floor(left + (right - left) / 2)
24 local m_val = key(t[m])
25 if gt(pos, m_val) and (m == #t or le(pos, key(t[m + 1]))) then
26 if skip then
27 while (t[m] and skip(t[m])) do
28 m = m - 1
29 end
30 end
31 return t[m], m
32 end
33 if le(pos, m_val) then right = m - 1 else left = m + 1 end
34 end
35 end
37 -- binary search a list for the nearest node after pos
38 local function after(t, pos, key, skip)
39 if t.is_root and (#t == 0 or pos >= t[#t].start) then
40 -- XXX: if we are at the last parsed top-level node, try to access the next one
41 -- to get it parsed as well. Otherwise the search will stop prematurely.
42 local _ = t[#t + 1]
43 end
44 local left, right = 1, #t
45 while left <= right do
46 local m = math.floor(left + (right - left) / 2)
47 local m_val = key(t[m])
48 if le(pos, m_val) and (m == 1 or gt(pos, key(t[m - 1]))) then
49 if skip then
50 while (t[m] and skip(t[m])) do
51 m = m + 1
52 end
53 end
54 return t[m], m
55 end
56 if le(pos, m_val) then right = m - 1 else left = m + 1 end
57 end
58 end
60 -- binary search a list for the node that contains pos
61 local function around(t, range)
62 if not (range.start and range.finish) then return end
63 local left, right = 1, #t
64 while left <= right do
65 local m = math.floor(left + (right - left) / 2)
66 local e = t[m]
67 local nxt = t[m + 1]
68 if e.start and range.start >= e.start and e.finish and range.finish <= e.finish + 1 and
69 (not nxt or nxt.start > range.start) -- if adjoined - asdf|(qwer) - this will prefer (qwer)
70 then return e, m end
71 if e.start and range.start < e.start then right = m - 1 else left = m + 1 end
72 end
73 end
75 local function incr(index, offset) return index + offset - 1 end
76 local function m1(pos) return pos - 1 end
77 local function past(_, position, pos) return position <= pos end
78 local function startof(element) return element.start end
79 local function finishof(element) return element.finish end
80 local function _start(patt) return Cg(patt, "start") end
81 local function _finish(patt) return Cg(patt, "finish") end
82 local bof_or_bol = (hspace^0 * P"\n"^1)^1 + Cmt("", function(_, pos) return pos == 1 end)
83 local function _indent(I)
84 return Cg(bof_or_bol * I * P"\f", "section")^0 *
85 Cg(bof_or_bol * C(hspace^0) / function(indent) return #indent end, "indent")^-1
86 end
87 local function _string(patt) return Cg(patt / function() return true end, "is_string") end
88 local function _char(patt) return Cg(patt / function() return true end, "is_char") end
89 local function _comment(patt) return Cg(patt / function() return true end, "is_comment") end
90 local function delimited_range(d)
91 return Cg(d, "d") * (P(1) - "\\" - d + P"\\" * 1)^0 * d
92 end
93 local function nested_pair(d1, d2)
94 return P{Cg(d1, "d") * (P(1) - d1 - d2 + V(1))^0 * d2}
95 end
98 local function find_after(t, selection, pred, stop)
99 local _, n = t.after(selection.start, startof)
100 while t[n] and not pred(t[n]) do
101 if stop and stop(t[n]) then return end
102 n = n + 1
104 return t[n], n
107 local function find_before(t, selection, pred, stop)
108 local _, n = t.before(selection.finish, finishof)
109 while t[n] and not pred(t[n]) do
110 if stop and stop(t[n]) then return end
111 n = n - 1
113 return t[n], n
116 local function intersects(t)
117 return function(range)
118 local start = t.start + (t.p and #t.p or 0)
119 return start >= range.start and t.finish >= range.finish and start < range.finish
120 or start < range.start and t.finish < range.finish and range.start <= t.finish
124 local atom_methods = {
125 contains = function(t) return function(range) return t.start <= range.start and t.finish >= range.finish - 1 end end,
126 touches = function(t) return function(range) return t.start == range.start or t.finish == range.finish - 1 end end,
127 intersects = intersects,
130 local list_methods = {
131 is_list = function(_) return true end,
132 is_empty = function(t) return #t == 0 end,
133 contains = function(t)
134 return function(range)
135 return t.start + (t.p and #t.p or 0) < range.start and t.finish > range.finish - 1
137 end,
138 touches = function(t)
139 return function(range)
140 return t.start + (t.p and #t.p or 0) >= range.start and t.start <= range.start or
141 t.finish == range.finish - 1
143 end,
144 overlaps = function(t) return function(range)
145 return t.start == range.start and t.finish > range.finish - 1
146 or t.start < range.start and t.finish == range.finish - 1 end
147 end,
148 intersects = intersects,
149 around = function(t)
150 return function(range)
151 return around(t, range)
153 end,
154 before = function(t)
155 return function(pos, key, skip)
156 return before(t, pos, key, skip)
158 end,
159 after = function(t)
160 return function(pos, key, skip)
161 return after(t, pos, key, skip)
163 end,
164 find_after = function(t)
165 return function(range, pred, stop)
166 return find_after(t, range, pred, stop)
168 end,
169 find_before = function(t)
170 return function(range, pred, stop)
171 return find_before(t, range, pred, stop)
173 end,
176 local escaped_methods = {}
177 local pseudo_methods = {}
179 local function sexp_around(t, range)
180 if t.is_root or (t.is_list and t.contains(range)) then
181 local child, nth = t.around(range)
182 if child and child.is_list and not child.touches(range) then
183 return sexp_around(child, range)
184 else
185 return child, t, nth
190 local function find_before_innermost(t, range, pred)
191 if t.is_root or (t.d and t.start < range.start) then
192 if t.d and not t.is_list then
193 local newpos = t.start_before(range.start)
194 if newpos then
195 local word = t.word_at({start = newpos, finish = newpos})
196 if word and pred(word) then
197 return word
200 return
202 local _, nearest_n = t.before(range.start, startof)
203 if nearest_n then
204 for n = nearest_n, 1, -1 do
205 local child = t[n]
206 if child.d and not child.is_empty then
207 local ret = find_before_innermost(child, range, pred)
208 if ret then
209 return ret
211 elseif pred(child, n, #t) then
212 return child, n, #t
216 if not t.is_root and pred(t) then
217 return t
222 local function find_after_innermost(t, range, pred)
223 if t.is_root or (t.d and t.finish >= range.finish) then
224 if t.d and not t.is_list then
225 local newpos = t.finish_after(math.max(t.start + #t.d, range.finish))
226 if newpos then
227 local word = t.word_at({start = newpos, finish = newpos})
228 if word and pred(word) then
229 return word
232 return
234 local _, nearest_n = t.after(range.finish, finishof)
235 if nearest_n then
236 for n = nearest_n, #t do
237 local child = t[n]
238 if child.d and not child.is_empty then
239 local ret = find_after_innermost(child, range, pred)
240 if ret then
241 return ret
243 elseif pred(child, n, #t) then
244 return child, n, #t
248 if not t.is_root and pred(t) then
249 return t
254 -- save the current logical position in terms of sexps, not offsets
255 local function sexp_path(t, range, floors)
256 if t.is_root or (t.is_list and t.contains(range)) then
257 local child, nth = t.around(range)
258 table.insert(floors, nth)
259 if child and child.is_list and not child.touches(range) then
260 sexp_path(child, range, floors)
263 return floors
266 -- find a sexp by pattern-matching a saved "sexp path"
267 -- inspired by Interlisp "location specifications", but this here is very rudimentary in comparison
268 -- currently only used for restoring the cursor position after an aggressive reindent
269 local function goto_path(root, path)
270 local dest = root
271 local parent
272 for n, i in ipairs(path) do
273 dest = dest[i]
274 parent = not parent and root or parent[path[n - 1]]
276 return dest, parent
279 local function catchup(tree, range)
280 if range.finish and range.finish >= tree.first_invalid then
281 tree.parse_to(range.finish + 1)
285 local root_methods = {
286 around = function(t)
287 return function(range)
288 catchup(t, range)
289 return around(t, range)
291 end,
292 before = function(t)
293 return function(pos, key, skip)
294 catchup(t, {finish = pos})
295 return before(t, pos, key, skip)
297 end,
298 after = function(t)
299 return function(pos, key, skip)
300 catchup(t, {finish = pos})
301 return after(t, pos, key, skip)
303 end,
304 find_after = function(t)
305 return function(range, pred, stop)
306 catchup(t, range)
307 return find_after(t, range, pred, stop)
309 end,
310 find_before = function(t)
311 return function(range, pred, stop)
312 return find_before(t, range, pred, stop)
314 end,
315 rewind = function(t)
316 return function(index)
317 assert(index <= t.first_invalid, ("%d > %d - You can only rewind backwards!"):format(index, t.first_invalid))
318 t.first_invalid = index
320 end,
321 is_parsed = function(t)
322 return function(index)
323 return index < t.first_invalid
325 end,
326 sexp_at = function(t)
327 return function(range)
328 catchup(t, range)
329 return sexp_around(t, range)
331 end,
332 paragraph_at = function(t)
333 return function(range)
334 catchup(t, range)
335 local sexp, n = t.around(range)
336 return sexp, t, n
338 end,
339 find_before_innermost = function(t)
340 return function(range, pred)
341 catchup(t, range)
342 return find_before_innermost(t, range, pred)
344 end,
345 find_after_innermost = function(t)
346 return function(range, pred)
347 catchup(t, range)
348 return find_after_innermost(t, range, pred)
350 end,
351 sexp_path = function(t)
352 return function(range)
353 catchup(t, range)
354 return sexp_path(t, range, {})
356 end,
357 goto_path = function(t)
358 return function(path)
359 local range = {start = t[path[1]].finish, finish = t[path[1]].finish}
360 catchup(t, range)
361 return goto_path(t, path)
363 end,
366 local function dispatch(vtable)
367 return function(self, key)
368 if vtable[key] then
369 return vtable[key](self)
374 local atom_node = {
375 __index = dispatch(atom_methods)
378 local list_node = {
379 __index = dispatch(list_methods)
382 local escaped_node = {
383 __index = dispatch(escaped_methods)
386 local pseudo_node = {
387 __index = dispatch(pseudo_methods)
390 local function at_pos(_, position, start, finish, range)
391 if range.start + 1 >= start and range.start < finish and range.finish < finish then
392 return position, start - 1, finish - 1
396 local function purge(base, lists)
397 if not base then return lists end
398 for _, list in pairs(lists) do
399 for i = #list, 1, -1 do
400 if list[i].start >= base then
401 table.remove(list)
402 else
403 break
407 return lists
410 local function append(old, new)
411 for name, list in pairs(new) do
412 for _, element in ipairs(list) do
413 table.insert(old[name], element)
416 local last = #old.tree ~= 0 and old.tree[#old.tree]
417 old.tree.first_invalid = last and (last.finish or last.start) + 1 or 0
418 return old
421 local function make_driver(read, parse, lists)
422 return function(advance)
423 local base
424 if lists.tree.first_invalid then
425 local last_parsed = #lists.tree ~= 0 and lists.tree[#lists.tree].finish + 1 or 0
426 if lists.tree.first_invalid < last_parsed then
427 local nearest = before(lists.tree, lists.tree.first_invalid, startof)
428 base = nearest and nearest.start or lists.tree.first_invalid
429 else
430 base = lists.tree.first_invalid
433 local new
434 local chunk_size = 42 * 1024
435 local tries = 1
436 local content = ""
437 local from = base
438 local len = advance > 0 and advance - base + chunk_size or chunk_size
439 repeat
440 local chunk, further = read(from, len)
441 if not chunk then break end
442 content = content .. chunk
443 new = parse(content, advance, base)
444 from = from + len
445 tries = tries + 1
446 len = chunk_size * 2^(tries - 1)
447 until not new.tree.unbalanced and (advance > 0 or advance < 0 and #new.tree >= math.abs(advance))
448 or not further
449 return append(purge(base, lists), new or {})
453 local function make_parser(lispwords, prefix, opposite, xatom, word, match, d1, d2, D1, D2, read)
454 list_methods.last_distinguished = function(t)
455 return t[1] and lispwords[t[1].text]
457 local function text(t)
458 local len = t.finish + 1 - t.start
459 return read(t.start, len)
461 atom_methods.text = text
462 list_methods.text = text
463 pseudo_methods.text = text
464 function escaped_methods.start_before(t)
465 return function(pos)
466 pos = pos - (t.start + #t.d)
467 local stops = P{Ct(((1 - word)^0 * Cp() * Cmt(Cc(pos), past) * word)^0)}:match(t.itext)
468 local newpos = stops and stops[#stops]
469 return newpos and (t.start + #t.d + newpos - 1)
472 function escaped_methods.start_after(t)
473 return function(pos)
474 pos = pos - (t.start + #t.d)
475 local stop = P{(1 - word) * Cp() * word + 1 * V(1)}:match(t.itext, pos + 1)
476 return stop and (t.start + #t.d + stop - 1)
479 function escaped_methods.finish_before(t)
480 return function(pos)
481 pos = pos - (t.start + #t.d)
482 local stops = P{Ct(((1 - word)^0 * word * Cp() * Cmt(Cc(pos + 1), past))^0)}:match(t.itext)
483 local newpos = stops and stops[#stops]
484 return newpos and (t.start + #t.d + newpos - 1)
487 function escaped_methods.finish_after(t)
488 return function(pos)
489 pos = pos - (t.start + #t.d)
490 local stop = P{word * Cp() + 1 * V(1)}:match(t.itext, pos + 1)
491 return stop and (t.start + #t.d + stop - 1)
494 function escaped_methods.word_at(t)
495 return function(range)
496 local r = {}
497 r.start = range.start - (t.start + #t.d)
498 r.finish = range.finish - (t.start + #t.d)
499 local start, finish = P{Cmt(Cp() * word * Cp() * Cc(r), at_pos) +
500 Cmt(1 * Cc(r.finish + 1), past) * V(1)}:match(t.itext)
501 if not start then return end
502 local node = {start = t.start + #t.d + start, finish = t.start + #t.d + finish - 1}
503 return setmetatable(node, pseudo_node)
506 function escaped_methods.is_empty(t)
507 return not t.finish_after(t.start + #t.d)
509 for k, v in pairs(atom_methods) do
510 escaped_methods[k] = v
512 function escaped_methods.itext(t)
513 local len = t.finish + 1 - #match(t.d) - t.start - #t.d
514 return read(t.start + #t.d, len) or ""
516 local function atom_methodize(t)
517 return setmetatable(t, (t.is_string or t.is_comment) and escaped_node or atom_node)
519 return function(content, advance, offset)
520 local tree, esc = nil, {}
521 local I = Cp() * Cc(offset) / incr
522 local I1 = I / m1
523 local opening = _start(I) * Cg(prefix^1, "p")^-1 * d1
524 local closing = _finish(I) * d2
525 local lone_prefix = _indent(I) * _start(I) * Cg(prefix^1, "p") * -#P(d1) * _finish(I1)
526 local unbalanced
527 local function debalance(t) unbalanced = true return t end
528 local function collect_escaped(sexp)
529 if sexp.is_comment and match(sexp.d):find("^\n") then
530 -- XYZZY: Poof! You are in the parser room. There is a scroll here:
531 -- The pattern for line comments deliberately doesn't consume the newline
532 -- *during parsing*, so the parser gets to tag the next sexp with .indent.
533 -- The range is extended here afterwards, though, so when you are at the end
534 -- of a line comment, the editor knows that you are inside it and won't try
535 -- to apply behaviour that is meant for code.
536 sexp.finish = sexp.finish + 1
538 if sexp.is_string or sexp.is_comment then
539 table.insert(esc, sexp)
541 return sexp
543 local char = ('#\\' * (P'alarm' + 'backspace' + 'delete' + 'escape'
544 + 'newline' + 'null' + 'return' + 'space' + 'tab')
545 + '#\\x' * R('09', 'AF', 'af')^1
546 + '#\\' * P(1)) * _char(P(true))
547 local dq_string = delimited_range('"') * _string(P(true))
548 local multi_esc = delimited_range("|") * _string(P(true))
549 local line_comment = Cg(';' * #-P';' + P';'^1 * hspace^-1, "d") * (1 - newline)^0 * _comment(P(true))
550 local block_comment = nested_pair("#|", "|#") * _comment(P(true))
551 local comment = opposite["#|"] and (line_comment + block_comment) or line_comment
552 local atom = Ct(_indent(I) * _start(I) * (
553 comment
554 + dq_string
555 + char
556 + multi_esc -- before xatom, since the latter matches too
557 + xatom(Cg(prefix^1, "p")^-1)
558 ) * _finish(I1)) / atom_methodize / collect_escaped + Ct(lone_prefix)
559 local list = P{Ct(_indent(I) * opening * (atom + hspace^1 + V(1))^0 * blank^0 * closing) *
560 Cc(list_node) / setmetatable}
561 -- XXX: semicolon (for line comments) should be listed below, too, but since I don't consume the newline
562 -- (the XYZZY trick), I can't tell if a line comment was not completely read due to insufficient chunk length.
563 -- So, don't set chunk_size below the size of the longest line comment probable.
564 local lone_paren = Ct(_indent(I) * _start(I) * (D1 + D2) * _finish(I1)) / debalance
565 local sexp = hspace^0 * (list + atom) + lone_paren
566 if advance < 0 then
567 tree = Ct(sexp^advance):match(content)
568 elseif advance > 0 then
569 tree = Ct((Cmt(Cc(advance - offset), past) * sexp)^0):match(content)
571 tree.unbalanced = unbalanced
572 return {tree = tree, escaped = esc}
576 function M.new(syntax, read)
577 local L = require('parkour.dialect.'..syntax)
578 local lispwords, prefix, opposite, xatom, word = L.lispwords, L.prefix, L.opposite, L.atom, L.word
579 local function match(delimiter)
580 return opposite[delimiter] or delimiter:find("^;") and opposite[";"]
582 local function unbalanced_delimiters(t, range)
583 if t.d or t.is_root then
584 for i = 1, #t do
585 if (t[i].d) and (t[i].contains(range) or t[i].intersects(range)) then
586 unbalanced_delimiters(t[i], range)
589 if not t.is_root then
590 local start = t.start + (t.p and #t.p or 0) + #t.d
591 local finish = t.finish + 1 - #match(t.d)
592 if start <= range.start and finish < range.finish then
593 coroutine.yield({start = finish, finish = t.finish + 1, closing = true})
594 elseif start > range.start and t.finish >= range.finish then
595 coroutine.yield({start = t.start, finish = start, opening = true})
601 root_methods.unbalanced_delimiters = function(t)
602 local slice_at = coroutine.wrap(unbalanced_delimiters)
603 return function(range)
604 local skips = {}
605 catchup(t, range)
606 repeat
607 local slice_pos = slice_at(t, range)
608 if slice_pos then table.insert(skips, slice_pos) end
609 until not slice_pos
610 return skips
614 local lists = {
615 tree = {
616 first_invalid = 0,
617 is_root = true,
618 }, -- (sorted) tree of sexps
619 escaped = {}, -- sorted list of strings and comments (redundant, for faster search)
621 local opening, closing = {}, {}
622 for o, c in pairs(opposite) do
623 if S"([{":match(o) then
624 table.insert(opening, o)
625 table.insert(closing, c)
628 local D1 = S(table.concat(opening))
629 local d1 = Cg(D1, "d")
630 local d2 = Cmt(Cb("d") * C(1), function(_, _, o, c) return opposite[o] == c end)
631 local D2 = S(table.concat(closing))
632 local parse = make_parser(lispwords, prefix, opposite, xatom, word, match, d1, d2, D1, D2, read)
633 local drive = make_driver(read, parse, lists)
634 setmetatable(lists.tree, {
635 __index = function(t, key)
636 if type(key) == "number" then
637 if #t < key then t.parse_to(#t - key) end
638 return rawget(t, key)
639 elseif root_methods[key] then
640 return root_methods[key](t)
641 elseif key == "parse_to" then
642 return drive
643 elseif key == "opposite" then
644 return match
648 setmetatable(lists.escaped, {
649 __index = {
650 around = function(range)
651 catchup(lists.tree, range)
652 return around(lists.escaped, range)
656 return lists, d1, D2
659 return M