3 local P
, S
, R
, V
, C
, Cb
, Cc
, Cg
, Cmt
, Cp
, Ct
= l
.P
, l
.S
, l
.R
, l
.V
, l
.C
, l
.Cb
, l
.Cc
, l
.Cg
, l
.Cmt
, l
.Cp
, l
.Ct
8 local blank
= S
"\n\v\f" + hspace
9 local newline
= S('\r\n\f')^
1
11 local function gt(pos
, val
)
12 return val
and pos
> val
15 local function le(pos
, val
)
16 return val
and pos
<= val
19 -- binary search a list for the nearest node before pos
20 local function before(t
, pos
, key
, skip
)
21 local left
, right
= 1, #t
22 while left
<= right
do
23 local m
= math
.floor(left
+ (right
- left
) / 2)
24 local m_val
= key(t
[m
])
25 if gt(pos
, m_val
) and (m
== #t
or le(pos
, key(t
[m
+ 1]))) then
27 while (t
[m
] and skip(t
[m
])) do
33 if le(pos
, m_val
) then right
= m
- 1 else left
= m
+ 1 end
37 -- binary search a list for the nearest node after pos
38 local function after(t
, pos
, key
, skip
)
39 if t
.is_root
and (#t
== 0 or pos
>= t
[#t
].start
) then
40 -- XXX: if we are at the last parsed top-level node, try to access the next one
41 -- to get it parsed as well. Otherwise the search will stop prematurely.
44 local left
, right
= 1, #t
45 while left
<= right
do
46 local m
= math
.floor(left
+ (right
- left
) / 2)
47 local m_val
= key(t
[m
])
48 if le(pos
, m_val
) and (m
== 1 or gt(pos
, key(t
[m
- 1]))) then
50 while (t
[m
] and skip(t
[m
])) do
56 if le(pos
, m_val
) then right
= m
- 1 else left
= m
+ 1 end
60 -- binary search a list for the node that contains pos
61 local function around(t
, range
)
62 if not (range
.start
and range
.finish
) then return end
63 local left
, right
= 1, #t
64 while left
<= right
do
65 local m
= math
.floor(left
+ (right
- left
) / 2)
68 if e
.start
and range
.start
>= e
.start
and e
.finish
and range
.finish
<= e
.finish
+ 1 and
69 (not nxt
or nxt
.start
> range
.start
) -- if adjoined - asdf|(qwer) - this will prefer (qwer)
71 if e
.start
and range
.start
< e
.start
then right
= m
- 1 else left
= m
+ 1 end
75 local function incr(index
, offset
) return index
+ offset
- 1 end
76 local function m1(pos
) return pos
- 1 end
77 local function past(_
, position
, pos
) return position
<= pos
end
78 local function startof(element
) return element
.start
end
79 local function finishof(element
) return element
.finish
end
80 local function _start(patt
) return Cg(patt
, "start") end
81 local function _finish(patt
) return Cg(patt
, "finish") end
82 local bof_or_bol
= (hspace^
0 * P
"\n"^
1)^
1 + Cmt("", function(_
, pos
) return pos
== 1 end)
83 local function _indent(I
)
84 return Cg(bof_or_bol
* I
* P
"\f", "section")^
0 *
85 Cg(bof_or_bol
* C(hspace^
0) / function(indent
) return #indent
end, "indent")^
-1
87 local function _string(patt
) return Cg(patt
/ function() return true end, "is_string") end
88 local function _char(patt
) return Cg(patt
/ function() return true end, "is_char") end
89 local function _comment(patt
) return Cg(patt
/ function() return true end, "is_comment") end
90 local function delimited_range(d
)
91 return Cg(d
, "d") * (P(1) - "\\" - d
+ P
"\\" * 1)^
0 * d
93 local function nested_pair(d1
, d2
)
94 return P
{Cg(d1
, "d") * (P(1) - d1
- d2
+ V(1))^
0 * d2
}
98 local function find_after(t
, selection
, pred
, stop
)
99 local _
, n
= t
.after(selection
.start
, startof
)
100 while t
[n
] and not pred(t
[n
]) do
101 if stop
and stop(t
[n
]) then return end
107 local function find_before(t
, selection
, pred
, stop
)
108 local _
, n
= t
.before(selection
.finish
, finishof
)
109 while t
[n
] and not pred(t
[n
]) do
110 if stop
and stop(t
[n
]) then return end
116 local function intersects(t
)
117 return function(range
)
118 local start
= t
.start
+ (t
.p
and #t
.p
or 0)
119 return start
>= range
.start
and t
.finish
>= range
.finish
and start
< range
.finish
120 or start
< range
.start
and t
.finish
< range
.finish
and range
.start
<= t
.finish
124 local atom_methods
= {
125 contains
= function(t
) return function(range
) return t
.start
<= range
.start
and t
.finish
>= range
.finish
- 1 end end,
126 touches
= function(t
) return function(range
) return t
.start
== range
.start
or t
.finish
== range
.finish
- 1 end end,
127 intersects
= intersects
,
130 local list_methods
= {
131 is_list
= function(_
) return true end,
132 is_empty
= function(t
) return #t
== 0 end,
133 contains
= function(t
)
134 return function(range
)
135 return t
.start
+ (t
.p
and #t
.p
or 0) < range
.start
and t
.finish
> range
.finish
- 1
138 touches
= function(t
)
139 return function(range
)
140 return t
.start
+ (t
.p
and #t
.p
or 0) >= range
.start
and t
.start
<= range
.start
or
141 t
.finish
== range
.finish
- 1
144 overlaps
= function(t
) return function(range
)
145 return t
.start
== range
.start
and t
.finish
> range
.finish
- 1
146 or t
.start
< range
.start
and t
.finish
== range
.finish
- 1 end
148 intersects
= intersects
,
150 return function(range
)
151 return around(t
, range
)
155 return function(pos
, key
, skip
)
156 return before(t
, pos
, key
, skip
)
160 return function(pos
, key
, skip
)
161 return after(t
, pos
, key
, skip
)
164 find_after
= function(t
)
165 return function(range
, pred
, stop
)
166 return find_after(t
, range
, pred
, stop
)
169 find_before
= function(t
)
170 return function(range
, pred
, stop
)
171 return find_before(t
, range
, pred
, stop
)
176 local escaped_methods
= {}
177 local pseudo_methods
= {}
179 local function sexp_around(t
, range
)
180 if t
.is_root
or (t
.is_list
and t
.contains(range
)) then
181 local child
, nth
= t
.around(range
)
182 if child
and child
.is_list
and not child
.touches(range
) then
183 return sexp_around(child
, range
)
190 local function find_before_innermost(t
, range
, pred
)
191 if t
.is_root
or (t
.d
and t
.start
< range
.start
) then
192 if t
.d
and not t
.is_list
then
193 local newpos
= t
.start_before(range
.start
)
195 local word
= t
.word_at({start
= newpos
, finish
= newpos
})
196 if word
and pred(word
) then
202 local _
, nearest_n
= t
.before(range
.start
, startof
)
204 for n
= nearest_n
, 1, -1 do
206 if child
.d
and not child
.is_empty
then
207 local ret
= find_before_innermost(child
, range
, pred
)
211 elseif pred(child
, n
, #t
) then
216 if not t
.is_root
and pred(t
) then
222 local function find_after_innermost(t
, range
, pred
)
223 if t
.is_root
or (t
.d
and t
.finish
>= range
.finish
) then
224 if t
.d
and not t
.is_list
then
225 local newpos
= t
.finish_after(math
.max(t
.start
+ #t
.d
, range
.finish
))
227 local word
= t
.word_at({start
= newpos
, finish
= newpos
})
228 if word
and pred(word
) then
234 local _
, nearest_n
= t
.after(range
.finish
, finishof
)
236 for n
= nearest_n
, #t
do
238 if child
.d
and not child
.is_empty
then
239 local ret
= find_after_innermost(child
, range
, pred
)
243 elseif pred(child
, n
, #t
) then
248 if not t
.is_root
and pred(t
) then
254 -- save the current logical position in terms of sexps, not offsets
255 local function sexp_path(t
, range
, floors
)
256 if t
.is_root
or (t
.is_list
and t
.contains(range
)) then
257 local child
, nth
= t
.around(range
)
258 table.insert(floors
, nth
)
259 if child
and child
.is_list
and not child
.touches(range
) then
260 sexp_path(child
, range
, floors
)
266 -- find a sexp by pattern-matching a saved "sexp path"
267 -- inspired by Interlisp "location specifications", but this here is very rudimentary in comparison
268 -- currently only used for restoring the cursor position after an aggressive reindent
269 local function goto_path(root
, path
)
272 for n
, i
in ipairs(path
) do
274 parent
= not parent
and root
or parent
[path
[n
- 1]]
279 local function catchup(tree
, range
)
280 if range
.finish
and range
.finish
>= tree
.first_invalid
then
281 tree
.parse_to(range
.finish
+ 1)
285 local root_methods
= {
287 return function(range
)
289 return around(t
, range
)
293 return function(pos
, key
, skip
)
294 catchup(t
, {finish
= pos
})
295 return before(t
, pos
, key
, skip
)
299 return function(pos
, key
, skip
)
300 catchup(t
, {finish
= pos
})
301 return after(t
, pos
, key
, skip
)
304 find_after
= function(t
)
305 return function(range
, pred
, stop
)
307 return find_after(t
, range
, pred
, stop
)
310 find_before
= function(t
)
311 return function(range
, pred
, stop
)
312 return find_before(t
, range
, pred
, stop
)
316 return function(index
)
317 assert(index
<= t
.first_invalid
, ("%d > %d - You can only rewind backwards!"):format(index
, t
.first_invalid
))
318 t
.first_invalid
= index
321 is_parsed
= function(t
)
322 return function(index
)
323 return index
< t
.first_invalid
326 sexp_at
= function(t
)
327 return function(range
)
329 return sexp_around(t
, range
)
332 paragraph_at
= function(t
)
333 return function(range
)
335 local sexp
, n
= t
.around(range
)
339 find_before_innermost
= function(t
)
340 return function(range
, pred
)
342 return find_before_innermost(t
, range
, pred
)
345 find_after_innermost
= function(t
)
346 return function(range
, pred
)
348 return find_after_innermost(t
, range
, pred
)
351 sexp_path
= function(t
)
352 return function(range
)
354 return sexp_path(t
, range
, {})
357 goto_path
= function(t
)
358 return function(path
)
359 local range
= {start
= t
[path
[1]]
.finish
, finish
= t
[path
[1]]
.finish
}
361 return goto_path(t
, path
)
366 local function dispatch(vtable
)
367 return function(self
, key
)
369 return vtable
[key
](self
)
375 __index
= dispatch(atom_methods
)
379 __index
= dispatch(list_methods
)
382 local escaped_node
= {
383 __index
= dispatch(escaped_methods
)
386 local pseudo_node
= {
387 __index
= dispatch(pseudo_methods
)
390 local function at_pos(_
, position
, start
, finish
, range
)
391 if range
.start
+ 1 >= start
and range
.start
< finish
and range
.finish
< finish
then
392 return position
, start
- 1, finish
- 1
396 local function purge(base
, lists
)
397 if not base
then return lists
end
398 for _
, list
in pairs(lists
) do
399 for i
= #list
, 1, -1 do
400 if list
[i
].start
>= base
then
410 local function append(old
, new
)
411 for name
, list
in pairs(new
) do
412 for _
, element
in ipairs(list
) do
413 table.insert(old
[name
], element
)
416 local last
= #old
.tree
~= 0 and old
.tree
[#old
.tree
]
417 old
.tree
.first_invalid
= last
and (last
.finish
or last
.start
) + 1 or 0
421 local function make_driver(read, parse
, lists
)
422 return function(advance
)
424 if lists
.tree
.first_invalid
then
425 local last_parsed
= #lists
.tree
~= 0 and lists
.tree
[#lists
.tree
].finish
+ 1 or 0
426 if lists
.tree
.first_invalid
< last_parsed
then
427 local nearest
= before(lists
.tree
, lists
.tree
.first_invalid
, startof
)
428 base
= nearest
and nearest
.start
or lists
.tree
.first_invalid
430 base
= lists
.tree
.first_invalid
434 local chunk_size
= 42 * 1024
438 local len
= advance
> 0 and advance
- base
+ chunk_size
or chunk_size
440 local chunk
, further
= read(from
, len
)
441 if not chunk
then break end
442 content
= content
.. chunk
443 new
= parse(content
, advance
, base
)
446 len
= chunk_size
* 2^
(tries
- 1)
447 until not new
.tree
.unbalanced
and (advance
> 0 or advance
< 0 and #new
.tree
>= math
.abs(advance
))
449 return append(purge(base
, lists
), new
or {})
453 local function make_parser(lispwords
, prefix
, opposite
, xatom
, word
, match
, d1
, d2
, D1
, D2
, read)
454 list_methods
.last_distinguished
= function(t
)
455 return t
[1] and lispwords
[t
[1].text
]
457 local function text(t
)
458 local len
= t
.finish
+ 1 - t
.start
459 return read(t
.start
, len
)
461 atom_methods
.text
= text
462 list_methods
.text
= text
463 pseudo_methods
.text
= text
464 function escaped_methods
.start_before(t
)
466 pos
= pos
- (t
.start
+ #t
.d
)
467 local stops
= P
{Ct(((1 - word
)^
0 * Cp() * Cmt(Cc(pos
), past
) * word
)^
0)}:match(t
.itext
)
468 local newpos
= stops
and stops
[#stops
]
469 return newpos
and (t
.start
+ #t
.d
+ newpos
- 1)
472 function escaped_methods
.start_after(t
)
474 pos
= pos
- (t
.start
+ #t
.d
)
475 local stop
= P
{(1 - word
) * Cp() * word
+ 1 * V(1)}:match(t
.itext
, pos
+ 1)
476 return stop
and (t
.start
+ #t
.d
+ stop
- 1)
479 function escaped_methods
.finish_before(t
)
481 pos
= pos
- (t
.start
+ #t
.d
)
482 local stops
= P
{Ct(((1 - word
)^
0 * word
* Cp() * Cmt(Cc(pos
+ 1), past
))^
0)}:match(t
.itext
)
483 local newpos
= stops
and stops
[#stops
]
484 return newpos
and (t
.start
+ #t
.d
+ newpos
- 1)
487 function escaped_methods
.finish_after(t
)
489 pos
= pos
- (t
.start
+ #t
.d
)
490 local stop
= P
{word
* Cp() + 1 * V(1)}:match(t
.itext
, pos
+ 1)
491 return stop
and (t
.start
+ #t
.d
+ stop
- 1)
494 function escaped_methods
.word_at(t
)
495 return function(range
)
497 r
.start
= range
.start
- (t
.start
+ #t
.d
)
498 r
.finish
= range
.finish
- (t
.start
+ #t
.d
)
499 local start
, finish
= P
{Cmt(Cp() * word
* Cp() * Cc(r
), at_pos
) +
500 Cmt(1 * Cc(r
.finish
+ 1), past
) * V(1)}:match(t
.itext
)
501 if not start
then return end
502 local node
= {start
= t
.start
+ #t
.d
+ start
, finish
= t
.start
+ #t
.d
+ finish
- 1}
503 return setmetatable(node
, pseudo_node
)
506 function escaped_methods
.is_empty(t
)
507 return not t
.finish_after(t
.start
+ #t
.d
)
509 for k
, v
in pairs(atom_methods
) do
510 escaped_methods
[k
] = v
512 function escaped_methods
.itext(t
)
513 local len
= t
.finish
+ 1 - #match(t
.d
) - t
.start
- #t
.d
514 return read(t
.start
+ #t
.d
, len
) or ""
516 local function atom_methodize(t
)
517 return setmetatable(t
, (t
.is_string
or t
.is_comment
) and escaped_node
or atom_node
)
519 return function(content
, advance
, offset
)
520 local tree
, esc
= nil, {}
521 local I
= Cp() * Cc(offset
) / incr
523 local opening
= _start(I
) * Cg(prefix^
1, "p")^
-1 * d1
524 local closing
= _finish(I
) * d2
525 local lone_prefix
= _indent(I
) * _start(I
) * Cg(prefix^
1, "p") * -#P(d1
) * _finish(I1
)
527 local function debalance(t
) unbalanced
= true return t
end
528 local function collect_escaped(sexp
)
529 if sexp
.is_comment
and match(sexp
.d
):find("^\n") then
530 -- XYZZY: Poof! You are in the parser room. There is a scroll here:
531 -- The pattern for line comments deliberately doesn't consume the newline
532 -- *during parsing*, so the parser gets to tag the next sexp with .indent.
533 -- The range is extended here afterwards, though, so when you are at the end
534 -- of a line comment, the editor knows that you are inside it and won't try
535 -- to apply behaviour that is meant for code.
536 sexp
.finish
= sexp
.finish
+ 1
538 if sexp
.is_string
or sexp
.is_comment
then
539 table.insert(esc
, sexp
)
543 local char
= ('#\\' * (P
'alarm' + 'backspace' + 'delete' + 'escape'
544 + 'newline' + 'null' + 'return' + 'space' + 'tab')
545 + '#\\x' * R('09', 'AF', 'af')^
1
546 + '#\\' * P(1)) * _char(P(true))
547 local dq_string
= delimited_range('"') * _string(P(true))
548 local multi_esc
= delimited_range("|") * _string(P(true))
549 local line_comment
= Cg(';' * #-P
';' + P
';'^
1 * hspace^
-1, "d") * (1 - newline
)^
0 * _comment(P(true))
550 local block_comment
= nested_pair("#|", "|#") * _comment(P(true))
551 local comment
= opposite
["#|"] and (line_comment
+ block_comment
) or line_comment
552 local atom
= Ct(_indent(I
) * _start(I
) * (
556 + multi_esc
-- before xatom, since the latter matches too
557 + xatom(Cg(prefix^
1, "p")^
-1)
558 ) * _finish(I1
)) / atom_methodize
/ collect_escaped
+ Ct(lone_prefix
)
559 local list
= P
{Ct(_indent(I
) * opening
* (atom
+ hspace^
1 + V(1))^
0 * blank^
0 * closing
) *
560 Cc(list_node
) / setmetatable
}
561 -- XXX: semicolon (for line comments) should be listed below, too, but since I don't consume the newline
562 -- (the XYZZY trick), I can't tell if a line comment was not completely read due to insufficient chunk length.
563 -- So, don't set chunk_size below the size of the longest line comment probable.
564 local lone_paren
= Ct(_indent(I
) * _start(I
) * (D1
+ D2
) * _finish(I1
)) / debalance
565 local sexp
= hspace^
0 * (list
+ atom
) + lone_paren
567 tree
= Ct(sexp^advance
):match(content
)
568 elseif advance
> 0 then
569 tree
= Ct((Cmt(Cc(advance
- offset
), past
) * sexp
)^
0):match(content
)
571 tree
.unbalanced
= unbalanced
572 return {tree
= tree
, escaped
= esc
}
576 function M
.new(syntax
, read)
577 local L
= require('parkour.dialect.'..syntax
)
578 local lispwords
, prefix
, opposite
, xatom
, word
= L
.lispwords
, L
.prefix
, L
.opposite
, L
.atom
, L
.word
579 local function match(delimiter
)
580 return opposite
[delimiter
] or delimiter
:find("^;") and opposite
[";"]
582 local function unbalanced_delimiters(t
, range
)
583 if t
.d
or t
.is_root
then
585 if (t
[i
].d
) and (t
[i
].contains(range
) or t
[i
].intersects(range
)) then
586 unbalanced_delimiters(t
[i
], range
)
589 if not t
.is_root
then
590 local start
= t
.start
+ (t
.p
and #t
.p
or 0) + #t
.d
591 local finish
= t
.finish
+ 1 - #match(t
.d
)
592 if start
<= range
.start
and finish
< range
.finish
then
593 coroutine
.yield({start
= finish
, finish
= t
.finish
+ 1, closing
= true})
594 elseif start
> range
.start
and t
.finish
>= range
.finish
then
595 coroutine
.yield({start
= t
.start
, finish
= start
, opening
= true})
601 root_methods
.unbalanced_delimiters
= function(t
)
602 local slice_at
= coroutine
.wrap(unbalanced_delimiters
)
603 return function(range
)
607 local slice_pos
= slice_at(t
, range
)
608 if slice_pos
then table.insert(skips
, slice_pos
) end
618 }, -- (sorted) tree of sexps
619 escaped
= {}, -- sorted list of strings and comments (redundant, for faster search)
621 local opening
, closing
= {}, {}
622 for o
, c
in pairs(opposite
) do
623 if S
"([{":match(o
) then
624 table.insert(opening
, o
)
625 table.insert(closing
, c
)
628 local D1
= S(table.concat(opening
))
629 local d1
= Cg(D1
, "d")
630 local d2
= Cmt(Cb("d") * C(1), function(_
, _
, o
, c
) return opposite
[o
] == c
end)
631 local D2
= S(table.concat(closing
))
632 local parse
= make_parser(lispwords
, prefix
, opposite
, xatom
, word
, match
, d1
, d2
, D1
, D2
, read)
633 local drive
= make_driver(read, parse
, lists
)
634 setmetatable(lists
.tree
, {
635 __index
= function(t
, key
)
636 if type(key
) == "number" then
637 if #t
< key
then t
.parse_to(#t
- key
) end
638 return rawget(t
, key
)
639 elseif root_methods
[key
] then
640 return root_methods
[key
](t
)
641 elseif key
== "parse_to" then
643 elseif key
== "opposite" then
648 setmetatable(lists
.escaped
, {
650 around
= function(range
)
651 catchup(lists
.tree
, range
)
652 return around(lists
.escaped
, range
)