1 -- SPDX-License-Identifier: GPL-3.0-or-later
2 -- © 2020 Georgi Kirilov
6 local P
, S
, R
, V
, C
, Cb
, Cc
, Cg
, Cmt
, Cp
, Ct
= l
.P
, l
.S
, l
.R
, l
.V
, l
.C
, l
.Cb
, l
.Cc
, l
.Cg
, l
.Cmt
, l
.Cp
, l
.Ct
13 local newline
= S
"\r\n"
15 local function incr(index
, offset
) return index
+ offset
- 1 end
16 local function m1(pos
) return pos
- 1 end
17 local function neg(pos
) return -pos
end
18 local function past(_
, position
, pos
) return position
<= pos
end
19 local function startof(element
) return element
.start
end
20 local function _start(patt
) return Cg(patt
, "start") end
21 local function _finish(patt
) return Cg(patt
, "finish") end
22 local function _string(patt
) return Cg(patt
/ function() return true end, "is_string") end
23 local function _char(patt
) return Cg(patt
/ function() return true end, "is_char") end
24 local function _comment(patt
) return Cg(patt
/ function() return true end, "is_comment") end
25 local function _line_comment(patt
) return Cg(patt
/ function() return true end, "is_line_comment") end
26 local function delimited_range(d
)
27 return Cg(d
, "d") * (P(1) - "\\" - d
+ P
"\\" * 1)^
0 * d
29 local function nested_pair(d1
, d2
)
30 return P
{Cg(d1
, "d") * (P(1) - d1
- d2
+ V(1))^
0 * d2
}
33 local function purge(base
, sexps
)
34 if not base
then return sexps
end
35 for _
, list
in pairs(sexps
) do
36 for i
= #list
, 1, -1 do
37 if list
[i
].start
>= base
then
47 local function append(old
, new
)
48 for name
, list
in pairs(new
) do
49 for _
, element
in ipairs(list
) do
50 table.insert(old
[name
], element
)
52 if old
[name
].is_root
then
53 -- XXX: check the length first and then access [1].
54 -- Otherwise the autovivification of [1] in a file with only blanks
55 -- will trigger an infinite recursion.
56 -- TODO: get rid of this trick altogether and instead make the parser set [1].indent.
57 local first
= #old
[name
] > 0 and old
[name
][1]
58 -- XXX: this assumes that the first sexp starts at column 0.
59 -- if not true, _only_ the first sexp will have wrong indent.
60 if first
and not first
.indent
then first
.indent
= 0 end
63 local last
= #old
.tree
~= 0 and old
.tree
[#old
.tree
]
64 old
.tree
.first_invalid
= last
and (last
.finish
or last
.start
) + 1 or 0
68 local function make_driver(read, parse
, before
, sexps
)
69 return function(advance
)
71 if sexps
.tree
.first_invalid
then
72 local last_parsed
= #sexps
.tree
~= 0 and sexps
.tree
[#sexps
.tree
].finish
+ 1 or 0
73 if sexps
.tree
.first_invalid
< last_parsed
then
74 local nearest
, n
= before(sexps
.tree
, sexps
.tree
.first_invalid
, startof
)
75 local second_nearest
= n
and sexps
.tree
[n
- 1]
76 local has_eol
= second_nearest
and second_nearest
.is_line_comment
77 local nearest_finish
= second_nearest
and second_nearest
.finish
+ (has_eol
and 0 or 1)
78 local nearest_start
= nearest
and nearest
.start
79 base
= nearest_finish
or nearest_start
or sexps
.tree
.first_invalid
81 local last
= sexps
.tree
[#sexps
.tree
]
82 local has_eol
= last
and last
.is_line_comment
83 base
= has_eol
and last
.finish
or sexps
.tree
.first_invalid
87 local chunk_size
= 4 * 1024
91 local len
= advance
> 0 and advance
- base
+ chunk_size
or chunk_size
93 local chunk
, further
= read(from
, len
)
94 if not chunk
then break end
95 content
= content
.. chunk
96 new
= parse(content
, advance
, base
)
99 len
= chunk_size
* 2^
(tries
- 1)
100 until not new
.tree
.unbalanced
and (advance
> 0 or advance
< 0 and #new
.tree
>= math
.abs(advance
))
102 return append(purge(base
, sexps
), new
or {})
106 local function tag_next_node(t
, i
, pos
)
110 until type(t
[j
]) == "table" or not t
[j
]
113 if nxt
and not nxt
.indent
then
114 nxt
.indent
= nxt
.start
- pos
- 1
121 local function make_parser(prefix
, weak_prefix
, opposite
, d1
, d2
, D1
, D2
, atom_node
, list_node
, quasilist_node
)
122 local function atom_methodify(t
)
123 return setmetatable(t
, (t
.d
and not t
.is_list
) and quasilist_node
or atom_node
)
125 local function extract_breaks(t
)
128 if type(node
) == "number" then
129 tag_next_node(t
, i
, node
)
131 elseif node
.is_line_comment
then
132 tag_next_node(t
, i
, node
[1])
139 return function(content
, advance
, offset
)
140 local tree
, esc
= nil, {}
141 local I
= Cp() * Cc(offset
) / incr
143 local opening
= _start(I
) * Cg(prefix^
1, "p")^
-1 * d1
144 local closing
= _finish(I
) * d2
145 local lone_prefix
= _start(I
) * Cg(prefix^
1, "p") * -#P(d1
+ "|") * _finish(I1
)
147 local function debalance(t
) unbalanced
= true return t
end
148 local function collect_escaped(sexp
)
149 if sexp
.is_string
or sexp
.is_comment
or sexp
.is_char
then
150 table.insert(esc
, sexp
)
154 local char
= ("\\" * (P
"alarm" + "backspace" + "delete" + "escape"
155 + "newline" + "null" + "return" + "space" + "tab")
156 + "\\x" * R("09", "AF", "af")^
1
157 + "\\" * P(1)) * _char(P(true))
158 local dq_string
, multi_esc
= delimited_range('"'), delimited_range("|")
159 local stringy
= (opposite
["|"] and (multi_esc
+ dq_string
) or dq_string
) * _string(P(true))
160 local nline
= I
* newline
161 local blank
= nline
+ hspace
+ "\f"
162 local lcd
= opposite
["\n"]
163 local lc_level
= lcd
* #-P(lcd
) + P(lcd
)^
1 * hspace^
-1
164 local line_comment
= Cg(lc_level
, "d") * (1 - nline
)^
0 * nline
* _comment(P(true)) * _line_comment(P(true))
166 for o
, c
in pairs(opposite
) do
167 -- XXX: here I assume that #| is the only multi-char delimiter,
168 -- and that it means block comment
174 local block_comment
= bcd1
and bcd2
and nested_pair(bcd1
, bcd2
) * _comment(P(true))
175 local comment
= block_comment
and (block_comment
+ line_comment
) or line_comment
176 local incomplete_comment
= bcd1
and (lc_level
+ bcd1
) or lc_level
177 local symbol
= weak_prefix
178 and (1 - D1
- D2
- blank
- char
- stringy
- comment
- prefix
+ weak_prefix
)^
1
179 or (1 - D1
- D2
- blank
- char
- stringy
- comment
- prefix
)^
1
180 local atom
= Ct(_start(I
) * (
182 + Cg(prefix^
1, "p")^
-1 * (
186 )) * _finish(I1
)) / atom_methodify
/ collect_escaped
188 local list
= P
{Ct(opening
* blank^
0 * (atom
+ blank^
1 + V(1))^
0 * closing
) *
189 Cc(list_node
) / setmetatable
/ extract_breaks
}
190 local lone_delimiter
= Ct(_start(I
) * (D1
+ D2
+ incomplete_comment
) * _finish(I1
)) / debalance
191 local section_break
= I
* "\f" / neg
192 local sexp
= (section_break
+ blank
)^
0 * (list
+ atom
+ lone_delimiter
)
194 local top_level
= Ct(advance
< 0
196 or (Cmt(Cc(advance
- offset
), past
) * sexp
)^
0)
197 tree
= P
{top_level
/ extract_breaks
}:match(content
)
198 tree
.unbalanced
= unbalanced
200 return {tree
= tree
, escaped
= esc
}
204 local opposite_fallback
= {
205 __index
= function(t
, delimiter
)
206 local opening
= t
["\n"]
207 return delimiter
and delimiter
:find("^" .. opening
) and t
[opening
]
211 local function catchup(tree
, range
)
212 if range
.finish
and range
.finish
>= tree
.first_invalid
then
213 tree
.parse_to(range
.finish
+ 1)
217 function M
.new(syntax
, node
, read)
218 local L
= require(cwd
.."."..syntax
)
219 local opposite
= setmetatable(L
.opposite
, opposite_fallback
)
224 }, -- (sorted) tree of sexps
225 escaped
= {}, -- sorted list of strings and comments (redundant, for faster search)
227 local opening
, closing
, reverse
= {}, {}, {}
228 for o
, c
in pairs(opposite
) do
229 if S
"([{":match(o
) then
230 table.insert(opening
, o
)
231 table.insert(closing
, c
)
233 if c
~= o
and #c
== 1 then -- XXX: don't reverse block comments and strings
237 for c
, o
in pairs(reverse
) do
240 local D1
= S(table.concat(opening
))
241 local d1
= Cg(D1
, "d")
242 local d2
= Cmt(Cb("d") * C(1), function(_
, _
, o
, c
) return opposite
[o
] == c
end)
243 local D2
= S(table.concat(closing
))
244 local atom_node
, list_node
, quasilist_node
= node
.atom
, node
.list
, node
.quasilist(opposite
)
245 local parse
= make_parser(L
.prefix
, L
.weak_prefix
, opposite
, d1
, d2
, D1
, D2
, atom_node
, list_node
, quasilist_node
)
246 local drive
= make_driver(read, parse
, node
._before
, sexps
)
247 local root_methods
= node
.root(opposite
)
248 setmetatable(sexps
.tree
, {
249 __index
= function(t
, key
)
250 if type(key
) == "number" then
251 if #t
< key
then t
.parse_to(#t
- key
) end
252 return rawget(t
, key
)
253 elseif root_methods
[key
] then
254 return root_methods
[key
](t
)
255 elseif key
== "parse_to" then
260 setmetatable(sexps
.escaped
, {
262 around
= function(range
)
263 catchup(sexps
.tree
, range
)
264 return node
._around(sexps
.escaped
, range
)
268 return {tree
= sexps
.tree
, escaped
= sexps
.escaped
, opposite
= opposite
}, d1
, D2