3 local P
, S
, R
, V
, C
, Cb
, Cc
, Cg
, Cmt
, Cp
, Ct
= l
.P
, l
.S
, l
.R
, l
.V
, l
.C
, l
.Cb
, l
.Cc
, l
.Cg
, l
.Cmt
, l
.Cp
, l
.Ct
10 local newline
= S
"\r\n"
12 local function incr(index
, offset
) return index
+ offset
- 1 end
13 local function m1(pos
) return pos
- 1 end
14 local function neg(pos
) return -pos
end
15 local function past(_
, position
, pos
) return position
<= pos
end
16 local function startof(element
) return element
.start
end
17 local function _start(patt
) return Cg(patt
, "start") end
18 local function _finish(patt
) return Cg(patt
, "finish") end
19 local function _string(patt
) return Cg(patt
/ function() return true end, "is_string") end
20 local function _char(patt
) return Cg(patt
/ function() return true end, "is_char") end
21 local function _comment(patt
) return Cg(patt
/ function() return true end, "is_comment") end
22 local function _line_comment(patt
) return Cg(patt
/ function() return true end, "is_line_comment") end
23 local function delimited_range(d
)
24 return Cg(d
, "d") * (P(1) - "\\" - d
+ P
"\\" * 1)^
0 * d
26 local function nested_pair(d1
, d2
)
27 return P
{Cg(d1
, "d") * (P(1) - d1
- d2
+ V(1))^
0 * d2
}
30 local function purge(base
, sexps
)
31 if not base
then return sexps
end
32 for _
, list
in pairs(sexps
) do
33 for i
= #list
, 1, -1 do
34 if list
[i
].start
>= base
then
44 local function append(old
, new
)
45 for name
, list
in pairs(new
) do
46 for _
, element
in ipairs(list
) do
47 table.insert(old
[name
], element
)
49 if old
[name
].is_root
then
50 -- XXX: check the length first and then access [1].
51 -- Otherwise the autovivification of [1] in a file with only blanks
52 -- will trigger an infinite recursion.
53 -- TODO: get rid of this trick altogether and instead make the parser set [1].indent.
54 local first
= #old
[name
] > 0 and old
[name
][1]
55 -- XXX: this assumes that the first sexp starts at column 0.
56 -- if not true, _only_ the first sexp will have wrong indent.
57 if first
and not first
.indent
then first
.indent
= 0 end
60 local last
= #old
.tree
~= 0 and old
.tree
[#old
.tree
]
61 old
.tree
.first_invalid
= last
and (last
.finish
or last
.start
) + 1 or 0
65 local function make_driver(read, parse
, before
, sexps
)
66 return function(advance
)
68 if sexps
.tree
.first_invalid
then
69 local last_parsed
= #sexps
.tree
~= 0 and sexps
.tree
[#sexps
.tree
].finish
+ 1 or 0
70 if sexps
.tree
.first_invalid
< last_parsed
then
71 local nearest
, n
= before(sexps
.tree
, sexps
.tree
.first_invalid
, startof
)
72 local second_nearest
= n
and sexps
.tree
[n
- 1]
73 local has_eol
= second_nearest
and second_nearest
.is_line_comment
74 local nearest_finish
= second_nearest
and second_nearest
.finish
+ (has_eol
and 0 or 1)
75 local nearest_start
= nearest
and nearest
.start
76 base
= nearest_finish
or nearest_start
or sexps
.tree
.first_invalid
78 local last
= sexps
.tree
[#sexps
.tree
]
79 local has_eol
= last
and last
.is_line_comment
80 base
= has_eol
and last
.finish
or sexps
.tree
.first_invalid
84 local chunk_size
= 4 * 1024
88 local len
= advance
> 0 and advance
- base
+ chunk_size
or chunk_size
90 local chunk
, further
= read(from
, len
)
91 if not chunk
then break end
92 content
= content
.. chunk
93 new
= parse(content
, advance
, base
)
96 len
= chunk_size
* 2^
(tries
- 1)
97 until not new
.tree
.unbalanced
and (advance
> 0 or advance
< 0 and #new
.tree
>= math
.abs(advance
))
99 return append(purge(base
, sexps
), new
or {})
103 local function tag_next_node(t
, i
, pos
)
107 until type(t
[j
]) == "table" or not t
[j
]
110 if nxt
and not nxt
.indent
then
111 nxt
.indent
= nxt
.start
- pos
- 1
118 local function make_parser(prefix
, weak_prefix
, opposite
, d1
, d2
, D1
, D2
, atom_node
, list_node
, quasilist_node
)
119 local function atom_methodify(t
)
120 return setmetatable(t
, (t
.d
and not t
.is_list
) and quasilist_node
or atom_node
)
122 local function extract_breaks(t
)
125 if type(node
) == "number" then
126 tag_next_node(t
, i
, node
)
128 elseif node
.is_line_comment
then
129 tag_next_node(t
, i
, node
[1])
136 return function(content
, advance
, offset
)
137 local tree
, esc
= nil, {}
138 local I
= Cp() * Cc(offset
) / incr
140 local opening
= _start(I
) * Cg(prefix^
1, "p")^
-1 * d1
141 local closing
= _finish(I
) * d2
142 local lone_prefix
= _start(I
) * Cg(prefix^
1, "p") * -#P(d1
+ "|") * _finish(I1
)
144 local function debalance(t
) unbalanced
= true return t
end
145 local function collect_escaped(sexp
)
146 if sexp
.is_string
or sexp
.is_comment
or sexp
.is_char
then
147 table.insert(esc
, sexp
)
151 local char
= ("\\" * (P
"alarm" + "backspace" + "delete" + "escape"
152 + "newline" + "null" + "return" + "space" + "tab")
153 + "\\x" * R("09", "AF", "af")^
1
154 + "\\" * P(1)) * _char(P(true))
155 local dq_string
, multi_esc
= delimited_range('"'), delimited_range("|")
156 local stringy
= (opposite
["|"] and (multi_esc
+ dq_string
) or dq_string
) * _string(P(true))
157 local nline
= I
* newline
158 local blank
= nline
+ hspace
+ "\f"
159 local lcd
= opposite
["\n"]
160 local lc_level
= lcd
* #-P(lcd
) + P(lcd
)^
1 * hspace^
-1
161 local line_comment
= Cg(lc_level
, "d") * (1 - nline
)^
0 * nline
* _comment(P(true)) * _line_comment(P(true))
163 for o
, c
in pairs(opposite
) do
164 -- XXX: here I assume that #| is the only multi-char delimiter,
165 -- and that it means block comment
171 local block_comment
= bcd1
and bcd2
and nested_pair(bcd1
, bcd2
) * _comment(P(true))
172 local comment
= block_comment
and (block_comment
+ line_comment
) or line_comment
173 local incomplete_comment
= bcd1
and (lc_level
+ bcd1
) or lc_level
174 local symbol
= weak_prefix
175 and (1 - D1
- D2
- blank
- char
- stringy
- comment
- prefix
+ weak_prefix
)^
1
176 or (1 - D1
- D2
- blank
- char
- stringy
- comment
- prefix
)^
1
177 local atom
= Ct(_start(I
) * (
179 + Cg(prefix^
1, "p")^
-1 * (
183 )) * _finish(I1
)) / atom_methodify
/ collect_escaped
185 local list
= P
{Ct(opening
* blank^
0 * (atom
+ blank^
1 + V(1))^
0 * closing
) *
186 Cc(list_node
) / setmetatable
/ extract_breaks
}
187 local lone_delimiter
= Ct(_start(I
) * (D1
+ D2
+ incomplete_comment
) * _finish(I1
)) / debalance
188 local section_break
= I
* "\f" / neg
189 local sexp
= (section_break
+ blank
)^
0 * (list
+ atom
+ lone_delimiter
)
191 local top_level
= Ct(advance
< 0
193 or (Cmt(Cc(advance
- offset
), past
) * sexp
)^
0)
194 tree
= P
{top_level
/ extract_breaks
}:match(content
)
195 tree
.unbalanced
= unbalanced
197 return {tree
= tree
, escaped
= esc
}
201 local opposite_fallback
= {
202 __index
= function(t
, delimiter
)
203 local opening
= t
["\n"]
204 return delimiter
and delimiter
:find("^" .. opening
) and t
[opening
]
208 local function catchup(tree
, range
)
209 if range
.finish
and range
.finish
>= tree
.first_invalid
then
210 tree
.parse_to(range
.finish
+ 1)
214 function M
.new(syntax
, node
, read)
215 local L
= require(cwd
.."."..syntax
)
216 local opposite
= setmetatable(L
.opposite
, opposite_fallback
)
221 }, -- (sorted) tree of sexps
222 escaped
= {}, -- sorted list of strings and comments (redundant, for faster search)
224 local opening
, closing
, reverse
= {}, {}, {}
225 for o
, c
in pairs(opposite
) do
226 if S
"([{":match(o
) then
227 table.insert(opening
, o
)
228 table.insert(closing
, c
)
230 if c
~= o
and #c
== 1 then -- XXX: don't reverse block comments and strings
234 for c
, o
in pairs(reverse
) do
237 local D1
= S(table.concat(opening
))
238 local d1
= Cg(D1
, "d")
239 local d2
= Cmt(Cb("d") * C(1), function(_
, _
, o
, c
) return opposite
[o
] == c
end)
240 local D2
= S(table.concat(closing
))
241 local atom_node
, list_node
, quasilist_node
= node
.atom
, node
.list
, node
.quasilist(opposite
)
242 local parse
= make_parser(L
.prefix
, L
.weak_prefix
, opposite
, d1
, d2
, D1
, D2
, atom_node
, list_node
, quasilist_node
)
243 local drive
= make_driver(read, parse
, node
._before
, sexps
)
244 local root_methods
= node
.root(opposite
)
245 setmetatable(sexps
.tree
, {
246 __index
= function(t
, key
)
247 if type(key
) == "number" then
248 if #t
< key
then t
.parse_to(#t
- key
) end
249 return rawget(t
, key
)
250 elseif root_methods
[key
] then
251 return root_methods
[key
](t
)
252 elseif key
== "parse_to" then
257 setmetatable(sexps
.escaped
, {
259 around
= function(range
)
260 catchup(sexps
.tree
, range
)
261 return node
._around(sexps
.escaped
, range
)
265 return {tree
= sexps
.tree
, escaped
= sexps
.escaped
, opposite
= opposite
}, d1
, D2