strip html escapes from text too
[lqt.git] / new / xml.lua
blob5d96244c01df87fe386306394bf56d20b63d684b
1 #!/usr/bin/lua
-- Forward declarations: the `function parseargs` / `function collect`
-- definitions further down assign to these file-local names instead of
-- creating globals.
local parseargs, collect

-- Named character entities understood by strip_escapes, keyed by the name
-- that appears between '&' and ';'.  These are the five entities that XML
-- predefines.
local escapes = {
  gt = '>',
  lt = '<',
  amp = '&',
  quot = '"',
  apos = "'",
}

--- Replace named character entities in a string with their literal characters.
-- The string is scanned once, so the output of one replacement is never
-- re-interpreted as another entity ("&amp;lt;" becomes "&lt;", not "<").
-- Entities whose name is not a key of `escapes` are left untouched: gsub
-- keeps the original match when the table lookup yields nil.
-- @param s string that may contain `&name;` sequences
-- @return the string with every known entity replaced
local strip_escapes = function (s)
  -- The parentheses drop gsub's second result (the substitution count).
  return (string.gsub(s, '&(%a+);', escapes))
end
--- Turn the attribute portion of a tag into a name -> value table.
-- Every `name=value` pair is picked up where the name consists of
-- alphanumerics/underscores and the value is wrapped in a matching pair of
-- single or double quotes.  Name and value are both passed through
-- strip_escapes before being stored; a repeated name keeps its last value.
-- @param s text found between the tag name and the closing '>'
-- @return table mapping attribute names to their decoded values
function parseargs(s)
  local attrs = {}
  -- The middle capture is the opening quote; it exists only so that %2 can
  -- require the same character to close the value.
  for name, _, value in string.gmatch(s, "([%w_]+)=([\"'])(.-)%2") do
    attrs[strip_escapes(name)] = strip_escapes(value)
  end
  return attrs
end
--- Parse an XML string into a tree of nested tables.
--
-- Every element becomes a table with `label` (the tag name) and `xarg` (the
-- attribute table produced by parseargs); its children -- element tables and
-- text strings -- are stored in the array part in document order.
--   * Elements closed by an end tag additionally get `parent` (the enclosing
--     table) and `index` (their position inside the parent) and are recorded
--     as keys of the returned index table.
--   * Self-closing elements (`<x/>`) get `empty = 1` instead and are not
--     recorded in the index table.
--   * Text runs that are empty or whitespace-only are dropped; all other text
--     is passed through strip_escapes, including text after the last tag.
--
-- @param s the XML source text
-- @return the root container (a table without `label` holding the top-level
--         nodes), followed by the index table (closed element -> true)
-- Raises an error when an end tag has nothing to close, when an end tag does
-- not match the innermost open element, or when elements remain open at the
-- end of the input.
function collect(s)
  local root = {}
  local stack = { root }   -- chain of currently open elements, root first
  local index = {}
  local top = root         -- innermost open element
  local text_start, tag_end = 1, 1

  -- Append a text run to `node` unless it is empty or whitespace-only.
  local function add_text(node, text)
    if not string.find(text, "^%s*$") then
      -- Parentheses guarantee a single argument for table.insert.
      table.insert(node, (strip_escapes(text)))
    end
  end

  while true do
    local tag_start, closing, label, xarg, empty
    -- Captures: leading '/', tag name, raw attribute text, trailing '/'.
    tag_start, tag_end, closing, label, xarg, empty =
      string.find(s, "<(%/?)(%w+)(.-)(%/?)>", tag_end)
    if not tag_start then break end

    -- Text sitting between the previous tag and this one.
    add_text(top, string.sub(s, text_start, tag_start - 1))

    if empty == "/" then
      -- Self-closing element: attach directly, nothing to push.
      table.insert(top, { label = label, xarg = parseargs(xarg), empty = 1 })
    elseif closing == "" then
      -- Start tag: open a new nesting level.
      top = { label = label, xarg = parseargs(xarg) }
      table.insert(stack, top)
    else
      -- End tag: only the root container left means there is no open
      -- element this tag could belong to.
      if #stack < 2 then
        error("nothing to close with " .. label)
      end
      local toclose = table.remove(stack)
      top = stack[#stack]
      if toclose.label ~= label then
        error("trying to close " .. toclose.label .. " with " .. label)
      end
      table.insert(top, toclose)
      toclose.parent = top
      toclose.index = #top
      index[toclose] = true
    end
    text_start = tag_end + 1
  end

  -- Text after the last tag gets the same entity decoding as any other text.
  add_text(stack[#stack], string.sub(s, text_start))

  if #stack > 1 then
    -- `stack.n` is not maintained by table.insert, so the innermost open
    -- element has to be fetched with the length operator.
    error("unclosed " .. stack[#stack].label)
  end
  return stack[1], index
end
-- Module result: loading this file yields the `collect` parser function.
return collect