Merge with git+ssh://pasky.or.cz/srv/git/elinks.git
[elinks.git] / src / dom / test / test-sgml-parser-basic
blobfc8e45e6bc95f7ab914e2ebcf0d616f0674084f1
1 #!/bin/sh
3 # Copyright (c) 2005 Jonas Fonseca
6 test_description='Test the very basic parsing of SGML documents.
8 This test runs very basic features, like checking that nodes are placed
9 correctly in the DOM tree.
12 . "$TEST_LIB"
# test_output_equals DESC SRC OUT
#
# Parse SRC with sgml-parser and register a test, named DESC, which checks
# that the parser's dump is identical to the expected dump OUT.
#
#   DESC  test description; it is also passed through normalize to build
#         the document URI ("test:<normalized DESC>")
#   SRC   SGML source handed to sgml-parser via --src
#   OUT   expected dump; its first line is dropped and every remaining line
#         is prefixed with one space below the "#document: URI" header
#
# Writes the files "output" and "expected" in the current directory and sets
# the global variables desc, src, out and URI.
test_output_equals () {
	desc="$1"
	src="$2"
	out="$3"

	URI="test:$(normalize "$desc")"

	sgml-parser --uri "$URI" --src "$src" > output

	# printf rather than echo: an echo that expands backslash escapes would
	# turn a literal \n in the expected dump into a real newline.
	printf '%s\n' "#document: $URI" > expected
	# Single sed pass: skip line 1, indent and print everything after it.
	printf '%s\n' "$out" | sed -n '2,$s/^/ /p' >> expected

	test_expect_success "$desc" 'cmp output expected'
}
29 ################################################################
30 # Parse various SGML node types.
# Each call passes test_output_equals a description, the SGML source to
# parse, and the dump the parser is expected to produce for it.
# Well-formed nested elements ending in a text node.
32 test_output_equals \
33 'Parse a small document.' \
34 '<html><body><p>Hello World!</p></body></html>' \
36 element: html
37 element: body
38 element: p
39 #text: Hello World!'
# A self-closing tag carrying an attribute, plus bare '</>' end tags.
41 test_output_equals \
42 'Parse elements.' \
43 '<root><child attr="value" /><child2></><child3 >a</></root>' \
45 element: root
46 element: child
47 attribute: attr -> value
48 element: child2
49 element: child3
50 #text: a'
# Tag soup: start tags that are missing their closing '>'.
52 test_output_equals \
53 'Parse tag soup elements. (I)' \
54 '<parent attr="value" <child:1></><child:2</>a</parent>' \
56 element: parent
57 attribute: attr -> value
58 element: child:1
59 element: child:2
60 #text: a'
# Tag soup: whitespace inside the tag delimiters.
62 test_output_equals \
63 'Parse tag soup elements. (II)' \
64 '< a >< b < c / >< / >' \
66 element: a
67 element: b
68 element: c'
# A single comment enclosed in an element.
70 test_output_equals \
71 'Parse an enclosed comment.' \
72 '<root><!-- Hello World! --></root>' \
74 element: root
75 #comment: Hello World! '
# Adjacent comments: one containing '<!--' and '-- >', one plain, one empty.
77 test_output_equals \
78 'Parse comment combinations. (I)' \
79 '<root><!-- <!-- -- > --><!--foo--><!----></root>' \
81 element: root
82 #comment: <!-- -- >
83 #comment: foo
84 #comment: '
# Top-level comments interleaved with single-character text nodes.
86 test_output_equals \
87 'Parse comment combinations. (II).' \
88 '<! -- comment -->s<!-->-->t<!----->u' \
90 #comment: comment
91 #text: s
92 #comment: >
93 #text: t
94 #comment: -
95 #text: u'
# No '-->' terminator: the expected dump keeps the trailing 's' as comment data.
97 test_output_equals \
98 'Parse bad comment. (I)' \
99 '<!--->s' \
101 #comment: ->s'
# Comment closed with '--!>' and followed by text.
103 test_output_equals \
104 'Parse bad comment. (II)' \
105 '<!--a--!>bad comment' \
107 #comment: a
108 #text: bad comment'
# Empty notation '<!>': the expected dump contains only the text after it.
110 test_output_equals \
111 'Parse empty notation.' \
112 '<!>s' \
114 #text: s'
# CDATA inside an element; '] ]>' is expected to stay part of the section data.
116 test_output_equals \
117 'Parse an enclosed CDATA section.' \
118 '<root><![CDATA[...] ]>...]]></root>' \
120 element: root
121 #cdata-section: ...] ]>...'
# CDATA section at the top level of the document.
123 test_output_equals \
124 'Parse non-enclosed CDATA section.' \
125 '<![CDATA[...]]>' \
127 #cdata-section: ...'
# CDATA section with no ']]>' terminator.
129 test_output_equals \
130 'Parse a bad CDATA section.' \
131 '<![CDATA[...' \
133 #cdata-section: ...'
# Quoted values, a value-less attribute, and '&'/'<' inside a quoted value.
135 test_output_equals \
136 'Parse attributes.' \
137 '<root lang="fr" attr name="value with &foo; <stuff"></root>' \
139 element: root
140 attribute: lang -> fr
141 attribute: attr ->
142 attribute: name -> value with &foo; <stuff'
# The source is double-quoted because it contains single quotes. The expected
# dump lists a, c, g, i and k only; there is no entry for the e'f' fragment.
144 test_output_equals \
145 'Parse attributes with garbage.' \
146 "<root a=b c='d' e'f' g= h i = j k =></root>" \
148 element: root
149 attribute: a -> b
150 attribute: c -> d
151 attribute: g -> h
152 attribute: i -> j
153 attribute: k -> '
# Unquoted attribute values containing '#', '/', '%', '&', '\', '"' and ';'.
155 test_output_equals \
156 'Parse attribute with non-quoted values.' \
157 '<root color=#abc path=/to/%61-&\one";files/>...' \
159 element: root
160 attribute: color -> #abc
161 attribute: path -> /to/%61-&\one";files
162 #text: ...'
# A named and a numeric entity reference separated by a text node.
164 test_output_equals \
165 'Parse entity references.' \
166 '&amp;-&#42;' \
168 entity-reference: amp
169 #text: -
170 entity-reference: #42'
# The expectations below record what the test currently accepts for malformed
# references: some are expected as text, others as entity references.
172 # Just how these should be gracefully handled is not clear to me.
173 test_output_equals \
174 'Parse badly formatted entity references.' \
175 '& m33p;-&.:-copy;-&;-&#;-&#xx;' \
177 #text: & m33p;
178 #text: -
179 entity-reference: .:-copy
180 #text: -
181 #text: &;
182 #text: -
183 entity-reference: #
184 #text: -
185 entity-reference: #xx'
# Multi-line source holding two processing instructions. The expected dump
# spells newlines as a literal backslash followed by 'n'.
187 test_output_equals \
188 'Parse processing instructions.' \
189 '<?xml encoding="UTF8"?>
191 <?ecmascript
192 var val=2;
193 ?>' \
195 proc-instruction: xml -> encoding="UTF8"
196 attribute: encoding -> UTF8
197 #text: \n...\n
198 proc-instruction: ecmascript -> var val=2;\n'
# XML instructions written with a '/>' tail; the second one has no '?>'.
200 test_output_equals \
201 'Parse XML processing instructions.' \
202 '<?xml version="1.0" />?><?xml />-' \
204 proc-instruction: xml -> version="1.0" />
205 attribute: version -> 1.0
206 proc-instruction: xml -> />-'
# xml-stylesheet instruction whose data is also expected as two attributes.
208 test_output_equals \
209 'Parse XML stylesheet processing instructions.' \
210 '<?xml-stylesheet type="text/xsl" href="url"?>' \
212 proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
213 attribute: type -> text/xsl
214 attribute: href -> url'
# Stray '?' and '>' inside the data, an instruction without data ('<?js?>'),
# and one without a target ('<??>').
216 test_output_equals \
217 'Parse exotic processing instructions.' \
218 '<?xml ?+>+?>-?>-<?js?>-<??>-' \
220 proc-instruction: xml -> ?+>+
221 #text: -?>-
222 proc-instruction: js ->
223 #text: -
224 proc-instruction: ->
225 #text: -'
# Malformed instructions; the source ends with an unterminated '<?', for
# which the expected dump has no entry.
227 test_output_equals \
228 'Parse incorrect processing instructions. (I)' \
229 '<?js<?>-<?<??>-<?xml <=";&?>-<?' \
231 proc-instruction: js -> <
232 #text: -
233 proc-instruction: -> <?
234 #text: -
235 proc-instruction: xml -> <=";&
236 #text: -'
# No '?>' anywhere in the source: one target-less instruction is expected.
238 test_output_equals \
239 'Parse incorrect processing instructions (II).' \
240 '<?><?' \
242 proc-instruction: -> ><?'
# Markup spread over several lines, with whitespace around names, the
# attribute value and the tag delimiters.
244 test_output_equals \
245 'Skip spaces not inside text.' \
247 root
248 ns:attr
250 "value"
251 ><?
252 target
253 data?>< / root >' \
255 element: root
256 attribute: ns:attr -> value
257 proc-instruction: target -> data'
# test_done is not defined in this file; presumably it is provided by the
# library sourced from $TEST_LIB -- confirm.
259 test_done