bug 1080: Fold UTF-8 and unibyte dumping together
[elinks.git] / src / dom / test / test-sgml-parser-incremental
blob291884705149a50cd0ab63f08a70a754ae10e738
#!/bin/sh
#
# Copyright (c) 2005 Jonas Fonseca
#

# Description of this test script. It is only assigned here; how it is
# consumed is up to the library sourced below.
test_description='Test incremental parsing of SGML documents.

This test checks if the SGML parser correctly recovers during incremental
parsing.
'

# Load the shared test helpers from the path given in $TEST_LIB.
. "$TEST_LIB"
# Parse one document with sgml-parser at several --stdin sizes and check
# that every run produces the same expected dump.
#
# Arguments:
#   $1 - test description; also turned into the test URI via normalize
#   $2 - SGML source fed to the parser on stdin
#   $3 - expected dump; its first line is dropped and each remaining line
#        is prefixed by the sed substitution below
# Files:
#   Writes "expected" and "output" in the current directory.
# Result:
#   Calls test_failure_ and returns at the first size whose output differs
#   from "expected"; calls test_ok_ once all sizes matched.
#
# normalize, sgml-parser, test_run_, test_failure_ and test_ok_ are not
# defined in this file; presumably they come from the sourced test library
# and the build tree.
test_incremental_parsing () {
  desc="$1"; shift
  src="$1"; shift
  out="$1"; shift

  URI="test:$(normalize "$desc")"

  # printf instead of echo: echo may interpret backslashes under /bin/sh.
  printf '%s\n' "#document: $URI" > expected
  printf "%s\n" "$out" | sed -n '2,$p' | sed -e 's/^/ /' >> expected

  for size in 1 2 3 4 5 6 7 8 9 10 15 20 25 50; do
    printf "%s" "$src" | sgml-parser --uri "$URI" --stdin "$size" > output

    test_run_ 'cmp output expected'
    # Two separate tests joined with ||: the -o operator of [ is
    # obsolescent and ambiguous. eval_ret is expected to be set by
    # test_run_ (not shown here).
    if [ "$?" != 0 ] || [ "$eval_ret" != 0 ]
    then
      test_failure_ "$desc" "($size bytes)"
      return
    fi
  done

  test_ok_ "$desc"
}
# Element parsing: a small well-formed document, empty end tags (</>),
# and tag soup where a start tag is not closed before the next one begins.
# Each call passes a description, the SGML source, and the expected dump.
38 test_incremental_parsing \
39 "Parse a small document." \
40 '<html><body><p>Hello World!</p></body></html>' \
42 element: html
43 element: body
44 element: p
45 #text: Hello World!'
47 test_incremental_parsing \
48 'Parse elements.' \
49 '<root><child attr="value" /><child2></><child3 >a</></root>' \
51 element: root
52 element: child
53 attribute: attr -> value
54 element: child2
55 element: child3
56 #text: a'
58 test_incremental_parsing \
59 'Parse tag soup elements.' \
60 '<parent attr="value" <child:1></><child:2</>a</parent>' \
62 element: parent
63 attribute: attr -> value
64 element: child:1
65 element: child:2
66 #text: a'
# Comments and notation: an enclosed comment, comment-like text inside a
# comment, degenerate forms such as <!--> and <!----->, the --!> ending,
# and the empty notation <!>.
68 test_incremental_parsing \
69 'Parse an enclosed comment.' \
70 '<root><!-- Hello World! --></root>' \
72 element: root
73 #comment: Hello World! '
75 test_incremental_parsing \
76 'Parse comment combinations. (I)' \
77 '<root><!-- <!-- -- > --><!--foo--><!----></root>' \
79 element: root
80 #comment: <!-- -- >
81 #comment: foo
82 #comment: '
84 test_incremental_parsing \
85 'Parse comment combinations. (II).' \
86 '<! -- comment -->s<!-->-->t<!----->u' \
88 #comment: comment
89 #text: s
90 #comment: >
91 #text: t
92 #comment: -
93 #text: u'
95 test_incremental_parsing \
96 'Parse bad comment. (I)' \
97 '<!--->s' \
99 #comment: ->s'
101 test_incremental_parsing \
102 'Parse bad comment. (II)' \
103 '<!--a--!>bad comment' \
105 #comment: a
106 #text: bad comment'
108 test_incremental_parsing \
109 'Parse empty notation.' \
110 '<!>s' \
112 #text: s'
# CDATA sections: inside an element, at the top level, and with the
# closing ]]> missing from the input.
114 test_incremental_parsing \
115 'Parse an enclosed CDATA section.' \
116 '<root><![CDATA[...] ]>...]]></root>' \
118 element: root
119 #cdata-section: ...] ]>...'
121 test_incremental_parsing \
122 'Parse non-enclosed CDATA section.' \
123 '<![CDATA[...]]>' \
125 #cdata-section: ...'
127 test_incremental_parsing \
128 'Parse a bad CDATA section.' \
129 '<![CDATA[...' \
131 #cdata-section: ...'
# Attributes: quoted values, a name without a value, stray quotes and
# dangling '=' signs, and unquoted values containing special characters.
133 test_incremental_parsing \
134 'Parse attributes.' \
135 '<root lang="fr" attr name="value with &foo; <stuff"></root>' \
137 element: root
138 attribute: lang -> fr
139 attribute: attr ->
140 attribute: name -> value with &foo; <stuff'
142 test_incremental_parsing \
143 'Parse attributes with garbage.' \
144 "<root a=b c='d' e'f' g= h i = j k =></root>" \
146 element: root
147 attribute: a -> b
148 attribute: c -> d
149 attribute: g -> h
150 attribute: i -> j
151 attribute: k -> '
153 test_incremental_parsing \
154 'Parse attribute with non-quoted values.' \
155 '<root color=#abc path=/to/%61-&\one";files/>...' \
157 element: root
158 attribute: color -> #abc
159 attribute: path -> /to/%61-&\one";files
160 #text: ...'
# Entity references: a named and a numeric reference first, then a set of
# malformed references whose expected handling is recorded as-is.
162 test_incremental_parsing \
163 'Parse entity references.' \
164 '&amp;-&#42;' \
166 entity-reference: amp
167 #text: -
168 entity-reference: #42'
170 # Just how these should be gracefully handled is not clear to me.
171 test_incremental_parsing \
172 'Parse badly formatted entity references.' \
173 '& m33p;-&.:-copy;-&;-&#;-&#xx;' \
175 #text: & m33p;
176 #text: -
177 entity-reference: .:-copy
178 #text: -
179 #text: &;
180 #text: -
181 entity-reference: #
182 #text: -
183 entity-reference: #xx'
# Processing instructions: an XML declaration followed by a multi-line
# instruction, XML and xml-stylesheet instructions with attributes, and
# instructions with odd, empty, or unterminated targets and data.
185 test_incremental_parsing \
186 'Parse processing instructions.' \
187 '<?xml encoding="UTF8"?>
189 <?ecmascript
190 var val=2;
191 ?>' \
193 proc-instruction: xml -> encoding="UTF8"
194 attribute: encoding -> UTF8
195 #text: \n...\n
196 proc-instruction: ecmascript -> var val=2;\n'
198 test_incremental_parsing \
199 'Parse XML processing instructions.' \
200 '<?xml version="1.0" />?><?xml />-' \
202 proc-instruction: xml -> version="1.0" />
203 attribute: version -> 1.0
204 proc-instruction: xml -> />-'
206 test_incremental_parsing \
207 'Parse XML stylesheet processing instructions.' \
208 '<?xml-stylesheet type="text/xsl" href="url"?>' \
210 proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
211 attribute: type -> text/xsl
212 attribute: href -> url'
214 test_incremental_parsing \
215 'Parse exotic processing instructions.' \
216 '<?xml ?+>+?>-?>-<?js?>-<??>-' \
218 proc-instruction: xml -> ?+>+
219 #text: -?>-
220 proc-instruction: js ->
221 #text: -
222 proc-instruction: ->
223 #text: -'
225 test_incremental_parsing \
226 'Parse incorrect processing instructions.' \
227 '<?js<?>-<?<??>-<?xml <=";&?>-<?' \
229 proc-instruction: js -> <
230 #text: -
231 proc-instruction: -> <?
232 #text: -
233 proc-instruction: xml -> <=";&
234 #text: -'
236 test_incremental_parsing \
237 'Parse incorrect processing instructions (II).' \
238 '<?><?' \
240 proc-instruction: -> ><?'
# Whitespace between markup tokens: the expected dump lists only the
# element, its attribute and the processing instruction, with no #text.
242 test_incremental_parsing \
243 'Skip spaces not inside text.' \
245 root
246 ns:attr
248 "value"
249 ><?
250 target
251 data?>< / root >' \
253 element: root
254 attribute: ns:attr -> value
255 proc-instruction: target -> data'
# End of the test script. test_done is not defined in this file;
# presumably it is provided by the library sourced from $TEST_LIB.
258 test_done