3 # Copyright (c) 2005 Jonas Fonseca
6 test_description
='Test the very basic parsing of SGML documents.
8 This test runs very basic features, like checking that nodes are placed
9 correctly in the DOM tree.
14 test_output_equals
() {
19 URI
="test:$(normalize "$desc")"
21 sgml-parser
--uri "$URI" --src "$src" > output
22 echo "#document: $URI" > expected
23 echo "$out" |
sed -n '2,$p' |
sed 's/^/ /' >> expected
25 test_expect_success
"$desc" 'cmp output expected'
29 ################################################################
30 # Parse various SGML node types.
33 'Parse a small document.' \
34 '<html><body><p>Hello World!</p></body></html>' \
43 '<root><child attr="value" /><child2></><child3 >a</></root>' \
47 attribute: attr -> value
53 'Parse tag soup elements. (I)' \
54 '<parent attr="value" <child:1></><child:2</>a</parent>' \
57 attribute: attr -> value
63 'Parse tag soup elements. (II)' \
64 '< a >< b < c / >< / >' \
71 'Parse an enclosed comment.' \
72 '<root><!-- Hello World! --></root>' \
75 #comment: Hello World! '
78 'Parse comment combinations. (I)' \
79 '<root><!-- <!-- -- > --><!--foo--><!----></root>' \
87 'Parse comment combinations. (II).' \
88 '<! -- comment -->s<!-->-->t<!----->u' \
98 'Parse bad comment. (I)' \
104 'Parse bad comment. (II)' \
105 '<!--a--!>bad comment' \
111 'Parse empty notation.' \
117 'Parse an enclosed CDATA section.' \
118 '<root><![CDATA[...] ]>...]]></root>' \
121 #cdata-section: ...] ]>...'
124 'Parse non-enclosed CDATA section.' \
130 'Parse a bad CDATA section.' \
136 'Parse attributes.' \
137 '<root lang="fr" attr name="value with &foo; <stuff"></root>' \
140 attribute: lang -> fr
142 attribute: name -> value with &foo; <stuff'
145 'Parse attributes with garbage.' \
146 "<root a=b c='d' e'f' g= h i = j k =></root>" \
156 'Parse attribute with non-quoted values.' \
157 '<root color=#abc path=/to/%61-&\one";files/>...' \
160 attribute: color -> #abc
161 attribute: path -> /to/%61-&\one";files
165 'Parse entity references.' \
168 entity-reference: amp
170 entity-reference: #42'
172 # Just how these should be gracefully handled is not clear to me.
174 'Parse badly formatted entity references.' \
175 '& m33p;-&.:-copy;-&;-&#;-&#xx;' \
179 entity-reference: .:-copy
185 entity-reference: #xx'
188 'Parse processing instructions.' \
189 '<?xml encoding="UTF8"?>
195 proc-instruction: xml -> encoding="UTF8"
196 attribute: encoding -> UTF8
198 proc-instruction: ecmascript -> var val=2;\n'
201 'Parse XML processing instructions.' \
202 '<?xml version="1.0" />?><?xml />-' \
204 proc-instruction: xml -> version="1.0" />
205 attribute: version -> 1.0
206 proc-instruction: xml -> />-'
209 'Parse XML stylesheet processing instructions.' \
210 '<?xml-stylesheet type="text/xsl" href="url"?>' \
212 proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
213 attribute: type -> text/xsl
214 attribute: href -> url'
217 'Parse exotic processing instructions.' \
218 '<?xml ?+>+?>-?>-<?js?>-<??>-' \
220 proc-instruction: xml -> ?+>+
222 proc-instruction: js ->
228 'Parse incorrect processing instructions. (I)' \
229 '<?js<?>-<?<??>-<?xml <=";&?>-<?' \
231 proc-instruction: js -> <
233 proc-instruction: -> <?
235 proc-instruction: xml -> <=";&
239 'Parse incorrect processing instructions (II).' \
242 proc-instruction: -> ><?'
245 'Skip spaces not inside text.' \
256 attribute: ns:attr -> value
257 proc-instruction: target -> data'