src/dom/test/test-sgml-parser-incremental

   1 #!/bin/sh
   2 #
   3 # Copyright (c) 2005 Jonas Fonseca
   4 #
   5
   6 test_description='Test incremental parsing of SGML documents.
   7
   8 This test checks if the SGML parser correctly recovers during incremental
   9 parsing.
  10 '
  11
  12 . "$TEST_LIB"
  13
  14 test_incremental_parsing () {
  15         desc="$1"; shift
  16         src="$1"; shift
  17         out="$1"; shift
  18
  19         URI="test:$(normalize "$desc")"
  20
  21         echo "#document: $URI" > expected
  22         printf "%s\n" "$out" | sed -n '2,$p' | sed -e 's/^/  /' >> expected
  23
  24         for size in 1 2 3 4 5 6 7 8 9 10 15 20 25 50; do
  25                 printf "%s" "$src" | sgml-parser --uri "$URI" --stdin "$size" > output
  26
  27                 test_run_ 'cmp output expected'
  28                 if [ "$?" != 0 -o "$eval_ret" != 0 ]
  29                 then
  30                         test_failure_ "$desc" "($size bytes)"
  31                         return
  32                 fi
  33         done
  34
  35         test_ok_ "$desc"
  36 }
  37
  38 test_incremental_parsing \
  39 "Parse a small document." \
  40 '<html><body><p>Hello World!</p></body></html>' \
  41 '
  42 element: html
  43   element: body
  44     element: p
  45       #text: Hello World!'
  46
  47 test_incremental_parsing \
  48 'Parse elements.' \
  49 '<root><child attr="value" /><child2></><child3 >a</></root>' \
  50 '
  51 element: root
  52   element: child
  53     attribute: attr -> value
  54   element: child2
  55   element: child3
  56     #text: a'
  57
  58 test_incremental_parsing \
  59 'Parse tag soup elements.' \
  60 '<parent attr="value" <child:1></><child:2</>a</parent>' \
  61 '
  62 element: parent
  63   attribute: attr -> value
  64   element: child:1
  65   element: child:2
  66   #text: a'
  67
  68 test_incremental_parsing \
  69 'Parse an enclosed comment.' \
  70 '<root><!-- Hello World! --></root>' \
  71 '
  72 element: root
  73   #comment:  Hello World! '
  74
  75 test_incremental_parsing \
  76 'Parse comment combinations. (I)' \
  77 '<root><!-- <!-- -- > --><!--foo--><!----></root>' \
  78 '
  79 element: root
  80   #comment:  <!-- -- >
  81   #comment: foo
  82   #comment: '
  83
  84 test_incremental_parsing \
  85 'Parse comment combinations. (II).' \
  86 '<! -- comment -->s<!-->-->t<!----->u' \
  87 '
  88 #comment:  comment
  89 #text: s
  90 #comment: >
  91 #text: t
  92 #comment: -
  93 #text: u'
  94
  95 test_incremental_parsing \
  96 'Parse bad comment. (I)' \
  97 '<!--->s' \
  98 '
  99 #comment: ->s'
 100
 101 test_incremental_parsing \
 102 'Parse bad comment. (II)' \
 103 '<!--a--!>bad comment' \
 104 '
 105 #comment: a
 106 #text: bad comment'
 107
 108 test_incremental_parsing \
 109 'Parse empty notation.' \
 110 '<!>s' \
 111 '
 112 #text: s'
 113
 114 test_incremental_parsing \
 115 'Parse an enclosed CDATA section.' \
 116 '<root><![CDATA[...] ]>...]]></root>' \
 117 '
 118 element: root
 119   #cdata-section: ...] ]>...'
 120
 121 test_incremental_parsing \
 122 'Parse non-enclosed CDATA section.' \
 123 '<![CDATA[...]]>' \
 124 '
 125 #cdata-section: ...'
 126
 127 test_incremental_parsing \
 128 'Parse a bad CDATA section.' \
 129 '<![CDATA[...' \
 130 '
 131 #cdata-section: ...'
 132
 133 test_incremental_parsing \
 134 'Parse attributes.' \
 135 '<root lang="fr" attr name="value with &foo; <stuff"></root>' \
 136 '
 137 element: root
 138   attribute: lang -> fr
 139   attribute: attr ->
 140   attribute: name -> value with &foo; <stuff'
 141
 142 test_incremental_parsing \
 143 'Parse attributes with garbage.' \
 144 "<root a=b c='d' e'f' g= h i = j k =></root>" \
 145 '
 146 element: root
 147   attribute: a -> b
 148   attribute: c -> d
 149   attribute: g -> h
 150   attribute: i -> j
 151   attribute: k -> '
 152
 153 test_incremental_parsing \
 154 'Parse attribute with non-quoted values.' \
 155 '<root color=#abc path=/to/%61-&\one";files/>...' \
 156 '
 157 element: root
 158   attribute: color -> #abc
 159   attribute: path -> /to/%61-&\one";files
 160 #text: ...'
 161
 162 test_incremental_parsing \
 163 'Parse entity references.' \
 164 '&amp;-&#42;' \
 165 '
 166 entity-reference: amp
 167 #text: -
 168 entity-reference: #42'
 169
 170 # Just how these should be gracefully handled is not clear to me.
 171 test_incremental_parsing \
 172 'Parse badly formatted entity references.' \
 173 '& m33p;-&.:-copy;-&;-&#;-&#xx;' \
 174 '
 175 #text: & m33p;
 176 #text: -
 177 entity-reference: .:-copy
 178 #text: -
 179 #text: &;
 180 #text: -
 181 entity-reference: #
 182 #text: -
 183 entity-reference: #xx'
 184
 185 test_incremental_parsing \
 186 'Parse processing instructions.' \
 187 '<?xml encoding="UTF8"?>
 188 ...
 189 <?ecmascript
 190 var val=2;
 191 ?>' \
 192 '
 193 proc-instruction: xml -> encoding="UTF8"
 194   attribute: encoding -> UTF8
 195 #text: \n...\n
 196 proc-instruction: ecmascript -> var val=2;\n'
 197
 198 test_incremental_parsing \
 199 'Parse XML processing instructions.' \
 200 '<?xml version="1.0" />?><?xml />-' \
 201 '
 202 proc-instruction: xml -> version="1.0" />
 203   attribute: version -> 1.0
 204 proc-instruction: xml -> />-'
 205
 206 test_incremental_parsing \
 207 'Parse XML stylesheet processing instructions.' \
 208 '<?xml-stylesheet type="text/xsl" href="url"?>' \
 209 '
 210 proc-instruction: xml-stylesheet -> type="text/xsl" href="url"
 211   attribute: type -> text/xsl
 212   attribute: href -> url'
 213
 214 test_incremental_parsing \
 215 'Parse exotic processing instructions.' \
 216 '<?xml ?+>+?>-?>-<?js?>-<??>-' \
 217 '
 218 proc-instruction: xml -> ?+>+
 219 #text: -?>-
 220 proc-instruction: js ->
 221 #text: -
 222 proc-instruction:  ->
 223 #text: -'
 224
 225 test_incremental_parsing \
 226 'Parse incorrect processing instructions.' \
 227 '<?js<?>-<?<??>-<?xml <=";&?>-<?' \
 228 '
 229 proc-instruction: js -> <
 230 #text: -
 231 proc-instruction:  -> <?
 232 #text: -
 233 proc-instruction: xml -> <=";&
 234 #text: -'
 235
 236 test_incremental_parsing \
 237 'Parse incorrect processing instructions (II).' \
 238 '<?><?' \
 239 '
 240 proc-instruction:  -> ><?'
 241
 242 test_incremental_parsing \
 243 'Skip spaces not inside text.' \
 244 '<
 245 root
 246 ns:attr
 247 =
 248 "value"
 249 ><?
 250         target
 251  data?><        /       root    >' \
 252 '
 253 element: root
 254   attribute: ns:attr -> value
 255   proc-instruction: target -> data'
 256
 257
 258 test_done