Merged revisions 195034,195219,195245,195357,195374,195428,195599,195673,195809 via...
[official-gcc.git] / main / libgo / go / exp / html / parse_test.go
blob4896dfb7a0f558695d6954087997978bb77ceeba
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package html
7 import (
8 "bufio"
9 "bytes"
10 "errors"
11 "exp/html/atom"
12 "fmt"
13 "io"
14 "io/ioutil"
15 "os"
16 "path/filepath"
17 "runtime"
18 "sort"
19 "strings"
20 "testing"
// readParseTest reads a single test case from r.
//
// A test case is a "#data" section holding the HTML input, a "#errors"
// section (whose contents are skipped), an optional "#document-fragment"
// section naming the context element, and a "#document" section holding the
// expected tree dump, terminated by a blank line or the end of input.
//
// It returns the HTML text with one trailing newline removed, the expected
// dump, and the context element name ("" when there is no fragment section).
// Any read error hit before the "#document" section, including io.EOF, is
// returned as err.
func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
	// ReadBytes is used instead of ReadSlice so that a line longer than the
	// reader's buffer is returned whole rather than failing with
	// bufio.ErrBufferFull.
	line, err := r.ReadBytes('\n')
	if err != nil {
		return "", "", "", err
	}
	var b []byte

	// Read the HTML.
	if string(line) != "#data\n" {
		return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
	}
	for {
		line, err = r.ReadBytes('\n')
		if err != nil {
			return "", "", "", err
		}
		// A nil error guarantees the line ends in '\n', so it is non-empty.
		if line[0] == '#' {
			break
		}
		b = append(b, line...)
	}
	text = string(b)
	if strings.HasSuffix(text, "\n") {
		text = text[:len(text)-1]
	}
	b = b[:0]

	// Skip the error list.
	if string(line) != "#errors\n" {
		return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
	}
	for {
		line, err = r.ReadBytes('\n')
		if err != nil {
			return "", "", "", err
		}
		if line[0] == '#' {
			break
		}
	}

	// Read the optional context element for fragment tests.
	if string(line) == "#document-fragment\n" {
		line, err = r.ReadBytes('\n')
		if err != nil {
			return "", "", "", err
		}
		context = strings.TrimSpace(string(line))
		line, err = r.ReadBytes('\n')
		if err != nil {
			return "", "", "", err
		}
	}

	// Read the dump of what the parse tree should be.
	if string(line) != "#document\n" {
		return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
	}
	// inQuote tracks whether we are inside a quoted text node that spans
	// several lines; a blank line there is data, not the end of the case.
	inQuote := false
	for {
		line, err = r.ReadBytes('\n')
		if err != nil && err != io.EOF {
			return "", "", "", err
		}
		trimmed := bytes.Trim(line, "| \n")
		if len(trimmed) > 0 {
			if line[0] == '|' && trimmed[0] == '"' {
				inQuote = true
			}
			// A lone '"' on a '|' line opens a quote; it does not close one.
			if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
				inQuote = false
			}
		}
		if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
			break
		}
		b = append(b, line...)
	}
	return text, string(b), context, nil
}
// dumpIndent writes the prefix of one tree-dump line to w: "| " followed by
// two spaces for each nesting level.
//
// NOTE(review): the extracted source showed a single space per level; the
// html5lib tree-construction dump format uses two, so two are written here.
// Confirm against the upstream file.
func dumpIndent(w io.Writer, level int) {
	io.WriteString(w, "| ")
	for ; level > 0; level-- {
		io.WriteString(w, "  ")
	}
}
111 type sortedAttributes []Attribute
113 func (a sortedAttributes) Len() int {
114 return len(a)
117 func (a sortedAttributes) Less(i, j int) bool {
118 if a[i].Namespace != a[j].Namespace {
119 return a[i].Namespace < a[j].Namespace
121 return a[i].Key < a[j].Key
124 func (a sortedAttributes) Swap(i, j int) {
125 a[i], a[j] = a[j], a[i]
128 func dumpLevel(w io.Writer, n *Node, level int) error {
129 dumpIndent(w, level)
130 switch n.Type {
131 case ErrorNode:
132 return errors.New("unexpected ErrorNode")
133 case DocumentNode:
134 return errors.New("unexpected DocumentNode")
135 case ElementNode:
136 if n.Namespace != "" {
137 fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
138 } else {
139 fmt.Fprintf(w, "<%s>", n.Data)
141 attr := sortedAttributes(n.Attr)
142 sort.Sort(attr)
143 for _, a := range attr {
144 io.WriteString(w, "\n")
145 dumpIndent(w, level+1)
146 if a.Namespace != "" {
147 fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
148 } else {
149 fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
152 case TextNode:
153 fmt.Fprintf(w, `"%s"`, n.Data)
154 case CommentNode:
155 fmt.Fprintf(w, "<!-- %s -->", n.Data)
156 case DoctypeNode:
157 fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
158 if n.Attr != nil {
159 var p, s string
160 for _, a := range n.Attr {
161 switch a.Key {
162 case "public":
163 p = a.Val
164 case "system":
165 s = a.Val
168 if p != "" || s != "" {
169 fmt.Fprintf(w, ` "%s"`, p)
170 fmt.Fprintf(w, ` "%s"`, s)
173 io.WriteString(w, ">")
174 case scopeMarkerNode:
175 return errors.New("unexpected scopeMarkerNode")
176 default:
177 return errors.New("unknown node type")
179 io.WriteString(w, "\n")
180 for c := n.FirstChild; c != nil; c = c.NextSibling {
181 if err := dumpLevel(w, c, level+1); err != nil {
182 return err
185 return nil
188 func dump(n *Node) (string, error) {
189 if n == nil || n.FirstChild == nil {
190 return "", nil
192 var b bytes.Buffer
193 for c := n.FirstChild; c != nil; c = c.NextSibling {
194 if err := dumpLevel(&b, c, 0); err != nil {
195 return "", err
198 return b.String(), nil
// testDataDir is the directory, with trailing slash, that TestParser globs
// for "*.dat" test files.
const testDataDir = "testdata/webkit/"
203 func TestParser(t *testing.T) {
204 testFiles, err := filepath.Glob(testDataDir + "*.dat")
205 if err != nil {
206 t.Fatal(err)
208 for _, tf := range testFiles {
209 f, err := os.Open(tf)
210 if err != nil {
211 t.Fatal(err)
213 defer f.Close()
214 r := bufio.NewReader(f)
216 for i := 0; ; i++ {
217 text, want, context, err := readParseTest(r)
218 if err == io.EOF {
219 break
221 if err != nil {
222 t.Fatal(err)
225 err = testParseCase(text, want, context)
227 if err != nil {
228 t.Errorf("%s test #%d %q, %s", tf, i, text, err)
234 // testParseCase tests one test case from the test files. If the test does not
235 // pass, it returns an error that explains the failure.
236 // text is the HTML to be parsed, want is a dump of the correct parse tree,
237 // and context is the name of the context node, if any.
238 func testParseCase(text, want, context string) (err error) {
239 defer func() {
240 if x := recover(); x != nil {
241 switch e := x.(type) {
242 case error:
243 err = e
244 default:
245 err = fmt.Errorf("%v", e)
250 var doc *Node
251 if context == "" {
252 doc, err = Parse(strings.NewReader(text))
253 if err != nil {
254 return err
256 } else {
257 contextNode := &Node{
258 Type: ElementNode,
259 DataAtom: atom.Lookup([]byte(context)),
260 Data: context,
262 nodes, err := ParseFragment(strings.NewReader(text), contextNode)
263 if err != nil {
264 return err
266 doc = &Node{
267 Type: DocumentNode,
269 for _, n := range nodes {
270 doc.AppendChild(n)
274 if err := checkTreeConsistency(doc); err != nil {
275 return err
278 got, err := dump(doc)
279 if err != nil {
280 return err
282 // Compare the parsed tree to the #document section.
283 if got != want {
284 return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
287 if renderTestBlacklist[text] || context != "" {
288 return nil
291 // Check that rendering and re-parsing results in an identical tree.
292 pr, pw := io.Pipe()
293 go func() {
294 pw.CloseWithError(Render(pw, doc))
296 doc1, err := Parse(pr)
297 if err != nil {
298 return err
300 got1, err := dump(doc1)
301 if err != nil {
302 return err
304 if got != got1 {
305 return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1)
308 return nil
// Some test inputs result in parse trees that are not 'well-formed' despite
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
// tree will not result in an exact clone of that tree. We blacklist such
// inputs from the render test.
var renderTestBlacklist = map[string]bool{
	// The second <a> will be reparented to the first <table>'s parent. This
	// results in an <a> whose parent is an <a>, which is not 'well-formed'.
	`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
	// The same thing with a <p>:
	`<p><table></p>`: true,
	// More cases of <a> being reparented:
	`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
	`<a><table><a></table><p><a><div><a>`:                                     true,
	`<a><table><td><a><table></table><a></tr><a></table><a>`:                  true,
	// A similar reparenting situation involving <nobr>:
	`<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true,
	// A <plaintext> element is reparented, putting it before a table.
	// A <plaintext> element can't have anything after it in HTML.
	`<table><plaintext><td>`:                                   true,
	`<!doctype html><table><plaintext></plaintext>`:            true,
	`<!doctype html><table><tbody><plaintext></plaintext>`:     true,
	`<!doctype html><table><tbody><tr><plaintext></plaintext>`: true,
	// A form inside a table inside a form doesn't work either.
	`<!doctype html><form><table></form><form></table></form>`: true,
	// A script that ends at EOF may escape its own closing tag when rendered.
	`<!doctype html><script><!--<script `:          true,
	`<!doctype html><script><!--<script <`:         true,
	`<!doctype html><script><!--<script <a`:        true,
	`<!doctype html><script><!--<script </`:        true,
	`<!doctype html><script><!--<script </s`:       true,
	`<!doctype html><script><!--<script </script`:  true,
	`<!doctype html><script><!--<script </scripta`: true,
	`<!doctype html><script><!--<script -`:         true,
	`<!doctype html><script><!--<script -a`:        true,
	`<!doctype html><script><!--<script -<`:        true,
	`<!doctype html><script><!--<script --`:        true,
	`<!doctype html><script><!--<script --a`:       true,
	`<!doctype html><script><!--<script --<`:       true,
	`<script><!--<script `:                         true,
	`<script><!--<script <a`:                       true,
	`<script><!--<script </script`:                 true,
	`<script><!--<script </scripta`:                true,
	`<script><!--<script -`:                        true,
	`<script><!--<script -a`:                       true,
	`<script><!--<script --`:                       true,
	`<script><!--<script --a`:                      true,
	`<script><!--<script <`:                        true,
	`<script><!--<script </`:                       true,
	`<script><!--<script </s`:                      true,
	// Reconstructing the active formatting elements results in a <plaintext>
	// element that contains an <a> element.
	`<!doctype html><p><a><plaintext>b`: true,
}
365 func TestNodeConsistency(t *testing.T) {
366 // inconsistentNode is a Node whose DataAtom and Data do not agree.
367 inconsistentNode := &Node{
368 Type: ElementNode,
369 DataAtom: atom.Frameset,
370 Data: "table",
372 _, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode)
373 if err == nil {
374 t.Errorf("got nil error, want non-nil")
378 func BenchmarkParser(b *testing.B) {
379 buf, err := ioutil.ReadFile("testdata/go1.html")
380 if err != nil {
381 b.Fatalf("could not read testdata/go1.html: %v", err)
383 b.SetBytes(int64(len(buf)))
384 runtime.GC()
385 b.ReportAllocs()
386 b.ResetTimer()
387 for i := 0; i < b.N; i++ {
388 Parse(bytes.NewBuffer(buf))