// Merge from mainline (167278:168000).
// [official-gcc/graphite-test-results.git] / libgo / go / html / token_test.go
// blob 5759476eab433313516c510a4b2fa4bfc64b6381
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
5 package html
7 import (
8 "bytes"
9 "os"
10 "testing"
// tokenTest describes a single tokenizer test case: an HTML input and the
// string form of every token the tokenizer is expected to produce for it.
type tokenTest struct {
	// A short description of the test case.
	desc string
	// The HTML to parse.
	html string
	// The string representations of the expected tokens.
	tokens []string
}
22 var tokenTests = []tokenTest{
23 // A single text node. The tokenizer should not break text nodes on whitespace,
24 // nor should it normalize whitespace within a text node.
26 "text",
27 "foo bar",
28 []string{
29 "foo bar",
32 // An entity.
34 "entity",
35 "one < two",
36 []string{
37 "one < two",
40 // A start, self-closing and end tag. The tokenizer does not care if the start
41 // and end tokens don't match; that is the job of the parser.
43 "tags",
44 "<a>b<c/>d</e>",
45 []string{
46 "<a>",
47 "b",
48 "<c/>",
49 "d",
50 "</e>",
53 // An attribute with a backslash.
55 "backslash",
56 `<p id="a\"b">`,
57 []string{
58 `<p id="a&quot;b">`,
61 // Entities, tag name and attribute key lower-casing, and whitespace
62 // normalization within a tag.
64 "tricky",
65 "<p \t\n iD=\"a&quot;B\" foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
66 []string{
67 `<p id="a&quot;B" foo="bar">`,
68 "<em>",
69 "te&lt;&amp;;xt",
70 "</em>",
71 "</p>",
74 // A non-existant entity. Tokenizing and converting back to a string should
75 // escape the "&" to become "&amp;".
77 "noSuchEntity",
78 `<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
79 []string{
80 `<a b="c&amp;noSuchEntity;d">`,
81 "&lt;&amp;alsoDoesntExist;&amp;",
86 func TestTokenizer(t *testing.T) {
87 loop:
88 for _, tt := range tokenTests {
89 z := NewTokenizer(bytes.NewBuffer([]byte(tt.html)))
90 for i, s := range tt.tokens {
91 if z.Next() == Error {
92 t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error())
93 continue loop
95 actual := z.Token().String()
96 if s != actual {
97 t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
98 continue loop
101 z.Next()
102 if z.Error() != os.EOF {
103 t.Errorf("%s: want EOF got %q", tt.desc, z.Token().String())
108 func TestUnescapeEscape(t *testing.T) {
109 ss := []string{
111 `abc def`,
112 `a & b`,
113 `a&amp;b`,
114 `a &amp b`,
115 `&quot;`,
116 `"`,
117 `"<&>"`,
118 `&quot;&lt;&amp;&gt;&quot;`,
119 `3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
121 for _, s := range ss {
122 if s != UnescapeString(EscapeString(s)) {
123 t.Errorf("s != UnescapeString(EscapeString(s)), s=%q", s)
128 func TestBufAPI(t *testing.T) {
129 s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
130 z := NewTokenizer(bytes.NewBuffer([]byte(s)))
131 result := bytes.NewBuffer(nil)
132 depth := 0
133 loop:
134 for {
135 tt := z.Next()
136 switch tt {
137 case Error:
138 if z.Error() != os.EOF {
139 t.Error(z.Error())
141 break loop
142 case Text:
143 if depth > 0 {
144 result.Write(z.Text())
146 case StartTag, EndTag:
147 tn, _ := z.TagName()
148 if len(tn) == 1 && tn[0] == 'a' {
149 if tt == StartTag {
150 depth++
151 } else {
152 depth--
157 u := "14567"
158 v := string(result.Bytes())
159 if u != v {
160 t.Errorf("TestBufAPI: want %q got %q", u, v)