* config/mn10300/mn10300.md (adddi3_degenerate): Remove bogus
[official-gcc.git] / libgo / go / encoding / xml / xml_test.go
blobee4ffa242089753a4e382634181915a82b74f6e7
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package xml
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "reflect"
12 "strings"
13 "testing"
14 "unicode/utf8"
17 const testInput = `
18 <?xml version="1.0" encoding="UTF-8"?>
19 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
20 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
21 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
22 "\r\n\t" + ` >
23 <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
24 <query>&何; &is-it;</query>
25 <goodbye />
26 <outer foo:attr="value" xmlns:tag="ns4">
27 <inner/>
28 </outer>
29 <tag:name>
30 <![CDATA[Some text here.]]>
31 </tag:name>
32 </body><!-- missing final newline -->`
34 var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
36 var rawTokens = []Token{
37 CharData("\n"),
38 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
39 CharData("\n"),
40 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
41 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
42 CharData("\n"),
43 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
44 CharData("\n "),
45 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
46 CharData("World <>'\" 白鵬翔"),
47 EndElement{Name{"", "hello"}},
48 CharData("\n "),
49 StartElement{Name{"", "query"}, []Attr{}},
50 CharData("What is it?"),
51 EndElement{Name{"", "query"}},
52 CharData("\n "),
53 StartElement{Name{"", "goodbye"}, []Attr{}},
54 EndElement{Name{"", "goodbye"}},
55 CharData("\n "),
56 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
57 CharData("\n "),
58 StartElement{Name{"", "inner"}, []Attr{}},
59 EndElement{Name{"", "inner"}},
60 CharData("\n "),
61 EndElement{Name{"", "outer"}},
62 CharData("\n "),
63 StartElement{Name{"tag", "name"}, []Attr{}},
64 CharData("\n "),
65 CharData("Some text here."),
66 CharData("\n "),
67 EndElement{Name{"tag", "name"}},
68 CharData("\n"),
69 EndElement{Name{"", "body"}},
70 Comment(" missing final newline "),
73 var cookedTokens = []Token{
74 CharData("\n"),
75 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
76 CharData("\n"),
77 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
78 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
79 CharData("\n"),
80 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
81 CharData("\n "),
82 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
83 CharData("World <>'\" 白鵬翔"),
84 EndElement{Name{"ns2", "hello"}},
85 CharData("\n "),
86 StartElement{Name{"ns2", "query"}, []Attr{}},
87 CharData("What is it?"),
88 EndElement{Name{"ns2", "query"}},
89 CharData("\n "),
90 StartElement{Name{"ns2", "goodbye"}, []Attr{}},
91 EndElement{Name{"ns2", "goodbye"}},
92 CharData("\n "),
93 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
94 CharData("\n "),
95 StartElement{Name{"ns2", "inner"}, []Attr{}},
96 EndElement{Name{"ns2", "inner"}},
97 CharData("\n "),
98 EndElement{Name{"ns2", "outer"}},
99 CharData("\n "),
100 StartElement{Name{"ns3", "name"}, []Attr{}},
101 CharData("\n "),
102 CharData("Some text here."),
103 CharData("\n "),
104 EndElement{Name{"ns3", "name"}},
105 CharData("\n"),
106 EndElement{Name{"ns2", "body"}},
107 Comment(" missing final newline "),
110 const testInputAltEncoding = `
111 <?xml version="1.0" encoding="x-testing-uppercase"?>
112 <TAG>VALUE</TAG>`
114 var rawTokensAltEncoding = []Token{
115 CharData("\n"),
116 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
117 CharData("\n"),
118 StartElement{Name{"", "tag"}, []Attr{}},
119 CharData("value"),
120 EndElement{Name{"", "tag"}},
123 var xmlInput = []string{
124 // unexpected EOF cases
125 "<",
126 "<t",
127 "<t ",
128 "<t/",
129 "<!",
130 "<!-",
131 "<!--",
132 "<!--c-",
133 "<!--c--",
134 "<!d",
135 "<t></",
136 "<t></t",
137 "<?",
138 "<?p",
139 "<t a",
140 "<t a=",
141 "<t a='",
142 "<t a=''",
143 "<t/><![",
144 "<t/><![C",
145 "<t/><![CDATA[d",
146 "<t/><![CDATA[d]",
147 "<t/><![CDATA[d]]",
149 // other Syntax errors
150 "<>",
151 "<t/a",
152 "<0 />",
153 "<?0 >",
154 // "<!0 >", // let the Token() caller handle
155 "</0>",
156 "<t 0=''>",
157 "<t a='&'>",
158 "<t a='<'>",
159 "<t>&nbspc;</t>",
160 "<t a>",
161 "<t a=>",
162 "<t a=v>",
163 // "<![CDATA[d]]>", // let the Token() caller handle
164 "<t></e>",
165 "<t></>",
166 "<t></t!",
167 "<t>cdata]]></t>",
170 func TestRawToken(t *testing.T) {
171 d := NewDecoder(strings.NewReader(testInput))
172 d.Entity = testEntity
173 testRawToken(t, d, testInput, rawTokens)
176 const nonStrictInput = `
177 <tag>non&entity</tag>
178 <tag>&unknown;entity</tag>
179 <tag>&#123</tag>
180 <tag>&#zzz;</tag>
181 <tag>&なまえ3;</tag>
182 <tag>&lt-gt;</tag>
183 <tag>&;</tag>
184 <tag>&0a;</tag>
187 var nonStrictTokens = []Token{
188 CharData("\n"),
189 StartElement{Name{"", "tag"}, []Attr{}},
190 CharData("non&entity"),
191 EndElement{Name{"", "tag"}},
192 CharData("\n"),
193 StartElement{Name{"", "tag"}, []Attr{}},
194 CharData("&unknown;entity"),
195 EndElement{Name{"", "tag"}},
196 CharData("\n"),
197 StartElement{Name{"", "tag"}, []Attr{}},
198 CharData("&#123"),
199 EndElement{Name{"", "tag"}},
200 CharData("\n"),
201 StartElement{Name{"", "tag"}, []Attr{}},
202 CharData("&#zzz;"),
203 EndElement{Name{"", "tag"}},
204 CharData("\n"),
205 StartElement{Name{"", "tag"}, []Attr{}},
206 CharData("&なまえ3;"),
207 EndElement{Name{"", "tag"}},
208 CharData("\n"),
209 StartElement{Name{"", "tag"}, []Attr{}},
210 CharData("&lt-gt;"),
211 EndElement{Name{"", "tag"}},
212 CharData("\n"),
213 StartElement{Name{"", "tag"}, []Attr{}},
214 CharData("&;"),
215 EndElement{Name{"", "tag"}},
216 CharData("\n"),
217 StartElement{Name{"", "tag"}, []Attr{}},
218 CharData("&0a;"),
219 EndElement{Name{"", "tag"}},
220 CharData("\n"),
223 func TestNonStrictRawToken(t *testing.T) {
224 d := NewDecoder(strings.NewReader(nonStrictInput))
225 d.Strict = false
226 testRawToken(t, d, nonStrictInput, nonStrictTokens)
// downCaser wraps an io.ByteReader and lower-cases ASCII upper-case letters
// as they are read. t is used to fail the test if Read is ever called.
type downCaser struct {
	t *testing.T
	r io.ByteReader
}

// ReadByte returns the next byte from the wrapped reader, mapping 'A'..'Z'
// to 'a'..'z'. The wrapped reader's error is passed through unchanged.
func (d *downCaser) ReadByte() (byte, error) {
	b, err := d.r.ReadByte()
	if 'A' <= b && b <= 'Z' {
		b = b - 'A' + 'a'
	}
	return b, err
}

// Read must never be called: it fails the test immediately.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	panic("unreachable")
}
247 func TestRawTokenAltEncoding(t *testing.T) {
248 d := NewDecoder(strings.NewReader(testInputAltEncoding))
249 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
250 if charset != "x-testing-uppercase" {
251 t.Fatalf("unexpected charset %q", charset)
253 return &downCaser{t, input.(io.ByteReader)}, nil
255 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
258 func TestRawTokenAltEncodingNoConverter(t *testing.T) {
259 d := NewDecoder(strings.NewReader(testInputAltEncoding))
260 token, err := d.RawToken()
261 if token == nil {
262 t.Fatalf("expected a token on first RawToken call")
264 if err != nil {
265 t.Fatal(err)
267 token, err = d.RawToken()
268 if token != nil {
269 t.Errorf("expected a nil token; got %#v", token)
271 if err == nil {
272 t.Fatalf("expected an error on second RawToken call")
274 const encoding = "x-testing-uppercase"
275 if !strings.Contains(err.Error(), encoding) {
276 t.Errorf("expected error to contain %q; got error: %v",
277 encoding, err)
// testRawToken reads one token from d with RawToken for every entry of
// rawTokens and compares it to that entry. It also checks that the offsets
// InputOffset reports before and after each read describe a sensible span
// of raw, the text the decoder is reading.
func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
	// describe renders a token for failure messages; CharData is printed
	// quoted so that whitespace is visible.
	describe := func(tok Token) string {
		if _, isText := tok.(CharData); isText {
			return fmt.Sprintf("CharData(%q)", tok)
		}
		return fmt.Sprintf("%#v", tok)
	}

	var prevEnd int64
	for i, want := range rawTokens {
		begin := d.InputOffset()
		have, err := d.RawToken()
		end := d.InputOffset()
		if err != nil {
			t.Fatalf("token %d: unexpected error: %s", i, err)
		}
		if !reflect.DeepEqual(have, want) {
			t.Errorf("token %d = %s, want %s", i, describe(have), describe(want))
		}

		// Check that InputOffset returned actual token.
		if begin < prevEnd {
			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, begin, end, have)
		} else if begin >= end {
			// An empty span is tolerated only when it sits exactly at the
			// end of the previous token: EndElement can be synthesized.
			if begin != end || end != prevEnd {
				t.Errorf("token %d: position [%d,%d) for %T is empty", i, begin, end, have)
			}
		} else if end > int64(len(raw)) {
			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, begin, end, have)
		} else {
			// A span containing angle brackets must start with '<' and
			// end with '>'.
			span := raw[begin:end]
			aligned := strings.HasPrefix(span, "<") && strings.HasSuffix(span, ">")
			if strings.ContainsAny(span, "<>") && !aligned {
				t.Errorf("token %d: misaligned raw token %#q for %T", i, span, have)
			}
		}
		prevEnd = end
	}
}
327 // Ensure that directives (specifically !DOCTYPE) include the complete
328 // text of any nested directives, noting that < and > do not change
329 // nesting depth if they are in single or double quotes.
331 var nestedDirectivesInput = `
332 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
333 <!DOCTYPE [<!ENTITY xlt ">">]>
334 <!DOCTYPE [<!ENTITY xlt "<">]>
335 <!DOCTYPE [<!ENTITY xlt '>'>]>
336 <!DOCTYPE [<!ENTITY xlt '<'>]>
337 <!DOCTYPE [<!ENTITY xlt '">'>]>
338 <!DOCTYPE [<!ENTITY xlt "'<">]>
341 var nestedDirectivesTokens = []Token{
342 CharData("\n"),
343 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
344 CharData("\n"),
345 Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
346 CharData("\n"),
347 Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
348 CharData("\n"),
349 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
350 CharData("\n"),
351 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
352 CharData("\n"),
353 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
354 CharData("\n"),
355 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
356 CharData("\n"),
359 func TestNestedDirectives(t *testing.T) {
360 d := NewDecoder(strings.NewReader(nestedDirectivesInput))
362 for i, want := range nestedDirectivesTokens {
363 have, err := d.Token()
364 if err != nil {
365 t.Fatalf("token %d: unexpected error: %s", i, err)
367 if !reflect.DeepEqual(have, want) {
368 t.Errorf("token %d = %#v want %#v", i, have, want)
373 func TestToken(t *testing.T) {
374 d := NewDecoder(strings.NewReader(testInput))
375 d.Entity = testEntity
377 for i, want := range cookedTokens {
378 have, err := d.Token()
379 if err != nil {
380 t.Fatalf("token %d: unexpected error: %s", i, err)
382 if !reflect.DeepEqual(have, want) {
383 t.Errorf("token %d = %#v want %#v", i, have, want)
388 func TestSyntax(t *testing.T) {
389 for i := range xmlInput {
390 d := NewDecoder(strings.NewReader(xmlInput[i]))
391 var err error
392 for _, err = d.Token(); err == nil; _, err = d.Token() {
394 if _, ok := err.(*SyntaxError); !ok {
395 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
// allScalars has one field per scalar kind exercised by TestAllScalars.
// Each field's name matches its type so that a decoded value lands in a
// field of the kind the name advertises.
type allScalars struct {
	True1  bool
	True2  bool
	False1 bool
	False2 bool
	Int    int
	Int8   int8
	Int16  int16
	Int32  int32
	Int64  int64
	// Uint was declared as int, which meant the plain uint kind was never
	// decoded by the test.
	Uint      uint
	Uint8     uint8
	Uint16    uint16
	Uint32    uint32
	Uint64    uint64
	Uintptr   uintptr
	Float32   float32
	Float64   float64
	String    string
	PtrString *string
}
422 var all = allScalars{
423 True1: true,
424 True2: true,
425 False1: false,
426 False2: false,
427 Int: 1,
428 Int8: -2,
429 Int16: 3,
430 Int32: -4,
431 Int64: 5,
432 Uint: 6,
433 Uint8: 7,
434 Uint16: 8,
435 Uint32: 9,
436 Uint64: 10,
437 Uintptr: 11,
438 Float32: 13.0,
439 Float64: 14.0,
440 String: "15",
441 PtrString: &sixteen,
444 var sixteen = "16"
446 const testScalarsInput = `<allscalars>
447 <True1>true</True1>
448 <True2>1</True2>
449 <False1>false</False1>
450 <False2>0</False2>
451 <Int>1</Int>
452 <Int8>-2</Int8>
453 <Int16>3</Int16>
454 <Int32>-4</Int32>
455 <Int64>5</Int64>
456 <Uint>6</Uint>
457 <Uint8>7</Uint8>
458 <Uint16>8</Uint16>
459 <Uint32>9</Uint32>
460 <Uint64>10</Uint64>
461 <Uintptr>11</Uintptr>
462 <Float>12.0</Float>
463 <Float32>13.0</Float32>
464 <Float64>14.0</Float64>
465 <String>15</String>
466 <PtrString>16</PtrString>
467 </allscalars>`
469 func TestAllScalars(t *testing.T) {
470 var a allScalars
471 err := Unmarshal([]byte(testScalarsInput), &a)
473 if err != nil {
474 t.Fatal(err)
476 if !reflect.DeepEqual(a, all) {
477 t.Errorf("have %+v want %+v", a, all)
// item is the decode target used by TestIssue569.
type item struct {
	FieldA string
}

// TestIssue569 checks that Unmarshal fills FieldA from the <FieldA> child
// of <item>. The decode error and the decoded value are checked separately
// so that a failure says which of the two went wrong.
func TestIssue569(t *testing.T) {
	data := `<item><FieldA>abcd</FieldA></item>`
	var i item
	if err := Unmarshal([]byte(data), &i); err != nil {
		t.Fatalf("Unmarshal(%q): unexpected error: %v", data, err)
	}
	if i.FieldA != "abcd" {
		t.Fatalf("FieldA = %q, want %q", i.FieldA, "abcd")
	}
}
// TestUnquotedAttrs checks that a non-strict decoder accepts an unquoted
// attribute value and reports the tag name, attribute name and attribute
// value unchanged.
func TestUnquotedAttrs(t *testing.T) {
	data := "<tag attr=azAZ09:-_\t>"
	d := NewDecoder(strings.NewReader(data))
	d.Strict = false
	token, err := d.Token()
	// Any error, not only a *SyntaxError, leaves no usable token, so stop
	// here rather than panic in the assertions below.
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}
	start, ok := token.(StartElement)
	if !ok {
		t.Fatalf("Unexpected token type: %T", token)
	}
	if start.Name.Local != "tag" {
		t.Errorf("Unexpected tag name: %v", start.Name.Local)
	}
	// Guard the index below against a start element without attributes.
	if len(start.Attr) == 0 {
		t.Fatalf("Expected at least one attribute on <%s>", start.Name.Local)
	}
	attr := start.Attr[0]
	if attr.Value != "azAZ09:-_" {
		t.Errorf("Unexpected attribute value: %v", attr.Value)
	}
	if attr.Name.Local != "attr" {
		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
	}
}
// TestValuelessAttrs checks that a non-strict decoder accepts an attribute
// with no value and reports the attribute's name as its value. Each table
// row is {input, expected tag name, expected attribute name and value}.
func TestValuelessAttrs(t *testing.T) {
	tests := [][3]string{
		{"<p nowrap>", "p", "nowrap"},
		{"<p nowrap >", "p", "nowrap"},
		{"<input checked/>", "input", "checked"},
		{"<input checked />", "input", "checked"},
	}
	for _, test := range tests {
		d := NewDecoder(strings.NewReader(test[0]))
		d.Strict = false
		token, err := d.Token()
		// Any error leaves no usable token; report it and move on to the
		// next row instead of panicking in the assertions below.
		if err != nil {
			t.Errorf("%q: Unexpected error: %v", test[0], err)
			continue
		}
		start, ok := token.(StartElement)
		if !ok {
			t.Errorf("%q: Unexpected token type: %T", test[0], token)
			continue
		}
		if start.Name.Local != test[1] {
			t.Errorf("Unexpected tag name: %v", start.Name.Local)
		}
		// Guard the index below against a start element without attributes.
		if len(start.Attr) == 0 {
			t.Errorf("%q: Expected at least one attribute", test[0])
			continue
		}
		attr := start.Attr[0]
		if attr.Value != test[2] {
			t.Errorf("Unexpected attribute value: %v", attr.Value)
		}
		if attr.Name.Local != test[2] {
			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
		}
	}
}
// TestCopyTokenCharData checks that CopyToken of a CharData token is equal
// to the original and stops being equal once the original's bytes change.
func TestCopyTokenCharData(t *testing.T) {
	buf := []byte("same data")
	var original Token = CharData(buf)
	duplicate := CopyToken(original)

	if same := reflect.DeepEqual(original, duplicate); !same {
		t.Error("CopyToken(CharData) != CharData")
	}

	// Mutate the bytes behind the original; the copy must not follow.
	buf[1] = 'o'
	if same := reflect.DeepEqual(original, duplicate); same {
		t.Error("CopyToken(CharData) uses same buffer.")
	}
}
555 func TestCopyTokenStartElement(t *testing.T) {
556 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
557 var tok1 Token = elt
558 tok2 := CopyToken(tok1)
559 if tok1.(StartElement).Attr[0].Value != "en" {
560 t.Error("CopyToken overwrote Attr[0]")
562 if !reflect.DeepEqual(tok1, tok2) {
563 t.Error("CopyToken(StartElement) != StartElement")
565 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
566 if reflect.DeepEqual(tok1, tok2) {
567 t.Error("CopyToken(CharData) uses same buffer.")
// TestSyntaxErrorLineNum reads a three-line document whose third line holds
// a malformed end tag and checks that the resulting *SyntaxError reports
// line 3.
func TestSyntaxErrorLineNum(t *testing.T) {
	input := "<P>Foo<P>\n\n<P>Bar</>\n"
	d := NewDecoder(strings.NewReader(input))

	// Drain tokens until the decoder reports an error.
	var err error
	for err == nil {
		_, err = d.Token()
	}

	synerr, ok := err.(*SyntaxError)
	if !ok {
		// Fatal, not Error: synerr is nil here and the check below would
		// dereference it.
		t.Fatalf("Expected SyntaxError, got %T: %v", err, err)
	}
	if synerr.Line != 3 {
		t.Errorf("SyntaxError line = %d, want 3", synerr.Line)
	}
}
// TestTrailingRawToken checks that RawToken ends with io.EOF when the
// document is followed by trailing whitespace.
func TestTrailingRawToken(t *testing.T) {
	input := `<FOO></FOO> `
	dec := NewDecoder(strings.NewReader(input))

	// Drain tokens until the decoder reports an error.
	var err error
	for err == nil {
		_, err = dec.RawToken()
	}

	if err != io.EOF {
		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
	}
}
// TestTrailingToken checks that Token ends with io.EOF when the document is
// followed by trailing whitespace.
func TestTrailingToken(t *testing.T) {
	input := `<FOO></FOO> `
	dec := NewDecoder(strings.NewReader(input))

	// Drain tokens until the decoder reports an error.
	var err error
	for err == nil {
		_, err = dec.Token()
	}

	if err != io.EOF {
		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
	}
}
608 func TestEntityInsideCDATA(t *testing.T) {
609 input := `<test><![CDATA[ &val=foo ]]></test>`
610 d := NewDecoder(strings.NewReader(input))
611 var err error
612 for _, err = d.Token(); err == nil; _, err = d.Token() {
614 if err != io.EOF {
615 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
619 var characterTests = []struct {
620 in string
621 err string
623 {"\x12<doc/>", "illegal character code U+0012"},
624 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
625 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
626 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
627 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
628 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
629 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
630 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
631 {"<doc>&hello;</doc>", "invalid character entity &hello;"},
634 func TestDisallowedCharacters(t *testing.T) {
636 for i, tt := range characterTests {
637 d := NewDecoder(strings.NewReader(tt.in))
638 var err error
640 for err == nil {
641 _, err = d.Token()
643 synerr, ok := err.(*SyntaxError)
644 if !ok {
645 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
647 if synerr.Msg != tt.err {
648 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
653 func TestIsInCharacterRange(t *testing.T) {
654 invalid := []rune{
655 utf8.MaxRune + 1,
656 0xD800, // surrogate min
657 0xDFFF, // surrogate max
660 for _, r := range invalid {
661 if isInCharacterRange(r) {
662 t.Errorf("rune %U considered valid", r)
667 var procInstTests = []struct {
668 input string
669 expect [2]string
671 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
672 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
673 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
674 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
675 {`encoding="FOO" `, [2]string{"", "FOO"}},
678 func TestProcInstEncoding(t *testing.T) {
679 for _, test := range procInstTests {
680 if got := procInst("version", test.input); got != test.expect[0] {
681 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
683 if got := procInst("encoding", test.input); got != test.expect[1] {
684 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
689 // Ensure that directives with comments include the complete
690 // text of any nested directives.
692 var directivesWithCommentsInput = `
693 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
694 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
695 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
698 var directivesWithCommentsTokens = []Token{
699 CharData("\n"),
700 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
701 CharData("\n"),
702 Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
703 CharData("\n"),
704 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`),
705 CharData("\n"),
708 func TestDirectivesWithComments(t *testing.T) {
709 d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
711 for i, want := range directivesWithCommentsTokens {
712 have, err := d.Token()
713 if err != nil {
714 t.Fatalf("token %d: unexpected error: %s", i, err)
716 if !reflect.DeepEqual(have, want) {
717 t.Errorf("token %d = %#v want %#v", i, have, want)
// errWriter is a writer whose Write method always returns an error.
type errWriter struct{}

// Write ignores p and always reports zero bytes written together with the
// error "unwritable".
func (errWriter) Write(p []byte) (n int, err error) {
	return 0, fmt.Errorf("unwritable")
}

// TestEscapeTextIOErrors checks that EscapeText returns the error produced
// by the writer it is given.
func TestEscapeTextIOErrors(t *testing.T) {
	const want = "unwritable"

	err := EscapeText(errWriter{}, []byte{'A'})
	if err != nil && err.Error() == want {
		return
	}
	t.Errorf("have %v, want %v", err, want)
}
// TestEscapeTextInvalidChar checks that EscapeText writes U+FFFD in place
// of a NUL byte in its input.
func TestEscapeTextInvalidChar(t *testing.T) {
	input := []byte("A \x00 terminated string.")
	want := "A \uFFFD terminated string."

	var out bytes.Buffer
	if err := EscapeText(&out, input); err != nil {
		t.Fatalf("have %v, want nil", err)
	}

	if got := out.String(); got != want {
		t.Errorf("have %v, want %v", got, want)
	}
}
// TestIssue5880 marshals a named byte-slice type holding bytes that are not
// valid UTF-8 on their own and checks that the output is valid UTF-8.
func TestIssue5880(t *testing.T) {
	type T []byte

	out, err := Marshal(T{192, 168, 0, 1})
	if err != nil {
		t.Errorf("Marshal error: %v", err)
	}
	if utf8.Valid(out) {
		return
	}
	t.Errorf("Marshal generated invalid UTF-8: %x", out)
}
// TestIssue11405 checks that documents ending while elements are still open
// make Token return a *SyntaxError.
func TestIssue11405(t *testing.T) {
	unterminated := []string{
		"<root>",
		"<root><foo>",
		"<root><foo></foo>",
	}
	for _, doc := range unterminated {
		dec := NewDecoder(strings.NewReader(doc))

		// Drain tokens until the decoder reports an error.
		var err error
		for err == nil {
			_, err = dec.Token()
		}

		if _, isSyntax := err.(*SyntaxError); !isSyntax {
			t.Errorf("%s: Token: Got error %v, want SyntaxError", doc, err)
		}
	}
}
// TestIssue12417 reads documents whose XML declaration names an encoding
// and checks, per table row, whether reading to the end succeeds (ok true)
// or fails (ok false). Reaching io.EOF counts as success.
func TestIssue12417(t *testing.T) {
	testCases := []struct {
		s  string
		ok bool
	}{
		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
	}
	for _, c := range testCases {
		dec := NewDecoder(strings.NewReader(c.s))

		// Drain tokens until the decoder reports an error, then treat a
		// clean end of input as "no error".
		var err error
		for err == nil {
			_, err = dec.Token()
		}
		if err == io.EOF {
			err = nil
		}

		switch {
		case err != nil && c.ok:
			t.Errorf("%q: Encoding charset: expected no error, got %s", c.s, err)
		case err == nil && !c.ok:
			t.Errorf("%q: Encoding charset: expected error, got nil", c.s)
		}
	}
}
// tokenMap returns a function that wraps a TokenReader so that every token
// it yields is first passed through mapping.
func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
	wrap := func(src TokenReader) TokenReader {
		return mapper{src, mapping}
	}
	return wrap
}

// mapper is a TokenReader that applies f to each token read from t.
type mapper struct {
	t TokenReader
	f func(Token) Token
}

// Token reads the next token from the wrapped reader. On success it returns
// f applied to that token; on error it returns a nil token and the error,
// without calling f.
func (m mapper) Token() (Token, error) {
	raw, err := m.t.Token()
	if err == nil {
		return m.f(raw), nil
	}
	return nil, err
}
// TestNewTokenDecoderIdempotent checks that NewTokenDecoder returns its
// argument unchanged when that argument is already a *Decoder.
func TestNewTokenDecoderIdempotent(t *testing.T) {
	base := NewDecoder(strings.NewReader(`<br/>`))
	if wrapped := NewTokenDecoder(base); wrapped != base {
		t.Error("NewTokenDecoder did not detect underlying Decoder")
	}
}
845 func TestWrapDecoder(t *testing.T) {
846 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
847 m := tokenMap(func(t Token) Token {
848 switch tok := t.(type) {
849 case StartElement:
850 if tok.Name.Local == "quote" {
851 tok.Name.Local = "blocking"
852 return tok
854 case EndElement:
855 if tok.Name.Local == "quote" {
856 tok.Name.Local = "blocking"
857 return tok
860 return t
863 d = NewTokenDecoder(m(d))
865 o := struct {
866 XMLName Name `xml:"blocking"`
867 Chardata string `xml:",chardata"`
870 if err := d.Decode(&o); err != nil {
871 t.Fatal("Got unexpected error while decoding:", err)
874 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
875 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
// tokReader is a TokenReader whose Token method always returns an empty
// StartElement and a nil error.
type tokReader struct{}

// Token returns a zero-valued StartElement and no error on every call.
func (tokReader) Token() (Token, error) {
	return StartElement{}, nil
}

// Failure has an UnmarshalXML method that does nothing.
type Failure struct{}

// UnmarshalXML ignores its arguments and returns nil.
func (Failure) UnmarshalXML(*Decoder, StartElement) error {
	return nil
}

// TestTokenUnmarshaler decodes into a Failure from a decoder built on
// tokReader and fails if doing so panics.
func TestTokenUnmarshaler(t *testing.T) {
	defer func() {
		if recover() != nil {
			t.Error("Unexpected panic using custom token unmarshaler")
		}
	}()

	dec := NewTokenDecoder(tokReader{})
	// Only the absence of a panic is checked; the result is not inspected.
	_ = dec.Decode(&Failure{})
}