1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// readParseTest reads a single test case from r.
//
// A test case is a sequence of sections, each introduced by a header line:
// "#data" (the HTML to parse), "#errors" (skipped), an optional
// "#document-fragment" (one line naming the context element) and "#document"
// (the expected tree dump). The dump ends at end of input or at a blank line
// that is not inside a quoted text node.
//
// It returns the HTML with one trailing newline removed, the expected dump,
// and the context element name ("" when there is no fragment section). Read
// errors, including io.EOF before the "#document" section, are returned
// unchanged; an unexpected section header yields a descriptive error.
func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
	line, err := r.ReadSlice('\n')
	if err != nil {
		return "", "", "", err
	}
	var b []byte

	// Read the HTML.
	if string(line) != "#data\n" {
		return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
	}
	for {
		line, err = r.ReadSlice('\n')
		if err != nil {
			return "", "", "", err
		}
		// A successful ReadSlice ends in '\n', so line is never empty here.
		if line[0] == '#' {
			break
		}
		// The slice is only valid until the next read, so copy it out.
		b = append(b, line...)
	}
	text = strings.TrimSuffix(string(b), "\n")
	b = b[:0]

	// Skip the error list.
	if string(line) != "#errors\n" {
		return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
	}
	for {
		line, err = r.ReadSlice('\n')
		if err != nil {
			return "", "", "", err
		}
		if line[0] == '#' {
			break
		}
	}

	// An optional fragment section names the context element on one line.
	if string(line) == "#document-fragment\n" {
		line, err = r.ReadSlice('\n')
		if err != nil {
			return "", "", "", err
		}
		context = strings.TrimSpace(string(line))
		line, err = r.ReadSlice('\n')
		if err != nil {
			return "", "", "", err
		}
	}

	// Read the dump of what the parse tree should be.
	if string(line) != "#document\n" {
		return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
	}
	// inQuote tracks whether we are inside a quoted text node, which may
	// itself contain blank lines that must not end the section.
	inQuote := false
	for {
		line, err = r.ReadSlice('\n')
		// io.EOF is tolerated: the last test case need not end in a blank line.
		if err != nil && err != io.EOF {
			return "", "", "", err
		}
		trimmed := bytes.Trim(line, "| \n")
		if len(trimmed) > 0 {
			if line[0] == '|' && trimmed[0] == '"' {
				inQuote = true
			}
			// A lone `"` on a "|" line opens a quote; it does not close one.
			if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
				inQuote = false
			}
		}
		if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
			break
		}
		b = append(b, line...)
	}
	return text, string(b), context, nil
}
// dumpIndent writes the prefix of one tree-dump line to w: "| " followed by
// two spaces for each level of nesting. A level of zero or less writes only
// the "| " marker.
//
// NOTE(review): this copy of the file showed a single space as the per-level
// indent, which looks like collapsed whitespace; two spaces is what the
// html5lib tree-construction dump format uses. Confirm against the expected
// dumps in the test data.
//
// Write errors are ignored.
func dumpIndent(w io.Writer, level int) {
	io.WriteString(w, "| ")
	for remaining := level; remaining > 0; remaining-- {
		io.WriteString(w, "  ")
	}
}
111 type sortedAttributes
[]Attribute
113 func (a sortedAttributes
) Len() int {
117 func (a sortedAttributes
) Less(i
, j
int) bool {
118 if a
[i
].Namespace
!= a
[j
].Namespace
{
119 return a
[i
].Namespace
< a
[j
].Namespace
121 return a
[i
].Key
< a
[j
].Key
124 func (a sortedAttributes
) Swap(i
, j
int) {
125 a
[i
], a
[j
] = a
[j
], a
[i
]
128 func dumpLevel(w io
.Writer
, n
*Node
, level
int) error
{
132 return errors
.New("unexpected ErrorNode")
134 return errors
.New("unexpected DocumentNode")
136 if n
.Namespace
!= "" {
137 fmt
.Fprintf(w
, "<%s %s>", n
.Namespace
, n
.Data
)
139 fmt
.Fprintf(w
, "<%s>", n
.Data
)
141 attr
:= sortedAttributes(n
.Attr
)
143 for _
, a
:= range attr
{
144 io
.WriteString(w
, "\n")
145 dumpIndent(w
, level
+1)
146 if a
.Namespace
!= "" {
147 fmt
.Fprintf(w
, `%s %s="%s"`, a
.Namespace
, a
.Key
, a
.Val
)
149 fmt
.Fprintf(w
, `%s="%s"`, a
.Key
, a
.Val
)
153 fmt
.Fprintf(w
, `"%s"`, n
.Data
)
155 fmt
.Fprintf(w
, "<!-- %s -->", n
.Data
)
157 fmt
.Fprintf(w
, "<!DOCTYPE %s", n
.Data
)
160 for _
, a
:= range n
.Attr
{
168 if p
!= "" || s
!= "" {
169 fmt
.Fprintf(w
, ` "%s"`, p
)
170 fmt
.Fprintf(w
, ` "%s"`, s
)
173 io
.WriteString(w
, ">")
174 case scopeMarkerNode
:
175 return errors
.New("unexpected scopeMarkerNode")
177 return errors
.New("unknown node type")
179 io
.WriteString(w
, "\n")
180 for c
:= n
.FirstChild
; c
!= nil; c
= c
.NextSibling
{
181 if err
:= dumpLevel(w
, c
, level
+1); err
!= nil {
188 func dump(n
*Node
) (string, error
) {
189 if n
== nil || n
.FirstChild
== nil {
193 for c
:= n
.FirstChild
; c
!= nil; c
= c
.NextSibling
{
194 if err
:= dumpLevel(&b
, c
, 0); err
!= nil {
198 return b
.String(), nil
// testDataDir is the directory, relative to the package, that TestParser
// globs for "*.dat" test files. It ends in a slash so that a file pattern can
// be appended directly.
const testDataDir = "testdata/webkit/"
203 func TestParser(t
*testing
.T
) {
204 testFiles
, err
:= filepath
.Glob(testDataDir
+ "*.dat")
208 for _
, tf
:= range testFiles
{
209 f
, err
:= os
.Open(tf
)
214 r
:= bufio
.NewReader(f
)
217 text
, want
, context
, err
:= readParseTest(r
)
225 err
= testParseCase(text
, want
, context
)
228 t
.Errorf("%s test #%d %q, %s", tf
, i
, text
, err
)
234 // testParseCase tests one test case from the test files. If the test does not
235 // pass, it returns an error that explains the failure.
236 // text is the HTML to be parsed, want is a dump of the correct parse tree,
237 // and context is the name of the context node, if any.
238 func testParseCase(text
, want
, context
string) (err error
) {
240 if x
:= recover(); x
!= nil {
241 switch e
:= x
.(type) {
245 err
= fmt
.Errorf("%v", e
)
252 doc
, err
= Parse(strings
.NewReader(text
))
257 contextNode
:= &Node
{
259 DataAtom
: atom
.Lookup([]byte(context
)),
262 nodes
, err
:= ParseFragment(strings
.NewReader(text
), contextNode
)
269 for _
, n
:= range nodes
{
274 if err
:= checkTreeConsistency(doc
); err
!= nil {
278 got
, err
:= dump(doc
)
282 // Compare the parsed tree to the #document section.
284 return fmt
.Errorf("got vs want:\n----\n%s----\n%s----", got
, want
)
287 if renderTestBlacklist
[text
] || context
!= "" {
291 // Check that rendering and re-parsing results in an identical tree.
294 pw
.CloseWithError(Render(pw
, doc
))
296 doc1
, err
:= Parse(pr
)
300 got1
, err
:= dump(doc1
)
305 return fmt
.Errorf("got vs got1:\n----\n%s----\n%s----", got
, got1
)
// Some test inputs result in parse trees that are not 'well-formed' despite
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
// tree will not result in an exact clone of that tree. We blacklist such
// inputs from the render test.
//
// renderTestBlacklist maps each such input to true; any other input maps to
// false.
var renderTestBlacklist = func() map[string]bool {
	inputs := []string{
		// The second <a> will be reparented to the first <table>'s parent.
		// This results in an <a> whose parent is an <a>, which is not
		// 'well-formed'.
		`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`,
		// The same thing with a <p>:
		`<p><table></p>`,
		// More cases of <a> being reparented:
		`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`,
		`<a><table><a></table><p><a><div><a>`,
		`<a><table><td><a><table></table><a></tr><a></table><a>`,
		// A similar reparenting situation involving <nobr>:
		`<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`,
		// A <plaintext> element is reparented, putting it before a table.
		// A <plaintext> element can't have anything after it in HTML.
		`<table><plaintext><td>`,
		`<!doctype html><table><plaintext></plaintext>`,
		`<!doctype html><table><tbody><plaintext></plaintext>`,
		`<!doctype html><table><tbody><tr><plaintext></plaintext>`,
		// A form inside a table inside a form doesn't work either.
		`<!doctype html><form><table></form><form></table></form>`,
		// A script that ends at EOF may escape its own closing tag when
		// rendered.
		`<!doctype html><script><!--<script `,
		`<!doctype html><script><!--<script <`,
		`<!doctype html><script><!--<script <a`,
		`<!doctype html><script><!--<script </`,
		`<!doctype html><script><!--<script </s`,
		`<!doctype html><script><!--<script </script`,
		`<!doctype html><script><!--<script </scripta`,
		`<!doctype html><script><!--<script -`,
		`<!doctype html><script><!--<script -a`,
		`<!doctype html><script><!--<script -<`,
		`<!doctype html><script><!--<script --`,
		`<!doctype html><script><!--<script --a`,
		`<!doctype html><script><!--<script --<`,
		`<script><!--<script `,
		`<script><!--<script <a`,
		`<script><!--<script </script`,
		`<script><!--<script </scripta`,
		`<script><!--<script -`,
		`<script><!--<script -a`,
		`<script><!--<script --`,
		`<script><!--<script --a`,
		`<script><!--<script <`,
		`<script><!--<script </`,
		`<script><!--<script </s`,
		// Reconstructing the active formatting elements results in a
		// <plaintext> element that contains an <a> element.
		`<!doctype html><p><a><plaintext>b`,
	}
	blacklist := make(map[string]bool, len(inputs))
	for _, input := range inputs {
		blacklist[input] = true
	}
	return blacklist
}()
365 func TestNodeConsistency(t
*testing
.T
) {
366 // inconsistentNode is a Node whose DataAtom and Data do not agree.
367 inconsistentNode
:= &Node
{
369 DataAtom
: atom
.Frameset
,
372 _
, err
:= ParseFragment(strings
.NewReader("<p>hello</p>"), inconsistentNode
)
374 t
.Errorf("got nil error, want non-nil")
378 func BenchmarkParser(b
*testing
.B
) {
379 buf
, err
:= ioutil
.ReadFile("testdata/go1.html")
381 b
.Fatalf("could not read testdata/go1.html: %v", err
)
383 b
.SetBytes(int64(len(buf
)))
387 for i
:= 0; i
< b
.N
; i
++ {
388 Parse(bytes
.NewBuffer(buf
))