PR c++/86342 - -Wdeprecated-copy and system headers.
[official-gcc.git] / libgo / go / html / template / transition.go
blobc72cf1ea60ef0a50f83802bdad161dbe826ba741
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
5 package template
import (
	"bytes"
	"strings"
)
12 // transitionFunc is the array of context transition functions for text nodes.
13 // A transition function takes a context and template text input, and returns
14 // the updated context and the number of bytes consumed from the front of the
15 // input.
16 var transitionFunc = [...]func(context, []byte) (context, int){
17 stateText: tText,
18 stateTag: tTag,
19 stateAttrName: tAttrName,
20 stateAfterName: tAfterName,
21 stateBeforeValue: tBeforeValue,
22 stateHTMLCmt: tHTMLCmt,
23 stateRCDATA: tSpecialTagEnd,
24 stateAttr: tAttr,
25 stateURL: tURL,
26 stateSrcset: tURL,
27 stateJS: tJS,
28 stateJSDqStr: tJSDelimited,
29 stateJSSqStr: tJSDelimited,
30 stateJSRegexp: tJSDelimited,
31 stateJSBlockCmt: tBlockCmt,
32 stateJSLineCmt: tLineCmt,
33 stateCSS: tCSS,
34 stateCSSDqStr: tCSSStr,
35 stateCSSSqStr: tCSSStr,
36 stateCSSDqURL: tCSSStr,
37 stateCSSSqURL: tCSSStr,
38 stateCSSURL: tCSSStr,
39 stateCSSBlockCmt: tBlockCmt,
40 stateCSSLineCmt: tLineCmt,
41 stateError: tError,
// commentStart and commentEnd are the byte sequences that open and close
// an HTML comment.
var commentStart = []byte("<!--")
var commentEnd = []byte("-->")
47 // tText is the context transition function for the text state.
48 func tText(c context, s []byte) (context, int) {
49 k := 0
50 for {
51 i := k + bytes.IndexByte(s[k:], '<')
52 if i < k || i+1 == len(s) {
53 return c, len(s)
54 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
55 return context{state: stateHTMLCmt}, i + 4
57 i++
58 end := false
59 if s[i] == '/' {
60 if i+1 == len(s) {
61 return c, len(s)
63 end, i = true, i+1
65 j, e := eatTagName(s, i)
66 if j != i {
67 if end {
68 e = elementNone
70 // We've found an HTML tag.
71 return context{state: stateTag, element: e}, j
73 k = j
77 var elementContentType = [...]state{
78 elementNone: stateText,
79 elementScript: stateJS,
80 elementStyle: stateCSS,
81 elementTextarea: stateRCDATA,
82 elementTitle: stateRCDATA,
85 // tTag is the context transition function for the tag state.
86 func tTag(c context, s []byte) (context, int) {
87 // Find the attribute name.
88 i := eatWhiteSpace(s, 0)
89 if i == len(s) {
90 return c, len(s)
92 if s[i] == '>' {
93 return context{
94 state: elementContentType[c.element],
95 element: c.element,
96 }, i + 1
98 j, err := eatAttrName(s, i)
99 if err != nil {
100 return context{state: stateError, err: err}, len(s)
102 state, attr := stateTag, attrNone
103 if i == j {
104 return context{
105 state: stateError,
106 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
107 }, len(s)
110 attrName := strings.ToLower(string(s[i:j]))
111 if c.element == elementScript && attrName == "type" {
112 attr = attrScriptType
113 } else {
114 switch attrType(attrName) {
115 case contentTypeURL:
116 attr = attrURL
117 case contentTypeCSS:
118 attr = attrStyle
119 case contentTypeJS:
120 attr = attrScript
121 case contentTypeSrcset:
122 attr = attrSrcset
126 if j == len(s) {
127 state = stateAttrName
128 } else {
129 state = stateAfterName
131 return context{state: state, element: c.element, attr: attr}, j
134 // tAttrName is the context transition function for stateAttrName.
135 func tAttrName(c context, s []byte) (context, int) {
136 i, err := eatAttrName(s, 0)
137 if err != nil {
138 return context{state: stateError, err: err}, len(s)
139 } else if i != len(s) {
140 c.state = stateAfterName
142 return c, i
145 // tAfterName is the context transition function for stateAfterName.
146 func tAfterName(c context, s []byte) (context, int) {
147 // Look for the start of the value.
148 i := eatWhiteSpace(s, 0)
149 if i == len(s) {
150 return c, len(s)
151 } else if s[i] != '=' {
152 // Occurs due to tag ending '>', and valueless attribute.
153 c.state = stateTag
154 return c, i
156 c.state = stateBeforeValue
157 // Consume the "=".
158 return c, i + 1
161 var attrStartStates = [...]state{
162 attrNone: stateAttr,
163 attrScript: stateJS,
164 attrScriptType: stateAttr,
165 attrStyle: stateCSS,
166 attrURL: stateURL,
167 attrSrcset: stateSrcset,
170 // tBeforeValue is the context transition function for stateBeforeValue.
171 func tBeforeValue(c context, s []byte) (context, int) {
172 i := eatWhiteSpace(s, 0)
173 if i == len(s) {
174 return c, len(s)
176 // Find the attribute delimiter.
177 delim := delimSpaceOrTagEnd
178 switch s[i] {
179 case '\'':
180 delim, i = delimSingleQuote, i+1
181 case '"':
182 delim, i = delimDoubleQuote, i+1
184 c.state, c.delim = attrStartStates[c.attr], delim
185 return c, i
188 // tHTMLCmt is the context transition function for stateHTMLCmt.
189 func tHTMLCmt(c context, s []byte) (context, int) {
190 if i := bytes.Index(s, commentEnd); i != -1 {
191 return context{}, i + 3
193 return c, len(s)
196 // specialTagEndMarkers maps element types to the character sequence that
197 // case-insensitively signals the end of the special tag body.
198 var specialTagEndMarkers = [...][]byte{
199 elementScript: []byte("script"),
200 elementStyle: []byte("style"),
201 elementTextarea: []byte("textarea"),
202 elementTitle: []byte("title"),
var (
	// specialTagEndPrefix is the byte sequence that opens an end tag.
	specialTagEndPrefix = []byte("</")
	// tagEndSeparators holds the bytes that indexTagEnd accepts
	// immediately after a tag name in an end tag.
	tagEndSeparators = []byte("> \t\n\f/")
)
210 // tSpecialTagEnd is the context transition function for raw text and RCDATA
211 // element states.
212 func tSpecialTagEnd(c context, s []byte) (context, int) {
213 if c.element != elementNone {
214 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
215 return context{}, i
218 return c, len(s)
221 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
222 func indexTagEnd(s []byte, tag []byte) int {
223 res := 0
224 plen := len(specialTagEndPrefix)
225 for len(s) > 0 {
226 // Try to find the tag end prefix first
227 i := bytes.Index(s, specialTagEndPrefix)
228 if i == -1 {
229 return i
231 s = s[i+plen:]
232 // Try to match the actual tag if there is still space for it
233 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
234 s = s[len(tag):]
235 // Check the tag is followed by a proper separator
236 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
237 return res + i
239 res += len(tag)
241 res += i + plen
243 return -1
246 // tAttr is the context transition function for the attribute state.
247 func tAttr(c context, s []byte) (context, int) {
248 return c, len(s)
251 // tURL is the context transition function for the URL state.
252 func tURL(c context, s []byte) (context, int) {
253 if bytes.ContainsAny(s, "#?") {
254 c.urlPart = urlPartQueryOrFrag
255 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
256 // HTML5 uses "Valid URL potentially surrounded by spaces" for
257 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
258 c.urlPart = urlPartPreQuery
260 return c, len(s)
263 // tJS is the context transition function for the JS state.
264 func tJS(c context, s []byte) (context, int) {
265 i := bytes.IndexAny(s, `"'/`)
266 if i == -1 {
267 // Entire input is non string, comment, regexp tokens.
268 c.jsCtx = nextJSCtx(s, c.jsCtx)
269 return c, len(s)
271 c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
272 switch s[i] {
273 case '"':
274 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
275 case '\'':
276 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
277 case '/':
278 switch {
279 case i+1 < len(s) && s[i+1] == '/':
280 c.state, i = stateJSLineCmt, i+1
281 case i+1 < len(s) && s[i+1] == '*':
282 c.state, i = stateJSBlockCmt, i+1
283 case c.jsCtx == jsCtxRegexp:
284 c.state = stateJSRegexp
285 case c.jsCtx == jsCtxDivOp:
286 c.jsCtx = jsCtxRegexp
287 default:
288 return context{
289 state: stateError,
290 err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
291 }, len(s)
293 default:
294 panic("unreachable")
296 return c, i + 1
299 // tJSDelimited is the context transition function for the JS string and regexp
300 // states.
301 func tJSDelimited(c context, s []byte) (context, int) {
302 specials := `\"`
303 switch c.state {
304 case stateJSSqStr:
305 specials = `\'`
306 case stateJSRegexp:
307 specials = `\/[]`
310 k, inCharset := 0, false
311 for {
312 i := k + bytes.IndexAny(s[k:], specials)
313 if i < k {
314 break
316 switch s[i] {
317 case '\\':
319 if i == len(s) {
320 return context{
321 state: stateError,
322 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
323 }, len(s)
325 case '[':
326 inCharset = true
327 case ']':
328 inCharset = false
329 default:
330 // end delimiter
331 if !inCharset {
332 c.state, c.jsCtx = stateJS, jsCtxDivOp
333 return c, i + 1
336 k = i + 1
339 if inCharset {
340 // This can be fixed by making context richer if interpolation
341 // into charsets is desired.
342 return context{
343 state: stateError,
344 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
345 }, len(s)
348 return c, len(s)
// blockCommentEnd is the byte sequence that tBlockCmt searches for to
// find the end of a /*comment*/.
var blockCommentEnd = []byte("*/")
353 // tBlockCmt is the context transition function for /*comment*/ states.
354 func tBlockCmt(c context, s []byte) (context, int) {
355 i := bytes.Index(s, blockCommentEnd)
356 if i == -1 {
357 return c, len(s)
359 switch c.state {
360 case stateJSBlockCmt:
361 c.state = stateJS
362 case stateCSSBlockCmt:
363 c.state = stateCSS
364 default:
365 panic(c.state.String())
367 return c, i + 2
370 // tLineCmt is the context transition function for //comment states.
371 func tLineCmt(c context, s []byte) (context, int) {
372 var lineTerminators string
373 var endState state
374 switch c.state {
375 case stateJSLineCmt:
376 lineTerminators, endState = "\n\r\u2028\u2029", stateJS
377 case stateCSSLineCmt:
378 lineTerminators, endState = "\n\f\r", stateCSS
379 // Line comments are not part of any published CSS standard but
380 // are supported by the 4 major browsers.
381 // This defines line comments as
382 // LINECOMMENT ::= "//" [^\n\f\d]*
383 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
384 // newlines:
385 // nl ::= #xA | #xD #xA | #xD | #xC
386 default:
387 panic(c.state.String())
390 i := bytes.IndexAny(s, lineTerminators)
391 if i == -1 {
392 return c, len(s)
394 c.state = endState
395 // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
396 // "However, the LineTerminator at the end of the line is not
397 // considered to be part of the single-line comment; it is
398 // recognized separately by the lexical grammar and becomes part
399 // of the stream of input elements for the syntactic grammar."
400 return c, i
403 // tCSS is the context transition function for the CSS state.
404 func tCSS(c context, s []byte) (context, int) {
405 // CSS quoted strings are almost never used except for:
406 // (1) URLs as in background: "/foo.png"
407 // (2) Multiword font-names as in font-family: "Times New Roman"
408 // (3) List separators in content values as in inline-lists:
409 // <style>
410 // ul.inlineList { list-style: none; padding:0 }
411 // ul.inlineList > li { display: inline }
412 // ul.inlineList > li:before { content: ", " }
413 // ul.inlineList > li:first-child:before { content: "" }
414 // </style>
415 // <ul class=inlineList><li>One<li>Two<li>Three</ul>
416 // (4) Attribute value selectors as in a[href="http://example.com/"]
418 // We conservatively treat all strings as URLs, but make some
419 // allowances to avoid confusion.
421 // In (1), our conservative assumption is justified.
422 // In (2), valid font names do not contain ':', '?', or '#', so our
423 // conservative assumption is fine since we will never transition past
424 // urlPartPreQuery.
425 // In (3), our protocol heuristic should not be tripped, and there
426 // should not be non-space content after a '?' or '#', so as long as
427 // we only %-encode RFC 3986 reserved characters we are ok.
428 // In (4), we should URL escape for URL attributes, and for others we
429 // have the attribute name available if our conservative assumption
430 // proves problematic for real code.
432 k := 0
433 for {
434 i := k + bytes.IndexAny(s[k:], `("'/`)
435 if i < k {
436 return c, len(s)
438 switch s[i] {
439 case '(':
440 // Look for url to the left.
441 p := bytes.TrimRight(s[:i], "\t\n\f\r ")
442 if endsWithCSSKeyword(p, "url") {
443 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
444 switch {
445 case j != len(s) && s[j] == '"':
446 c.state, j = stateCSSDqURL, j+1
447 case j != len(s) && s[j] == '\'':
448 c.state, j = stateCSSSqURL, j+1
449 default:
450 c.state = stateCSSURL
452 return c, j
454 case '/':
455 if i+1 < len(s) {
456 switch s[i+1] {
457 case '/':
458 c.state = stateCSSLineCmt
459 return c, i + 2
460 case '*':
461 c.state = stateCSSBlockCmt
462 return c, i + 2
465 case '"':
466 c.state = stateCSSDqStr
467 return c, i + 1
468 case '\'':
469 c.state = stateCSSSqStr
470 return c, i + 1
472 k = i + 1
476 // tCSSStr is the context transition function for the CSS string and URL states.
477 func tCSSStr(c context, s []byte) (context, int) {
478 var endAndEsc string
479 switch c.state {
480 case stateCSSDqStr, stateCSSDqURL:
481 endAndEsc = `\"`
482 case stateCSSSqStr, stateCSSSqURL:
483 endAndEsc = `\'`
484 case stateCSSURL:
485 // Unquoted URLs end with a newline or close parenthesis.
486 // The below includes the wc (whitespace character) and nl.
487 endAndEsc = "\\\t\n\f\r )"
488 default:
489 panic(c.state.String())
492 k := 0
493 for {
494 i := k + bytes.IndexAny(s[k:], endAndEsc)
495 if i < k {
496 c, nread := tURL(c, decodeCSS(s[k:]))
497 return c, k + nread
499 if s[i] == '\\' {
501 if i == len(s) {
502 return context{
503 state: stateError,
504 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
505 }, len(s)
507 } else {
508 c.state = stateCSS
509 return c, i + 1
511 c, _ = tURL(c, decodeCSS(s[:i+1]))
512 k = i + 1
516 // tError is the context transition function for the error state.
517 func tError(c context, s []byte) (context, int) {
518 return c, len(s)
521 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
522 // It returns an error if s[i:] does not look like it begins with an
523 // attribute name, such as encountering a quote mark without a preceding
524 // equals sign.
525 func eatAttrName(s []byte, i int) (int, *Error) {
526 for j := i; j < len(s); j++ {
527 switch s[j] {
528 case ' ', '\t', '\n', '\f', '\r', '=', '>':
529 return j, nil
530 case '\'', '"', '<':
531 // These result in a parse warning in HTML5 and are
532 // indicative of serious problems if seen in an attr
533 // name in a template.
534 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
535 default:
536 // No-op.
539 return len(s), nil
542 var elementNameMap = map[string]element{
543 "script": elementScript,
544 "style": elementStyle,
545 "textarea": elementTextarea,
546 "title": elementTitle,
// asciiAlpha reports whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
}
554 // asciiAlphaNum reports whether c is an ASCII letter or digit.
555 func asciiAlphaNum(c byte) bool {
556 return asciiAlpha(c) || '0' <= c && c <= '9'
559 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
560 func eatTagName(s []byte, i int) (int, element) {
561 if i == len(s) || !asciiAlpha(s[i]) {
562 return i, elementNone
564 j := i + 1
565 for j < len(s) {
566 x := s[j]
567 if asciiAlphaNum(x) {
569 continue
571 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
572 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
573 j += 2
574 continue
576 break
578 return j, elementNameMap[strings.ToLower(string(s[i:j]))]
// eatWhiteSpace returns the largest j such that s[i:j] is white space.
// White space is one of ' ', '\t', '\n', '\f' and '\r'. If every byte from
// i onward is white space, len(s) is returned.
func eatWhiteSpace(s []byte, i int) int {
	for j := i; j < len(s); j++ {
		switch s[j] {
		case ' ', '\t', '\n', '\f', '\r':
			// No-op.
		default:
			return j
		}
	}
	return len(s)
}