libgo: update to go1.9
[official-gcc.git] / libgo / go / html / template / transition.go
blobdf7ac2289b456c4d4bc81d6a1ce39ca57b1f48bf
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package template
7 import (
8 "bytes"
9 "strings"
12 // transitionFunc is the array of context transition functions for text nodes.
13 // A transition function takes a context and template text input, and returns
14 // the updated context and the number of bytes consumed from the front of the
15 // input.
16 var transitionFunc = [...]func(context, []byte) (context, int){
17 stateText: tText,
18 stateTag: tTag,
19 stateAttrName: tAttrName,
20 stateAfterName: tAfterName,
21 stateBeforeValue: tBeforeValue,
22 stateHTMLCmt: tHTMLCmt,
23 stateRCDATA: tSpecialTagEnd,
24 stateAttr: tAttr,
25 stateURL: tURL,
26 stateJS: tJS,
27 stateJSDqStr: tJSDelimited,
28 stateJSSqStr: tJSDelimited,
29 stateJSRegexp: tJSDelimited,
30 stateJSBlockCmt: tBlockCmt,
31 stateJSLineCmt: tLineCmt,
32 stateCSS: tCSS,
33 stateCSSDqStr: tCSSStr,
34 stateCSSSqStr: tCSSStr,
35 stateCSSDqURL: tCSSStr,
36 stateCSSSqURL: tCSSStr,
37 stateCSSURL: tCSSStr,
38 stateCSSBlockCmt: tBlockCmt,
39 stateCSSLineCmt: tLineCmt,
40 stateError: tError,
// Byte sequences that open and close an HTML comment.
var (
	commentStart = []byte("<!--")
	commentEnd   = []byte("-->")
)
46 // tText is the context transition function for the text state.
47 func tText(c context, s []byte) (context, int) {
48 k := 0
49 for {
50 i := k + bytes.IndexByte(s[k:], '<')
51 if i < k || i+1 == len(s) {
52 return c, len(s)
53 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
54 return context{state: stateHTMLCmt}, i + 4
56 i++
57 end := false
58 if s[i] == '/' {
59 if i+1 == len(s) {
60 return c, len(s)
62 end, i = true, i+1
64 j, e := eatTagName(s, i)
65 if j != i {
66 if end {
67 e = elementNone
69 // We've found an HTML tag.
70 return context{state: stateTag, element: e}, j
72 k = j
76 var elementContentType = [...]state{
77 elementNone: stateText,
78 elementScript: stateJS,
79 elementStyle: stateCSS,
80 elementTextarea: stateRCDATA,
81 elementTitle: stateRCDATA,
84 // tTag is the context transition function for the tag state.
85 func tTag(c context, s []byte) (context, int) {
86 // Find the attribute name.
87 i := eatWhiteSpace(s, 0)
88 if i == len(s) {
89 return c, len(s)
91 if s[i] == '>' {
92 return context{
93 state: elementContentType[c.element],
94 element: c.element,
95 }, i + 1
97 j, err := eatAttrName(s, i)
98 if err != nil {
99 return context{state: stateError, err: err}, len(s)
101 state, attr := stateTag, attrNone
102 if i == j {
103 return context{
104 state: stateError,
105 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
106 }, len(s)
109 attrName := strings.ToLower(string(s[i:j]))
110 if c.element == elementScript && attrName == "type" {
111 attr = attrScriptType
112 } else {
113 switch attrType(attrName) {
114 case contentTypeURL:
115 attr = attrURL
116 case contentTypeCSS:
117 attr = attrStyle
118 case contentTypeJS:
119 attr = attrScript
123 if j == len(s) {
124 state = stateAttrName
125 } else {
126 state = stateAfterName
128 return context{state: state, element: c.element, attr: attr}, j
131 // tAttrName is the context transition function for stateAttrName.
132 func tAttrName(c context, s []byte) (context, int) {
133 i, err := eatAttrName(s, 0)
134 if err != nil {
135 return context{state: stateError, err: err}, len(s)
136 } else if i != len(s) {
137 c.state = stateAfterName
139 return c, i
142 // tAfterName is the context transition function for stateAfterName.
143 func tAfterName(c context, s []byte) (context, int) {
144 // Look for the start of the value.
145 i := eatWhiteSpace(s, 0)
146 if i == len(s) {
147 return c, len(s)
148 } else if s[i] != '=' {
149 // Occurs due to tag ending '>', and valueless attribute.
150 c.state = stateTag
151 return c, i
153 c.state = stateBeforeValue
154 // Consume the "=".
155 return c, i + 1
158 var attrStartStates = [...]state{
159 attrNone: stateAttr,
160 attrScript: stateJS,
161 attrScriptType: stateAttr,
162 attrStyle: stateCSS,
163 attrURL: stateURL,
166 // tBeforeValue is the context transition function for stateBeforeValue.
167 func tBeforeValue(c context, s []byte) (context, int) {
168 i := eatWhiteSpace(s, 0)
169 if i == len(s) {
170 return c, len(s)
172 // Find the attribute delimiter.
173 delim := delimSpaceOrTagEnd
174 switch s[i] {
175 case '\'':
176 delim, i = delimSingleQuote, i+1
177 case '"':
178 delim, i = delimDoubleQuote, i+1
180 c.state, c.delim = attrStartStates[c.attr], delim
181 return c, i
184 // tHTMLCmt is the context transition function for stateHTMLCmt.
185 func tHTMLCmt(c context, s []byte) (context, int) {
186 if i := bytes.Index(s, commentEnd); i != -1 {
187 return context{}, i + 3
189 return c, len(s)
192 // specialTagEndMarkers maps element types to the character sequence that
193 // case-insensitively signals the end of the special tag body.
194 var specialTagEndMarkers = [...][]byte{
195 elementScript: []byte("script"),
196 elementStyle: []byte("style"),
197 elementTextarea: []byte("textarea"),
198 elementTitle: []byte("title"),
var (
	// specialTagEndPrefix is the byte sequence that opens an end tag.
	specialTagEndPrefix = []byte("</")
	// tagEndSeparators lists the bytes that may directly follow the tag
	// name for indexTagEnd to accept it as an end tag.
	tagEndSeparators = []byte("> \t\n\f/")
)
206 // tSpecialTagEnd is the context transition function for raw text and RCDATA
207 // element states.
208 func tSpecialTagEnd(c context, s []byte) (context, int) {
209 if c.element != elementNone {
210 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
211 return context{}, i
214 return c, len(s)
217 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
218 func indexTagEnd(s []byte, tag []byte) int {
219 res := 0
220 plen := len(specialTagEndPrefix)
221 for len(s) > 0 {
222 // Try to find the tag end prefix first
223 i := bytes.Index(s, specialTagEndPrefix)
224 if i == -1 {
225 return i
227 s = s[i+plen:]
228 // Try to match the actual tag if there is still space for it
229 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
230 s = s[len(tag):]
231 // Check the tag is followed by a proper separator
232 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
233 return res + i
235 res += len(tag)
237 res += i + plen
239 return -1
242 // tAttr is the context transition function for the attribute state.
243 func tAttr(c context, s []byte) (context, int) {
244 return c, len(s)
247 // tURL is the context transition function for the URL state.
248 func tURL(c context, s []byte) (context, int) {
249 if bytes.ContainsAny(s, "#?") {
250 c.urlPart = urlPartQueryOrFrag
251 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
252 // HTML5 uses "Valid URL potentially surrounded by spaces" for
253 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
254 c.urlPart = urlPartPreQuery
256 return c, len(s)
259 // tJS is the context transition function for the JS state.
260 func tJS(c context, s []byte) (context, int) {
261 i := bytes.IndexAny(s, `"'/`)
262 if i == -1 {
263 // Entire input is non string, comment, regexp tokens.
264 c.jsCtx = nextJSCtx(s, c.jsCtx)
265 return c, len(s)
267 c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
268 switch s[i] {
269 case '"':
270 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
271 case '\'':
272 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
273 case '/':
274 switch {
275 case i+1 < len(s) && s[i+1] == '/':
276 c.state, i = stateJSLineCmt, i+1
277 case i+1 < len(s) && s[i+1] == '*':
278 c.state, i = stateJSBlockCmt, i+1
279 case c.jsCtx == jsCtxRegexp:
280 c.state = stateJSRegexp
281 case c.jsCtx == jsCtxDivOp:
282 c.jsCtx = jsCtxRegexp
283 default:
284 return context{
285 state: stateError,
286 err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
287 }, len(s)
289 default:
290 panic("unreachable")
292 return c, i + 1
295 // tJSDelimited is the context transition function for the JS string and regexp
296 // states.
297 func tJSDelimited(c context, s []byte) (context, int) {
298 specials := `\"`
299 switch c.state {
300 case stateJSSqStr:
301 specials = `\'`
302 case stateJSRegexp:
303 specials = `\/[]`
306 k, inCharset := 0, false
307 for {
308 i := k + bytes.IndexAny(s[k:], specials)
309 if i < k {
310 break
312 switch s[i] {
313 case '\\':
315 if i == len(s) {
316 return context{
317 state: stateError,
318 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
319 }, len(s)
321 case '[':
322 inCharset = true
323 case ']':
324 inCharset = false
325 default:
326 // end delimiter
327 if !inCharset {
328 c.state, c.jsCtx = stateJS, jsCtxDivOp
329 return c, i + 1
332 k = i + 1
335 if inCharset {
336 // This can be fixed by making context richer if interpolation
337 // into charsets is desired.
338 return context{
339 state: stateError,
340 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
341 }, len(s)
344 return c, len(s)
// blockCommentEnd is the byte sequence that closes a /*comment*/.
var blockCommentEnd = []byte{'*', '/'}
349 // tBlockCmt is the context transition function for /*comment*/ states.
350 func tBlockCmt(c context, s []byte) (context, int) {
351 i := bytes.Index(s, blockCommentEnd)
352 if i == -1 {
353 return c, len(s)
355 switch c.state {
356 case stateJSBlockCmt:
357 c.state = stateJS
358 case stateCSSBlockCmt:
359 c.state = stateCSS
360 default:
361 panic(c.state.String())
363 return c, i + 2
366 // tLineCmt is the context transition function for //comment states.
367 func tLineCmt(c context, s []byte) (context, int) {
368 var lineTerminators string
369 var endState state
370 switch c.state {
371 case stateJSLineCmt:
372 lineTerminators, endState = "\n\r\u2028\u2029", stateJS
373 case stateCSSLineCmt:
374 lineTerminators, endState = "\n\f\r", stateCSS
375 // Line comments are not part of any published CSS standard but
376 // are supported by the 4 major browsers.
377 // This defines line comments as
378 // LINECOMMENT ::= "//" [^\n\f\d]*
379 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
380 // newlines:
381 // nl ::= #xA | #xD #xA | #xD | #xC
382 default:
383 panic(c.state.String())
386 i := bytes.IndexAny(s, lineTerminators)
387 if i == -1 {
388 return c, len(s)
390 c.state = endState
391 // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
392 // "However, the LineTerminator at the end of the line is not
393 // considered to be part of the single-line comment; it is
394 // recognized separately by the lexical grammar and becomes part
395 // of the stream of input elements for the syntactic grammar."
396 return c, i
399 // tCSS is the context transition function for the CSS state.
400 func tCSS(c context, s []byte) (context, int) {
401 // CSS quoted strings are almost never used except for:
402 // (1) URLs as in background: "/foo.png"
403 // (2) Multiword font-names as in font-family: "Times New Roman"
404 // (3) List separators in content values as in inline-lists:
405 // <style>
406 // ul.inlineList { list-style: none; padding:0 }
407 // ul.inlineList > li { display: inline }
408 // ul.inlineList > li:before { content: ", " }
409 // ul.inlineList > li:first-child:before { content: "" }
410 // </style>
411 // <ul class=inlineList><li>One<li>Two<li>Three</ul>
412 // (4) Attribute value selectors as in a[href="http://example.com/"]
414 // We conservatively treat all strings as URLs, but make some
415 // allowances to avoid confusion.
417 // In (1), our conservative assumption is justified.
418 // In (2), valid font names do not contain ':', '?', or '#', so our
419 // conservative assumption is fine since we will never transition past
420 // urlPartPreQuery.
421 // In (3), our protocol heuristic should not be tripped, and there
422 // should not be non-space content after a '?' or '#', so as long as
423 // we only %-encode RFC 3986 reserved characters we are ok.
424 // In (4), we should URL escape for URL attributes, and for others we
425 // have the attribute name available if our conservative assumption
426 // proves problematic for real code.
428 k := 0
429 for {
430 i := k + bytes.IndexAny(s[k:], `("'/`)
431 if i < k {
432 return c, len(s)
434 switch s[i] {
435 case '(':
436 // Look for url to the left.
437 p := bytes.TrimRight(s[:i], "\t\n\f\r ")
438 if endsWithCSSKeyword(p, "url") {
439 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
440 switch {
441 case j != len(s) && s[j] == '"':
442 c.state, j = stateCSSDqURL, j+1
443 case j != len(s) && s[j] == '\'':
444 c.state, j = stateCSSSqURL, j+1
445 default:
446 c.state = stateCSSURL
448 return c, j
450 case '/':
451 if i+1 < len(s) {
452 switch s[i+1] {
453 case '/':
454 c.state = stateCSSLineCmt
455 return c, i + 2
456 case '*':
457 c.state = stateCSSBlockCmt
458 return c, i + 2
461 case '"':
462 c.state = stateCSSDqStr
463 return c, i + 1
464 case '\'':
465 c.state = stateCSSSqStr
466 return c, i + 1
468 k = i + 1
472 // tCSSStr is the context transition function for the CSS string and URL states.
473 func tCSSStr(c context, s []byte) (context, int) {
474 var endAndEsc string
475 switch c.state {
476 case stateCSSDqStr, stateCSSDqURL:
477 endAndEsc = `\"`
478 case stateCSSSqStr, stateCSSSqURL:
479 endAndEsc = `\'`
480 case stateCSSURL:
481 // Unquoted URLs end with a newline or close parenthesis.
482 // The below includes the wc (whitespace character) and nl.
483 endAndEsc = "\\\t\n\f\r )"
484 default:
485 panic(c.state.String())
488 k := 0
489 for {
490 i := k + bytes.IndexAny(s[k:], endAndEsc)
491 if i < k {
492 c, nread := tURL(c, decodeCSS(s[k:]))
493 return c, k + nread
495 if s[i] == '\\' {
497 if i == len(s) {
498 return context{
499 state: stateError,
500 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
501 }, len(s)
503 } else {
504 c.state = stateCSS
505 return c, i + 1
507 c, _ = tURL(c, decodeCSS(s[:i+1]))
508 k = i + 1
512 // tError is the context transition function for the error state.
513 func tError(c context, s []byte) (context, int) {
514 return c, len(s)
517 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
518 // It returns an error if s[i:] does not look like it begins with an
519 // attribute name, such as encountering a quote mark without a preceding
520 // equals sign.
521 func eatAttrName(s []byte, i int) (int, *Error) {
522 for j := i; j < len(s); j++ {
523 switch s[j] {
524 case ' ', '\t', '\n', '\f', '\r', '=', '>':
525 return j, nil
526 case '\'', '"', '<':
527 // These result in a parse warning in HTML5 and are
528 // indicative of serious problems if seen in an attr
529 // name in a template.
530 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
531 default:
532 // No-op.
535 return len(s), nil
538 var elementNameMap = map[string]element{
539 "script": elementScript,
540 "style": elementStyle,
541 "textarea": elementTextarea,
542 "title": elementTitle,
// asciiAlpha reports whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z':
		return true
	case 'a' <= c && c <= 'z':
		return true
	}
	return false
}
// asciiAlphaNum reports whether c is an ASCII letter or digit.
func asciiAlphaNum(c byte) bool {
	switch {
	case '0' <= c && c <= '9',
		'A' <= c && c <= 'Z',
		'a' <= c && c <= 'z':
		return true
	}
	return false
}
555 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
556 func eatTagName(s []byte, i int) (int, element) {
557 if i == len(s) || !asciiAlpha(s[i]) {
558 return i, elementNone
560 j := i + 1
561 for j < len(s) {
562 x := s[j]
563 if asciiAlphaNum(x) {
565 continue
567 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
568 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
569 j += 2
570 continue
572 break
574 return j, elementNameMap[strings.ToLower(string(s[i:j]))]
// eatWhiteSpace returns the largest j such that s[i:j] is white space.
// White space here means space, tab, newline, form feed or carriage return.
func eatWhiteSpace(s []byte, i int) int {
	for pos := i; pos < len(s); pos++ {
		if ch := s[pos]; ch != ' ' && ch != '\t' && ch != '\n' && ch != '\f' && ch != '\r' {
			return pos
		}
	}
	return len(s)
}