1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
12 // transitionFunc is the array of context transition functions for text nodes.
13 // A transition function takes a context and template text input, and returns
14 // the updated context and the number of bytes consumed from the front of the input.
16 var transitionFunc
= [...]func(context
, []byte) (context
, int){
19 stateAttrName
: tAttrName
,
20 stateAfterName
: tAfterName
,
21 stateBeforeValue
: tBeforeValue
,
22 stateHTMLCmt
: tHTMLCmt
,
23 stateRCDATA
: tSpecialTagEnd
,
28 stateJSDqStr
: tJSDelimited
,
29 stateJSSqStr
: tJSDelimited
,
30 stateJSRegexp
: tJSDelimited
,
31 stateJSBlockCmt
: tBlockCmt
,
32 stateJSLineCmt
: tLineCmt
,
34 stateCSSDqStr
: tCSSStr
,
35 stateCSSSqStr
: tCSSStr
,
36 stateCSSDqURL
: tCSSStr
,
37 stateCSSSqURL
: tCSSStr
,
39 stateCSSBlockCmt
: tBlockCmt
,
40 stateCSSLineCmt
: tLineCmt
,
44 var commentStart
= []byte("<!--")
45 var commentEnd
= []byte("-->")
47 // tText is the context transition function for the text state.
48 func tText(c context
, s
[]byte) (context
, int) {
51 i
:= k
+ bytes
.IndexByte(s
[k
:], '<')
52 if i
< k || i
+1 == len(s
) {
54 } else if i
+4 <= len(s
) && bytes
.Equal(commentStart
, s
[i
:i
+4]) {
55 return context
{state
: stateHTMLCmt
}, i
+ 4
65 j
, e
:= eatTagName(s
, i
)
70 // We've found an HTML tag.
71 return context
{state
: stateTag
, element
: e
}, j
77 var elementContentType
= [...]state
{
78 elementNone
: stateText
,
79 elementScript
: stateJS
,
80 elementStyle
: stateCSS
,
81 elementTextarea
: stateRCDATA
,
82 elementTitle
: stateRCDATA
,
85 // tTag is the context transition function for the tag state.
86 func tTag(c context
, s
[]byte) (context
, int) {
87 // Find the attribute name.
88 i
:= eatWhiteSpace(s
, 0)
94 state
: elementContentType
[c
.element
],
98 j
, err
:= eatAttrName(s
, i
)
100 return context
{state
: stateError
, err
: err
}, len(s
)
102 state
, attr
:= stateTag
, attrNone
106 err
: errorf(ErrBadHTML
, nil, 0, "expected space, attr name, or end of tag, but got %q", s
[i
:]),
110 attrName
:= strings
.ToLower(string(s
[i
:j
]))
111 if c
.element
== elementScript
&& attrName
== "type" {
112 attr
= attrScriptType
114 switch attrType(attrName
) {
121 case contentTypeSrcset
:
127 state
= stateAttrName
129 state
= stateAfterName
131 return context
{state
: state
, element
: c
.element
, attr
: attr
}, j
134 // tAttrName is the context transition function for stateAttrName.
135 func tAttrName(c context
, s
[]byte) (context
, int) {
136 i
, err
:= eatAttrName(s
, 0)
138 return context
{state
: stateError
, err
: err
}, len(s
)
139 } else if i
!= len(s
) {
140 c
.state
= stateAfterName
145 // tAfterName is the context transition function for stateAfterName.
146 func tAfterName(c context
, s
[]byte) (context
, int) {
147 // Look for the start of the value.
148 i
:= eatWhiteSpace(s
, 0)
151 } else if s
[i
] != '=' {
152 // Occurs due to tag ending '>', and valueless attribute.
156 c
.state
= stateBeforeValue
161 var attrStartStates
= [...]state
{
164 attrScriptType
: stateAttr
,
167 attrSrcset
: stateSrcset
,
170 // tBeforeValue is the context transition function for stateBeforeValue.
171 func tBeforeValue(c context
, s
[]byte) (context
, int) {
172 i
:= eatWhiteSpace(s
, 0)
176 // Find the attribute delimiter.
177 delim
:= delimSpaceOrTagEnd
180 delim
, i
= delimSingleQuote
, i
+1
182 delim
, i
= delimDoubleQuote
, i
+1
184 c
.state
, c
.delim
= attrStartStates
[c
.attr
], delim
188 // tHTMLCmt is the context transition function for stateHTMLCmt.
189 func tHTMLCmt(c context
, s
[]byte) (context
, int) {
190 if i
:= bytes
.Index(s
, commentEnd
); i
!= -1 {
191 return context
{}, i
+ 3
196 // specialTagEndMarkers maps element types to the character sequence that
197 // case-insensitively signals the end of the special tag body.
198 var specialTagEndMarkers
= [...][]byte{
199 elementScript
: []byte("script"),
200 elementStyle
: []byte("style"),
201 elementTextarea
: []byte("textarea"),
202 elementTitle
: []byte("title"),
206 specialTagEndPrefix
= []byte("</")
207 tagEndSeparators
= []byte("> \t\n\f/")
210 // tSpecialTagEnd is the context transition function for raw text and RCDATA element states.
212 func tSpecialTagEnd(c context
, s
[]byte) (context
, int) {
213 if c
.element
!= elementNone
{
214 if i
:= indexTagEnd(s
, specialTagEndMarkers
[c
.element
]); i
!= -1 {
221 // indexTagEnd finds the index of a special tag end in a case-insensitive way, or returns -1 if there is none.
222 func indexTagEnd(s
[]byte, tag
[]byte) int {
224 plen
:= len(specialTagEndPrefix
)
226 // Try to find the tag end prefix first
227 i
:= bytes
.Index(s
, specialTagEndPrefix
)
232 // Try to match the actual tag if there is still space for it
233 if len(tag
) <= len(s
) && bytes
.EqualFold(tag
, s
[:len(tag
)]) {
235 // Check the tag is followed by a proper separator
236 if len(s
) > 0 && bytes
.IndexByte(tagEndSeparators
, s
[0]) != -1 {
246 // tAttr is the context transition function for the attribute state.
247 func tAttr(c context
, s
[]byte) (context
, int) {
251 // tURL is the context transition function for the URL state.
252 func tURL(c context
, s
[]byte) (context
, int) {
253 if bytes
.ContainsAny(s
, "#?") {
254 c
.urlPart
= urlPartQueryOrFrag
255 } else if len(s
) != eatWhiteSpace(s
, 0) && c
.urlPart
== urlPartNone
{
256 // HTML5 uses "Valid URL potentially surrounded by spaces" for
257 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
258 c
.urlPart
= urlPartPreQuery
263 // tJS is the context transition function for the JS state.
264 func tJS(c context
, s
[]byte) (context
, int) {
265 i
:= bytes
.IndexAny(s
, `"'/`)
267 // Entire input is non string, comment, regexp tokens.
268 c
.jsCtx
= nextJSCtx(s
, c
.jsCtx
)
271 c
.jsCtx
= nextJSCtx(s
[:i
], c
.jsCtx
)
274 c
.state
, c
.jsCtx
= stateJSDqStr
, jsCtxRegexp
276 c
.state
, c
.jsCtx
= stateJSSqStr
, jsCtxRegexp
279 case i
+1 < len(s
) && s
[i
+1] == '/':
280 c
.state
, i
= stateJSLineCmt
, i
+1
281 case i
+1 < len(s
) && s
[i
+1] == '*':
282 c
.state
, i
= stateJSBlockCmt
, i
+1
283 case c
.jsCtx
== jsCtxRegexp
:
284 c
.state
= stateJSRegexp
285 case c
.jsCtx
== jsCtxDivOp
:
286 c
.jsCtx
= jsCtxRegexp
290 err
: errorf(ErrSlashAmbig
, nil, 0, "'/' could start a division or regexp: %.32q", s
[i
:]),
299 // tJSDelimited is the context transition function for the JS string and regexp states.
301 func tJSDelimited(c context
, s
[]byte) (context
, int) {
310 k
, inCharset
:= 0, false
312 i
:= k
+ bytes
.IndexAny(s
[k
:], specials
)
322 err
: errorf(ErrPartialEscape
, nil, 0, "unfinished escape sequence in JS string: %q", s
),
332 c
.state
, c
.jsCtx
= stateJS
, jsCtxDivOp
340 // This can be fixed by making context richer if interpolation
341 // into charsets is desired.
344 err
: errorf(ErrPartialCharset
, nil, 0, "unfinished JS regexp charset: %q", s
),
351 var blockCommentEnd
= []byte("*/")
353 // tBlockCmt is the context transition function for /*comment*/ states.
354 func tBlockCmt(c context
, s
[]byte) (context
, int) {
355 i
:= bytes
.Index(s
, blockCommentEnd
)
360 case stateJSBlockCmt
:
362 case stateCSSBlockCmt
:
365 panic(c
.state
.String())
370 // tLineCmt is the context transition function for //comment states.
371 func tLineCmt(c context
, s
[]byte) (context
, int) {
372 var lineTerminators
string
376 lineTerminators
, endState
= "\n\r\u2028\u2029", stateJS
377 case stateCSSLineCmt
:
378 lineTerminators
, endState
= "\n\f\r", stateCSS
379 // Line comments are not part of any published CSS standard but
380 // are supported by the 4 major browsers.
381 // This defines line comments as
382 // LINECOMMENT ::= "//" [^\n\f\r]*
383 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
385 // nl ::= #xA | #xD #xA | #xD | #xC
387 panic(c
.state
.String())
390 i
:= bytes
.IndexAny(s
, lineTerminators
)
395 // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
396 // "However, the LineTerminator at the end of the line is not
397 // considered to be part of the single-line comment; it is
398 // recognized separately by the lexical grammar and becomes part
399 // of the stream of input elements for the syntactic grammar."
403 // tCSS is the context transition function for the CSS state.
404 func tCSS(c context
, s
[]byte) (context
, int) {
405 // CSS quoted strings are almost never used except for:
406 // (1) URLs as in background: "/foo.png"
407 // (2) Multiword font-names as in font-family: "Times New Roman"
408 // (3) List separators in content values as in inline-lists:
410 // ul.inlineList { list-style: none; padding:0 }
411 // ul.inlineList > li { display: inline }
412 // ul.inlineList > li:before { content: ", " }
413 // ul.inlineList > li:first-child:before { content: "" }
415 // <ul class=inlineList><li>One<li>Two<li>Three</ul>
416 // (4) Attribute value selectors as in a[href="http://example.com/"]
418 // We conservatively treat all strings as URLs, but make some
419 // allowances to avoid confusion.
421 // In (1), our conservative assumption is justified.
422 // In (2), valid font names do not contain ':', '?', or '#', so our
423 // conservative assumption is fine since we will never transition past urlPartPreQuery.
425 // In (3), our protocol heuristic should not be tripped, and there
426 // should not be non-space content after a '?' or '#', so as long as
427 // we only %-encode RFC 3986 reserved characters we are ok.
428 // In (4), we should URL escape for URL attributes, and for others we
429 // have the attribute name available if our conservative assumption
430 // proves problematic for real code.
434 i
:= k
+ bytes
.IndexAny(s
[k
:], `("'/`)
440 // Look for url to the left.
441 p
:= bytes
.TrimRight(s
[:i
], "\t\n\f\r ")
442 if endsWithCSSKeyword(p
, "url") {
443 j
:= len(s
) - len(bytes
.TrimLeft(s
[i
+1:], "\t\n\f\r "))
445 case j
!= len(s
) && s
[j
] == '"':
446 c
.state
, j
= stateCSSDqURL
, j
+1
447 case j
!= len(s
) && s
[j
] == '\'':
448 c
.state
, j
= stateCSSSqURL
, j
+1
450 c
.state
= stateCSSURL
458 c
.state
= stateCSSLineCmt
461 c
.state
= stateCSSBlockCmt
466 c
.state
= stateCSSDqStr
469 c
.state
= stateCSSSqStr
476 // tCSSStr is the context transition function for the CSS string and URL states.
477 func tCSSStr(c context
, s
[]byte) (context
, int) {
480 case stateCSSDqStr
, stateCSSDqURL
:
482 case stateCSSSqStr
, stateCSSSqURL
:
485 // Unquoted URLs end with a newline or close parenthesis.
486 // The below includes the wc (whitespace character) and nl.
487 endAndEsc
= "\\\t\n\f\r )"
489 panic(c
.state
.String())
494 i
:= k
+ bytes
.IndexAny(s
[k
:], endAndEsc
)
496 c
, nread
:= tURL(c
, decodeCSS(s
[k
:]))
504 err
: errorf(ErrPartialEscape
, nil, 0, "unfinished escape sequence in CSS string: %q", s
),
511 c
, _
= tURL(c
, decodeCSS(s
[:i
+1]))
516 // tError is the context transition function for the error state.
517 func tError(c context
, s
[]byte) (context
, int) {
521 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
522 // It returns an error if s[i:] does not look like it begins with an
523 // attribute name, such as encountering a quote mark without a preceding equals sign.
525 func eatAttrName(s
[]byte, i
int) (int, *Error
) {
526 for j
:= i
; j
< len(s
); j
++ {
528 case ' ', '\t', '\n', '\f', '\r', '=', '>':
531 // These result in a parse warning in HTML5 and are
532 // indicative of serious problems if seen in an attr
533 // name in a template.
534 return -1, errorf(ErrBadHTML
, nil, 0, "%q in attribute name: %.32q", s
[j
:j
+1], s
)
542 var elementNameMap
= map[string]element
{
543 "script": elementScript
,
544 "style": elementStyle
,
545 "textarea": elementTextarea
,
546 "title": elementTitle
,
549 // asciiAlpha reports whether c is an ASCII letter.
550 func asciiAlpha(c
byte) bool {
551 return 'A' <= c
&& c
<= 'Z' ||
'a' <= c
&& c
<= 'z'
554 // asciiAlphaNum reports whether c is an ASCII letter or digit.
555 func asciiAlphaNum(c
byte) bool {
556 return asciiAlpha(c
) ||
'0' <= c
&& c
<= '9'
559 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
560 func eatTagName(s
[]byte, i
int) (int, element
) {
561 if i
== len(s
) ||
!asciiAlpha(s
[i
]) {
562 return i
, elementNone
567 if asciiAlphaNum(x
) {
571 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
572 if (x
== ':' || x
== '-') && j
+1 < len(s
) && asciiAlphaNum(s
[j
+1]) {
578 return j
, elementNameMap
[strings
.ToLower(string(s
[i
:j
]))]
581 // eatWhiteSpace returns the largest j such that s[i:j] is white space.
582 func eatWhiteSpace(s
[]byte, i
int) int {
583 for j
:= i
; j
< len(s
); j
++ {
585 case ' ', '\t', '\n', '\f', '\r':