libgo/go/html/template/transition.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package template
   6
   7 import (
   8         "bytes"
   9         "strings"
  10 )
  11
// transitionFunc is the array of context transition functions for text nodes,
// indexed by state.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// Several states share a function: stateURL and stateSrcset both use tURL,
// the JS string and regexp states use tJSDelimited, all CSS string and URL
// states use tCSSStr, and the JS and CSS comment states share tBlockCmt and
// tLineCmt.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:        tText,
	stateTag:         tTag,
	stateAttrName:    tAttrName,
	stateAfterName:   tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt:     tHTMLCmt,
	stateRCDATA:      tSpecialTagEnd,
	stateAttr:        tAttr,
	stateURL:         tURL,
	stateSrcset:      tURL,
	stateJS:          tJS,
	stateJSDqStr:     tJSDelimited,
	stateJSSqStr:     tJSDelimited,
	stateJSRegexp:    tJSDelimited,
	stateJSBlockCmt:  tBlockCmt,
	stateJSLineCmt:   tLineCmt,
	stateCSS:         tCSS,
	stateCSSDqStr:    tCSSStr,
	stateCSSSqStr:    tCSSStr,
	stateCSSDqURL:    tCSSStr,
	stateCSSSqURL:    tCSSStr,
	stateCSSURL:      tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt:  tLineCmt,
	stateError:       tError,
}
  43
// commentStart is the byte sequence that opens an HTML comment; tText switches
// to stateHTMLCmt when it sees it.
var commentStart = []byte("<!--")

// commentEnd is the byte sequence that closes an HTML comment; tHTMLCmt
// searches for it to leave stateHTMLCmt.
var commentEnd = []byte("-->")
  46
  47 // tText is the context transition function for the text state.
  48 func tText(c context, s []byte) (context, int) {
  49         k := 0
  50         for {
  51                 i := k + bytes.IndexByte(s[k:], '<')
  52                 if i < k || i+1 == len(s) {
  53                         return c, len(s)
  54                 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
  55                         return context{state: stateHTMLCmt}, i + 4
  56                 }
  57                 i++
  58                 end := false
  59                 if s[i] == '/' {
  60                         if i+1 == len(s) {
  61                                 return c, len(s)
  62                         }
  63                         end, i = true, i+1
  64                 }
  65                 j, e := eatTagName(s, i)
  66                 if j != i {
  67                         if end {
  68                                 e = elementNone
  69                         }
  70                         // We've found an HTML tag.
  71                         return context{state: stateTag, element: e}, j
  72                 }
  73                 k = j
  74         }
  75 }
  76
// elementContentType maps an element type to the state used for that
// element's content. tTag consults it when it reads the '>' that closes a tag.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
  84
  85 // tTag is the context transition function for the tag state.
  86 func tTag(c context, s []byte) (context, int) {
  87         // Find the attribute name.
  88         i := eatWhiteSpace(s, 0)
  89         if i == len(s) {
  90                 return c, len(s)
  91         }
  92         if s[i] == '>' {
  93                 return context{
  94                         state:   elementContentType[c.element],
  95                         element: c.element,
  96                 }, i + 1
  97         }
  98         j, err := eatAttrName(s, i)
  99         if err != nil {
 100                 return context{state: stateError, err: err}, len(s)
 101         }
 102         state, attr := stateTag, attrNone
 103         if i == j {
 104                 return context{
 105                         state: stateError,
 106                         err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
 107                 }, len(s)
 108         }
 109
 110         attrName := strings.ToLower(string(s[i:j]))
 111         if c.element == elementScript && attrName == "type" {
 112                 attr = attrScriptType
 113         } else {
 114                 switch attrType(attrName) {
 115                 case contentTypeURL:
 116                         attr = attrURL
 117                 case contentTypeCSS:
 118                         attr = attrStyle
 119                 case contentTypeJS:
 120                         attr = attrScript
 121                 case contentTypeSrcset:
 122                         attr = attrSrcset
 123                 }
 124         }
 125
 126         if j == len(s) {
 127                 state = stateAttrName
 128         } else {
 129                 state = stateAfterName
 130         }
 131         return context{state: state, element: c.element, attr: attr}, j
 132 }
 133
 134 // tAttrName is the context transition function for stateAttrName.
 135 func tAttrName(c context, s []byte) (context, int) {
 136         i, err := eatAttrName(s, 0)
 137         if err != nil {
 138                 return context{state: stateError, err: err}, len(s)
 139         } else if i != len(s) {
 140                 c.state = stateAfterName
 141         }
 142         return c, i
 143 }
 144
 145 // tAfterName is the context transition function for stateAfterName.
 146 func tAfterName(c context, s []byte) (context, int) {
 147         // Look for the start of the value.
 148         i := eatWhiteSpace(s, 0)
 149         if i == len(s) {
 150                 return c, len(s)
 151         } else if s[i] != '=' {
 152                 // Occurs due to tag ending '>', and valueless attribute.
 153                 c.state = stateTag
 154                 return c, i
 155         }
 156         c.state = stateBeforeValue
 157         // Consume the "=".
 158         return c, i + 1
 159 }
 160
// attrStartStates maps an attribute type to the state used for the start of
// that attribute's value. tBeforeValue consults it once it has determined the
// value's delimiter.
var attrStartStates = [...]state{
	attrNone:       stateAttr,
	attrScript:     stateJS,
	attrScriptType: stateAttr,
	attrStyle:      stateCSS,
	attrURL:        stateURL,
	attrSrcset:     stateSrcset,
}
 169
 170 // tBeforeValue is the context transition function for stateBeforeValue.
 171 func tBeforeValue(c context, s []byte) (context, int) {
 172         i := eatWhiteSpace(s, 0)
 173         if i == len(s) {
 174                 return c, len(s)
 175         }
 176         // Find the attribute delimiter.
 177         delim := delimSpaceOrTagEnd
 178         switch s[i] {
 179         case '\'':
 180                 delim, i = delimSingleQuote, i+1
 181         case '"':
 182                 delim, i = delimDoubleQuote, i+1
 183         }
 184         c.state, c.delim = attrStartStates[c.attr], delim
 185         return c, i
 186 }
 187
 188 // tHTMLCmt is the context transition function for stateHTMLCmt.
 189 func tHTMLCmt(c context, s []byte) (context, int) {
 190         if i := bytes.Index(s, commentEnd); i != -1 {
 191                 return context{}, i + 3
 192         }
 193         return c, len(s)
 194 }
 195
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
// The entries are bare tag names; tSpecialTagEnd passes them to indexTagEnd,
// which looks for them after a "</" prefix.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
}
 204
var (
	// specialTagEndPrefix is the byte sequence that starts an end tag;
	// indexTagEnd searches for it before trying to match a tag name.
	specialTagEndPrefix = []byte("</")
	// tagEndSeparators lists the bytes that indexTagEnd accepts immediately
	// after the tag name of an end tag.
	tagEndSeparators = []byte("> \t\n\f/")
)
 209
 210 // tSpecialTagEnd is the context transition function for raw text and RCDATA
 211 // element states.
 212 func tSpecialTagEnd(c context, s []byte) (context, int) {
 213         if c.element != elementNone {
 214                 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
 215                         return context{}, i
 216                 }
 217         }
 218         return c, len(s)
 219 }
 220
 221 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
 222 func indexTagEnd(s []byte, tag []byte) int {
 223         res := 0
 224         plen := len(specialTagEndPrefix)
 225         for len(s) > 0 {
 226                 // Try to find the tag end prefix first
 227                 i := bytes.Index(s, specialTagEndPrefix)
 228                 if i == -1 {
 229                         return i
 230                 }
 231                 s = s[i+plen:]
 232                 // Try to match the actual tag if there is still space for it
 233                 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
 234                         s = s[len(tag):]
 235                         // Check the tag is followed by a proper separator
 236                         if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
 237                                 return res + i
 238                         }
 239                         res += len(tag)
 240                 }
 241                 res += i + plen
 242         }
 243         return -1
 244 }
 245
// tAttr is the context transition function for the attribute state.
// It returns c unchanged and reports all of s as consumed.
func tAttr(c context, s []byte) (context, int) {
	return c, len(s)
}
 250
 251 // tURL is the context transition function for the URL state.
 252 func tURL(c context, s []byte) (context, int) {
 253         if bytes.ContainsAny(s, "#?") {
 254                 c.urlPart = urlPartQueryOrFrag
 255         } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
 256                 // HTML5 uses "Valid URL potentially surrounded by spaces" for
 257                 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
 258                 c.urlPart = urlPartPreQuery
 259         }
 260         return c, len(s)
 261 }
 262
 263 // tJS is the context transition function for the JS state.
 264 func tJS(c context, s []byte) (context, int) {
 265         i := bytes.IndexAny(s, `"'/`)
 266         if i == -1 {
 267                 // Entire input is non string, comment, regexp tokens.
 268                 c.jsCtx = nextJSCtx(s, c.jsCtx)
 269                 return c, len(s)
 270         }
 271         c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
 272         switch s[i] {
 273         case '"':
 274                 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
 275         case '\'':
 276                 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
 277         case '/':
 278                 switch {
 279                 case i+1 < len(s) && s[i+1] == '/':
 280                         c.state, i = stateJSLineCmt, i+1
 281                 case i+1 < len(s) && s[i+1] == '*':
 282                         c.state, i = stateJSBlockCmt, i+1
 283                 case c.jsCtx == jsCtxRegexp:
 284                         c.state = stateJSRegexp
 285                 case c.jsCtx == jsCtxDivOp:
 286                         c.jsCtx = jsCtxRegexp
 287                 default:
 288                         return context{
 289                                 state: stateError,
 290                                 err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
 291                         }, len(s)
 292                 }
 293         default:
 294                 panic("unreachable")
 295         }
 296         return c, i + 1
 297 }
 298
 299 // tJSDelimited is the context transition function for the JS string and regexp
 300 // states.
 301 func tJSDelimited(c context, s []byte) (context, int) {
 302         specials := `\"`
 303         switch c.state {
 304         case stateJSSqStr:
 305                 specials = `\'`
 306         case stateJSRegexp:
 307                 specials = `\/[]`
 308         }
 309
 310         k, inCharset := 0, false
 311         for {
 312                 i := k + bytes.IndexAny(s[k:], specials)
 313                 if i < k {
 314                         break
 315                 }
 316                 switch s[i] {
 317                 case '\\':
 318                         i++
 319                         if i == len(s) {
 320                                 return context{
 321                                         state: stateError,
 322                                         err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
 323                                 }, len(s)
 324                         }
 325                 case '[':
 326                         inCharset = true
 327                 case ']':
 328                         inCharset = false
 329                 default:
 330                         // end delimiter
 331                         if !inCharset {
 332                                 c.state, c.jsCtx = stateJS, jsCtxDivOp
 333                                 return c, i + 1
 334                         }
 335                 }
 336                 k = i + 1
 337         }
 338
 339         if inCharset {
 340                 // This can be fixed by making context richer if interpolation
 341                 // into charsets is desired.
 342                 return context{
 343                         state: stateError,
 344                         err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
 345                 }, len(s)
 346         }
 347
 348         return c, len(s)
 349 }
 350
// blockCommentEnd is the byte sequence that terminates a /*comment*/;
// tBlockCmt searches for it in both the JS and CSS block comment states.
var blockCommentEnd = []byte("*/")
 352
 353 // tBlockCmt is the context transition function for /*comment*/ states.
 354 func tBlockCmt(c context, s []byte) (context, int) {
 355         i := bytes.Index(s, blockCommentEnd)
 356         if i == -1 {
 357                 return c, len(s)
 358         }
 359         switch c.state {
 360         case stateJSBlockCmt:
 361                 c.state = stateJS
 362         case stateCSSBlockCmt:
 363                 c.state = stateCSS
 364         default:
 365                 panic(c.state.String())
 366         }
 367         return c, i + 2
 368 }
 369
 370 // tLineCmt is the context transition function for //comment states.
 371 func tLineCmt(c context, s []byte) (context, int) {
 372         var lineTerminators string
 373         var endState state
 374         switch c.state {
 375         case stateJSLineCmt:
 376                 lineTerminators, endState = "\n\r\u2028\u2029", stateJS
 377         case stateCSSLineCmt:
 378                 lineTerminators, endState = "\n\f\r", stateCSS
 379                 // Line comments are not part of any published CSS standard but
 380                 // are supported by the 4 major browsers.
 381                 // This defines line comments as
 382                 //     LINECOMMENT ::= "//" [^\n\f\d]*
 383                 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
 384                 // newlines:
 385                 //     nl ::= #xA | #xD #xA | #xD | #xC
 386         default:
 387                 panic(c.state.String())
 388         }
 389
 390         i := bytes.IndexAny(s, lineTerminators)
 391         if i == -1 {
 392                 return c, len(s)
 393         }
 394         c.state = endState
 395         // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
 396         // "However, the LineTerminator at the end of the line is not
 397         // considered to be part of the single-line comment; it is
 398         // recognized separately by the lexical grammar and becomes part
 399         // of the stream of input elements for the syntactic grammar."
 400         return c, i
 401 }
 402
 403 // tCSS is the context transition function for the CSS state.
 404 func tCSS(c context, s []byte) (context, int) {
 405         // CSS quoted strings are almost never used except for:
 406         // (1) URLs as in background: "/foo.png"
 407         // (2) Multiword font-names as in font-family: "Times New Roman"
 408         // (3) List separators in content values as in inline-lists:
 409         //    <style>
 410         //    ul.inlineList { list-style: none; padding:0 }
 411         //    ul.inlineList > li { display: inline }
 412         //    ul.inlineList > li:before { content: ", " }
 413         //    ul.inlineList > li:first-child:before { content: "" }
 414         //    </style>
 415         //    <ul class=inlineList><li>One<li>Two<li>Three</ul>
 416         // (4) Attribute value selectors as in a[href="http://example.com/"]
 417         //
 418         // We conservatively treat all strings as URLs, but make some
 419         // allowances to avoid confusion.
 420         //
 421         // In (1), our conservative assumption is justified.
 422         // In (2), valid font names do not contain ':', '?', or '#', so our
 423         // conservative assumption is fine since we will never transition past
 424         // urlPartPreQuery.
 425         // In (3), our protocol heuristic should not be tripped, and there
 426         // should not be non-space content after a '?' or '#', so as long as
 427         // we only %-encode RFC 3986 reserved characters we are ok.
 428         // In (4), we should URL escape for URL attributes, and for others we
 429         // have the attribute name available if our conservative assumption
 430         // proves problematic for real code.
 431
 432         k := 0
 433         for {
 434                 i := k + bytes.IndexAny(s[k:], `("'/`)
 435                 if i < k {
 436                         return c, len(s)
 437                 }
 438                 switch s[i] {
 439                 case '(':
 440                         // Look for url to the left.
 441                         p := bytes.TrimRight(s[:i], "\t\n\f\r ")
 442                         if endsWithCSSKeyword(p, "url") {
 443                                 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
 444                                 switch {
 445                                 case j != len(s) && s[j] == '"':
 446                                         c.state, j = stateCSSDqURL, j+1
 447                                 case j != len(s) && s[j] == '\'':
 448                                         c.state, j = stateCSSSqURL, j+1
 449                                 default:
 450                                         c.state = stateCSSURL
 451                                 }
 452                                 return c, j
 453                         }
 454                 case '/':
 455                         if i+1 < len(s) {
 456                                 switch s[i+1] {
 457                                 case '/':
 458                                         c.state = stateCSSLineCmt
 459                                         return c, i + 2
 460                                 case '*':
 461                                         c.state = stateCSSBlockCmt
 462                                         return c, i + 2
 463                                 }
 464                         }
 465                 case '"':
 466                         c.state = stateCSSDqStr
 467                         return c, i + 1
 468                 case '\'':
 469                         c.state = stateCSSSqStr
 470                         return c, i + 1
 471                 }
 472                 k = i + 1
 473         }
 474 }
 475
 476 // tCSSStr is the context transition function for the CSS string and URL states.
 477 func tCSSStr(c context, s []byte) (context, int) {
 478         var endAndEsc string
 479         switch c.state {
 480         case stateCSSDqStr, stateCSSDqURL:
 481                 endAndEsc = `\"`
 482         case stateCSSSqStr, stateCSSSqURL:
 483                 endAndEsc = `\'`
 484         case stateCSSURL:
 485                 // Unquoted URLs end with a newline or close parenthesis.
 486                 // The below includes the wc (whitespace character) and nl.
 487                 endAndEsc = "\\\t\n\f\r )"
 488         default:
 489                 panic(c.state.String())
 490         }
 491
 492         k := 0
 493         for {
 494                 i := k + bytes.IndexAny(s[k:], endAndEsc)
 495                 if i < k {
 496                         c, nread := tURL(c, decodeCSS(s[k:]))
 497                         return c, k + nread
 498                 }
 499                 if s[i] == '\\' {
 500                         i++
 501                         if i == len(s) {
 502                                 return context{
 503                                         state: stateError,
 504                                         err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
 505                                 }, len(s)
 506                         }
 507                 } else {
 508                         c.state = stateCSS
 509                         return c, i + 1
 510                 }
 511                 c, _ = tURL(c, decodeCSS(s[:i+1]))
 512                 k = i + 1
 513         }
 514 }
 515
// tError is the context transition function for the error state.
// It returns c unchanged and reports all of s as consumed.
func tError(c context, s []byte) (context, int) {
	return c, len(s)
}
 520
 521 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
 522 // It returns an error if s[i:] does not look like it begins with an
 523 // attribute name, such as encountering a quote mark without a preceding
 524 // equals sign.
 525 func eatAttrName(s []byte, i int) (int, *Error) {
 526         for j := i; j < len(s); j++ {
 527                 switch s[j] {
 528                 case ' ', '\t', '\n', '\f', '\r', '=', '>':
 529                         return j, nil
 530                 case '\'', '"', '<':
 531                         // These result in a parse warning in HTML5 and are
 532                         // indicative of serious problems if seen in an attr
 533                         // name in a template.
 534                         return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
 535                 default:
 536                         // No-op.
 537                 }
 538         }
 539         return len(s), nil
 540 }
 541
// elementNameMap maps lower-case tag names to the element types that get
// special treatment. eatTagName lower-cases a tag name before looking it up
// here; names that are not listed yield the zero value of element
// (presumably elementNone — confirm against the element declaration).
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
 548
 549 // asciiAlpha reports whether c is an ASCII letter.
 550 func asciiAlpha(c byte) bool {
 551         return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
 552 }
 553
 554 // asciiAlphaNum reports whether c is an ASCII letter or digit.
 555 func asciiAlphaNum(c byte) bool {
 556         return asciiAlpha(c) || '0' <= c && c <= '9'
 557 }
 558
 559 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
 560 func eatTagName(s []byte, i int) (int, element) {
 561         if i == len(s) || !asciiAlpha(s[i]) {
 562                 return i, elementNone
 563         }
 564         j := i + 1
 565         for j < len(s) {
 566                 x := s[j]
 567                 if asciiAlphaNum(x) {
 568                         j++
 569                         continue
 570                 }
 571                 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
 572                 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
 573                         j += 2
 574                         continue
 575                 }
 576                 break
 577         }
 578         return j, elementNameMap[strings.ToLower(string(s[i:j]))]
 579 }
 580
 581 // eatWhiteSpace returns the largest j such that s[i:j] is white space.
 582 func eatWhiteSpace(s []byte, i int) int {
 583         for j := i; j < len(s); j++ {
 584                 switch s[j] {
 585                 case ' ', '\t', '\n', '\f', '\r':
 586                         // No-op.
 587                 default:
 588                         return j
 589                 }
 590         }
 591         return len(s)
 592 }