libgo/go/html/template/transition.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package template
   6
   7 import (
   8         "bytes"
   9         "strings"
  10 )
  11
// transitionFunc is the array of context transition functions for text nodes,
// indexed by state.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// Several states share one function: both RCDATA-like bodies use
// tSpecialTagEnd, the JS string and regexp states use tJSDelimited, the CSS
// string and URL states use tCSSStr, and the JS and CSS comment states share
// tBlockCmt and tLineCmt.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:        tText,
	stateTag:         tTag,
	stateAttrName:    tAttrName,
	stateAfterName:   tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt:     tHTMLCmt,
	stateRCDATA:      tSpecialTagEnd,
	stateAttr:        tAttr,
	stateURL:         tURL,
	stateJS:          tJS,
	stateJSDqStr:     tJSDelimited,
	stateJSSqStr:     tJSDelimited,
	stateJSRegexp:    tJSDelimited,
	stateJSBlockCmt:  tBlockCmt,
	stateJSLineCmt:   tLineCmt,
	stateCSS:         tCSS,
	stateCSSDqStr:    tCSSStr,
	stateCSSSqStr:    tCSSStr,
	stateCSSDqURL:    tCSSStr,
	stateCSSSqURL:    tCSSStr,
	stateCSSURL:      tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt:  tLineCmt,
	stateError:       tError,
}
  42
  43 var commentStart = []byte("<!--")
  44 var commentEnd = []byte("-->")
  45
  46 // tText is the context transition function for the text state.
  47 func tText(c context, s []byte) (context, int) {
  48         k := 0
  49         for {
  50                 i := k + bytes.IndexByte(s[k:], '<')
  51                 if i < k || i+1 == len(s) {
  52                         return c, len(s)
  53                 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
  54                         return context{state: stateHTMLCmt}, i + 4
  55                 }
  56                 i++
  57                 end := false
  58                 if s[i] == '/' {
  59                         if i+1 == len(s) {
  60                                 return c, len(s)
  61                         }
  62                         end, i = true, i+1
  63                 }
  64                 j, e := eatTagName(s, i)
  65                 if j != i {
  66                         if end {
  67                                 e = elementNone
  68                         }
  69                         // We've found an HTML tag.
  70                         return context{state: stateTag, element: e}, j
  71                 }
  72                 k = j
  73         }
  74 }
  75
// elementContentType maps an element type to the state that tTag enters once
// the '>' closing that element's tag has been consumed.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
  83
  84 // tTag is the context transition function for the tag state.
  85 func tTag(c context, s []byte) (context, int) {
  86         // Find the attribute name.
  87         i := eatWhiteSpace(s, 0)
  88         if i == len(s) {
  89                 return c, len(s)
  90         }
  91         if s[i] == '>' {
  92                 return context{
  93                         state:   elementContentType[c.element],
  94                         element: c.element,
  95                 }, i + 1
  96         }
  97         j, err := eatAttrName(s, i)
  98         if err != nil {
  99                 return context{state: stateError, err: err}, len(s)
 100         }
 101         state, attr := stateTag, attrNone
 102         if i == j {
 103                 return context{
 104                         state: stateError,
 105                         err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
 106                 }, len(s)
 107         }
 108
 109         attrName := strings.ToLower(string(s[i:j]))
 110         if c.element == elementScript && attrName == "type" {
 111                 attr = attrScriptType
 112         } else {
 113                 switch attrType(attrName) {
 114                 case contentTypeURL:
 115                         attr = attrURL
 116                 case contentTypeCSS:
 117                         attr = attrStyle
 118                 case contentTypeJS:
 119                         attr = attrScript
 120                 }
 121         }
 122
 123         if j == len(s) {
 124                 state = stateAttrName
 125         } else {
 126                 state = stateAfterName
 127         }
 128         return context{state: state, element: c.element, attr: attr}, j
 129 }
 130
 131 // tAttrName is the context transition function for stateAttrName.
 132 func tAttrName(c context, s []byte) (context, int) {
 133         i, err := eatAttrName(s, 0)
 134         if err != nil {
 135                 return context{state: stateError, err: err}, len(s)
 136         } else if i != len(s) {
 137                 c.state = stateAfterName
 138         }
 139         return c, i
 140 }
 141
 142 // tAfterName is the context transition function for stateAfterName.
 143 func tAfterName(c context, s []byte) (context, int) {
 144         // Look for the start of the value.
 145         i := eatWhiteSpace(s, 0)
 146         if i == len(s) {
 147                 return c, len(s)
 148         } else if s[i] != '=' {
 149                 // Occurs due to tag ending '>', and valueless attribute.
 150                 c.state = stateTag
 151                 return c, i
 152         }
 153         c.state = stateBeforeValue
 154         // Consume the "=".
 155         return c, i + 1
 156 }
 157
// attrStartStates maps an attribute type to the state that tBeforeValue
// enters at the start of that attribute's value.
var attrStartStates = [...]state{
	attrNone:       stateAttr,
	attrScript:     stateJS,
	attrScriptType: stateAttr,
	attrStyle:      stateCSS,
	attrURL:        stateURL,
}
 165
 166 // tBeforeValue is the context transition function for stateBeforeValue.
 167 func tBeforeValue(c context, s []byte) (context, int) {
 168         i := eatWhiteSpace(s, 0)
 169         if i == len(s) {
 170                 return c, len(s)
 171         }
 172         // Find the attribute delimiter.
 173         delim := delimSpaceOrTagEnd
 174         switch s[i] {
 175         case '\'':
 176                 delim, i = delimSingleQuote, i+1
 177         case '"':
 178                 delim, i = delimDoubleQuote, i+1
 179         }
 180         c.state, c.delim = attrStartStates[c.attr], delim
 181         return c, i
 182 }
 183
 184 // tHTMLCmt is the context transition function for stateHTMLCmt.
 185 func tHTMLCmt(c context, s []byte) (context, int) {
 186         if i := bytes.Index(s, commentEnd); i != -1 {
 187                 return context{}, i + 3
 188         }
 189         return c, len(s)
 190 }
 191
// specialTagEndMarkers maps element types to the tag name that, when it
// follows "</" and is compared case-insensitively, signals the end of the
// special tag body. tSpecialTagEnd passes the entry for the current element
// to indexTagEnd.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
}
 200
 201 var (
 202         specialTagEndPrefix = []byte("</")
 203         tagEndSeparators    = []byte("> \t\n\f/")
 204 )
 205
 206 // tSpecialTagEnd is the context transition function for raw text and RCDATA
 207 // element states.
 208 func tSpecialTagEnd(c context, s []byte) (context, int) {
 209         if c.element != elementNone {
 210                 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
 211                         return context{}, i
 212                 }
 213         }
 214         return c, len(s)
 215 }
 216
 217 // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
 218 func indexTagEnd(s []byte, tag []byte) int {
 219         res := 0
 220         plen := len(specialTagEndPrefix)
 221         for len(s) > 0 {
 222                 // Try to find the tag end prefix first
 223                 i := bytes.Index(s, specialTagEndPrefix)
 224                 if i == -1 {
 225                         return i
 226                 }
 227                 s = s[i+plen:]
 228                 // Try to match the actual tag if there is still space for it
 229                 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
 230                         s = s[len(tag):]
 231                         // Check the tag is followed by a proper separator
 232                         if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
 233                                 return res + i
 234                         }
 235                         res += len(tag)
 236                 }
 237                 res += i + plen
 238         }
 239         return -1
 240 }
 241
// tAttr is the context transition function for the attribute state.
// It leaves the context unchanged and consumes all of s.
func tAttr(c context, s []byte) (context, int) {
	return c, len(s)
}
 246
 247 // tURL is the context transition function for the URL state.
 248 func tURL(c context, s []byte) (context, int) {
 249         if bytes.ContainsAny(s, "#?") {
 250                 c.urlPart = urlPartQueryOrFrag
 251         } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
 252                 // HTML5 uses "Valid URL potentially surrounded by spaces" for
 253                 // attrs: http://www.w3.org/TR/html5/index.html#attributes-1
 254                 c.urlPart = urlPartPreQuery
 255         }
 256         return c, len(s)
 257 }
 258
 259 // tJS is the context transition function for the JS state.
 260 func tJS(c context, s []byte) (context, int) {
 261         i := bytes.IndexAny(s, `"'/`)
 262         if i == -1 {
 263                 // Entire input is non string, comment, regexp tokens.
 264                 c.jsCtx = nextJSCtx(s, c.jsCtx)
 265                 return c, len(s)
 266         }
 267         c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
 268         switch s[i] {
 269         case '"':
 270                 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
 271         case '\'':
 272                 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
 273         case '/':
 274                 switch {
 275                 case i+1 < len(s) && s[i+1] == '/':
 276                         c.state, i = stateJSLineCmt, i+1
 277                 case i+1 < len(s) && s[i+1] == '*':
 278                         c.state, i = stateJSBlockCmt, i+1
 279                 case c.jsCtx == jsCtxRegexp:
 280                         c.state = stateJSRegexp
 281                 case c.jsCtx == jsCtxDivOp:
 282                         c.jsCtx = jsCtxRegexp
 283                 default:
 284                         return context{
 285                                 state: stateError,
 286                                 err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
 287                         }, len(s)
 288                 }
 289         default:
 290                 panic("unreachable")
 291         }
 292         return c, i + 1
 293 }
 294
 295 // tJSDelimited is the context transition function for the JS string and regexp
 296 // states.
 297 func tJSDelimited(c context, s []byte) (context, int) {
 298         specials := `\"`
 299         switch c.state {
 300         case stateJSSqStr:
 301                 specials = `\'`
 302         case stateJSRegexp:
 303                 specials = `\/[]`
 304         }
 305
 306         k, inCharset := 0, false
 307         for {
 308                 i := k + bytes.IndexAny(s[k:], specials)
 309                 if i < k {
 310                         break
 311                 }
 312                 switch s[i] {
 313                 case '\\':
 314                         i++
 315                         if i == len(s) {
 316                                 return context{
 317                                         state: stateError,
 318                                         err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
 319                                 }, len(s)
 320                         }
 321                 case '[':
 322                         inCharset = true
 323                 case ']':
 324                         inCharset = false
 325                 default:
 326                         // end delimiter
 327                         if !inCharset {
 328                                 c.state, c.jsCtx = stateJS, jsCtxDivOp
 329                                 return c, i + 1
 330                         }
 331                 }
 332                 k = i + 1
 333         }
 334
 335         if inCharset {
 336                 // This can be fixed by making context richer if interpolation
 337                 // into charsets is desired.
 338                 return context{
 339                         state: stateError,
 340                         err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
 341                 }, len(s)
 342         }
 343
 344         return c, len(s)
 345 }
 346
// blockCommentEnd is the byte sequence that terminates a /*comment*/; it is
// what tBlockCmt searches for.
var blockCommentEnd = []byte("*/")
 348
 349 // tBlockCmt is the context transition function for /*comment*/ states.
 350 func tBlockCmt(c context, s []byte) (context, int) {
 351         i := bytes.Index(s, blockCommentEnd)
 352         if i == -1 {
 353                 return c, len(s)
 354         }
 355         switch c.state {
 356         case stateJSBlockCmt:
 357                 c.state = stateJS
 358         case stateCSSBlockCmt:
 359                 c.state = stateCSS
 360         default:
 361                 panic(c.state.String())
 362         }
 363         return c, i + 2
 364 }
 365
 366 // tLineCmt is the context transition function for //comment states.
 367 func tLineCmt(c context, s []byte) (context, int) {
 368         var lineTerminators string
 369         var endState state
 370         switch c.state {
 371         case stateJSLineCmt:
 372                 lineTerminators, endState = "\n\r\u2028\u2029", stateJS
 373         case stateCSSLineCmt:
 374                 lineTerminators, endState = "\n\f\r", stateCSS
 375                 // Line comments are not part of any published CSS standard but
 376                 // are supported by the 4 major browsers.
 377                 // This defines line comments as
 378                 //     LINECOMMENT ::= "//" [^\n\f\d]*
 379                 // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
 380                 // newlines:
 381                 //     nl ::= #xA | #xD #xA | #xD | #xC
 382         default:
 383                 panic(c.state.String())
 384         }
 385
 386         i := bytes.IndexAny(s, lineTerminators)
 387         if i == -1 {
 388                 return c, len(s)
 389         }
 390         c.state = endState
 391         // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
 392         // "However, the LineTerminator at the end of the line is not
 393         // considered to be part of the single-line comment; it is
 394         // recognized separately by the lexical grammar and becomes part
 395         // of the stream of input elements for the syntactic grammar."
 396         return c, i
 397 }
 398
 399 // tCSS is the context transition function for the CSS state.
 400 func tCSS(c context, s []byte) (context, int) {
 401         // CSS quoted strings are almost never used except for:
 402         // (1) URLs as in background: "/foo.png"
 403         // (2) Multiword font-names as in font-family: "Times New Roman"
 404         // (3) List separators in content values as in inline-lists:
 405         //    <style>
 406         //    ul.inlineList { list-style: none; padding:0 }
 407         //    ul.inlineList > li { display: inline }
 408         //    ul.inlineList > li:before { content: ", " }
 409         //    ul.inlineList > li:first-child:before { content: "" }
 410         //    </style>
 411         //    <ul class=inlineList><li>One<li>Two<li>Three</ul>
 412         // (4) Attribute value selectors as in a[href="http://example.com/"]
 413         //
 414         // We conservatively treat all strings as URLs, but make some
 415         // allowances to avoid confusion.
 416         //
 417         // In (1), our conservative assumption is justified.
 418         // In (2), valid font names do not contain ':', '?', or '#', so our
 419         // conservative assumption is fine since we will never transition past
 420         // urlPartPreQuery.
 421         // In (3), our protocol heuristic should not be tripped, and there
 422         // should not be non-space content after a '?' or '#', so as long as
 423         // we only %-encode RFC 3986 reserved characters we are ok.
 424         // In (4), we should URL escape for URL attributes, and for others we
 425         // have the attribute name available if our conservative assumption
 426         // proves problematic for real code.
 427
 428         k := 0
 429         for {
 430                 i := k + bytes.IndexAny(s[k:], `("'/`)
 431                 if i < k {
 432                         return c, len(s)
 433                 }
 434                 switch s[i] {
 435                 case '(':
 436                         // Look for url to the left.
 437                         p := bytes.TrimRight(s[:i], "\t\n\f\r ")
 438                         if endsWithCSSKeyword(p, "url") {
 439                                 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
 440                                 switch {
 441                                 case j != len(s) && s[j] == '"':
 442                                         c.state, j = stateCSSDqURL, j+1
 443                                 case j != len(s) && s[j] == '\'':
 444                                         c.state, j = stateCSSSqURL, j+1
 445                                 default:
 446                                         c.state = stateCSSURL
 447                                 }
 448                                 return c, j
 449                         }
 450                 case '/':
 451                         if i+1 < len(s) {
 452                                 switch s[i+1] {
 453                                 case '/':
 454                                         c.state = stateCSSLineCmt
 455                                         return c, i + 2
 456                                 case '*':
 457                                         c.state = stateCSSBlockCmt
 458                                         return c, i + 2
 459                                 }
 460                         }
 461                 case '"':
 462                         c.state = stateCSSDqStr
 463                         return c, i + 1
 464                 case '\'':
 465                         c.state = stateCSSSqStr
 466                         return c, i + 1
 467                 }
 468                 k = i + 1
 469         }
 470 }
 471
 472 // tCSSStr is the context transition function for the CSS string and URL states.
 473 func tCSSStr(c context, s []byte) (context, int) {
 474         var endAndEsc string
 475         switch c.state {
 476         case stateCSSDqStr, stateCSSDqURL:
 477                 endAndEsc = `\"`
 478         case stateCSSSqStr, stateCSSSqURL:
 479                 endAndEsc = `\'`
 480         case stateCSSURL:
 481                 // Unquoted URLs end with a newline or close parenthesis.
 482                 // The below includes the wc (whitespace character) and nl.
 483                 endAndEsc = "\\\t\n\f\r )"
 484         default:
 485                 panic(c.state.String())
 486         }
 487
 488         k := 0
 489         for {
 490                 i := k + bytes.IndexAny(s[k:], endAndEsc)
 491                 if i < k {
 492                         c, nread := tURL(c, decodeCSS(s[k:]))
 493                         return c, k + nread
 494                 }
 495                 if s[i] == '\\' {
 496                         i++
 497                         if i == len(s) {
 498                                 return context{
 499                                         state: stateError,
 500                                         err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
 501                                 }, len(s)
 502                         }
 503                 } else {
 504                         c.state = stateCSS
 505                         return c, i + 1
 506                 }
 507                 c, _ = tURL(c, decodeCSS(s[:i+1]))
 508                 k = i + 1
 509         }
 510 }
 511
// tError is the context transition function for the error state.
// It leaves the context unchanged and consumes all of s.
func tError(c context, s []byte) (context, int) {
	return c, len(s)
}
 516
 517 // eatAttrName returns the largest j such that s[i:j] is an attribute name.
 518 // It returns an error if s[i:] does not look like it begins with an
 519 // attribute name, such as encountering a quote mark without a preceding
 520 // equals sign.
 521 func eatAttrName(s []byte, i int) (int, *Error) {
 522         for j := i; j < len(s); j++ {
 523                 switch s[j] {
 524                 case ' ', '\t', '\n', '\f', '\r', '=', '>':
 525                         return j, nil
 526                 case '\'', '"', '<':
 527                         // These result in a parse warning in HTML5 and are
 528                         // indicative of serious problems if seen in an attr
 529                         // name in a template.
 530                         return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
 531                 default:
 532                         // No-op.
 533                 }
 534         }
 535         return len(s), nil
 536 }
 537
// elementNameMap maps lower-case tag names to the element type that needs
// special handling. eatTagName looks tag names up here after lower-casing
// them; a name that is absent yields the zero element value (presumably
// elementNone; confirm against the element declaration).
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
 544
 545 // asciiAlpha reports whether c is an ASCII letter.
 546 func asciiAlpha(c byte) bool {
 547         return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
 548 }
 549
 550 // asciiAlphaNum reports whether c is an ASCII letter or digit.
 551 func asciiAlphaNum(c byte) bool {
 552         return asciiAlpha(c) || '0' <= c && c <= '9'
 553 }
 554
 555 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
 556 func eatTagName(s []byte, i int) (int, element) {
 557         if i == len(s) || !asciiAlpha(s[i]) {
 558                 return i, elementNone
 559         }
 560         j := i + 1
 561         for j < len(s) {
 562                 x := s[j]
 563                 if asciiAlphaNum(x) {
 564                         j++
 565                         continue
 566                 }
 567                 // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
 568                 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
 569                         j += 2
 570                         continue
 571                 }
 572                 break
 573         }
 574         return j, elementNameMap[strings.ToLower(string(s[i:j]))]
 575 }
 576
 577 // eatWhiteSpace returns the largest j such that s[i:j] is white space.
 578 func eatWhiteSpace(s []byte, i int) int {
 579         for j := i; j < len(s); j++ {
 580                 switch s[j] {
 581                 case ' ', '\t', '\n', '\f', '\r':
 582                         // No-op.
 583                 default:
 584                         return j
 585                 }
 586         }
 587         return len(s)
 588 }