libgo/go/html/template/js.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package template
   6
   7 import (
   8         "bytes"
   9         "encoding/json"
  10         "fmt"
  11         "reflect"
  12         "strings"
  13         "unicode/utf8"
  14 )
  15
  16 // nextJSCtx returns the context that determines whether a slash after the
  17 // given run of tokens starts a regular expression instead of a division
  18 // operator: / or /=.
  19 //
  20 // This assumes that the token run does not include any string tokens, comment
  21 // tokens, regular expression literal tokens, or division operators.
  22 //
  23 // This fails on some valid but nonsensical JavaScript programs like
  24 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
  25 // fail on any known useful programs. It is based on the draft
  26 // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
  27 // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
  28 func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
  29         s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
  30         if len(s) == 0 {
  31                 return preceding
  32         }
  33
  34         // All cases below are in the single-byte UTF-8 group.
  35         switch c, n := s[len(s)-1], len(s); c {
  36         case '+', '-':
  37                 // ++ and -- are not regexp preceders, but + and - are whether
  38                 // they are used as infix or prefix operators.
  39                 start := n - 1
  40                 // Count the number of adjacent dashes or pluses.
  41                 for start > 0 && s[start-1] == c {
  42                         start--
  43                 }
  44                 if (n-start)&1 == 1 {
  45                         // Reached for trailing minus signs since "---" is the
  46                         // same as "-- -".
  47                         return jsCtxRegexp
  48                 }
  49                 return jsCtxDivOp
  50         case '.':
  51                 // Handle "42."
  52                 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
  53                         return jsCtxDivOp
  54                 }
  55                 return jsCtxRegexp
  56         // Suffixes for all punctuators from section 7.7 of the language spec
  57         // that only end binary operators not handled above.
  58         case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
  59                 return jsCtxRegexp
  60         // Suffixes for all punctuators from section 7.7 of the language spec
  61         // that are prefix operators not handled above.
  62         case '!', '~':
  63                 return jsCtxRegexp
  64         // Matches all the punctuators from section 7.7 of the language spec
  65         // that are open brackets not handled above.
  66         case '(', '[':
  67                 return jsCtxRegexp
  68         // Matches all the punctuators from section 7.7 of the language spec
  69         // that precede expression starts.
  70         case ':', ';', '{':
  71                 return jsCtxRegexp
  72         // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
  73         // are handled in the default except for '}' which can precede a
  74         // division op as in
  75         //    ({ valueOf: function () { return 42 } } / 2
  76         // which is valid, but, in practice, developers don't divide object
  77         // literals, so our heuristic works well for code like
  78         //    function () { ... }  /foo/.test(x) && sideEffect();
  79         // The ')' punctuator can precede a regular expression as in
  80         //     if (b) /foo/.test(x) && ...
  81         // but this is much less likely than
  82         //     (a + b) / c
  83         case '}':
  84                 return jsCtxRegexp
  85         default:
  86                 // Look for an IdentifierName and see if it is a keyword that
  87                 // can precede a regular expression.
  88                 j := n
  89                 for j > 0 && isJSIdentPart(rune(s[j-1])) {
  90                         j--
  91                 }
  92                 if regexpPrecederKeywords[string(s[j:])] {
  93                         return jsCtxRegexp
  94                 }
  95         }
  96         // Otherwise is a punctuator not listed above, or
  97         // a string which precedes a div op, or an identifier
  98         // which precedes a div op.
  99         return jsCtxDivOp
 100 }
 101
 102 // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
 103 // regular expression in JS source.
 104 var regexpPrecederKeywords = map[string]bool{
 105         "break":      true,
 106         "case":       true,
 107         "continue":   true,
 108         "delete":     true,
 109         "do":         true,
 110         "else":       true,
 111         "finally":    true,
 112         "in":         true,
 113         "instanceof": true,
 114         "return":     true,
 115         "throw":      true,
 116         "try":        true,
 117         "typeof":     true,
 118         "void":       true,
 119 }
 120
 121 var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
 122
 123 // indirectToJSONMarshaler returns the value, after dereferencing as many times
 124 // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
 125 func indirectToJSONMarshaler(a any) any {
 126         // text/template now supports passing untyped nil as a func call
 127         // argument, so we must support it. Otherwise we'd panic below, as one
 128         // cannot call the Type or Interface methods on an invalid
 129         // reflect.Value. See golang.org/issue/18716.
 130         if a == nil {
 131                 return nil
 132         }
 133
 134         v := reflect.ValueOf(a)
 135         for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() {
 136                 v = v.Elem()
 137         }
 138         return v.Interface()
 139 }
 140
 141 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
 142 // neither side-effects nor free variables outside (NaN, Infinity).
 143 func jsValEscaper(args ...any) string {
 144         var a any
 145         if len(args) == 1 {
 146                 a = indirectToJSONMarshaler(args[0])
 147                 switch t := a.(type) {
 148                 case JS:
 149                         return string(t)
 150                 case JSStr:
 151                         // TODO: normalize quotes.
 152                         return `"` + string(t) + `"`
 153                 case json.Marshaler:
 154                         // Do not treat as a Stringer.
 155                 case fmt.Stringer:
 156                         a = t.String()
 157                 }
 158         } else {
 159                 for i, arg := range args {
 160                         args[i] = indirectToJSONMarshaler(arg)
 161                 }
 162                 a = fmt.Sprint(args...)
 163         }
 164         // TODO: detect cycles before calling Marshal which loops infinitely on
 165         // cyclic data. This may be an unacceptable DoS risk.
 166         b, err := json.Marshal(a)
 167         if err != nil {
 168                 // Put a space before comment so that if it is flush against
 169                 // a division operator it is not turned into a line comment:
 170                 //     x/{{y}}
 171                 // turning into
 172                 //     x//* error marshaling y:
 173                 //          second line of error message */null
 174                 return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /"))
 175         }
 176
 177         // TODO: maybe post-process output to prevent it from containing
 178         // "<!--", "-->", "<![CDATA[", "]]>", or "</script"
 179         // in case custom marshalers produce output containing those.
 180         // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
 181         // supports ld+json content-type.
 182         if len(b) == 0 {
 183                 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
 184                 // not cause the output `x=y/*z`.
 185                 return " null "
 186         }
 187         first, _ := utf8.DecodeRune(b)
 188         last, _ := utf8.DecodeLastRune(b)
 189         var buf strings.Builder
 190         // Prevent IdentifierNames and NumericLiterals from running into
 191         // keywords: in, instanceof, typeof, void
 192         pad := isJSIdentPart(first) || isJSIdentPart(last)
 193         if pad {
 194                 buf.WriteByte(' ')
 195         }
 196         written := 0
 197         // Make sure that json.Marshal escapes codepoints U+2028 & U+2029
 198         // so it falls within the subset of JSON which is valid JS.
 199         for i := 0; i < len(b); {
 200                 rune, n := utf8.DecodeRune(b[i:])
 201                 repl := ""
 202                 if rune == 0x2028 {
 203                         repl = `\u2028`
 204                 } else if rune == 0x2029 {
 205                         repl = `\u2029`
 206                 }
 207                 if repl != "" {
 208                         buf.Write(b[written:i])
 209                         buf.WriteString(repl)
 210                         written = i + n
 211                 }
 212                 i += n
 213         }
 214         if buf.Len() != 0 {
 215                 buf.Write(b[written:])
 216                 if pad {
 217                         buf.WriteByte(' ')
 218                 }
 219                 return buf.String()
 220         }
 221         return string(b)
 222 }
 223
 224 // jsStrEscaper produces a string that can be included between quotes in
 225 // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
 226 // or in an HTML5 event handler attribute such as onclick.
 227 func jsStrEscaper(args ...any) string {
 228         s, t := stringify(args...)
 229         if t == contentTypeJSStr {
 230                 return replace(s, jsStrNormReplacementTable)
 231         }
 232         return replace(s, jsStrReplacementTable)
 233 }
 234
 235 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
 236 // specials so the result is treated literally when included in a regular
 237 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
 238 // the literal text of {{.X}} followed by the string "bar".
 239 func jsRegexpEscaper(args ...any) string {
 240         s, _ := stringify(args...)
 241         s = replace(s, jsRegexpReplacementTable)
 242         if s == "" {
 243                 // /{{.X}}/ should not produce a line comment when .X == "".
 244                 return "(?:)"
 245         }
 246         return s
 247 }
 248
 249 // replace replaces each rune r of s with replacementTable[r], provided that
 250 // r < len(replacementTable). If replacementTable[r] is the empty string then
 251 // no replacement is made.
 252 // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
 253 // `\u2029`.
 254 func replace(s string, replacementTable []string) string {
 255         var b strings.Builder
 256         r, w, written := rune(0), 0, 0
 257         for i := 0; i < len(s); i += w {
 258                 // See comment in htmlEscaper.
 259                 r, w = utf8.DecodeRuneInString(s[i:])
 260                 var repl string
 261                 switch {
 262                 case int(r) < len(lowUnicodeReplacementTable):
 263                         repl = lowUnicodeReplacementTable[r]
 264                 case int(r) < len(replacementTable) && replacementTable[r] != "":
 265                         repl = replacementTable[r]
 266                 case r == '\u2028':
 267                         repl = `\u2028`
 268                 case r == '\u2029':
 269                         repl = `\u2029`
 270                 default:
 271                         continue
 272                 }
 273                 if written == 0 {
 274                         b.Grow(len(s))
 275                 }
 276                 b.WriteString(s[written:i])
 277                 b.WriteString(repl)
 278                 written = i + w
 279         }
 280         if written == 0 {
 281                 return s
 282         }
 283         b.WriteString(s[written:])
 284         return b.String()
 285 }
 286
 287 var lowUnicodeReplacementTable = []string{
 288         0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
 289         '\a': `\u0007`,
 290         '\b': `\u0008`,
 291         '\t': `\t`,
 292         '\n': `\n`,
 293         '\v': `\u000b`, // "\v" == "v" on IE 6.
 294         '\f': `\f`,
 295         '\r': `\r`,
 296         0xe:  `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
 297         0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
 298         0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
 299 }
 300
 301 var jsStrReplacementTable = []string{
 302         0:    `\u0000`,
 303         '\t': `\t`,
 304         '\n': `\n`,
 305         '\v': `\u000b`, // "\v" == "v" on IE 6.
 306         '\f': `\f`,
 307         '\r': `\r`,
 308         // Encode HTML specials as hex so the output can be embedded
 309         // in HTML attributes without further encoding.
 310         '"':  `\u0022`,
 311         '&':  `\u0026`,
 312         '\'': `\u0027`,
 313         '+':  `\u002b`,
 314         '/':  `\/`,
 315         '<':  `\u003c`,
 316         '>':  `\u003e`,
 317         '\\': `\\`,
 318 }
 319
 320 // jsStrNormReplacementTable is like jsStrReplacementTable but does not
 321 // overencode existing escapes since this table has no entry for `\`.
 322 var jsStrNormReplacementTable = []string{
 323         0:    `\u0000`,
 324         '\t': `\t`,
 325         '\n': `\n`,
 326         '\v': `\u000b`, // "\v" == "v" on IE 6.
 327         '\f': `\f`,
 328         '\r': `\r`,
 329         // Encode HTML specials as hex so the output can be embedded
 330         // in HTML attributes without further encoding.
 331         '"':  `\u0022`,
 332         '&':  `\u0026`,
 333         '\'': `\u0027`,
 334         '+':  `\u002b`,
 335         '/':  `\/`,
 336         '<':  `\u003c`,
 337         '>':  `\u003e`,
 338 }
 339 var jsRegexpReplacementTable = []string{
 340         0:    `\u0000`,
 341         '\t': `\t`,
 342         '\n': `\n`,
 343         '\v': `\u000b`, // "\v" == "v" on IE 6.
 344         '\f': `\f`,
 345         '\r': `\r`,
 346         // Encode HTML specials as hex so the output can be embedded
 347         // in HTML attributes without further encoding.
 348         '"':  `\u0022`,
 349         '$':  `\$`,
 350         '&':  `\u0026`,
 351         '\'': `\u0027`,
 352         '(':  `\(`,
 353         ')':  `\)`,
 354         '*':  `\*`,
 355         '+':  `\u002b`,
 356         '-':  `\-`,
 357         '.':  `\.`,
 358         '/':  `\/`,
 359         '<':  `\u003c`,
 360         '>':  `\u003e`,
 361         '?':  `\?`,
 362         '[':  `\[`,
 363         '\\': `\\`,
 364         ']':  `\]`,
 365         '^':  `\^`,
 366         '{':  `\{`,
 367         '|':  `\|`,
 368         '}':  `\}`,
 369 }
 370
 371 // isJSIdentPart reports whether the given rune is a JS identifier part.
 372 // It does not handle all the non-Latin letters, joiners, and combining marks,
 373 // but it does handle every codepoint that can occur in a numeric literal or
 374 // a keyword.
 375 func isJSIdentPart(r rune) bool {
 376         switch {
 377         case r == '$':
 378                 return true
 379         case '0' <= r && r <= '9':
 380                 return true
 381         case 'A' <= r && r <= 'Z':
 382                 return true
 383         case r == '_':
 384                 return true
 385         case 'a' <= r && r <= 'z':
 386                 return true
 387         }
 388         return false
 389 }
 390
 391 // isJSType reports whether the given MIME type should be considered JavaScript.
 392 //
 393 // It is used to determine whether a script tag with a type attribute is a javascript container.
 394 func isJSType(mimeType string) bool {
 395         // per
 396         //   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
 397         //   https://tools.ietf.org/html/rfc7231#section-3.1.1
 398         //   https://tools.ietf.org/html/rfc4329#section-3
 399         //   https://www.ietf.org/rfc/rfc4627.txt
 400         // discard parameters
 401         mimeType, _, _ = strings.Cut(mimeType, ";")
 402         mimeType = strings.ToLower(mimeType)
 403         mimeType = strings.TrimSpace(mimeType)
 404         switch mimeType {
 405         case
 406                 "application/ecmascript",
 407                 "application/javascript",
 408                 "application/json",
 409                 "application/ld+json",
 410                 "application/x-ecmascript",
 411                 "application/x-javascript",
 412                 "module",
 413                 "text/ecmascript",
 414                 "text/javascript",
 415                 "text/javascript1.0",
 416                 "text/javascript1.1",
 417                 "text/javascript1.2",
 418                 "text/javascript1.3",
 419                 "text/javascript1.4",
 420                 "text/javascript1.5",
 421                 "text/jscript",
 422                 "text/livescript",
 423                 "text/x-ecmascript",
 424                 "text/x-javascript":
 425                 return true
 426         default:
 427                 return false
 428         }
 429 }