1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 // nextJSCtx returns the context that determines whether a slash after the
17 // given run of tokens starts a regular expression instead of a division
20 // This assumes that the token run does not include any string tokens, comment
21 // tokens, regular expression literal tokens, or division operators.
23 // This fails on some valid but nonsensical JavaScript programs like
24 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
25 // fail on any known useful programs. It is based on the draft
26 // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
27 // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
28 func nextJSCtx(s
[]byte, preceding jsCtx
) jsCtx
{
// Drop trailing whitespace, including U+2028 and U+2029, so that the last
// remaining byte is the one examined below.
29 s
= bytes
.TrimRight(s
, "\t\n\f\r \u2028\u2029")
34 // All cases below are in the single-byte UTF-8 group.
// c is the final byte of the trimmed input and n is the trimmed length.
35 switch c
, n
:= s
[len(s
)-1], len(s
); c
{
37 // ++ and -- are not regexp preceders, but + and - are, whether
38 // they are used as infix or prefix operators.
40 // Count the number of adjacent dashes or pluses.
41 for start
> 0 && s
[start
-1] == c
{
45 // Reached for trailing minus signs since "---" is the
52 if n
!= 1 && '0' <= s
[n
-2] && s
[n
-2] <= '9' {
56 // Suffixes for all punctuators from section 7.7 of the language spec
57 // that only end binary operators not handled above.
58 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
60 // Suffixes for all punctuators from section 7.7 of the language spec
61 // that are prefix operators not handled above.
64 // Matches all the punctuators from section 7.7 of the language spec
65 // that are open brackets not handled above.
68 // Matches all the punctuators from section 7.7 of the language spec
69 // that precede expression starts.
72 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
73 // are handled in the default except for '}' which can precede a
75 // ({ valueOf: function () { return 42 } } / 2
76 // which is valid, but, in practice, developers don't divide object
77 // literals, so our heuristic works well for code like
78 // function () { ... } /foo/.test(x) && sideEffect();
79 // The ')' punctuator can precede a regular expression as in
80 // if (b) /foo/.test(x) && ...
81 // but this is much less likely than
86 // Look for an IdentifierName and see if it is a keyword that
87 // can precede a regular expression.
// Loop while the byte just before index j is an identifier part.
89 for j
> 0 && isJSIdentPart(rune(s
[j
-1])) {
// s[j:] is the trailing identifier text; look it up in the keyword set.
92 if regexpPrecederKeywords
[string(s
[j
:])] {
96 // Otherwise is a punctuator not listed above, or
97 // a string which precedes a div op, or an identifier
98 // which precedes a div op.
102 // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
103 // regular expression in JS source.
// It is a map[string]bool keyed by keyword text; nextJSCtx indexes it with
// the identifier found at the end of its input.
104 var regexpPrecederKeywords
= map[string]bool{
// jsonMarshalType is the reflect.Type of the json.Marshaler interface,
// obtained by taking Elem of the type of a nil *json.Marshaler.
// indirectToJSONMarshaler passes it to Implements.
121 var jsonMarshalType
= reflect
.TypeOf((*json
.Marshaler
)(nil)).Elem()
123 // indirectToJSONMarshaler returns the value, after dereferencing as many times
124 // as necessary to reach the base type (or nil) or an implementation of json.Marshaler.
125 func indirectToJSONMarshaler(a any
) any
{
126 // text/template now supports passing untyped nil as a func call
127 // argument, so we must support it. Otherwise we'd panic below, as one
128 // cannot call the Type or Interface methods on an invalid
129 // reflect.Value. See golang.org/issue/18716.
134 v
:= reflect
.ValueOf(a
)
// Loop while v is a non-nil pointer whose type does not implement
// jsonMarshalType (the json.Marshaler interface type).
135 for !v
.Type().Implements(jsonMarshalType
) && v
.Kind() == reflect
.Pointer
&& !v
.IsNil() {
141 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
142 // neither side-effects nor free variables outside (NaN, Infinity).
143 func jsValEscaper(args
...any
) string {
// Dereference the first argument before switching on its dynamic type.
146 a
= indirectToJSONMarshaler(args
[0])
147 switch t
:= a
.(type) {
151 // TODO: normalize quotes.
152 return `"` + string(t
) + `"`
154 // Do not treat as a Stringer.
// Dereference every argument in place, then join them with fmt.Sprint.
159 for i
, arg
:= range args
{
160 args
[i
] = indirectToJSONMarshaler(arg
)
162 a
= fmt
.Sprint(args
...)
164 // TODO: detect cycles before calling Marshal which loops infinitely on
165 // cyclic data. This may be an unacceptable DoS risk.
166 b
, err
:= json
.Marshal(a
)
168 // Put a space before comment so that if it is flush against
169 // a division operator it is not turned into a line comment:
172 // x//* error marshaling y:
173 // second line of error message */null
// Any "*/" inside the error text is rewritten to "* /" so that it cannot
// terminate the emitted block comment early.
174 return fmt
.Sprintf(" /* %s */null ", strings
.ReplaceAll(err
.Error(), "*/", "* /"))
177 // TODO: maybe post-process output to prevent it from containing
178 // "<!--", "-->", "<![CDATA[", "]]>", or "</script"
179 // in case custom marshalers produce output containing those.
180 // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
181 // supports ld+json content-type.
183 // In `x=y/{{.}}*z`, a json.Marshaler that produces "" should
184 // not cause the output `x=y/*z`.
// Decode the first and last runes of the marshaled bytes; they decide
// whether padding is needed below.
187 first
, _
:= utf8
.DecodeRune(b
)
188 last
, _
:= utf8
.DecodeLastRune(b
)
189 var buf strings
.Builder
190 // Prevent IdentifierNames and NumericLiterals from running into
191 // keywords: in, instanceof, typeof, void
192 pad
:= isJSIdentPart(first
) ||
isJSIdentPart(last
)
197 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029
198 // so it falls within the subset of JSON which is valid JS.
199 for i
:= 0; i
< len(b
); {
// NOTE(review): the local variable named rune shadows the predeclared
// rune type for the rest of this scope.
200 rune
, n
:= utf8
.DecodeRune(b
[i
:])
204 } else if rune
== 0x2029 {
// Copy the bytes accumulated since the previous replacement, then the
// replacement text itself.
208 buf
.Write(b
[written
:i
])
209 buf
.WriteString(repl
)
// Copy whatever follows the last replacement.
215 buf
.Write(b
[written
:])
224 // jsStrEscaper produces a string that can be included between quotes in
225 // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
226 // or in an HTML5 event handler attribute such as onclick.
227 func jsStrEscaper(args
...any
) string {
228 s
, t
:= stringify(args
...)
// When stringify reports contentTypeJSStr, use the normalizing table, which
// has no entry for `\` and therefore leaves existing escapes alone.
229 if t
== contentTypeJSStr
{
230 return replace(s
, jsStrNormReplacementTable
)
// Any other content type goes through jsStrReplacementTable.
232 return replace(s
, jsStrReplacementTable
)
235 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
236 // specials so the result is treated literally when included in a regular
237 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
238 // the literal text of {{.X}} followed by the string "bar".
239 func jsRegexpEscaper(args
...any
) string {
// The content type returned by stringify is discarded; only the text is
// escaped, using jsRegexpReplacementTable.
240 s
, _
:= stringify(args
...)
241 s
= replace(s
, jsRegexpReplacementTable
)
243 // /{{.X}}/ should not produce a line comment when .X == "".
249 // replace replaces each rune r of s with replacementTable[r], provided that
250 // r < len(replacementTable). If replacementTable[r] is the empty string then
251 // no replacement is made.
252 // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
254 func replace(s
string, replacementTable
[]string) string {
255 var b strings
.Builder
256 r
, w
, written
:= rune(0), 0, 0
// i advances by w, the byte width of the rune decoded at s[i:].
257 for i
:= 0; i
< len(s
); i
+= w
{
258 // See comment in htmlEscaper.
259 r
, w
= utf8
.DecodeRuneInString(s
[i
:])
// lowUnicodeReplacementTable is consulted before the caller's table.
262 case int(r
) < len(lowUnicodeReplacementTable
):
263 repl
= lowUnicodeReplacementTable
[r
]
// The caller's table applies only when it holds a non-empty entry for r.
264 case int(r
) < len(replacementTable
) && replacementTable
[r
] != "":
265 repl
= replacementTable
[r
]
// Copy the unmodified text between the previous write position and i.
276 b
.WriteString(s
[written
:i
])
// Copy the unmodified text after the last write position.
283 b
.WriteString(s
[written
:])
// lowUnicodeReplacementTable is indexed by code point and holds `\u00XX`
// escape text for low code points. replace checks it before the table
// supplied by its caller.
287 var lowUnicodeReplacementTable
= []string{
288 0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
293 '\v': `\u000b`, // "\v" == "v" on IE 6.
296 0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
297 0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
298 0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
// jsStrReplacementTable is the replacement table that jsStrEscaper passes to
// replace when stringify does not report contentTypeJSStr.
301 var jsStrReplacementTable
= []string{
305 '\v': `\u000b`, // "\v" == "v" on IE 6.
308 // Encode HTML specials as hex so the output can be embedded
309 // in HTML attributes without further encoding.
320 // jsStrNormReplacementTable is like jsStrReplacementTable but does not
321 // overencode existing escapes since this table has no entry for `\`.
// jsStrEscaper passes it to replace when stringify reports contentTypeJSStr.
322 var jsStrNormReplacementTable
= []string{
326 '\v': `\u000b`, // "\v" == "v" on IE 6.
329 // Encode HTML specials as hex so the output can be embedded
330 // in HTML attributes without further encoding.
// jsRegexpReplacementTable is the replacement table that jsRegexpEscaper
// passes to replace.
339 var jsRegexpReplacementTable
= []string{
343 '\v': `\u000b`, // "\v" == "v" on IE 6.
346 // Encode HTML specials as hex so the output can be embedded
347 // in HTML attributes without further encoding.
371 // isJSIdentPart reports whether the given rune is a JS identifier part.
372 // It does not handle all the non-Latin letters, joiners, and combining marks,
373 // but it does handle every codepoint that can occur in a numeric literal or
375 func isJSIdentPart(r rune
) bool {
// ASCII decimal digits.
379 case '0' <= r
&& r
<= '9':
// ASCII uppercase letters.
381 case 'A' <= r
&& r
<= 'Z':
// ASCII lowercase letters.
385 case 'a' <= r
&& r
<= 'z':
391 // isJSType reports whether the given MIME type should be considered JavaScript.
393 // It is used to determine whether a script tag with a type attribute is a JavaScript container.
394 func isJSType(mimeType
string) bool {
396 // https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
397 // https://tools.ietf.org/html/rfc7231#section-3.1.1
398 // https://tools.ietf.org/html/rfc4329#section-3
399 // https://www.ietf.org/rfc/rfc4627.txt
400 // discard parameters
401 mimeType
, _
, _
= strings
.Cut(mimeType
, ";")
402 mimeType
= strings
.ToLower(mimeType
)
403 mimeType
= strings
.TrimSpace(mimeType
)
406 "application/ecmascript",
407 "application/javascript",
409 "application/ld+json",
410 "application/x-ecmascript",
411 "application/x-javascript",
415 "text/javascript1.0",
416 "text/javascript1.1",
417 "text/javascript1.2",
418 "text/javascript1.3",
419 "text/javascript1.4",
420 "text/javascript1.5",