// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
14 // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
15 func htmlNospaceEscaper(args
...any
) string {
16 s
, t
:= stringify(args
...)
17 if t
== contentTypeHTML
{
18 return htmlReplacer(stripTags(s
), htmlNospaceNormReplacementTable
, false)
20 return htmlReplacer(s
, htmlNospaceReplacementTable
, false)
23 // attrEscaper escapes for inclusion in quoted attribute values.
24 func attrEscaper(args
...any
) string {
25 s
, t
:= stringify(args
...)
26 if t
== contentTypeHTML
{
27 return htmlReplacer(stripTags(s
), htmlNormReplacementTable
, true)
29 return htmlReplacer(s
, htmlReplacementTable
, true)
32 // rcdataEscaper escapes for inclusion in an RCDATA element body.
33 func rcdataEscaper(args
...any
) string {
34 s
, t
:= stringify(args
...)
35 if t
== contentTypeHTML
{
36 return htmlReplacer(s
, htmlNormReplacementTable
, true)
38 return htmlReplacer(s
, htmlReplacementTable
, true)
41 // htmlEscaper escapes for inclusion in HTML text.
42 func htmlEscaper(args
...any
) string {
43 s
, t
:= stringify(args
...)
44 if t
== contentTypeHTML
{
47 return htmlReplacer(s
, htmlReplacementTable
, true)
50 // htmlReplacementTable contains the runes that need to be escaped
51 // inside a quoted attribute value or in a text node.
52 var htmlReplacementTable
= []string{
53 // https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
54 // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
55 // CHARACTER character to the current attribute's value.
58 // https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
68 // htmlNormReplacementTable is like htmlReplacementTable but without '&' to
69 // avoid over-encoding existing entities.
70 var htmlNormReplacementTable
= []string{
79 // htmlNospaceReplacementTable contains the runes that need to be escaped
80 // inside an unquoted attribute value.
81 // The set of runes escaped is the union of the HTML specials and
82 // those determined by running the JS below in browsers:
84 // <script>(function () {
85 // var a = [], d = document.getElementById("d"), i, c, s;
86 // for (i = 0; i < 0x10000; ++i) {
87 // c = String.fromCharCode(i);
88 // d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
89 // s = d.getElementsByTagName("SPAN")[0];
90 // if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
92 // document.write(a.join(", "));
94 var htmlNospaceReplacementTable
= []string{
109 // A parse error in the attribute value (unquoted) and
110 // before attribute value states.
111 // Treated as a quoting character by IE.
115 // htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
116 // without '&' to avoid over-encoding existing entities.
117 var htmlNospaceNormReplacementTable
= []string{
131 // A parse error in the attribute value (unquoted) and
132 // before attribute value states.
133 // Treated as a quoting character by IE.
137 // htmlReplacer returns s with runes replaced according to replacementTable
138 // and when badRunes is true, certain bad runes are allowed through unescaped.
139 func htmlReplacer(s
string, replacementTable
[]string, badRunes
bool) string {
140 written
, b
:= 0, new(strings
.Builder
)
142 for i
:= 0; i
< len(s
); i
+= w
{
143 // Cannot use 'for range s' because we need to preserve the width
144 // of the runes in the input. If we see a decoding error, the input
145 // width will not be utf8.Runelen(r) and we will overrun the buffer.
146 r
, w
= utf8
.DecodeRuneInString(s
[i
:])
147 if int(r
) < len(replacementTable
) {
148 if repl
:= replacementTable
[r
]; len(repl
) != 0 {
152 b
.WriteString(s
[written
:i
])
158 // IE does not allow these ranges in unquoted attrs.
159 } else if 0xfdd0 <= r
&& r
<= 0xfdef ||
0xfff0 <= r
&& r
<= 0xffff {
163 fmt
.Fprintf(b
, "%s&#x%x;", s
[written
:i
], r
)
170 b
.WriteString(s
[written
:])
174 // stripTags takes a snippet of HTML and returns only the text content.
175 // For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `.
176 func stripTags(html
string) string {
178 s
, c
, i
, allText
:= []byte(html
), context
{}, 0, true
179 // Using the transition funcs helps us avoid mangling
180 // `<div title="1>2">` or `I <3 Ponies!`.
182 if c
.delim
== delimNone
{
184 // Use RCDATA instead of parsing into JS or CSS styles.
185 if c
.element
!= elementNone
&& !isInTag(st
) {
188 d
, nread
:= transitionFunc
[st
](c
, s
[i
:])
190 if c
.state
== stateText || c
.state
== stateRCDATA
{
191 // Emit text up to the start of the tag or comment.
193 if d
.state
!= c
.state
{
194 for j1
:= j
- 1; j1
>= i
; j1
-- {
208 i1
:= i
+ bytes
.IndexAny(s
[i
:], delimEnds
[c
.delim
])
212 if c
.delim
!= delimSpaceOrTagEnd
{
213 // Consume any quote.
216 c
, i
= context
{state
: stateTag
, element
: c
.element
}, i1
220 } else if c
.state
== stateText || c
.state
== stateRCDATA
{
226 // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
227 // a known-safe HTML attribute.
228 func htmlNameFilter(args
...any
) string {
229 s
, t
:= stringify(args
...)
230 if t
== contentTypeHTMLAttr
{
234 // Avoid violation of structure preservation.
235 // <input checked {{.K}}={{.V}}>.
236 // Without this, if .K is empty then .V is the value of
237 // checked, but otherwise .V is the value of the attribute
239 return filterFailsafe
241 s
= strings
.ToLower(s
)
242 if t
:= attrType(s
); t
!= contentTypePlain
{
243 // TODO: Split attr and element name part filters so we can recognize known attributes.
244 return filterFailsafe
246 for _
, r
:= range s
{
248 case '0' <= r
&& r
<= '9':
249 case 'a' <= r
&& r
<= 'z':
251 return filterFailsafe
257 // commentEscaper returns the empty string regardless of input.
258 // Comment content does not correspond to any parsed structure or
259 // human-readable content, so the simplest and most secure policy is to drop
260 // content interpolated into comments.
261 // This approach is equally valid whether or not static comment content is
262 // removed from the template.
263 func commentEscaper(args
...any
) string {