libgo/go/html/template/html.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package template
   6
   7 import (
   8         "bytes"
   9         "fmt"
  10         "strings"
  11         "unicode/utf8"
  12 )
  13
  14 // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
  15 func htmlNospaceEscaper(args ...any) string {
  16         s, t := stringify(args...)
  17         if t == contentTypeHTML {
  18                 return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
  19         }
  20         return htmlReplacer(s, htmlNospaceReplacementTable, false)
  21 }
  22
  23 // attrEscaper escapes for inclusion in quoted attribute values.
  24 func attrEscaper(args ...any) string {
  25         s, t := stringify(args...)
  26         if t == contentTypeHTML {
  27                 return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
  28         }
  29         return htmlReplacer(s, htmlReplacementTable, true)
  30 }
  31
  32 // rcdataEscaper escapes for inclusion in an RCDATA element body.
  33 func rcdataEscaper(args ...any) string {
  34         s, t := stringify(args...)
  35         if t == contentTypeHTML {
  36                 return htmlReplacer(s, htmlNormReplacementTable, true)
  37         }
  38         return htmlReplacer(s, htmlReplacementTable, true)
  39 }
  40
  41 // htmlEscaper escapes for inclusion in HTML text.
  42 func htmlEscaper(args ...any) string {
  43         s, t := stringify(args...)
  44         if t == contentTypeHTML {
  45                 return s
  46         }
  47         return htmlReplacer(s, htmlReplacementTable, true)
  48 }
  49
// htmlReplacementTable contains the runes that need to be escaped
// inside a quoted attribute value or in a text node.
//
// The slice is indexed by rune value and holds the replacement text for that
// rune. htmlReplacer treats an empty entry as "no replacement", so every index
// not listed below passes through unchanged.
var htmlReplacementTable = []string{
	// The HTML5 tokenizer rules for the unquoted attribute value state,
	// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
	// say:
	// "U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
	// CHARACTER character to the current attribute's value."
	// and similarly
	// https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
	// so NUL is mapped to U+FFFD up front.
	0:    "\uFFFD",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
  67
// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
// avoid over-encoding existing entities.
//
// The escapers in this file select it when the value being escaped is already
// typed as HTML. As with htmlReplacementTable, the slice is indexed by rune
// value and unlisted indices are empty, meaning "leave the rune alone".
var htmlNormReplacementTable = []string{
	0:    "\uFFFD",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
  78
// htmlNospaceReplacementTable contains the runes that need to be escaped
// inside an unquoted attribute value.
// The set of runes escaped is the union of the HTML specials and
// those determined by running the JS below in browsers:
// <div id=d></div>
// <script>(function () {
// var a = [], d = document.getElementById("d"), i, c, s;
// for (i = 0; i < 0x10000; ++i) {
//   c = String.fromCharCode(i);
//   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
//   s = d.getElementsByTagName("SPAN")[0];
//   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
// }
// document.write(a.join(", "));
// })()</script>
//
// The slice is indexed by rune value; an empty entry means the rune is not
// replaced. Unlike htmlReplacementTable, whitespace, '=' and '`' are escaped
// as well, and NUL becomes the character reference "&#xfffd;" rather than a
// literal U+FFFD.
var htmlNospaceReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
 114
// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
// without '&' to avoid over-encoding existing entities.
//
// htmlNospaceEscaper uses it for values already typed as HTML. The slice is
// indexed by rune value; an empty entry means the rune is not replaced.
var htmlNospaceNormReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
 136
 137 // htmlReplacer returns s with runes replaced according to replacementTable
 138 // and when badRunes is true, certain bad runes are allowed through unescaped.
 139 func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
 140         written, b := 0, new(strings.Builder)
 141         r, w := rune(0), 0
 142         for i := 0; i < len(s); i += w {
 143                 // Cannot use 'for range s' because we need to preserve the width
 144                 // of the runes in the input. If we see a decoding error, the input
 145                 // width will not be utf8.Runelen(r) and we will overrun the buffer.
 146                 r, w = utf8.DecodeRuneInString(s[i:])
 147                 if int(r) < len(replacementTable) {
 148                         if repl := replacementTable[r]; len(repl) != 0 {
 149                                 if written == 0 {
 150                                         b.Grow(len(s))
 151                                 }
 152                                 b.WriteString(s[written:i])
 153                                 b.WriteString(repl)
 154                                 written = i + w
 155                         }
 156                 } else if badRunes {
 157                         // No-op.
 158                         // IE does not allow these ranges in unquoted attrs.
 159                 } else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
 160                         if written == 0 {
 161                                 b.Grow(len(s))
 162                         }
 163                         fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
 164                         written = i + w
 165                 }
 166         }
 167         if written == 0 {
 168                 return s
 169         }
 170         b.WriteString(s[written:])
 171         return b.String()
 172 }
 173
 174 // stripTags takes a snippet of HTML and returns only the text content.
 175 // For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
 176 func stripTags(html string) string {
 177         var b bytes.Buffer
 178         s, c, i, allText := []byte(html), context{}, 0, true
 179         // Using the transition funcs helps us avoid mangling
 180         // `<div title="1>2">` or `I <3 Ponies!`.
 181         for i != len(s) {
 182                 if c.delim == delimNone {
 183                         st := c.state
 184                         // Use RCDATA instead of parsing into JS or CSS styles.
 185                         if c.element != elementNone && !isInTag(st) {
 186                                 st = stateRCDATA
 187                         }
 188                         d, nread := transitionFunc[st](c, s[i:])
 189                         i1 := i + nread
 190                         if c.state == stateText || c.state == stateRCDATA {
 191                                 // Emit text up to the start of the tag or comment.
 192                                 j := i1
 193                                 if d.state != c.state {
 194                                         for j1 := j - 1; j1 >= i; j1-- {
 195                                                 if s[j1] == '<' {
 196                                                         j = j1
 197                                                         break
 198                                                 }
 199                                         }
 200                                 }
 201                                 b.Write(s[i:j])
 202                         } else {
 203                                 allText = false
 204                         }
 205                         c, i = d, i1
 206                         continue
 207                 }
 208                 i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
 209                 if i1 < i {
 210                         break
 211                 }
 212                 if c.delim != delimSpaceOrTagEnd {
 213                         // Consume any quote.
 214                         i1++
 215                 }
 216                 c, i = context{state: stateTag, element: c.element}, i1
 217         }
 218         if allText {
 219                 return html
 220         } else if c.state == stateText || c.state == stateRCDATA {
 221                 b.Write(s[i:])
 222         }
 223         return b.String()
 224 }
 225
 226 // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
 227 // a known-safe HTML attribute.
 228 func htmlNameFilter(args ...any) string {
 229         s, t := stringify(args...)
 230         if t == contentTypeHTMLAttr {
 231                 return s
 232         }
 233         if len(s) == 0 {
 234                 // Avoid violation of structure preservation.
 235                 // <input checked {{.K}}={{.V}}>.
 236                 // Without this, if .K is empty then .V is the value of
 237                 // checked, but otherwise .V is the value of the attribute
 238                 // named .K.
 239                 return filterFailsafe
 240         }
 241         s = strings.ToLower(s)
 242         if t := attrType(s); t != contentTypePlain {
 243                 // TODO: Split attr and element name part filters so we can recognize known attributes.
 244                 return filterFailsafe
 245         }
 246         for _, r := range s {
 247                 switch {
 248                 case '0' <= r && r <= '9':
 249                 case 'a' <= r && r <= 'z':
 250                 default:
 251                         return filterFailsafe
 252                 }
 253         }
 254         return s
 255 }
 256
 257 // commentEscaper returns the empty string regardless of input.
 258 // Comment content does not correspond to any parsed structure or
 259 // human-readable content, so the simplest and most secure policy is to drop
 260 // content interpolated into comments.
 261 // This approach is equally valid whether or not static comment content is
 262 // removed from the template.
 263 func commentEscaper(args ...any) string {
 264         return ""
 265 }