1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
13 // urlFilter returns its input unless it contains an unsafe protocol in which
14 // case it defangs the entire URL.
15 func urlFilter(args
...interface{}) string {
16 s
, t
:= stringify(args
...)
17 if t
== contentTypeURL
{
20 if i
:= strings
.IndexRune(s
, ':'); i
>= 0 && strings
.IndexRune(s
[:i
], '/') < 0 {
21 protocol
:= strings
.ToLower(s
[:i
])
22 if protocol
!= "http" && protocol
!= "https" && protocol
!= "mailto" {
23 return "#" + filterFailsafe
29 // urlEscaper produces an output that can be embedded in a URL query.
30 // The output can be embedded in an HTML attribute without further escaping.
31 func urlEscaper(args
...interface{}) string {
32 return urlProcessor(false, args
...)
35 // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited
36 // string or parenthesis delimited url(...).
37 // The normalizer does not encode all HTML specials. Specifically, it does not
38 // encode '&' so correct embedding in an HTML attribute requires escaping of
39 // '&' to '&amp;'.
40 func urlNormalizer(args
...interface{}) string {
41 return urlProcessor(true, args
...)
44 // urlProcessor normalizes (when norm is true) or escapes its input to produce
45 // a valid hierarchical or opaque URL part.
46 func urlProcessor(norm
bool, args
...interface{}) string {
47 s
, t
:= stringify(args
...)
48 if t
== contentTypeURL
{
53 // The byte loop below assumes that all URLs use UTF-8 as the
54 // content-encoding. This is similar to the URI to IRI encoding scheme
55 // defined in section 3.1 of RFC 3987, and behaves the same as the
56 // EcmaScript builtin encodeURIComponent.
57 // It should not cause any misencoding of URLs in pages with
58 // Content-type: text/html;charset=UTF-8.
59 for i
, n
:= 0, len(s
); i
< n
; i
++ {
62 // Single quote and parens are sub-delims in RFC 3986, but we
63 // escape them so the output can be embedded in single
64 // quoted attributes and unquoted CSS url(...) constructs.
65 // Single quotes are reserved in URLs, but are only used in
66 // the obsolete "mark" rule in an appendix in RFC 3986
67 // so can be safely encoded.
68 case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
72 // Unreserved according to RFC 3986 sec 2.3
73 // "For consistency, percent-encoded octets in the ranges of
74 // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
75 // period (%2E), underscore (%5F), or tilde (%7E) should not be
76 // created by URI producers ..."
77 case '-', '.', '_', '~':
80 // When normalizing do not re-encode valid escapes.
81 if norm
&& i
+2 < len(s
) && isHex(s
[i
+1]) && isHex(s
[i
+2]) {
85 // Unreserved according to RFC 3986 sec 2.3
86 if 'a' <= c
&& c
<= 'z' {
89 if 'A' <= c
&& c
<= 'Z' {
92 if '0' <= c
&& c
<= '9' {
96 b
.WriteString(s
[written
:i
])
97 fmt
.Fprintf(&b
, "%%%02x", c
)
103 b
.WriteString(s
[written
:])