// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// lowerhex holds the lowercase hexadecimal digits; indexing it with a
// 4-bit value yields the corresponding digit character.
const lowerhex = "0123456789abcdef"
13 func quoteWith(s
string, quote
byte, ASCIIonly
bool) string {
14 var runeTmp
[utf8
.UTFMax
]byte
15 buf
:= make([]byte, 0, 3*len(s
)/2) // Try to avoid more allocations.
16 buf
= append(buf
, quote
)
17 for width
:= 0; len(s
) > 0; s
= s
[width
:] {
20 if r
>= utf8
.RuneSelf
{
21 r
, width
= utf8
.DecodeRuneInString(s
)
23 if width
== 1 && r
== utf8
.RuneError
{
24 buf
= append(buf
, `\x`...)
25 buf
= append(buf
, lowerhex
[s
[0]>>4])
26 buf
= append(buf
, lowerhex
[s
[0]&0xF])
29 if r
== rune(quote
) || r
== '\\' { // always backslashed
30 buf
= append(buf
, '\\')
31 buf
= append(buf
, byte(r
))
35 if r
< utf8
.RuneSelf
&& IsPrint(r
) {
36 buf
= append(buf
, byte(r
))
39 } else if IsPrint(r
) {
40 n
:= utf8
.EncodeRune(runeTmp
[:], r
)
41 buf
= append(buf
, runeTmp
[:n
]...)
46 buf
= append(buf
, `\a`...)
48 buf
= append(buf
, `\b`...)
50 buf
= append(buf
, `\f`...)
52 buf
= append(buf
, `\n`...)
54 buf
= append(buf
, `\r`...)
56 buf
= append(buf
, `\t`...)
58 buf
= append(buf
, `\v`...)
62 buf
= append(buf
, `\x`...)
63 buf
= append(buf
, lowerhex
[s
[0]>>4])
64 buf
= append(buf
, lowerhex
[s
[0]&0xF])
65 case r
> utf8
.MaxRune
:
69 buf
= append(buf
, `\u`...)
70 for s
:= 12; s
>= 0; s
-= 4 {
71 buf
= append(buf
, lowerhex
[r
>>uint(s
)&0xF])
74 buf
= append(buf
, `\U`...)
75 for s
:= 28; s
>= 0; s
-= 4 {
76 buf
= append(buf
, lowerhex
[r
>>uint(s
)&0xF])
81 buf
= append(buf
, quote
)
86 // Quote returns a double-quoted Go string literal representing s. The
87 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
88 // control characters and non-printable characters as defined by
90 func Quote(s
string) string {
91 return quoteWith(s
, '"', false)
94 // AppendQuote appends a double-quoted Go string literal representing s,
95 // as generated by Quote, to dst and returns the extended buffer.
96 func AppendQuote(dst
[]byte, s
string) []byte {
97 return append(dst
, Quote(s
)...)
100 // QuoteToASCII returns a double-quoted Go string literal representing s.
101 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
102 // non-ASCII characters and non-printable characters as defined by IsPrint.
103 func QuoteToASCII(s
string) string {
104 return quoteWith(s
, '"', true)
107 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
108 // as generated by QuoteToASCII, to dst and returns the extended buffer.
109 func AppendQuoteToASCII(dst
[]byte, s
string) []byte {
110 return append(dst
, QuoteToASCII(s
)...)
113 // QuoteRune returns a single-quoted Go character literal representing the
114 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
115 // for control characters and non-printable characters as defined by IsPrint.
116 func QuoteRune(r rune
) string {
117 // TODO: avoid the allocation here.
118 return quoteWith(string(r
), '\'', false)
121 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
122 // as generated by QuoteRune, to dst and returns the extended buffer.
123 func AppendQuoteRune(dst
[]byte, r rune
) []byte {
124 return append(dst
, QuoteRune(r
)...)
127 // QuoteRuneToASCII returns a single-quoted Go character literal representing
128 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
129 // \u0100) for non-ASCII characters and non-printable characters as defined
131 func QuoteRuneToASCII(r rune
) string {
132 // TODO: avoid the allocation here.
133 return quoteWith(string(r
), '\'', true)
136 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
137 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
138 func AppendQuoteRuneToASCII(dst
[]byte, r rune
) []byte {
139 return append(dst
, QuoteRuneToASCII(r
)...)
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than space and tab.
//
// The check is byte-wise: it returns false as soon as it sees a byte
// below space other than tab, a backquote, or DEL (0x7F). The empty
// string is accepted.
func CanBackquote(s string) bool {
	for i := 0; i < len(s); i++ {
		c := s[i]
		if (c < ' ' && c != '\t') || c == '`' || c == '\u007F' {
			return false
		}
	}
	return true
}
// unhex converts the hexadecimal digit b to its numeric value.
// It accepts '0'-'9', 'a'-'f' and 'A'-'F'; for any other byte it
// returns v == 0 and ok == false.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return
}
168 // UnquoteChar decodes the first character or byte in the escaped string
169 // or character literal represented by the string s.
170 // It returns four values:
172 // 1) value, the decoded Unicode code point or byte value;
173 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
174 // 3) tail, the remainder of the string after the character; and
175 // 4) an error that will be nil if the character is syntactically valid.
177 // The second argument, quote, specifies the type of literal being parsed
178 // and therefore which escaped quote character is permitted.
179 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
180 // If set to a double quote, it permits \" and disallows unescaped ".
181 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
182 func UnquoteChar(s
string, quote
byte) (value rune
, multibyte
bool, tail
string, err error
) {
185 case c
== quote
&& (quote
== '\'' || quote
== '"'):
188 case c
>= utf8
.RuneSelf
:
189 r
, size
:= utf8
.DecodeRuneInString(s
)
190 return r
, true, s
[size
:], nil
192 return rune(s
[0]), false, s
[1:], nil
195 // hard case: c is backslash
233 for j
:= 0; j
< n
; j
++ {
243 // single-byte string, possibly not UTF-8
247 if v
> utf8
.MaxRune
{
253 case '0', '1', '2', '3', '4', '5', '6', '7':
259 for j
:= 0; j
< 2; j
++ { // one digit already; two more
260 x
:= rune(s
[j
]) - '0'
289 // Unquote interprets s as a single-quoted, double-quoted,
290 // or backquoted Go string literal, returning the string value
291 // that s quotes. (If s is single-quoted, it would be a Go
292 // character literal; Unquote returns the corresponding
293 // one-character string.)
294 func Unquote(s
string) (t
string, err error
) {
306 if contains(s
, '`') {
311 if quote
!= '"' && quote
!= '\'' {
314 if contains(s
, '\n') {
318 // Is it trivial? Avoid allocation.
319 if !contains(s
, '\\') && !contains(s
, quote
) {
324 r
, size
:= utf8
.DecodeRuneInString(s
)
325 if size
== len(s
) && (r
!= utf8
.RuneError || size
!= 1) {
331 var runeTmp
[utf8
.UTFMax
]byte
332 buf
:= make([]byte, 0, 3*len(s
)/2) // Try to avoid more allocations.
334 c
, multibyte
, ss
, err
:= UnquoteChar(s
, quote
)
339 if c
< utf8
.RuneSelf ||
!multibyte
{
340 buf
= append(buf
, byte(c
))
342 n
:= utf8
.EncodeRune(runeTmp
[:], c
)
343 buf
= append(buf
, runeTmp
[:n
]...)
345 if quote
== '\'' && len(s
) != 0 {
346 // single-quoted must be single character
350 return string(buf
), nil
// contains reports whether the string contains the byte c.
// The scan is byte-wise, so it does no UTF-8 decoding.
func contains(s string, c byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// It binary-searches, so a must be sorted in increasing order.
func bsearch16(a []uint16, x uint16) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint computed without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
// It binary-searches, so a must be sorted in increasing order.
func bsearch32(a []uint32, x uint32) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint computed without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
393 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
394 // to give the same answer. It allows this package not to depend on unicode,
395 // and therefore not pull in all the Unicode tables. If the linker were better
396 // at tossing unused tables, we could get rid of this implementation.
397 // That would be nice.
399 // IsPrint reports whether the rune is defined as printable by Go, with
400 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
401 // symbols and ASCII space.
402 func IsPrint(r rune
) bool {
403 // Fast check for Latin-1
405 if 0x20 <= r
&& r
<= 0x7E {
406 // All the ASCII is printable from space through DEL-1.
409 if 0xA1 <= r
&& r
<= 0xFF {
410 // Similarly for ¡ through ÿ...
411 return r
!= 0xAD // ...except for the bizarre soft hyphen.
416 // Same algorithm, either on uint16 or uint32 value.
417 // First, find first i such that isPrint[i] >= x.
418 // This is the index of either the start or end of a pair that might span x.
419 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
420 // If we find x in a range, make sure x is not in isNotPrint list.
422 if 0 <= r
&& r
< 1<<16 {
423 rr
, isPrint
, isNotPrint
:= uint16(r
), isPrint16
, isNotPrint16
424 i
:= bsearch16(isPrint
, rr
)
425 if i
>= len(isPrint
) || rr
< isPrint
[i
&^1] || isPrint
[i|
1] < rr
{
428 j
:= bsearch16(isNotPrint
, rr
)
429 return j
>= len(isNotPrint
) || isNotPrint
[j
] != rr
432 rr
, isPrint
, isNotPrint
:= uint32(r
), isPrint32
, isNotPrint32
433 i
:= bsearch32(isPrint
, rr
)
434 if i
>= len(isPrint
) || rr
< isPrint
[i
&^1] || isPrint
[i|
1] < rr
{
441 j
:= bsearch16(isNotPrint
, uint16(r
))
442 return j
>= len(isNotPrint
) || isNotPrint
[j
] != uint16(r
)