1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// lowerhex maps a 4-bit value (0-15) to its lowercase hexadecimal digit.
const lowerhex = "0123456789abcdef"
13 func quoteWith(s
string, quote
byte, ASCIIonly
bool) string {
14 var runeTmp
[utf8
.UTFMax
]byte
15 buf
:= make([]byte, 0, 3*len(s
)/2) // Try to avoid more allocations.
16 buf
= append(buf
, quote
)
17 for width
:= 0; len(s
) > 0; s
= s
[width
:] {
20 if r
>= utf8
.RuneSelf
{
21 r
, width
= utf8
.DecodeRuneInString(s
)
23 if width
== 1 && r
== utf8
.RuneError
{
24 buf
= append(buf
, `\x`...)
25 buf
= append(buf
, lowerhex
[s
[0]>>4])
26 buf
= append(buf
, lowerhex
[s
[0]&0xF])
29 if r
== rune(quote
) || r
== '\\' { // always backslashed
30 buf
= append(buf
, '\\')
31 buf
= append(buf
, byte(r
))
35 if r
< utf8
.RuneSelf
&& IsPrint(r
) {
36 buf
= append(buf
, byte(r
))
39 } else if IsPrint(r
) {
40 n
:= utf8
.EncodeRune(runeTmp
[:], r
)
41 buf
= append(buf
, runeTmp
[:n
]...)
46 buf
= append(buf
, `\a`...)
48 buf
= append(buf
, `\b`...)
50 buf
= append(buf
, `\f`...)
52 buf
= append(buf
, `\n`...)
54 buf
= append(buf
, `\r`...)
56 buf
= append(buf
, `\t`...)
58 buf
= append(buf
, `\v`...)
62 buf
= append(buf
, `\x`...)
63 buf
= append(buf
, lowerhex
[s
[0]>>4])
64 buf
= append(buf
, lowerhex
[s
[0]&0xF])
65 case r
> utf8
.MaxRune
:
69 buf
= append(buf
, `\u`...)
70 for s
:= 12; s
>= 0; s
-= 4 {
71 buf
= append(buf
, lowerhex
[r
>>uint(s
)&0xF])
74 buf
= append(buf
, `\U`...)
75 for s
:= 28; s
>= 0; s
-= 4 {
76 buf
= append(buf
, lowerhex
[r
>>uint(s
)&0xF])
81 buf
= append(buf
, quote
)
86 // Quote returns a double-quoted Go string literal representing s. The
87 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
88 // control characters and non-printable characters as defined by
90 func Quote(s
string) string {
91 return quoteWith(s
, '"', false)
94 // AppendQuote appends a double-quoted Go string literal representing s,
95 // as generated by Quote, to dst and returns the extended buffer.
96 func AppendQuote(dst
[]byte, s
string) []byte {
97 return append(dst
, Quote(s
)...)
100 // QuoteToASCII returns a double-quoted Go string literal representing s.
101 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
102 // non-ASCII characters and non-printable characters as defined by IsPrint.
103 func QuoteToASCII(s
string) string {
104 return quoteWith(s
, '"', true)
107 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
108 // as generated by QuoteToASCII, to dst and returns the extended buffer.
109 func AppendQuoteToASCII(dst
[]byte, s
string) []byte {
110 return append(dst
, QuoteToASCII(s
)...)
113 // QuoteRune returns a single-quoted Go character literal representing the
114 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
115 // for control characters and non-printable characters as defined by IsPrint.
116 func QuoteRune(r rune
) string {
117 // TODO: avoid the allocation here.
118 return quoteWith(string(r
), '\'', false)
121 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
122 // as generated by QuoteRune, to dst and returns the extended buffer.
123 func AppendQuoteRune(dst
[]byte, r rune
) []byte {
124 return append(dst
, QuoteRune(r
)...)
127 // QuoteRuneToASCII returns a single-quoted Go character literal representing
128 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
129 // \u0100) for non-ASCII characters and non-printable characters as defined
131 func QuoteRuneToASCII(r rune
) string {
132 // TODO: avoid the allocation here.
133 return quoteWith(string(r
), '\'', true)
136 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
137 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
138 func AppendQuoteRuneToASCII(dst
[]byte, r rune
) []byte {
139 return append(dst
, QuoteRuneToASCII(r
)...)
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than space and tab.
//
// It returns false if s contains a backquote, an ASCII control character
// other than tab (including DEL, 0x7F), a byte sequence that is not valid
// UTF-8, or the byte order mark U+FEFF.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			// A validly encoded multi-byte rune is fine unless it is a BOM.
			if r == '\ufeff' {
				return false
			}
			continue
		}
		// wid == 1: either plain ASCII or a byte that failed to decode.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}
// unhex converts a single hexadecimal digit byte (0-9, a-f, A-F) to its
// numeric value. ok is false, and v is zero, when b is not a hex digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}
167 // UnquoteChar decodes the first character or byte in the escaped string
168 // or character literal represented by the string s.
169 // It returns four values:
171 // 1) value, the decoded Unicode code point or byte value;
172 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
173 // 3) tail, the remainder of the string after the character; and
174 // 4) an error that will be nil if the character is syntactically valid.
176 // The second argument, quote, specifies the type of literal being parsed
177 // and therefore which escaped quote character is permitted.
178 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
179 // If set to a double quote, it permits \" and disallows unescaped ".
180 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
181 func UnquoteChar(s
string, quote
byte) (value rune
, multibyte
bool, tail
string, err error
) {
184 case c
== quote
&& (quote
== '\'' || quote
== '"'):
187 case c
>= utf8
.RuneSelf
:
188 r
, size
:= utf8
.DecodeRuneInString(s
)
189 return r
, true, s
[size
:], nil
191 return rune(s
[0]), false, s
[1:], nil
194 // hard case: c is backslash
232 for j
:= 0; j
< n
; j
++ {
242 // single-byte string, possibly not UTF-8
246 if v
> utf8
.MaxRune
{
252 case '0', '1', '2', '3', '4', '5', '6', '7':
258 for j
:= 0; j
< 2; j
++ { // one digit already; two more
259 x
:= rune(s
[j
]) - '0'
288 // Unquote interprets s as a single-quoted, double-quoted,
289 // or backquoted Go string literal, returning the string value
290 // that s quotes. (If s is single-quoted, it would be a Go
291 // character literal; Unquote returns the corresponding
292 // one-character string.)
293 func Unquote(s
string) (t
string, err error
) {
305 if contains(s
, '`') {
310 if quote
!= '"' && quote
!= '\'' {
313 if contains(s
, '\n') {
317 // Is it trivial? Avoid allocation.
318 if !contains(s
, '\\') && !contains(s
, quote
) {
323 r
, size
:= utf8
.DecodeRuneInString(s
)
324 if size
== len(s
) && (r
!= utf8
.RuneError || size
!= 1) {
330 var runeTmp
[utf8
.UTFMax
]byte
331 buf
:= make([]byte, 0, 3*len(s
)/2) // Try to avoid more allocations.
333 c
, multibyte
, ss
, err
:= UnquoteChar(s
, quote
)
338 if c
< utf8
.RuneSelf ||
!multibyte
{
339 buf
= append(buf
, byte(c
))
341 n
:= utf8
.EncodeRune(runeTmp
[:], c
)
342 buf
= append(buf
, runeTmp
[:n
]...)
344 if quote
== '\'' && len(s
) != 0 {
345 // single-quoted must be single character
349 return string(buf
), nil
// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// The slice a must be sorted in ascending order.
func bsearch16(a []uint16, x uint16) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
// The slice a must be sorted in ascending order.
func bsearch32(a []uint32, x uint32) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
392 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
393 // to give the same answer. It allows this package not to depend on unicode,
394 // and therefore not pull in all the Unicode tables. If the linker were better
395 // at tossing unused tables, we could get rid of this implementation.
396 // That would be nice.
398 // IsPrint reports whether the rune is defined as printable by Go, with
399 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
400 // symbols and ASCII space.
401 func IsPrint(r rune
) bool {
402 // Fast check for Latin-1
404 if 0x20 <= r
&& r
<= 0x7E {
405 // All the ASCII is printable from space through DEL-1.
408 if 0xA1 <= r
&& r
<= 0xFF {
409 // Similarly for ¡ through ÿ...
410 return r
!= 0xAD // ...except for the bizarre soft hyphen.
415 // Same algorithm, either on uint16 or uint32 value.
416 // First, find first i such that isPrint[i] >= x.
417 // This is the index of either the start or end of a pair that might span x.
418 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
419 // If we find x in a range, make sure x is not in isNotPrint list.
421 if 0 <= r
&& r
< 1<<16 {
422 rr
, isPrint
, isNotPrint
:= uint16(r
), isPrint16
, isNotPrint16
423 i
:= bsearch16(isPrint
, rr
)
424 if i
>= len(isPrint
) || rr
< isPrint
[i
&^1] || isPrint
[i|
1] < rr
{
427 j
:= bsearch16(isNotPrint
, rr
)
428 return j
>= len(isNotPrint
) || isNotPrint
[j
] != rr
431 rr
, isPrint
, isNotPrint
:= uint32(r
), isPrint32
, isNotPrint32
432 i
:= bsearch32(isPrint
, rr
)
433 if i
>= len(isPrint
) || rr
< isPrint
[i
&^1] || isPrint
[i|
1] < rr
{
440 j
:= bsearch16(isNotPrint
, uint16(r
))
441 return j
>= len(isNotPrint
) || isNotPrint
[j
] != uint16(r
)