// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run makeisprint.go -output isprint.go
// lowerhex maps a nibble value (0-15) to its lowercase hexadecimal digit.
const lowerhex = "0123456789abcdef"
15 func quoteWith(s
string, quote
byte, ASCIIonly
bool) string {
16 var runeTmp
[utf8
.UTFMax
]byte
17 buf
:= make([]byte, 0, 3*len(s
)/2) // Try to avoid more allocations.
18 buf
= append(buf
, quote
)
19 for width
:= 0; len(s
) > 0; s
= s
[width
:] {
22 if r
>= utf8
.RuneSelf
{
23 r
, width
= utf8
.DecodeRuneInString(s
)
25 if width
== 1 && r
== utf8
.RuneError
{
26 buf
= append(buf
, `\x`...)
27 buf
= append(buf
, lowerhex
[s
[0]>>4])
28 buf
= append(buf
, lowerhex
[s
[0]&0xF])
31 if r
== rune(quote
) || r
== '\\' { // always backslashed
32 buf
= append(buf
, '\\')
33 buf
= append(buf
, byte(r
))
37 if r
< utf8
.RuneSelf
&& IsPrint(r
) {
38 buf
= append(buf
, byte(r
))
41 } else if IsPrint(r
) {
42 n
:= utf8
.EncodeRune(runeTmp
[:], r
)
43 buf
= append(buf
, runeTmp
[:n
]...)
48 buf
= append(buf
, `\a`...)
50 buf
= append(buf
, `\b`...)
52 buf
= append(buf
, `\f`...)
54 buf
= append(buf
, `\n`...)
56 buf
= append(buf
, `\r`...)
58 buf
= append(buf
, `\t`...)
60 buf
= append(buf
, `\v`...)
64 buf
= append(buf
, `\x`...)
65 buf
= append(buf
, lowerhex
[s
[0]>>4])
66 buf
= append(buf
, lowerhex
[s
[0]&0xF])
67 case r
> utf8
.MaxRune
:
71 buf
= append(buf
, `\u`...)
72 for s
:= 12; s
>= 0; s
-= 4 {
73 buf
= append(buf
, lowerhex
[r
>>uint(s
)&0xF])
76 buf
= append(buf
, `\U`...)
77 for s
:= 28; s
>= 0; s
-= 4 {
78 buf
= append(buf
, lowerhex
[r
>>uint(s
)&0xF])
83 buf
= append(buf
, quote
)
88 // Quote returns a double-quoted Go string literal representing s. The
89 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
90 // control characters and non-printable characters as defined by
92 func Quote(s
string) string {
93 return quoteWith(s
, '"', false)
96 // AppendQuote appends a double-quoted Go string literal representing s,
97 // as generated by Quote, to dst and returns the extended buffer.
98 func AppendQuote(dst
[]byte, s
string) []byte {
99 return append(dst
, Quote(s
)...)
102 // QuoteToASCII returns a double-quoted Go string literal representing s.
103 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
104 // non-ASCII characters and non-printable characters as defined by IsPrint.
105 func QuoteToASCII(s
string) string {
106 return quoteWith(s
, '"', true)
109 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
110 // as generated by QuoteToASCII, to dst and returns the extended buffer.
111 func AppendQuoteToASCII(dst
[]byte, s
string) []byte {
112 return append(dst
, QuoteToASCII(s
)...)
115 // QuoteRune returns a single-quoted Go character literal representing the
116 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
117 // for control characters and non-printable characters as defined by IsPrint.
118 func QuoteRune(r rune
) string {
119 // TODO: avoid the allocation here.
120 return quoteWith(string(r
), '\'', false)
123 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
124 // as generated by QuoteRune, to dst and returns the extended buffer.
125 func AppendQuoteRune(dst
[]byte, r rune
) []byte {
126 return append(dst
, QuoteRune(r
)...)
129 // QuoteRuneToASCII returns a single-quoted Go character literal representing
130 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
131 // \u0100) for non-ASCII characters and non-printable characters as defined
133 func QuoteRuneToASCII(r rune
) string {
134 // TODO: avoid the allocation here.
135 return quoteWith(string(r
), '\'', true)
138 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
139 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
140 func AppendQuoteRuneToASCII(dst
[]byte, r rune
) []byte {
141 return append(dst
, QuoteRuneToASCII(r
)...)
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than tab.
//
// It returns false if s contains a byte order mark, a byte that is not
// valid UTF-8, a backquote, DEL, or any control character below space
// other than tab.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			if r == '\ufeff' {
				return false // BOMs are invisible and should not be quoted.
			}
			continue // All other multibyte runes are correctly encoded and assumed printable.
		}
		// Width 1 with RuneError means the byte was not valid UTF-8.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}
// unhex converts a single hexadecimal digit (0-9, a-f, A-F) to its value.
// ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return
}
180 // UnquoteChar decodes the first character or byte in the escaped string
181 // or character literal represented by the string s.
182 // It returns four values:
184 // 1) value, the decoded Unicode code point or byte value;
185 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
186 // 3) tail, the remainder of the string after the character; and
187 // 4) an error that will be nil if the character is syntactically valid.
189 // The second argument, quote, specifies the type of literal being parsed
190 // and therefore which escaped quote character is permitted.
191 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
192 // If set to a double quote, it permits \" and disallows unescaped ".
193 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
194 func UnquoteChar(s
string, quote
byte) (value rune
, multibyte
bool, tail
string, err error
) {
197 case c
== quote
&& (quote
== '\'' || quote
== '"'):
200 case c
>= utf8
.RuneSelf
:
201 r
, size
:= utf8
.DecodeRuneInString(s
)
202 return r
, true, s
[size
:], nil
204 return rune(s
[0]), false, s
[1:], nil
207 // hard case: c is backslash
245 for j
:= 0; j
< n
; j
++ {
255 // single-byte string, possibly not UTF-8
259 if v
> utf8
.MaxRune
{
265 case '0', '1', '2', '3', '4', '5', '6', '7':
271 for j
:= 0; j
< 2; j
++ { // one digit already; two more
272 x
:= rune(s
[j
]) - '0'
301 // Unquote interprets s as a single-quoted, double-quoted,
302 // or backquoted Go string literal, returning the string value
303 // that s quotes. (If s is single-quoted, it would be a Go
304 // character literal; Unquote returns the corresponding
305 // one-character string.)
306 func Unquote(s
string) (t
string, err error
) {
318 if contains(s
, '`') {
323 if quote
!= '"' && quote
!= '\'' {
326 if contains(s
, '\n') {
330 // Is it trivial? Avoid allocation.
331 if !contains(s
, '\\') && !contains(s
, quote
) {
336 r
, size
:= utf8
.DecodeRuneInString(s
)
337 if size
== len(s
) && (r
!= utf8
.RuneError || size
!= 1) {
343 var runeTmp
[utf8
.UTFMax
]byte
344 buf
:= make([]byte, 0, 3*len(s
)/2) // Try to avoid more allocations.
346 c
, multibyte
, ss
, err
:= UnquoteChar(s
, quote
)
351 if c
< utf8
.RuneSelf ||
!multibyte
{
352 buf
= append(buf
, byte(c
))
354 n
:= utf8
.EncodeRune(runeTmp
[:], c
)
355 buf
= append(buf
, runeTmp
[:n
]...)
357 if quote
== '\'' && len(s
) != 0 {
358 // single-quoted must be single character
362 return string(buf
), nil
// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// The result is only meaningful when a is sorted in increasing order.
func bsearch16(a []uint16, x uint16) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
// The result is only meaningful when a is sorted in increasing order.
func bsearch32(a []uint32, x uint32) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
// to give the same answer. It allows this package not to depend on unicode,
// and therefore not pull in all the Unicode tables. If the linker were better
// at tossing unused tables, we could get rid of this implementation.
// That would be nice.

411 // IsPrint reports whether the rune is defined as printable by Go, with
412 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
413 // symbols and ASCII space.
414 func IsPrint(r rune
) bool {
415 // Fast check for Latin-1
417 if 0x20 <= r
&& r
<= 0x7E {
418 // All the ASCII is printable from space through DEL-1.
421 if 0xA1 <= r
&& r
<= 0xFF {
422 // Similarly for ¡ through ÿ...
423 return r
!= 0xAD // ...except for the bizarre soft hyphen.
428 // Same algorithm, either on uint16 or uint32 value.
429 // First, find first i such that isPrint[i] >= x.
430 // This is the index of either the start or end of a pair that might span x.
431 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
432 // If we find x in a range, make sure x is not in isNotPrint list.
434 if 0 <= r
&& r
< 1<<16 {
435 rr
, isPrint
, isNotPrint
:= uint16(r
), isPrint16
, isNotPrint16
436 i
:= bsearch16(isPrint
, rr
)
437 if i
>= len(isPrint
) || rr
< isPrint
[i
&^1] || isPrint
[i|
1] < rr
{
440 j
:= bsearch16(isNotPrint
, rr
)
441 return j
>= len(isNotPrint
) || isNotPrint
[j
] != rr
444 rr
, isPrint
, isNotPrint
:= uint32(r
), isPrint32
, isNotPrint32
445 i
:= bsearch32(isPrint
, rr
)
446 if i
>= len(isPrint
) || rr
< isPrint
[i
&^1] || isPrint
[i|
1] < rr
{
453 j
:= bsearch16(isNotPrint
, uint16(r
))
454 return j
>= len(isNotPrint
) || isNotPrint
[j
] != uint16(r
)