// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run makeisprint.go -output isprint.go
// lowerhex maps a nibble value (0-15) to its lowercase hexadecimal digit.
const lowerhex = "0123456789abcdef"
13 func quoteWith(s
string, quote
byte, ASCIIonly
, graphicOnly
bool) string {
14 return string(appendQuotedWith(make([]byte, 0, 3*len(s
)/2), s
, quote
, ASCIIonly
, graphicOnly
))
17 func quoteRuneWith(r rune
, quote
byte, ASCIIonly
, graphicOnly
bool) string {
18 return string(appendQuotedRuneWith(nil, r
, quote
, ASCIIonly
, graphicOnly
))
21 func appendQuotedWith(buf
[]byte, s
string, quote
byte, ASCIIonly
, graphicOnly
bool) []byte {
22 buf
= append(buf
, quote
)
23 for width
:= 0; len(s
) > 0; s
= s
[width
:] {
26 if r
>= utf8
.RuneSelf
{
27 r
, width
= utf8
.DecodeRuneInString(s
)
29 if width
== 1 && r
== utf8
.RuneError
{
30 buf
= append(buf
, `\x`...)
31 buf
= append(buf
, lowerhex
[s
[0]>>4])
32 buf
= append(buf
, lowerhex
[s
[0]&0xF])
35 buf
= appendEscapedRune(buf
, r
, quote
, ASCIIonly
, graphicOnly
)
37 buf
= append(buf
, quote
)
41 func appendQuotedRuneWith(buf
[]byte, r rune
, quote
byte, ASCIIonly
, graphicOnly
bool) []byte {
42 buf
= append(buf
, quote
)
43 if !utf8
.ValidRune(r
) {
46 buf
= appendEscapedRune(buf
, r
, quote
, ASCIIonly
, graphicOnly
)
47 buf
= append(buf
, quote
)
51 func appendEscapedRune(buf
[]byte, r rune
, quote
byte, ASCIIonly
, graphicOnly
bool) []byte {
52 var runeTmp
[utf8
.UTFMax
]byte
53 if r
== rune(quote
) || r
== '\\' { // always backslashed
54 buf
= append(buf
, '\\')
55 buf
= append(buf
, byte(r
))
59 if r
< utf8
.RuneSelf
&& IsPrint(r
) {
60 buf
= append(buf
, byte(r
))
63 } else if IsPrint(r
) || graphicOnly
&& isInGraphicList(r
) {
64 n
:= utf8
.EncodeRune(runeTmp
[:], r
)
65 buf
= append(buf
, runeTmp
[:n
]...)
70 buf
= append(buf
, `\a`...)
72 buf
= append(buf
, `\b`...)
74 buf
= append(buf
, `\f`...)
76 buf
= append(buf
, `\n`...)
78 buf
= append(buf
, `\r`...)
80 buf
= append(buf
, `\t`...)
82 buf
= append(buf
, `\v`...)
86 buf
= append(buf
, `\x`...)
87 buf
= append(buf
, lowerhex
[byte(r
)>>4])
88 buf
= append(buf
, lowerhex
[byte(r
)&0xF])
89 case r
> utf8
.MaxRune
:
93 buf
= append(buf
, `\u`...)
94 for s
:= 12; s
>= 0; s
-= 4 {
95 buf
= append(buf
, lowerhex
[r
>>uint(s
)&0xF])
98 buf
= append(buf
, `\U`...)
99 for s
:= 28; s
>= 0; s
-= 4 {
100 buf
= append(buf
, lowerhex
[r
>>uint(s
)&0xF])
107 // Quote returns a double-quoted Go string literal representing s. The
108 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
109 // control characters and non-printable characters as defined by
111 func Quote(s
string) string {
112 return quoteWith(s
, '"', false, false)
115 // AppendQuote appends a double-quoted Go string literal representing s,
116 // as generated by Quote, to dst and returns the extended buffer.
117 func AppendQuote(dst
[]byte, s
string) []byte {
118 return appendQuotedWith(dst
, s
, '"', false, false)
121 // QuoteToASCII returns a double-quoted Go string literal representing s.
122 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
123 // non-ASCII characters and non-printable characters as defined by IsPrint.
124 func QuoteToASCII(s
string) string {
125 return quoteWith(s
, '"', true, false)
128 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
129 // as generated by QuoteToASCII, to dst and returns the extended buffer.
130 func AppendQuoteToASCII(dst
[]byte, s
string) []byte {
131 return appendQuotedWith(dst
, s
, '"', true, false)
134 // QuoteToGraphic returns a double-quoted Go string literal representing s.
135 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
136 // non-ASCII characters and non-printable characters as defined by IsGraphic.
137 func QuoteToGraphic(s
string) string {
138 return quoteWith(s
, '"', false, true)
141 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s,
142 // as generated by QuoteToGraphic, to dst and returns the extended buffer.
143 func AppendQuoteToGraphic(dst
[]byte, s
string) []byte {
144 return appendQuotedWith(dst
, s
, '"', false, true)
147 // QuoteRune returns a single-quoted Go character literal representing the
148 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
149 // for control characters and non-printable characters as defined by IsPrint.
150 func QuoteRune(r rune
) string {
151 return quoteRuneWith(r
, '\'', false, false)
154 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
155 // as generated by QuoteRune, to dst and returns the extended buffer.
156 func AppendQuoteRune(dst
[]byte, r rune
) []byte {
157 return appendQuotedRuneWith(dst
, r
, '\'', false, false)
160 // QuoteRuneToASCII returns a single-quoted Go character literal representing
161 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
162 // \u0100) for non-ASCII characters and non-printable characters as defined
164 func QuoteRuneToASCII(r rune
) string {
165 return quoteRuneWith(r
, '\'', true, false)
168 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
169 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
170 func AppendQuoteRuneToASCII(dst
[]byte, r rune
) []byte {
171 return appendQuotedRuneWith(dst
, r
, '\'', true, false)
174 // QuoteRuneToGraphic returns a single-quoted Go character literal representing
175 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
176 // \u0100) for non-ASCII characters and non-printable characters as defined
178 func QuoteRuneToGraphic(r rune
) string {
179 return quoteRuneWith(r
, '\'', false, true)
182 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,
183 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.
184 func AppendQuoteRuneToGraphic(dst
[]byte, r rune
) []byte {
185 return appendQuotedRuneWith(dst
, r
, '\'', false, true)
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than tab.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			if r == '\ufeff' {
				return false // BOMs are invisible and should not be quoted.
			}
			continue // All other multibyte runes are correctly encoded and assumed printable.
		}
		// Width 1 from here on: either ASCII or an invalid byte.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}
// unhex converts the hexadecimal digit b (0-9, a-f or A-F) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return
}
224 // UnquoteChar decodes the first character or byte in the escaped string
225 // or character literal represented by the string s.
226 // It returns four values:
228 // 1) value, the decoded Unicode code point or byte value;
229 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
230 // 3) tail, the remainder of the string after the character; and
231 // 4) an error that will be nil if the character is syntactically valid.
233 // The second argument, quote, specifies the type of literal being parsed
234 // and therefore which escaped quote character is permitted.
235 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
236 // If set to a double quote, it permits \" and disallows unescaped ".
237 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
238 func UnquoteChar(s
string, quote
byte) (value rune
, multibyte
bool, tail
string, err error
) {
241 case c
== quote
&& (quote
== '\'' || quote
== '"'):
244 case c
>= utf8
.RuneSelf
:
245 r
, size
:= utf8
.DecodeRuneInString(s
)
246 return r
, true, s
[size
:], nil
248 return rune(s
[0]), false, s
[1:], nil
251 // hard case: c is backslash
289 for j
:= 0; j
< n
; j
++ {
299 // single-byte string, possibly not UTF-8
303 if v
> utf8
.MaxRune
{
309 case '0', '1', '2', '3', '4', '5', '6', '7':
315 for j
:= 0; j
< 2; j
++ { // one digit already; two more
316 x
:= rune(s
[j
]) - '0'
345 // Unquote interprets s as a single-quoted, double-quoted,
346 // or backquoted Go string literal, returning the string value
347 // that s quotes. (If s is single-quoted, it would be a Go
348 // character literal; Unquote returns the corresponding
349 // one-character string.)
350 func Unquote(s
string) (string, error
) {
362 if contains(s
, '`') {
365 if contains(s
, '\r') {
366 // -1 because we know there is at least one \r to remove.
367 buf
:= make([]byte, 0, len(s
)-1)
368 for i
:= 0; i
< len(s
); i
++ {
370 buf
= append(buf
, s
[i
])
373 return string(buf
), nil
377 if quote
!= '"' && quote
!= '\'' {
380 if contains(s
, '\n') {
384 // Is it trivial? Avoid allocation.
385 if !contains(s
, '\\') && !contains(s
, quote
) {
390 r
, size
:= utf8
.DecodeRuneInString(s
)
391 if size
== len(s
) && (r
!= utf8
.RuneError || size
!= 1) {
397 var runeTmp
[utf8
.UTFMax
]byte
398 buf
:= make([]byte, 0, 3*len(s
)/2) // Try to avoid more allocations.
400 c
, multibyte
, ss
, err
:= UnquoteChar(s
, quote
)
405 if c
< utf8
.RuneSelf ||
!multibyte
{
406 buf
= append(buf
, byte(c
))
408 n
:= utf8
.EncodeRune(runeTmp
[:], c
)
409 buf
= append(buf
, runeTmp
[:n
]...)
411 if quote
== '\'' && len(s
) != 0 {
412 // single-quoted must be single character
416 return string(buf
), nil
// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// The binary search assumes a is sorted in ascending order.
func bsearch16(a []uint16, x uint16) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
// The binary search assumes a is sorted in ascending order.
func bsearch32(a []uint32, x uint32) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
459 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
460 // to give the same answer. It allows this package not to depend on unicode,
461 // and therefore not pull in all the Unicode tables. If the linker were better
462 // at tossing unused tables, we could get rid of this implementation.
463 // That would be nice.
465 // IsPrint reports whether the rune is defined as printable by Go, with
466 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
467 // symbols and ASCII space.
468 func IsPrint(r rune
) bool {
469 // Fast check for Latin-1
471 if 0x20 <= r
&& r
<= 0x7E {
472 // All the ASCII is printable from space through DEL-1.
475 if 0xA1 <= r
&& r
<= 0xFF {
476 // Similarly for ¡ through ÿ...
477 return r
!= 0xAD // ...except for the bizarre soft hyphen.
482 // Same algorithm, either on uint16 or uint32 value.
483 // First, find first i such that isPrint[i] >= x.
484 // This is the index of either the start or end of a pair that might span x.
485 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
486 // If we find x in a range, make sure x is not in isNotPrint list.
488 if 0 <= r
&& r
< 1<<16 {
489 rr
, isPrint
, isNotPrint
:= uint16(r
), isPrint16
, isNotPrint16
490 i
:= bsearch16(isPrint
, rr
)
491 if i
>= len(isPrint
) || rr
< isPrint
[i
&^1] || isPrint
[i|
1] < rr
{
494 j
:= bsearch16(isNotPrint
, rr
)
495 return j
>= len(isNotPrint
) || isNotPrint
[j
] != rr
498 rr
, isPrint
, isNotPrint
:= uint32(r
), isPrint32
, isNotPrint32
499 i
:= bsearch32(isPrint
, rr
)
500 if i
>= len(isPrint
) || rr
< isPrint
[i
&^1] || isPrint
[i|
1] < rr
{
507 j
:= bsearch16(isNotPrint
, uint16(r
))
508 return j
>= len(isNotPrint
) || isNotPrint
[j
] != uint16(r
)
511 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such
512 // characters include letters, marks, numbers, punctuation, symbols, and
513 // spaces, from categories L, M, N, P, S, and Zs.
514 func IsGraphic(r rune
) bool {
518 return isInGraphicList(r
)
521 // isInGraphicList reports whether the rune is in the isGraphic list. This separation
522 // from IsGraphic allows quoteWith to avoid two calls to IsPrint.
523 // Should be called only if IsPrint fails.
524 func isInGraphicList(r rune
) bool {
525 // We know r must fit in 16 bits - see makeisprint.go.
530 i
:= bsearch16(isGraphic
, rr
)
531 return i
< len(isGraphic
) && rr
== isGraphic
[i
]