Fix bootstrap/PR63632
[official-gcc.git] / libgo / go / strconv / quote.go
blobaded7e5930c329992c1ed3b865e48d5bb2f5accc
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package strconv
7 import (
8 "unicode/utf8"
11 const lowerhex = "0123456789abcdef"
13 func quoteWith(s string, quote byte, ASCIIonly bool) string {
14 var runeTmp [utf8.UTFMax]byte
15 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
16 buf = append(buf, quote)
17 for width := 0; len(s) > 0; s = s[width:] {
18 r := rune(s[0])
19 width = 1
20 if r >= utf8.RuneSelf {
21 r, width = utf8.DecodeRuneInString(s)
23 if width == 1 && r == utf8.RuneError {
24 buf = append(buf, `\x`...)
25 buf = append(buf, lowerhex[s[0]>>4])
26 buf = append(buf, lowerhex[s[0]&0xF])
27 continue
29 if r == rune(quote) || r == '\\' { // always backslashed
30 buf = append(buf, '\\')
31 buf = append(buf, byte(r))
32 continue
34 if ASCIIonly {
35 if r < utf8.RuneSelf && IsPrint(r) {
36 buf = append(buf, byte(r))
37 continue
39 } else if IsPrint(r) {
40 n := utf8.EncodeRune(runeTmp[:], r)
41 buf = append(buf, runeTmp[:n]...)
42 continue
44 switch r {
45 case '\a':
46 buf = append(buf, `\a`...)
47 case '\b':
48 buf = append(buf, `\b`...)
49 case '\f':
50 buf = append(buf, `\f`...)
51 case '\n':
52 buf = append(buf, `\n`...)
53 case '\r':
54 buf = append(buf, `\r`...)
55 case '\t':
56 buf = append(buf, `\t`...)
57 case '\v':
58 buf = append(buf, `\v`...)
59 default:
60 switch {
61 case r < ' ':
62 buf = append(buf, `\x`...)
63 buf = append(buf, lowerhex[s[0]>>4])
64 buf = append(buf, lowerhex[s[0]&0xF])
65 case r > utf8.MaxRune:
66 r = 0xFFFD
67 fallthrough
68 case r < 0x10000:
69 buf = append(buf, `\u`...)
70 for s := 12; s >= 0; s -= 4 {
71 buf = append(buf, lowerhex[r>>uint(s)&0xF])
73 default:
74 buf = append(buf, `\U`...)
75 for s := 28; s >= 0; s -= 4 {
76 buf = append(buf, lowerhex[r>>uint(s)&0xF])
81 buf = append(buf, quote)
82 return string(buf)
86 // Quote returns a double-quoted Go string literal representing s. The
87 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
88 // control characters and non-printable characters as defined by
89 // IsPrint.
90 func Quote(s string) string {
91 return quoteWith(s, '"', false)
94 // AppendQuote appends a double-quoted Go string literal representing s,
95 // as generated by Quote, to dst and returns the extended buffer.
96 func AppendQuote(dst []byte, s string) []byte {
97 return append(dst, Quote(s)...)
100 // QuoteToASCII returns a double-quoted Go string literal representing s.
101 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
102 // non-ASCII characters and non-printable characters as defined by IsPrint.
103 func QuoteToASCII(s string) string {
104 return quoteWith(s, '"', true)
107 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
108 // as generated by QuoteToASCII, to dst and returns the extended buffer.
109 func AppendQuoteToASCII(dst []byte, s string) []byte {
110 return append(dst, QuoteToASCII(s)...)
113 // QuoteRune returns a single-quoted Go character literal representing the
114 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
115 // for control characters and non-printable characters as defined by IsPrint.
116 func QuoteRune(r rune) string {
117 // TODO: avoid the allocation here.
118 return quoteWith(string(r), '\'', false)
121 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
122 // as generated by QuoteRune, to dst and returns the extended buffer.
123 func AppendQuoteRune(dst []byte, r rune) []byte {
124 return append(dst, QuoteRune(r)...)
127 // QuoteRuneToASCII returns a single-quoted Go character literal representing
128 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
129 // \u0100) for non-ASCII characters and non-printable characters as defined
130 // by IsPrint.
131 func QuoteRuneToASCII(r rune) string {
132 // TODO: avoid the allocation here.
133 return quoteWith(string(r), '\'', true)
136 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
137 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
138 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
139 return append(dst, QuoteRuneToASCII(r)...)
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than tab.
//
// It returns false if s contains a backquote, DEL, any control character
// below space other than tab, a byte sequence that is not valid UTF-8, or
// the byte order mark U+FEFF. Go source text must be valid UTF-8 and a
// compiler may reject a byte order mark anywhere but the start of a file,
// so such content cannot be placed verbatim inside a raw string literal.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			// A well-formed multi-byte rune is acceptable unless it is
			// the byte order mark.
			if r == '\ufeff' {
				return false
			}
			continue
		}
		// A width of 1 with RuneError means the byte was not valid UTF-8.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}
// unhex converts a single hexadecimal digit byte (0-9, a-f, A-F) to its
// numeric value. ok is false, and v is zero, when b is not a hex digit.
func unhex(b byte) (v rune, ok bool) {
	digit := rune(b)
	if '0' <= digit && digit <= '9' {
		return digit - '0', true
	}
	if 'a' <= digit && digit <= 'f' {
		return digit - 'a' + 10, true
	}
	if 'A' <= digit && digit <= 'F' {
		return digit - 'A' + 10, true
	}
	return 0, false
}
168 // UnquoteChar decodes the first character or byte in the escaped string
169 // or character literal represented by the string s.
170 // It returns four values:
172 // 1) value, the decoded Unicode code point or byte value;
173 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
174 // 3) tail, the remainder of the string after the character; and
175 // 4) an error that will be nil if the character is syntactically valid.
177 // The second argument, quote, specifies the type of literal being parsed
178 // and therefore which escaped quote character is permitted.
179 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
180 // If set to a double quote, it permits \" and disallows unescaped ".
181 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
182 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
183 // easy cases
184 switch c := s[0]; {
185 case c == quote && (quote == '\'' || quote == '"'):
186 err = ErrSyntax
187 return
188 case c >= utf8.RuneSelf:
189 r, size := utf8.DecodeRuneInString(s)
190 return r, true, s[size:], nil
191 case c != '\\':
192 return rune(s[0]), false, s[1:], nil
195 // hard case: c is backslash
196 if len(s) <= 1 {
197 err = ErrSyntax
198 return
200 c := s[1]
201 s = s[2:]
203 switch c {
204 case 'a':
205 value = '\a'
206 case 'b':
207 value = '\b'
208 case 'f':
209 value = '\f'
210 case 'n':
211 value = '\n'
212 case 'r':
213 value = '\r'
214 case 't':
215 value = '\t'
216 case 'v':
217 value = '\v'
218 case 'x', 'u', 'U':
219 n := 0
220 switch c {
221 case 'x':
222 n = 2
223 case 'u':
224 n = 4
225 case 'U':
226 n = 8
228 var v rune
229 if len(s) < n {
230 err = ErrSyntax
231 return
233 for j := 0; j < n; j++ {
234 x, ok := unhex(s[j])
235 if !ok {
236 err = ErrSyntax
237 return
239 v = v<<4 | x
241 s = s[n:]
242 if c == 'x' {
243 // single-byte string, possibly not UTF-8
244 value = v
245 break
247 if v > utf8.MaxRune {
248 err = ErrSyntax
249 return
251 value = v
252 multibyte = true
253 case '0', '1', '2', '3', '4', '5', '6', '7':
254 v := rune(c) - '0'
255 if len(s) < 2 {
256 err = ErrSyntax
257 return
259 for j := 0; j < 2; j++ { // one digit already; two more
260 x := rune(s[j]) - '0'
261 if x < 0 || x > 7 {
262 err = ErrSyntax
263 return
265 v = (v << 3) | x
267 s = s[2:]
268 if v > 255 {
269 err = ErrSyntax
270 return
272 value = v
273 case '\\':
274 value = '\\'
275 case '\'', '"':
276 if c != quote {
277 err = ErrSyntax
278 return
280 value = rune(c)
281 default:
282 err = ErrSyntax
283 return
285 tail = s
286 return
289 // Unquote interprets s as a single-quoted, double-quoted,
290 // or backquoted Go string literal, returning the string value
291 // that s quotes. (If s is single-quoted, it would be a Go
292 // character literal; Unquote returns the corresponding
293 // one-character string.)
294 func Unquote(s string) (t string, err error) {
295 n := len(s)
296 if n < 2 {
297 return "", ErrSyntax
299 quote := s[0]
300 if quote != s[n-1] {
301 return "", ErrSyntax
303 s = s[1 : n-1]
305 if quote == '`' {
306 if contains(s, '`') {
307 return "", ErrSyntax
309 return s, nil
311 if quote != '"' && quote != '\'' {
312 return "", ErrSyntax
314 if contains(s, '\n') {
315 return "", ErrSyntax
318 // Is it trivial? Avoid allocation.
319 if !contains(s, '\\') && !contains(s, quote) {
320 switch quote {
321 case '"':
322 return s, nil
323 case '\'':
324 r, size := utf8.DecodeRuneInString(s)
325 if size == len(s) && (r != utf8.RuneError || size != 1) {
326 return s, nil
331 var runeTmp [utf8.UTFMax]byte
332 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
333 for len(s) > 0 {
334 c, multibyte, ss, err := UnquoteChar(s, quote)
335 if err != nil {
336 return "", err
338 s = ss
339 if c < utf8.RuneSelf || !multibyte {
340 buf = append(buf, byte(c))
341 } else {
342 n := utf8.EncodeRune(runeTmp[:], c)
343 buf = append(buf, runeTmp[:n]...)
345 if quote == '\'' && len(s) != 0 {
346 // single-quoted must be single character
347 return "", ErrSyntax
350 return string(buf), nil
// contains reports whether the byte c occurs anywhere in the string s.
func contains(s string, c byte) bool {
	for _, b := range []byte(s) {
		if b == c {
			return true
		}
	}
	return false
}
// bsearch16 performs a binary search over the ascending slice a and
// returns the smallest index i for which a[i] >= x, or len(a) when every
// element is smaller than x.
func bsearch16(a []uint16, x uint16) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
// bsearch32 performs a binary search over the ascending slice a and
// returns the smallest index i for which a[i] >= x, or len(a) when every
// element is smaller than x.
func bsearch32(a []uint32, x uint32) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
393 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
394 // to give the same answer. It allows this package not to depend on unicode,
395 // and therefore not pull in all the Unicode tables. If the linker were better
396 // at tossing unused tables, we could get rid of this implementation.
397 // That would be nice.
399 // IsPrint reports whether the rune is defined as printable by Go, with
400 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
401 // symbols and ASCII space.
402 func IsPrint(r rune) bool {
403 // Fast check for Latin-1
404 if r <= 0xFF {
405 if 0x20 <= r && r <= 0x7E {
406 // All the ASCII is printable from space through DEL-1.
407 return true
409 if 0xA1 <= r && r <= 0xFF {
410 // Similarly for ¡ through ÿ...
411 return r != 0xAD // ...except for the bizarre soft hyphen.
413 return false
416 // Same algorithm, either on uint16 or uint32 value.
417 // First, find first i such that isPrint[i] >= x.
418 // This is the index of either the start or end of a pair that might span x.
419 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
420 // If we find x in a range, make sure x is not in isNotPrint list.
422 if 0 <= r && r < 1<<16 {
423 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
424 i := bsearch16(isPrint, rr)
425 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
426 return false
428 j := bsearch16(isNotPrint, rr)
429 return j >= len(isNotPrint) || isNotPrint[j] != rr
432 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
433 i := bsearch32(isPrint, rr)
434 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
435 return false
437 if r >= 0x20000 {
438 return true
440 r -= 0x10000
441 j := bsearch16(isNotPrint, uint16(r))
442 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)