// 2015-09-24 Vladimir Makarov <vmakarov@redhat.com>
// [official-gcc.git] / libgo / go / strconv / quote.go
// blob 53d51b5a46a324a90bbdbf168efba9e3fab75129

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run makeisprint.go -output isprint.go

package strconv

import (
	"unicode/utf8"
)

// lowerhex holds the lowercase hexadecimal digits; indexing it with a
// 4-bit value yields the corresponding digit character.
const lowerhex = "0123456789abcdef"

15 func quoteWith(s string, quote byte, ASCIIonly bool) string {
16 var runeTmp [utf8.UTFMax]byte
17 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
18 buf = append(buf, quote)
19 for width := 0; len(s) > 0; s = s[width:] {
20 r := rune(s[0])
21 width = 1
22 if r >= utf8.RuneSelf {
23 r, width = utf8.DecodeRuneInString(s)
25 if width == 1 && r == utf8.RuneError {
26 buf = append(buf, `\x`...)
27 buf = append(buf, lowerhex[s[0]>>4])
28 buf = append(buf, lowerhex[s[0]&0xF])
29 continue
31 if r == rune(quote) || r == '\\' { // always backslashed
32 buf = append(buf, '\\')
33 buf = append(buf, byte(r))
34 continue
36 if ASCIIonly {
37 if r < utf8.RuneSelf && IsPrint(r) {
38 buf = append(buf, byte(r))
39 continue
41 } else if IsPrint(r) {
42 n := utf8.EncodeRune(runeTmp[:], r)
43 buf = append(buf, runeTmp[:n]...)
44 continue
46 switch r {
47 case '\a':
48 buf = append(buf, `\a`...)
49 case '\b':
50 buf = append(buf, `\b`...)
51 case '\f':
52 buf = append(buf, `\f`...)
53 case '\n':
54 buf = append(buf, `\n`...)
55 case '\r':
56 buf = append(buf, `\r`...)
57 case '\t':
58 buf = append(buf, `\t`...)
59 case '\v':
60 buf = append(buf, `\v`...)
61 default:
62 switch {
63 case r < ' ':
64 buf = append(buf, `\x`...)
65 buf = append(buf, lowerhex[s[0]>>4])
66 buf = append(buf, lowerhex[s[0]&0xF])
67 case r > utf8.MaxRune:
68 r = 0xFFFD
69 fallthrough
70 case r < 0x10000:
71 buf = append(buf, `\u`...)
72 for s := 12; s >= 0; s -= 4 {
73 buf = append(buf, lowerhex[r>>uint(s)&0xF])
75 default:
76 buf = append(buf, `\U`...)
77 for s := 28; s >= 0; s -= 4 {
78 buf = append(buf, lowerhex[r>>uint(s)&0xF])
83 buf = append(buf, quote)
84 return string(buf)
88 // Quote returns a double-quoted Go string literal representing s. The
89 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
90 // control characters and non-printable characters as defined by
91 // IsPrint.
92 func Quote(s string) string {
93 return quoteWith(s, '"', false)
96 // AppendQuote appends a double-quoted Go string literal representing s,
97 // as generated by Quote, to dst and returns the extended buffer.
98 func AppendQuote(dst []byte, s string) []byte {
99 return append(dst, Quote(s)...)
102 // QuoteToASCII returns a double-quoted Go string literal representing s.
103 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
104 // non-ASCII characters and non-printable characters as defined by IsPrint.
105 func QuoteToASCII(s string) string {
106 return quoteWith(s, '"', true)
109 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
110 // as generated by QuoteToASCII, to dst and returns the extended buffer.
111 func AppendQuoteToASCII(dst []byte, s string) []byte {
112 return append(dst, QuoteToASCII(s)...)
115 // QuoteRune returns a single-quoted Go character literal representing the
116 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
117 // for control characters and non-printable characters as defined by IsPrint.
118 func QuoteRune(r rune) string {
119 // TODO: avoid the allocation here.
120 return quoteWith(string(r), '\'', false)
123 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
124 // as generated by QuoteRune, to dst and returns the extended buffer.
125 func AppendQuoteRune(dst []byte, r rune) []byte {
126 return append(dst, QuoteRune(r)...)
129 // QuoteRuneToASCII returns a single-quoted Go character literal representing
130 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
131 // \u0100) for non-ASCII characters and non-printable characters as defined
132 // by IsPrint.
133 func QuoteRuneToASCII(r rune) string {
134 // TODO: avoid the allocation here.
135 return quoteWith(string(r), '\'', true)
138 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
139 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
140 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
141 return append(dst, QuoteRuneToASCII(r)...)
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than tab.
//
// It returns false if s contains a byte that is not valid UTF-8, a
// byte order mark (U+FEFF), a backquote, DEL (U+007F), or any
// character below space other than tab.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			if r == '\ufeff' {
				return false // BOMs are invisible and should not be quoted.
			}
			continue // All other multibyte runes are correctly encoded and assumed printable.
		}
		// From here on the rune occupied a single byte; RuneError with
		// width 1 is how the decoder reports invalid UTF-8.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}

// unhex converts the ASCII hexadecimal digit b (0-9, a-f or A-F) to its
// numeric value. ok is false, and v is zero, if b is not a hex digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}

180 // UnquoteChar decodes the first character or byte in the escaped string
181 // or character literal represented by the string s.
182 // It returns four values:
184 // 1) value, the decoded Unicode code point or byte value;
185 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
186 // 3) tail, the remainder of the string after the character; and
187 // 4) an error that will be nil if the character is syntactically valid.
189 // The second argument, quote, specifies the type of literal being parsed
190 // and therefore which escaped quote character is permitted.
191 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
192 // If set to a double quote, it permits \" and disallows unescaped ".
193 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
194 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
195 // easy cases
196 switch c := s[0]; {
197 case c == quote && (quote == '\'' || quote == '"'):
198 err = ErrSyntax
199 return
200 case c >= utf8.RuneSelf:
201 r, size := utf8.DecodeRuneInString(s)
202 return r, true, s[size:], nil
203 case c != '\\':
204 return rune(s[0]), false, s[1:], nil
207 // hard case: c is backslash
208 if len(s) <= 1 {
209 err = ErrSyntax
210 return
212 c := s[1]
213 s = s[2:]
215 switch c {
216 case 'a':
217 value = '\a'
218 case 'b':
219 value = '\b'
220 case 'f':
221 value = '\f'
222 case 'n':
223 value = '\n'
224 case 'r':
225 value = '\r'
226 case 't':
227 value = '\t'
228 case 'v':
229 value = '\v'
230 case 'x', 'u', 'U':
231 n := 0
232 switch c {
233 case 'x':
234 n = 2
235 case 'u':
236 n = 4
237 case 'U':
238 n = 8
240 var v rune
241 if len(s) < n {
242 err = ErrSyntax
243 return
245 for j := 0; j < n; j++ {
246 x, ok := unhex(s[j])
247 if !ok {
248 err = ErrSyntax
249 return
251 v = v<<4 | x
253 s = s[n:]
254 if c == 'x' {
255 // single-byte string, possibly not UTF-8
256 value = v
257 break
259 if v > utf8.MaxRune {
260 err = ErrSyntax
261 return
263 value = v
264 multibyte = true
265 case '0', '1', '2', '3', '4', '5', '6', '7':
266 v := rune(c) - '0'
267 if len(s) < 2 {
268 err = ErrSyntax
269 return
271 for j := 0; j < 2; j++ { // one digit already; two more
272 x := rune(s[j]) - '0'
273 if x < 0 || x > 7 {
274 err = ErrSyntax
275 return
277 v = (v << 3) | x
279 s = s[2:]
280 if v > 255 {
281 err = ErrSyntax
282 return
284 value = v
285 case '\\':
286 value = '\\'
287 case '\'', '"':
288 if c != quote {
289 err = ErrSyntax
290 return
292 value = rune(c)
293 default:
294 err = ErrSyntax
295 return
297 tail = s
298 return
301 // Unquote interprets s as a single-quoted, double-quoted,
302 // or backquoted Go string literal, returning the string value
303 // that s quotes. (If s is single-quoted, it would be a Go
304 // character literal; Unquote returns the corresponding
305 // one-character string.)
306 func Unquote(s string) (t string, err error) {
307 n := len(s)
308 if n < 2 {
309 return "", ErrSyntax
311 quote := s[0]
312 if quote != s[n-1] {
313 return "", ErrSyntax
315 s = s[1 : n-1]
317 if quote == '`' {
318 if contains(s, '`') {
319 return "", ErrSyntax
321 return s, nil
323 if quote != '"' && quote != '\'' {
324 return "", ErrSyntax
326 if contains(s, '\n') {
327 return "", ErrSyntax
330 // Is it trivial? Avoid allocation.
331 if !contains(s, '\\') && !contains(s, quote) {
332 switch quote {
333 case '"':
334 return s, nil
335 case '\'':
336 r, size := utf8.DecodeRuneInString(s)
337 if size == len(s) && (r != utf8.RuneError || size != 1) {
338 return s, nil
343 var runeTmp [utf8.UTFMax]byte
344 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
345 for len(s) > 0 {
346 c, multibyte, ss, err := UnquoteChar(s, quote)
347 if err != nil {
348 return "", err
350 s = ss
351 if c < utf8.RuneSelf || !multibyte {
352 buf = append(buf, byte(c))
353 } else {
354 n := utf8.EncodeRune(runeTmp[:], c)
355 buf = append(buf, runeTmp[:n]...)
357 if quote == '\'' && len(s) != 0 {
358 // single-quoted must be single character
359 return "", ErrSyntax
362 return string(buf), nil
// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}

// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// The binary search gives that result only when a is sorted in
// increasing order.
func bsearch16(a []uint16, x uint16) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}

// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
// The binary search gives that result only when a is sorted in
// increasing order.
func bsearch32(a []uint32, x uint32) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}

405 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
406 // to give the same answer. It allows this package not to depend on unicode,
407 // and therefore not pull in all the Unicode tables. If the linker were better
408 // at tossing unused tables, we could get rid of this implementation.
409 // That would be nice.
411 // IsPrint reports whether the rune is defined as printable by Go, with
412 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
413 // symbols and ASCII space.
414 func IsPrint(r rune) bool {
415 // Fast check for Latin-1
416 if r <= 0xFF {
417 if 0x20 <= r && r <= 0x7E {
418 // All the ASCII is printable from space through DEL-1.
419 return true
421 if 0xA1 <= r && r <= 0xFF {
422 // Similarly for ¡ through ÿ...
423 return r != 0xAD // ...except for the bizarre soft hyphen.
425 return false
428 // Same algorithm, either on uint16 or uint32 value.
429 // First, find first i such that isPrint[i] >= x.
430 // This is the index of either the start or end of a pair that might span x.
431 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
432 // If we find x in a range, make sure x is not in isNotPrint list.
434 if 0 <= r && r < 1<<16 {
435 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
436 i := bsearch16(isPrint, rr)
437 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
438 return false
440 j := bsearch16(isNotPrint, rr)
441 return j >= len(isNotPrint) || isNotPrint[j] != rr
444 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
445 i := bsearch32(isPrint, rr)
446 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
447 return false
449 if r >= 0x20000 {
450 return true
452 r -= 0x10000
453 j := bsearch16(isNotPrint, uint16(r))
454 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)