2014-04-11 Marc Glisse <marc.glisse@inria.fr>
[official-gcc.git] / libgo / go / strconv / quote.go
blob7d6cdcf0b54985ce3133556be18500bab4db8da3
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package strconv
7 import (
8 "unicode/utf8"
// lowerhex is indexed by a 4-bit value (0-15) to obtain the corresponding
// lowercase hexadecimal digit when writing \x, \u and \U escapes.
11 const lowerhex = "0123456789abcdef"
13 func quoteWith(s string, quote byte, ASCIIonly bool) string {
14 var runeTmp [utf8.UTFMax]byte
15 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
16 buf = append(buf, quote)
17 for width := 0; len(s) > 0; s = s[width:] {
18 r := rune(s[0])
19 width = 1
20 if r >= utf8.RuneSelf {
21 r, width = utf8.DecodeRuneInString(s)
23 if width == 1 && r == utf8.RuneError {
24 buf = append(buf, `\x`...)
25 buf = append(buf, lowerhex[s[0]>>4])
26 buf = append(buf, lowerhex[s[0]&0xF])
27 continue
29 if r == rune(quote) || r == '\\' { // always backslashed
30 buf = append(buf, '\\')
31 buf = append(buf, byte(r))
32 continue
34 if ASCIIonly {
35 if r < utf8.RuneSelf && IsPrint(r) {
36 buf = append(buf, byte(r))
37 continue
39 } else if IsPrint(r) {
40 n := utf8.EncodeRune(runeTmp[:], r)
41 buf = append(buf, runeTmp[:n]...)
42 continue
44 switch r {
45 case '\a':
46 buf = append(buf, `\a`...)
47 case '\b':
48 buf = append(buf, `\b`...)
49 case '\f':
50 buf = append(buf, `\f`...)
51 case '\n':
52 buf = append(buf, `\n`...)
53 case '\r':
54 buf = append(buf, `\r`...)
55 case '\t':
56 buf = append(buf, `\t`...)
57 case '\v':
58 buf = append(buf, `\v`...)
59 default:
60 switch {
61 case r < ' ':
62 buf = append(buf, `\x`...)
63 buf = append(buf, lowerhex[s[0]>>4])
64 buf = append(buf, lowerhex[s[0]&0xF])
65 case r > utf8.MaxRune:
66 r = 0xFFFD
67 fallthrough
68 case r < 0x10000:
69 buf = append(buf, `\u`...)
70 for s := 12; s >= 0; s -= 4 {
71 buf = append(buf, lowerhex[r>>uint(s)&0xF])
73 default:
74 buf = append(buf, `\U`...)
75 for s := 28; s >= 0; s -= 4 {
76 buf = append(buf, lowerhex[r>>uint(s)&0xF])
81 buf = append(buf, quote)
82 return string(buf)
86 // Quote returns a double-quoted Go string literal representing s. The
87 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
88 // control characters and non-printable characters as defined by
89 // IsPrint.
90 func Quote(s string) string {
91 return quoteWith(s, '"', false)
94 // AppendQuote appends a double-quoted Go string literal representing s,
95 // as generated by Quote, to dst and returns the extended buffer.
96 func AppendQuote(dst []byte, s string) []byte {
97 return append(dst, Quote(s)...)
100 // QuoteToASCII returns a double-quoted Go string literal representing s.
101 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
102 // non-ASCII characters and non-printable characters as defined by IsPrint.
103 func QuoteToASCII(s string) string {
104 return quoteWith(s, '"', true)
107 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
108 // as generated by QuoteToASCII, to dst and returns the extended buffer.
109 func AppendQuoteToASCII(dst []byte, s string) []byte {
110 return append(dst, QuoteToASCII(s)...)
113 // QuoteRune returns a single-quoted Go character literal representing the
114 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
115 // for control characters and non-printable characters as defined by IsPrint.
116 func QuoteRune(r rune) string {
117 // TODO: avoid the allocation here.
118 return quoteWith(string(r), '\'', false)
121 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
122 // as generated by QuoteRune, to dst and returns the extended buffer.
123 func AppendQuoteRune(dst []byte, r rune) []byte {
124 return append(dst, QuoteRune(r)...)
127 // QuoteRuneToASCII returns a single-quoted Go character literal representing
128 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
129 // \u0100) for non-ASCII characters and non-printable characters as defined
130 // by IsPrint.
131 func QuoteRuneToASCII(r rune) string {
132 // TODO: avoid the allocation here.
133 return quoteWith(string(r), '\'', true)
136 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
137 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
138 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
139 return append(dst, QuoteRuneToASCII(r)...)
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than space and tab.
//
// It returns false if s contains a backquote, any ASCII control character
// other than tab (including DEL, U+007F), a byte order mark (U+FEFF), or
// bytes that are not valid UTF-8.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			// A byte order mark cannot be relied on to survive in Go
			// source text, so the literal would not be "unchanged".
			if r == '\ufeff' {
				return false
			}
			continue
		}
		// A width of one with RuneError means a byte that is not valid
		// UTF-8; Go source text must be valid UTF-8.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}
// unhex converts one hexadecimal digit (0-9, a-f, A-F) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	digit := rune(b)
	if digit >= '0' && digit <= '9' {
		return digit - '0', true
	}
	if digit >= 'a' && digit <= 'f' {
		return digit - 'a' + 10, true
	}
	if digit >= 'A' && digit <= 'F' {
		return digit - 'A' + 10, true
	}
	return 0, false
}
167 // UnquoteChar decodes the first character or byte in the escaped string
168 // or character literal represented by the string s.
169 // It returns four values:
171 // 1) value, the decoded Unicode code point or byte value;
172 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
173 // 3) tail, the remainder of the string after the character; and
174 // 4) an error that will be nil if the character is syntactically valid.
176 // The second argument, quote, specifies the type of literal being parsed
177 // and therefore which escaped quote character is permitted.
178 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
179 // If set to a double quote, it permits \" and disallows unescaped ".
180 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
181 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
182 // easy cases
183 switch c := s[0]; {
184 case c == quote && (quote == '\'' || quote == '"'):
185 err = ErrSyntax
186 return
187 case c >= utf8.RuneSelf:
188 r, size := utf8.DecodeRuneInString(s)
189 return r, true, s[size:], nil
190 case c != '\\':
191 return rune(s[0]), false, s[1:], nil
194 // hard case: c is backslash
195 if len(s) <= 1 {
196 err = ErrSyntax
197 return
199 c := s[1]
200 s = s[2:]
202 switch c {
203 case 'a':
204 value = '\a'
205 case 'b':
206 value = '\b'
207 case 'f':
208 value = '\f'
209 case 'n':
210 value = '\n'
211 case 'r':
212 value = '\r'
213 case 't':
214 value = '\t'
215 case 'v':
216 value = '\v'
217 case 'x', 'u', 'U':
218 n := 0
219 switch c {
220 case 'x':
221 n = 2
222 case 'u':
223 n = 4
224 case 'U':
225 n = 8
227 var v rune
228 if len(s) < n {
229 err = ErrSyntax
230 return
232 for j := 0; j < n; j++ {
233 x, ok := unhex(s[j])
234 if !ok {
235 err = ErrSyntax
236 return
238 v = v<<4 | x
240 s = s[n:]
241 if c == 'x' {
242 // single-byte string, possibly not UTF-8
243 value = v
244 break
246 if v > utf8.MaxRune {
247 err = ErrSyntax
248 return
250 value = v
251 multibyte = true
252 case '0', '1', '2', '3', '4', '5', '6', '7':
253 v := rune(c) - '0'
254 if len(s) < 2 {
255 err = ErrSyntax
256 return
258 for j := 0; j < 2; j++ { // one digit already; two more
259 x := rune(s[j]) - '0'
260 if x < 0 || x > 7 {
261 err = ErrSyntax
262 return
264 v = (v << 3) | x
266 s = s[2:]
267 if v > 255 {
268 err = ErrSyntax
269 return
271 value = v
272 case '\\':
273 value = '\\'
274 case '\'', '"':
275 if c != quote {
276 err = ErrSyntax
277 return
279 value = rune(c)
280 default:
281 err = ErrSyntax
282 return
284 tail = s
285 return
288 // Unquote interprets s as a single-quoted, double-quoted,
289 // or backquoted Go string literal, returning the string value
290 // that s quotes. (If s is single-quoted, it would be a Go
291 // character literal; Unquote returns the corresponding
292 // one-character string.)
293 func Unquote(s string) (t string, err error) {
294 n := len(s)
295 if n < 2 {
296 return "", ErrSyntax
298 quote := s[0]
299 if quote != s[n-1] {
300 return "", ErrSyntax
302 s = s[1 : n-1]
304 if quote == '`' {
305 if contains(s, '`') {
306 return "", ErrSyntax
308 return s, nil
310 if quote != '"' && quote != '\'' {
311 return "", ErrSyntax
313 if contains(s, '\n') {
314 return "", ErrSyntax
317 // Is it trivial? Avoid allocation.
318 if !contains(s, '\\') && !contains(s, quote) {
319 switch quote {
320 case '"':
321 return s, nil
322 case '\'':
323 r, size := utf8.DecodeRuneInString(s)
324 if size == len(s) && (r != utf8.RuneError || size != 1) {
325 return s, nil
330 var runeTmp [utf8.UTFMax]byte
331 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
332 for len(s) > 0 {
333 c, multibyte, ss, err := UnquoteChar(s, quote)
334 if err != nil {
335 return "", err
337 s = ss
338 if c < utf8.RuneSelf || !multibyte {
339 buf = append(buf, byte(c))
340 } else {
341 n := utf8.EncodeRune(runeTmp[:], c)
342 buf = append(buf, runeTmp[:n]...)
344 if quote == '\'' && len(s) != 0 {
345 // single-quoted must be single character
346 return "", ErrSyntax
349 return string(buf), nil
// contains reports whether the byte c occurs anywhere in s.
func contains(s string, c byte) bool {
	for len(s) > 0 {
		if s[0] == c {
			return true
		}
		s = s[1:]
	}
	return false
}
// bsearch16 binary-searches a for x and returns the smallest index i with
// a[i] >= x, or len(a) when no element satisfies that.
func bsearch16(a []uint16, x uint16) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
// bsearch32 binary-searches a for x and returns the smallest index i with
// a[i] >= x, or len(a) when no element satisfies that.
func bsearch32(a []uint32, x uint32) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
392 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
393 // to give the same answer. It allows this package not to depend on unicode,
394 // and therefore not pull in all the Unicode tables. If the linker were better
395 // at tossing unused tables, we could get rid of this implementation.
396 // That would be nice.
398 // IsPrint reports whether the rune is defined as printable by Go, with
399 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
400 // symbols and ASCII space.
401 func IsPrint(r rune) bool {
402 // Fast check for Latin-1
403 if r <= 0xFF {
404 if 0x20 <= r && r <= 0x7E {
405 // All the ASCII is printable from space through DEL-1.
406 return true
408 if 0xA1 <= r && r <= 0xFF {
409 // Similarly for ¡ through ÿ...
410 return r != 0xAD // ...except for the bizarre soft hyphen.
412 return false
415 // Same algorithm, either on uint16 or uint32 value.
416 // First, find first i such that isPrint[i] >= x.
417 // This is the index of either the start or end of a pair that might span x.
418 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
419 // If we find x in a range, make sure x is not in isNotPrint list.
421 if 0 <= r && r < 1<<16 {
422 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
423 i := bsearch16(isPrint, rr)
424 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
425 return false
427 j := bsearch16(isNotPrint, rr)
428 return j >= len(isNotPrint) || isNotPrint[j] != rr
431 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
432 i := bsearch32(isPrint, rr)
433 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
434 return false
436 if r >= 0x20000 {
437 return true
439 r -= 0x10000
440 j := bsearch16(isNotPrint, uint16(r))
441 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)