libgo: update to Go 1.11
[official-gcc.git] / libgo / go / strconv / quote.go
blob9b7194a0f041d456de6d51c6b16174f9b6062df8
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 //go:generate go run makeisprint.go -output isprint.go
7 package strconv
9 import "unicode/utf8"
11 const lowerhex = "0123456789abcdef"
13 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
14 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
17 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
18 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
21 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
22 buf = append(buf, quote)
23 for width := 0; len(s) > 0; s = s[width:] {
24 r := rune(s[0])
25 width = 1
26 if r >= utf8.RuneSelf {
27 r, width = utf8.DecodeRuneInString(s)
29 if width == 1 && r == utf8.RuneError {
30 buf = append(buf, `\x`...)
31 buf = append(buf, lowerhex[s[0]>>4])
32 buf = append(buf, lowerhex[s[0]&0xF])
33 continue
35 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
37 buf = append(buf, quote)
38 return buf
41 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
42 buf = append(buf, quote)
43 if !utf8.ValidRune(r) {
44 r = utf8.RuneError
46 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
47 buf = append(buf, quote)
48 return buf
51 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
52 var runeTmp [utf8.UTFMax]byte
53 if r == rune(quote) || r == '\\' { // always backslashed
54 buf = append(buf, '\\')
55 buf = append(buf, byte(r))
56 return buf
58 if ASCIIonly {
59 if r < utf8.RuneSelf && IsPrint(r) {
60 buf = append(buf, byte(r))
61 return buf
63 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
64 n := utf8.EncodeRune(runeTmp[:], r)
65 buf = append(buf, runeTmp[:n]...)
66 return buf
68 switch r {
69 case '\a':
70 buf = append(buf, `\a`...)
71 case '\b':
72 buf = append(buf, `\b`...)
73 case '\f':
74 buf = append(buf, `\f`...)
75 case '\n':
76 buf = append(buf, `\n`...)
77 case '\r':
78 buf = append(buf, `\r`...)
79 case '\t':
80 buf = append(buf, `\t`...)
81 case '\v':
82 buf = append(buf, `\v`...)
83 default:
84 switch {
85 case r < ' ':
86 buf = append(buf, `\x`...)
87 buf = append(buf, lowerhex[byte(r)>>4])
88 buf = append(buf, lowerhex[byte(r)&0xF])
89 case r > utf8.MaxRune:
90 r = 0xFFFD
91 fallthrough
92 case r < 0x10000:
93 buf = append(buf, `\u`...)
94 for s := 12; s >= 0; s -= 4 {
95 buf = append(buf, lowerhex[r>>uint(s)&0xF])
97 default:
98 buf = append(buf, `\U`...)
99 for s := 28; s >= 0; s -= 4 {
100 buf = append(buf, lowerhex[r>>uint(s)&0xF])
104 return buf
107 // Quote returns a double-quoted Go string literal representing s. The
108 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
109 // control characters and non-printable characters as defined by
110 // IsPrint.
111 func Quote(s string) string {
112 return quoteWith(s, '"', false, false)
115 // AppendQuote appends a double-quoted Go string literal representing s,
116 // as generated by Quote, to dst and returns the extended buffer.
117 func AppendQuote(dst []byte, s string) []byte {
118 return appendQuotedWith(dst, s, '"', false, false)
121 // QuoteToASCII returns a double-quoted Go string literal representing s.
122 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
123 // non-ASCII characters and non-printable characters as defined by IsPrint.
124 func QuoteToASCII(s string) string {
125 return quoteWith(s, '"', true, false)
128 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
129 // as generated by QuoteToASCII, to dst and returns the extended buffer.
130 func AppendQuoteToASCII(dst []byte, s string) []byte {
131 return appendQuotedWith(dst, s, '"', true, false)
134 // QuoteToGraphic returns a double-quoted Go string literal representing s.
135 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
136 // non-ASCII characters and non-printable characters as defined by IsGraphic.
137 func QuoteToGraphic(s string) string {
138 return quoteWith(s, '"', false, true)
141 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s,
142 // as generated by QuoteToGraphic, to dst and returns the extended buffer.
143 func AppendQuoteToGraphic(dst []byte, s string) []byte {
144 return appendQuotedWith(dst, s, '"', false, true)
147 // QuoteRune returns a single-quoted Go character literal representing the
148 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
149 // for control characters and non-printable characters as defined by IsPrint.
150 func QuoteRune(r rune) string {
151 return quoteRuneWith(r, '\'', false, false)
154 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
155 // as generated by QuoteRune, to dst and returns the extended buffer.
156 func AppendQuoteRune(dst []byte, r rune) []byte {
157 return appendQuotedRuneWith(dst, r, '\'', false, false)
160 // QuoteRuneToASCII returns a single-quoted Go character literal representing
161 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
162 // \u0100) for non-ASCII characters and non-printable characters as defined
163 // by IsPrint.
164 func QuoteRuneToASCII(r rune) string {
165 return quoteRuneWith(r, '\'', true, false)
168 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
169 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
170 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
171 return appendQuotedRuneWith(dst, r, '\'', true, false)
174 // QuoteRuneToGraphic returns a single-quoted Go character literal representing
175 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
176 // \u0100) for non-ASCII characters and non-printable characters as defined
177 // by IsGraphic.
178 func QuoteRuneToGraphic(r rune) string {
179 return quoteRuneWith(r, '\'', false, true)
182 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,
183 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.
184 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
185 return appendQuotedRuneWith(dst, r, '\'', false, true)
// CanBackquote reports whether s can be written unchanged as a single-line
// backquoted string that contains no control characters other than tab.
func CanBackquote(s string) bool {
	for rest := s; len(rest) > 0; {
		r, size := utf8.DecodeRuneInString(rest)
		rest = rest[size:]
		switch {
		case size > 1:
			// Multibyte runes decoded correctly and are assumed printable,
			// except the byte order mark, which is invisible.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// A one-byte RuneError means the input is not valid UTF-8.
			return false
		case r == '`' || r == '\u007F' || (r < ' ' && r != '\t'):
			return false
		}
	}
	return true
}
// unhex converts the hexadecimal digit b (0-9, a-f or A-F) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	if '0' <= b && b <= '9' {
		return rune(b - '0'), true
	}
	if 'a' <= b && b <= 'f' {
		return rune(b-'a') + 10, true
	}
	if 'A' <= b && b <= 'F' {
		return rune(b-'A') + 10, true
	}
	return 0, false
}
224 // UnquoteChar decodes the first character or byte in the escaped string
225 // or character literal represented by the string s.
226 // It returns four values:
228 // 1) value, the decoded Unicode code point or byte value;
229 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
230 // 3) tail, the remainder of the string after the character; and
231 // 4) an error that will be nil if the character is syntactically valid.
233 // The second argument, quote, specifies the type of literal being parsed
234 // and therefore which escaped quote character is permitted.
235 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
236 // If set to a double quote, it permits \" and disallows unescaped ".
237 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
238 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
239 // easy cases
240 if len(s) == 0 {
241 err = ErrSyntax
242 return
244 switch c := s[0]; {
245 case c == quote && (quote == '\'' || quote == '"'):
246 err = ErrSyntax
247 return
248 case c >= utf8.RuneSelf:
249 r, size := utf8.DecodeRuneInString(s)
250 return r, true, s[size:], nil
251 case c != '\\':
252 return rune(s[0]), false, s[1:], nil
255 // hard case: c is backslash
256 if len(s) <= 1 {
257 err = ErrSyntax
258 return
260 c := s[1]
261 s = s[2:]
263 switch c {
264 case 'a':
265 value = '\a'
266 case 'b':
267 value = '\b'
268 case 'f':
269 value = '\f'
270 case 'n':
271 value = '\n'
272 case 'r':
273 value = '\r'
274 case 't':
275 value = '\t'
276 case 'v':
277 value = '\v'
278 case 'x', 'u', 'U':
279 n := 0
280 switch c {
281 case 'x':
282 n = 2
283 case 'u':
284 n = 4
285 case 'U':
286 n = 8
288 var v rune
289 if len(s) < n {
290 err = ErrSyntax
291 return
293 for j := 0; j < n; j++ {
294 x, ok := unhex(s[j])
295 if !ok {
296 err = ErrSyntax
297 return
299 v = v<<4 | x
301 s = s[n:]
302 if c == 'x' {
303 // single-byte string, possibly not UTF-8
304 value = v
305 break
307 if v > utf8.MaxRune {
308 err = ErrSyntax
309 return
311 value = v
312 multibyte = true
313 case '0', '1', '2', '3', '4', '5', '6', '7':
314 v := rune(c) - '0'
315 if len(s) < 2 {
316 err = ErrSyntax
317 return
319 for j := 0; j < 2; j++ { // one digit already; two more
320 x := rune(s[j]) - '0'
321 if x < 0 || x > 7 {
322 err = ErrSyntax
323 return
325 v = (v << 3) | x
327 s = s[2:]
328 if v > 255 {
329 err = ErrSyntax
330 return
332 value = v
333 case '\\':
334 value = '\\'
335 case '\'', '"':
336 if c != quote {
337 err = ErrSyntax
338 return
340 value = rune(c)
341 default:
342 err = ErrSyntax
343 return
345 tail = s
346 return
349 // Unquote interprets s as a single-quoted, double-quoted,
350 // or backquoted Go string literal, returning the string value
351 // that s quotes. (If s is single-quoted, it would be a Go
352 // character literal; Unquote returns the corresponding
353 // one-character string.)
354 func Unquote(s string) (string, error) {
355 n := len(s)
356 if n < 2 {
357 return "", ErrSyntax
359 quote := s[0]
360 if quote != s[n-1] {
361 return "", ErrSyntax
363 s = s[1 : n-1]
365 if quote == '`' {
366 if contains(s, '`') {
367 return "", ErrSyntax
369 if contains(s, '\r') {
370 // -1 because we know there is at least one \r to remove.
371 buf := make([]byte, 0, len(s)-1)
372 for i := 0; i < len(s); i++ {
373 if s[i] != '\r' {
374 buf = append(buf, s[i])
377 return string(buf), nil
379 return s, nil
381 if quote != '"' && quote != '\'' {
382 return "", ErrSyntax
384 if contains(s, '\n') {
385 return "", ErrSyntax
388 // Is it trivial? Avoid allocation.
389 if !contains(s, '\\') && !contains(s, quote) {
390 switch quote {
391 case '"':
392 if utf8.ValidString(s) {
393 return s, nil
395 case '\'':
396 r, size := utf8.DecodeRuneInString(s)
397 if size == len(s) && (r != utf8.RuneError || size != 1) {
398 return s, nil
403 var runeTmp [utf8.UTFMax]byte
404 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
405 for len(s) > 0 {
406 c, multibyte, ss, err := UnquoteChar(s, quote)
407 if err != nil {
408 return "", err
410 s = ss
411 if c < utf8.RuneSelf || !multibyte {
412 buf = append(buf, byte(c))
413 } else {
414 n := utf8.EncodeRune(runeTmp[:], c)
415 buf = append(buf, runeTmp[:n]...)
417 if quote == '\'' && len(s) != 0 {
418 // single-quoted must be single character
419 return "", ErrSyntax
422 return string(buf), nil
// contains reports whether the byte c occurs anywhere in s.
func contains(s string, c byte) bool {
	for rest := s; len(rest) > 0; rest = rest[1:] {
		if rest[0] == c {
			return true
		}
	}
	return false
}
// bsearch16 performs a binary search over a and returns the smallest index i
// for which a[i] >= x, or len(a) when no element qualifies.
func bsearch16(a []uint16, x uint16) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
// bsearch32 performs a binary search over a and returns the smallest index i
// for which a[i] >= x, or len(a) when no element qualifies.
func bsearch32(a []uint32, x uint32) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}
465 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
466 // to give the same answer. It allows this package not to depend on unicode,
467 // and therefore not pull in all the Unicode tables. If the linker were better
468 // at tossing unused tables, we could get rid of this implementation.
469 // That would be nice.
471 // IsPrint reports whether the rune is defined as printable by Go, with
472 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
473 // symbols and ASCII space.
474 func IsPrint(r rune) bool {
475 // Fast check for Latin-1
476 if r <= 0xFF {
477 if 0x20 <= r && r <= 0x7E {
478 // All the ASCII is printable from space through DEL-1.
479 return true
481 if 0xA1 <= r && r <= 0xFF {
482 // Similarly for ¡ through ÿ...
483 return r != 0xAD // ...except for the bizarre soft hyphen.
485 return false
488 // Same algorithm, either on uint16 or uint32 value.
489 // First, find first i such that isPrint[i] >= x.
490 // This is the index of either the start or end of a pair that might span x.
491 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
492 // If we find x in a range, make sure x is not in isNotPrint list.
494 if 0 <= r && r < 1<<16 {
495 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
496 i := bsearch16(isPrint, rr)
497 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
498 return false
500 j := bsearch16(isNotPrint, rr)
501 return j >= len(isNotPrint) || isNotPrint[j] != rr
504 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
505 i := bsearch32(isPrint, rr)
506 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
507 return false
509 if r >= 0x20000 {
510 return true
512 r -= 0x10000
513 j := bsearch16(isNotPrint, uint16(r))
514 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
517 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such
518 // characters include letters, marks, numbers, punctuation, symbols, and
519 // spaces, from categories L, M, N, P, S, and Zs.
520 func IsGraphic(r rune) bool {
521 if IsPrint(r) {
522 return true
524 return isInGraphicList(r)
527 // isInGraphicList reports whether the rune is in the isGraphic list. This separation
528 // from IsGraphic allows quoteWith to avoid two calls to IsPrint.
529 // Should be called only if IsPrint fails.
530 func isInGraphicList(r rune) bool {
531 // We know r must fit in 16 bits - see makeisprint.go.
532 if r > 0xFFFF {
533 return false
535 rr := uint16(r)
536 i := bsearch16(isGraphic, rr)
537 return i < len(isGraphic) && rr == isGraphic[i]