PR c++/86342 - -Wdeprecated-copy and system headers.
[official-gcc.git] / libgo / go / strconv / quote.go
blob156a510d213b274cb6487e62c6b3ba9b4243db42
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
5 //go:generate go run makeisprint.go -output isprint.go
7 package strconv
9 import "unicode/utf8"
// lowerhex holds the lowercase hexadecimal digits indexed by nibble value.
// It is used to emit the digits of \x, \u and \U escape sequences.
const lowerhex = "0123456789abcdef"
13 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
14 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
17 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
18 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
21 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
22 buf = append(buf, quote)
23 for width := 0; len(s) > 0; s = s[width:] {
24 r := rune(s[0])
25 width = 1
26 if r >= utf8.RuneSelf {
27 r, width = utf8.DecodeRuneInString(s)
29 if width == 1 && r == utf8.RuneError {
30 buf = append(buf, `\x`...)
31 buf = append(buf, lowerhex[s[0]>>4])
32 buf = append(buf, lowerhex[s[0]&0xF])
33 continue
35 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
37 buf = append(buf, quote)
38 return buf
41 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
42 buf = append(buf, quote)
43 if !utf8.ValidRune(r) {
44 r = utf8.RuneError
46 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
47 buf = append(buf, quote)
48 return buf
51 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
52 var runeTmp [utf8.UTFMax]byte
53 if r == rune(quote) || r == '\\' { // always backslashed
54 buf = append(buf, '\\')
55 buf = append(buf, byte(r))
56 return buf
58 if ASCIIonly {
59 if r < utf8.RuneSelf && IsPrint(r) {
60 buf = append(buf, byte(r))
61 return buf
63 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
64 n := utf8.EncodeRune(runeTmp[:], r)
65 buf = append(buf, runeTmp[:n]...)
66 return buf
68 switch r {
69 case '\a':
70 buf = append(buf, `\a`...)
71 case '\b':
72 buf = append(buf, `\b`...)
73 case '\f':
74 buf = append(buf, `\f`...)
75 case '\n':
76 buf = append(buf, `\n`...)
77 case '\r':
78 buf = append(buf, `\r`...)
79 case '\t':
80 buf = append(buf, `\t`...)
81 case '\v':
82 buf = append(buf, `\v`...)
83 default:
84 switch {
85 case r < ' ':
86 buf = append(buf, `\x`...)
87 buf = append(buf, lowerhex[byte(r)>>4])
88 buf = append(buf, lowerhex[byte(r)&0xF])
89 case r > utf8.MaxRune:
90 r = 0xFFFD
91 fallthrough
92 case r < 0x10000:
93 buf = append(buf, `\u`...)
94 for s := 12; s >= 0; s -= 4 {
95 buf = append(buf, lowerhex[r>>uint(s)&0xF])
97 default:
98 buf = append(buf, `\U`...)
99 for s := 28; s >= 0; s -= 4 {
100 buf = append(buf, lowerhex[r>>uint(s)&0xF])
104 return buf
107 // Quote returns a double-quoted Go string literal representing s. The
108 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
109 // control characters and non-printable characters as defined by
110 // IsPrint.
111 func Quote(s string) string {
112 return quoteWith(s, '"', false, false)
115 // AppendQuote appends a double-quoted Go string literal representing s,
116 // as generated by Quote, to dst and returns the extended buffer.
117 func AppendQuote(dst []byte, s string) []byte {
118 return appendQuotedWith(dst, s, '"', false, false)
121 // QuoteToASCII returns a double-quoted Go string literal representing s.
122 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
123 // non-ASCII characters and non-printable characters as defined by IsPrint.
124 func QuoteToASCII(s string) string {
125 return quoteWith(s, '"', true, false)
128 // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
129 // as generated by QuoteToASCII, to dst and returns the extended buffer.
130 func AppendQuoteToASCII(dst []byte, s string) []byte {
131 return appendQuotedWith(dst, s, '"', true, false)
134 // QuoteToGraphic returns a double-quoted Go string literal representing s.
135 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
136 // non-ASCII characters and non-printable characters as defined by IsGraphic.
137 func QuoteToGraphic(s string) string {
138 return quoteWith(s, '"', false, true)
141 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s,
142 // as generated by QuoteToGraphic, to dst and returns the extended buffer.
143 func AppendQuoteToGraphic(dst []byte, s string) []byte {
144 return appendQuotedWith(dst, s, '"', false, true)
147 // QuoteRune returns a single-quoted Go character literal representing the
148 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
149 // for control characters and non-printable characters as defined by IsPrint.
150 func QuoteRune(r rune) string {
151 return quoteRuneWith(r, '\'', false, false)
154 // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
155 // as generated by QuoteRune, to dst and returns the extended buffer.
156 func AppendQuoteRune(dst []byte, r rune) []byte {
157 return appendQuotedRuneWith(dst, r, '\'', false, false)
160 // QuoteRuneToASCII returns a single-quoted Go character literal representing
161 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
162 // \u0100) for non-ASCII characters and non-printable characters as defined
163 // by IsPrint.
164 func QuoteRuneToASCII(r rune) string {
165 return quoteRuneWith(r, '\'', true, false)
168 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
169 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
170 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
171 return appendQuotedRuneWith(dst, r, '\'', true, false)
174 // QuoteRuneToGraphic returns a single-quoted Go character literal representing
175 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
176 // \u0100) for non-ASCII characters and non-printable characters as defined
177 // by IsGraphic.
178 func QuoteRuneToGraphic(r rune) string {
179 return quoteRuneWith(r, '\'', false, true)
182 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,
183 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.
184 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
185 return appendQuotedRuneWith(dst, r, '\'', false, true)
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than tab.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			if r == '\ufeff' {
				return false // BOMs are invisible and should not be quoted.
			}
			continue // All other multibyte runes are correctly encoded and assumed printable.
		}
		// From here on wid == 1, so RuneError can only mean an invalid byte.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}
// unhex converts the hexadecimal digit b (0-9, a-f or A-F) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}
224 // UnquoteChar decodes the first character or byte in the escaped string
225 // or character literal represented by the string s.
226 // It returns four values:
228 // 1) value, the decoded Unicode code point or byte value;
229 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
230 // 3) tail, the remainder of the string after the character; and
231 // 4) an error that will be nil if the character is syntactically valid.
233 // The second argument, quote, specifies the type of literal being parsed
234 // and therefore which escaped quote character is permitted.
235 // If set to a single quote, it permits the sequence \' and disallows unescaped '.
236 // If set to a double quote, it permits \" and disallows unescaped ".
237 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
238 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
239 // easy cases
240 switch c := s[0]; {
241 case c == quote && (quote == '\'' || quote == '"'):
242 err = ErrSyntax
243 return
244 case c >= utf8.RuneSelf:
245 r, size := utf8.DecodeRuneInString(s)
246 return r, true, s[size:], nil
247 case c != '\\':
248 return rune(s[0]), false, s[1:], nil
251 // hard case: c is backslash
252 if len(s) <= 1 {
253 err = ErrSyntax
254 return
256 c := s[1]
257 s = s[2:]
259 switch c {
260 case 'a':
261 value = '\a'
262 case 'b':
263 value = '\b'
264 case 'f':
265 value = '\f'
266 case 'n':
267 value = '\n'
268 case 'r':
269 value = '\r'
270 case 't':
271 value = '\t'
272 case 'v':
273 value = '\v'
274 case 'x', 'u', 'U':
275 n := 0
276 switch c {
277 case 'x':
278 n = 2
279 case 'u':
280 n = 4
281 case 'U':
282 n = 8
284 var v rune
285 if len(s) < n {
286 err = ErrSyntax
287 return
289 for j := 0; j < n; j++ {
290 x, ok := unhex(s[j])
291 if !ok {
292 err = ErrSyntax
293 return
295 v = v<<4 | x
297 s = s[n:]
298 if c == 'x' {
299 // single-byte string, possibly not UTF-8
300 value = v
301 break
303 if v > utf8.MaxRune {
304 err = ErrSyntax
305 return
307 value = v
308 multibyte = true
309 case '0', '1', '2', '3', '4', '5', '6', '7':
310 v := rune(c) - '0'
311 if len(s) < 2 {
312 err = ErrSyntax
313 return
315 for j := 0; j < 2; j++ { // one digit already; two more
316 x := rune(s[j]) - '0'
317 if x < 0 || x > 7 {
318 err = ErrSyntax
319 return
321 v = (v << 3) | x
323 s = s[2:]
324 if v > 255 {
325 err = ErrSyntax
326 return
328 value = v
329 case '\\':
330 value = '\\'
331 case '\'', '"':
332 if c != quote {
333 err = ErrSyntax
334 return
336 value = rune(c)
337 default:
338 err = ErrSyntax
339 return
341 tail = s
342 return
345 // Unquote interprets s as a single-quoted, double-quoted,
346 // or backquoted Go string literal, returning the string value
347 // that s quotes. (If s is single-quoted, it would be a Go
348 // character literal; Unquote returns the corresponding
349 // one-character string.)
350 func Unquote(s string) (string, error) {
351 n := len(s)
352 if n < 2 {
353 return "", ErrSyntax
355 quote := s[0]
356 if quote != s[n-1] {
357 return "", ErrSyntax
359 s = s[1 : n-1]
361 if quote == '`' {
362 if contains(s, '`') {
363 return "", ErrSyntax
365 if contains(s, '\r') {
366 // -1 because we know there is at least one \r to remove.
367 buf := make([]byte, 0, len(s)-1)
368 for i := 0; i < len(s); i++ {
369 if s[i] != '\r' {
370 buf = append(buf, s[i])
373 return string(buf), nil
375 return s, nil
377 if quote != '"' && quote != '\'' {
378 return "", ErrSyntax
380 if contains(s, '\n') {
381 return "", ErrSyntax
384 // Is it trivial? Avoid allocation.
385 if !contains(s, '\\') && !contains(s, quote) {
386 switch quote {
387 case '"':
388 return s, nil
389 case '\'':
390 r, size := utf8.DecodeRuneInString(s)
391 if size == len(s) && (r != utf8.RuneError || size != 1) {
392 return s, nil
397 var runeTmp [utf8.UTFMax]byte
398 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
399 for len(s) > 0 {
400 c, multibyte, ss, err := UnquoteChar(s, quote)
401 if err != nil {
402 return "", err
404 s = ss
405 if c < utf8.RuneSelf || !multibyte {
406 buf = append(buf, byte(c))
407 } else {
408 n := utf8.EncodeRune(runeTmp[:], c)
409 buf = append(buf, runeTmp[:n]...)
411 if quote == '\'' && len(s) != 0 {
412 // single-quoted must be single character
413 return "", ErrSyntax
416 return string(buf), nil
// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// The result is only meaningful when a is sorted in increasing order.
func bsearch16(a []uint16, x uint16) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint computed without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
// The result is only meaningful when a is sorted in increasing order.
func bsearch32(a []uint32, x uint32) int {
	i, j := 0, len(a)
	for i < j {
		h := i + (j-i)/2 // midpoint computed without overflowing i+j
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}
459 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
460 // to give the same answer. It allows this package not to depend on unicode,
461 // and therefore not pull in all the Unicode tables. If the linker were better
462 // at tossing unused tables, we could get rid of this implementation.
463 // That would be nice.
465 // IsPrint reports whether the rune is defined as printable by Go, with
466 // the same definition as unicode.IsPrint: letters, numbers, punctuation,
467 // symbols and ASCII space.
468 func IsPrint(r rune) bool {
469 // Fast check for Latin-1
470 if r <= 0xFF {
471 if 0x20 <= r && r <= 0x7E {
472 // All the ASCII is printable from space through DEL-1.
473 return true
475 if 0xA1 <= r && r <= 0xFF {
476 // Similarly for ¡ through ÿ...
477 return r != 0xAD // ...except for the bizarre soft hyphen.
479 return false
482 // Same algorithm, either on uint16 or uint32 value.
483 // First, find first i such that isPrint[i] >= x.
484 // This is the index of either the start or end of a pair that might span x.
485 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
486 // If we find x in a range, make sure x is not in isNotPrint list.
488 if 0 <= r && r < 1<<16 {
489 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
490 i := bsearch16(isPrint, rr)
491 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
492 return false
494 j := bsearch16(isNotPrint, rr)
495 return j >= len(isNotPrint) || isNotPrint[j] != rr
498 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
499 i := bsearch32(isPrint, rr)
500 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
501 return false
503 if r >= 0x20000 {
504 return true
506 r -= 0x10000
507 j := bsearch16(isNotPrint, uint16(r))
508 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
511 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such
512 // characters include letters, marks, numbers, punctuation, symbols, and
513 // spaces, from categories L, M, N, P, S, and Zs.
514 func IsGraphic(r rune) bool {
515 if IsPrint(r) {
516 return true
518 return isInGraphicList(r)
521 // isInGraphicList reports whether the rune is in the isGraphic list. This separation
522 // from IsGraphic allows quoteWith to avoid two calls to IsPrint.
523 // Should be called only if IsPrint fails.
524 func isInGraphicList(r rune) bool {
525 // We know r must fit in 16 bits - see makeisprint.go.
526 if r > 0xFFFF {
527 return false
529 rr := uint16(r)
530 i := bsearch16(isGraphic, rr)
531 return i < len(isGraphic) && rr == isGraphic[i]