re PR tree-optimization/79007 (gcc.dg/tree-ssa/dse-points-to.c fails starting with...
[official-gcc.git] / libgo / go / strings / strings.go
blob919e8c8354e55e4d633d4ddb065e1457c62ac89e
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package strings implements simple functions to manipulate UTF-8 encoded strings.
6 //
7 // For information about UTF-8 strings in Go, see https://blog.golang.org/strings.
8 package strings
10 import (
11 "unicode"
12 "unicode/utf8"
// explode splits s into a slice of UTF-8 strings,
// one string per Unicode character up to a maximum of n (n < 0 means no limit).
//
// Every element except the last holds exactly one rune; an invalid UTF-8
// byte in those positions is replaced by the encoding of U+FFFD. The last
// element is the unsplit remainder of s and is stored as is, so it may hold
// several runes (when n is smaller than the rune count) or raw invalid bytes.
// The result is an empty, non-nil slice when s is empty or n is 0.
func explode(s string, n int) []string {
	l := utf8.RuneCountInString(s)
	if n < 0 || n > l {
		n = l
	}
	a := make([]string, n)
	for i := 0; i < n-1; i++ {
		ch, size := utf8.DecodeRuneInString(s)
		a[i] = s[:size]
		s = s[size:]
		if ch == utf8.RuneError {
			// Normalize undecodable input to a well-formed U+FFFD.
			a[i] = string(utf8.RuneError)
		}
	}
	if n > 0 {
		a[n-1] = s
	}
	return a
}
// primeRK is the prime base used in Rabin-Karp algorithm.
const primeRK = 16777619

// powRK returns primeRK raised to the n-th power, with the natural uint32
// wrap-around (i.e. modulo 2^32). It uses binary exponentiation, so the
// loop runs once per bit of n. A non-positive n yields 1.
func powRK(n int) uint32 {
	var pow, sq uint32 = 1, primeRK
	for ; n > 0; n >>= 1 {
		if n&1 != 0 {
			pow *= sq
		}
		sq *= sq
	}
	return pow
}

// hashStr returns the hash and the appropriate multiplicative
// factor for use in Rabin-Karp algorithm.
//
// The hash is the polynomial sep[0]*primeRK^(k-1) + ... + sep[k-1] for
// k = len(sep), computed in wrapping uint32 arithmetic. The factor is
// primeRK^k, which a rolling hash multiplies by the byte leaving the window.
func hashStr(sep string) (uint32, uint32) {
	hash := uint32(0)
	for i := 0; i < len(sep); i++ {
		hash = hash*primeRK + uint32(sep[i])
	}
	return hash, powRK(len(sep))
}

// hashStrRev returns the hash of the reverse of sep and the
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
//
// It is the same polynomial as hashStr but the bytes are consumed from the
// last to the first, which suits a search that scans from the end of a string.
func hashStrRev(sep string) (uint32, uint32) {
	hash := uint32(0)
	for i := len(sep) - 1; i >= 0; i-- {
		hash = hash*primeRK + uint32(sep[i])
	}
	return hash, powRK(len(sep))
}
75 // Count counts the number of non-overlapping instances of sep in s.
76 // If sep is an empty string, Count returns 1 + the number of Unicode code points in s.
77 func Count(s, sep string) int {
78 n := 0
79 // special cases
80 switch {
81 case len(sep) == 0:
82 return utf8.RuneCountInString(s) + 1
83 case len(sep) == 1:
84 // special case worth making fast
85 c := sep[0]
86 for i := 0; i < len(s); i++ {
87 if s[i] == c {
88 n++
91 return n
92 case len(sep) > len(s):
93 return 0
94 case len(sep) == len(s):
95 if sep == s {
96 return 1
98 return 0
100 // Rabin-Karp search
101 hashsep, pow := hashStr(sep)
102 h := uint32(0)
103 for i := 0; i < len(sep); i++ {
104 h = h*primeRK + uint32(s[i])
106 lastmatch := 0
107 if h == hashsep && s[:len(sep)] == sep {
109 lastmatch = len(sep)
111 for i := len(sep); i < len(s); {
112 h *= primeRK
113 h += uint32(s[i])
114 h -= pow * uint32(s[i-len(sep)])
116 if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
118 lastmatch = i
121 return n
124 // Contains reports whether substr is within s.
125 func Contains(s, substr string) bool {
126 return Index(s, substr) >= 0
129 // ContainsAny reports whether any Unicode code points in chars are within s.
130 func ContainsAny(s, chars string) bool {
131 return IndexAny(s, chars) >= 0
134 // ContainsRune reports whether the Unicode code point r is within s.
135 func ContainsRune(s string, r rune) bool {
136 return IndexRune(s, r) >= 0
139 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
140 func LastIndex(s, sep string) int {
141 n := len(sep)
142 switch {
143 case n == 0:
144 return len(s)
145 case n == 1:
146 return LastIndexByte(s, sep[0])
147 case n == len(s):
148 if sep == s {
149 return 0
151 return -1
152 case n > len(s):
153 return -1
155 // Rabin-Karp search from the end of the string
156 hashsep, pow := hashStrRev(sep)
157 last := len(s) - n
158 var h uint32
159 for i := len(s) - 1; i >= last; i-- {
160 h = h*primeRK + uint32(s[i])
162 if h == hashsep && s[last:] == sep {
163 return last
165 for i := last - 1; i >= 0; i-- {
166 h *= primeRK
167 h += uint32(s[i])
168 h -= pow * uint32(s[i+n])
169 if h == hashsep && s[i:i+n] == sep {
170 return i
173 return -1
176 // IndexRune returns the index of the first instance of the Unicode code point
177 // r, or -1 if rune is not present in s.
178 func IndexRune(s string, r rune) int {
179 switch {
180 case r < utf8.RuneSelf:
181 return IndexByte(s, byte(r))
182 default:
183 for i, c := range s {
184 if c == r {
185 return i
189 return -1
// IndexAny returns the index of the first instance of any Unicode code point
// from chars in s, or -1 if no Unicode code point from chars is present in s.
// The index is a byte offset into s. An empty chars always yields -1.
func IndexAny(s, chars string) int {
	if len(chars) == 0 {
		return -1
	}
	for i, c := range s {
		for _, m := range chars {
			if c == m {
				return i
			}
		}
	}
	return -1
}
// LastIndexAny returns the index of the last instance of any Unicode code
// point from chars in s, or -1 if no Unicode code point from chars is
// present in s.
// The index is a byte offset into s. An empty chars always yields -1.
func LastIndexAny(s, chars string) int {
	if len(chars) == 0 {
		return -1
	}
	// Walk s backwards one rune at a time; i is the end of the unread prefix.
	for i := len(s); i > 0; {
		r, size := utf8.DecodeLastRuneInString(s[:i])
		i -= size // i is now the start offset of r
		for _, m := range chars {
			if r == m {
				return i
			}
		}
	}
	return -1
}
// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
// The comparison is on raw bytes, so c may match inside a multi-byte UTF-8 sequence.
func LastIndexByte(s string, c byte) int {
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] == c {
			return i
		}
	}
	return -1
}
235 // Generic split: splits after each instance of sep,
236 // including sepSave bytes of sep in the subarrays.
237 func genSplit(s, sep string, sepSave, n int) []string {
238 if n == 0 {
239 return nil
241 if sep == "" {
242 return explode(s, n)
244 if n < 0 {
245 n = Count(s, sep) + 1
247 c := sep[0]
248 start := 0
249 a := make([]string, n)
250 na := 0
251 for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
252 if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
253 a[na] = s[start : i+sepSave]
254 na++
255 start = i + len(sep)
256 i += len(sep) - 1
259 a[na] = s[start:]
260 return a[0 : na+1]
263 // SplitN slices s into substrings separated by sep and returns a slice of
264 // the substrings between those separators.
265 // If sep is empty, SplitN splits after each UTF-8 sequence.
266 // The count determines the number of substrings to return:
267 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
268 // n == 0: the result is nil (zero substrings)
269 // n < 0: all substrings
270 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
272 // SplitAfterN slices s into substrings after each instance of sep and
273 // returns a slice of those substrings.
274 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
275 // The count determines the number of substrings to return:
276 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
277 // n == 0: the result is nil (zero substrings)
278 // n < 0: all substrings
279 func SplitAfterN(s, sep string, n int) []string {
280 return genSplit(s, sep, len(sep), n)
283 // Split slices s into all substrings separated by sep and returns a slice of
284 // the substrings between those separators.
285 // If sep is empty, Split splits after each UTF-8 sequence.
286 // It is equivalent to SplitN with a count of -1.
287 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
289 // SplitAfter slices s into all substrings after each instance of sep and
290 // returns a slice of those substrings.
291 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
292 // It is equivalent to SplitAfterN with a count of -1.
293 func SplitAfter(s, sep string) []string {
294 return genSplit(s, sep, len(sep), -1)
297 // Fields splits the string s around each instance of one or more consecutive white space
298 // characters, as defined by unicode.IsSpace, returning an array of substrings of s or an
299 // empty list if s contains only white space.
300 func Fields(s string) []string {
301 return FieldsFunc(s, unicode.IsSpace)
// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
// and returns an array of slices of s. If all code points in s satisfy f(c) or the
// string is empty, an empty slice is returned.
// FieldsFunc makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s string, f func(rune) bool) []string {
	// First count the fields, so the result can be allocated exactly once.
	// A field starts at every transition from separator to non-separator.
	n := 0
	inField := false
	for _, r := range s {
		wasInField := inField
		inField = !f(r)
		if inField && !wasInField {
			n++
		}
	}

	// Now create them. This pass calls f again on every rune, which is why
	// an inconsistent f can make the two passes disagree and index past a.
	a := make([]string, n)
	na := 0
	fieldStart := -1 // Set to -1 when looking for start of field.
	for i, r := range s {
		if f(r) {
			if fieldStart >= 0 {
				a[na] = s[fieldStart:i]
				na++
				fieldStart = -1
			}
		} else if fieldStart == -1 {
			fieldStart = i
		}
	}
	if fieldStart >= 0 { // Last field might end at EOF.
		a[na] = s[fieldStart:]
	}
	return a
}
// Join concatenates the elements of a to create a single string. The separator string
// sep is placed between elements in the resulting string.
// An empty a yields "", and a single-element a yields that element unchanged.
func Join(a []string, sep string) string {
	if len(a) == 0 {
		return ""
	}
	if len(a) == 1 {
		return a[0]
	}
	// Compute the exact output size up front so the buffer is allocated once.
	n := len(sep) * (len(a) - 1)
	for i := 0; i < len(a); i++ {
		n += len(a[i])
	}

	b := make([]byte, n)
	bp := copy(b, a[0])
	for _, s := range a[1:] {
		bp += copy(b[bp:], sep)
		bp += copy(b[bp:], s)
	}
	return string(b)
}
// HasPrefix tests whether the string s begins with prefix.
// An empty prefix is a prefix of every string.
func HasPrefix(s, prefix string) bool {
	// The length check guards the slice expression against going out of range.
	return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
}
// HasSuffix tests whether the string s ends with suffix.
// An empty suffix is a suffix of every string.
func HasSuffix(s, suffix string) bool {
	// The length check guards the slice expression against a negative start.
	return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
}
// Map returns a copy of the string s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.
//
// If mapping returns a value that is not a valid Unicode code point (a
// surrogate or a value above utf8.MaxRune), the encoding of U+FFFD is
// written in its place. If mapping leaves every rune unchanged, s itself is
// returned and nothing is allocated.
func Map(mapping func(rune) rune, s string) string {
	// In the worst case, the string can grow when mapped, making
	// things unpleasant. But it's so rare we barge in assuming it's
	// fine. It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	// The output buffer b is initialized on demand, the first
	// time a character differs.
	var b []byte

	for i, c := range s {
		r := mapping(c)
		if b == nil {
			if r == c {
				continue
			}
			b = make([]byte, maxbytes)
			nbytes = copy(b, s[:i])
		}
		if r >= 0 {
			wid := 1
			if r >= utf8.RuneSelf {
				wid = utf8.RuneLen(r)
				if wid < 0 {
					// r is not encodable; EncodeRune will emit U+FFFD
					// instead, so reserve room for that encoding.
					wid = utf8.RuneLen(utf8.RuneError)
				}
			}
			if nbytes+wid > maxbytes {
				// Grow the buffer.
				maxbytes = maxbytes*2 + utf8.UTFMax
				nb := make([]byte, maxbytes)
				copy(nb, b[0:nbytes])
				b = nb
			}
			nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
		}
	}
	if b == nil {
		return s
	}
	return string(b[0:nbytes])
}
// Repeat returns a new string consisting of count copies of the string s.
//
// It panics if count is negative or if the result of (len(s) * count)
// overflows.
func Repeat(s string, count int) string {
	if count == 0 {
		return ""
	}
	// Fail with a clear message instead of an obscure allocation panic, and
	// catch a wrapped product before it silently produces a short buffer.
	if count < 0 {
		panic("strings: negative Repeat count")
	} else if len(s)*count/count != len(s) {
		panic("strings: Repeat count causes overflow")
	}

	b := make([]byte, len(s)*count)
	bp := copy(b, s)
	// Double the filled prefix each round; copy clips the final round to len(b).
	for bp < len(b) {
		copy(b[bp:], b[:bp])
		bp *= 2
	}
	return string(b)
}
429 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
430 func ToUpper(s string) string { return Map(unicode.ToUpper, s) }
432 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
433 func ToLower(s string) string { return Map(unicode.ToLower, s) }
435 // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
436 func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
438 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
439 // upper case, giving priority to the special casing rules.
440 func ToUpperSpecial(_case unicode.SpecialCase, s string) string {
441 return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
444 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
445 // lower case, giving priority to the special casing rules.
446 func ToLowerSpecial(_case unicode.SpecialCase, s string) string {
447 return Map(func(r rune) rune { return _case.ToLower(r) }, s)
450 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
451 // title case, giving priority to the special casing rules.
452 func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
453 return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
// isSeparator reports whether the rune could mark a word boundary.
// In the ASCII range everything except letters, digits and underscore is a
// separator; outside it only white space (unicode.IsSpace) is.
// TODO: update when package unicode captures more of the properties.
func isSeparator(r rune) bool {
	// ASCII alphanumerics and underscore are not separators
	if r <= 0x7F {
		switch {
		case '0' <= r && r <= '9':
			return false
		case 'a' <= r && r <= 'z':
			return false
		case 'A' <= r && r <= 'Z':
			return false
		case r == '_':
			return false
		}
		return true
	}
	// Letters and digits are not separators
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return false
	}
	// Otherwise, all we can do for now is treat spaces as separators.
	return unicode.IsSpace(r)
}
481 // Title returns a copy of the string s with all Unicode letters that begin words
482 // mapped to their title case.
484 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
485 func Title(s string) string {
486 // Use a closure here to remember state.
487 // Hackish but effective. Depends on Map scanning in order and calling
488 // the closure once per rune.
489 prev := ' '
490 return Map(
491 func(r rune) rune {
492 if isSeparator(prev) {
493 prev = r
494 return unicode.ToTitle(r)
496 prev = r
497 return r
502 // TrimLeftFunc returns a slice of the string s with all leading
503 // Unicode code points c satisfying f(c) removed.
504 func TrimLeftFunc(s string, f func(rune) bool) string {
505 i := indexFunc(s, f, false)
506 if i == -1 {
507 return ""
509 return s[i:]
512 // TrimRightFunc returns a slice of the string s with all trailing
513 // Unicode code points c satisfying f(c) removed.
514 func TrimRightFunc(s string, f func(rune) bool) string {
515 i := lastIndexFunc(s, f, false)
516 if i >= 0 && s[i] >= utf8.RuneSelf {
517 _, wid := utf8.DecodeRuneInString(s[i:])
518 i += wid
519 } else {
522 return s[0:i]
525 // TrimFunc returns a slice of the string s with all leading
526 // and trailing Unicode code points c satisfying f(c) removed.
527 func TrimFunc(s string, f func(rune) bool) string {
528 return TrimRightFunc(TrimLeftFunc(s, f), f)
531 // IndexFunc returns the index into s of the first Unicode
532 // code point satisfying f(c), or -1 if none do.
533 func IndexFunc(s string, f func(rune) bool) int {
534 return indexFunc(s, f, true)
537 // LastIndexFunc returns the index into s of the last
538 // Unicode code point satisfying f(c), or -1 if none do.
539 func LastIndexFunc(s string, f func(rune) bool) int {
540 return lastIndexFunc(s, f, true)
// indexFunc is the same as IndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
// It returns the byte offset of the first rune r with f(r) == truth, or -1.
func indexFunc(s string, f func(rune) bool, truth bool) int {
	start := 0
	for start < len(s) {
		// Treat the byte as an ASCII rune unless its high bit says otherwise;
		// this avoids a decode call for the common single-byte case.
		wid := 1
		r := rune(s[start])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRuneInString(s[start:])
		}
		if f(r) == truth {
			return start
		}
		start += wid
	}
	return -1
}
// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
// It returns the byte offset at which the last rune r with f(r) == truth
// starts, or -1.
func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
	// Walk backwards one rune at a time; i is the end of the unread prefix.
	for i := len(s); i > 0; {
		r, size := utf8.DecodeLastRuneInString(s[0:i])
		i -= size // i is now the start offset of r
		if f(r) == truth {
			return i
		}
	}
	return -1
}
576 func makeCutsetFunc(cutset string) func(rune) bool {
577 return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
580 // Trim returns a slice of the string s with all leading and
581 // trailing Unicode code points contained in cutset removed.
582 func Trim(s string, cutset string) string {
583 if s == "" || cutset == "" {
584 return s
586 return TrimFunc(s, makeCutsetFunc(cutset))
589 // TrimLeft returns a slice of the string s with all leading
590 // Unicode code points contained in cutset removed.
591 func TrimLeft(s string, cutset string) string {
592 if s == "" || cutset == "" {
593 return s
595 return TrimLeftFunc(s, makeCutsetFunc(cutset))
598 // TrimRight returns a slice of the string s, with all trailing
599 // Unicode code points contained in cutset removed.
600 func TrimRight(s string, cutset string) string {
601 if s == "" || cutset == "" {
602 return s
604 return TrimRightFunc(s, makeCutsetFunc(cutset))
607 // TrimSpace returns a slice of the string s, with all leading
608 // and trailing white space removed, as defined by Unicode.
609 func TrimSpace(s string) string {
610 return TrimFunc(s, unicode.IsSpace)
613 // TrimPrefix returns s without the provided leading prefix string.
614 // If s doesn't start with prefix, s is returned unchanged.
615 func TrimPrefix(s, prefix string) string {
616 if HasPrefix(s, prefix) {
617 return s[len(prefix):]
619 return s
622 // TrimSuffix returns s without the provided trailing suffix string.
623 // If s doesn't end with suffix, s is returned unchanged.
624 func TrimSuffix(s, suffix string) string {
625 if HasSuffix(s, suffix) {
626 return s[:len(s)-len(suffix)]
628 return s
631 // Replace returns a copy of the string s with the first n
632 // non-overlapping instances of old replaced by new.
633 // If old is empty, it matches at the beginning of the string
634 // and after each UTF-8 sequence, yielding up to k+1 replacements
635 // for a k-rune string.
636 // If n < 0, there is no limit on the number of replacements.
637 func Replace(s, old, new string, n int) string {
638 if old == new || n == 0 {
639 return s // avoid allocation
642 // Compute number of replacements.
643 if m := Count(s, old); m == 0 {
644 return s // avoid allocation
645 } else if n < 0 || m < n {
646 n = m
649 // Apply replacements to buffer.
650 t := make([]byte, len(s)+n*(len(new)-len(old)))
651 w := 0
652 start := 0
653 for i := 0; i < n; i++ {
654 j := start
655 if len(old) == 0 {
656 if i > 0 {
657 _, wid := utf8.DecodeRuneInString(s[start:])
658 j += wid
660 } else {
661 j += Index(s[start:], old)
663 w += copy(t[w:], s[start:j])
664 w += copy(t[w:], new)
665 start = j + len(old)
667 w += copy(t[w:], s[start:])
668 return string(t[0:w])
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
// Runes are compared pairwise using simple (one-to-one) folding via
// unicode.SimpleFold, so strings with different rune counts are never equal.
func EqualFold(s, t string) bool {
	for s != "" && t != "" {
		// Extract first rune from each string.
		var sr, tr rune
		if s[0] < utf8.RuneSelf {
			sr, s = rune(s[0]), s[1:]
		} else {
			r, size := utf8.DecodeRuneInString(s)
			sr, s = r, s[size:]
		}
		if t[0] < utf8.RuneSelf {
			tr, t = rune(t[0]), t[1:]
		} else {
			r, size := utf8.DecodeRuneInString(t)
			tr, t = r, t[size:]
		}

		// If they match, keep going; if not, return false.

		// Easy case.
		if tr == sr {
			continue
		}

		// Make sr < tr to simplify what follows.
		if tr < sr {
			tr, sr = sr, tr
		}
		// Fast check for ASCII.
		if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
			// ASCII, and sr is upper case. tr must be lower case.
			if tr == sr+'a'-'A' {
				continue
			}
			return false
		}

		// General case. SimpleFold(x) returns the next equivalent rune > x
		// or wraps around to smaller values.
		r := unicode.SimpleFold(sr)
		for r != sr && r < tr {
			r = unicode.SimpleFold(r)
		}
		if r == tr {
			continue
		}
		return false
	}

	// One string is empty. Are both?
	return s == t
}