* gcc.dg/guality/guality.exp: Skip on AIX.
[official-gcc.git] / libgo / go / strings / strings.go
blob986f6d61ebc94d264751f0cbb1411463c479ae4e
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package strings implements simple functions to manipulate strings.
6 package strings
8 import (
9 "unicode"
10 "unicode/utf8"
// explode splits s into a slice of UTF-8 sequences, one per Unicode code
// point (each still a string), up to a maximum of n elements (n < 0 means
// no limit). When the limit cuts the split short, the final element holds
// the unsplit remainder of s.
// Invalid UTF-8 sequences in all but the final element become correct
// encodings of U+FFFD; the final element is copied from s unchanged.
func explode(s string, n int) []string {
	if n == 0 {
		return nil
	}
	l := utf8.RuneCountInString(s)
	if n <= 0 || n > l {
		n = l
	}
	a := make([]string, n)
	var size int
	var ch rune
	i, cur := 0, 0
	// Decode the first n-1 code points one at a time.
	for ; i+1 < n; i++ {
		ch, size = utf8.DecodeRuneInString(s[cur:])
		if ch == utf8.RuneError {
			a[i] = string(utf8.RuneError)
		} else {
			a[i] = s[cur : cur+size]
		}
		cur += size
	}
	// add the rest, if there is any
	if cur < len(s) {
		a[i] = s[cur:]
	}
	return a
}
// primeRK is the prime base used in Rabin-Karp algorithm.
const primeRK = 16777619

// hashstr returns the hash and the appropriate multiplicative
// factor for use in Rabin-Karp algorithm.
// The factor is primeRK raised to the power len(sep), computed by
// repeated squaring; all arithmetic wraps modulo 2^32.
func hashstr(sep string) (uint32, uint32) {
	hash := uint32(0)
	for i := 0; i < len(sep); i++ {
		hash = hash*primeRK + uint32(sep[i])
	}
	var pow, sq uint32 = 1, primeRK
	for i := len(sep); i > 0; i >>= 1 {
		if i&1 != 0 {
			pow *= sq
		}
		sq *= sq
	}
	return hash, pow
}

// Count counts the number of non-overlapping instances of sep in s.
// If sep is empty, Count returns the number of Unicode code points in s
// plus one.
func Count(s, sep string) int {
	n := 0
	// special cases
	switch {
	case len(sep) == 0:
		return utf8.RuneCountInString(s) + 1
	case len(sep) == 1:
		// special case worth making fast
		c := sep[0]
		for i := 0; i < len(s); i++ {
			if s[i] == c {
				n++
			}
		}
		return n
	case len(sep) > len(s):
		return 0
	case len(sep) == len(s):
		if sep == s {
			return 1
		}
		return 0
	}
	// Rabin-Karp: hash the first window, then roll it across s.
	hashsep, pow := hashstr(sep)
	h := uint32(0)
	for i := 0; i < len(sep); i++ {
		h = h*primeRK + uint32(s[i])
	}
	// lastmatch is the end offset of the previous match; a new match may
	// not start before it, which keeps matches non-overlapping.
	lastmatch := 0
	if h == hashsep && s[:len(sep)] == sep {
		n++
		lastmatch = len(sep)
	}
	for i := len(sep); i < len(s); {
		// Slide the window one byte: add s[i], drop s[i-len(sep)].
		h *= primeRK
		h += uint32(s[i])
		h -= pow * uint32(s[i-len(sep)])
		i++
		// The window is now s[i-len(sep):i]; confirm hash hits by comparison.
		if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
			n++
			lastmatch = i
		}
	}
	return n
}
111 // Contains returns true if substr is within s.
112 func Contains(s, substr string) bool {
113 return Index(s, substr) >= 0
116 // ContainsAny returns true if any Unicode code points in chars are within s.
117 func ContainsAny(s, chars string) bool {
118 return IndexAny(s, chars) >= 0
121 // ContainsRune returns true if the Unicode code point r is within s.
122 func ContainsRune(s string, r rune) bool {
123 return IndexRune(s, r) >= 0
126 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
127 func Index(s, sep string) int {
128 n := len(sep)
129 switch {
130 case n == 0:
131 return 0
132 case n == 1:
133 c := sep[0]
134 // special case worth making fast
135 for i := 0; i < len(s); i++ {
136 if s[i] == c {
137 return i
140 return -1
141 case n == len(s):
142 if sep == s {
143 return 0
145 return -1
146 case n > len(s):
147 return -1
149 // Hash sep.
150 hashsep, pow := hashstr(sep)
151 var h uint32
152 for i := 0; i < n; i++ {
153 h = h*primeRK + uint32(s[i])
155 if h == hashsep && s[:n] == sep {
156 return 0
158 for i := n; i < len(s); {
159 h *= primeRK
160 h += uint32(s[i])
161 h -= pow * uint32(s[i-n])
163 if h == hashsep && s[i-n:i] == sep {
164 return i - n
167 return -1
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
// An empty sep matches at len(s).
func LastIndex(s, sep string) int {
	n := len(sep)
	if n == 0 {
		return len(s)
	}
	c := sep[0]
	if n == 1 {
		// special case worth making fast
		for i := len(s) - 1; i >= 0; i-- {
			if s[i] == c {
				return i
			}
		}
		return -1
	}
	// n > 1
	// Scan candidate start offsets from the right; the first-byte check
	// skips the substring comparison for most positions.
	for i := len(s) - n; i >= 0; i-- {
		if s[i] == c && s[i:i+n] == sep {
			return i
		}
	}
	return -1
}
// IndexRune returns the index of the first instance of the Unicode code point
// r, or -1 if rune is not present in s.
// Negative values of r are never found.
func IndexRune(s string, r rune) int {
	switch {
	case 0 <= r && r < utf8.RuneSelf:
		// Single-byte rune: a plain byte scan suffices. The lower bound
		// matters because byte(r) would truncate a negative r into a
		// value that could spuriously match a byte of s.
		b := byte(r)
		for i := 0; i < len(s); i++ {
			if s[i] == b {
				return i
			}
		}
	default:
		for i, c := range s {
			if c == r {
				return i
			}
		}
	}
	return -1
}
// IndexAny returns the index of the first instance of any Unicode code point
// from chars in s, or -1 if no Unicode code point from chars is present in s.
// An empty chars never matches.
func IndexAny(s, chars string) int {
	if len(chars) > 0 {
		for i, c := range s {
			for _, m := range chars {
				if c == m {
					return i
				}
			}
		}
	}
	return -1
}
231 // LastIndexAny returns the index of the last instance of any Unicode code
232 // point from chars in s, or -1 if no Unicode code point from chars is
233 // present in s.
234 func LastIndexAny(s, chars string) int {
235 if len(chars) > 0 {
236 for i := len(s); i > 0; {
237 rune, size := utf8.DecodeLastRuneInString(s[0:i])
238 i -= size
239 for _, m := range chars {
240 if rune == m {
241 return i
246 return -1
249 // Generic split: splits after each instance of sep,
250 // including sepSave bytes of sep in the subarrays.
251 func genSplit(s, sep string, sepSave, n int) []string {
252 if n == 0 {
253 return nil
255 if sep == "" {
256 return explode(s, n)
258 if n < 0 {
259 n = Count(s, sep) + 1
261 c := sep[0]
262 start := 0
263 a := make([]string, n)
264 na := 0
265 for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
266 if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
267 a[na] = s[start : i+sepSave]
268 na++
269 start = i + len(sep)
270 i += len(sep) - 1
273 a[na] = s[start:]
274 return a[0 : na+1]
277 // SplitN slices s into substrings separated by sep and returns a slice of
278 // the substrings between those separators.
279 // If sep is empty, SplitN splits after each UTF-8 sequence.
280 // The count determines the number of substrings to return:
281 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
282 // n == 0: the result is nil (zero substrings)
283 // n < 0: all substrings
284 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
286 // SplitAfterN slices s into substrings after each instance of sep and
287 // returns a slice of those substrings.
288 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
289 // The count determines the number of substrings to return:
290 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
291 // n == 0: the result is nil (zero substrings)
292 // n < 0: all substrings
293 func SplitAfterN(s, sep string, n int) []string {
294 return genSplit(s, sep, len(sep), n)
297 // Split slices s into all substrings separated by sep and returns a slice of
298 // the substrings between those separators.
299 // If sep is empty, Split splits after each UTF-8 sequence.
300 // It is equivalent to SplitN with a count of -1.
301 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
303 // SplitAfter slices s into all substrings after each instance of sep and
304 // returns a slice of those substrings.
305 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
306 // It is equivalent to SplitAfterN with a count of -1.
307 func SplitAfter(s, sep string) []string {
308 return genSplit(s, sep, len(sep), -1)
311 // Fields splits the string s around each instance of one or more consecutive white space
312 // characters, as defined by unicode.IsSpace, returning an array of substrings of s or an
313 // empty list if s contains only white space.
314 func Fields(s string) []string {
315 return FieldsFunc(s, unicode.IsSpace)
// FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
// and returns an array of slices of s. If all code points in s satisfy f(c) or the
// string is empty, an empty slice is returned.
// f is called twice per code point: once while counting and once while slicing.
func FieldsFunc(s string, f func(rune) bool) []string {
	// First count the fields.
	n := 0
	inField := false
	for _, r := range s {
		wasInField := inField
		inField = !f(r)
		// A field begins on each transition from separator to non-separator.
		if inField && !wasInField {
			n++
		}
	}

	// Now create them.
	a := make([]string, n)
	na := 0
	fieldStart := -1 // Set to -1 when looking for start of field.
	for i, r := range s {
		if f(r) {
			if fieldStart >= 0 {
				a[na] = s[fieldStart:i]
				na++
				fieldStart = -1
			}
		} else if fieldStart == -1 {
			fieldStart = i
		}
	}
	if fieldStart >= 0 { // Last field might end at EOF.
		a[na] = s[fieldStart:]
	}
	return a
}
// Join concatenates the elements of a to create a single string. The separator string
// sep is placed between elements in the resulting string.
// An empty a yields "", and a single-element a yields that element.
func Join(a []string, sep string) string {
	if len(a) == 0 {
		return ""
	}
	if len(a) == 1 {
		return a[0]
	}
	// Compute the exact result length so the buffer is allocated once.
	n := len(sep) * (len(a) - 1)
	for i := 0; i < len(a); i++ {
		n += len(a[i])
	}

	b := make([]byte, n)
	bp := copy(b, a[0])
	for _, s := range a[1:] {
		bp += copy(b[bp:], sep)
		bp += copy(b[bp:], s)
	}
	return string(b)
}
// HasPrefix tests whether the string s begins with prefix.
// Every string begins with the empty prefix.
func HasPrefix(s, prefix string) bool {
	return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
}
// HasSuffix tests whether the string s ends with suffix.
// Every string ends with the empty suffix.
func HasSuffix(s, suffix string) bool {
	return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
}
// Map returns a copy of the string s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.
// If mapping leaves every character unchanged, s itself is returned.
func Map(mapping func(rune) rune, s string) string {
	// In the worst case, the string can grow when mapped, making
	// things unpleasant.  But it's so rare we barge in assuming it's
	// fine.  It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	// The output buffer b is initialized on demand, the first
	// time a character differs.
	var b []byte

	for i, c := range s {
		r := mapping(c)
		if b == nil {
			if r == c {
				continue
			}
			// First difference: allocate b and copy the unchanged prefix.
			b = make([]byte, maxbytes)
			nbytes = copy(b, s[:i])
		}
		if r >= 0 {
			wid := 1
			if r >= utf8.RuneSelf {
				wid = utf8.RuneLen(r)
			}
			if nbytes+wid > maxbytes {
				// Grow the buffer.
				maxbytes = maxbytes*2 + utf8.UTFMax
				nb := make([]byte, maxbytes)
				copy(nb, b[0:nbytes])
				b = nb
			}
			nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
		}
	}
	if b == nil {
		return s
	}
	return string(b[0:nbytes])
}
// Repeat returns a new string consisting of count copies of the string s.
// It returns "" when s is empty or count is zero. It panics if count is
// negative and s is non-empty, or if len(s)*count overflows.
func Repeat(s string, count int) string {
	if len(s) == 0 || count == 0 {
		return ""
	}
	if count < 0 {
		panic("strings: negative Repeat count")
	}
	if len(s)*count/count != len(s) {
		panic("strings: Repeat count causes overflow")
	}
	b := make([]byte, len(s)*count)
	// Seed b with one copy of s, then keep doubling the filled prefix;
	// copy truncates the final step to the space that remains.
	bp := copy(b, s)
	for bp < len(b) {
		copy(b[bp:], b[:bp])
		bp *= 2
	}
	return string(b)
}
443 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
444 func ToUpper(s string) string { return Map(unicode.ToUpper, s) }
446 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
447 func ToLower(s string) string { return Map(unicode.ToLower, s) }
449 // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
450 func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
452 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
453 // upper case, giving priority to the special casing rules.
454 func ToUpperSpecial(_case unicode.SpecialCase, s string) string {
455 return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
458 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
459 // lower case, giving priority to the special casing rules.
460 func ToLowerSpecial(_case unicode.SpecialCase, s string) string {
461 return Map(func(r rune) rune { return _case.ToLower(r) }, s)
464 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
465 // title case, giving priority to the special casing rules.
466 func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
467 return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
// isSeparator reports whether the rune could mark a word boundary.
// Within ASCII, everything except letters, digits and underscore is a
// separator; outside ASCII, only white space is.
// TODO: update when package unicode captures more of the properties.
func isSeparator(r rune) bool {
	// ASCII alphanumerics and underscore are not separators
	if r <= 0x7F {
		switch {
		case '0' <= r && r <= '9':
			return false
		case 'a' <= r && r <= 'z':
			return false
		case 'A' <= r && r <= 'Z':
			return false
		case r == '_':
			return false
		}
		return true
	}
	// Letters and digits are not separators
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return false
	}
	// Otherwise, all we can do for now is treat spaces as separators.
	return unicode.IsSpace(r)
}
495 // Title returns a copy of the string s with all Unicode letters that begin words
496 // mapped to their title case.
498 // BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly.
499 func Title(s string) string {
500 // Use a closure here to remember state.
501 // Hackish but effective. Depends on Map scanning in order and calling
502 // the closure once per rune.
503 prev := ' '
504 return Map(
505 func(r rune) rune {
506 if isSeparator(prev) {
507 prev = r
508 return unicode.ToTitle(r)
510 prev = r
511 return r
516 // TrimLeftFunc returns a slice of the string s with all leading
517 // Unicode code points c satisfying f(c) removed.
518 func TrimLeftFunc(s string, f func(rune) bool) string {
519 i := indexFunc(s, f, false)
520 if i == -1 {
521 return ""
523 return s[i:]
526 // TrimRightFunc returns a slice of the string s with all trailing
527 // Unicode code points c satisfying f(c) removed.
528 func TrimRightFunc(s string, f func(rune) bool) string {
529 i := lastIndexFunc(s, f, false)
530 if i >= 0 && s[i] >= utf8.RuneSelf {
531 _, wid := utf8.DecodeRuneInString(s[i:])
532 i += wid
533 } else {
536 return s[0:i]
539 // TrimFunc returns a slice of the string s with all leading
540 // and trailing Unicode code points c satisfying f(c) removed.
541 func TrimFunc(s string, f func(rune) bool) string {
542 return TrimRightFunc(TrimLeftFunc(s, f), f)
545 // IndexFunc returns the index into s of the first Unicode
546 // code point satisfying f(c), or -1 if none do.
547 func IndexFunc(s string, f func(rune) bool) int {
548 return indexFunc(s, f, true)
551 // LastIndexFunc returns the index into s of the last
552 // Unicode code point satisfying f(c), or -1 if none do.
553 func LastIndexFunc(s string, f func(rune) bool) int {
554 return lastIndexFunc(s, f, true)
// indexFunc is the same as IndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func indexFunc(s string, f func(rune) bool, truth bool) int {
	start := 0
	for start < len(s) {
		// Single-byte runes skip the decoder.
		wid := 1
		r := rune(s[start])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRuneInString(s[start:])
		}
		if f(r) == truth {
			return start
		}
		start += wid
	}
	return -1
}
// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
	// Walk s backwards one code point at a time; after the decrement
	// i is the byte offset at which the decoded code point starts.
	for i := len(s); i > 0; {
		r, size := utf8.DecodeLastRuneInString(s[0:i])
		i -= size
		if f(r) == truth {
			return i
		}
	}
	return -1
}
590 func makeCutsetFunc(cutset string) func(rune) bool {
591 return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
594 // Trim returns a slice of the string s with all leading and
595 // trailing Unicode code points contained in cutset removed.
596 func Trim(s string, cutset string) string {
597 if s == "" || cutset == "" {
598 return s
600 return TrimFunc(s, makeCutsetFunc(cutset))
603 // TrimLeft returns a slice of the string s with all leading
604 // Unicode code points contained in cutset removed.
605 func TrimLeft(s string, cutset string) string {
606 if s == "" || cutset == "" {
607 return s
609 return TrimLeftFunc(s, makeCutsetFunc(cutset))
612 // TrimRight returns a slice of the string s, with all trailing
613 // Unicode code points contained in cutset removed.
614 func TrimRight(s string, cutset string) string {
615 if s == "" || cutset == "" {
616 return s
618 return TrimRightFunc(s, makeCutsetFunc(cutset))
621 // TrimSpace returns a slice of the string s, with all leading
622 // and trailing white space removed, as defined by Unicode.
623 func TrimSpace(s string) string {
624 return TrimFunc(s, unicode.IsSpace)
627 // TrimPrefix returns s without the provided leading prefix string.
628 // If s doesn't start with prefix, s is returned unchanged.
629 func TrimPrefix(s, prefix string) string {
630 if HasPrefix(s, prefix) {
631 return s[len(prefix):]
633 return s
636 // TrimSuffix returns s without the provided trailing suffix string.
637 // If s doesn't end with suffix, s is returned unchanged.
638 func TrimSuffix(s, suffix string) string {
639 if HasSuffix(s, suffix) {
640 return s[:len(s)-len(suffix)]
642 return s
645 // Replace returns a copy of the string s with the first n
646 // non-overlapping instances of old replaced by new.
647 // If n < 0, there is no limit on the number of replacements.
648 func Replace(s, old, new string, n int) string {
649 if old == new || n == 0 {
650 return s // avoid allocation
653 // Compute number of replacements.
654 if m := Count(s, old); m == 0 {
655 return s // avoid allocation
656 } else if n < 0 || m < n {
657 n = m
660 // Apply replacements to buffer.
661 t := make([]byte, len(s)+n*(len(new)-len(old)))
662 w := 0
663 start := 0
664 for i := 0; i < n; i++ {
665 j := start
666 if len(old) == 0 {
667 if i > 0 {
668 _, wid := utf8.DecodeRuneInString(s[start:])
669 j += wid
671 } else {
672 j += Index(s[start:], old)
674 w += copy(t[w:], s[start:j])
675 w += copy(t[w:], new)
676 start = j + len(old)
678 w += copy(t[w:], s[start:])
679 return string(t[0:w])
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
func EqualFold(s, t string) bool {
	for s != "" && t != "" {
		// Extract first rune from each string.
		var sr, tr rune
		if s[0] < utf8.RuneSelf {
			sr, s = rune(s[0]), s[1:]
		} else {
			r, size := utf8.DecodeRuneInString(s)
			sr, s = r, s[size:]
		}
		if t[0] < utf8.RuneSelf {
			tr, t = rune(t[0]), t[1:]
		} else {
			r, size := utf8.DecodeRuneInString(t)
			tr, t = r, t[size:]
		}

		// If they match, keep going; if not, return false.

		// Easy case.
		if tr == sr {
			continue
		}

		// Make sr < tr to simplify what follows.
		if tr < sr {
			tr, sr = sr, tr
		}
		// Fast check for ASCII.
		if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
			// ASCII, and sr is upper case.  tr must be lower case.
			if tr == sr+'a'-'A' {
				continue
			}
			return false
		}

		// General case.  SimpleFold(x) returns the next equivalent rune > x
		// or wraps around to smaller values.
		r := unicode.SimpleFold(sr)
		for r != sr && r < tr {
			r = unicode.SimpleFold(r)
		}
		if r == tr {
			continue
		}
		return false
	}

	// One string is empty.  Are both?
	return s == t
}