libgo/go/strings/strings.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Package strings implements simple functions to manipulate UTF-8 encoded strings.
   6 //
   7 // For information about UTF-8 strings in Go, see https://blog.golang.org/strings.
   8 package strings
   9
  10 import (
  11         "internal/bytealg"
  12         "unicode"
  13         "unicode/utf8"
  14 )
  15
  16 // explode splits s into a slice of UTF-8 strings,
  17 // one string per Unicode character up to a maximum of n (n < 0 means no limit).
  18 // Invalid UTF-8 sequences become correct encodings of U+FFFD.
  19 func explode(s string, n int) []string {
  20         l := utf8.RuneCountInString(s)
  21         if n < 0 || n > l {
  22                 n = l
  23         }
  24         a := make([]string, n)
  25         for i := 0; i < n-1; i++ {
  26                 ch, size := utf8.DecodeRuneInString(s)
  27                 a[i] = s[:size]
  28                 s = s[size:]
  29                 if ch == utf8.RuneError {
  30                         a[i] = string(utf8.RuneError)
  31                 }
  32         }
  33         if n > 0 {
  34                 a[n-1] = s
  35         }
  36         return a
  37 }
  38
  39 // primeRK is the prime base used in Rabin-Karp algorithm.
  40 const primeRK = 16777619
  41
  42 // hashStr returns the hash and the appropriate multiplicative
  43 // factor for use in Rabin-Karp algorithm.
  44 func hashStr(sep string) (uint32, uint32) {
  45         hash := uint32(0)
  46         for i := 0; i < len(sep); i++ {
  47                 hash = hash*primeRK + uint32(sep[i])
  48         }
  49         var pow, sq uint32 = 1, primeRK
  50         for i := len(sep); i > 0; i >>= 1 {
  51                 if i&1 != 0 {
  52                         pow *= sq
  53                 }
  54                 sq *= sq
  55         }
  56         return hash, pow
  57 }
  58
  59 // hashStrRev returns the hash of the reverse of sep and the
  60 // appropriate multiplicative factor for use in Rabin-Karp algorithm.
  61 func hashStrRev(sep string) (uint32, uint32) {
  62         hash := uint32(0)
  63         for i := len(sep) - 1; i >= 0; i-- {
  64                 hash = hash*primeRK + uint32(sep[i])
  65         }
  66         var pow, sq uint32 = 1, primeRK
  67         for i := len(sep); i > 0; i >>= 1 {
  68                 if i&1 != 0 {
  69                         pow *= sq
  70                 }
  71                 sq *= sq
  72         }
  73         return hash, pow
  74 }
  75
  76 // Count counts the number of non-overlapping instances of substr in s.
  77 // If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
  78 func Count(s, substr string) int {
  79         // special case
  80         if len(substr) == 0 {
  81                 return utf8.RuneCountInString(s) + 1
  82         }
  83         if len(substr) == 1 {
  84                 return bytealg.CountString(s, substr[0])
  85         }
  86         n := 0
  87         for {
  88                 i := Index(s, substr)
  89                 if i == -1 {
  90                         return n
  91                 }
  92                 n++
  93                 s = s[i+len(substr):]
  94         }
  95 }
  96
  97 // Contains reports whether substr is within s.
  98 func Contains(s, substr string) bool {
  99         return Index(s, substr) >= 0
 100 }
 101
 102 // ContainsAny reports whether any Unicode code points in chars are within s.
 103 func ContainsAny(s, chars string) bool {
 104         return IndexAny(s, chars) >= 0
 105 }
 106
 107 // ContainsRune reports whether the Unicode code point r is within s.
 108 func ContainsRune(s string, r rune) bool {
 109         return IndexRune(s, r) >= 0
 110 }
 111
 112 // LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s.
 113 func LastIndex(s, substr string) int {
 114         n := len(substr)
 115         switch {
 116         case n == 0:
 117                 return len(s)
 118         case n == 1:
 119                 return LastIndexByte(s, substr[0])
 120         case n == len(s):
 121                 if substr == s {
 122                         return 0
 123                 }
 124                 return -1
 125         case n > len(s):
 126                 return -1
 127         }
 128         // Rabin-Karp search from the end of the string
 129         hashss, pow := hashStrRev(substr)
 130         last := len(s) - n
 131         var h uint32
 132         for i := len(s) - 1; i >= last; i-- {
 133                 h = h*primeRK + uint32(s[i])
 134         }
 135         if h == hashss && s[last:] == substr {
 136                 return last
 137         }
 138         for i := last - 1; i >= 0; i-- {
 139                 h *= primeRK
 140                 h += uint32(s[i])
 141                 h -= pow * uint32(s[i+n])
 142                 if h == hashss && s[i:i+n] == substr {
 143                         return i
 144                 }
 145         }
 146         return -1
 147 }
 148
 149 // IndexByte returns the index of the first instance of c in s, or -1 if c is not present in s.
 150 func IndexByte(s string, c byte) int {
 151         return bytealg.IndexByteString(s, c)
 152 }
 153
 154 // IndexRune returns the index of the first instance of the Unicode code point
 155 // r, or -1 if rune is not present in s.
 156 // If r is utf8.RuneError, it returns the first instance of any
 157 // invalid UTF-8 byte sequence.
 158 func IndexRune(s string, r rune) int {
 159         switch {
 160         case 0 <= r && r < utf8.RuneSelf:
 161                 return IndexByte(s, byte(r))
 162         case r == utf8.RuneError:
 163                 for i, r := range s {
 164                         if r == utf8.RuneError {
 165                                 return i
 166                         }
 167                 }
 168                 return -1
 169         case !utf8.ValidRune(r):
 170                 return -1
 171         default:
 172                 return Index(s, string(r))
 173         }
 174 }
 175
 176 // IndexAny returns the index of the first instance of any Unicode code point
 177 // from chars in s, or -1 if no Unicode code point from chars is present in s.
 178 func IndexAny(s, chars string) int {
 179         if chars == "" {
 180                 // Avoid scanning all of s.
 181                 return -1
 182         }
 183         if len(s) > 8 {
 184                 if as, isASCII := makeASCIISet(chars); isASCII {
 185                         for i := 0; i < len(s); i++ {
 186                                 if as.contains(s[i]) {
 187                                         return i
 188                                 }
 189                         }
 190                         return -1
 191                 }
 192         }
 193         for i, c := range s {
 194                 for _, m := range chars {
 195                         if c == m {
 196                                 return i
 197                         }
 198                 }
 199         }
 200         return -1
 201 }
 202
 203 // LastIndexAny returns the index of the last instance of any Unicode code
 204 // point from chars in s, or -1 if no Unicode code point from chars is
 205 // present in s.
 206 func LastIndexAny(s, chars string) int {
 207         if chars == "" {
 208                 // Avoid scanning all of s.
 209                 return -1
 210         }
 211         if len(s) > 8 {
 212                 if as, isASCII := makeASCIISet(chars); isASCII {
 213                         for i := len(s) - 1; i >= 0; i-- {
 214                                 if as.contains(s[i]) {
 215                                         return i
 216                                 }
 217                         }
 218                         return -1
 219                 }
 220         }
 221         for i := len(s); i > 0; {
 222                 r, size := utf8.DecodeLastRuneInString(s[:i])
 223                 i -= size
 224                 for _, c := range chars {
 225                         if r == c {
 226                                 return i
 227                         }
 228                 }
 229         }
 230         return -1
 231 }
 232
 233 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
 234 func LastIndexByte(s string, c byte) int {
 235         for i := len(s) - 1; i >= 0; i-- {
 236                 if s[i] == c {
 237                         return i
 238                 }
 239         }
 240         return -1
 241 }
 242
 243 // Generic split: splits after each instance of sep,
 244 // including sepSave bytes of sep in the subarrays.
 245 func genSplit(s, sep string, sepSave, n int) []string {
 246         if n == 0 {
 247                 return nil
 248         }
 249         if sep == "" {
 250                 return explode(s, n)
 251         }
 252         if n < 0 {
 253                 n = Count(s, sep) + 1
 254         }
 255
 256         a := make([]string, n)
 257         n--
 258         i := 0
 259         for i < n {
 260                 m := Index(s, sep)
 261                 if m < 0 {
 262                         break
 263                 }
 264                 a[i] = s[:m+sepSave]
 265                 s = s[m+len(sep):]
 266                 i++
 267         }
 268         a[i] = s
 269         return a[:i+1]
 270 }
 271
 272 // SplitN slices s into substrings separated by sep and returns a slice of
 273 // the substrings between those separators.
 274 //
 275 // The count determines the number of substrings to return:
 276 //   n > 0: at most n substrings; the last substring will be the unsplit remainder.
 277 //   n == 0: the result is nil (zero substrings)
 278 //   n < 0: all substrings
 279 //
 280 // Edge cases for s and sep (for example, empty strings) are handled
 281 // as described in the documentation for Split.
 282 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
 283
 284 // SplitAfterN slices s into substrings after each instance of sep and
 285 // returns a slice of those substrings.
 286 //
 287 // The count determines the number of substrings to return:
 288 //   n > 0: at most n substrings; the last substring will be the unsplit remainder.
 289 //   n == 0: the result is nil (zero substrings)
 290 //   n < 0: all substrings
 291 //
 292 // Edge cases for s and sep (for example, empty strings) are handled
 293 // as described in the documentation for SplitAfter.
 294 func SplitAfterN(s, sep string, n int) []string {
 295         return genSplit(s, sep, len(sep), n)
 296 }
 297
 298 // Split slices s into all substrings separated by sep and returns a slice of
 299 // the substrings between those separators.
 300 //
 301 // If s does not contain sep and sep is not empty, Split returns a
 302 // slice of length 1 whose only element is s.
 303 //
 304 // If sep is empty, Split splits after each UTF-8 sequence. If both s
 305 // and sep are empty, Split returns an empty slice.
 306 //
 307 // It is equivalent to SplitN with a count of -1.
 308 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
 309
 310 // SplitAfter slices s into all substrings after each instance of sep and
 311 // returns a slice of those substrings.
 312 //
 313 // If s does not contain sep and sep is not empty, SplitAfter returns
 314 // a slice of length 1 whose only element is s.
 315 //
 316 // If sep is empty, SplitAfter splits after each UTF-8 sequence. If
 317 // both s and sep are empty, SplitAfter returns an empty slice.
 318 //
 319 // It is equivalent to SplitAfterN with a count of -1.
 320 func SplitAfter(s, sep string) []string {
 321         return genSplit(s, sep, len(sep), -1)
 322 }
 323
 324 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
 325
 326 // Fields splits the string s around each instance of one or more consecutive white space
 327 // characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an
 328 // empty slice if s contains only white space.
 329 func Fields(s string) []string {
 330         // First count the fields.
 331         // This is an exact count if s is ASCII, otherwise it is an approximation.
 332         n := 0
 333         wasSpace := 1
 334         // setBits is used to track which bits are set in the bytes of s.
 335         setBits := uint8(0)
 336         for i := 0; i < len(s); i++ {
 337                 r := s[i]
 338                 setBits |= r
 339                 isSpace := int(asciiSpace[r])
 340                 n += wasSpace & ^isSpace
 341                 wasSpace = isSpace
 342         }
 343
 344         if setBits < utf8.RuneSelf { // ASCII fast path
 345                 a := make([]string, n)
 346                 na := 0
 347                 fieldStart := 0
 348                 i := 0
 349                 // Skip spaces in the front of the input.
 350                 for i < len(s) && asciiSpace[s[i]] != 0 {
 351                         i++
 352                 }
 353                 fieldStart = i
 354                 for i < len(s) {
 355                         if asciiSpace[s[i]] == 0 {
 356                                 i++
 357                                 continue
 358                         }
 359                         a[na] = s[fieldStart:i]
 360                         na++
 361                         i++
 362                         // Skip spaces in between fields.
 363                         for i < len(s) && asciiSpace[s[i]] != 0 {
 364                                 i++
 365                         }
 366                         fieldStart = i
 367                 }
 368                 if fieldStart < len(s) { // Last field might end at EOF.
 369                         a[na] = s[fieldStart:]
 370                 }
 371                 return a
 372         }
 373
 374         // Some runes in the input string are not ASCII.
 375         return FieldsFunc(s, unicode.IsSpace)
 376 }
 377
 378 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
 379 // and returns an array of slices of s. If all code points in s satisfy f(c) or the
 380 // string is empty, an empty slice is returned.
 381 // FieldsFunc makes no guarantees about the order in which it calls f(c).
 382 // If f does not return consistent results for a given c, FieldsFunc may crash.
 383 func FieldsFunc(s string, f func(rune) bool) []string {
 384         // A span is used to record a slice of s of the form s[start:end].
 385         // The start index is inclusive and the end index is exclusive.
 386         type span struct {
 387                 start int
 388                 end   int
 389         }
 390         spans := make([]span, 0, 32)
 391
 392         // Find the field start and end indices.
 393         wasField := false
 394         fromIndex := 0
 395         for i, rune := range s {
 396                 if f(rune) {
 397                         if wasField {
 398                                 spans = append(spans, span{start: fromIndex, end: i})
 399                                 wasField = false
 400                         }
 401                 } else {
 402                         if !wasField {
 403                                 fromIndex = i
 404                                 wasField = true
 405                         }
 406                 }
 407         }
 408
 409         // Last field might end at EOF.
 410         if wasField {
 411                 spans = append(spans, span{fromIndex, len(s)})
 412         }
 413
 414         // Create strings from recorded field indices.
 415         a := make([]string, len(spans))
 416         for i, span := range spans {
 417                 a[i] = s[span.start:span.end]
 418         }
 419
 420         return a
 421 }
 422
 423 // Join concatenates the elements of a to create a single string. The separator string
 424 // sep is placed between elements in the resulting string.
 425 func Join(a []string, sep string) string {
 426         switch len(a) {
 427         case 0:
 428                 return ""
 429         case 1:
 430                 return a[0]
 431         }
 432         n := len(sep) * (len(a) - 1)
 433         for i := 0; i < len(a); i++ {
 434                 n += len(a[i])
 435         }
 436
 437         var b Builder
 438         b.Grow(n)
 439         b.WriteString(a[0])
 440         for _, s := range a[1:] {
 441                 b.WriteString(sep)
 442                 b.WriteString(s)
 443         }
 444         return b.String()
 445 }
 446
 447 // HasPrefix tests whether the string s begins with prefix.
 448 func HasPrefix(s, prefix string) bool {
 449         return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
 450 }
 451
 452 // HasSuffix tests whether the string s ends with suffix.
 453 func HasSuffix(s, suffix string) bool {
 454         return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
 455 }
 456
 457 // Map returns a copy of the string s with all its characters modified
 458 // according to the mapping function. If mapping returns a negative value, the character is
 459 // dropped from the string with no replacement.
 460 func Map(mapping func(rune) rune, s string) string {
 461         // In the worst case, the string can grow when mapped, making
 462         // things unpleasant. But it's so rare we barge in assuming it's
 463         // fine. It could also shrink but that falls out naturally.
 464
 465         // The output buffer b is initialized on demand, the first
 466         // time a character differs.
 467         var b Builder
 468
 469         for i, c := range s {
 470                 r := mapping(c)
 471                 if r == c && c != utf8.RuneError {
 472                         continue
 473                 }
 474
 475                 var width int
 476                 if c == utf8.RuneError {
 477                         c, width = utf8.DecodeRuneInString(s[i:])
 478                         if width != 1 && r == c {
 479                                 continue
 480                         }
 481                 } else {
 482                         width = utf8.RuneLen(c)
 483                 }
 484
 485                 b.Grow(len(s) + utf8.UTFMax)
 486                 b.WriteString(s[:i])
 487                 if r >= 0 {
 488                         b.WriteRune(r)
 489                 }
 490
 491                 s = s[i+width:]
 492                 break
 493         }
 494
 495         // Fast path for unchanged input
 496         if b.Cap() == 0 { // didn't call b.Grow above
 497                 return s
 498         }
 499
 500         for _, c := range s {
 501                 r := mapping(c)
 502
 503                 if r >= 0 {
 504                         // common case
 505                         // Due to inlining, it is more performant to determine if WriteByte should be
 506                         // invoked rather than always call WriteRune
 507                         if r < utf8.RuneSelf {
 508                                 b.WriteByte(byte(r))
 509                         } else {
 510                                 // r is not a ASCII rune.
 511                                 b.WriteRune(r)
 512                         }
 513                 }
 514         }
 515
 516         return b.String()
 517 }
 518
 519 // Repeat returns a new string consisting of count copies of the string s.
 520 //
 521 // It panics if count is negative or if
 522 // the result of (len(s) * count) overflows.
 523 func Repeat(s string, count int) string {
 524         if count == 0 {
 525                 return ""
 526         }
 527
 528         // Since we cannot return an error on overflow,
 529         // we should panic if the repeat will generate
 530         // an overflow.
 531         // See Issue golang.org/issue/16237
 532         if count < 0 {
 533                 panic("strings: negative Repeat count")
 534         } else if len(s)*count/count != len(s) {
 535                 panic("strings: Repeat count causes overflow")
 536         }
 537
 538         n := len(s) * count
 539         var b Builder
 540         b.Grow(n)
 541         b.WriteString(s)
 542         for b.Len() < n {
 543                 if b.Len() <= n/2 {
 544                         b.WriteString(b.String())
 545                 } else {
 546                         b.WriteString(b.String()[:n-b.Len()])
 547                         break
 548                 }
 549         }
 550         return b.String()
 551 }
 552
 553 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
 554 func ToUpper(s string) string {
 555         isASCII, hasLower := true, false
 556         for i := 0; i < len(s); i++ {
 557                 c := s[i]
 558                 if c >= utf8.RuneSelf {
 559                         isASCII = false
 560                         break
 561                 }
 562                 hasLower = hasLower || (c >= 'a' && c <= 'z')
 563         }
 564
 565         if isASCII { // optimize for ASCII-only strings.
 566                 if !hasLower {
 567                         return s
 568                 }
 569                 var b Builder
 570                 b.Grow(len(s))
 571                 for i := 0; i < len(s); i++ {
 572                         c := s[i]
 573                         if c >= 'a' && c <= 'z' {
 574                                 c -= 'a' - 'A'
 575                         }
 576                         b.WriteByte(c)
 577                 }
 578                 return b.String()
 579         }
 580         return Map(unicode.ToUpper, s)
 581 }
 582
 583 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
 584 func ToLower(s string) string {
 585         isASCII, hasUpper := true, false
 586         for i := 0; i < len(s); i++ {
 587                 c := s[i]
 588                 if c >= utf8.RuneSelf {
 589                         isASCII = false
 590                         break
 591                 }
 592                 hasUpper = hasUpper || (c >= 'A' && c <= 'Z')
 593         }
 594
 595         if isASCII { // optimize for ASCII-only strings.
 596                 if !hasUpper {
 597                         return s
 598                 }
 599                 var b Builder
 600                 b.Grow(len(s))
 601                 for i := 0; i < len(s); i++ {
 602                         c := s[i]
 603                         if c >= 'A' && c <= 'Z' {
 604                                 c += 'a' - 'A'
 605                         }
 606                         b.WriteByte(c)
 607                 }
 608                 return b.String()
 609         }
 610         return Map(unicode.ToLower, s)
 611 }
 612
 613 // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
 614 func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
 615
 616 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
 617 // upper case using the case mapping specified by c.
 618 func ToUpperSpecial(c unicode.SpecialCase, s string) string {
 619         return Map(c.ToUpper, s)
 620 }
 621
 622 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
 623 // lower case using the case mapping specified by c.
 624 func ToLowerSpecial(c unicode.SpecialCase, s string) string {
 625         return Map(c.ToLower, s)
 626 }
 627
 628 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
 629 // title case, giving priority to the special casing rules.
 630 func ToTitleSpecial(c unicode.SpecialCase, s string) string {
 631         return Map(c.ToTitle, s)
 632 }
 633
 634 // isSeparator reports whether the rune could mark a word boundary.
 635 // TODO: update when package unicode captures more of the properties.
 636 func isSeparator(r rune) bool {
 637         // ASCII alphanumerics and underscore are not separators
 638         if r <= 0x7F {
 639                 switch {
 640                 case '0' <= r && r <= '9':
 641                         return false
 642                 case 'a' <= r && r <= 'z':
 643                         return false
 644                 case 'A' <= r && r <= 'Z':
 645                         return false
 646                 case r == '_':
 647                         return false
 648                 }
 649                 return true
 650         }
 651         // Letters and digits are not separators
 652         if unicode.IsLetter(r) || unicode.IsDigit(r) {
 653                 return false
 654         }
 655         // Otherwise, all we can do for now is treat spaces as separators.
 656         return unicode.IsSpace(r)
 657 }
 658
 659 // Title returns a copy of the string s with all Unicode letters that begin words
 660 // mapped to their title case.
 661 //
 662 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
 663 func Title(s string) string {
 664         // Use a closure here to remember state.
 665         // Hackish but effective. Depends on Map scanning in order and calling
 666         // the closure once per rune.
 667         prev := ' '
 668         return Map(
 669                 func(r rune) rune {
 670                         if isSeparator(prev) {
 671                                 prev = r
 672                                 return unicode.ToTitle(r)
 673                         }
 674                         prev = r
 675                         return r
 676                 },
 677                 s)
 678 }
 679
 680 // TrimLeftFunc returns a slice of the string s with all leading
 681 // Unicode code points c satisfying f(c) removed.
 682 func TrimLeftFunc(s string, f func(rune) bool) string {
 683         i := indexFunc(s, f, false)
 684         if i == -1 {
 685                 return ""
 686         }
 687         return s[i:]
 688 }
 689
 690 // TrimRightFunc returns a slice of the string s with all trailing
 691 // Unicode code points c satisfying f(c) removed.
 692 func TrimRightFunc(s string, f func(rune) bool) string {
 693         i := lastIndexFunc(s, f, false)
 694         if i >= 0 && s[i] >= utf8.RuneSelf {
 695                 _, wid := utf8.DecodeRuneInString(s[i:])
 696                 i += wid
 697         } else {
 698                 i++
 699         }
 700         return s[0:i]
 701 }
 702
 703 // TrimFunc returns a slice of the string s with all leading
 704 // and trailing Unicode code points c satisfying f(c) removed.
 705 func TrimFunc(s string, f func(rune) bool) string {
 706         return TrimRightFunc(TrimLeftFunc(s, f), f)
 707 }
 708
 709 // IndexFunc returns the index into s of the first Unicode
 710 // code point satisfying f(c), or -1 if none do.
 711 func IndexFunc(s string, f func(rune) bool) int {
 712         return indexFunc(s, f, true)
 713 }
 714
 715 // LastIndexFunc returns the index into s of the last
 716 // Unicode code point satisfying f(c), or -1 if none do.
 717 func LastIndexFunc(s string, f func(rune) bool) int {
 718         return lastIndexFunc(s, f, true)
 719 }
 720
 721 // indexFunc is the same as IndexFunc except that if
 722 // truth==false, the sense of the predicate function is
 723 // inverted.
 724 func indexFunc(s string, f func(rune) bool, truth bool) int {
 725         for i, r := range s {
 726                 if f(r) == truth {
 727                         return i
 728                 }
 729         }
 730         return -1
 731 }
 732
 733 // lastIndexFunc is the same as LastIndexFunc except that if
 734 // truth==false, the sense of the predicate function is
 735 // inverted.
 736 func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
 737         for i := len(s); i > 0; {
 738                 r, size := utf8.DecodeLastRuneInString(s[0:i])
 739                 i -= size
 740                 if f(r) == truth {
 741                         return i
 742                 }
 743         }
 744         return -1
 745 }
 746
 747 // asciiSet is a 32-byte value, where each bit represents the presence of a
 748 // given ASCII character in the set. The 128-bits of the lower 16 bytes,
 749 // starting with the least-significant bit of the lowest word to the
 750 // most-significant bit of the highest word, map to the full range of all
 751 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
 752 // ensuring that any non-ASCII character will be reported as not in the set.
 753 type asciiSet [8]uint32
 754
 755 // makeASCIISet creates a set of ASCII characters and reports whether all
 756 // characters in chars are ASCII.
 757 func makeASCIISet(chars string) (as asciiSet, ok bool) {
 758         for i := 0; i < len(chars); i++ {
 759                 c := chars[i]
 760                 if c >= utf8.RuneSelf {
 761                         return as, false
 762                 }
 763                 as[c>>5] |= 1 << uint(c&31)
 764         }
 765         return as, true
 766 }
 767
 768 // contains reports whether c is inside the set.
 769 func (as *asciiSet) contains(c byte) bool {
 770         return (as[c>>5] & (1 << uint(c&31))) != 0
 771 }
 772
 773 func makeCutsetFunc(cutset string) func(rune) bool {
 774         if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
 775                 return func(r rune) bool {
 776                         return r == rune(cutset[0])
 777                 }
 778         }
 779         if as, isASCII := makeASCIISet(cutset); isASCII {
 780                 return func(r rune) bool {
 781                         return r < utf8.RuneSelf && as.contains(byte(r))
 782                 }
 783         }
 784         return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
 785 }
 786
 787 // Trim returns a slice of the string s with all leading and
 788 // trailing Unicode code points contained in cutset removed.
 789 func Trim(s string, cutset string) string {
 790         if s == "" || cutset == "" {
 791                 return s
 792         }
 793         return TrimFunc(s, makeCutsetFunc(cutset))
 794 }
 795
 796 // TrimLeft returns a slice of the string s with all leading
 797 // Unicode code points contained in cutset removed.
 798 //
 799 // To remove a prefix, use TrimPrefix instead.
 800 func TrimLeft(s string, cutset string) string {
 801         if s == "" || cutset == "" {
 802                 return s
 803         }
 804         return TrimLeftFunc(s, makeCutsetFunc(cutset))
 805 }
 806
 807 // TrimRight returns a slice of the string s, with all trailing
 808 // Unicode code points contained in cutset removed.
 809 //
 810 // To remove a suffix, use TrimSuffix instead.
 811 func TrimRight(s string, cutset string) string {
 812         if s == "" || cutset == "" {
 813                 return s
 814         }
 815         return TrimRightFunc(s, makeCutsetFunc(cutset))
 816 }
 817
 818 // TrimSpace returns a slice of the string s, with all leading
 819 // and trailing white space removed, as defined by Unicode.
 820 func TrimSpace(s string) string {
 821         return TrimFunc(s, unicode.IsSpace)
 822 }
 823
 824 // TrimPrefix returns s without the provided leading prefix string.
 825 // If s doesn't start with prefix, s is returned unchanged.
 826 func TrimPrefix(s, prefix string) string {
 827         if HasPrefix(s, prefix) {
 828                 return s[len(prefix):]
 829         }
 830         return s
 831 }
 832
 833 // TrimSuffix returns s without the provided trailing suffix string.
 834 // If s doesn't end with suffix, s is returned unchanged.
 835 func TrimSuffix(s, suffix string) string {
 836         if HasSuffix(s, suffix) {
 837                 return s[:len(s)-len(suffix)]
 838         }
 839         return s
 840 }
 841
 842 // Replace returns a copy of the string s with the first n
 843 // non-overlapping instances of old replaced by new.
 844 // If old is empty, it matches at the beginning of the string
 845 // and after each UTF-8 sequence, yielding up to k+1 replacements
 846 // for a k-rune string.
 847 // If n < 0, there is no limit on the number of replacements.
 848 func Replace(s, old, new string, n int) string {
 849         if old == new || n == 0 {
 850                 return s // avoid allocation
 851         }
 852
 853         // Compute number of replacements.
 854         if m := Count(s, old); m == 0 {
 855                 return s // avoid allocation
 856         } else if n < 0 || m < n {
 857                 n = m
 858         }
 859
 860         // Apply replacements to buffer.
 861         t := make([]byte, len(s)+n*(len(new)-len(old)))
 862         w := 0
 863         start := 0
 864         for i := 0; i < n; i++ {
 865                 j := start
 866                 if len(old) == 0 {
 867                         if i > 0 {
 868                                 _, wid := utf8.DecodeRuneInString(s[start:])
 869                                 j += wid
 870                         }
 871                 } else {
 872                         j += Index(s[start:], old)
 873                 }
 874                 w += copy(t[w:], s[start:j])
 875                 w += copy(t[w:], new)
 876                 start = j + len(old)
 877         }
 878         w += copy(t[w:], s[start:])
 879         return string(t[0:w])
 880 }
 881
 882 // ReplaceAll returns a copy of the string s with all
 883 // non-overlapping instances of old replaced by new.
 884 // If old is empty, it matches at the beginning of the string
 885 // and after each UTF-8 sequence, yielding up to k+1 replacements
 886 // for a k-rune string.
 887 func ReplaceAll(s, old, new string) string {
 888         return Replace(s, old, new, -1)
 889 }
 890
 891 // EqualFold reports whether s and t, interpreted as UTF-8 strings,
 892 // are equal under Unicode case-folding.
 893 func EqualFold(s, t string) bool {
 894         for s != "" && t != "" {
 895                 // Extract first rune from each string.
 896                 var sr, tr rune
 897                 if s[0] < utf8.RuneSelf {
 898                         sr, s = rune(s[0]), s[1:]
 899                 } else {
 900                         r, size := utf8.DecodeRuneInString(s)
 901                         sr, s = r, s[size:]
 902                 }
 903                 if t[0] < utf8.RuneSelf {
 904                         tr, t = rune(t[0]), t[1:]
 905                 } else {
 906                         r, size := utf8.DecodeRuneInString(t)
 907                         tr, t = r, t[size:]
 908                 }
 909
 910                 // If they match, keep going; if not, return false.
 911
 912                 // Easy case.
 913                 if tr == sr {
 914                         continue
 915                 }
 916
 917                 // Make sr < tr to simplify what follows.
 918                 if tr < sr {
 919                         tr, sr = sr, tr
 920                 }
 921                 // Fast check for ASCII.
 922                 if tr < utf8.RuneSelf {
 923                         // ASCII only, sr/tr must be upper/lower case
 924                         if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
 925                                 continue
 926                         }
 927                         return false
 928                 }
 929
 930                 // General case. SimpleFold(x) returns the next equivalent rune > x
 931                 // or wraps around to smaller values.
 932                 r := unicode.SimpleFold(sr)
 933                 for r != sr && r < tr {
 934                         r = unicode.SimpleFold(r)
 935                 }
 936                 if r == tr {
 937                         continue
 938                 }
 939                 return false
 940         }
 941
 942         // One string is empty. Are both?
 943         return s == t
 944 }
 945
 946 // Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
 947 func Index(s, substr string) int {
 948         n := len(substr)
 949         switch {
 950         case n == 0:
 951                 return 0
 952         case n == 1:
 953                 return IndexByte(s, substr[0])
 954         case n == len(s):
 955                 if substr == s {
 956                         return 0
 957                 }
 958                 return -1
 959         case n > len(s):
 960                 return -1
 961         case n <= bytealg.MaxLen:
 962                 // Use brute force when s and substr both are small
 963                 if len(s) <= bytealg.MaxBruteForce {
 964                         return bytealg.IndexString(s, substr)
 965                 }
 966                 c0 := substr[0]
 967                 c1 := substr[1]
 968                 i := 0
 969                 t := len(s) - n + 1
 970                 fails := 0
 971                 for i < t {
 972                         if s[i] != c0 {
 973                                 // IndexByte is faster than bytealg.IndexString, so use it as long as
 974                                 // we're not getting lots of false positives.
 975                                 o := IndexByte(s[i:t], c0)
 976                                 if o < 0 {
 977                                         return -1
 978                                 }
 979                                 i += o
 980                         }
 981                         if s[i+1] == c1 && s[i:i+n] == substr {
 982                                 return i
 983                         }
 984                         fails++
 985                         i++
 986                         // Switch to bytealg.IndexString when IndexByte produces too many false positives.
 987                         if fails > bytealg.Cutover(i) {
 988                                 r := bytealg.IndexString(s[i:], substr)
 989                                 if r >= 0 {
 990                                         return r + i
 991                                 }
 992                                 return -1
 993                         }
 994                 }
 995                 return -1
 996         }
 997         c0 := substr[0]
 998         c1 := substr[1]
 999         i := 0
1000         t := len(s) - n + 1
1001         fails := 0
1002         for i < t {
1003                 if s[i] != c0 {
1004                         o := IndexByte(s[i:t], c0)
1005                         if o < 0 {
1006                                 return -1
1007                         }
1008                         i += o
1009                 }
1010                 if s[i+1] == c1 && s[i:i+n] == substr {
1011                         return i
1012                 }
1013                 i++
1014                 fails++
1015                 if fails >= 4+i>>4 && i < t {
1016                         // See comment in ../bytes/bytes_generic.go.
1017                         j := indexRabinKarp(s[i:], substr)
1018                         if j < 0 {
1019                                 return -1
1020                         }
1021                         return i + j
1022                 }
1023         }
1024         return -1
1025 }
1026
1027 func indexRabinKarp(s, substr string) int {
1028         // Rabin-Karp search
1029         hashss, pow := hashStr(substr)
1030         n := len(substr)
1031         var h uint32
1032         for i := 0; i < n; i++ {
1033                 h = h*primeRK + uint32(s[i])
1034         }
1035         if h == hashss && s[:n] == substr {
1036                 return 0
1037         }
1038         for i := n; i < len(s); {
1039                 h *= primeRK
1040                 h += uint32(s[i])
1041                 h -= pow * uint32(s[i-n])
1042                 i++
1043                 if h == hashss && s[i-n:i] == substr {
1044                         return i - n
1045                 }
1046         }
1047         return -1
1048 }