libgo/go/strings/strings.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Package strings implements simple functions to manipulate UTF-8 encoded strings.
   6 //
   7 // For information about UTF-8 strings in Go, see https://blog.golang.org/strings.
   8 package strings
   9
  10 import (
  11         "unicode"
  12         "unicode/utf8"
  13 )
  14
  15 // explode splits s into a slice of UTF-8 strings,
  16 // one string per Unicode character up to a maximum of n (n < 0 means no limit).
  17 // Invalid UTF-8 sequences become correct encodings of U+FFFD.
  18 func explode(s string, n int) []string {
  19         l := utf8.RuneCountInString(s)
  20         if n < 0 || n > l {
  21                 n = l
  22         }
  23         a := make([]string, n)
  24         for i := 0; i < n-1; i++ {
  25                 ch, size := utf8.DecodeRuneInString(s)
  26                 a[i] = s[:size]
  27                 s = s[size:]
  28                 if ch == utf8.RuneError {
  29                         a[i] = string(utf8.RuneError)
  30                 }
  31         }
  32         if n > 0 {
  33                 a[n-1] = s
  34         }
  35         return a
  36 }
  37
  38 // primeRK is the prime base used in Rabin-Karp algorithm.
  39 const primeRK = 16777619
  40
  41 // hashStr returns the hash and the appropriate multiplicative
  42 // factor for use in Rabin-Karp algorithm.
  43 func hashStr(sep string) (uint32, uint32) {
  44         hash := uint32(0)
  45         for i := 0; i < len(sep); i++ {
  46                 hash = hash*primeRK + uint32(sep[i])
  47         }
  48         var pow, sq uint32 = 1, primeRK
  49         for i := len(sep); i > 0; i >>= 1 {
  50                 if i&1 != 0 {
  51                         pow *= sq
  52                 }
  53                 sq *= sq
  54         }
  55         return hash, pow
  56 }
  57
  58 // hashStrRev returns the hash of the reverse of sep and the
  59 // appropriate multiplicative factor for use in Rabin-Karp algorithm.
  60 func hashStrRev(sep string) (uint32, uint32) {
  61         hash := uint32(0)
  62         for i := len(sep) - 1; i >= 0; i-- {
  63                 hash = hash*primeRK + uint32(sep[i])
  64         }
  65         var pow, sq uint32 = 1, primeRK
  66         for i := len(sep); i > 0; i >>= 1 {
  67                 if i&1 != 0 {
  68                         pow *= sq
  69                 }
  70                 sq *= sq
  71         }
  72         return hash, pow
  73 }
  74
  75 // countGeneric implements Count.
  76 func countGeneric(s, substr string) int {
  77         // special case
  78         if len(substr) == 0 {
  79                 return utf8.RuneCountInString(s) + 1
  80         }
  81         n := 0
  82         for {
  83                 i := Index(s, substr)
  84                 if i == -1 {
  85                         return n
  86                 }
  87                 n++
  88                 s = s[i+len(substr):]
  89         }
  90 }
  91
  92 // Contains reports whether substr is within s.
  93 func Contains(s, substr string) bool {
  94         return Index(s, substr) >= 0
  95 }
  96
  97 // ContainsAny reports whether any Unicode code points in chars are within s.
  98 func ContainsAny(s, chars string) bool {
  99         return IndexAny(s, chars) >= 0
 100 }
 101
 102 // ContainsRune reports whether the Unicode code point r is within s.
 103 func ContainsRune(s string, r rune) bool {
 104         return IndexRune(s, r) >= 0
 105 }
 106
 107 // LastIndex returns the index of the last instance of substr in s, or -1 if substr is not present in s.
 108 func LastIndex(s, substr string) int {
 109         n := len(substr)
 110         switch {
 111         case n == 0:
 112                 return len(s)
 113         case n == 1:
 114                 return LastIndexByte(s, substr[0])
 115         case n == len(s):
 116                 if substr == s {
 117                         return 0
 118                 }
 119                 return -1
 120         case n > len(s):
 121                 return -1
 122         }
 123         // Rabin-Karp search from the end of the string
 124         hashss, pow := hashStrRev(substr)
 125         last := len(s) - n
 126         var h uint32
 127         for i := len(s) - 1; i >= last; i-- {
 128                 h = h*primeRK + uint32(s[i])
 129         }
 130         if h == hashss && s[last:] == substr {
 131                 return last
 132         }
 133         for i := last - 1; i >= 0; i-- {
 134                 h *= primeRK
 135                 h += uint32(s[i])
 136                 h -= pow * uint32(s[i+n])
 137                 if h == hashss && s[i:i+n] == substr {
 138                         return i
 139                 }
 140         }
 141         return -1
 142 }
 143
 144 // IndexRune returns the index of the first instance of the Unicode code point
 145 // r, or -1 if rune is not present in s.
 146 // If r is utf8.RuneError, it returns the first instance of any
 147 // invalid UTF-8 byte sequence.
 148 func IndexRune(s string, r rune) int {
 149         switch {
 150         case 0 <= r && r < utf8.RuneSelf:
 151                 return IndexByte(s, byte(r))
 152         case r == utf8.RuneError:
 153                 for i, r := range s {
 154                         if r == utf8.RuneError {
 155                                 return i
 156                         }
 157                 }
 158                 return -1
 159         case !utf8.ValidRune(r):
 160                 return -1
 161         default:
 162                 return Index(s, string(r))
 163         }
 164 }
 165
 166 // IndexAny returns the index of the first instance of any Unicode code point
 167 // from chars in s, or -1 if no Unicode code point from chars is present in s.
 168 func IndexAny(s, chars string) int {
 169         if chars == "" {
 170                 // Avoid scanning all of s.
 171                 return -1
 172         }
 173         if len(s) > 8 {
 174                 if as, isASCII := makeASCIISet(chars); isASCII {
 175                         for i := 0; i < len(s); i++ {
 176                                 if as.contains(s[i]) {
 177                                         return i
 178                                 }
 179                         }
 180                         return -1
 181                 }
 182         }
 183         for i, c := range s {
 184                 for _, m := range chars {
 185                         if c == m {
 186                                 return i
 187                         }
 188                 }
 189         }
 190         return -1
 191 }
 192
 193 // LastIndexAny returns the index of the last instance of any Unicode code
 194 // point from chars in s, or -1 if no Unicode code point from chars is
 195 // present in s.
 196 func LastIndexAny(s, chars string) int {
 197         if chars == "" {
 198                 // Avoid scanning all of s.
 199                 return -1
 200         }
 201         if len(s) > 8 {
 202                 if as, isASCII := makeASCIISet(chars); isASCII {
 203                         for i := len(s) - 1; i >= 0; i-- {
 204                                 if as.contains(s[i]) {
 205                                         return i
 206                                 }
 207                         }
 208                         return -1
 209                 }
 210         }
 211         for i := len(s); i > 0; {
 212                 r, size := utf8.DecodeLastRuneInString(s[:i])
 213                 i -= size
 214                 for _, c := range chars {
 215                         if r == c {
 216                                 return i
 217                         }
 218                 }
 219         }
 220         return -1
 221 }
 222
 223 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
 224 func LastIndexByte(s string, c byte) int {
 225         for i := len(s) - 1; i >= 0; i-- {
 226                 if s[i] == c {
 227                         return i
 228                 }
 229         }
 230         return -1
 231 }
 232
 233 // Generic split: splits after each instance of sep,
 234 // including sepSave bytes of sep in the subarrays.
 235 func genSplit(s, sep string, sepSave, n int) []string {
 236         if n == 0 {
 237                 return nil
 238         }
 239         if sep == "" {
 240                 return explode(s, n)
 241         }
 242         if n < 0 {
 243                 n = Count(s, sep) + 1
 244         }
 245
 246         a := make([]string, n)
 247         n--
 248         i := 0
 249         for i < n {
 250                 m := Index(s, sep)
 251                 if m < 0 {
 252                         break
 253                 }
 254                 a[i] = s[:m+sepSave]
 255                 s = s[m+len(sep):]
 256                 i++
 257         }
 258         a[i] = s
 259         return a[:i+1]
 260 }
 261
 262 // SplitN slices s into substrings separated by sep and returns a slice of
 263 // the substrings between those separators.
 264 //
 265 // The count determines the number of substrings to return:
 266 //   n > 0: at most n substrings; the last substring will be the unsplit remainder.
 267 //   n == 0: the result is nil (zero substrings)
 268 //   n < 0: all substrings
 269 //
 270 // Edge cases for s and sep (for example, empty strings) are handled
 271 // as described in the documentation for Split.
 272 func SplitN(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
 273
 274 // SplitAfterN slices s into substrings after each instance of sep and
 275 // returns a slice of those substrings.
 276 //
 277 // The count determines the number of substrings to return:
 278 //   n > 0: at most n substrings; the last substring will be the unsplit remainder.
 279 //   n == 0: the result is nil (zero substrings)
 280 //   n < 0: all substrings
 281 //
 282 // Edge cases for s and sep (for example, empty strings) are handled
 283 // as described in the documentation for SplitAfter.
 284 func SplitAfterN(s, sep string, n int) []string {
 285         return genSplit(s, sep, len(sep), n)
 286 }
 287
 288 // Split slices s into all substrings separated by sep and returns a slice of
 289 // the substrings between those separators.
 290 //
 291 // If s does not contain sep and sep is not empty, Split returns a
 292 // slice of length 1 whose only element is s.
 293 //
 294 // If sep is empty, Split splits after each UTF-8 sequence. If both s
 295 // and sep are empty, Split returns an empty slice.
 296 //
 297 // It is equivalent to SplitN with a count of -1.
 298 func Split(s, sep string) []string { return genSplit(s, sep, 0, -1) }
 299
 300 // SplitAfter slices s into all substrings after each instance of sep and
 301 // returns a slice of those substrings.
 302 //
 303 // If s does not contain sep and sep is not empty, SplitAfter returns
 304 // a slice of length 1 whose only element is s.
 305 //
 306 // If sep is empty, SplitAfter splits after each UTF-8 sequence. If
 307 // both s and sep are empty, SplitAfter returns an empty slice.
 308 //
 309 // It is equivalent to SplitAfterN with a count of -1.
 310 func SplitAfter(s, sep string) []string {
 311         return genSplit(s, sep, len(sep), -1)
 312 }
 313
 314 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
 315
 316 // Fields splits the string s around each instance of one or more consecutive white space
 317 // characters, as defined by unicode.IsSpace, returning a slice of substrings of s or an
 318 // empty slice if s contains only white space.
 319 func Fields(s string) []string {
 320         // First count the fields.
 321         // This is an exact count if s is ASCII, otherwise it is an approximation.
 322         n := 0
 323         wasSpace := 1
 324         // setBits is used to track which bits are set in the bytes of s.
 325         setBits := uint8(0)
 326         for i := 0; i < len(s); i++ {
 327                 r := s[i]
 328                 setBits |= r
 329                 isSpace := int(asciiSpace[r])
 330                 n += wasSpace & ^isSpace
 331                 wasSpace = isSpace
 332         }
 333
 334         if setBits < utf8.RuneSelf { // ASCII fast path
 335                 a := make([]string, n)
 336                 na := 0
 337                 fieldStart := 0
 338                 i := 0
 339                 // Skip spaces in the front of the input.
 340                 for i < len(s) && asciiSpace[s[i]] != 0 {
 341                         i++
 342                 }
 343                 fieldStart = i
 344                 for i < len(s) {
 345                         if asciiSpace[s[i]] == 0 {
 346                                 i++
 347                                 continue
 348                         }
 349                         a[na] = s[fieldStart:i]
 350                         na++
 351                         i++
 352                         // Skip spaces in between fields.
 353                         for i < len(s) && asciiSpace[s[i]] != 0 {
 354                                 i++
 355                         }
 356                         fieldStart = i
 357                 }
 358                 if fieldStart < len(s) { // Last field might end at EOF.
 359                         a[na] = s[fieldStart:]
 360                 }
 361                 return a
 362         }
 363
 364         // Some runes in the input string are not ASCII.
 365         return FieldsFunc(s, unicode.IsSpace)
 366 }
 367
 368 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
 369 // and returns an array of slices of s. If all code points in s satisfy f(c) or the
 370 // string is empty, an empty slice is returned.
 371 // FieldsFunc makes no guarantees about the order in which it calls f(c).
 372 // If f does not return consistent results for a given c, FieldsFunc may crash.
 373 func FieldsFunc(s string, f func(rune) bool) []string {
 374         // A span is used to record a slice of s of the form s[start:end].
 375         // The start index is inclusive and the end index is exclusive.
 376         type span struct {
 377                 start int
 378                 end   int
 379         }
 380         spans := make([]span, 0, 32)
 381
 382         // Find the field start and end indices.
 383         wasField := false
 384         fromIndex := 0
 385         for i, rune := range s {
 386                 if f(rune) {
 387                         if wasField {
 388                                 spans = append(spans, span{start: fromIndex, end: i})
 389                                 wasField = false
 390                         }
 391                 } else {
 392                         if !wasField {
 393                                 fromIndex = i
 394                                 wasField = true
 395                         }
 396                 }
 397         }
 398
 399         // Last field might end at EOF.
 400         if wasField {
 401                 spans = append(spans, span{fromIndex, len(s)})
 402         }
 403
 404         // Create strings from recorded field indices.
 405         a := make([]string, len(spans))
 406         for i, span := range spans {
 407                 a[i] = s[span.start:span.end]
 408         }
 409
 410         return a
 411 }
 412
 413 // Join concatenates the elements of a to create a single string. The separator string
 414 // sep is placed between elements in the resulting string.
 415 func Join(a []string, sep string) string {
 416         switch len(a) {
 417         case 0:
 418                 return ""
 419         case 1:
 420                 return a[0]
 421         case 2:
 422                 // Special case for common small values.
 423                 // Remove if golang.org/issue/6714 is fixed
 424                 return a[0] + sep + a[1]
 425         case 3:
 426                 // Special case for common small values.
 427                 // Remove if golang.org/issue/6714 is fixed
 428                 return a[0] + sep + a[1] + sep + a[2]
 429         }
 430         n := len(sep) * (len(a) - 1)
 431         for i := 0; i < len(a); i++ {
 432                 n += len(a[i])
 433         }
 434
 435         b := make([]byte, n)
 436         bp := copy(b, a[0])
 437         for _, s := range a[1:] {
 438                 bp += copy(b[bp:], sep)
 439                 bp += copy(b[bp:], s)
 440         }
 441         return string(b)
 442 }
 443
 444 // HasPrefix tests whether the string s begins with prefix.
 445 func HasPrefix(s, prefix string) bool {
 446         return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
 447 }
 448
 449 // HasSuffix tests whether the string s ends with suffix.
 450 func HasSuffix(s, suffix string) bool {
 451         return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
 452 }
 453
 454 // Map returns a copy of the string s with all its characters modified
 455 // according to the mapping function. If mapping returns a negative value, the character is
 456 // dropped from the string with no replacement.
 457 func Map(mapping func(rune) rune, s string) string {
 458         // In the worst case, the string can grow when mapped, making
 459         // things unpleasant. But it's so rare we barge in assuming it's
 460         // fine. It could also shrink but that falls out naturally.
 461
 462         // The output buffer b is initialized on demand, the first
 463         // time a character differs.
 464         var b []byte
 465         // nbytes is the number of bytes encoded in b.
 466         var nbytes int
 467
 468         for i, c := range s {
 469                 r := mapping(c)
 470                 if r == c {
 471                         continue
 472                 }
 473
 474                 b = make([]byte, len(s)+utf8.UTFMax)
 475                 nbytes = copy(b, s[:i])
 476                 if r >= 0 {
 477                         if r < utf8.RuneSelf {
 478                                 b[nbytes] = byte(r)
 479                                 nbytes++
 480                         } else {
 481                                 nbytes += utf8.EncodeRune(b[nbytes:], r)
 482                         }
 483                 }
 484
 485                 if c == utf8.RuneError {
 486                         // RuneError is the result of either decoding
 487                         // an invalid sequence or '\uFFFD'. Determine
 488                         // the correct number of bytes we need to advance.
 489                         _, w := utf8.DecodeRuneInString(s[i:])
 490                         i += w
 491                 } else {
 492                         i += utf8.RuneLen(c)
 493                 }
 494
 495                 s = s[i:]
 496                 break
 497         }
 498
 499         if b == nil {
 500                 return s
 501         }
 502
 503         for _, c := range s {
 504                 r := mapping(c)
 505
 506                 // common case
 507                 if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) {
 508                         b[nbytes] = byte(r)
 509                         nbytes++
 510                         continue
 511                 }
 512
 513                 // b is not big enough or r is not a ASCII rune.
 514                 if r >= 0 {
 515                         if nbytes+utf8.UTFMax >= len(b) {
 516                                 // Grow the buffer.
 517                                 nb := make([]byte, 2*len(b))
 518                                 copy(nb, b[:nbytes])
 519                                 b = nb
 520                         }
 521                         nbytes += utf8.EncodeRune(b[nbytes:], r)
 522                 }
 523         }
 524
 525         return string(b[:nbytes])
 526 }
 527
 528 // Repeat returns a new string consisting of count copies of the string s.
 529 //
 530 // It panics if count is negative or if
 531 // the result of (len(s) * count) overflows.
 532 func Repeat(s string, count int) string {
 533         // Since we cannot return an error on overflow,
 534         // we should panic if the repeat will generate
 535         // an overflow.
 536         // See Issue golang.org/issue/16237
 537         if count < 0 {
 538                 panic("strings: negative Repeat count")
 539         } else if count > 0 && len(s)*count/count != len(s) {
 540                 panic("strings: Repeat count causes overflow")
 541         }
 542
 543         b := make([]byte, len(s)*count)
 544         bp := copy(b, s)
 545         for bp < len(b) {
 546                 copy(b[bp:], b[:bp])
 547                 bp *= 2
 548         }
 549         return string(b)
 550 }
 551
 552 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
 553 func ToUpper(s string) string {
 554         isASCII, hasLower := true, false
 555         for i := 0; i < len(s); i++ {
 556                 c := s[i]
 557                 if c >= utf8.RuneSelf {
 558                         isASCII = false
 559                         break
 560                 }
 561                 hasLower = hasLower || (c >= 'a' && c <= 'z')
 562         }
 563
 564         if isASCII { // optimize for ASCII-only strings.
 565                 if !hasLower {
 566                         return s
 567                 }
 568                 b := make([]byte, len(s))
 569                 for i := 0; i < len(s); i++ {
 570                         c := s[i]
 571                         if c >= 'a' && c <= 'z' {
 572                                 c -= 'a' - 'A'
 573                         }
 574                         b[i] = c
 575                 }
 576                 return string(b)
 577         }
 578         return Map(unicode.ToUpper, s)
 579 }
 580
 581 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
 582 func ToLower(s string) string {
 583         isASCII, hasUpper := true, false
 584         for i := 0; i < len(s); i++ {
 585                 c := s[i]
 586                 if c >= utf8.RuneSelf {
 587                         isASCII = false
 588                         break
 589                 }
 590                 hasUpper = hasUpper || (c >= 'A' && c <= 'Z')
 591         }
 592
 593         if isASCII { // optimize for ASCII-only strings.
 594                 if !hasUpper {
 595                         return s
 596                 }
 597                 b := make([]byte, len(s))
 598                 for i := 0; i < len(s); i++ {
 599                         c := s[i]
 600                         if c >= 'A' && c <= 'Z' {
 601                                 c += 'a' - 'A'
 602                         }
 603                         b[i] = c
 604                 }
 605                 return string(b)
 606         }
 607         return Map(unicode.ToLower, s)
 608 }
 609
 610 // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
 611 func ToTitle(s string) string { return Map(unicode.ToTitle, s) }
 612
 613 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
 614 // upper case, giving priority to the special casing rules.
 615 func ToUpperSpecial(c unicode.SpecialCase, s string) string {
 616         return Map(func(r rune) rune { return c.ToUpper(r) }, s)
 617 }
 618
 619 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
 620 // lower case, giving priority to the special casing rules.
 621 func ToLowerSpecial(c unicode.SpecialCase, s string) string {
 622         return Map(func(r rune) rune { return c.ToLower(r) }, s)
 623 }
 624
 625 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
 626 // title case, giving priority to the special casing rules.
 627 func ToTitleSpecial(c unicode.SpecialCase, s string) string {
 628         return Map(func(r rune) rune { return c.ToTitle(r) }, s)
 629 }
 630
 631 // isSeparator reports whether the rune could mark a word boundary.
 632 // TODO: update when package unicode captures more of the properties.
 633 func isSeparator(r rune) bool {
 634         // ASCII alphanumerics and underscore are not separators
 635         if r <= 0x7F {
 636                 switch {
 637                 case '0' <= r && r <= '9':
 638                         return false
 639                 case 'a' <= r && r <= 'z':
 640                         return false
 641                 case 'A' <= r && r <= 'Z':
 642                         return false
 643                 case r == '_':
 644                         return false
 645                 }
 646                 return true
 647         }
 648         // Letters and digits are not separators
 649         if unicode.IsLetter(r) || unicode.IsDigit(r) {
 650                 return false
 651         }
 652         // Otherwise, all we can do for now is treat spaces as separators.
 653         return unicode.IsSpace(r)
 654 }
 655
 656 // Title returns a copy of the string s with all Unicode letters that begin words
 657 // mapped to their title case.
 658 //
 659 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
 660 func Title(s string) string {
 661         // Use a closure here to remember state.
 662         // Hackish but effective. Depends on Map scanning in order and calling
 663         // the closure once per rune.
 664         prev := ' '
 665         return Map(
 666                 func(r rune) rune {
 667                         if isSeparator(prev) {
 668                                 prev = r
 669                                 return unicode.ToTitle(r)
 670                         }
 671                         prev = r
 672                         return r
 673                 },
 674                 s)
 675 }
 676
 677 // TrimLeftFunc returns a slice of the string s with all leading
 678 // Unicode code points c satisfying f(c) removed.
 679 func TrimLeftFunc(s string, f func(rune) bool) string {
 680         i := indexFunc(s, f, false)
 681         if i == -1 {
 682                 return ""
 683         }
 684         return s[i:]
 685 }
 686
 687 // TrimRightFunc returns a slice of the string s with all trailing
 688 // Unicode code points c satisfying f(c) removed.
 689 func TrimRightFunc(s string, f func(rune) bool) string {
 690         i := lastIndexFunc(s, f, false)
 691         if i >= 0 && s[i] >= utf8.RuneSelf {
 692                 _, wid := utf8.DecodeRuneInString(s[i:])
 693                 i += wid
 694         } else {
 695                 i++
 696         }
 697         return s[0:i]
 698 }
 699
 700 // TrimFunc returns a slice of the string s with all leading
 701 // and trailing Unicode code points c satisfying f(c) removed.
 702 func TrimFunc(s string, f func(rune) bool) string {
 703         return TrimRightFunc(TrimLeftFunc(s, f), f)
 704 }
 705
 706 // IndexFunc returns the index into s of the first Unicode
 707 // code point satisfying f(c), or -1 if none do.
 708 func IndexFunc(s string, f func(rune) bool) int {
 709         return indexFunc(s, f, true)
 710 }
 711
 712 // LastIndexFunc returns the index into s of the last
 713 // Unicode code point satisfying f(c), or -1 if none do.
 714 func LastIndexFunc(s string, f func(rune) bool) int {
 715         return lastIndexFunc(s, f, true)
 716 }
 717
 718 // indexFunc is the same as IndexFunc except that if
 719 // truth==false, the sense of the predicate function is
 720 // inverted.
 721 func indexFunc(s string, f func(rune) bool, truth bool) int {
 722         for i, r := range s {
 723                 if f(r) == truth {
 724                         return i
 725                 }
 726         }
 727         return -1
 728 }
 729
 730 // lastIndexFunc is the same as LastIndexFunc except that if
 731 // truth==false, the sense of the predicate function is
 732 // inverted.
 733 func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
 734         for i := len(s); i > 0; {
 735                 r, size := utf8.DecodeLastRuneInString(s[0:i])
 736                 i -= size
 737                 if f(r) == truth {
 738                         return i
 739                 }
 740         }
 741         return -1
 742 }
 743
 744 // asciiSet is a 32-byte value, where each bit represents the presence of a
 745 // given ASCII character in the set. The 128-bits of the lower 16 bytes,
 746 // starting with the least-significant bit of the lowest word to the
 747 // most-significant bit of the highest word, map to the full range of all
 748 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
 749 // ensuring that any non-ASCII character will be reported as not in the set.
 750 type asciiSet [8]uint32
 751
 752 // makeASCIISet creates a set of ASCII characters and reports whether all
 753 // characters in chars are ASCII.
 754 func makeASCIISet(chars string) (as asciiSet, ok bool) {
 755         for i := 0; i < len(chars); i++ {
 756                 c := chars[i]
 757                 if c >= utf8.RuneSelf {
 758                         return as, false
 759                 }
 760                 as[c>>5] |= 1 << uint(c&31)
 761         }
 762         return as, true
 763 }
 764
 765 // contains reports whether c is inside the set.
 766 func (as *asciiSet) contains(c byte) bool {
 767         return (as[c>>5] & (1 << uint(c&31))) != 0
 768 }
 769
 770 func makeCutsetFunc(cutset string) func(rune) bool {
 771         if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
 772                 return func(r rune) bool {
 773                         return r == rune(cutset[0])
 774                 }
 775         }
 776         if as, isASCII := makeASCIISet(cutset); isASCII {
 777                 return func(r rune) bool {
 778                         return r < utf8.RuneSelf && as.contains(byte(r))
 779                 }
 780         }
 781         return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
 782 }
 783
 784 // Trim returns a slice of the string s with all leading and
 785 // trailing Unicode code points contained in cutset removed.
 786 func Trim(s string, cutset string) string {
 787         if s == "" || cutset == "" {
 788                 return s
 789         }
 790         return TrimFunc(s, makeCutsetFunc(cutset))
 791 }
 792
 793 // TrimLeft returns a slice of the string s with all leading
 794 // Unicode code points contained in cutset removed.
 795 func TrimLeft(s string, cutset string) string {
 796         if s == "" || cutset == "" {
 797                 return s
 798         }
 799         return TrimLeftFunc(s, makeCutsetFunc(cutset))
 800 }
 801
 802 // TrimRight returns a slice of the string s, with all trailing
 803 // Unicode code points contained in cutset removed.
 804 func TrimRight(s string, cutset string) string {
 805         if s == "" || cutset == "" {
 806                 return s
 807         }
 808         return TrimRightFunc(s, makeCutsetFunc(cutset))
 809 }
 810
 811 // TrimSpace returns a slice of the string s, with all leading
 812 // and trailing white space removed, as defined by Unicode.
 813 func TrimSpace(s string) string {
 814         return TrimFunc(s, unicode.IsSpace)
 815 }
 816
 817 // TrimPrefix returns s without the provided leading prefix string.
 818 // If s doesn't start with prefix, s is returned unchanged.
 819 func TrimPrefix(s, prefix string) string {
 820         if HasPrefix(s, prefix) {
 821                 return s[len(prefix):]
 822         }
 823         return s
 824 }
 825
 826 // TrimSuffix returns s without the provided trailing suffix string.
 827 // If s doesn't end with suffix, s is returned unchanged.
 828 func TrimSuffix(s, suffix string) string {
 829         if HasSuffix(s, suffix) {
 830                 return s[:len(s)-len(suffix)]
 831         }
 832         return s
 833 }
 834
 835 // Replace returns a copy of the string s with the first n
 836 // non-overlapping instances of old replaced by new.
 837 // If old is empty, it matches at the beginning of the string
 838 // and after each UTF-8 sequence, yielding up to k+1 replacements
 839 // for a k-rune string.
 840 // If n < 0, there is no limit on the number of replacements.
 841 func Replace(s, old, new string, n int) string {
 842         if old == new || n == 0 {
 843                 return s // avoid allocation
 844         }
 845
 846         // Compute number of replacements.
 847         if m := Count(s, old); m == 0 {
 848                 return s // avoid allocation
 849         } else if n < 0 || m < n {
 850                 n = m
 851         }
 852
 853         // Apply replacements to buffer.
 854         t := make([]byte, len(s)+n*(len(new)-len(old)))
 855         w := 0
 856         start := 0
 857         for i := 0; i < n; i++ {
 858                 j := start
 859                 if len(old) == 0 {
 860                         if i > 0 {
 861                                 _, wid := utf8.DecodeRuneInString(s[start:])
 862                                 j += wid
 863                         }
 864                 } else {
 865                         j += Index(s[start:], old)
 866                 }
 867                 w += copy(t[w:], s[start:j])
 868                 w += copy(t[w:], new)
 869                 start = j + len(old)
 870         }
 871         w += copy(t[w:], s[start:])
 872         return string(t[0:w])
 873 }
 874
 875 // EqualFold reports whether s and t, interpreted as UTF-8 strings,
 876 // are equal under Unicode case-folding.
 877 func EqualFold(s, t string) bool {
 878         for s != "" && t != "" {
 879                 // Extract first rune from each string.
 880                 var sr, tr rune
 881                 if s[0] < utf8.RuneSelf {
 882                         sr, s = rune(s[0]), s[1:]
 883                 } else {
 884                         r, size := utf8.DecodeRuneInString(s)
 885                         sr, s = r, s[size:]
 886                 }
 887                 if t[0] < utf8.RuneSelf {
 888                         tr, t = rune(t[0]), t[1:]
 889                 } else {
 890                         r, size := utf8.DecodeRuneInString(t)
 891                         tr, t = r, t[size:]
 892                 }
 893
 894                 // If they match, keep going; if not, return false.
 895
 896                 // Easy case.
 897                 if tr == sr {
 898                         continue
 899                 }
 900
 901                 // Make sr < tr to simplify what follows.
 902                 if tr < sr {
 903                         tr, sr = sr, tr
 904                 }
 905                 // Fast check for ASCII.
 906                 if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
 907                         // ASCII, and sr is upper case.  tr must be lower case.
 908                         if tr == sr+'a'-'A' {
 909                                 continue
 910                         }
 911                         return false
 912                 }
 913
 914                 // General case. SimpleFold(x) returns the next equivalent rune > x
 915                 // or wraps around to smaller values.
 916                 r := unicode.SimpleFold(sr)
 917                 for r != sr && r < tr {
 918                         r = unicode.SimpleFold(r)
 919                 }
 920                 if r == tr {
 921                         continue
 922                 }
 923                 return false
 924         }
 925
 926         // One string is empty. Are both?
 927         return s == t
 928 }
 929
 930 func indexRabinKarp(s, substr string) int {
 931         // Rabin-Karp search
 932         hashss, pow := hashStr(substr)
 933         n := len(substr)
 934         var h uint32
 935         for i := 0; i < n; i++ {
 936                 h = h*primeRK + uint32(s[i])
 937         }
 938         if h == hashss && s[:n] == substr {
 939                 return 0
 940         }
 941         for i := n; i < len(s); {
 942                 h *= primeRK
 943                 h += uint32(s[i])
 944                 h -= pow * uint32(s[i-n])
 945                 i++
 946                 if h == hashss && s[i-n:i] == substr {
 947                         return i - n
 948                 }
 949         }
 950         return -1
 951
 952 }