PR ada/81103
[official-gcc.git] / libgo / go / bytes / bytes.go
blob9af177fa882d7e17376649395721e183332211e3
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package bytes implements functions for the manipulation of byte slices.
6 // It is analogous to the facilities of the strings package.
7 package bytes
9 import (
10 "unicode"
11 "unicode/utf8"
14 func equalPortable(a, b []byte) bool {
15 if len(a) != len(b) {
16 return false
18 for i, c := range a {
19 if c != b[i] {
20 return false
23 return true
26 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
27 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
28 func explode(s []byte, n int) [][]byte {
29 if n <= 0 {
30 n = len(s)
32 a := make([][]byte, n)
33 var size int
34 na := 0
35 for len(s) > 0 {
36 if na+1 >= n {
37 a[na] = s
38 na++
39 break
41 _, size = utf8.DecodeRune(s)
42 a[na] = s[0:size:size]
43 s = s[size:]
44 na++
46 return a[0:na]
49 // countGeneric actually implements Count
50 func countGeneric(s, sep []byte) int {
51 // special case
52 if len(sep) == 0 {
53 return utf8.RuneCount(s) + 1
55 n := 0
56 for {
57 i := Index(s, sep)
58 if i == -1 {
59 return n
61 n++
62 s = s[i+len(sep):]
66 // Contains reports whether subslice is within b.
67 func Contains(b, subslice []byte) bool {
68 return Index(b, subslice) != -1
71 // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
72 func ContainsAny(b []byte, chars string) bool {
73 return IndexAny(b, chars) >= 0
76 // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
77 func ContainsRune(b []byte, r rune) bool {
78 return IndexRune(b, r) >= 0
81 func indexBytePortable(s []byte, c byte) int {
82 for i, b := range s {
83 if b == c {
84 return i
87 return -1
90 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
91 func LastIndex(s, sep []byte) int {
92 n := len(sep)
93 if n == 0 {
94 return len(s)
96 c := sep[0]
97 for i := len(s) - n; i >= 0; i-- {
98 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
99 return i
102 return -1
105 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
106 func LastIndexByte(s []byte, c byte) int {
107 for i := len(s) - 1; i >= 0; i-- {
108 if s[i] == c {
109 return i
112 return -1
115 // IndexRune interprets s as a sequence of UTF-8-encoded code points.
116 // It returns the byte index of the first occurrence in s of the given rune.
117 // It returns -1 if rune is not present in s.
118 // If r is utf8.RuneError, it returns the first instance of any
119 // invalid UTF-8 byte sequence.
120 func IndexRune(s []byte, r rune) int {
121 switch {
122 case 0 <= r && r < utf8.RuneSelf:
123 return IndexByte(s, byte(r))
124 case r == utf8.RuneError:
125 for i := 0; i < len(s); {
126 r1, n := utf8.DecodeRune(s[i:])
127 if r1 == utf8.RuneError {
128 return i
130 i += n
132 return -1
133 case !utf8.ValidRune(r):
134 return -1
135 default:
136 var b [utf8.UTFMax]byte
137 n := utf8.EncodeRune(b[:], r)
138 return Index(s, b[:n])
142 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
143 // It returns the byte index of the first occurrence in s of any of the Unicode
144 // code points in chars. It returns -1 if chars is empty or if there is no code
145 // point in common.
146 func IndexAny(s []byte, chars string) int {
147 if chars == "" {
148 // Avoid scanning all of s.
149 return -1
151 if len(s) > 8 {
152 if as, isASCII := makeASCIISet(chars); isASCII {
153 for i, c := range s {
154 if as.contains(c) {
155 return i
158 return -1
161 var width int
162 for i := 0; i < len(s); i += width {
163 r := rune(s[i])
164 if r < utf8.RuneSelf {
165 width = 1
166 } else {
167 r, width = utf8.DecodeRune(s[i:])
169 for _, ch := range chars {
170 if r == ch {
171 return i
175 return -1
178 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
179 // points. It returns the byte index of the last occurrence in s of any of
180 // the Unicode code points in chars. It returns -1 if chars is empty or if
181 // there is no code point in common.
182 func LastIndexAny(s []byte, chars string) int {
183 if chars == "" {
184 // Avoid scanning all of s.
185 return -1
187 if len(s) > 8 {
188 if as, isASCII := makeASCIISet(chars); isASCII {
189 for i := len(s) - 1; i >= 0; i-- {
190 if as.contains(s[i]) {
191 return i
194 return -1
197 for i := len(s); i > 0; {
198 r, size := utf8.DecodeLastRune(s[:i])
199 i -= size
200 for _, c := range chars {
201 if r == c {
202 return i
206 return -1
209 // Generic split: splits after each instance of sep,
210 // including sepSave bytes of sep in the subslices.
211 func genSplit(s, sep []byte, sepSave, n int) [][]byte {
212 if n == 0 {
213 return nil
215 if len(sep) == 0 {
216 return explode(s, n)
218 if n < 0 {
219 n = Count(s, sep) + 1
222 a := make([][]byte, n)
224 i := 0
225 for i < n {
226 m := Index(s, sep)
227 if m < 0 {
228 break
230 a[i] = s[: m+sepSave : m+sepSave]
231 s = s[m+len(sep):]
234 a[i] = s
235 return a[:i+1]
238 // SplitN slices s into subslices separated by sep and returns a slice of
239 // the subslices between those separators.
240 // If sep is empty, SplitN splits after each UTF-8 sequence.
241 // The count determines the number of subslices to return:
242 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
243 // n == 0: the result is nil (zero subslices)
244 // n < 0: all subslices
245 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
247 // SplitAfterN slices s into subslices after each instance of sep and
248 // returns a slice of those subslices.
249 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
250 // The count determines the number of subslices to return:
251 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
252 // n == 0: the result is nil (zero subslices)
253 // n < 0: all subslices
254 func SplitAfterN(s, sep []byte, n int) [][]byte {
255 return genSplit(s, sep, len(sep), n)
258 // Split slices s into all subslices separated by sep and returns a slice of
259 // the subslices between those separators.
260 // If sep is empty, Split splits after each UTF-8 sequence.
261 // It is equivalent to SplitN with a count of -1.
262 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
264 // SplitAfter slices s into all subslices after each instance of sep and
265 // returns a slice of those subslices.
266 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
267 // It is equivalent to SplitAfterN with a count of -1.
268 func SplitAfter(s, sep []byte) [][]byte {
269 return genSplit(s, sep, len(sep), -1)
272 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
274 // Fields interprets s as a sequence of UTF-8-encoded code points.
275 // It splits the slice s around each instance of one or more consecutive white space
276 // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
277 // empty slice if s contains only white space.
278 func Fields(s []byte) [][]byte {
279 // First count the fields.
280 // This is an exact count if s is ASCII, otherwise it is an approximation.
281 n := 0
282 wasSpace := 1
283 // setBits is used to track which bits are set in the bytes of s.
284 setBits := uint8(0)
285 for i := 0; i < len(s); i++ {
286 r := s[i]
287 setBits |= r
288 isSpace := int(asciiSpace[r])
289 n += wasSpace & ^isSpace
290 wasSpace = isSpace
293 if setBits >= utf8.RuneSelf {
294 // Some runes in the input slice are not ASCII.
295 return FieldsFunc(s, unicode.IsSpace)
298 // ASCII fast path
299 a := make([][]byte, n)
300 na := 0
301 fieldStart := 0
302 i := 0
303 // Skip spaces in the front of the input.
304 for i < len(s) && asciiSpace[s[i]] != 0 {
307 fieldStart = i
308 for i < len(s) {
309 if asciiSpace[s[i]] == 0 {
311 continue
313 a[na] = s[fieldStart:i:i]
314 na++
316 // Skip spaces in between fields.
317 for i < len(s) && asciiSpace[s[i]] != 0 {
320 fieldStart = i
322 if fieldStart < len(s) { // Last field might end at EOF.
323 a[na] = s[fieldStart:len(s):len(s)]
325 return a
328 // FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
329 // It splits the slice s at each run of code points c satisfying f(c) and
330 // returns a slice of subslices of s. If all code points in s satisfy f(c), or
331 // len(s) == 0, an empty slice is returned.
332 // FieldsFunc makes no guarantees about the order in which it calls f(c).
333 // If f does not return consistent results for a given c, FieldsFunc may crash.
334 func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
335 // A span is used to record a slice of s of the form s[start:end].
336 // The start index is inclusive and the end index is exclusive.
337 type span struct {
338 start int
339 end int
341 spans := make([]span, 0, 32)
343 // Find the field start and end indices.
344 wasField := false
345 fromIndex := 0
346 for i := 0; i < len(s); {
347 size := 1
348 r := rune(s[i])
349 if r >= utf8.RuneSelf {
350 r, size = utf8.DecodeRune(s[i:])
352 if f(r) {
353 if wasField {
354 spans = append(spans, span{start: fromIndex, end: i})
355 wasField = false
357 } else {
358 if !wasField {
359 fromIndex = i
360 wasField = true
363 i += size
366 // Last field might end at EOF.
367 if wasField {
368 spans = append(spans, span{fromIndex, len(s)})
371 // Create subslices from recorded field indices.
372 a := make([][]byte, len(spans))
373 for i, span := range spans {
374 a[i] = s[span.start:span.end:span.end]
377 return a
380 // Join concatenates the elements of s to create a new byte slice. The separator
381 // sep is placed between elements in the resulting slice.
382 func Join(s [][]byte, sep []byte) []byte {
383 if len(s) == 0 {
384 return []byte{}
386 if len(s) == 1 {
387 // Just return a copy.
388 return append([]byte(nil), s[0]...)
390 n := len(sep) * (len(s) - 1)
391 for _, v := range s {
392 n += len(v)
395 b := make([]byte, n)
396 bp := copy(b, s[0])
397 for _, v := range s[1:] {
398 bp += copy(b[bp:], sep)
399 bp += copy(b[bp:], v)
401 return b
404 // HasPrefix tests whether the byte slice s begins with prefix.
405 func HasPrefix(s, prefix []byte) bool {
406 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
409 // HasSuffix tests whether the byte slice s ends with suffix.
410 func HasSuffix(s, suffix []byte) bool {
411 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
414 // Map returns a copy of the byte slice s with all its characters modified
415 // according to the mapping function. If mapping returns a negative value, the character is
416 // dropped from the byte slice with no replacement. The characters in s and the
417 // output are interpreted as UTF-8-encoded code points.
418 func Map(mapping func(r rune) rune, s []byte) []byte {
419 // In the worst case, the slice can grow when mapped, making
420 // things unpleasant. But it's so rare we barge in assuming it's
421 // fine. It could also shrink but that falls out naturally.
422 maxbytes := len(s) // length of b
423 nbytes := 0 // number of bytes encoded in b
424 b := make([]byte, maxbytes)
425 for i := 0; i < len(s); {
426 wid := 1
427 r := rune(s[i])
428 if r >= utf8.RuneSelf {
429 r, wid = utf8.DecodeRune(s[i:])
431 r = mapping(r)
432 if r >= 0 {
433 rl := utf8.RuneLen(r)
434 if rl < 0 {
435 rl = len(string(utf8.RuneError))
437 if nbytes+rl > maxbytes {
438 // Grow the buffer.
439 maxbytes = maxbytes*2 + utf8.UTFMax
440 nb := make([]byte, maxbytes)
441 copy(nb, b[0:nbytes])
442 b = nb
444 nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
446 i += wid
448 return b[0:nbytes]
451 // Repeat returns a new byte slice consisting of count copies of b.
453 // It panics if count is negative or if
454 // the result of (len(b) * count) overflows.
455 func Repeat(b []byte, count int) []byte {
456 // Since we cannot return an error on overflow,
457 // we should panic if the repeat will generate
458 // an overflow.
459 // See Issue golang.org/issue/16237.
460 if count < 0 {
461 panic("bytes: negative Repeat count")
462 } else if count > 0 && len(b)*count/count != len(b) {
463 panic("bytes: Repeat count causes overflow")
466 nb := make([]byte, len(b)*count)
467 bp := copy(nb, b)
468 for bp < len(nb) {
469 copy(nb[bp:], nb[:bp])
470 bp *= 2
472 return nb
475 // ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
476 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
478 // ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
479 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
481 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
482 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
484 // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
485 // upper case, giving priority to the special casing rules.
486 func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
487 return Map(func(r rune) rune { return c.ToUpper(r) }, s)
490 // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
491 // lower case, giving priority to the special casing rules.
492 func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
493 return Map(func(r rune) rune { return c.ToLower(r) }, s)
496 // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
497 // title case, giving priority to the special casing rules.
498 func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
499 return Map(func(r rune) rune { return c.ToTitle(r) }, s)
502 // isSeparator reports whether the rune could mark a word boundary.
503 // TODO: update when package unicode captures more of the properties.
504 func isSeparator(r rune) bool {
505 // ASCII alphanumerics and underscore are not separators
506 if r <= 0x7F {
507 switch {
508 case '0' <= r && r <= '9':
509 return false
510 case 'a' <= r && r <= 'z':
511 return false
512 case 'A' <= r && r <= 'Z':
513 return false
514 case r == '_':
515 return false
517 return true
519 // Letters and digits are not separators
520 if unicode.IsLetter(r) || unicode.IsDigit(r) {
521 return false
523 // Otherwise, all we can do for now is treat spaces as separators.
524 return unicode.IsSpace(r)
527 // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
528 // words mapped to their title case.
530 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
531 func Title(s []byte) []byte {
532 // Use a closure here to remember state.
533 // Hackish but effective. Depends on Map scanning in order and calling
534 // the closure once per rune.
535 prev := ' '
536 return Map(
537 func(r rune) rune {
538 if isSeparator(prev) {
539 prev = r
540 return unicode.ToTitle(r)
542 prev = r
543 return r
548 // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
549 // all leading UTF-8-encoded code points c that satisfy f(c).
550 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
551 i := indexFunc(s, f, false)
552 if i == -1 {
553 return nil
555 return s[i:]
558 // TrimRightFunc returns a subslice of s by slicing off all trailing
559 // UTF-8-encoded code points c that satisfy f(c).
560 func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
561 i := lastIndexFunc(s, f, false)
562 if i >= 0 && s[i] >= utf8.RuneSelf {
563 _, wid := utf8.DecodeRune(s[i:])
564 i += wid
565 } else {
568 return s[0:i]
571 // TrimFunc returns a subslice of s by slicing off all leading and trailing
572 // UTF-8-encoded code points c that satisfy f(c).
573 func TrimFunc(s []byte, f func(r rune) bool) []byte {
574 return TrimRightFunc(TrimLeftFunc(s, f), f)
577 // TrimPrefix returns s without the provided leading prefix string.
578 // If s doesn't start with prefix, s is returned unchanged.
579 func TrimPrefix(s, prefix []byte) []byte {
580 if HasPrefix(s, prefix) {
581 return s[len(prefix):]
583 return s
586 // TrimSuffix returns s without the provided trailing suffix string.
587 // If s doesn't end with suffix, s is returned unchanged.
588 func TrimSuffix(s, suffix []byte) []byte {
589 if HasSuffix(s, suffix) {
590 return s[:len(s)-len(suffix)]
592 return s
595 // IndexFunc interprets s as a sequence of UTF-8-encoded code points.
596 // It returns the byte index in s of the first Unicode
597 // code point satisfying f(c), or -1 if none do.
598 func IndexFunc(s []byte, f func(r rune) bool) int {
599 return indexFunc(s, f, true)
602 // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
603 // It returns the byte index in s of the last Unicode
604 // code point satisfying f(c), or -1 if none do.
605 func LastIndexFunc(s []byte, f func(r rune) bool) int {
606 return lastIndexFunc(s, f, true)
609 // indexFunc is the same as IndexFunc except that if
610 // truth==false, the sense of the predicate function is
611 // inverted.
612 func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
613 start := 0
614 for start < len(s) {
615 wid := 1
616 r := rune(s[start])
617 if r >= utf8.RuneSelf {
618 r, wid = utf8.DecodeRune(s[start:])
620 if f(r) == truth {
621 return start
623 start += wid
625 return -1
628 // lastIndexFunc is the same as LastIndexFunc except that if
629 // truth==false, the sense of the predicate function is
630 // inverted.
631 func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
632 for i := len(s); i > 0; {
633 r, size := rune(s[i-1]), 1
634 if r >= utf8.RuneSelf {
635 r, size = utf8.DecodeLastRune(s[0:i])
637 i -= size
638 if f(r) == truth {
639 return i
642 return -1
645 // asciiSet is a 32-byte value, where each bit represents the presence of a
646 // given ASCII character in the set. The 128-bits of the lower 16 bytes,
647 // starting with the least-significant bit of the lowest word to the
648 // most-significant bit of the highest word, map to the full range of all
649 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
650 // ensuring that any non-ASCII character will be reported as not in the set.
651 type asciiSet [8]uint32
653 // makeASCIISet creates a set of ASCII characters and reports whether all
654 // characters in chars are ASCII.
655 func makeASCIISet(chars string) (as asciiSet, ok bool) {
656 for i := 0; i < len(chars); i++ {
657 c := chars[i]
658 if c >= utf8.RuneSelf {
659 return as, false
661 as[c>>5] |= 1 << uint(c&31)
663 return as, true
666 // contains reports whether c is inside the set.
667 func (as *asciiSet) contains(c byte) bool {
668 return (as[c>>5] & (1 << uint(c&31))) != 0
671 func makeCutsetFunc(cutset string) func(r rune) bool {
672 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
673 return func(r rune) bool {
674 return r == rune(cutset[0])
677 if as, isASCII := makeASCIISet(cutset); isASCII {
678 return func(r rune) bool {
679 return r < utf8.RuneSelf && as.contains(byte(r))
682 return func(r rune) bool {
683 for _, c := range cutset {
684 if c == r {
685 return true
688 return false
692 // Trim returns a subslice of s by slicing off all leading and
693 // trailing UTF-8-encoded code points contained in cutset.
694 func Trim(s []byte, cutset string) []byte {
695 return TrimFunc(s, makeCutsetFunc(cutset))
698 // TrimLeft returns a subslice of s by slicing off all leading
699 // UTF-8-encoded code points contained in cutset.
700 func TrimLeft(s []byte, cutset string) []byte {
701 return TrimLeftFunc(s, makeCutsetFunc(cutset))
704 // TrimRight returns a subslice of s by slicing off all trailing
705 // UTF-8-encoded code points that are contained in cutset.
706 func TrimRight(s []byte, cutset string) []byte {
707 return TrimRightFunc(s, makeCutsetFunc(cutset))
710 // TrimSpace returns a subslice of s by slicing off all leading and
711 // trailing white space, as defined by Unicode.
712 func TrimSpace(s []byte) []byte {
713 return TrimFunc(s, unicode.IsSpace)
716 // Runes interprets s as a sequence of UTF-8-encoded code points.
717 // It returns a slice of runes (Unicode code points) equivalent to s.
718 func Runes(s []byte) []rune {
719 t := make([]rune, utf8.RuneCount(s))
720 i := 0
721 for len(s) > 0 {
722 r, l := utf8.DecodeRune(s)
723 t[i] = r
725 s = s[l:]
727 return t
730 // Replace returns a copy of the slice s with the first n
731 // non-overlapping instances of old replaced by new.
732 // If old is empty, it matches at the beginning of the slice
733 // and after each UTF-8 sequence, yielding up to k+1 replacements
734 // for a k-rune slice.
735 // If n < 0, there is no limit on the number of replacements.
736 func Replace(s, old, new []byte, n int) []byte {
737 m := 0
738 if n != 0 {
739 // Compute number of replacements.
740 m = Count(s, old)
742 if m == 0 {
743 // Just return a copy.
744 return append([]byte(nil), s...)
746 if n < 0 || m < n {
747 n = m
750 // Apply replacements to buffer.
751 t := make([]byte, len(s)+n*(len(new)-len(old)))
752 w := 0
753 start := 0
754 for i := 0; i < n; i++ {
755 j := start
756 if len(old) == 0 {
757 if i > 0 {
758 _, wid := utf8.DecodeRune(s[start:])
759 j += wid
761 } else {
762 j += Index(s[start:], old)
764 w += copy(t[w:], s[start:j])
765 w += copy(t[w:], new)
766 start = j + len(old)
768 w += copy(t[w:], s[start:])
769 return t[0:w]
772 // EqualFold reports whether s and t, interpreted as UTF-8 strings,
773 // are equal under Unicode case-folding.
774 func EqualFold(s, t []byte) bool {
775 for len(s) != 0 && len(t) != 0 {
776 // Extract first rune from each.
777 var sr, tr rune
778 if s[0] < utf8.RuneSelf {
779 sr, s = rune(s[0]), s[1:]
780 } else {
781 r, size := utf8.DecodeRune(s)
782 sr, s = r, s[size:]
784 if t[0] < utf8.RuneSelf {
785 tr, t = rune(t[0]), t[1:]
786 } else {
787 r, size := utf8.DecodeRune(t)
788 tr, t = r, t[size:]
791 // If they match, keep going; if not, return false.
793 // Easy case.
794 if tr == sr {
795 continue
798 // Make sr < tr to simplify what follows.
799 if tr < sr {
800 tr, sr = sr, tr
802 // Fast check for ASCII.
803 if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
804 // ASCII, and sr is upper case. tr must be lower case.
805 if tr == sr+'a'-'A' {
806 continue
808 return false
811 // General case. SimpleFold(x) returns the next equivalent rune > x
812 // or wraps around to smaller values.
813 r := unicode.SimpleFold(sr)
814 for r != sr && r < tr {
815 r = unicode.SimpleFold(r)
817 if r == tr {
818 continue
820 return false
823 // One string is empty. Are both?
824 return len(s) == len(t)
827 func indexRabinKarp(s, sep []byte) int {
828 // Rabin-Karp search
829 hashsep, pow := hashStr(sep)
830 n := len(sep)
831 var h uint32
832 for i := 0; i < n; i++ {
833 h = h*primeRK + uint32(s[i])
835 if h == hashsep && Equal(s[:n], sep) {
836 return 0
838 for i := n; i < len(s); {
839 h *= primeRK
840 h += uint32(s[i])
841 h -= pow * uint32(s[i-n])
843 if h == hashsep && Equal(s[i-n:i], sep) {
844 return i - n
847 return -1
850 // primeRK is the prime base used in Rabin-Karp algorithm.
851 const primeRK = 16777619
853 // hashStr returns the hash and the appropriate multiplicative
854 // factor for use in Rabin-Karp algorithm.
855 func hashStr(sep []byte) (uint32, uint32) {
856 hash := uint32(0)
857 for i := 0; i < len(sep); i++ {
858 hash = hash*primeRK + uint32(sep[i])
860 var pow, sq uint32 = 1, primeRK
861 for i := len(sep); i > 0; i >>= 1 {
862 if i&1 != 0 {
863 pow *= sq
865 sq *= sq
867 return hash, pow