[RS6000] function for linux64 SUBSUBTARGET_OVERRIDE_OPTIONS
[official-gcc.git] / libgo / go / bytes / bytes.go
blobaa07b9fbc1604804957d779e4f50b8e35293bfb4
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package bytes implements functions for the manipulation of byte slices.
6 // It is analogous to the facilities of the strings package.
7 package bytes
9 import (
10 "internal/bytealg"
11 "unicode"
12 "unicode/utf8"
15 // Equal reports whether a and b
16 // are the same length and contain the same bytes.
17 // A nil argument is equivalent to an empty slice.
18 func Equal(a, b []byte) bool {
19 // Neither cmd/compile nor gccgo allocates for these string conversions.
20 return string(a) == string(b)
23 // Compare returns an integer comparing two byte slices lexicographically.
24 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
25 // A nil argument is equivalent to an empty slice.
26 func Compare(a, b []byte) int {
27 return bytealg.Compare(a, b)
30 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
31 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
32 func explode(s []byte, n int) [][]byte {
33 if n <= 0 {
34 n = len(s)
36 a := make([][]byte, n)
37 var size int
38 na := 0
39 for len(s) > 0 {
40 if na+1 >= n {
41 a[na] = s
42 na++
43 break
45 _, size = utf8.DecodeRune(s)
46 a[na] = s[0:size:size]
47 s = s[size:]
48 na++
50 return a[0:na]
53 // Count counts the number of non-overlapping instances of sep in s.
54 // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
55 func Count(s, sep []byte) int {
56 // special case
57 if len(sep) == 0 {
58 return utf8.RuneCount(s) + 1
60 if len(sep) == 1 {
61 return bytealg.Count(s, sep[0])
63 n := 0
64 for {
65 i := Index(s, sep)
66 if i == -1 {
67 return n
69 n++
70 s = s[i+len(sep):]
74 // Contains reports whether subslice is within b.
75 func Contains(b, subslice []byte) bool {
76 return Index(b, subslice) != -1
79 // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
80 func ContainsAny(b []byte, chars string) bool {
81 return IndexAny(b, chars) >= 0
84 // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
85 func ContainsRune(b []byte, r rune) bool {
86 return IndexRune(b, r) >= 0
89 // IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b.
90 func IndexByte(b []byte, c byte) int {
91 return bytealg.IndexByte(b, c)
94 func indexBytePortable(s []byte, c byte) int {
95 for i, b := range s {
96 if b == c {
97 return i
100 return -1
103 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
104 func LastIndex(s, sep []byte) int {
105 n := len(sep)
106 switch {
107 case n == 0:
108 return len(s)
109 case n == 1:
110 return LastIndexByte(s, sep[0])
111 case n == len(s):
112 if Equal(s, sep) {
113 return 0
115 return -1
116 case n > len(s):
117 return -1
119 // Rabin-Karp search from the end of the string
120 hashss, pow := bytealg.HashStrRevBytes(sep)
121 last := len(s) - n
122 var h uint32
123 for i := len(s) - 1; i >= last; i-- {
124 h = h*bytealg.PrimeRK + uint32(s[i])
126 if h == hashss && Equal(s[last:], sep) {
127 return last
129 for i := last - 1; i >= 0; i-- {
130 h *= bytealg.PrimeRK
131 h += uint32(s[i])
132 h -= pow * uint32(s[i+n])
133 if h == hashss && Equal(s[i:i+n], sep) {
134 return i
137 return -1
140 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
141 func LastIndexByte(s []byte, c byte) int {
142 for i := len(s) - 1; i >= 0; i-- {
143 if s[i] == c {
144 return i
147 return -1
150 // IndexRune interprets s as a sequence of UTF-8-encoded code points.
151 // It returns the byte index of the first occurrence in s of the given rune.
152 // It returns -1 if rune is not present in s.
153 // If r is utf8.RuneError, it returns the first instance of any
154 // invalid UTF-8 byte sequence.
155 func IndexRune(s []byte, r rune) int {
156 switch {
157 case 0 <= r && r < utf8.RuneSelf:
158 return IndexByte(s, byte(r))
159 case r == utf8.RuneError:
160 for i := 0; i < len(s); {
161 r1, n := utf8.DecodeRune(s[i:])
162 if r1 == utf8.RuneError {
163 return i
165 i += n
167 return -1
168 case !utf8.ValidRune(r):
169 return -1
170 default:
171 var b [utf8.UTFMax]byte
172 n := utf8.EncodeRune(b[:], r)
173 return Index(s, b[:n])
177 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
178 // It returns the byte index of the first occurrence in s of any of the Unicode
179 // code points in chars. It returns -1 if chars is empty or if there is no code
180 // point in common.
181 func IndexAny(s []byte, chars string) int {
182 if chars == "" {
183 // Avoid scanning all of s.
184 return -1
186 if len(s) == 1 {
187 r := rune(s[0])
188 if r >= utf8.RuneSelf {
189 // search utf8.RuneError.
190 for _, r = range chars {
191 if r == utf8.RuneError {
192 return 0
195 return -1
197 if bytealg.IndexByteString(chars, s[0]) >= 0 {
198 return 0
200 return -1
202 if len(chars) == 1 {
203 r := rune(chars[0])
204 if r >= utf8.RuneSelf {
205 r = utf8.RuneError
207 return IndexRune(s, r)
209 if len(s) > 8 {
210 if as, isASCII := makeASCIISet(chars); isASCII {
211 for i, c := range s {
212 if as.contains(c) {
213 return i
216 return -1
219 var width int
220 for i := 0; i < len(s); i += width {
221 r := rune(s[i])
222 if r < utf8.RuneSelf {
223 if bytealg.IndexByteString(chars, s[i]) >= 0 {
224 return i
226 width = 1
227 continue
229 r, width = utf8.DecodeRune(s[i:])
230 if r == utf8.RuneError {
231 for _, r = range chars {
232 if r == utf8.RuneError {
233 return i
236 continue
238 // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
239 // package should not import the strings package, use bytealg.IndexString
240 // instead. And this does not seem to lose much performance.
241 if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
242 return i
245 return -1
248 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
249 // points. It returns the byte index of the last occurrence in s of any of
250 // the Unicode code points in chars. It returns -1 if chars is empty or if
251 // there is no code point in common.
252 func LastIndexAny(s []byte, chars string) int {
253 if chars == "" {
254 // Avoid scanning all of s.
255 return -1
257 if len(s) > 8 {
258 if as, isASCII := makeASCIISet(chars); isASCII {
259 for i := len(s) - 1; i >= 0; i-- {
260 if as.contains(s[i]) {
261 return i
264 return -1
267 if len(s) == 1 {
268 r := rune(s[0])
269 if r >= utf8.RuneSelf {
270 for _, r = range chars {
271 if r == utf8.RuneError {
272 return 0
275 return -1
277 if bytealg.IndexByteString(chars, s[0]) >= 0 {
278 return 0
280 return -1
282 if len(chars) == 1 {
283 cr := rune(chars[0])
284 if cr >= utf8.RuneSelf {
285 cr = utf8.RuneError
287 for i := len(s); i > 0; {
288 r, size := utf8.DecodeLastRune(s[:i])
289 i -= size
290 if r == cr {
291 return i
294 return -1
296 for i := len(s); i > 0; {
297 r := rune(s[i-1])
298 if r < utf8.RuneSelf {
299 if bytealg.IndexByteString(chars, s[i-1]) >= 0 {
300 return i - 1
303 continue
305 r, size := utf8.DecodeLastRune(s[:i])
306 i -= size
307 if r == utf8.RuneError {
308 for _, r = range chars {
309 if r == utf8.RuneError {
310 return i
313 continue
315 // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
316 // package should not import the strings package, use bytealg.IndexString
317 // instead. And this does not seem to lose much performance.
318 if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
319 return i
322 return -1
325 // Generic split: splits after each instance of sep,
326 // including sepSave bytes of sep in the subslices.
327 func genSplit(s, sep []byte, sepSave, n int) [][]byte {
328 if n == 0 {
329 return nil
331 if len(sep) == 0 {
332 return explode(s, n)
334 if n < 0 {
335 n = Count(s, sep) + 1
338 a := make([][]byte, n)
340 i := 0
341 for i < n {
342 m := Index(s, sep)
343 if m < 0 {
344 break
346 a[i] = s[: m+sepSave : m+sepSave]
347 s = s[m+len(sep):]
350 a[i] = s
351 return a[:i+1]
354 // SplitN slices s into subslices separated by sep and returns a slice of
355 // the subslices between those separators.
356 // If sep is empty, SplitN splits after each UTF-8 sequence.
357 // The count determines the number of subslices to return:
358 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
359 // n == 0: the result is nil (zero subslices)
360 // n < 0: all subslices
361 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
363 // SplitAfterN slices s into subslices after each instance of sep and
364 // returns a slice of those subslices.
365 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
366 // The count determines the number of subslices to return:
367 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
368 // n == 0: the result is nil (zero subslices)
369 // n < 0: all subslices
370 func SplitAfterN(s, sep []byte, n int) [][]byte {
371 return genSplit(s, sep, len(sep), n)
374 // Split slices s into all subslices separated by sep and returns a slice of
375 // the subslices between those separators.
376 // If sep is empty, Split splits after each UTF-8 sequence.
377 // It is equivalent to SplitN with a count of -1.
378 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
380 // SplitAfter slices s into all subslices after each instance of sep and
381 // returns a slice of those subslices.
382 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
383 // It is equivalent to SplitAfterN with a count of -1.
384 func SplitAfter(s, sep []byte) [][]byte {
385 return genSplit(s, sep, len(sep), -1)
388 var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
390 // Fields interprets s as a sequence of UTF-8-encoded code points.
391 // It splits the slice s around each instance of one or more consecutive white space
392 // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
393 // empty slice if s contains only white space.
394 func Fields(s []byte) [][]byte {
395 // First count the fields.
396 // This is an exact count if s is ASCII, otherwise it is an approximation.
397 n := 0
398 wasSpace := 1
399 // setBits is used to track which bits are set in the bytes of s.
400 setBits := uint8(0)
401 for i := 0; i < len(s); i++ {
402 r := s[i]
403 setBits |= r
404 isSpace := int(asciiSpace[r])
405 n += wasSpace & ^isSpace
406 wasSpace = isSpace
409 if setBits >= utf8.RuneSelf {
410 // Some runes in the input slice are not ASCII.
411 return FieldsFunc(s, unicode.IsSpace)
414 // ASCII fast path
415 a := make([][]byte, n)
416 na := 0
417 fieldStart := 0
418 i := 0
419 // Skip spaces in the front of the input.
420 for i < len(s) && asciiSpace[s[i]] != 0 {
423 fieldStart = i
424 for i < len(s) {
425 if asciiSpace[s[i]] == 0 {
427 continue
429 a[na] = s[fieldStart:i:i]
430 na++
432 // Skip spaces in between fields.
433 for i < len(s) && asciiSpace[s[i]] != 0 {
436 fieldStart = i
438 if fieldStart < len(s) { // Last field might end at EOF.
439 a[na] = s[fieldStart:len(s):len(s)]
441 return a
444 // FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
445 // It splits the slice s at each run of code points c satisfying f(c) and
446 // returns a slice of subslices of s. If all code points in s satisfy f(c), or
447 // len(s) == 0, an empty slice is returned.
449 // FieldsFunc makes no guarantees about the order in which it calls f(c)
450 // and assumes that f always returns the same value for a given c.
451 func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
452 // A span is used to record a slice of s of the form s[start:end].
453 // The start index is inclusive and the end index is exclusive.
454 type span struct {
455 start int
456 end int
458 spans := make([]span, 0, 32)
460 // Find the field start and end indices.
461 // Doing this in a separate pass (rather than slicing the string s
462 // and collecting the result substrings right away) is significantly
463 // more efficient, possibly due to cache effects.
464 start := -1 // valid span start if >= 0
465 for i := 0; i < len(s); {
466 size := 1
467 r := rune(s[i])
468 if r >= utf8.RuneSelf {
469 r, size = utf8.DecodeRune(s[i:])
471 if f(r) {
472 if start >= 0 {
473 spans = append(spans, span{start, i})
474 start = -1
476 } else {
477 if start < 0 {
478 start = i
481 i += size
484 // Last field might end at EOF.
485 if start >= 0 {
486 spans = append(spans, span{start, len(s)})
489 // Create subslices from recorded field indices.
490 a := make([][]byte, len(spans))
491 for i, span := range spans {
492 a[i] = s[span.start:span.end:span.end]
495 return a
498 // Join concatenates the elements of s to create a new byte slice. The separator
499 // sep is placed between elements in the resulting slice.
500 func Join(s [][]byte, sep []byte) []byte {
501 if len(s) == 0 {
502 return []byte{}
504 if len(s) == 1 {
505 // Just return a copy.
506 return append([]byte(nil), s[0]...)
508 n := len(sep) * (len(s) - 1)
509 for _, v := range s {
510 n += len(v)
513 b := make([]byte, n)
514 bp := copy(b, s[0])
515 for _, v := range s[1:] {
516 bp += copy(b[bp:], sep)
517 bp += copy(b[bp:], v)
519 return b
522 // HasPrefix tests whether the byte slice s begins with prefix.
523 func HasPrefix(s, prefix []byte) bool {
524 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
527 // HasSuffix tests whether the byte slice s ends with suffix.
528 func HasSuffix(s, suffix []byte) bool {
529 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
532 // Map returns a copy of the byte slice s with all its characters modified
533 // according to the mapping function. If mapping returns a negative value, the character is
534 // dropped from the byte slice with no replacement. The characters in s and the
535 // output are interpreted as UTF-8-encoded code points.
536 func Map(mapping func(r rune) rune, s []byte) []byte {
537 // In the worst case, the slice can grow when mapped, making
538 // things unpleasant. But it's so rare we barge in assuming it's
539 // fine. It could also shrink but that falls out naturally.
540 maxbytes := len(s) // length of b
541 nbytes := 0 // number of bytes encoded in b
542 b := make([]byte, maxbytes)
543 for i := 0; i < len(s); {
544 wid := 1
545 r := rune(s[i])
546 if r >= utf8.RuneSelf {
547 r, wid = utf8.DecodeRune(s[i:])
549 r = mapping(r)
550 if r >= 0 {
551 rl := utf8.RuneLen(r)
552 if rl < 0 {
553 rl = len(string(utf8.RuneError))
555 if nbytes+rl > maxbytes {
556 // Grow the buffer.
557 maxbytes = maxbytes*2 + utf8.UTFMax
558 nb := make([]byte, maxbytes)
559 copy(nb, b[0:nbytes])
560 b = nb
562 nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
564 i += wid
566 return b[0:nbytes]
569 // Repeat returns a new byte slice consisting of count copies of b.
571 // It panics if count is negative or if
572 // the result of (len(b) * count) overflows.
573 func Repeat(b []byte, count int) []byte {
574 if count == 0 {
575 return []byte{}
577 // Since we cannot return an error on overflow,
578 // we should panic if the repeat will generate
579 // an overflow.
580 // See Issue golang.org/issue/16237.
581 if count < 0 {
582 panic("bytes: negative Repeat count")
583 } else if len(b)*count/count != len(b) {
584 panic("bytes: Repeat count causes overflow")
587 nb := make([]byte, len(b)*count)
588 bp := copy(nb, b)
589 for bp < len(nb) {
590 copy(nb[bp:], nb[:bp])
591 bp *= 2
593 return nb
596 // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to
597 // their upper case.
598 func ToUpper(s []byte) []byte {
599 isASCII, hasLower := true, false
600 for i := 0; i < len(s); i++ {
601 c := s[i]
602 if c >= utf8.RuneSelf {
603 isASCII = false
604 break
606 hasLower = hasLower || ('a' <= c && c <= 'z')
609 if isASCII { // optimize for ASCII-only byte slices.
610 if !hasLower {
611 // Just return a copy.
612 return append([]byte(""), s...)
614 b := make([]byte, len(s))
615 for i := 0; i < len(s); i++ {
616 c := s[i]
617 if 'a' <= c && c <= 'z' {
618 c -= 'a' - 'A'
620 b[i] = c
622 return b
624 return Map(unicode.ToUpper, s)
627 // ToLower returns a copy of the byte slice s with all Unicode letters mapped to
628 // their lower case.
629 func ToLower(s []byte) []byte {
630 isASCII, hasUpper := true, false
631 for i := 0; i < len(s); i++ {
632 c := s[i]
633 if c >= utf8.RuneSelf {
634 isASCII = false
635 break
637 hasUpper = hasUpper || ('A' <= c && c <= 'Z')
640 if isASCII { // optimize for ASCII-only byte slices.
641 if !hasUpper {
642 return append([]byte(""), s...)
644 b := make([]byte, len(s))
645 for i := 0; i < len(s); i++ {
646 c := s[i]
647 if 'A' <= c && c <= 'Z' {
648 c += 'a' - 'A'
650 b[i] = c
652 return b
654 return Map(unicode.ToLower, s)
657 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
658 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
660 // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
661 // upper case, giving priority to the special casing rules.
662 func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
663 return Map(c.ToUpper, s)
666 // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
667 // lower case, giving priority to the special casing rules.
668 func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
669 return Map(c.ToLower, s)
672 // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
673 // title case, giving priority to the special casing rules.
674 func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
675 return Map(c.ToTitle, s)
678 // ToValidUTF8 treats s as UTF-8-encoded bytes and returns a copy with each run of bytes
679 // representing invalid UTF-8 replaced with the bytes in replacement, which may be empty.
680 func ToValidUTF8(s, replacement []byte) []byte {
681 b := make([]byte, 0, len(s)+len(replacement))
682 invalid := false // previous byte was from an invalid UTF-8 sequence
683 for i := 0; i < len(s); {
684 c := s[i]
685 if c < utf8.RuneSelf {
687 invalid = false
688 b = append(b, byte(c))
689 continue
691 _, wid := utf8.DecodeRune(s[i:])
692 if wid == 1 {
694 if !invalid {
695 invalid = true
696 b = append(b, replacement...)
698 continue
700 invalid = false
701 b = append(b, s[i:i+wid]...)
702 i += wid
704 return b
707 // isSeparator reports whether the rune could mark a word boundary.
708 // TODO: update when package unicode captures more of the properties.
709 func isSeparator(r rune) bool {
710 // ASCII alphanumerics and underscore are not separators
711 if r <= 0x7F {
712 switch {
713 case '0' <= r && r <= '9':
714 return false
715 case 'a' <= r && r <= 'z':
716 return false
717 case 'A' <= r && r <= 'Z':
718 return false
719 case r == '_':
720 return false
722 return true
724 // Letters and digits are not separators
725 if unicode.IsLetter(r) || unicode.IsDigit(r) {
726 return false
728 // Otherwise, all we can do for now is treat spaces as separators.
729 return unicode.IsSpace(r)
732 // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
733 // words mapped to their title case.
735 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
736 func Title(s []byte) []byte {
737 // Use a closure here to remember state.
738 // Hackish but effective. Depends on Map scanning in order and calling
739 // the closure once per rune.
740 prev := ' '
741 return Map(
742 func(r rune) rune {
743 if isSeparator(prev) {
744 prev = r
745 return unicode.ToTitle(r)
747 prev = r
748 return r
753 // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
754 // all leading UTF-8-encoded code points c that satisfy f(c).
755 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
756 i := indexFunc(s, f, false)
757 if i == -1 {
758 return nil
760 return s[i:]
763 // TrimRightFunc returns a subslice of s by slicing off all trailing
764 // UTF-8-encoded code points c that satisfy f(c).
765 func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
766 i := lastIndexFunc(s, f, false)
767 if i >= 0 && s[i] >= utf8.RuneSelf {
768 _, wid := utf8.DecodeRune(s[i:])
769 i += wid
770 } else {
773 return s[0:i]
776 // TrimFunc returns a subslice of s by slicing off all leading and trailing
777 // UTF-8-encoded code points c that satisfy f(c).
778 func TrimFunc(s []byte, f func(r rune) bool) []byte {
779 return TrimRightFunc(TrimLeftFunc(s, f), f)
782 // TrimPrefix returns s without the provided leading prefix string.
783 // If s doesn't start with prefix, s is returned unchanged.
784 func TrimPrefix(s, prefix []byte) []byte {
785 if HasPrefix(s, prefix) {
786 return s[len(prefix):]
788 return s
791 // TrimSuffix returns s without the provided trailing suffix string.
792 // If s doesn't end with suffix, s is returned unchanged.
793 func TrimSuffix(s, suffix []byte) []byte {
794 if HasSuffix(s, suffix) {
795 return s[:len(s)-len(suffix)]
797 return s
800 // IndexFunc interprets s as a sequence of UTF-8-encoded code points.
801 // It returns the byte index in s of the first Unicode
802 // code point satisfying f(c), or -1 if none do.
803 func IndexFunc(s []byte, f func(r rune) bool) int {
804 return indexFunc(s, f, true)
807 // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
808 // It returns the byte index in s of the last Unicode
809 // code point satisfying f(c), or -1 if none do.
810 func LastIndexFunc(s []byte, f func(r rune) bool) int {
811 return lastIndexFunc(s, f, true)
814 // indexFunc is the same as IndexFunc except that if
815 // truth==false, the sense of the predicate function is
816 // inverted.
817 func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
818 start := 0
819 for start < len(s) {
820 wid := 1
821 r := rune(s[start])
822 if r >= utf8.RuneSelf {
823 r, wid = utf8.DecodeRune(s[start:])
825 if f(r) == truth {
826 return start
828 start += wid
830 return -1
833 // lastIndexFunc is the same as LastIndexFunc except that if
834 // truth==false, the sense of the predicate function is
835 // inverted.
836 func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
837 for i := len(s); i > 0; {
838 r, size := rune(s[i-1]), 1
839 if r >= utf8.RuneSelf {
840 r, size = utf8.DecodeLastRune(s[0:i])
842 i -= size
843 if f(r) == truth {
844 return i
847 return -1
850 // asciiSet is a 32-byte value, where each bit represents the presence of a
851 // given ASCII character in the set. The 128-bits of the lower 16 bytes,
852 // starting with the least-significant bit of the lowest word to the
853 // most-significant bit of the highest word, map to the full range of all
854 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
855 // ensuring that any non-ASCII character will be reported as not in the set.
856 type asciiSet [8]uint32
858 // makeASCIISet creates a set of ASCII characters and reports whether all
859 // characters in chars are ASCII.
860 func makeASCIISet(chars string) (as asciiSet, ok bool) {
861 for i := 0; i < len(chars); i++ {
862 c := chars[i]
863 if c >= utf8.RuneSelf {
864 return as, false
866 as[c>>5] |= 1 << uint(c&31)
868 return as, true
871 // contains reports whether c is inside the set.
872 func (as *asciiSet) contains(c byte) bool {
873 return (as[c>>5] & (1 << uint(c&31))) != 0
876 func makeCutsetFunc(cutset string) func(r rune) bool {
877 if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
878 return func(r rune) bool {
879 return r == rune(cutset[0])
882 if as, isASCII := makeASCIISet(cutset); isASCII {
883 return func(r rune) bool {
884 return r < utf8.RuneSelf && as.contains(byte(r))
887 return func(r rune) bool {
888 for _, c := range cutset {
889 if c == r {
890 return true
893 return false
897 // Trim returns a subslice of s by slicing off all leading and
898 // trailing UTF-8-encoded code points contained in cutset.
899 func Trim(s []byte, cutset string) []byte {
900 return TrimFunc(s, makeCutsetFunc(cutset))
903 // TrimLeft returns a subslice of s by slicing off all leading
904 // UTF-8-encoded code points contained in cutset.
905 func TrimLeft(s []byte, cutset string) []byte {
906 return TrimLeftFunc(s, makeCutsetFunc(cutset))
909 // TrimRight returns a subslice of s by slicing off all trailing
910 // UTF-8-encoded code points that are contained in cutset.
911 func TrimRight(s []byte, cutset string) []byte {
912 return TrimRightFunc(s, makeCutsetFunc(cutset))
915 // TrimSpace returns a subslice of s by slicing off all leading and
916 // trailing white space, as defined by Unicode.
917 func TrimSpace(s []byte) []byte {
918 // Fast path for ASCII: look for the first ASCII non-space byte
919 start := 0
920 for ; start < len(s); start++ {
921 c := s[start]
922 if c >= utf8.RuneSelf {
923 // If we run into a non-ASCII byte, fall back to the
924 // slower unicode-aware method on the remaining bytes
925 return TrimFunc(s[start:], unicode.IsSpace)
927 if asciiSpace[c] == 0 {
928 break
932 // Now look for the first ASCII non-space byte from the end
933 stop := len(s)
934 for ; stop > start; stop-- {
935 c := s[stop-1]
936 if c >= utf8.RuneSelf {
937 return TrimFunc(s[start:stop], unicode.IsSpace)
939 if asciiSpace[c] == 0 {
940 break
944 // At this point s[start:stop] starts and ends with an ASCII
945 // non-space bytes, so we're done. Non-ASCII cases have already
946 // been handled above.
947 if start == stop {
948 // Special case to preserve previous TrimLeftFunc behavior,
949 // returning nil instead of empty slice if all spaces.
950 return nil
952 return s[start:stop]
955 // Runes interprets s as a sequence of UTF-8-encoded code points.
956 // It returns a slice of runes (Unicode code points) equivalent to s.
957 func Runes(s []byte) []rune {
958 t := make([]rune, utf8.RuneCount(s))
959 i := 0
960 for len(s) > 0 {
961 r, l := utf8.DecodeRune(s)
962 t[i] = r
964 s = s[l:]
966 return t
969 // Replace returns a copy of the slice s with the first n
970 // non-overlapping instances of old replaced by new.
971 // If old is empty, it matches at the beginning of the slice
972 // and after each UTF-8 sequence, yielding up to k+1 replacements
973 // for a k-rune slice.
974 // If n < 0, there is no limit on the number of replacements.
975 func Replace(s, old, new []byte, n int) []byte {
976 m := 0
977 if n != 0 {
978 // Compute number of replacements.
979 m = Count(s, old)
981 if m == 0 {
982 // Just return a copy.
983 return append([]byte(nil), s...)
985 if n < 0 || m < n {
986 n = m
989 // Apply replacements to buffer.
990 t := make([]byte, len(s)+n*(len(new)-len(old)))
991 w := 0
992 start := 0
993 for i := 0; i < n; i++ {
994 j := start
995 if len(old) == 0 {
996 if i > 0 {
997 _, wid := utf8.DecodeRune(s[start:])
998 j += wid
1000 } else {
1001 j += Index(s[start:], old)
1003 w += copy(t[w:], s[start:j])
1004 w += copy(t[w:], new)
1005 start = j + len(old)
1007 w += copy(t[w:], s[start:])
1008 return t[0:w]
1011 // ReplaceAll returns a copy of the slice s with all
1012 // non-overlapping instances of old replaced by new.
1013 // If old is empty, it matches at the beginning of the slice
1014 // and after each UTF-8 sequence, yielding up to k+1 replacements
1015 // for a k-rune slice.
1016 func ReplaceAll(s, old, new []byte) []byte {
1017 return Replace(s, old, new, -1)
1020 // EqualFold reports whether s and t, interpreted as UTF-8 strings,
1021 // are equal under Unicode case-folding, which is a more general
1022 // form of case-insensitivity.
1023 func EqualFold(s, t []byte) bool {
1024 for len(s) != 0 && len(t) != 0 {
1025 // Extract first rune from each.
1026 var sr, tr rune
1027 if s[0] < utf8.RuneSelf {
1028 sr, s = rune(s[0]), s[1:]
1029 } else {
1030 r, size := utf8.DecodeRune(s)
1031 sr, s = r, s[size:]
1033 if t[0] < utf8.RuneSelf {
1034 tr, t = rune(t[0]), t[1:]
1035 } else {
1036 r, size := utf8.DecodeRune(t)
1037 tr, t = r, t[size:]
1040 // If they match, keep going; if not, return false.
1042 // Easy case.
1043 if tr == sr {
1044 continue
1047 // Make sr < tr to simplify what follows.
1048 if tr < sr {
1049 tr, sr = sr, tr
1051 // Fast check for ASCII.
1052 if tr < utf8.RuneSelf {
1053 // ASCII only, sr/tr must be upper/lower case
1054 if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
1055 continue
1057 return false
1060 // General case. SimpleFold(x) returns the next equivalent rune > x
1061 // or wraps around to smaller values.
1062 r := unicode.SimpleFold(sr)
1063 for r != sr && r < tr {
1064 r = unicode.SimpleFold(r)
1066 if r == tr {
1067 continue
1069 return false
1072 // One string is empty. Are both?
1073 return len(s) == len(t)
1076 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
1077 func Index(s, sep []byte) int {
1078 n := len(sep)
1079 switch {
1080 case n == 0:
1081 return 0
1082 case n == 1:
1083 return IndexByte(s, sep[0])
1084 case n == len(s):
1085 if Equal(sep, s) {
1086 return 0
1088 return -1
1089 case n > len(s):
1090 return -1
1091 case n <= bytealg.MaxLen:
1092 // Use brute force when s and sep both are small
1093 if len(s) <= bytealg.MaxBruteForce {
1094 return bytealg.Index(s, sep)
1096 c0 := sep[0]
1097 c1 := sep[1]
1098 i := 0
1099 t := len(s) - n + 1
1100 fails := 0
1101 for i < t {
1102 if s[i] != c0 {
1103 // IndexByte is faster than bytealg.Index, so use it as long as
1104 // we're not getting lots of false positives.
1105 o := IndexByte(s[i+1:t], c0)
1106 if o < 0 {
1107 return -1
1109 i += o + 1
1111 if s[i+1] == c1 && Equal(s[i:i+n], sep) {
1112 return i
1114 fails++
1116 // Switch to bytealg.Index when IndexByte produces too many false positives.
1117 if fails > bytealg.Cutover(i) {
1118 r := bytealg.Index(s[i:], sep)
1119 if r >= 0 {
1120 return r + i
1122 return -1
1125 return -1
1127 c0 := sep[0]
1128 c1 := sep[1]
1129 i := 0
1130 fails := 0
1131 t := len(s) - n + 1
1132 for i < t {
1133 if s[i] != c0 {
1134 o := IndexByte(s[i+1:t], c0)
1135 if o < 0 {
1136 break
1138 i += o + 1
1140 if s[i+1] == c1 && Equal(s[i:i+n], sep) {
1141 return i
1144 fails++
1145 if fails >= 4+i>>4 && i < t {
1146 // Give up on IndexByte, it isn't skipping ahead
1147 // far enough to be better than Rabin-Karp.
1148 // Experiments (using IndexPeriodic) suggest
1149 // the cutover is about 16 byte skips.
1150 // TODO: if large prefixes of sep are matching
1151 // we should cutover at even larger average skips,
1152 // because Equal becomes that much more expensive.
1153 // This code does not take that effect into account.
1154 j := bytealg.IndexRabinKarpBytes(s[i:], sep)
1155 if j < 0 {
1156 return -1
1158 return i + j
1161 return -1