1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package bytes implements functions for the manipulation of byte slices.
6 // It is analogous to the facilities of the strings package.
15 // Equal reports whether a and b
16 // are the same length and contain the same bytes.
17 // A nil argument is equivalent to an empty slice.
18 func Equal(a
, b
[]byte) bool {
19 // Neither cmd/compile nor gccgo allocates for these string conversions.
20 return string(a
) == string(b
)
23 // Compare returns an integer comparing two byte slices lexicographically.
24 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
25 // A nil argument is equivalent to an empty slice.
26 func Compare(a
, b
[]byte) int {
27 return bytealg
.Compare(a
, b
)
30 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
31 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
32 func explode(s
[]byte, n
int) [][]byte {
36 a
:= make([][]byte, n
)
45 _
, size
= utf8
.DecodeRune(s
)
46 a
[na
] = s
[0:size
:size
]
53 // Count counts the number of non-overlapping instances of sep in s.
54 // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
55 func Count(s
, sep
[]byte) int {
58 return utf8
.RuneCount(s
) + 1
61 return bytealg
.Count(s
, sep
[0])
74 // Contains reports whether subslice is within b.
75 func Contains(b
, subslice
[]byte) bool {
76 return Index(b
, subslice
) != -1
79 // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
80 func ContainsAny(b
[]byte, chars
string) bool {
81 return IndexAny(b
, chars
) >= 0
84 // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
85 func ContainsRune(b
[]byte, r rune
) bool {
86 return IndexRune(b
, r
) >= 0
89 // IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b.
90 func IndexByte(b
[]byte, c
byte) int {
91 return bytealg
.IndexByte(b
, c
)
94 func indexBytePortable(s
[]byte, c
byte) int {
103 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
104 func LastIndex(s
, sep
[]byte) int {
110 return LastIndexByte(s
, sep
[0])
119 // Rabin-Karp search from the end of the string
120 hashss
, pow
:= bytealg
.HashStrRevBytes(sep
)
123 for i
:= len(s
) - 1; i
>= last
; i
-- {
124 h
= h
*bytealg
.PrimeRK
+ uint32(s
[i
])
126 if h
== hashss
&& Equal(s
[last
:], sep
) {
129 for i
:= last
- 1; i
>= 0; i
-- {
132 h
-= pow
* uint32(s
[i
+n
])
133 if h
== hashss
&& Equal(s
[i
:i
+n
], sep
) {
140 // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
141 func LastIndexByte(s
[]byte, c
byte) int {
142 for i
:= len(s
) - 1; i
>= 0; i
-- {
150 // IndexRune interprets s as a sequence of UTF-8-encoded code points.
151 // It returns the byte index of the first occurrence in s of the given rune.
152 // It returns -1 if rune is not present in s.
153 // If r is utf8.RuneError, it returns the first instance of any
154 // invalid UTF-8 byte sequence.
155 func IndexRune(s
[]byte, r rune
) int {
157 case 0 <= r
&& r
< utf8
.RuneSelf
:
158 return IndexByte(s
, byte(r
))
159 case r
== utf8
.RuneError
:
160 for i
:= 0; i
< len(s
); {
161 r1
, n
:= utf8
.DecodeRune(s
[i
:])
162 if r1
== utf8
.RuneError
{
168 case !utf8
.ValidRune(r
):
171 var b
[utf8
.UTFMax
]byte
172 n
:= utf8
.EncodeRune(b
[:], r
)
173 return Index(s
, b
[:n
])
177 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
178 // It returns the byte index of the first occurrence in s of any of the Unicode
179 // code points in chars. It returns -1 if chars is empty or if there is no code
181 func IndexAny(s
[]byte, chars
string) int {
183 // Avoid scanning all of s.
188 if r
>= utf8
.RuneSelf
{
189 // search utf8.RuneError.
190 for _
, r
= range chars
{
191 if r
== utf8
.RuneError
{
197 if bytealg
.IndexByteString(chars
, s
[0]) >= 0 {
204 if r
>= utf8
.RuneSelf
{
207 return IndexRune(s
, r
)
210 if as
, isASCII
:= makeASCIISet(chars
); isASCII
{
211 for i
, c
:= range s
{
220 for i
:= 0; i
< len(s
); i
+= width
{
222 if r
< utf8
.RuneSelf
{
223 if bytealg
.IndexByteString(chars
, s
[i
]) >= 0 {
229 r
, width
= utf8
.DecodeRune(s
[i
:])
230 if r
== utf8
.RuneError
{
231 for _
, r
= range chars
{
232 if r
== utf8
.RuneError
{
238 // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
239 // package should not import the strings package, use bytealg.IndexString
240 // instead. And this does not seem to lose much performance.
241 if chars
== string(r
) || bytealg
.IndexString(chars
, string(r
)) >= 0 {
248 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
249 // points. It returns the byte index of the last occurrence in s of any of
250 // the Unicode code points in chars. It returns -1 if chars is empty or if
251 // there is no code point in common.
252 func LastIndexAny(s
[]byte, chars
string) int {
254 // Avoid scanning all of s.
258 if as
, isASCII
:= makeASCIISet(chars
); isASCII
{
259 for i
:= len(s
) - 1; i
>= 0; i
-- {
260 if as
.contains(s
[i
]) {
269 if r
>= utf8
.RuneSelf
{
270 for _
, r
= range chars
{
271 if r
== utf8
.RuneError
{
277 if bytealg
.IndexByteString(chars
, s
[0]) >= 0 {
284 if cr
>= utf8
.RuneSelf
{
287 for i
:= len(s
); i
> 0; {
288 r
, size
:= utf8
.DecodeLastRune(s
[:i
])
296 for i
:= len(s
); i
> 0; {
298 if r
< utf8
.RuneSelf
{
299 if bytealg
.IndexByteString(chars
, s
[i
-1]) >= 0 {
305 r
, size
:= utf8
.DecodeLastRune(s
[:i
])
307 if r
== utf8
.RuneError
{
308 for _
, r
= range chars
{
309 if r
== utf8
.RuneError
{
315 // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
316 // package should not import the strings package, use bytealg.IndexString
317 // instead. And this does not seem to lose much performance.
318 if chars
== string(r
) || bytealg
.IndexString(chars
, string(r
)) >= 0 {
325 // Generic split: splits after each instance of sep,
326 // including sepSave bytes of sep in the subslices.
327 func genSplit(s
, sep
[]byte, sepSave
, n
int) [][]byte {
335 n
= Count(s
, sep
) + 1
338 a
:= make([][]byte, n
)
346 a
[i
] = s
[: m
+sepSave
: m
+sepSave
]
354 // SplitN slices s into subslices separated by sep and returns a slice of
355 // the subslices between those separators.
356 // If sep is empty, SplitN splits after each UTF-8 sequence.
357 // The count determines the number of subslices to return:
358 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
359 // n == 0: the result is nil (zero subslices)
360 // n < 0: all subslices
361 func SplitN(s
, sep
[]byte, n
int) [][]byte { return genSplit(s
, sep
, 0, n
) }
363 // SplitAfterN slices s into subslices after each instance of sep and
364 // returns a slice of those subslices.
365 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
366 // The count determines the number of subslices to return:
367 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
368 // n == 0: the result is nil (zero subslices)
369 // n < 0: all subslices
370 func SplitAfterN(s
, sep
[]byte, n
int) [][]byte {
371 return genSplit(s
, sep
, len(sep
), n
)
374 // Split slices s into all subslices separated by sep and returns a slice of
375 // the subslices between those separators.
376 // If sep is empty, Split splits after each UTF-8 sequence.
377 // It is equivalent to SplitN with a count of -1.
378 func Split(s
, sep
[]byte) [][]byte { return genSplit(s
, sep
, 0, -1) }
380 // SplitAfter slices s into all subslices after each instance of sep and
381 // returns a slice of those subslices.
382 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
383 // It is equivalent to SplitAfterN with a count of -1.
384 func SplitAfter(s
, sep
[]byte) [][]byte {
385 return genSplit(s
, sep
, len(sep
), -1)
388 var asciiSpace
= [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
390 // Fields interprets s as a sequence of UTF-8-encoded code points.
391 // It splits the slice s around each instance of one or more consecutive white space
392 // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
393 // empty slice if s contains only white space.
394 func Fields(s
[]byte) [][]byte {
395 // First count the fields.
396 // This is an exact count if s is ASCII, otherwise it is an approximation.
399 // setBits is used to track which bits are set in the bytes of s.
401 for i
:= 0; i
< len(s
); i
++ {
404 isSpace
:= int(asciiSpace
[r
])
405 n
+= wasSpace
& ^isSpace
409 if setBits
>= utf8
.RuneSelf
{
410 // Some runes in the input slice are not ASCII.
411 return FieldsFunc(s
, unicode
.IsSpace
)
415 a
:= make([][]byte, n
)
419 // Skip spaces in the front of the input.
420 for i
< len(s
) && asciiSpace
[s
[i
]] != 0 {
425 if asciiSpace
[s
[i
]] == 0 {
429 a
[na
] = s
[fieldStart
:i
:i
]
432 // Skip spaces in between fields.
433 for i
< len(s
) && asciiSpace
[s
[i
]] != 0 {
438 if fieldStart
< len(s
) { // Last field might end at EOF.
439 a
[na
] = s
[fieldStart
:len(s
):len(s
)]
444 // FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
445 // It splits the slice s at each run of code points c satisfying f(c) and
446 // returns a slice of subslices of s. If all code points in s satisfy f(c), or
447 // len(s) == 0, an empty slice is returned.
449 // FieldsFunc makes no guarantees about the order in which it calls f(c)
450 // and assumes that f always returns the same value for a given c.
451 func FieldsFunc(s
[]byte, f
func(rune
) bool) [][]byte {
452 // A span is used to record a slice of s of the form s[start:end].
453 // The start index is inclusive and the end index is exclusive.
458 spans
:= make([]span
, 0, 32)
460 // Find the field start and end indices.
461 // Doing this in a separate pass (rather than slicing the string s
462 // and collecting the result substrings right away) is significantly
463 // more efficient, possibly due to cache effects.
464 start
:= -1 // valid span start if >= 0
465 for i
:= 0; i
< len(s
); {
468 if r
>= utf8
.RuneSelf
{
469 r
, size
= utf8
.DecodeRune(s
[i
:])
473 spans
= append(spans
, span
{start
, i
})
484 // Last field might end at EOF.
486 spans
= append(spans
, span
{start
, len(s
)})
489 // Create subslices from recorded field indices.
490 a
:= make([][]byte, len(spans
))
491 for i
, span
:= range spans
{
492 a
[i
] = s
[span
.start
:span
.end
:span
.end
]
498 // Join concatenates the elements of s to create a new byte slice. The separator
499 // sep is placed between elements in the resulting slice.
500 func Join(s
[][]byte, sep
[]byte) []byte {
505 // Just return a copy.
506 return append([]byte(nil), s
[0]...)
508 n
:= len(sep
) * (len(s
) - 1)
509 for _
, v
:= range s
{
515 for _
, v
:= range s
[1:] {
516 bp
+= copy(b
[bp
:], sep
)
517 bp
+= copy(b
[bp
:], v
)
522 // HasPrefix tests whether the byte slice s begins with prefix.
523 func HasPrefix(s
, prefix
[]byte) bool {
524 return len(s
) >= len(prefix
) && Equal(s
[0:len(prefix
)], prefix
)
527 // HasSuffix tests whether the byte slice s ends with suffix.
528 func HasSuffix(s
, suffix
[]byte) bool {
529 return len(s
) >= len(suffix
) && Equal(s
[len(s
)-len(suffix
):], suffix
)
532 // Map returns a copy of the byte slice s with all its characters modified
533 // according to the mapping function. If mapping returns a negative value, the character is
534 // dropped from the byte slice with no replacement. The characters in s and the
535 // output are interpreted as UTF-8-encoded code points.
536 func Map(mapping
func(r rune
) rune
, s
[]byte) []byte {
537 // In the worst case, the slice can grow when mapped, making
538 // things unpleasant. But it's so rare we barge in assuming it's
539 // fine. It could also shrink but that falls out naturally.
540 maxbytes
:= len(s
) // length of b
541 nbytes
:= 0 // number of bytes encoded in b
542 b
:= make([]byte, maxbytes
)
543 for i
:= 0; i
< len(s
); {
546 if r
>= utf8
.RuneSelf
{
547 r
, wid
= utf8
.DecodeRune(s
[i
:])
551 rl
:= utf8
.RuneLen(r
)
553 rl
= len(string(utf8
.RuneError
))
555 if nbytes
+rl
> maxbytes
{
557 maxbytes
= maxbytes
*2 + utf8
.UTFMax
558 nb
:= make([]byte, maxbytes
)
559 copy(nb
, b
[0:nbytes
])
562 nbytes
+= utf8
.EncodeRune(b
[nbytes
:maxbytes
], r
)
569 // Repeat returns a new byte slice consisting of count copies of b.
571 // It panics if count is negative or if
572 // the result of (len(b) * count) overflows.
573 func Repeat(b
[]byte, count
int) []byte {
577 // Since we cannot return an error on overflow,
578 // we should panic if the repeat will generate
580 // See Issue golang.org/issue/16237.
582 panic("bytes: negative Repeat count")
583 } else if len(b
)*count
/count
!= len(b
) {
584 panic("bytes: Repeat count causes overflow")
587 nb
:= make([]byte, len(b
)*count
)
590 copy(nb
[bp
:], nb
[:bp
])
596 // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to
598 func ToUpper(s
[]byte) []byte {
599 isASCII
, hasLower
:= true, false
600 for i
:= 0; i
< len(s
); i
++ {
602 if c
>= utf8
.RuneSelf
{
606 hasLower
= hasLower ||
('a' <= c
&& c
<= 'z')
609 if isASCII
{ // optimize for ASCII-only byte slices.
611 // Just return a copy.
612 return append([]byte(""), s
...)
614 b
:= make([]byte, len(s
))
615 for i
:= 0; i
< len(s
); i
++ {
617 if 'a' <= c
&& c
<= 'z' {
624 return Map(unicode
.ToUpper
, s
)
627 // ToLower returns a copy of the byte slice s with all Unicode letters mapped to
629 func ToLower(s
[]byte) []byte {
630 isASCII
, hasUpper
:= true, false
631 for i
:= 0; i
< len(s
); i
++ {
633 if c
>= utf8
.RuneSelf
{
637 hasUpper
= hasUpper ||
('A' <= c
&& c
<= 'Z')
640 if isASCII
{ // optimize for ASCII-only byte slices.
642 return append([]byte(""), s
...)
644 b
:= make([]byte, len(s
))
645 for i
:= 0; i
< len(s
); i
++ {
647 if 'A' <= c
&& c
<= 'Z' {
654 return Map(unicode
.ToLower
, s
)
657 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
658 func ToTitle(s
[]byte) []byte { return Map(unicode
.ToTitle
, s
) }
660 // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
661 // upper case, giving priority to the special casing rules.
662 func ToUpperSpecial(c unicode
.SpecialCase
, s
[]byte) []byte {
663 return Map(c
.ToUpper
, s
)
666 // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
667 // lower case, giving priority to the special casing rules.
668 func ToLowerSpecial(c unicode
.SpecialCase
, s
[]byte) []byte {
669 return Map(c
.ToLower
, s
)
672 // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
673 // title case, giving priority to the special casing rules.
674 func ToTitleSpecial(c unicode
.SpecialCase
, s
[]byte) []byte {
675 return Map(c
.ToTitle
, s
)
678 // ToValidUTF8 treats s as UTF-8-encoded bytes and returns a copy with each run of bytes
679 // representing invalid UTF-8 replaced with the bytes in replacement, which may be empty.
680 func ToValidUTF8(s
, replacement
[]byte) []byte {
681 b
:= make([]byte, 0, len(s
)+len(replacement
))
682 invalid
:= false // previous byte was from an invalid UTF-8 sequence
683 for i
:= 0; i
< len(s
); {
685 if c
< utf8
.RuneSelf
{
688 b
= append(b
, byte(c
))
691 _
, wid
:= utf8
.DecodeRune(s
[i
:])
696 b
= append(b
, replacement
...)
701 b
= append(b
, s
[i
:i
+wid
]...)
707 // isSeparator reports whether the rune could mark a word boundary.
708 // TODO: update when package unicode captures more of the properties.
709 func isSeparator(r rune
) bool {
710 // ASCII alphanumerics and underscore are not separators
713 case '0' <= r
&& r
<= '9':
715 case 'a' <= r
&& r
<= 'z':
717 case 'A' <= r
&& r
<= 'Z':
724 // Letters and digits are not separators
725 if unicode
.IsLetter(r
) || unicode
.IsDigit(r
) {
728 // Otherwise, all we can do for now is treat spaces as separators.
729 return unicode
.IsSpace(r
)
732 // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
733 // words mapped to their title case.
735 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
736 func Title(s
[]byte) []byte {
737 // Use a closure here to remember state.
738 // Hackish but effective. Depends on Map scanning in order and calling
739 // the closure once per rune.
743 if isSeparator(prev
) {
745 return unicode
.ToTitle(r
)
753 // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
754 // all leading UTF-8-encoded code points c that satisfy f(c).
755 func TrimLeftFunc(s
[]byte, f
func(r rune
) bool) []byte {
756 i
:= indexFunc(s
, f
, false)
763 // TrimRightFunc returns a subslice of s by slicing off all trailing
764 // UTF-8-encoded code points c that satisfy f(c).
765 func TrimRightFunc(s
[]byte, f
func(r rune
) bool) []byte {
766 i
:= lastIndexFunc(s
, f
, false)
767 if i
>= 0 && s
[i
] >= utf8
.RuneSelf
{
768 _
, wid
:= utf8
.DecodeRune(s
[i
:])
776 // TrimFunc returns a subslice of s by slicing off all leading and trailing
777 // UTF-8-encoded code points c that satisfy f(c).
778 func TrimFunc(s
[]byte, f
func(r rune
) bool) []byte {
779 return TrimRightFunc(TrimLeftFunc(s
, f
), f
)
782 // TrimPrefix returns s without the provided leading prefix string.
783 // If s doesn't start with prefix, s is returned unchanged.
784 func TrimPrefix(s
, prefix
[]byte) []byte {
785 if HasPrefix(s
, prefix
) {
786 return s
[len(prefix
):]
791 // TrimSuffix returns s without the provided trailing suffix string.
792 // If s doesn't end with suffix, s is returned unchanged.
793 func TrimSuffix(s
, suffix
[]byte) []byte {
794 if HasSuffix(s
, suffix
) {
795 return s
[:len(s
)-len(suffix
)]
800 // IndexFunc interprets s as a sequence of UTF-8-encoded code points.
801 // It returns the byte index in s of the first Unicode
802 // code point satisfying f(c), or -1 if none do.
803 func IndexFunc(s
[]byte, f
func(r rune
) bool) int {
804 return indexFunc(s
, f
, true)
807 // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
808 // It returns the byte index in s of the last Unicode
809 // code point satisfying f(c), or -1 if none do.
810 func LastIndexFunc(s
[]byte, f
func(r rune
) bool) int {
811 return lastIndexFunc(s
, f
, true)
814 // indexFunc is the same as IndexFunc except that if
815 // truth==false, the sense of the predicate function is
817 func indexFunc(s
[]byte, f
func(r rune
) bool, truth
bool) int {
822 if r
>= utf8
.RuneSelf
{
823 r
, wid
= utf8
.DecodeRune(s
[start
:])
833 // lastIndexFunc is the same as LastIndexFunc except that if
834 // truth==false, the sense of the predicate function is
836 func lastIndexFunc(s
[]byte, f
func(r rune
) bool, truth
bool) int {
837 for i
:= len(s
); i
> 0; {
838 r
, size
:= rune(s
[i
-1]), 1
839 if r
>= utf8
.RuneSelf
{
840 r
, size
= utf8
.DecodeLastRune(s
[0:i
])
850 // asciiSet is a 32-byte value, where each bit represents the presence of a
851 // given ASCII character in the set. The 128-bits of the lower 16 bytes,
852 // starting with the least-significant bit of the lowest word to the
853 // most-significant bit of the highest word, map to the full range of all
854 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
855 // ensuring that any non-ASCII character will be reported as not in the set.
856 type asciiSet
[8]uint32
858 // makeASCIISet creates a set of ASCII characters and reports whether all
859 // characters in chars are ASCII.
860 func makeASCIISet(chars
string) (as asciiSet
, ok
bool) {
861 for i
:= 0; i
< len(chars
); i
++ {
863 if c
>= utf8
.RuneSelf
{
866 as
[c
>>5] |
= 1 << uint(c
&31)
871 // contains reports whether c is inside the set.
872 func (as
*asciiSet
) contains(c
byte) bool {
873 return (as
[c
>>5] & (1 << uint(c
&31))) != 0
876 func makeCutsetFunc(cutset
string) func(r rune
) bool {
877 if len(cutset
) == 1 && cutset
[0] < utf8
.RuneSelf
{
878 return func(r rune
) bool {
879 return r
== rune(cutset
[0])
882 if as
, isASCII
:= makeASCIISet(cutset
); isASCII
{
883 return func(r rune
) bool {
884 return r
< utf8
.RuneSelf
&& as
.contains(byte(r
))
887 return func(r rune
) bool {
888 for _
, c
:= range cutset
{
897 // Trim returns a subslice of s by slicing off all leading and
898 // trailing UTF-8-encoded code points contained in cutset.
899 func Trim(s
[]byte, cutset
string) []byte {
900 return TrimFunc(s
, makeCutsetFunc(cutset
))
903 // TrimLeft returns a subslice of s by slicing off all leading
904 // UTF-8-encoded code points contained in cutset.
905 func TrimLeft(s
[]byte, cutset
string) []byte {
906 return TrimLeftFunc(s
, makeCutsetFunc(cutset
))
909 // TrimRight returns a subslice of s by slicing off all trailing
910 // UTF-8-encoded code points that are contained in cutset.
911 func TrimRight(s
[]byte, cutset
string) []byte {
912 return TrimRightFunc(s
, makeCutsetFunc(cutset
))
915 // TrimSpace returns a subslice of s by slicing off all leading and
916 // trailing white space, as defined by Unicode.
917 func TrimSpace(s
[]byte) []byte {
918 // Fast path for ASCII: look for the first ASCII non-space byte
920 for ; start
< len(s
); start
++ {
922 if c
>= utf8
.RuneSelf
{
923 // If we run into a non-ASCII byte, fall back to the
924 // slower unicode-aware method on the remaining bytes
925 return TrimFunc(s
[start
:], unicode
.IsSpace
)
927 if asciiSpace
[c
] == 0 {
932 // Now look for the first ASCII non-space byte from the end
934 for ; stop
> start
; stop
-- {
936 if c
>= utf8
.RuneSelf
{
937 return TrimFunc(s
[start
:stop
], unicode
.IsSpace
)
939 if asciiSpace
[c
] == 0 {
944 // At this point s[start:stop] starts and ends with an ASCII
945 // non-space bytes, so we're done. Non-ASCII cases have already
946 // been handled above.
948 // Special case to preserve previous TrimLeftFunc behavior,
949 // returning nil instead of empty slice if all spaces.
955 // Runes interprets s as a sequence of UTF-8-encoded code points.
956 // It returns a slice of runes (Unicode code points) equivalent to s.
957 func Runes(s
[]byte) []rune
{
958 t
:= make([]rune
, utf8
.RuneCount(s
))
961 r
, l
:= utf8
.DecodeRune(s
)
969 // Replace returns a copy of the slice s with the first n
970 // non-overlapping instances of old replaced by new.
971 // If old is empty, it matches at the beginning of the slice
972 // and after each UTF-8 sequence, yielding up to k+1 replacements
973 // for a k-rune slice.
974 // If n < 0, there is no limit on the number of replacements.
975 func Replace(s
, old
, new []byte, n
int) []byte {
978 // Compute number of replacements.
982 // Just return a copy.
983 return append([]byte(nil), s
...)
989 // Apply replacements to buffer.
990 t
:= make([]byte, len(s
)+n
*(len(new)-len(old
)))
993 for i
:= 0; i
< n
; i
++ {
997 _
, wid
:= utf8
.DecodeRune(s
[start
:])
1001 j
+= Index(s
[start
:], old
)
1003 w
+= copy(t
[w
:], s
[start
:j
])
1004 w
+= copy(t
[w
:], new)
1005 start
= j
+ len(old
)
1007 w
+= copy(t
[w
:], s
[start
:])
1011 // ReplaceAll returns a copy of the slice s with all
1012 // non-overlapping instances of old replaced by new.
1013 // If old is empty, it matches at the beginning of the slice
1014 // and after each UTF-8 sequence, yielding up to k+1 replacements
1015 // for a k-rune slice.
1016 func ReplaceAll(s
, old
, new []byte) []byte {
1017 return Replace(s
, old
, new, -1)
1020 // EqualFold reports whether s and t, interpreted as UTF-8 strings,
1021 // are equal under Unicode case-folding, which is a more general
1022 // form of case-insensitivity.
1023 func EqualFold(s
, t
[]byte) bool {
1024 for len(s
) != 0 && len(t
) != 0 {
1025 // Extract first rune from each.
1027 if s
[0] < utf8
.RuneSelf
{
1028 sr
, s
= rune(s
[0]), s
[1:]
1030 r
, size
:= utf8
.DecodeRune(s
)
1033 if t
[0] < utf8
.RuneSelf
{
1034 tr
, t
= rune(t
[0]), t
[1:]
1036 r
, size
:= utf8
.DecodeRune(t
)
1040 // If they match, keep going; if not, return false.
1047 // Make sr < tr to simplify what follows.
1051 // Fast check for ASCII.
1052 if tr
< utf8
.RuneSelf
{
1053 // ASCII only, sr/tr must be upper/lower case
1054 if 'A' <= sr
&& sr
<= 'Z' && tr
== sr
+'a'-'A' {
1060 // General case. SimpleFold(x) returns the next equivalent rune > x
1061 // or wraps around to smaller values.
1062 r
:= unicode
.SimpleFold(sr
)
1063 for r
!= sr
&& r
< tr
{
1064 r
= unicode
.SimpleFold(r
)
1072 // One string is empty. Are both?
1073 return len(s
) == len(t
)
1076 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
1077 func Index(s
, sep
[]byte) int {
1083 return IndexByte(s
, sep
[0])
1091 case n
<= bytealg
.MaxLen
:
1092 // Use brute force when s and sep both are small
1093 if len(s
) <= bytealg
.MaxBruteForce
{
1094 return bytealg
.Index(s
, sep
)
1103 // IndexByte is faster than bytealg.Index, so use it as long as
1104 // we're not getting lots of false positives.
1105 o
:= IndexByte(s
[i
+1:t
], c0
)
1111 if s
[i
+1] == c1
&& Equal(s
[i
:i
+n
], sep
) {
1116 // Switch to bytealg.Index when IndexByte produces too many false positives.
1117 if fails
> bytealg
.Cutover(i
) {
1118 r
:= bytealg
.Index(s
[i
:], sep
)
1134 o
:= IndexByte(s
[i
+1:t
], c0
)
1140 if s
[i
+1] == c1
&& Equal(s
[i
:i
+n
], sep
) {
1145 if fails
>= 4+i
>>4 && i
< t
{
1146 // Give up on IndexByte, it isn't skipping ahead
1147 // far enough to be better than Rabin-Karp.
1148 // Experiments (using IndexPeriodic) suggest
1149 // the cutover is about 16 byte skips.
1150 // TODO: if large prefixes of sep are matching
1151 // we should cutover at even larger average skips,
1152 // because Equal becomes that much more expensive.
1153 // This code does not take that effect into account.
1154 j
:= bytealg
.IndexRabinKarpBytes(s
[i
:], sep
)