1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package bytes implements functions for the manipulation of byte slices.
6 // It is analogous to the facilities of the strings package.
14 func equalPortable(a
, b
[]byte) bool {
26 // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
27 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
28 func explode(s
[]byte, n
int) [][]byte {
32 a
:= make([][]byte, n
)
41 _
, size
= utf8
.DecodeRune(s
)
49 // Count counts the number of non-overlapping instances of sep in s.
50 // If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
51 func Count(s
, sep
[]byte) int {
54 return utf8
.RuneCount(s
) + 1
65 o
:= IndexByte(t
[i
:], c
)
71 if n
== 1 ||
Equal(s
[i
:i
+n
], sep
) {
81 // Contains reports whether subslice is within b.
82 func Contains(b
, subslice
[]byte) bool {
83 return Index(b
, subslice
) != -1
86 // ContainsAny reports whether any of the UTF-8-encoded Unicode code points in chars are within b.
87 func ContainsAny(b
[]byte, chars
string) bool {
88 return IndexAny(b
, chars
) >= 0
91 // ContainsRune reports whether the Unicode code point r is within b.
92 func ContainsRune(b
[]byte, r rune
) bool {
93 return IndexRune(b
, r
) >= 0
96 func indexBytePortable(s
[]byte, c
byte) int {
105 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
106 func LastIndex(s
, sep
[]byte) int {
112 for i
:= len(s
) - n
; i
>= 0; i
-- {
113 if s
[i
] == c
&& (n
== 1 ||
Equal(s
[i
:i
+n
], sep
)) {
// LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
func LastIndexByte(s []byte, c byte) int {
	// Scan from the end so the first match found is the last occurrence.
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] == c {
			return i
		}
	}
	return -1
}
130 // IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
131 // It returns the byte index of the first occurrence in s of the given rune.
132 // It returns -1 if rune is not present in s.
133 // If r is utf8.RuneError, it returns the first instance of any
134 // invalid UTF-8 byte sequence.
135 func IndexRune(s
[]byte, r rune
) int {
137 case 0 <= r
&& r
< utf8
.RuneSelf
:
138 return IndexByte(s
, byte(r
))
139 case r
== utf8
.RuneError
:
140 for i
:= 0; i
< len(s
); {
141 r1
, n
:= utf8
.DecodeRune(s
[i
:])
142 if r1
== utf8
.RuneError
{
148 case !utf8
.ValidRune(r
):
151 var b
[utf8
.UTFMax
]byte
152 n
:= utf8
.EncodeRune(b
[:], r
)
153 return Index(s
, b
[:n
])
// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of any of the Unicode
// code points in chars. It returns -1 if chars is empty or if there is no code
// point in common.
161 func IndexAny(s
[]byte, chars
string) int {
164 if as
, isASCII
:= makeASCIISet(chars
); isASCII
{
165 for i
, c
:= range s
{
174 for i
:= 0; i
< len(s
); i
+= width
{
176 if r
< utf8
.RuneSelf
{
179 r
, width
= utf8
.DecodeRune(s
[i
:])
181 for _
, ch
:= range chars
{
191 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
192 // points. It returns the byte index of the last occurrence in s of any of
193 // the Unicode code points in chars. It returns -1 if chars is empty or if
194 // there is no code point in common.
195 func LastIndexAny(s
[]byte, chars
string) int {
198 if as
, isASCII
:= makeASCIISet(chars
); isASCII
{
199 for i
:= len(s
) - 1; i
>= 0; i
-- {
200 if as
.contains(s
[i
]) {
207 for i
:= len(s
); i
> 0; {
208 r
, size
:= utf8
.DecodeLastRune(s
[:i
])
210 for _
, c
:= range chars
{
220 // Generic split: splits after each instance of sep,
221 // including sepSave bytes of sep in the subslices.
222 func genSplit(s
, sep
[]byte, sepSave
, n
int) [][]byte {
230 n
= Count(s
, sep
) + 1
234 a
:= make([][]byte, n
)
236 for i
:= 0; i
+len(sep
) <= len(s
) && na
+1 < n
; i
++ {
237 if s
[i
] == c
&& (len(sep
) == 1 ||
Equal(s
[i
:i
+len(sep
)], sep
)) {
238 a
[na
] = s
[start
: i
+sepSave
]
248 // SplitN slices s into subslices separated by sep and returns a slice of
249 // the subslices between those separators.
250 // If sep is empty, SplitN splits after each UTF-8 sequence.
251 // The count determines the number of subslices to return:
252 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
253 // n == 0: the result is nil (zero subslices)
254 // n < 0: all subslices
255 func SplitN(s
, sep
[]byte, n
int) [][]byte { return genSplit(s
, sep
, 0, n
) }
257 // SplitAfterN slices s into subslices after each instance of sep and
258 // returns a slice of those subslices.
259 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
260 // The count determines the number of subslices to return:
261 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
262 // n == 0: the result is nil (zero subslices)
263 // n < 0: all subslices
264 func SplitAfterN(s
, sep
[]byte, n
int) [][]byte {
265 return genSplit(s
, sep
, len(sep
), n
)
268 // Split slices s into all subslices separated by sep and returns a slice of
269 // the subslices between those separators.
270 // If sep is empty, Split splits after each UTF-8 sequence.
271 // It is equivalent to SplitN with a count of -1.
272 func Split(s
, sep
[]byte) [][]byte { return genSplit(s
, sep
, 0, -1) }
274 // SplitAfter slices s into all subslices after each instance of sep and
275 // returns a slice of those subslices.
276 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
277 // It is equivalent to SplitAfterN with a count of -1.
278 func SplitAfter(s
, sep
[]byte) [][]byte {
279 return genSplit(s
, sep
, len(sep
), -1)
282 // Fields splits the slice s around each instance of one or more consecutive white space
283 // characters, returning a slice of subslices of s or an empty list if s contains only white space.
284 func Fields(s
[]byte) [][]byte {
285 return FieldsFunc(s
, unicode
.IsSpace
)
288 // FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
289 // It splits the slice s at each run of code points c satisfying f(c) and
290 // returns a slice of subslices of s. If all code points in s satisfy f(c), or
291 // len(s) == 0, an empty slice is returned.
292 // FieldsFunc makes no guarantees about the order in which it calls f(c).
293 // If f does not return consistent results for a given c, FieldsFunc may crash.
294 func FieldsFunc(s
[]byte, f
func(rune
) bool) [][]byte {
297 for i
:= 0; i
< len(s
); {
298 r
, size
:= utf8
.DecodeRune(s
[i
:])
299 wasInField
:= inField
301 if inField
&& !wasInField
{
307 a
:= make([][]byte, n
)
310 for i
:= 0; i
<= len(s
) && na
< n
; {
311 r
, size
:= utf8
.DecodeRune(s
[i
:])
312 if fieldStart
< 0 && size
> 0 && !f(r
) {
317 if fieldStart
>= 0 && (size
== 0 ||
f(r
)) {
318 a
[na
] = s
[fieldStart
:i
]
330 // Join concatenates the elements of s to create a new byte slice. The separator
331 // sep is placed between elements in the resulting slice.
332 func Join(s
[][]byte, sep
[]byte) []byte {
337 // Just return a copy.
338 return append([]byte(nil), s
[0]...)
340 n
:= len(sep
) * (len(s
) - 1)
341 for _
, v
:= range s
{
347 for _
, v
:= range s
[1:] {
348 bp
+= copy(b
[bp
:], sep
)
349 bp
+= copy(b
[bp
:], v
)
354 // HasPrefix tests whether the byte slice s begins with prefix.
355 func HasPrefix(s
, prefix
[]byte) bool {
356 return len(s
) >= len(prefix
) && Equal(s
[0:len(prefix
)], prefix
)
359 // HasSuffix tests whether the byte slice s ends with suffix.
360 func HasSuffix(s
, suffix
[]byte) bool {
361 return len(s
) >= len(suffix
) && Equal(s
[len(s
)-len(suffix
):], suffix
)
364 // Map returns a copy of the byte slice s with all its characters modified
365 // according to the mapping function. If mapping returns a negative value, the character is
366 // dropped from the string with no replacement. The characters in s and the
367 // output are interpreted as UTF-8-encoded Unicode code points.
368 func Map(mapping
func(r rune
) rune
, s
[]byte) []byte {
369 // In the worst case, the slice can grow when mapped, making
370 // things unpleasant. But it's so rare we barge in assuming it's
371 // fine. It could also shrink but that falls out naturally.
372 maxbytes
:= len(s
) // length of b
373 nbytes
:= 0 // number of bytes encoded in b
374 b
:= make([]byte, maxbytes
)
375 for i
:= 0; i
< len(s
); {
378 if r
>= utf8
.RuneSelf
{
379 r
, wid
= utf8
.DecodeRune(s
[i
:])
383 rl
:= utf8
.RuneLen(r
)
385 rl
= len(string(utf8
.RuneError
))
387 if nbytes
+rl
> maxbytes
{
389 maxbytes
= maxbytes
*2 + utf8
.UTFMax
390 nb
:= make([]byte, maxbytes
)
391 copy(nb
, b
[0:nbytes
])
394 nbytes
+= utf8
.EncodeRune(b
[nbytes
:maxbytes
], r
)
401 // Repeat returns a new byte slice consisting of count copies of b.
403 // It panics if count is negative or if
404 // the result of (len(b) * count) overflows.
405 func Repeat(b
[]byte, count
int) []byte {
// Since we cannot return an error on overflow,
// we should panic if the repeat will generate
// an overflow.
// See Issue golang.org/issue/16237.
411 panic("bytes: negative Repeat count")
412 } else if count
> 0 && len(b
)*count
/count
!= len(b
) {
413 panic("bytes: Repeat count causes overflow")
416 nb
:= make([]byte, len(b
)*count
)
419 copy(nb
[bp
:], nb
[:bp
])
425 // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
426 func ToUpper(s
[]byte) []byte { return Map(unicode
.ToUpper
, s
) }
428 // ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
429 func ToLower(s
[]byte) []byte { return Map(unicode
.ToLower
, s
) }
431 // ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
432 func ToTitle(s
[]byte) []byte { return Map(unicode
.ToTitle
, s
) }
434 // ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
435 // upper case, giving priority to the special casing rules.
436 func ToUpperSpecial(c unicode
.SpecialCase
, s
[]byte) []byte {
437 return Map(func(r rune
) rune
{ return c
.ToUpper(r
) }, s
)
440 // ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
441 // lower case, giving priority to the special casing rules.
442 func ToLowerSpecial(c unicode
.SpecialCase
, s
[]byte) []byte {
443 return Map(func(r rune
) rune
{ return c
.ToLower(r
) }, s
)
446 // ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
447 // title case, giving priority to the special casing rules.
448 func ToTitleSpecial(c unicode
.SpecialCase
, s
[]byte) []byte {
449 return Map(func(r rune
) rune
{ return c
.ToTitle(r
) }, s
)
452 // isSeparator reports whether the rune could mark a word boundary.
453 // TODO: update when package unicode captures more of the properties.
454 func isSeparator(r rune
) bool {
455 // ASCII alphanumerics and underscore are not separators
458 case '0' <= r
&& r
<= '9':
460 case 'a' <= r
&& r
<= 'z':
462 case 'A' <= r
&& r
<= 'Z':
469 // Letters and digits are not separators
470 if unicode
.IsLetter(r
) || unicode
.IsDigit(r
) {
473 // Otherwise, all we can do for now is treat spaces as separators.
474 return unicode
.IsSpace(r
)
477 // Title returns a copy of s with all Unicode letters that begin words
478 // mapped to their title case.
480 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
481 func Title(s
[]byte) []byte {
482 // Use a closure here to remember state.
483 // Hackish but effective. Depends on Map scanning in order and calling
484 // the closure once per rune.
488 if isSeparator(prev
) {
490 return unicode
.ToTitle(r
)
498 // TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded
499 // Unicode code points c that satisfy f(c).
500 func TrimLeftFunc(s
[]byte, f
func(r rune
) bool) []byte {
501 i
:= indexFunc(s
, f
, false)
508 // TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8
509 // encoded Unicode code points c that satisfy f(c).
510 func TrimRightFunc(s
[]byte, f
func(r rune
) bool) []byte {
511 i
:= lastIndexFunc(s
, f
, false)
512 if i
>= 0 && s
[i
] >= utf8
.RuneSelf
{
513 _
, wid
:= utf8
.DecodeRune(s
[i
:])
521 // TrimFunc returns a subslice of s by slicing off all leading and trailing
522 // UTF-8-encoded Unicode code points c that satisfy f(c).
523 func TrimFunc(s
[]byte, f
func(r rune
) bool) []byte {
524 return TrimRightFunc(TrimLeftFunc(s
, f
), f
)
527 // TrimPrefix returns s without the provided leading prefix string.
528 // If s doesn't start with prefix, s is returned unchanged.
529 func TrimPrefix(s
, prefix
[]byte) []byte {
530 if HasPrefix(s
, prefix
) {
531 return s
[len(prefix
):]
536 // TrimSuffix returns s without the provided trailing suffix string.
537 // If s doesn't end with suffix, s is returned unchanged.
538 func TrimSuffix(s
, suffix
[]byte) []byte {
539 if HasSuffix(s
, suffix
) {
540 return s
[:len(s
)-len(suffix
)]
545 // IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
546 // It returns the byte index in s of the first Unicode
547 // code point satisfying f(c), or -1 if none do.
548 func IndexFunc(s
[]byte, f
func(r rune
) bool) int {
549 return indexFunc(s
, f
, true)
552 // LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
553 // It returns the byte index in s of the last Unicode
554 // code point satisfying f(c), or -1 if none do.
555 func LastIndexFunc(s
[]byte, f
func(r rune
) bool) int {
556 return lastIndexFunc(s
, f
, true)
// indexFunc is the same as IndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
562 func indexFunc(s
[]byte, f
func(r rune
) bool, truth
bool) int {
567 if r
>= utf8
.RuneSelf
{
568 r
, wid
= utf8
.DecodeRune(s
[start
:])
// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
581 func lastIndexFunc(s
[]byte, f
func(r rune
) bool, truth
bool) int {
582 for i
:= len(s
); i
> 0; {
583 r
, size
:= rune(s
[i
-1]), 1
584 if r
>= utf8
.RuneSelf
{
585 r
, size
= utf8
.DecodeLastRune(s
[0:i
])
// asciiSet is a 32-byte value, where each bit represents the presence of a
// given ASCII character in the set. The 128-bits of the lower 16 bytes,
// starting with the least-significant bit of the lowest word to the
// most-significant bit of the highest word, map to the full range of all
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
// ensuring that any non-ASCII character will be reported as not in the set.
type asciiSet [8]uint32

// makeASCIISet creates a set of ASCII characters and reports whether all
// characters in chars are ASCII.
//
// On the first non-ASCII byte it stops and returns ok == false; the set
// returned alongside is only partially filled in that case.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		c := chars[i]
		if c >= utf8.RuneSelf {
			return as, false
		}
		// c>>5 picks the 32-bit word, c&31 the bit inside that word.
		as[c>>5] |= 1 << uint(c&31)
	}
	return as, true
}

// contains reports whether c is inside the set.
//
// Any byte value is a valid argument: c>>5 is at most 7, so the word index
// always falls inside the 8-word array.
func (as *asciiSet) contains(c byte) bool {
	return (as[c>>5] & (1 << uint(c&31))) != 0
}
621 func makeCutsetFunc(cutset
string) func(r rune
) bool {
622 if len(cutset
) == 1 && cutset
[0] < utf8
.RuneSelf
{
623 return func(r rune
) bool {
624 return r
== rune(cutset
[0])
627 if as
, isASCII
:= makeASCIISet(cutset
); isASCII
{
628 return func(r rune
) bool {
629 return r
< utf8
.RuneSelf
&& as
.contains(byte(r
))
632 return func(r rune
) bool {
633 for _
, c
:= range cutset
{
642 // Trim returns a subslice of s by slicing off all leading and
643 // trailing UTF-8-encoded Unicode code points contained in cutset.
644 func Trim(s
[]byte, cutset
string) []byte {
645 return TrimFunc(s
, makeCutsetFunc(cutset
))
648 // TrimLeft returns a subslice of s by slicing off all leading
649 // UTF-8-encoded Unicode code points contained in cutset.
650 func TrimLeft(s
[]byte, cutset
string) []byte {
651 return TrimLeftFunc(s
, makeCutsetFunc(cutset
))
654 // TrimRight returns a subslice of s by slicing off all trailing
655 // UTF-8-encoded Unicode code points that are contained in cutset.
656 func TrimRight(s
[]byte, cutset
string) []byte {
657 return TrimRightFunc(s
, makeCutsetFunc(cutset
))
660 // TrimSpace returns a subslice of s by slicing off all leading and
661 // trailing white space, as defined by Unicode.
662 func TrimSpace(s
[]byte) []byte {
663 return TrimFunc(s
, unicode
.IsSpace
)
666 // Runes returns a slice of runes (Unicode code points) equivalent to s.
667 func Runes(s
[]byte) []rune
{
668 t
:= make([]rune
, utf8
.RuneCount(s
))
671 r
, l
:= utf8
.DecodeRune(s
)
679 // Replace returns a copy of the slice s with the first n
680 // non-overlapping instances of old replaced by new.
681 // If old is empty, it matches at the beginning of the slice
682 // and after each UTF-8 sequence, yielding up to k+1 replacements
683 // for a k-rune slice.
684 // If n < 0, there is no limit on the number of replacements.
685 func Replace(s
, old
, new []byte, n
int) []byte {
688 // Compute number of replacements.
692 // Just return a copy.
693 return append([]byte(nil), s
...)
699 // Apply replacements to buffer.
700 t
:= make([]byte, len(s
)+n
*(len(new)-len(old
)))
703 for i
:= 0; i
< n
; i
++ {
707 _
, wid
:= utf8
.DecodeRune(s
[start
:])
711 j
+= Index(s
[start
:], old
)
713 w
+= copy(t
[w
:], s
[start
:j
])
714 w
+= copy(t
[w
:], new)
717 w
+= copy(t
[w
:], s
[start
:])
721 // EqualFold reports whether s and t, interpreted as UTF-8 strings,
722 // are equal under Unicode case-folding.
723 func EqualFold(s
, t
[]byte) bool {
724 for len(s
) != 0 && len(t
) != 0 {
725 // Extract first rune from each.
727 if s
[0] < utf8
.RuneSelf
{
728 sr
, s
= rune(s
[0]), s
[1:]
730 r
, size
:= utf8
.DecodeRune(s
)
733 if t
[0] < utf8
.RuneSelf
{
734 tr
, t
= rune(t
[0]), t
[1:]
736 r
, size
:= utf8
.DecodeRune(t
)
740 // If they match, keep going; if not, return false.
747 // Make sr < tr to simplify what follows.
751 // Fast check for ASCII.
752 if tr
< utf8
.RuneSelf
&& 'A' <= sr
&& sr
<= 'Z' {
753 // ASCII, and sr is upper case. tr must be lower case.
754 if tr
== sr
+'a'-'A' {
760 // General case. SimpleFold(x) returns the next equivalent rune > x
761 // or wraps around to smaller values.
762 r
:= unicode
.SimpleFold(sr
)
763 for r
!= sr
&& r
< tr
{
764 r
= unicode
.SimpleFold(r
)
772 // One string is empty. Are both?
773 return len(s
) == len(t
)