1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package bytes implements functions for the manipulation of byte slices.
6 // It is analogous to the facilities of the strings package.
14 func equalPortable(a
, b
[]byte) bool {
26 // explode splits s into a slice of UTF-8 sequences, one per Unicode character (still slices of bytes),
27 // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
28 func explode(s
[]byte, n
int) [][]byte {
32 a
:= make([][]byte, n
)
41 _
, size
= utf8
.DecodeRune(s
)
49 // Count counts the number of non-overlapping instances of sep in s.
50 func Count(s
, sep
[]byte) int {
53 return utf8
.RuneCount(s
) + 1
64 o
:= IndexByte(t
[i
:], c
)
70 if n
== 1 ||
Equal(s
[i
:i
+n
], sep
) {
80 // Contains reports whether subslice is within b.
81 func Contains(b
, subslice
[]byte) bool {
82 return Index(b
, subslice
) != -1
85 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
86 func Index(s
, sep
[]byte) int {
96 return IndexByte(s
, c
)
102 o
:= IndexByte(t
[i
:], c
)
108 if Equal(s
[i
:i
+n
], sep
) {
116 func indexBytePortable(s
[]byte, c
byte) int {
117 for i
, b
:= range s
{
125 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
126 func LastIndex(s
, sep
[]byte) int {
132 for i
:= len(s
) - n
; i
>= 0; i
-- {
133 if s
[i
] == c
&& (n
== 1 ||
Equal(s
[i
:i
+n
], sep
)) {
140 // IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
141 // It returns the byte index of the first occurrence in s of the given rune.
142 // It returns -1 if rune is not present in s.
143 func IndexRune(s
[]byte, r rune
) int {
144 for i
:= 0; i
< len(s
); {
145 r1
, size
:= utf8
.DecodeRune(s
[i
:])
154 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
155 // It returns the byte index of the first occurrence in s of any of the Unicode
156 // code points in chars. It returns -1 if chars is empty or if there is no code point in common.
158 func IndexAny(s
[]byte, chars
string) int {
162 for i
:= 0; i
< len(s
); i
+= width
{
164 if r
< utf8
.RuneSelf
{
167 r
, width
= utf8
.DecodeRune(s
[i
:])
169 for _
, ch
:= range chars
{
179 // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
180 // points. It returns the byte index of the last occurrence in s of any of
181 // the Unicode code points in chars. It returns -1 if chars is empty or if
182 // there is no code point in common.
183 func LastIndexAny(s
[]byte, chars
string) int {
185 for i
:= len(s
); i
> 0; {
186 r
, size
:= utf8
.DecodeLastRune(s
[0:i
])
188 for _
, ch
:= range chars
{
198 // Generic split: splits after each instance of sep,
199 // including sepSave bytes of sep in the subslices.
200 func genSplit(s
, sep
[]byte, sepSave
, n
int) [][]byte {
208 n
= Count(s
, sep
) + 1
212 a
:= make([][]byte, n
)
214 for i
:= 0; i
+len(sep
) <= len(s
) && na
+1 < n
; i
++ {
215 if s
[i
] == c
&& (len(sep
) == 1 ||
Equal(s
[i
:i
+len(sep
)], sep
)) {
216 a
[na
] = s
[start
: i
+sepSave
]
226 // SplitN slices s into subslices separated by sep and returns a slice of
227 // the subslices between those separators.
228 // If sep is empty, SplitN splits after each UTF-8 sequence.
229 // The count determines the number of subslices to return:
230 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
231 // n == 0: the result is nil (zero subslices)
232 // n < 0: all subslices
233 func SplitN(s
, sep
[]byte, n
int) [][]byte { return genSplit(s
, sep
, 0, n
) }
235 // SplitAfterN slices s into subslices after each instance of sep and
236 // returns a slice of those subslices.
237 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
238 // The count determines the number of subslices to return:
239 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
240 // n == 0: the result is nil (zero subslices)
241 // n < 0: all subslices
242 func SplitAfterN(s
, sep
[]byte, n
int) [][]byte {
243 return genSplit(s
, sep
, len(sep
), n
)
246 // Split slices s into all subslices separated by sep and returns a slice of
247 // the subslices between those separators.
248 // If sep is empty, Split splits after each UTF-8 sequence.
249 // It is equivalent to SplitN with a count of -1.
250 func Split(s
, sep
[]byte) [][]byte { return genSplit(s
, sep
, 0, -1) }
252 // SplitAfter slices s into all subslices after each instance of sep and
253 // returns a slice of those subslices.
254 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
255 // It is equivalent to SplitAfterN with a count of -1.
256 func SplitAfter(s
, sep
[]byte) [][]byte {
257 return genSplit(s
, sep
, len(sep
), -1)
260 // Fields splits the slice s around each instance of one or more consecutive white space
261 // characters, returning a slice of subslices of s or an empty slice if s contains only white space.
262 func Fields(s
[]byte) [][]byte {
263 return FieldsFunc(s
, unicode
.IsSpace
)
266 // FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
267 // It splits the slice s at each run of code points c satisfying f(c) and
268 // returns a slice of subslices of s. If all code points in s satisfy f(c), or
269 // len(s) == 0, an empty slice is returned.
270 // FieldsFunc makes no guarantees about the order in which it calls f(c).
271 // If f does not return consistent results for a given c, FieldsFunc may crash.
272 func FieldsFunc(s
[]byte, f
func(rune
) bool) [][]byte {
275 for i
:= 0; i
< len(s
); {
276 r
, size
:= utf8
.DecodeRune(s
[i
:])
277 wasInField
:= inField
279 if inField
&& !wasInField
{
285 a
:= make([][]byte, n
)
288 for i
:= 0; i
<= len(s
) && na
< n
; {
289 r
, size
:= utf8
.DecodeRune(s
[i
:])
290 if fieldStart
< 0 && size
> 0 && !f(r
) {
295 if fieldStart
>= 0 && (size
== 0 ||
f(r
)) {
296 a
[na
] = s
[fieldStart
:i
]
308 // Join concatenates the elements of s to create a new byte slice. The separator
309 // sep is placed between elements in the resulting slice.
310 func Join(s
[][]byte, sep
[]byte) []byte {
315 // Just return a copy.
316 return append([]byte(nil), s
[0]...)
318 n
:= len(sep
) * (len(s
) - 1)
319 for _
, v
:= range s
{
325 for _
, v
:= range s
[1:] {
326 bp
+= copy(b
[bp
:], sep
)
327 bp
+= copy(b
[bp
:], v
)
332 // HasPrefix tests whether the byte slice s begins with prefix.
333 func HasPrefix(s
, prefix
[]byte) bool {
334 return len(s
) >= len(prefix
) && Equal(s
[0:len(prefix
)], prefix
)
337 // HasSuffix tests whether the byte slice s ends with suffix.
338 func HasSuffix(s
, suffix
[]byte) bool {
339 return len(s
) >= len(suffix
) && Equal(s
[len(s
)-len(suffix
):], suffix
)
342 // Map returns a copy of the byte slice s with all its characters modified
343 // according to the mapping function. If mapping returns a negative value, the character is
344 // dropped from the byte slice with no replacement. The characters in s and the
345 // output are interpreted as UTF-8-encoded Unicode code points.
346 func Map(mapping
func(r rune
) rune
, s
[]byte) []byte {
347 // In the worst case, the slice can grow when mapped, making
348 // things unpleasant. But it's so rare we barge in assuming it's
349 // fine. It could also shrink but that falls out naturally.
350 maxbytes
:= len(s
) // length of b
351 nbytes
:= 0 // number of bytes encoded in b
352 b
:= make([]byte, maxbytes
)
353 for i
:= 0; i
< len(s
); {
356 if r
>= utf8
.RuneSelf
{
357 r
, wid
= utf8
.DecodeRune(s
[i
:])
361 rl
:= utf8
.RuneLen(r
)
363 rl
= len(string(utf8
.RuneError
))
365 if nbytes
+rl
> maxbytes
{
367 maxbytes
= maxbytes
*2 + utf8
.UTFMax
368 nb
:= make([]byte, maxbytes
)
369 copy(nb
, b
[0:nbytes
])
372 nbytes
+= utf8
.EncodeRune(b
[nbytes
:maxbytes
], r
)
379 // Repeat returns a new byte slice consisting of count copies of b.
380 func Repeat(b
[]byte, count
int) []byte {
381 nb
:= make([]byte, len(b
)*count
)
384 copy(nb
[bp
:], nb
[:bp
])
390 // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
391 func ToUpper(s
[]byte) []byte { return Map(unicode
.ToUpper
, s
) }
393 // ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
394 func ToLower(s
[]byte) []byte { return Map(unicode
.ToLower
, s
) }
396 // ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
397 func ToTitle(s
[]byte) []byte { return Map(unicode
.ToTitle
, s
) }
399 // ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
400 // upper case, giving priority to the special casing rules.
401 func ToUpperSpecial(_case unicode
.SpecialCase
, s
[]byte) []byte {
402 return Map(func(r rune
) rune
{ return _case
.ToUpper(r
) }, s
)
405 // ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
406 // lower case, giving priority to the special casing rules.
407 func ToLowerSpecial(_case unicode
.SpecialCase
, s
[]byte) []byte {
408 return Map(func(r rune
) rune
{ return _case
.ToLower(r
) }, s
)
411 // ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
412 // title case, giving priority to the special casing rules.
413 func ToTitleSpecial(_case unicode
.SpecialCase
, s
[]byte) []byte {
414 return Map(func(r rune
) rune
{ return _case
.ToTitle(r
) }, s
)
417 // isSeparator reports whether the rune could mark a word boundary.
418 // TODO: update when package unicode captures more of the properties.
419 func isSeparator(r rune
) bool {
420 // ASCII alphanumerics and underscore are not separators
423 case '0' <= r
&& r
<= '9':
425 case 'a' <= r
&& r
<= 'z':
427 case 'A' <= r
&& r
<= 'Z':
434 // Letters and digits are not separators
435 if unicode
.IsLetter(r
) || unicode
.IsDigit(r
) {
438 // Otherwise, all we can do for now is treat spaces as separators.
439 return unicode
.IsSpace(r
)
442 // Title returns a copy of s with all Unicode letters that begin words
443 // mapped to their title case.
445 // BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly.
446 func Title(s
[]byte) []byte {
447 // Use a closure here to remember state.
448 // Hackish but effective. Depends on Map scanning in order and calling
449 // the closure once per rune.
453 if isSeparator(prev
) {
455 return unicode
.ToTitle(r
)
463 // TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded
464 // Unicode code points c that satisfy f(c).
465 func TrimLeftFunc(s
[]byte, f
func(r rune
) bool) []byte {
466 i
:= indexFunc(s
, f
, false)
473 // TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8
474 // encoded Unicode code points c that satisfy f(c).
475 func TrimRightFunc(s
[]byte, f
func(r rune
) bool) []byte {
476 i
:= lastIndexFunc(s
, f
, false)
477 if i
>= 0 && s
[i
] >= utf8
.RuneSelf
{
478 _
, wid
:= utf8
.DecodeRune(s
[i
:])
486 // TrimFunc returns a subslice of s by slicing off all leading and trailing
487 // UTF-8-encoded Unicode code points c that satisfy f(c).
488 func TrimFunc(s
[]byte, f
func(r rune
) bool) []byte {
489 return TrimRightFunc(TrimLeftFunc(s
, f
), f
)
492 // TrimPrefix returns s without the provided leading prefix string.
493 // If s doesn't start with prefix, s is returned unchanged.
494 func TrimPrefix(s
, prefix
[]byte) []byte {
495 if HasPrefix(s
, prefix
) {
496 return s
[len(prefix
):]
501 // TrimSuffix returns s without the provided trailing suffix string.
502 // If s doesn't end with suffix, s is returned unchanged.
503 func TrimSuffix(s
, suffix
[]byte) []byte {
504 if HasSuffix(s
, suffix
) {
505 return s
[:len(s
)-len(suffix
)]
510 // IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
511 // It returns the byte index in s of the first Unicode
512 // code point satisfying f(c), or -1 if none do.
513 func IndexFunc(s
[]byte, f
func(r rune
) bool) int {
514 return indexFunc(s
, f
, true)
517 // LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
518 // It returns the byte index in s of the last Unicode
519 // code point satisfying f(c), or -1 if none do.
520 func LastIndexFunc(s
[]byte, f
func(r rune
) bool) int {
521 return lastIndexFunc(s
, f
, true)
524 // indexFunc is the same as IndexFunc except that if
525 // truth==false, the sense of the predicate function is inverted.
527 func indexFunc(s
[]byte, f
func(r rune
) bool, truth
bool) int {
532 if r
>= utf8
.RuneSelf
{
533 r
, wid
= utf8
.DecodeRune(s
[start
:])
543 // lastIndexFunc is the same as LastIndexFunc except that if
544 // truth==false, the sense of the predicate function is inverted.
546 func lastIndexFunc(s
[]byte, f
func(r rune
) bool, truth
bool) int {
547 for i
:= len(s
); i
> 0; {
548 r
, size
:= rune(s
[i
-1]), 1
549 if r
>= utf8
.RuneSelf
{
550 r
, size
= utf8
.DecodeLastRune(s
[0:i
])
560 func makeCutsetFunc(cutset
string) func(r rune
) bool {
561 return func(r rune
) bool {
562 for _
, c
:= range cutset
{
571 // Trim returns a subslice of s by slicing off all leading and
572 // trailing UTF-8-encoded Unicode code points contained in cutset.
573 func Trim(s
[]byte, cutset
string) []byte {
574 return TrimFunc(s
, makeCutsetFunc(cutset
))
577 // TrimLeft returns a subslice of s by slicing off all leading
578 // UTF-8-encoded Unicode code points contained in cutset.
579 func TrimLeft(s
[]byte, cutset
string) []byte {
580 return TrimLeftFunc(s
, makeCutsetFunc(cutset
))
583 // TrimRight returns a subslice of s by slicing off all trailing
584 // UTF-8-encoded Unicode code points that are contained in cutset.
585 func TrimRight(s
[]byte, cutset
string) []byte {
586 return TrimRightFunc(s
, makeCutsetFunc(cutset
))
589 // TrimSpace returns a subslice of s by slicing off all leading and
590 // trailing white space, as defined by Unicode.
591 func TrimSpace(s
[]byte) []byte {
592 return TrimFunc(s
, unicode
.IsSpace
)
595 // Runes returns a slice of runes (Unicode code points) equivalent to s.
596 func Runes(s
[]byte) []rune
{
597 t
:= make([]rune
, utf8
.RuneCount(s
))
600 r
, l
:= utf8
.DecodeRune(s
)
608 // Replace returns a copy of the slice s with the first n
609 // non-overlapping instances of old replaced by new.
610 // If old is empty, it matches at the beginning of the slice
611 // and after each UTF-8 sequence, yielding up to k+1 replacements
612 // for a k-rune slice.
613 // If n < 0, there is no limit on the number of replacements.
614 func Replace(s
, old
, new []byte, n
int) []byte {
617 // Compute number of replacements.
621 // Just return a copy.
622 return append([]byte(nil), s
...)
628 // Apply replacements to buffer.
629 t
:= make([]byte, len(s
)+n
*(len(new)-len(old
)))
632 for i
:= 0; i
< n
; i
++ {
636 _
, wid
:= utf8
.DecodeRune(s
[start
:])
640 j
+= Index(s
[start
:], old
)
642 w
+= copy(t
[w
:], s
[start
:j
])
643 w
+= copy(t
[w
:], new)
646 w
+= copy(t
[w
:], s
[start
:])
650 // EqualFold reports whether s and t, interpreted as UTF-8 strings,
651 // are equal under Unicode case-folding.
652 func EqualFold(s
, t
[]byte) bool {
653 for len(s
) != 0 && len(t
) != 0 {
654 // Extract first rune from each.
656 if s
[0] < utf8
.RuneSelf
{
657 sr
, s
= rune(s
[0]), s
[1:]
659 r
, size
:= utf8
.DecodeRune(s
)
662 if t
[0] < utf8
.RuneSelf
{
663 tr
, t
= rune(t
[0]), t
[1:]
665 r
, size
:= utf8
.DecodeRune(t
)
669 // If they match, keep going; if not, return false.
676 // Make sr < tr to simplify what follows.
680 // Fast check for ASCII.
681 if tr
< utf8
.RuneSelf
&& 'A' <= sr
&& sr
<= 'Z' {
682 // ASCII, and sr is upper case. tr must be lower case.
683 if tr
== sr
+'a'-'A' {
689 // General case. SimpleFold(x) returns the next equivalent rune > x
690 // or wraps around to smaller values.
691 r
:= unicode
.SimpleFold(sr
)
692 for r
!= sr
&& r
< tr
{
693 r
= unicode
.SimpleFold(r
)
701 // One string is empty. Are both?
702 return len(s
) == len(t
)