* da.po, sv.po: Update.
[official-gcc.git] / libgo / go / bytes / bytes.go
blobb86824087e5356405855d4581aeb728fa50f8ae1
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package bytes implements functions for the manipulation of byte slices.
6 // It is analogous to the facilities of the strings package.
7 package bytes
9 import (
10 "unicode"
11 "unicode/utf8"
// equalPortable reports whether a and b have the same length and hold the
// same bytes in the same order. It is written in plain Go with no helper calls.
func equalPortable(a, b []byte) bool {
	if len(a) != len(b) {
		return false
	}
	for k := 0; k < len(a); k++ {
		if a[k] != b[k] {
			return false
		}
	}
	return true
}
// explode splits s into a slice of UTF-8 sequences, one per Unicode code point
// (still slices of bytes), up to a maximum of n byte slices. Invalid UTF-8
// sequences are chopped into individual bytes.
//
// A non-positive n means "no limit". Since s can never produce more than
// len(s) pieces, n is also clamped to len(s) so that a very large limit does
// not turn into a very large allocation.
func explode(s []byte, n int) [][]byte {
	if n <= 0 || n > len(s) {
		n = len(s)
	}
	a := make([][]byte, n)
	na := 0
	for len(s) > 0 {
		if na+1 >= n {
			// Last permitted slot: it receives the whole unsplit remainder.
			a[na] = s
			na++
			break
		}
		_, size := utf8.DecodeRune(s)
		a[na] = s[0:size]
		s = s[size:]
		na++
	}
	return a[0:na]
}
49 // Count counts the number of non-overlapping instances of sep in s.
50 // If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
51 func Count(s, sep []byte) int {
52 n := len(sep)
53 if n == 0 {
54 return utf8.RuneCount(s) + 1
56 if n > len(s) {
57 return 0
59 count := 0
60 c := sep[0]
61 i := 0
62 t := s[:len(s)-n+1]
63 for i < len(t) {
64 if t[i] != c {
65 o := IndexByte(t[i:], c)
66 if o < 0 {
67 break
69 i += o
71 if n == 1 || Equal(s[i:i+n], sep) {
72 count++
73 i += n
74 continue
76 i++
78 return count
81 // Contains reports whether subslice is within b.
82 func Contains(b, subslice []byte) bool {
83 return Index(b, subslice) != -1
86 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
87 func Index(s, sep []byte) int {
88 n := len(sep)
89 if n == 0 {
90 return 0
92 if n > len(s) {
93 return -1
95 c := sep[0]
96 if n == 1 {
97 return IndexByte(s, c)
99 i := 0
100 t := s[:len(s)-n+1]
101 for i < len(t) {
102 if t[i] != c {
103 o := IndexByte(t[i:], c)
104 if o < 0 {
105 break
107 i += o
109 if Equal(s[i:i+n], sep) {
110 return i
114 return -1
// indexBytePortable returns the index of the first byte in s equal to c,
// or -1 if no byte of s equals c.
func indexBytePortable(s []byte, c byte) int {
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == c {
			return pos
		}
	}
	return -1
}
126 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
127 func LastIndex(s, sep []byte) int {
128 n := len(sep)
129 if n == 0 {
130 return len(s)
132 c := sep[0]
133 for i := len(s) - n; i >= 0; i-- {
134 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {
135 return i
138 return -1
// LastIndexByte returns the index of the last instance of c in s, or -1 if c
// is not present in s.
func LastIndexByte(s []byte, c byte) int {
	pos := len(s)
	for pos > 0 {
		pos--
		if s[pos] == c {
			return pos
		}
	}
	return -1
}
// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of the given rune,
// or -1 if the rune is not present in s.
func IndexRune(s []byte, r rune) int {
	rest := s
	off := 0
	for len(rest) > 0 {
		got, width := utf8.DecodeRune(rest)
		if got == r {
			return off
		}
		off += width
		rest = rest[width:]
	}
	return -1
}
// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of any of the Unicode
// code points in chars. It returns -1 if chars is empty or if there is no code
// point in common.
func IndexAny(s []byte, chars string) int {
	if len(chars) == 0 {
		return -1
	}
	for pos := 0; pos < len(s); {
		// Single-byte fast path; fall back to a full decode otherwise.
		cur, step := rune(s[pos]), 1
		if cur >= utf8.RuneSelf {
			cur, step = utf8.DecodeRune(s[pos:])
		}
		for _, want := range chars {
			if want == cur {
				return pos
			}
		}
		pos += step
	}
	return -1
}
// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
// points. It returns the byte index of the last occurrence in s of any of
// the Unicode code points in chars. It returns -1 if chars is empty or if
// there is no code point in common.
func LastIndexAny(s []byte, chars string) int {
	if len(chars) == 0 {
		return -1
	}
	end := len(s)
	for end > 0 {
		cur, width := utf8.DecodeLastRune(s[:end])
		end -= width // end is now the start offset of cur
		for _, want := range chars {
			if want == cur {
				return end
			}
		}
	}
	return -1
}
209 // Generic split: splits after each instance of sep,
210 // including sepSave bytes of sep in the subslices.
211 func genSplit(s, sep []byte, sepSave, n int) [][]byte {
212 if n == 0 {
213 return nil
215 if len(sep) == 0 {
216 return explode(s, n)
218 if n < 0 {
219 n = Count(s, sep) + 1
221 c := sep[0]
222 start := 0
223 a := make([][]byte, n)
224 na := 0
225 for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
226 if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
227 a[na] = s[start : i+sepSave]
228 na++
229 start = i + len(sep)
230 i += len(sep) - 1
233 a[na] = s[start:]
234 return a[0 : na+1]
237 // SplitN slices s into subslices separated by sep and returns a slice of
238 // the subslices between those separators.
239 // If sep is empty, SplitN splits after each UTF-8 sequence.
240 // The count determines the number of subslices to return:
241 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
242 // n == 0: the result is nil (zero subslices)
243 // n < 0: all subslices
244 func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
246 // SplitAfterN slices s into subslices after each instance of sep and
247 // returns a slice of those subslices.
248 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
249 // The count determines the number of subslices to return:
250 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.
251 // n == 0: the result is nil (zero subslices)
252 // n < 0: all subslices
253 func SplitAfterN(s, sep []byte, n int) [][]byte {
254 return genSplit(s, sep, len(sep), n)
257 // Split slices s into all subslices separated by sep and returns a slice of
258 // the subslices between those separators.
259 // If sep is empty, Split splits after each UTF-8 sequence.
260 // It is equivalent to SplitN with a count of -1.
261 func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) }
263 // SplitAfter slices s into all subslices after each instance of sep and
264 // returns a slice of those subslices.
265 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
266 // It is equivalent to SplitAfterN with a count of -1.
267 func SplitAfter(s, sep []byte) [][]byte {
268 return genSplit(s, sep, len(sep), -1)
271 // Fields splits the slice s around each instance of one or more consecutive white space
272 // characters, returning a slice of subslices of s or an empty list if s contains only white space.
273 func Fields(s []byte) [][]byte {
274 return FieldsFunc(s, unicode.IsSpace)
// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
// It splits the slice s at each run of code points c satisfying f(c) and
// returns a slice of subslices of s. If all code points in s satisfy f(c), or
// len(s) == 0, an empty slice is returned.
// FieldsFunc makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
	// First pass: count the fields so the result can be allocated exactly once.
	n := 0
	inField := false
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRune(s[i:])
		wasInField := inField
		inField = !f(r)
		if inField && !wasInField {
			// Separator-to-field transition: a new field starts here.
			n++
		}
		i += size
	}

	// Second pass: record the bounds of each field.
	a := make([][]byte, n)
	na := 0
	fieldStart := -1
	for i := 0; i <= len(s) && na < n; {
		// At i == len(s) DecodeRune reports size 0, which closes a field
		// that runs to the end of s.
		r, size := utf8.DecodeRune(s[i:])
		if fieldStart < 0 && size > 0 && !f(r) {
			fieldStart = i
			i += size
			continue
		}
		if fieldStart >= 0 && (size == 0 || f(r)) {
			a[na] = s[fieldStart:i]
			na++
			fieldStart = -1
		}
		if size == 0 {
			break
		}
		i += size
	}
	return a[0:na]
}
// Join concatenates the elements of s to create a new byte slice. The separator
// sep is placed between elements in the resulting slice.
func Join(s [][]byte, sep []byte) []byte {
	switch len(s) {
	case 0:
		return []byte{}
	case 1:
		// Just return a copy.
		return append([]byte(nil), s[0]...)
	}
	// Size the result exactly: every element plus one separator per gap.
	total := len(sep) * (len(s) - 1)
	for i := range s {
		total += len(s[i])
	}

	out := make([]byte, 0, total)
	out = append(out, s[0]...)
	for _, part := range s[1:] {
		out = append(out, sep...)
		out = append(out, part...)
	}
	return out
}
343 // HasPrefix tests whether the byte slice s begins with prefix.
344 func HasPrefix(s, prefix []byte) bool {
345 return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
348 // HasSuffix tests whether the byte slice s ends with suffix.
349 func HasSuffix(s, suffix []byte) bool {
350 return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
// Map returns a copy of the byte slice s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the
// character is dropped from the output with no replacement. The characters in
// s and the output are interpreted as UTF-8-encoded Unicode code points.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// The output starts at the size of the input. Growth is handled below
	// when a mapped rune needs more room; shrinking falls out naturally.
	limit := len(s) // current length of out
	used := 0       // number of bytes encoded into out so far
	out := make([]byte, limit)
	for pos := 0; pos < len(s); {
		in, step := rune(s[pos]), 1
		if in >= utf8.RuneSelf {
			in, step = utf8.DecodeRune(s[pos:])
		}
		pos += step

		mapped := mapping(in)
		if mapped < 0 {
			continue // dropped
		}
		need := utf8.RuneLen(mapped)
		if need < 0 {
			// Not encodable: reserve room for the replacement character.
			need = len(string(utf8.RuneError))
		}
		if used+need > limit {
			// Grow the buffer.
			limit = limit*2 + utf8.UTFMax
			grown := make([]byte, limit)
			copy(grown, out[:used])
			out = grown
		}
		used += utf8.EncodeRune(out[used:limit], mapped)
	}
	return out[:used]
}
// Repeat returns a new byte slice consisting of count copies of b.
//
// It panics if count is negative or if the result of (len(b) * count)
// overflows.
func Repeat(b []byte, count int) []byte {
	if count < 0 {
		panic("bytes: negative Repeat count")
	}
	if len(b) == 0 || count == 0 {
		return []byte{}
	}
	// Multiplying then dividing back detects a wrapped product; a silent
	// wrap would otherwise yield a result of the wrong length.
	if len(b)*count/count != len(b) {
		panic("bytes: Repeat count causes overflow")
	}

	nb := make([]byte, len(b)*count)
	// Seed with one copy, then double the filled prefix on each pass.
	bp := copy(nb, b)
	for bp < len(nb) {
		copy(nb[bp:], nb[:bp])
		bp *= 2
	}
	return nb
}
401 // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
402 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
404 // ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
405 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
407 // ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
408 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
410 // ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
411 // upper case, giving priority to the special casing rules.
412 func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte {
413 return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
416 // ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
417 // lower case, giving priority to the special casing rules.
418 func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte {
419 return Map(func(r rune) rune { return _case.ToLower(r) }, s)
422 // ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
423 // title case, giving priority to the special casing rules.
424 func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte {
425 return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
// isSeparator reports whether the rune could mark a word boundary.
// TODO: update when package unicode captures more of the properties.
func isSeparator(r rune) bool {
	if r > 0x7F {
		// Beyond ASCII: letters and digits are not separators; otherwise
		// all we can do for now is treat spaces as separators.
		if unicode.IsLetter(r) || unicode.IsDigit(r) {
			return false
		}
		return unicode.IsSpace(r)
	}
	// ASCII range: alphanumerics and underscore are not separators,
	// everything else is.
	digit := '0' <= r && r <= '9'
	lower := 'a' <= r && r <= 'z'
	upper := 'A' <= r && r <= 'Z'
	return !(digit || lower || upper || r == '_')
}
453 // Title returns a copy of s with all Unicode letters that begin words
454 // mapped to their title case.
456 // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
457 func Title(s []byte) []byte {
458 // Use a closure here to remember state.
459 // Hackish but effective. Depends on Map scanning in order and calling
460 // the closure once per rune.
461 prev := ' '
462 return Map(
463 func(r rune) rune {
464 if isSeparator(prev) {
465 prev = r
466 return unicode.ToTitle(r)
468 prev = r
469 return r
474 // TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded
475 // Unicode code points c that satisfy f(c).
476 func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
477 i := indexFunc(s, f, false)
478 if i == -1 {
479 return nil
481 return s[i:]
484 // TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8
485 // encoded Unicode code points c that satisfy f(c).
486 func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
487 i := lastIndexFunc(s, f, false)
488 if i >= 0 && s[i] >= utf8.RuneSelf {
489 _, wid := utf8.DecodeRune(s[i:])
490 i += wid
491 } else {
494 return s[0:i]
497 // TrimFunc returns a subslice of s by slicing off all leading and trailing
498 // UTF-8-encoded Unicode code points c that satisfy f(c).
499 func TrimFunc(s []byte, f func(r rune) bool) []byte {
500 return TrimRightFunc(TrimLeftFunc(s, f), f)
503 // TrimPrefix returns s without the provided leading prefix string.
504 // If s doesn't start with prefix, s is returned unchanged.
505 func TrimPrefix(s, prefix []byte) []byte {
506 if HasPrefix(s, prefix) {
507 return s[len(prefix):]
509 return s
512 // TrimSuffix returns s without the provided trailing suffix string.
513 // If s doesn't end with suffix, s is returned unchanged.
514 func TrimSuffix(s, suffix []byte) []byte {
515 if HasSuffix(s, suffix) {
516 return s[:len(s)-len(suffix)]
518 return s
521 // IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
522 // It returns the byte index in s of the first Unicode
523 // code point satisfying f(c), or -1 if none do.
524 func IndexFunc(s []byte, f func(r rune) bool) int {
525 return indexFunc(s, f, true)
528 // LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
529 // It returns the byte index in s of the last Unicode
530 // code point satisfying f(c), or -1 if none do.
531 func LastIndexFunc(s []byte, f func(r rune) bool) int {
532 return lastIndexFunc(s, f, true)
// indexFunc is the same as IndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
	for pos := 0; pos < len(s); {
		// Single-byte fast path; fall back to a full decode otherwise.
		cur, step := rune(s[pos]), 1
		if cur >= utf8.RuneSelf {
			cur, step = utf8.DecodeRune(s[pos:])
		}
		if f(cur) == truth {
			return pos
		}
		pos += step
	}
	return -1
}
// lastIndexFunc is the same as LastIndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted.
func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
	end := len(s)
	for end > 0 {
		// Single-byte fast path; fall back to a full decode otherwise.
		cur, width := rune(s[end-1]), 1
		if cur >= utf8.RuneSelf {
			cur, width = utf8.DecodeLastRune(s[:end])
		}
		end -= width // end is now the start offset of cur
		if f(cur) != truth {
			continue
		}
		return end
	}
	return -1
}
// makeCutsetFunc returns a predicate that reports whether its argument is
// one of the code points obtained by ranging over cutset.
func makeCutsetFunc(cutset string) func(r rune) bool {
	inCutset := func(r rune) bool {
		for _, member := range cutset {
			if member == r {
				return true
			}
		}
		return false
	}
	return inCutset
}
582 // Trim returns a subslice of s by slicing off all leading and
583 // trailing UTF-8-encoded Unicode code points contained in cutset.
584 func Trim(s []byte, cutset string) []byte {
585 return TrimFunc(s, makeCutsetFunc(cutset))
588 // TrimLeft returns a subslice of s by slicing off all leading
589 // UTF-8-encoded Unicode code points contained in cutset.
590 func TrimLeft(s []byte, cutset string) []byte {
591 return TrimLeftFunc(s, makeCutsetFunc(cutset))
594 // TrimRight returns a subslice of s by slicing off all trailing
595 // UTF-8-encoded Unicode code points that are contained in cutset.
596 func TrimRight(s []byte, cutset string) []byte {
597 return TrimRightFunc(s, makeCutsetFunc(cutset))
600 // TrimSpace returns a subslice of s by slicing off all leading and
601 // trailing white space, as defined by Unicode.
602 func TrimSpace(s []byte) []byte {
603 return TrimFunc(s, unicode.IsSpace)
// Runes returns a slice of runes (Unicode code points) equivalent to s.
// Each invalid UTF-8 byte decodes to utf8.RuneError, occupying one slot.
func Runes(s []byte) []rune {
	// RuneCount and the DecodeRune loop below step through s the same way,
	// so t has exactly one slot per decoded rune.
	t := make([]rune, utf8.RuneCount(s))
	i := 0
	for len(s) > 0 {
		r, l := utf8.DecodeRune(s)
		t[i] = r
		// Advance the output slot; otherwise every rune would land in t[0].
		i++
		s = s[l:]
	}
	return t
}
619 // Replace returns a copy of the slice s with the first n
620 // non-overlapping instances of old replaced by new.
621 // If old is empty, it matches at the beginning of the slice
622 // and after each UTF-8 sequence, yielding up to k+1 replacements
623 // for a k-rune slice.
624 // If n < 0, there is no limit on the number of replacements.
625 func Replace(s, old, new []byte, n int) []byte {
626 m := 0
627 if n != 0 {
628 // Compute number of replacements.
629 m = Count(s, old)
631 if m == 0 {
632 // Just return a copy.
633 return append([]byte(nil), s...)
635 if n < 0 || m < n {
636 n = m
639 // Apply replacements to buffer.
640 t := make([]byte, len(s)+n*(len(new)-len(old)))
641 w := 0
642 start := 0
643 for i := 0; i < n; i++ {
644 j := start
645 if len(old) == 0 {
646 if i > 0 {
647 _, wid := utf8.DecodeRune(s[start:])
648 j += wid
650 } else {
651 j += Index(s[start:], old)
653 w += copy(t[w:], s[start:j])
654 w += copy(t[w:], new)
655 start = j + len(old)
657 w += copy(t[w:], s[start:])
658 return t[0:w]
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
func EqualFold(s, t []byte) bool {
	// next pops the leading code point off a non-empty b, taking a fast
	// path for single-byte values, and returns it with the remainder.
	next := func(b []byte) (rune, []byte) {
		if b[0] < utf8.RuneSelf {
			return rune(b[0]), b[1:]
		}
		r, size := utf8.DecodeRune(b)
		return r, b[size:]
	}

	for len(s) != 0 && len(t) != 0 {
		var lo, hi rune
		lo, s = next(s)
		hi, t = next(t)

		// Identical code points trivially match.
		if lo == hi {
			continue
		}

		// Order the pair so that lo < hi; this simplifies what follows.
		if hi < lo {
			lo, hi = hi, lo
		}

		// ASCII fast path: with lo upper case, hi must be its lower case.
		if hi < utf8.RuneSelf && 'A' <= lo && lo <= 'Z' {
			if hi != lo+'a'-'A' {
				return false
			}
			continue
		}

		// General case. SimpleFold(x) returns the next equivalent rune > x
		// or wraps around to smaller values, so walk lo's fold orbit upward
		// until it reaches or passes hi, or wraps back to lo.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// At least one input is exhausted; they are equal only if both are.
	return len(s) == len(t)
}