1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package strings implements simple functions to manipulate strings.
13 // explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n < 0 means no limit).
14 // Invalid UTF-8 sequences become correct encodings of U+FFFD.
15 func explode(s
string, n
int) []string {
19 l
:= utf8
.RuneCountInString(s
)
23 a
:= make([]string, n
)
28 ch
, size
= utf8
.DecodeRuneInString(s
[cur
:])
29 if ch
== utf8
.RuneError
{
30 a
[i
] = string(utf8
.RuneError
)
32 a
[i
] = s
[cur
: cur
+size
]
36 // add the rest, if there is any
43 // primeRK is the prime base used in Rabin-Karp algorithm.
44 const primeRK
= 16777619
46 // hashstr returns the hash and the appropriate multiplicative
47 // factor for use in Rabin-Karp algorithm.
48 func hashstr(sep
string) (uint32, uint32) {
50 for i
:= 0; i
< len(sep
); i
++ {
51 hash
= hash
*primeRK
+ uint32(sep
[i
])
54 var pow
, sq
uint32 = 1, primeRK
55 for i
:= len(sep
); i
> 0; i
>>= 1 {
64 // Count counts the number of non-overlapping instances of sep in s.
65 func Count(s
, sep
string) int {
70 return utf8
.RuneCountInString(s
) + 1
72 // special case worth making fast
74 for i
:= 0; i
< len(s
); i
++ {
80 case len(sep
) > len(s
):
82 case len(sep
) == len(s
):
88 hashsep
, pow
:= hashstr(sep
)
90 for i
:= 0; i
< len(sep
); i
++ {
91 h
= h
*primeRK
+ uint32(s
[i
])
94 if h
== hashsep
&& s
[:len(sep
)] == sep
{
98 for i
:= len(sep
); i
< len(s
); {
101 h
-= pow
* uint32(s
[i
-len(sep
)])
103 if h
== hashsep
&& lastmatch
<= i
-len(sep
) && s
[i
-len(sep
):i
] == sep
{
111 // Contains returns true if substr is within s.
112 func Contains(s
, substr
string) bool {
113 return Index(s
, substr
) >= 0
116 // ContainsAny returns true if any Unicode code points in chars are within s.
117 func ContainsAny(s
, chars
string) bool {
118 return IndexAny(s
, chars
) >= 0
121 // ContainsRune returns true if the Unicode code point r is within s.
122 func ContainsRune(s
string, r rune
) bool {
123 return IndexRune(s
, r
) >= 0
126 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
127 func Index(s
, sep
string) int {
134 // special case worth making fast
135 for i
:= 0; i
< len(s
); i
++ {
150 hashsep
, pow
:= hashstr(sep
)
152 for i
:= 0; i
< n
; i
++ {
153 h
= h
*primeRK
+ uint32(s
[i
])
155 if h
== hashsep
&& s
[:n
] == sep
{
158 for i
:= n
; i
< len(s
); {
161 h
-= pow
* uint32(s
[i
-n
])
163 if h
== hashsep
&& s
[i
-n
:i
] == sep
{
170 // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
171 func LastIndex(s
, sep
string) int {
178 // special case worth making fast
179 for i
:= len(s
) - 1; i
>= 0; i
-- {
187 for i
:= len(s
) - n
; i
>= 0; i
-- {
188 if s
[i
] == c
&& s
[i
:i
+n
] == sep
{
195 // IndexRune returns the index of the first instance of the Unicode code point
196 // r, or -1 if rune is not present in s.
197 func IndexRune(s
string, r rune
) int {
201 for i
:= 0; i
< len(s
); i
++ {
207 for i
, c
:= range s
{
216 // IndexAny returns the index of the first instance of any Unicode code point
217 // from chars in s, or -1 if no Unicode code point from chars is present in s.
218 func IndexAny(s
, chars
string) int {
220 for i
, c
:= range s
{
221 for _
, m
:= range chars
{
231 // LastIndexAny returns the index of the last instance of any Unicode code
232 // point from chars is present in s.
234 func LastIndexAny(s
, chars
string) int {
236 for i
:= len(s
); i
> 0; {
237 rune
, size
:= utf8
.DecodeLastRuneInString(s
[0:i
])
239 for _
, m
:= range chars
{
249 // Generic split: splits after each instance of sep,
250 // including sepSave bytes of sep in the subarrays.
251 func genSplit(s
, sep
string, sepSave
, n
int) []string {
259 n
= Count(s
, sep
) + 1
263 a
:= make([]string, n
)
265 for i
:= 0; i
+len(sep
) <= len(s
) && na
+1 < n
; i
++ {
266 if s
[i
] == c
&& (len(sep
) == 1 || s
[i
:i
+len(sep
)] == sep
) {
267 a
[na
] = s
[start
: i
+sepSave
]
277 // SplitN slices s into substrings separated by sep and returns a slice of
278 // the substrings between those separators.
279 // If sep is empty, SplitN splits after each UTF-8 sequence.
280 // The count determines the number of substrings to return:
281 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
282 // n == 0: the result is nil (zero substrings)
283 // n < 0: all substrings
284 func SplitN(s
, sep
string, n
int) []string { return genSplit(s
, sep
, 0, n
) }
286 // SplitAfterN slices s into substrings after each instance of sep and
287 // returns a slice of those substrings.
288 // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
289 // The count determines the number of substrings to return:
290 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
291 // n == 0: the result is nil (zero substrings)
292 // n < 0: all substrings
293 func SplitAfterN(s
, sep
string, n
int) []string {
294 return genSplit(s
, sep
, len(sep
), n
)
297 // Split slices s into all substrings separated by sep and returns a slice of
298 // the substrings between those separators.
299 // If sep is empty, Split splits after each UTF-8 sequence.
300 // It is equivalent to SplitN with a count of -1.
301 func Split(s
, sep
string) []string { return genSplit(s
, sep
, 0, -1) }
303 // SplitAfter slices s into all substrings after each instance of sep and
304 // returns a slice of those substrings.
305 // If sep is empty, SplitAfter splits after each UTF-8 sequence.
306 // It is equivalent to SplitAfterN with a count of -1.
307 func SplitAfter(s
, sep
string) []string {
308 return genSplit(s
, sep
, len(sep
), -1)
311 // Fields splits the string s around each instance of one or more consecutive white space
312 // characters, as defined by unicode.IsSpace, returning an array of substrings of s or an
313 // empty list if s contains only white space.
314 func Fields(s
string) []string {
315 return FieldsFunc(s
, unicode
.IsSpace
)
318 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
319 // and returns an array of slices of s. If all code points in s satisfy f(c) or the
320 // string is empty, an empty slice is returned.
321 func FieldsFunc(s
string, f
func(rune
) bool) []string {
322 // First count the fields.
325 for _
, rune
:= range s
{
326 wasInField
:= inField
328 if inField
&& !wasInField
{
334 a
:= make([]string, n
)
336 fieldStart
:= -1 // Set to -1 when looking for start of field.
337 for i
, rune
:= range s
{
340 a
[na
] = s
[fieldStart
:i
]
344 } else if fieldStart
== -1 {
348 if fieldStart
>= 0 { // Last field might end at EOF.
349 a
[na
] = s
[fieldStart
:]
354 // Join concatenates the elements of a to create a single string. The separator string
355 // sep is placed between elements in the resulting string.
356 func Join(a
[]string, sep
string) string {
363 n
:= len(sep
) * (len(a
) - 1)
364 for i
:= 0; i
< len(a
); i
++ {
370 for _
, s
:= range a
[1:] {
371 bp
+= copy(b
[bp
:], sep
)
372 bp
+= copy(b
[bp
:], s
)
377 // HasPrefix tests whether the string s begins with prefix.
378 func HasPrefix(s
, prefix
string) bool {
379 return len(s
) >= len(prefix
) && s
[0:len(prefix
)] == prefix
382 // HasSuffix tests whether the string s ends with suffix.
383 func HasSuffix(s
, suffix
string) bool {
384 return len(s
) >= len(suffix
) && s
[len(s
)-len(suffix
):] == suffix
387 // Map returns a copy of the string s with all its characters modified
388 // according to the mapping function. If mapping returns a negative value, the character is
389 // dropped from the string with no replacement.
390 func Map(mapping
func(rune
) rune
, s
string) string {
391 // In the worst case, the string can grow when mapped, making
392 // things unpleasant. But it's so rare we barge in assuming it's
393 // fine. It could also shrink but that falls out naturally.
394 maxbytes
:= len(s
) // length of b
395 nbytes
:= 0 // number of bytes encoded in b
396 // The output buffer b is initialized on demand, the first
397 // time a character differs.
400 for i
, c
:= range s
{
406 b
= make([]byte, maxbytes
)
407 nbytes
= copy(b
, s
[:i
])
411 if r
>= utf8
.RuneSelf
{
412 wid
= utf8
.RuneLen(r
)
414 if nbytes
+wid
> maxbytes
{
416 maxbytes
= maxbytes
*2 + utf8
.UTFMax
417 nb
:= make([]byte, maxbytes
)
418 copy(nb
, b
[0:nbytes
])
421 nbytes
+= utf8
.EncodeRune(b
[nbytes
:maxbytes
], r
)
427 return string(b
[0:nbytes
])
430 // Repeat returns a new string consisting of count copies of the string s.
431 func Repeat(s
string, count
int) string {
432 b
:= make([]byte, len(s
)*count
)
434 for i
:= 0; i
< count
; i
++ {
435 for j
:= 0; j
< len(s
); j
++ {
443 // ToUpper returns a copy of the string s with all Unicode letters mapped to their upper case.
444 func ToUpper(s
string) string { return Map(unicode
.ToUpper
, s
) }
446 // ToLower returns a copy of the string s with all Unicode letters mapped to their lower case.
447 func ToLower(s
string) string { return Map(unicode
.ToLower
, s
) }
449 // ToTitle returns a copy of the string s with all Unicode letters mapped to their title case.
450 func ToTitle(s
string) string { return Map(unicode
.ToTitle
, s
) }
452 // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their
453 // upper case, giving priority to the special casing rules.
454 func ToUpperSpecial(_case unicode
.SpecialCase
, s
string) string {
455 return Map(func(r rune
) rune
{ return _case
.ToUpper(r
) }, s
)
458 // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their
459 // lower case, giving priority to the special casing rules.
460 func ToLowerSpecial(_case unicode
.SpecialCase
, s
string) string {
461 return Map(func(r rune
) rune
{ return _case
.ToLower(r
) }, s
)
464 // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their
465 // title case, giving priority to the special casing rules.
466 func ToTitleSpecial(_case unicode
.SpecialCase
, s
string) string {
467 return Map(func(r rune
) rune
{ return _case
.ToTitle(r
) }, s
)
470 // isSeparator reports whether the rune could mark a word boundary.
471 // TODO: update when package unicode captures more of the properties.
472 func isSeparator(r rune
) bool {
473 // ASCII alphanumerics and underscore are not separators
476 case '0' <= r
&& r
<= '9':
478 case 'a' <= r
&& r
<= 'z':
480 case 'A' <= r
&& r
<= 'Z':
487 // Letters and digits are not separators
488 if unicode
.IsLetter(r
) || unicode
.IsDigit(r
) {
491 // Otherwise, all we can do for now is treat spaces as separators.
492 return unicode
.IsSpace(r
)
495 // Title returns a copy of the string s with all Unicode letters that begin words
496 // mapped to their title case.
498 // BUG: The rule Title uses for word boundaries does not handle Unicode punctuation properly.
499 func Title(s
string) string {
500 // Use a closure here to remember state.
501 // Hackish but effective. Depends on Map scanning in order and calling
502 // the closure once per rune.
506 if isSeparator(prev
) {
508 return unicode
.ToTitle(r
)
516 // TrimLeftFunc returns a slice of the string s with all leading
517 // Unicode code points c satisfying f(c) removed.
518 func TrimLeftFunc(s
string, f
func(rune
) bool) string {
519 i
:= indexFunc(s
, f
, false)
526 // TrimRightFunc returns a slice of the string s with all trailing
527 // Unicode code points c satisfying f(c) removed.
528 func TrimRightFunc(s
string, f
func(rune
) bool) string {
529 i
:= lastIndexFunc(s
, f
, false)
530 if i
>= 0 && s
[i
] >= utf8
.RuneSelf
{
531 _
, wid
:= utf8
.DecodeRuneInString(s
[i
:])
539 // TrimFunc returns a slice of the string s with all leading
540 // and trailing Unicode code points c satisfying f(c) removed.
541 func TrimFunc(s
string, f
func(rune
) bool) string {
542 return TrimRightFunc(TrimLeftFunc(s
, f
), f
)
545 // IndexFunc returns the index into s of the first Unicode
546 // code point satisfying f(c), or -1 if none do.
547 func IndexFunc(s
string, f
func(rune
) bool) int {
548 return indexFunc(s
, f
, true)
551 // LastIndexFunc returns the index into s of the last
552 // Unicode code point satisfying f(c), or -1 if none do.
553 func LastIndexFunc(s
string, f
func(rune
) bool) int {
554 return lastIndexFunc(s
, f
, true)
557 // indexFunc is the same as IndexFunc except that if
558 // truth==false, the sense of the predicate function is inverted.
560 func indexFunc(s
string, f
func(rune
) bool, truth
bool) int {
565 if r
>= utf8
.RuneSelf
{
566 r
, wid
= utf8
.DecodeRuneInString(s
[start
:])
576 // lastIndexFunc is the same as LastIndexFunc except that if
577 // truth==false, the sense of the predicate function is inverted.
579 func lastIndexFunc(s
string, f
func(rune
) bool, truth
bool) int {
580 for i
:= len(s
); i
> 0; {
581 r
, size
:= utf8
.DecodeLastRuneInString(s
[0:i
])
590 func makeCutsetFunc(cutset
string) func(rune
) bool {
591 return func(r rune
) bool { return IndexRune(cutset
, r
) >= 0 }
594 // Trim returns a slice of the string s with all leading and
595 // trailing Unicode code points contained in cutset removed.
596 func Trim(s
string, cutset
string) string {
597 if s
== "" || cutset
== "" {
600 return TrimFunc(s
, makeCutsetFunc(cutset
))
603 // TrimLeft returns a slice of the string s with all leading
604 // Unicode code points contained in cutset removed.
605 func TrimLeft(s
string, cutset
string) string {
606 if s
== "" || cutset
== "" {
609 return TrimLeftFunc(s
, makeCutsetFunc(cutset
))
612 // TrimRight returns a slice of the string s, with all trailing
613 // Unicode code points contained in cutset removed.
614 func TrimRight(s
string, cutset
string) string {
615 if s
== "" || cutset
== "" {
618 return TrimRightFunc(s
, makeCutsetFunc(cutset
))
621 // TrimSpace returns a slice of the string s, with all leading
622 // and trailing white space removed, as defined by Unicode.
623 func TrimSpace(s
string) string {
624 return TrimFunc(s
, unicode
.IsSpace
)
627 // TrimPrefix returns s without the provided leading prefix string.
628 // If s doesn't start with prefix, s is returned unchanged.
629 func TrimPrefix(s
, prefix
string) string {
630 if HasPrefix(s
, prefix
) {
631 return s
[len(prefix
):]
636 // TrimSuffix returns s without the provided trailing suffix string.
637 // If s doesn't end with suffix, s is returned unchanged.
638 func TrimSuffix(s
, suffix
string) string {
639 if HasSuffix(s
, suffix
) {
640 return s
[:len(s
)-len(suffix
)]
645 // Replace returns a copy of the string s with the first n
646 // non-overlapping instances of old replaced by new.
647 // If n < 0, there is no limit on the number of replacements.
648 func Replace(s
, old
, new string, n
int) string {
649 if old
== new || n
== 0 {
650 return s
// avoid allocation
653 // Compute number of replacements.
654 if m
:= Count(s
, old
); m
== 0 {
655 return s
// avoid allocation
656 } else if n
< 0 || m
< n
{
660 // Apply replacements to buffer.
661 t
:= make([]byte, len(s
)+n
*(len(new)-len(old
)))
664 for i
:= 0; i
< n
; i
++ {
668 _
, wid
:= utf8
.DecodeRuneInString(s
[start
:])
672 j
+= Index(s
[start
:], old
)
674 w
+= copy(t
[w
:], s
[start
:j
])
675 w
+= copy(t
[w
:], new)
678 w
+= copy(t
[w
:], s
[start
:])
679 return string(t
[0:w
])
682 // EqualFold reports whether s and t, interpreted as UTF-8 strings,
683 // are equal under Unicode case-folding.
684 func EqualFold(s
, t
string) bool {
685 for s
!= "" && t
!= "" {
686 // Extract first rune from each string.
688 if s
[0] < utf8
.RuneSelf
{
689 sr
, s
= rune(s
[0]), s
[1:]
691 r
, size
:= utf8
.DecodeRuneInString(s
)
694 if t
[0] < utf8
.RuneSelf
{
695 tr
, t
= rune(t
[0]), t
[1:]
697 r
, size
:= utf8
.DecodeRuneInString(t
)
701 // If they match, keep going; if not, return false.
708 // Make sr < tr to simplify what follows.
712 // Fast check for ASCII.
713 if tr
< utf8
.RuneSelf
&& 'A' <= sr
&& sr
<= 'Z' {
714 // ASCII, and sr is upper case. tr must be lower case.
715 if tr
== sr
+'a'-'A' {
721 // General case. SimpleFold(x) returns the next equivalent rune > x
722 // or wraps around to smaller values.
723 r
:= unicode
.SimpleFold(sr
)
724 for r
!= sr
&& r
< tr
{
725 r
= unicode
.SimpleFold(r
)
733 // One string is empty. Are both?