1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
21 #include "base/basictypes.h"
22 #include "base/logging.h"
23 #include "base/memory/singleton.h"
24 #include "base/strings/string_split.h"
25 #include "base/strings/utf_string_conversion_utils.h"
26 #include "base/strings/utf_string_conversions.h"
27 #include "base/third_party/icu/icu_utf.h"
28 #include "build/build_config.h"
30 // Remove when this entire file is in the base namespace.
36 // Force the singleton used by EmptyString[16] to be a unique type. This
37 // prevents other code that might accidentally use Singleton<string> from
38 // getting our internal one.
44 static EmptyStrings
* GetInstance() {
45 return Singleton
<EmptyStrings
>::get();
49 // Used by ReplaceStringPlaceholders to track the position in the string of
50 // replaced parameters.
struct ReplacementOffset {
  // Records that placeholder number |parameter| was expanded at position
  // |offset| of the output string.
  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter),
        offset(offset) {}

  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;
};

// Ordering predicate: compares two ReplacementOffsets by parameter index
// only; the |offset| field does not take part in the comparison.
static bool CompareParameter(const ReplacementOffset& elem1,
                             const ReplacementOffset& elem2) {
  return elem1.parameter < elem2.parameter;
}
68 // Assuming that a pointer is the size of a "machine word", then
69 // uintptr_t is an integer type that is also a machine word.
typedef uintptr_t MachineWord;
const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;

// The mask arithmetic below is only meaningful when the word size is a power
// of two; make that assumption a compile-time check.
static_assert((sizeof(MachineWord) & (sizeof(MachineWord) - 1)) == 0,
              "sizeof(MachineWord) must be a power of two");

// Returns true when the address held in |pointer| has none of the low
// alignment-mask bits set, i.e. it is a multiple of sizeof(MachineWord).
inline bool IsAlignedToMachineWord(const void* pointer) {
  return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
}

// Clears the low alignment-mask bits of |pointer|, which rounds the address
// down to the nearest multiple of sizeof(MachineWord).
template<typename T> inline T* AlignToMachineWord(T* pointer) {
  return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
                              ~kMachineWordAlignmentMask);
}
82 template<size_t size
, typename CharacterType
> struct NonASCIIMask
;
83 template<> struct NonASCIIMask
<4, base::char16
> {
84 static inline uint32_t value() { return 0xFF80FF80U
; }
86 template<> struct NonASCIIMask
<4, char> {
87 static inline uint32_t value() { return 0x80808080U
; }
89 template<> struct NonASCIIMask
<8, base::char16
> {
90 static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL
; }
92 template<> struct NonASCIIMask
<8, char> {
93 static inline uint64_t value() { return 0x8080808080808080ULL
; }
95 #if defined(WCHAR_T_IS_UTF32)
96 template<> struct NonASCIIMask
<4, wchar_t> {
97 static inline uint32_t value() { return 0xFFFFFF80U
; }
99 template<> struct NonASCIIMask
<8, wchar_t> {
100 static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL
; }
102 #endif // WCHAR_T_IS_UTF32
// Scans a wide printf-style |format| string and reports whether every
// conversion specification in it is portable.
//
// Returns false as soon as a specification uses 's' or 'c' without a
// preceding 'l' modifier, or uses any of 'S', 'C', 'F', 'D', 'O', 'U'.
// Returns true otherwise, including when the string ends in the middle of a
// specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* position = format; *position != '\0'; ++position) {
    if (*position == '%') {
      bool in_specification = true;
      bool modifier_l = false;
      while (in_specification) {
        // Eat up characters until reaching a known specifier.
        if (*++position == '\0') {
          // The format string ended in the middle of a specification.  Call
          // it portable because no unportable specifications were found.  The
          // string is equally broken on all platforms.
          return true;
        }

        if (*position == 'l') {
          // 'l' is the only thing that can save the 's' and 'c' specifiers.
          modifier_l = true;
        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
                   *position == 'S' || *position == 'C' || *position == 'F' ||
                   *position == 'D' || *position == 'O' || *position == 'U') {
          // Not portable.
          return false;
        }

        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
          // Portable, keep scanning the rest of the format string.
          in_specification = false;
        }
      }
    }
  }

  // Reached the end without meeting an unportable specification.
  return true;
}
143 const std::string
& EmptyString() {
144 return EmptyStrings::GetInstance()->s
;
147 const string16
& EmptyString16() {
148 return EmptyStrings::GetInstance()->s16
;
151 template<typename STR
>
152 bool ReplaceCharsT(const STR
& input
,
153 const STR
& replace_chars
,
154 const STR
& replace_with
,
156 bool removed
= false;
157 size_t replace_length
= replace_with
.length();
161 size_t found
= output
->find_first_of(replace_chars
);
162 while (found
!= STR::npos
) {
164 output
->replace(found
, 1, replace_with
);
165 found
= output
->find_first_of(replace_chars
, found
+ replace_length
);
171 bool ReplaceChars(const string16
& input
,
172 const base::StringPiece16
& replace_chars
,
173 const string16
& replace_with
,
175 return ReplaceCharsT(input
, replace_chars
.as_string(), replace_with
, output
);
178 bool ReplaceChars(const std::string
& input
,
179 const base::StringPiece
& replace_chars
,
180 const std::string
& replace_with
,
181 std::string
* output
) {
182 return ReplaceCharsT(input
, replace_chars
.as_string(), replace_with
, output
);
185 bool RemoveChars(const string16
& input
,
186 const base::StringPiece16
& remove_chars
,
188 return ReplaceChars(input
, remove_chars
.as_string(), string16(), output
);
191 bool RemoveChars(const std::string
& input
,
192 const base::StringPiece
& remove_chars
,
193 std::string
* output
) {
194 return ReplaceChars(input
, remove_chars
.as_string(), std::string(), output
);
197 template<typename Str
>
198 TrimPositions
TrimStringT(const Str
& input
,
199 BasicStringPiece
<Str
> trim_chars
,
200 TrimPositions positions
,
202 // Find the edges of leading/trailing whitespace as desired. Need to use
203 // a StringPiece version of input to be able to call find* on it with the
204 // StringPiece version of trim_chars (normally the trim_chars will be a
205 // constant so avoid making a copy).
206 BasicStringPiece
<Str
> input_piece(input
);
207 const size_t last_char
= input
.length() - 1;
208 const size_t first_good_char
= (positions
& TRIM_LEADING
) ?
209 input_piece
.find_first_not_of(trim_chars
) : 0;
210 const size_t last_good_char
= (positions
& TRIM_TRAILING
) ?
211 input_piece
.find_last_not_of(trim_chars
) : last_char
;
213 // When the string was all trimmed, report that we stripped off characters
214 // from whichever position the caller was interested in. For empty input, we
215 // stripped no characters, but we still need to clear |output|.
217 (first_good_char
== Str::npos
) || (last_good_char
== Str::npos
)) {
218 bool input_was_empty
= input
.empty(); // in case output == &input
220 return input_was_empty
? TRIM_NONE
: positions
;
225 input
.substr(first_good_char
, last_good_char
- first_good_char
+ 1);
227 // Return where we trimmed from.
228 return static_cast<TrimPositions
>(
229 ((first_good_char
== 0) ? TRIM_NONE
: TRIM_LEADING
) |
230 ((last_good_char
== last_char
) ? TRIM_NONE
: TRIM_TRAILING
));
233 bool TrimString(const string16
& input
,
234 base::StringPiece16 trim_chars
,
236 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
239 bool TrimString(const std::string
& input
,
240 base::StringPiece trim_chars
,
241 std::string
* output
) {
242 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
245 template<typename Str
>
246 BasicStringPiece
<Str
> TrimStringPieceT(BasicStringPiece
<Str
> input
,
247 BasicStringPiece
<Str
> trim_chars
,
248 TrimPositions positions
) {
249 size_t begin
= (positions
& TRIM_LEADING
) ?
250 input
.find_first_not_of(trim_chars
) : 0;
251 size_t end
= (positions
& TRIM_TRAILING
) ?
252 input
.find_last_not_of(trim_chars
) + 1 : input
.size();
253 return input
.substr(begin
, end
- begin
);
256 StringPiece16
TrimString(StringPiece16 input
,
257 const base::StringPiece16
& trim_chars
,
258 TrimPositions positions
) {
259 return TrimStringPieceT(input
, trim_chars
, positions
);
262 StringPiece
TrimString(StringPiece input
,
263 const base::StringPiece
& trim_chars
,
264 TrimPositions positions
) {
265 return TrimStringPieceT(input
, trim_chars
, positions
);
268 void TruncateUTF8ToByteSize(const std::string
& input
,
269 const size_t byte_size
,
270 std::string
* output
) {
272 if (byte_size
> input
.length()) {
276 DCHECK_LE(byte_size
, static_cast<uint32
>(kint32max
));
277 // Note: This cast is necessary because CBU8_NEXT uses int32s.
278 int32 truncation_length
= static_cast<int32
>(byte_size
);
279 int32 char_index
= truncation_length
- 1;
280 const char* data
= input
.data();
282 // Using CBU8, we will move backwards from the truncation point
283 // to the beginning of the string looking for a valid UTF8
284 // character. Once a full UTF8 character is found, we will
285 // truncate the string to the end of that character.
286 while (char_index
>= 0) {
287 int32 prev
= char_index
;
288 base_icu::UChar32 code_point
= 0;
289 CBU8_NEXT(data
, char_index
, truncation_length
, code_point
);
290 if (!IsValidCharacter(code_point
) ||
291 !IsValidCodepoint(code_point
)) {
292 char_index
= prev
- 1;
298 if (char_index
>= 0 )
299 *output
= input
.substr(0, char_index
);
304 TrimPositions
TrimWhitespace(const string16
& input
,
305 TrimPositions positions
,
307 return TrimStringT(input
, StringPiece16(kWhitespaceUTF16
), positions
, output
);
310 StringPiece16
TrimWhitespaceASCII(StringPiece16 input
,
311 TrimPositions positions
) {
312 return TrimStringPieceT(input
, StringPiece16(kWhitespaceUTF16
), positions
);
315 TrimPositions
TrimWhitespaceASCII(const std::string
& input
,
316 TrimPositions positions
,
317 std::string
* output
) {
318 return TrimStringT(input
, StringPiece(kWhitespaceASCII
), positions
, output
);
321 StringPiece
TrimWhitespaceASCII(StringPiece input
, TrimPositions positions
) {
322 return TrimStringPieceT(input
, StringPiece(kWhitespaceASCII
), positions
);
325 // This function is only for backward-compatibility.
326 // To be removed when all callers are updated.
327 TrimPositions
TrimWhitespace(const std::string
& input
,
328 TrimPositions positions
,
329 std::string
* output
) {
330 return TrimWhitespaceASCII(input
, positions
, output
);
333 template<typename STR
>
334 STR
CollapseWhitespaceT(const STR
& text
,
335 bool trim_sequences_with_line_breaks
) {
337 result
.resize(text
.size());
339 // Set flags to pretend we're already in a trimmed whitespace sequence, so we
340 // will trim any leading whitespace.
341 bool in_whitespace
= true;
342 bool already_trimmed
= true;
344 int chars_written
= 0;
345 for (typename
STR::const_iterator
i(text
.begin()); i
!= text
.end(); ++i
) {
346 if (IsUnicodeWhitespace(*i
)) {
347 if (!in_whitespace
) {
348 // Reduce all whitespace sequences to a single space.
349 in_whitespace
= true;
350 result
[chars_written
++] = L
' ';
352 if (trim_sequences_with_line_breaks
&& !already_trimmed
&&
353 ((*i
== '\n') || (*i
== '\r'))) {
354 // Whitespace sequences containing CR or LF are eliminated entirely.
355 already_trimmed
= true;
359 // Non-whitespace characters are copied straight across.
360 in_whitespace
= false;
361 already_trimmed
= false;
362 result
[chars_written
++] = *i
;
366 if (in_whitespace
&& !already_trimmed
) {
367 // Any trailing whitespace is eliminated.
371 result
.resize(chars_written
);
375 string16
CollapseWhitespace(const string16
& text
,
376 bool trim_sequences_with_line_breaks
) {
377 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
380 std::string
CollapseWhitespaceASCII(const std::string
& text
,
381 bool trim_sequences_with_line_breaks
) {
382 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
385 bool ContainsOnlyChars(const StringPiece
& input
,
386 const StringPiece
& characters
) {
387 return input
.find_first_not_of(characters
) == StringPiece::npos
;
390 bool ContainsOnlyChars(const StringPiece16
& input
,
391 const StringPiece16
& characters
) {
392 return input
.find_first_not_of(characters
) == StringPiece16::npos
;
395 template <class Char
>
396 inline bool DoIsStringASCII(const Char
* characters
, size_t length
) {
397 MachineWord all_char_bits
= 0;
398 const Char
* end
= characters
+ length
;
400 // Prologue: align the input.
401 while (!IsAlignedToMachineWord(characters
) && characters
!= end
) {
402 all_char_bits
|= *characters
;
406 // Compare the values of CPU word size.
407 const Char
* word_end
= AlignToMachineWord(end
);
408 const size_t loop_increment
= sizeof(MachineWord
) / sizeof(Char
);
409 while (characters
< word_end
) {
410 all_char_bits
|= *(reinterpret_cast<const MachineWord
*>(characters
));
411 characters
+= loop_increment
;
414 // Process the remaining bytes.
415 while (characters
!= end
) {
416 all_char_bits
|= *characters
;
420 MachineWord non_ascii_bit_mask
=
421 NonASCIIMask
<sizeof(MachineWord
), Char
>::value();
422 return !(all_char_bits
& non_ascii_bit_mask
);
425 bool IsStringASCII(const StringPiece
& str
) {
426 return DoIsStringASCII(str
.data(), str
.length());
429 bool IsStringASCII(const StringPiece16
& str
) {
430 return DoIsStringASCII(str
.data(), str
.length());
433 bool IsStringASCII(const string16
& str
) {
434 return DoIsStringASCII(str
.data(), str
.length());
437 #if defined(WCHAR_T_IS_UTF32)
438 bool IsStringASCII(const std::wstring
& str
) {
439 return DoIsStringASCII(str
.data(), str
.length());
443 bool IsStringUTF8(const StringPiece
& str
) {
444 const char *src
= str
.data();
445 int32 src_len
= static_cast<int32
>(str
.length());
446 int32 char_index
= 0;
448 while (char_index
< src_len
) {
450 CBU8_NEXT(src
, char_index
, src_len
, code_point
);
451 if (!IsValidCharacter(code_point
))
457 template<typename Iter
>
458 static inline bool DoLowerCaseEqualsASCII(Iter a_begin
,
461 for (Iter it
= a_begin
; it
!= a_end
; ++it
, ++b
) {
462 if (!*b
|| ToLowerASCII(*it
) != *b
)
468 // Front-ends for LowerCaseEqualsASCII.
469 bool LowerCaseEqualsASCII(const std::string
& a
, const char* b
) {
470 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
473 bool LowerCaseEqualsASCII(const string16
& a
, const char* b
) {
474 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
477 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin
,
478 std::string::const_iterator a_end
,
480 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
483 bool LowerCaseEqualsASCII(string16::const_iterator a_begin
,
484 string16::const_iterator a_end
,
486 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
489 bool LowerCaseEqualsASCII(const char* a_begin
,
492 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
495 bool LowerCaseEqualsASCII(const char* a_begin
,
499 while (a_begin
!= a_end
&& b_begin
!= b_end
&&
500 ToLowerASCII(*a_begin
) == *b_begin
) {
504 return a_begin
== a_end
&& b_begin
== b_end
;
507 bool LowerCaseEqualsASCII(const char16
* a_begin
,
510 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
513 bool EqualsASCII(const string16
& a
, const StringPiece
& b
) {
514 if (a
.length() != b
.length())
516 return std::equal(b
.begin(), b
.end(), a
.begin());
519 template<typename Str
>
520 bool StartsWithT(BasicStringPiece
<Str
> str
,
521 BasicStringPiece
<Str
> search_for
,
522 CompareCase case_sensitivity
) {
523 if (search_for
.size() > str
.size())
526 BasicStringPiece
<Str
> source
= str
.substr(0, search_for
.size());
528 switch (case_sensitivity
) {
529 case CompareCase::SENSITIVE
:
530 return source
== search_for
;
532 case CompareCase::INSENSITIVE_ASCII
:
534 search_for
.begin(), search_for
.end(),
536 base::CaseInsensitiveCompareASCII
<typename
Str::value_type
>());
544 bool StartsWith(StringPiece str
,
545 StringPiece search_for
,
546 CompareCase case_sensitivity
) {
547 return StartsWithT
<std::string
>(str
, search_for
, case_sensitivity
);
550 bool StartsWith(StringPiece16 str
,
551 StringPiece16 search_for
,
552 CompareCase case_sensitivity
) {
553 return StartsWithT
<string16
>(str
, search_for
, case_sensitivity
);
556 bool StartsWith(const string16
& str
,
557 const string16
& search
,
558 bool case_sensitive
) {
559 if (!case_sensitive
) {
560 // This function was originally written using the current locale functions
561 // for case-insensitive comparisons. Emulate this behavior until callers
562 // can be converted either to use the case-insensitive ASCII one (most
563 // callers) or ICU functions in base_i18n.
564 if (search
.size() > str
.size())
566 return std::equal(search
.begin(), search
.end(), str
.begin(),
567 CaseInsensitiveCompare
<char16
>());
569 return StartsWith(StringPiece16(str
), StringPiece16(search
),
570 CompareCase::SENSITIVE
);
573 template <typename Str
>
574 bool EndsWithT(BasicStringPiece
<Str
> str
,
575 BasicStringPiece
<Str
> search_for
,
576 CompareCase case_sensitivity
) {
577 if (search_for
.size() > str
.size())
580 BasicStringPiece
<Str
> source
= str
.substr(str
.size() - search_for
.size(),
583 switch (case_sensitivity
) {
584 case CompareCase::SENSITIVE
:
585 return source
== search_for
;
587 case CompareCase::INSENSITIVE_ASCII
:
589 source
.begin(), source
.end(),
591 base::CaseInsensitiveCompareASCII
<typename
Str::value_type
>());
599 bool EndsWith(StringPiece str
,
600 StringPiece search_for
,
601 CompareCase case_sensitivity
) {
602 return EndsWithT
<std::string
>(str
, search_for
, case_sensitivity
);
605 bool EndsWith(StringPiece16 str
,
606 StringPiece16 search_for
,
607 CompareCase case_sensitivity
) {
608 return EndsWithT
<string16
>(str
, search_for
, case_sensitivity
);
611 bool EndsWith(const string16
& str
,
612 const string16
& search
,
613 bool case_sensitive
) {
614 if (!case_sensitive
) {
615 // This function was originally written using the current locale functions
616 // for case-insensitive comparisons. Emulate this behavior until callers
617 // can be converted either to use the case-insensitive ASCII one (most
618 // callers) or ICU functions in base_i18n.
619 if (search
.size() > str
.size())
621 return std::equal(search
.begin(), search
.end(),
622 str
.begin() + (str
.size() - search
.size()),
623 CaseInsensitiveCompare
<char16
>());
625 return EndsWith(StringPiece16(str
), StringPiece16(search
),
626 CompareCase::SENSITIVE
);
629 char HexDigitToInt(wchar_t c
) {
630 DCHECK(IsHexDigit(c
));
631 if (c
>= '0' && c
<= '9')
632 return static_cast<char>(c
- '0');
633 if (c
>= 'A' && c
<= 'F')
634 return static_cast<char>(c
- 'A' + 10);
635 if (c
>= 'a' && c
<= 'f')
636 return static_cast<char>(c
- 'a' + 10);
640 static const char* const kByteStringsUnlocalized
[] = {
649 string16
FormatBytesUnlocalized(int64 bytes
) {
650 double unit_amount
= static_cast<double>(bytes
);
651 size_t dimension
= 0;
652 const int kKilo
= 1024;
653 while (unit_amount
>= kKilo
&&
654 dimension
< arraysize(kByteStringsUnlocalized
) - 1) {
655 unit_amount
/= kKilo
;
660 if (bytes
!= 0 && dimension
> 0 && unit_amount
< 100) {
661 base::snprintf(buf
, arraysize(buf
), "%.1lf%s", unit_amount
,
662 kByteStringsUnlocalized
[dimension
]);
664 base::snprintf(buf
, arraysize(buf
), "%.0lf%s", unit_amount
,
665 kByteStringsUnlocalized
[dimension
]);
668 return ASCIIToUTF16(buf
);
671 // Runs in O(n) time in the length of |str|.
672 template<class StringType
>
673 void DoReplaceSubstringsAfterOffset(StringType
* str
,
675 BasicStringPiece
<StringType
> find_this
,
676 BasicStringPiece
<StringType
> replace_with
,
678 DCHECK(!find_this
.empty());
680 // If the find string doesn't appear, there's nothing to do.
681 offset
= str
->find(find_this
.data(), offset
, find_this
.size());
682 if (offset
== StringType::npos
)
685 // If we're only replacing one instance, there's no need to do anything
687 size_t find_length
= find_this
.length();
689 str
->replace(offset
, find_length
, replace_with
.data(), replace_with
.size());
693 // If the find and replace strings are the same length, we can simply use
694 // replace() on each instance, and finish the entire operation in O(n) time.
695 size_t replace_length
= replace_with
.length();
696 if (find_length
== replace_length
) {
698 str
->replace(offset
, find_length
,
699 replace_with
.data(), replace_with
.size());
700 offset
= str
->find(find_this
.data(), offset
+ replace_length
,
702 } while (offset
!= StringType::npos
);
706 // Since the find and replace strings aren't the same length, a loop like the
707 // one above would be O(n^2) in the worst case, as replace() will shift the
708 // entire remaining string each time. We need to be more clever to keep
711 // If we're shortening the string, we can alternate replacements with shifting
712 // forward the intervening characters using memmove().
713 size_t str_length
= str
->length();
714 if (find_length
> replace_length
) {
715 size_t write_offset
= offset
;
717 if (replace_length
) {
718 str
->replace(write_offset
, replace_length
,
719 replace_with
.data(), replace_with
.size());
720 write_offset
+= replace_length
;
722 size_t read_offset
= offset
+ find_length
;
724 str
->find(find_this
.data(), read_offset
, find_this
.size()),
726 size_t length
= offset
- read_offset
;
728 memmove(&(*str
)[write_offset
], &(*str
)[read_offset
],
729 length
* sizeof(typename
StringType::value_type
));
730 write_offset
+= length
;
732 } while (offset
< str_length
);
733 str
->resize(write_offset
);
737 // We're lengthening the string. We can use alternating replacements and
738 // memmove() calls like above, but we need to precalculate the final string
739 // length and then expand from back-to-front to avoid overwriting the string
740 // as we're reading it, needing to shift, or having to copy to a second string
742 size_t first_match
= offset
;
744 // First, calculate the final length and resize the string.
745 size_t final_length
= str_length
;
746 size_t expansion
= replace_length
- find_length
;
747 size_t current_match
;
749 final_length
+= expansion
;
750 // Minor optimization: save this offset into |current_match|, so that on
751 // exit from the loop, |current_match| will point at the last instance of
752 // the find string, and we won't need to find() it again immediately.
753 current_match
= offset
;
754 offset
= str
->find(find_this
.data(), offset
+ find_length
,
756 } while (offset
!= StringType::npos
);
757 str
->resize(final_length
);
759 // Now do the replacement loop, working backwards through the string.
760 for (size_t prev_match
= str_length
, write_offset
= final_length
; ;
761 current_match
= str
->rfind(find_this
.data(), current_match
- 1,
763 size_t read_offset
= current_match
+ find_length
;
764 size_t length
= prev_match
- read_offset
;
766 write_offset
-= length
;
767 memmove(&(*str
)[write_offset
], &(*str
)[read_offset
],
768 length
* sizeof(typename
StringType::value_type
));
770 write_offset
-= replace_length
;
771 str
->replace(write_offset
, replace_length
,
772 replace_with
.data(), replace_with
.size());
773 if (current_match
== first_match
)
775 prev_match
= current_match
;
779 void ReplaceFirstSubstringAfterOffset(string16
* str
,
781 StringPiece16 find_this
,
782 StringPiece16 replace_with
) {
783 DoReplaceSubstringsAfterOffset
<string16
>(
784 str
, start_offset
, find_this
, replace_with
, false); // Replace first.
787 void ReplaceFirstSubstringAfterOffset(std::string
* str
,
789 StringPiece find_this
,
790 StringPiece replace_with
) {
791 DoReplaceSubstringsAfterOffset
<std::string
>(
792 str
, start_offset
, find_this
, replace_with
, false); // Replace first.
795 void ReplaceSubstringsAfterOffset(string16
* str
,
797 StringPiece16 find_this
,
798 StringPiece16 replace_with
) {
799 DoReplaceSubstringsAfterOffset
<string16
>(
800 str
, start_offset
, find_this
, replace_with
, true); // Replace all.
803 void ReplaceSubstringsAfterOffset(std::string
* str
,
805 StringPiece find_this
,
806 StringPiece replace_with
) {
807 DoReplaceSubstringsAfterOffset
<std::string
>(
808 str
, start_offset
, find_this
, replace_with
, true); // Replace all.
813 template<typename STR
>
814 static STR
JoinStringT(const std::vector
<STR
>& parts
, const STR
& sep
) {
818 STR
result(parts
[0]);
819 typename
std::vector
<STR
>::const_iterator iter
= parts
.begin();
822 for (; iter
!= parts
.end(); ++iter
) {
830 std::string
JoinString(const std::vector
<std::string
>& parts
, char sep
) {
831 return JoinStringT(parts
, std::string(1, sep
));
834 string16
JoinString(const std::vector
<string16
>& parts
, char16 sep
) {
835 return JoinStringT(parts
, string16(1, sep
));
838 std::string
JoinString(const std::vector
<std::string
>& parts
,
839 const std::string
& separator
) {
840 return JoinStringT(parts
, separator
);
843 string16
JoinString(const std::vector
<string16
>& parts
,
844 const string16
& separator
) {
845 return JoinStringT(parts
, separator
);
848 template<class FormatStringType
, class OutStringType
>
849 OutStringType
DoReplaceStringPlaceholders(const FormatStringType
& format_string
,
850 const std::vector
<OutStringType
>& subst
, std::vector
<size_t>* offsets
) {
851 size_t substitutions
= subst
.size();
853 size_t sub_length
= 0;
854 for (typename
std::vector
<OutStringType
>::const_iterator iter
= subst
.begin();
855 iter
!= subst
.end(); ++iter
) {
856 sub_length
+= iter
->length();
859 OutStringType formatted
;
860 formatted
.reserve(format_string
.length() + sub_length
);
862 std::vector
<ReplacementOffset
> r_offsets
;
863 for (typename
FormatStringType::const_iterator i
= format_string
.begin();
864 i
!= format_string
.end(); ++i
) {
866 if (i
+ 1 != format_string
.end()) {
868 DCHECK('$' == *i
|| '1' <= *i
) << "Invalid placeholder: " << *i
;
870 while (i
!= format_string
.end() && '$' == *i
) {
871 formatted
.push_back('$');
877 while (i
!= format_string
.end() && '0' <= *i
&& *i
<= '9') {
885 ReplacementOffset
r_offset(index
,
886 static_cast<int>(formatted
.size()));
887 r_offsets
.insert(std::lower_bound(r_offsets
.begin(),
893 if (index
< substitutions
)
894 formatted
.append(subst
.at(index
));
898 formatted
.push_back(*i
);
902 for (std::vector
<ReplacementOffset
>::const_iterator i
= r_offsets
.begin();
903 i
!= r_offsets
.end(); ++i
) {
904 offsets
->push_back(i
->offset
);
910 string16
ReplaceStringPlaceholders(const string16
& format_string
,
911 const std::vector
<string16
>& subst
,
912 std::vector
<size_t>* offsets
) {
913 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
916 std::string
ReplaceStringPlaceholders(const base::StringPiece
& format_string
,
917 const std::vector
<std::string
>& subst
,
918 std::vector
<size_t>* offsets
) {
919 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
922 string16
ReplaceStringPlaceholders(const string16
& format_string
,
925 std::vector
<size_t> offsets
;
926 std::vector
<string16
> subst
;
928 string16 result
= ReplaceStringPlaceholders(format_string
, subst
, &offsets
);
930 DCHECK_EQ(1U, offsets
.size());
932 *offset
= offsets
[0];
936 // The following code is compatible with the OpenBSD lcpy interface. See:
937 // http://www.gratisoft.us/todd/papers/strlcpy.html
938 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
// Bounded string copy shared by the char and wchar_t entry points.
//
// Copies |src| into |dst|, writing at most |dst_size| characters including
// the terminator. When |dst_size| is non-zero the result is always
// terminated, truncating |src| if necessary. When |dst_size| is zero |dst| is
// not touched.
//
// Returns the length of |src| in characters (not counting the terminator),
// so a return value >= |dst_size| tells the caller that truncation happened.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  for (size_t i = 0; i < dst_size; ++i) {
    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
      return i;
  }

  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
  // The guard keeps a zero-sized buffer from being written at index -1.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of the |src|, and return its length in characters.
  while (src[dst_size]) ++dst_size;
  return dst_size;
}
960 size_t base::strlcpy(char* dst
, const char* src
, size_t dst_size
) {
961 return lcpyT
<char>(dst
, src
, dst_size
);
963 size_t base::wcslcpy(wchar_t* dst
, const wchar_t* src
, size_t dst_size
) {
964 return lcpyT
<wchar_t>(dst
, src
, dst_size
);