Replace remaining Tokenize calls to SplitString
[chromium-blink-merge.git] / base / strings / string_util.cc
blob380d455f393962b1544f599cefe59e2fb1aac431
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/strings/string_util.h"
7 #include <ctype.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <stdarg.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <time.h>
15 #include <wchar.h>
16 #include <wctype.h>
18 #include <algorithm>
19 #include <vector>
21 #include "base/basictypes.h"
22 #include "base/logging.h"
23 #include "base/memory/singleton.h"
24 #include "base/strings/string_split.h"
25 #include "base/strings/utf_string_conversion_utils.h"
26 #include "base/strings/utf_string_conversions.h"
27 #include "base/third_party/icu/icu_utf.h"
28 #include "build/build_config.h"
30 // Remove when this entire file is in the base namespace.
31 using base::char16;
32 using base::string16;
34 namespace {
36 // Force the singleton used by EmptyString[16] to be a unique type. This
37 // prevents other code that might accidentally use Singleton<string> from
38 // getting our internal one.
39 struct EmptyStrings {
40 EmptyStrings() {}
41 const std::string s;
42 const string16 s16;
44 static EmptyStrings* GetInstance() {
45 return Singleton<EmptyStrings>::get();
49 // Used by ReplaceStringPlaceholders to track the position in the string of
50 // replaced parameters.
51 struct ReplacementOffset {
52 ReplacementOffset(uintptr_t parameter, size_t offset)
53 : parameter(parameter),
54 offset(offset) {}
56 // Index of the parameter.
57 uintptr_t parameter;
59 // Starting position in the string.
60 size_t offset;
63 static bool CompareParameter(const ReplacementOffset& elem1,
64 const ReplacementOffset& elem2) {
65 return elem1.parameter < elem2.parameter;
68 // Assuming that a pointer is the size of a "machine word", then
69 // uintptr_t is an integer type that is also a machine word.
70 typedef uintptr_t MachineWord;
71 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
73 inline bool IsAlignedToMachineWord(const void* pointer) {
74 return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
77 template<typename T> inline T* AlignToMachineWord(T* pointer) {
78 return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
79 ~kMachineWordAlignmentMask);
82 template<size_t size, typename CharacterType> struct NonASCIIMask;
83 template<> struct NonASCIIMask<4, base::char16> {
84 static inline uint32_t value() { return 0xFF80FF80U; }
86 template<> struct NonASCIIMask<4, char> {
87 static inline uint32_t value() { return 0x80808080U; }
89 template<> struct NonASCIIMask<8, base::char16> {
90 static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
92 template<> struct NonASCIIMask<8, char> {
93 static inline uint64_t value() { return 0x8080808080808080ULL; }
95 #if defined(WCHAR_T_IS_UTF32)
96 template<> struct NonASCIIMask<4, wchar_t> {
97 static inline uint32_t value() { return 0xFFFFFF80U; }
99 template<> struct NonASCIIMask<8, wchar_t> {
100 static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
102 #endif // WCHAR_T_IS_UTF32
104 } // namespace
106 namespace base {
// Scans the wide printf-style |format| and returns false as soon as it meets
// a conversion this function treats as non-portable: 's' or 'c' without a
// preceding 'l' modifier, or any of 'S', 'C', 'F', 'D', 'O', 'U'. Returns true
// otherwise, including when the string ends in the middle of a specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* position = format; *position != '\0'; ++position) {
    if (*position == '%') {
      bool in_specification = true;
      bool modifier_l = false;
      while (in_specification) {
        // Eat up characters until reaching a known specifier.
        if (*++position == '\0') {
          // The format string ended in the middle of a specification.  Call
          // it portable because no unportable specifications were found.  The
          // string is equally broken on all platforms.
          return true;
        }

        if (*position == 'l') {
          // 'l' is the only thing that can save the 's' and 'c' specifiers.
          modifier_l = true;
        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
                   *position == 'S' || *position == 'C' || *position == 'F' ||
                   *position == 'D' || *position == 'O' || *position == 'U') {
          // Not portable.
          return false;
        }

        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
          // Portable, keep scanning the rest of the format string.
          in_specification = false;
        }
      }
    }
  }

  return true;
}
143 const std::string& EmptyString() {
144 return EmptyStrings::GetInstance()->s;
147 const string16& EmptyString16() {
148 return EmptyStrings::GetInstance()->s16;
// Copies |input| into |*output| and replaces every character that appears in
// |replace_chars| with the whole of |replace_with|. Text inserted by a
// replacement is skipped, so it is never rescanned. Returns true if at least
// one character was replaced.
template<typename STR>
bool ReplaceCharsT(const STR& input,
                   const STR& replace_chars,
                   const STR& replace_with,
                   STR* output) {
  bool removed = false;
  size_t replace_length = replace_with.length();

  *output = input;

  size_t found = output->find_first_of(replace_chars);
  while (found != STR::npos) {
    removed = true;
    output->replace(found, 1, replace_with);
    // Resume the search just past the text that was inserted.
    found = output->find_first_of(replace_chars, found + replace_length);
  }

  return removed;
}
171 bool ReplaceChars(const string16& input,
172 const base::StringPiece16& replace_chars,
173 const string16& replace_with,
174 string16* output) {
175 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
178 bool ReplaceChars(const std::string& input,
179 const base::StringPiece& replace_chars,
180 const std::string& replace_with,
181 std::string* output) {
182 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
185 bool RemoveChars(const string16& input,
186 const base::StringPiece16& remove_chars,
187 string16* output) {
188 return ReplaceChars(input, remove_chars.as_string(), string16(), output);
191 bool RemoveChars(const std::string& input,
192 const base::StringPiece& remove_chars,
193 std::string* output) {
194 return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
197 template<typename Str>
198 TrimPositions TrimStringT(const Str& input,
199 BasicStringPiece<Str> trim_chars,
200 TrimPositions positions,
201 Str* output) {
202 // Find the edges of leading/trailing whitespace as desired. Need to use
203 // a StringPiece version of input to be able to call find* on it with the
204 // StringPiece version of trim_chars (normally the trim_chars will be a
205 // constant so avoid making a copy).
206 BasicStringPiece<Str> input_piece(input);
207 const size_t last_char = input.length() - 1;
208 const size_t first_good_char = (positions & TRIM_LEADING) ?
209 input_piece.find_first_not_of(trim_chars) : 0;
210 const size_t last_good_char = (positions & TRIM_TRAILING) ?
211 input_piece.find_last_not_of(trim_chars) : last_char;
213 // When the string was all trimmed, report that we stripped off characters
214 // from whichever position the caller was interested in. For empty input, we
215 // stripped no characters, but we still need to clear |output|.
216 if (input.empty() ||
217 (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
218 bool input_was_empty = input.empty(); // in case output == &input
219 output->clear();
220 return input_was_empty ? TRIM_NONE : positions;
223 // Trim.
224 *output =
225 input.substr(first_good_char, last_good_char - first_good_char + 1);
227 // Return where we trimmed from.
228 return static_cast<TrimPositions>(
229 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
230 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
233 bool TrimString(const string16& input,
234 base::StringPiece16 trim_chars,
235 string16* output) {
236 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
239 bool TrimString(const std::string& input,
240 base::StringPiece trim_chars,
241 std::string* output) {
242 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
245 template<typename Str>
246 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
247 BasicStringPiece<Str> trim_chars,
248 TrimPositions positions) {
249 size_t begin = (positions & TRIM_LEADING) ?
250 input.find_first_not_of(trim_chars) : 0;
251 size_t end = (positions & TRIM_TRAILING) ?
252 input.find_last_not_of(trim_chars) + 1 : input.size();
253 return input.substr(begin, end - begin);
256 StringPiece16 TrimString(StringPiece16 input,
257 const base::StringPiece16& trim_chars,
258 TrimPositions positions) {
259 return TrimStringPieceT(input, trim_chars, positions);
262 StringPiece TrimString(StringPiece input,
263 const base::StringPiece& trim_chars,
264 TrimPositions positions) {
265 return TrimStringPieceT(input, trim_chars, positions);
268 void TruncateUTF8ToByteSize(const std::string& input,
269 const size_t byte_size,
270 std::string* output) {
271 DCHECK(output);
272 if (byte_size > input.length()) {
273 *output = input;
274 return;
276 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
277 // Note: This cast is necessary because CBU8_NEXT uses int32s.
278 int32 truncation_length = static_cast<int32>(byte_size);
279 int32 char_index = truncation_length - 1;
280 const char* data = input.data();
282 // Using CBU8, we will move backwards from the truncation point
283 // to the beginning of the string looking for a valid UTF8
284 // character. Once a full UTF8 character is found, we will
285 // truncate the string to the end of that character.
286 while (char_index >= 0) {
287 int32 prev = char_index;
288 base_icu::UChar32 code_point = 0;
289 CBU8_NEXT(data, char_index, truncation_length, code_point);
290 if (!IsValidCharacter(code_point) ||
291 !IsValidCodepoint(code_point)) {
292 char_index = prev - 1;
293 } else {
294 break;
298 if (char_index >= 0 )
299 *output = input.substr(0, char_index);
300 else
301 output->clear();
304 TrimPositions TrimWhitespace(const string16& input,
305 TrimPositions positions,
306 string16* output) {
307 return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
310 StringPiece16 TrimWhitespaceASCII(StringPiece16 input,
311 TrimPositions positions) {
312 return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
315 TrimPositions TrimWhitespaceASCII(const std::string& input,
316 TrimPositions positions,
317 std::string* output) {
318 return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
321 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
322 return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
325 // This function is only for backward-compatibility.
326 // To be removed when all callers are updated.
327 TrimPositions TrimWhitespace(const std::string& input,
328 TrimPositions positions,
329 std::string* output) {
330 return TrimWhitespaceASCII(input, positions, output);
// Returns a copy of |text| in which leading and trailing whitespace (as
// judged by IsUnicodeWhitespace) is dropped and every interior whitespace run
// is reduced to a single space. When |trim_sequences_with_line_breaks| is
// true, interior runs containing '\n' or '\r' are removed entirely.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  STR result;
  result.resize(text.size());

  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
  // will trim any leading whitespace.
  bool in_whitespace = true;
  bool already_trimmed = true;

  // size_t rather than int so the write cursor cannot overflow on very large
  // inputs. It is only decremented right after a space has been written, so
  // it never drops below zero.
  size_t chars_written = 0;
  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    if (IsUnicodeWhitespace(*i)) {
      if (!in_whitespace) {
        // Reduce all whitespace sequences to a single space.
        in_whitespace = true;
        result[chars_written++] = L' ';
      }
      if (trim_sequences_with_line_breaks && !already_trimmed &&
          ((*i == '\n') || (*i == '\r'))) {
        // Whitespace sequences containing CR or LF are eliminated entirely.
        already_trimmed = true;
        --chars_written;
      }
    } else {
      // Non-whitespace characters are copied straight across.
      in_whitespace = false;
      already_trimmed = false;
      result[chars_written++] = *i;
    }
  }

  if (in_whitespace && !already_trimmed) {
    // Any trailing whitespace is eliminated.
    --chars_written;
  }

  result.resize(chars_written);
  return result;
}
375 string16 CollapseWhitespace(const string16& text,
376 bool trim_sequences_with_line_breaks) {
377 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
380 std::string CollapseWhitespaceASCII(const std::string& text,
381 bool trim_sequences_with_line_breaks) {
382 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
385 bool ContainsOnlyChars(const StringPiece& input,
386 const StringPiece& characters) {
387 return input.find_first_not_of(characters) == StringPiece::npos;
390 bool ContainsOnlyChars(const StringPiece16& input,
391 const StringPiece16& characters) {
392 return input.find_first_not_of(characters) == StringPiece16::npos;
395 template <class Char>
396 inline bool DoIsStringASCII(const Char* characters, size_t length) {
397 MachineWord all_char_bits = 0;
398 const Char* end = characters + length;
400 // Prologue: align the input.
401 while (!IsAlignedToMachineWord(characters) && characters != end) {
402 all_char_bits |= *characters;
403 ++characters;
406 // Compare the values of CPU word size.
407 const Char* word_end = AlignToMachineWord(end);
408 const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
409 while (characters < word_end) {
410 all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
411 characters += loop_increment;
414 // Process the remaining bytes.
415 while (characters != end) {
416 all_char_bits |= *characters;
417 ++characters;
420 MachineWord non_ascii_bit_mask =
421 NonASCIIMask<sizeof(MachineWord), Char>::value();
422 return !(all_char_bits & non_ascii_bit_mask);
425 bool IsStringASCII(const StringPiece& str) {
426 return DoIsStringASCII(str.data(), str.length());
429 bool IsStringASCII(const StringPiece16& str) {
430 return DoIsStringASCII(str.data(), str.length());
433 bool IsStringASCII(const string16& str) {
434 return DoIsStringASCII(str.data(), str.length());
437 #if defined(WCHAR_T_IS_UTF32)
438 bool IsStringASCII(const std::wstring& str) {
439 return DoIsStringASCII(str.data(), str.length());
441 #endif
443 bool IsStringUTF8(const StringPiece& str) {
444 const char *src = str.data();
445 int32 src_len = static_cast<int32>(str.length());
446 int32 char_index = 0;
448 while (char_index < src_len) {
449 int32 code_point;
450 CBU8_NEXT(src, char_index, src_len, code_point);
451 if (!IsValidCharacter(code_point))
452 return false;
454 return true;
// Returns true if the range [a_begin, a_end), with ToLowerASCII applied to
// each element, equals the NUL-terminated string |b| exactly. |b| itself is
// compared as-is and must end exactly where the range does.
template<typename Iter>
static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
                                          Iter a_end,
                                          const char* b) {
  for (Iter it = a_begin; it != a_end; ++it, ++b) {
    // |b| ran out first, or the characters differ.
    if (!*b || ToLowerASCII(*it) != *b)
      return false;
  }
  return *b == 0;
}
468 // Front-ends for LowerCaseEqualsASCII.
469 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
470 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
473 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
474 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
477 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
478 std::string::const_iterator a_end,
479 const char* b) {
480 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
483 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
484 string16::const_iterator a_end,
485 const char* b) {
486 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
489 bool LowerCaseEqualsASCII(const char* a_begin,
490 const char* a_end,
491 const char* b) {
492 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
495 bool LowerCaseEqualsASCII(const char* a_begin,
496 const char* a_end,
497 const char* b_begin,
498 const char* b_end) {
499 while (a_begin != a_end && b_begin != b_end &&
500 ToLowerASCII(*a_begin) == *b_begin) {
501 a_begin++;
502 b_begin++;
504 return a_begin == a_end && b_begin == b_end;
507 bool LowerCaseEqualsASCII(const char16* a_begin,
508 const char16* a_end,
509 const char* b) {
510 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
513 bool EqualsASCII(const string16& a, const StringPiece& b) {
514 if (a.length() != b.length())
515 return false;
516 return std::equal(b.begin(), b.end(), a.begin());
519 template<typename Str>
520 bool StartsWithT(BasicStringPiece<Str> str,
521 BasicStringPiece<Str> search_for,
522 CompareCase case_sensitivity) {
523 if (search_for.size() > str.size())
524 return false;
526 BasicStringPiece<Str> source = str.substr(0, search_for.size());
528 switch (case_sensitivity) {
529 case CompareCase::SENSITIVE:
530 return source == search_for;
532 case CompareCase::INSENSITIVE_ASCII:
533 return std::equal(
534 search_for.begin(), search_for.end(),
535 source.begin(),
536 base::CaseInsensitiveCompareASCII<typename Str::value_type>());
538 default:
539 NOTREACHED();
540 return false;
544 bool StartsWith(StringPiece str,
545 StringPiece search_for,
546 CompareCase case_sensitivity) {
547 return StartsWithT<std::string>(str, search_for, case_sensitivity);
550 bool StartsWith(StringPiece16 str,
551 StringPiece16 search_for,
552 CompareCase case_sensitivity) {
553 return StartsWithT<string16>(str, search_for, case_sensitivity);
556 bool StartsWith(const string16& str,
557 const string16& search,
558 bool case_sensitive) {
559 if (!case_sensitive) {
560 // This function was originally written using the current locale functions
561 // for case-insensitive comparisons. Emulate this behavior until callers
562 // can be converted either to use the case-insensitive ASCII one (most
563 // callers) or ICU functions in base_i18n.
564 if (search.size() > str.size())
565 return false;
566 return std::equal(search.begin(), search.end(), str.begin(),
567 CaseInsensitiveCompare<char16>());
569 return StartsWith(StringPiece16(str), StringPiece16(search),
570 CompareCase::SENSITIVE);
573 template <typename Str>
574 bool EndsWithT(BasicStringPiece<Str> str,
575 BasicStringPiece<Str> search_for,
576 CompareCase case_sensitivity) {
577 if (search_for.size() > str.size())
578 return false;
580 BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
581 search_for.size());
583 switch (case_sensitivity) {
584 case CompareCase::SENSITIVE:
585 return source == search_for;
587 case CompareCase::INSENSITIVE_ASCII:
588 return std::equal(
589 source.begin(), source.end(),
590 search_for.begin(),
591 base::CaseInsensitiveCompareASCII<typename Str::value_type>());
593 default:
594 NOTREACHED();
595 return false;
599 bool EndsWith(StringPiece str,
600 StringPiece search_for,
601 CompareCase case_sensitivity) {
602 return EndsWithT<std::string>(str, search_for, case_sensitivity);
605 bool EndsWith(StringPiece16 str,
606 StringPiece16 search_for,
607 CompareCase case_sensitivity) {
608 return EndsWithT<string16>(str, search_for, case_sensitivity);
611 bool EndsWith(const string16& str,
612 const string16& search,
613 bool case_sensitive) {
614 if (!case_sensitive) {
615 // This function was originally written using the current locale functions
616 // for case-insensitive comparisons. Emulate this behavior until callers
617 // can be converted either to use the case-insensitive ASCII one (most
618 // callers) or ICU functions in base_i18n.
619 if (search.size() > str.size())
620 return false;
621 return std::equal(search.begin(), search.end(),
622 str.begin() + (str.size() - search.size()),
623 CaseInsensitiveCompare<char16>());
625 return EndsWith(StringPiece16(str), StringPiece16(search),
626 CompareCase::SENSITIVE);
629 char HexDigitToInt(wchar_t c) {
630 DCHECK(IsHexDigit(c));
631 if (c >= '0' && c <= '9')
632 return static_cast<char>(c - '0');
633 if (c >= 'A' && c <= 'F')
634 return static_cast<char>(c - 'A' + 10);
635 if (c >= 'a' && c <= 'f')
636 return static_cast<char>(c - 'a' + 10);
637 return 0;
640 static const char* const kByteStringsUnlocalized[] = {
641 " B",
642 " kB",
643 " MB",
644 " GB",
645 " TB",
646 " PB"
649 string16 FormatBytesUnlocalized(int64 bytes) {
650 double unit_amount = static_cast<double>(bytes);
651 size_t dimension = 0;
652 const int kKilo = 1024;
653 while (unit_amount >= kKilo &&
654 dimension < arraysize(kByteStringsUnlocalized) - 1) {
655 unit_amount /= kKilo;
656 dimension++;
659 char buf[64];
660 if (bytes != 0 && dimension > 0 && unit_amount < 100) {
661 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
662 kByteStringsUnlocalized[dimension]);
663 } else {
664 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
665 kByteStringsUnlocalized[dimension]);
668 return ASCIIToUTF16(buf);
671 // Runs in O(n) time in the length of |str|.
672 template<class StringType>
673 void DoReplaceSubstringsAfterOffset(StringType* str,
674 size_t offset,
675 BasicStringPiece<StringType> find_this,
676 BasicStringPiece<StringType> replace_with,
677 bool replace_all) {
678 DCHECK(!find_this.empty());
680 // If the find string doesn't appear, there's nothing to do.
681 offset = str->find(find_this.data(), offset, find_this.size());
682 if (offset == StringType::npos)
683 return;
685 // If we're only replacing one instance, there's no need to do anything
686 // complicated.
687 size_t find_length = find_this.length();
688 if (!replace_all) {
689 str->replace(offset, find_length, replace_with.data(), replace_with.size());
690 return;
693 // If the find and replace strings are the same length, we can simply use
694 // replace() on each instance, and finish the entire operation in O(n) time.
695 size_t replace_length = replace_with.length();
696 if (find_length == replace_length) {
697 do {
698 str->replace(offset, find_length,
699 replace_with.data(), replace_with.size());
700 offset = str->find(find_this.data(), offset + replace_length,
701 find_this.size());
702 } while (offset != StringType::npos);
703 return;
706 // Since the find and replace strings aren't the same length, a loop like the
707 // one above would be O(n^2) in the worst case, as replace() will shift the
708 // entire remaining string each time. We need to be more clever to keep
709 // things O(n).
711 // If we're shortening the string, we can alternate replacements with shifting
712 // forward the intervening characters using memmove().
713 size_t str_length = str->length();
714 if (find_length > replace_length) {
715 size_t write_offset = offset;
716 do {
717 if (replace_length) {
718 str->replace(write_offset, replace_length,
719 replace_with.data(), replace_with.size());
720 write_offset += replace_length;
722 size_t read_offset = offset + find_length;
723 offset = std::min(
724 str->find(find_this.data(), read_offset, find_this.size()),
725 str_length);
726 size_t length = offset - read_offset;
727 if (length) {
728 memmove(&(*str)[write_offset], &(*str)[read_offset],
729 length * sizeof(typename StringType::value_type));
730 write_offset += length;
732 } while (offset < str_length);
733 str->resize(write_offset);
734 return;
737 // We're lengthening the string. We can use alternating replacements and
738 // memmove() calls like above, but we need to precalculate the final string
739 // length and then expand from back-to-front to avoid overwriting the string
740 // as we're reading it, needing to shift, or having to copy to a second string
741 // temporarily.
742 size_t first_match = offset;
744 // First, calculate the final length and resize the string.
745 size_t final_length = str_length;
746 size_t expansion = replace_length - find_length;
747 size_t current_match;
748 do {
749 final_length += expansion;
750 // Minor optimization: save this offset into |current_match|, so that on
751 // exit from the loop, |current_match| will point at the last instance of
752 // the find string, and we won't need to find() it again immediately.
753 current_match = offset;
754 offset = str->find(find_this.data(), offset + find_length,
755 find_this.size());
756 } while (offset != StringType::npos);
757 str->resize(final_length);
759 // Now do the replacement loop, working backwards through the string.
760 for (size_t prev_match = str_length, write_offset = final_length; ;
761 current_match = str->rfind(find_this.data(), current_match - 1,
762 find_this.size())) {
763 size_t read_offset = current_match + find_length;
764 size_t length = prev_match - read_offset;
765 if (length) {
766 write_offset -= length;
767 memmove(&(*str)[write_offset], &(*str)[read_offset],
768 length * sizeof(typename StringType::value_type));
770 write_offset -= replace_length;
771 str->replace(write_offset, replace_length,
772 replace_with.data(), replace_with.size());
773 if (current_match == first_match)
774 return;
775 prev_match = current_match;
779 void ReplaceFirstSubstringAfterOffset(string16* str,
780 size_t start_offset,
781 StringPiece16 find_this,
782 StringPiece16 replace_with) {
783 DoReplaceSubstringsAfterOffset<string16>(
784 str, start_offset, find_this, replace_with, false); // Replace first.
787 void ReplaceFirstSubstringAfterOffset(std::string* str,
788 size_t start_offset,
789 StringPiece find_this,
790 StringPiece replace_with) {
791 DoReplaceSubstringsAfterOffset<std::string>(
792 str, start_offset, find_this, replace_with, false); // Replace first.
795 void ReplaceSubstringsAfterOffset(string16* str,
796 size_t start_offset,
797 StringPiece16 find_this,
798 StringPiece16 replace_with) {
799 DoReplaceSubstringsAfterOffset<string16>(
800 str, start_offset, find_this, replace_with, true); // Replace all.
803 void ReplaceSubstringsAfterOffset(std::string* str,
804 size_t start_offset,
805 StringPiece find_this,
806 StringPiece replace_with) {
807 DoReplaceSubstringsAfterOffset<std::string>(
808 str, start_offset, find_this, replace_with, true); // Replace all.
811 } // namespace base
// Concatenates |parts|, inserting |sep| between consecutive elements. Returns
// an empty string when |parts| is empty; empty elements still get separators.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
  if (parts.empty())
    return STR();

  // Work out the final length up front so |result| is allocated only once.
  size_t total_length = sep.size() * (parts.size() - 1);
  for (typename std::vector<STR>::const_iterator part = parts.begin();
       part != parts.end(); ++part) {
    total_length += part->size();
  }

  STR result;
  result.reserve(total_length);

  typename std::vector<STR>::const_iterator iter = parts.begin();
  result += *iter;
  ++iter;

  for (; iter != parts.end(); ++iter) {
    result += sep;
    result += *iter;
  }

  return result;
}
830 std::string JoinString(const std::vector<std::string>& parts, char sep) {
831 return JoinStringT(parts, std::string(1, sep));
834 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
835 return JoinStringT(parts, string16(1, sep));
838 std::string JoinString(const std::vector<std::string>& parts,
839 const std::string& separator) {
840 return JoinStringT(parts, separator);
843 string16 JoinString(const std::vector<string16>& parts,
844 const string16& separator) {
845 return JoinStringT(parts, separator);
848 template<class FormatStringType, class OutStringType>
849 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
850 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
851 size_t substitutions = subst.size();
853 size_t sub_length = 0;
854 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
855 iter != subst.end(); ++iter) {
856 sub_length += iter->length();
859 OutStringType formatted;
860 formatted.reserve(format_string.length() + sub_length);
862 std::vector<ReplacementOffset> r_offsets;
863 for (typename FormatStringType::const_iterator i = format_string.begin();
864 i != format_string.end(); ++i) {
865 if ('$' == *i) {
866 if (i + 1 != format_string.end()) {
867 ++i;
868 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
869 if ('$' == *i) {
870 while (i != format_string.end() && '$' == *i) {
871 formatted.push_back('$');
872 ++i;
874 --i;
875 } else {
876 uintptr_t index = 0;
877 while (i != format_string.end() && '0' <= *i && *i <= '9') {
878 index *= 10;
879 index += *i - '0';
880 ++i;
882 --i;
883 index -= 1;
884 if (offsets) {
885 ReplacementOffset r_offset(index,
886 static_cast<int>(formatted.size()));
887 r_offsets.insert(std::lower_bound(r_offsets.begin(),
888 r_offsets.end(),
889 r_offset,
890 &CompareParameter),
891 r_offset);
893 if (index < substitutions)
894 formatted.append(subst.at(index));
897 } else {
898 formatted.push_back(*i);
901 if (offsets) {
902 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
903 i != r_offsets.end(); ++i) {
904 offsets->push_back(i->offset);
907 return formatted;
910 string16 ReplaceStringPlaceholders(const string16& format_string,
911 const std::vector<string16>& subst,
912 std::vector<size_t>* offsets) {
913 return DoReplaceStringPlaceholders(format_string, subst, offsets);
916 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
917 const std::vector<std::string>& subst,
918 std::vector<size_t>* offsets) {
919 return DoReplaceStringPlaceholders(format_string, subst, offsets);
922 string16 ReplaceStringPlaceholders(const string16& format_string,
923 const string16& a,
924 size_t* offset) {
925 std::vector<size_t> offsets;
926 std::vector<string16> subst;
927 subst.push_back(a);
928 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
930 DCHECK_EQ(1U, offsets.size());
931 if (offset)
932 *offset = offsets[0];
933 return result;
// The following code is compatible with the OpenBSD lcpy interface.  See:
//   http://www.gratisoft.us/todd/papers/strlcpy.html
//   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c

namespace {

// Copies the NUL-terminated |src| into |dst|, writing at most |dst_size|
// characters including the terminator. Whenever |dst_size| is non-zero the
// result is NUL-terminated. Returns the length of |src| in characters, so a
// return value >= |dst_size| tells the caller the copy was truncated.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  for (size_t i = 0; i < dst_size; ++i) {
    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
      return i;
  }

  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of the |src|, and return its length in characters.
  while (src[dst_size]) ++dst_size;
  return dst_size;
}

}  // namespace
960 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
961 return lcpyT<char>(dst, src, dst_size);
963 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
964 return lcpyT<wchar_t>(dst, src, dst_size);