base/strings/string_util.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <ctype.h>
   8 #include <errno.h>
   9 #include <math.h>
  10 #include <stdarg.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <time.h>
  15 #include <wchar.h>
  16 #include <wctype.h>
  17
  18 #include <algorithm>
  19 #include <vector>
  20
  21 #include "base/basictypes.h"
  22 #include "base/logging.h"
  23 #include "base/memory/singleton.h"
  24 #include "base/strings/string_split.h"
  25 #include "base/strings/utf_string_conversion_utils.h"
  26 #include "base/strings/utf_string_conversions.h"
  27 #include "base/third_party/icu/icu_utf.h"
  28 #include "build/build_config.h"
  29
  30 // Remove when this entire file is in the base namespace.
  31 using base::char16;
  32 using base::string16;
  33
  34 namespace {
  35
  36 // Force the singleton used by EmptyString[16] to be a unique type. This
  37 // prevents other code that might accidentally use Singleton<string> from
  38 // getting our internal one.
  39 struct EmptyStrings {
  40   EmptyStrings() {}
  41   const std::string s;
  42   const string16 s16;
  43
  44   static EmptyStrings* GetInstance() {
  45     return Singleton<EmptyStrings>::get();
  46   }
  47 };
  48
  49 // Used by ReplaceStringPlaceholders to track the position in the string of
  50 // replaced parameters.
  51 struct ReplacementOffset {
  52   ReplacementOffset(uintptr_t parameter, size_t offset)
  53       : parameter(parameter),
  54         offset(offset) {}
  55
  56   // Index of the parameter.
  57   uintptr_t parameter;
  58
  59   // Starting position in the string.
  60   size_t offset;
  61 };
  62
  63 static bool CompareParameter(const ReplacementOffset& elem1,
  64                              const ReplacementOffset& elem2) {
  65   return elem1.parameter < elem2.parameter;
  66 }
  67
  68 // Assuming that a pointer is the size of a "machine word", then
  69 // uintptr_t is an integer type that is also a machine word.
  70 typedef uintptr_t MachineWord;
  71 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
  72
  73 inline bool IsAlignedToMachineWord(const void* pointer) {
  74   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
  75 }
  76
  77 template<typename T> inline T* AlignToMachineWord(T* pointer) {
  78   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
  79                               ~kMachineWordAlignmentMask);
  80 }
  81
  82 template<size_t size, typename CharacterType> struct NonASCIIMask;
  83 template<> struct NonASCIIMask<4, base::char16> {
  84     static inline uint32_t value() { return 0xFF80FF80U; }
  85 };
  86 template<> struct NonASCIIMask<4, char> {
  87     static inline uint32_t value() { return 0x80808080U; }
  88 };
  89 template<> struct NonASCIIMask<8, base::char16> {
  90     static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
  91 };
  92 template<> struct NonASCIIMask<8, char> {
  93     static inline uint64_t value() { return 0x8080808080808080ULL; }
  94 };
  95 #if defined(WCHAR_T_IS_UTF32)
  96 template<> struct NonASCIIMask<4, wchar_t> {
  97     static inline uint32_t value() { return 0xFFFFFF80U; }
  98 };
  99 template<> struct NonASCIIMask<8, wchar_t> {
 100     static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
 101 };
 102 #endif  // WCHAR_T_IS_UTF32
 103
 104 }  // namespace
 105
 106 namespace base {
 107
 108 bool IsWprintfFormatPortable(const wchar_t* format) {
 109   for (const wchar_t* position = format; *position != '\0'; ++position) {
 110     if (*position == '%') {
 111       bool in_specification = true;
 112       bool modifier_l = false;
 113       while (in_specification) {
 114         // Eat up characters until reaching a known specifier.
 115         if (*++position == '\0') {
 116           // The format string ended in the middle of a specification.  Call
 117           // it portable because no unportable specifications were found.  The
 118           // string is equally broken on all platforms.
 119           return true;
 120         }
 121
 122         if (*position == 'l') {
 123           // 'l' is the only thing that can save the 's' and 'c' specifiers.
 124           modifier_l = true;
 125         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
 126                    *position == 'S' || *position == 'C' || *position == 'F' ||
 127                    *position == 'D' || *position == 'O' || *position == 'U') {
 128           // Not portable.
 129           return false;
 130         }
 131
 132         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
 133           // Portable, keep scanning the rest of the format string.
 134           in_specification = false;
 135         }
 136       }
 137     }
 138   }
 139
 140   return true;
 141 }
 142
 143 const std::string& EmptyString() {
 144   return EmptyStrings::GetInstance()->s;
 145 }
 146
 147 const string16& EmptyString16() {
 148   return EmptyStrings::GetInstance()->s16;
 149 }
 150
 151 template<typename STR>
 152 bool ReplaceCharsT(const STR& input,
 153                    const STR& replace_chars,
 154                    const STR& replace_with,
 155                    STR* output) {
 156   bool removed = false;
 157   size_t replace_length = replace_with.length();
 158
 159   *output = input;
 160
 161   size_t found = output->find_first_of(replace_chars);
 162   while (found != STR::npos) {
 163     removed = true;
 164     output->replace(found, 1, replace_with);
 165     found = output->find_first_of(replace_chars, found + replace_length);
 166   }
 167
 168   return removed;
 169 }
 170
 171 bool ReplaceChars(const string16& input,
 172                   const base::StringPiece16& replace_chars,
 173                   const string16& replace_with,
 174                   string16* output) {
 175   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
 176 }
 177
 178 bool ReplaceChars(const std::string& input,
 179                   const base::StringPiece& replace_chars,
 180                   const std::string& replace_with,
 181                   std::string* output) {
 182   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
 183 }
 184
 185 bool RemoveChars(const string16& input,
 186                  const base::StringPiece16& remove_chars,
 187                  string16* output) {
 188   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
 189 }
 190
 191 bool RemoveChars(const std::string& input,
 192                  const base::StringPiece& remove_chars,
 193                  std::string* output) {
 194   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
 195 }
 196
 197 template<typename Str>
 198 TrimPositions TrimStringT(const Str& input,
 199                           BasicStringPiece<Str> trim_chars,
 200                           TrimPositions positions,
 201                           Str* output) {
 202   // Find the edges of leading/trailing whitespace as desired. Need to use
 203   // a StringPiece version of input to be able to call find* on it with the
 204   // StringPiece version of trim_chars (normally the trim_chars will be a
 205   // constant so avoid making a copy).
 206   BasicStringPiece<Str> input_piece(input);
 207   const size_t last_char = input.length() - 1;
 208   const size_t first_good_char = (positions & TRIM_LEADING) ?
 209       input_piece.find_first_not_of(trim_chars) : 0;
 210   const size_t last_good_char = (positions & TRIM_TRAILING) ?
 211       input_piece.find_last_not_of(trim_chars) : last_char;
 212
 213   // When the string was all trimmed, report that we stripped off characters
 214   // from whichever position the caller was interested in. For empty input, we
 215   // stripped no characters, but we still need to clear |output|.
 216   if (input.empty() ||
 217       (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
 218     bool input_was_empty = input.empty();  // in case output == &input
 219     output->clear();
 220     return input_was_empty ? TRIM_NONE : positions;
 221   }
 222
 223   // Trim.
 224   *output =
 225       input.substr(first_good_char, last_good_char - first_good_char + 1);
 226
 227   // Return where we trimmed from.
 228   return static_cast<TrimPositions>(
 229       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
 230       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
 231 }
 232
 233 bool TrimString(const string16& input,
 234                 base::StringPiece16 trim_chars,
 235                 string16* output) {
 236   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 237 }
 238
 239 bool TrimString(const std::string& input,
 240                 base::StringPiece trim_chars,
 241                 std::string* output) {
 242   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 243 }
 244
 245 template<typename Str>
 246 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
 247                                        BasicStringPiece<Str> trim_chars,
 248                                        TrimPositions positions) {
 249   size_t begin = (positions & TRIM_LEADING) ?
 250       input.find_first_not_of(trim_chars) : 0;
 251   size_t end = (positions & TRIM_TRAILING) ?
 252       input.find_last_not_of(trim_chars) + 1 : input.size();
 253   return input.substr(begin, end - begin);
 254 }
 255
 256 StringPiece16 TrimString(StringPiece16 input,
 257                          const base::StringPiece16& trim_chars,
 258                          TrimPositions positions) {
 259   return TrimStringPieceT(input, trim_chars, positions);
 260 }
 261
 262 StringPiece TrimString(StringPiece input,
 263                        const base::StringPiece& trim_chars,
 264                        TrimPositions positions) {
 265   return TrimStringPieceT(input, trim_chars, positions);
 266 }
 267
 268 void TruncateUTF8ToByteSize(const std::string& input,
 269                             const size_t byte_size,
 270                             std::string* output) {
 271   DCHECK(output);
 272   if (byte_size > input.length()) {
 273     *output = input;
 274     return;
 275   }
 276   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
 277   // Note: This cast is necessary because CBU8_NEXT uses int32s.
 278   int32 truncation_length = static_cast<int32>(byte_size);
 279   int32 char_index = truncation_length - 1;
 280   const char* data = input.data();
 281
 282   // Using CBU8, we will move backwards from the truncation point
 283   // to the beginning of the string looking for a valid UTF8
 284   // character.  Once a full UTF8 character is found, we will
 285   // truncate the string to the end of that character.
 286   while (char_index >= 0) {
 287     int32 prev = char_index;
 288     base_icu::UChar32 code_point = 0;
 289     CBU8_NEXT(data, char_index, truncation_length, code_point);
 290     if (!IsValidCharacter(code_point) ||
 291         !IsValidCodepoint(code_point)) {
 292       char_index = prev - 1;
 293     } else {
 294       break;
 295     }
 296   }
 297
 298   if (char_index >= 0 )
 299     *output = input.substr(0, char_index);
 300   else
 301     output->clear();
 302 }
 303
 304 TrimPositions TrimWhitespace(const string16& input,
 305                              TrimPositions positions,
 306                              string16* output) {
 307   return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
 308 }
 309
 310 StringPiece16 TrimWhitespaceASCII(StringPiece16 input,
 311                                   TrimPositions positions) {
 312   return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
 313 }
 314
 315 TrimPositions TrimWhitespaceASCII(const std::string& input,
 316                                   TrimPositions positions,
 317                                   std::string* output) {
 318   return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
 319 }
 320
 321 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
 322   return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
 323 }
 324
 325 // This function is only for backward-compatibility.
 326 // To be removed when all callers are updated.
 327 TrimPositions TrimWhitespace(const std::string& input,
 328                              TrimPositions positions,
 329                              std::string* output) {
 330   return TrimWhitespaceASCII(input, positions, output);
 331 }
 332
 333 template<typename STR>
 334 STR CollapseWhitespaceT(const STR& text,
 335                         bool trim_sequences_with_line_breaks) {
 336   STR result;
 337   result.resize(text.size());
 338
 339   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 340   // will trim any leading whitespace.
 341   bool in_whitespace = true;
 342   bool already_trimmed = true;
 343
 344   int chars_written = 0;
 345   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 346     if (IsUnicodeWhitespace(*i)) {
 347       if (!in_whitespace) {
 348         // Reduce all whitespace sequences to a single space.
 349         in_whitespace = true;
 350         result[chars_written++] = L' ';
 351       }
 352       if (trim_sequences_with_line_breaks && !already_trimmed &&
 353           ((*i == '\n') || (*i == '\r'))) {
 354         // Whitespace sequences containing CR or LF are eliminated entirely.
 355         already_trimmed = true;
 356         --chars_written;
 357       }
 358     } else {
 359       // Non-whitespace chracters are copied straight across.
 360       in_whitespace = false;
 361       already_trimmed = false;
 362       result[chars_written++] = *i;
 363     }
 364   }
 365
 366   if (in_whitespace && !already_trimmed) {
 367     // Any trailing whitespace is eliminated.
 368     --chars_written;
 369   }
 370
 371   result.resize(chars_written);
 372   return result;
 373 }
 374
 375 string16 CollapseWhitespace(const string16& text,
 376                             bool trim_sequences_with_line_breaks) {
 377   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 378 }
 379
 380 std::string CollapseWhitespaceASCII(const std::string& text,
 381                                     bool trim_sequences_with_line_breaks) {
 382   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 383 }
 384
 385 bool ContainsOnlyChars(const StringPiece& input,
 386                        const StringPiece& characters) {
 387   return input.find_first_not_of(characters) == StringPiece::npos;
 388 }
 389
 390 bool ContainsOnlyChars(const StringPiece16& input,
 391                        const StringPiece16& characters) {
 392   return input.find_first_not_of(characters) == StringPiece16::npos;
 393 }
 394
 395 template <class Char>
 396 inline bool DoIsStringASCII(const Char* characters, size_t length) {
 397   MachineWord all_char_bits = 0;
 398   const Char* end = characters + length;
 399
 400   // Prologue: align the input.
 401   while (!IsAlignedToMachineWord(characters) && characters != end) {
 402     all_char_bits |= *characters;
 403     ++characters;
 404   }
 405
 406   // Compare the values of CPU word size.
 407   const Char* word_end = AlignToMachineWord(end);
 408   const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
 409   while (characters < word_end) {
 410     all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
 411     characters += loop_increment;
 412   }
 413
 414   // Process the remaining bytes.
 415   while (characters != end) {
 416     all_char_bits |= *characters;
 417     ++characters;
 418   }
 419
 420   MachineWord non_ascii_bit_mask =
 421       NonASCIIMask<sizeof(MachineWord), Char>::value();
 422   return !(all_char_bits & non_ascii_bit_mask);
 423 }
 424
 425 bool IsStringASCII(const StringPiece& str) {
 426   return DoIsStringASCII(str.data(), str.length());
 427 }
 428
 429 bool IsStringASCII(const StringPiece16& str) {
 430   return DoIsStringASCII(str.data(), str.length());
 431 }
 432
 433 bool IsStringASCII(const string16& str) {
 434   return DoIsStringASCII(str.data(), str.length());
 435 }
 436
 437 #if defined(WCHAR_T_IS_UTF32)
 438 bool IsStringASCII(const std::wstring& str) {
 439   return DoIsStringASCII(str.data(), str.length());
 440 }
 441 #endif
 442
 443 bool IsStringUTF8(const StringPiece& str) {
 444   const char *src = str.data();
 445   int32 src_len = static_cast<int32>(str.length());
 446   int32 char_index = 0;
 447
 448   while (char_index < src_len) {
 449     int32 code_point;
 450     CBU8_NEXT(src, char_index, src_len, code_point);
 451     if (!IsValidCharacter(code_point))
 452       return false;
 453   }
 454   return true;
 455 }
 456
 457 template<typename Iter>
 458 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
 459                                           Iter a_end,
 460                                           const char* b) {
 461   for (Iter it = a_begin; it != a_end; ++it, ++b) {
 462     if (!*b || ToLowerASCII(*it) != *b)
 463       return false;
 464   }
 465   return *b == 0;
 466 }
 467
 468 // Front-ends for LowerCaseEqualsASCII.
 469 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
 470   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 471 }
 472
 473 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
 474   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 475 }
 476
 477 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
 478                           std::string::const_iterator a_end,
 479                           const char* b) {
 480   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 481 }
 482
 483 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
 484                           string16::const_iterator a_end,
 485                           const char* b) {
 486   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 487 }
 488
 489 bool LowerCaseEqualsASCII(const char* a_begin,
 490                           const char* a_end,
 491                           const char* b) {
 492   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 493 }
 494
 495 bool LowerCaseEqualsASCII(const char* a_begin,
 496                           const char* a_end,
 497                           const char* b_begin,
 498                           const char* b_end) {
 499   while (a_begin != a_end && b_begin != b_end &&
 500          ToLowerASCII(*a_begin) == *b_begin) {
 501     a_begin++;
 502     b_begin++;
 503   }
 504   return a_begin == a_end && b_begin == b_end;
 505 }
 506
 507 bool LowerCaseEqualsASCII(const char16* a_begin,
 508                           const char16* a_end,
 509                           const char* b) {
 510   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 511 }
 512
 513 bool EqualsASCII(const string16& a, const StringPiece& b) {
 514   if (a.length() != b.length())
 515     return false;
 516   return std::equal(b.begin(), b.end(), a.begin());
 517 }
 518
 519 template<typename Str>
 520 bool StartsWithT(BasicStringPiece<Str> str,
 521                  BasicStringPiece<Str> search_for,
 522                  CompareCase case_sensitivity) {
 523   if (search_for.size() > str.size())
 524     return false;
 525
 526   BasicStringPiece<Str> source = str.substr(0, search_for.size());
 527
 528   switch (case_sensitivity) {
 529     case CompareCase::SENSITIVE:
 530       return source == search_for;
 531
 532     case CompareCase::INSENSITIVE_ASCII:
 533       return std::equal(
 534           search_for.begin(), search_for.end(),
 535           source.begin(),
 536           base::CaseInsensitiveCompareASCII<typename Str::value_type>());
 537
 538     default:
 539       NOTREACHED();
 540       return false;
 541   }
 542 }
 543
 544 bool StartsWith(StringPiece str,
 545                 StringPiece search_for,
 546                 CompareCase case_sensitivity) {
 547   return StartsWithT<std::string>(str, search_for, case_sensitivity);
 548 }
 549
 550 bool StartsWith(StringPiece16 str,
 551                 StringPiece16 search_for,
 552                 CompareCase case_sensitivity) {
 553   return StartsWithT<string16>(str, search_for, case_sensitivity);
 554 }
 555
 556 bool StartsWith(const string16& str,
 557                 const string16& search,
 558                 bool case_sensitive) {
 559   if (!case_sensitive) {
 560     // This function was originally written using the current locale functions
 561     // for case-insensitive comparisons. Emulate this behavior until callers
 562     // can be converted either to use the case-insensitive ASCII one (most
 563     // callers) or ICU functions in base_i18n.
 564     if (search.size() > str.size())
 565       return false;
 566     return std::equal(search.begin(), search.end(), str.begin(),
 567                       CaseInsensitiveCompare<char16>());
 568   }
 569   return StartsWith(StringPiece16(str), StringPiece16(search),
 570                     CompareCase::SENSITIVE);
 571 }
 572
 573 template <typename Str>
 574 bool EndsWithT(BasicStringPiece<Str> str,
 575                BasicStringPiece<Str> search_for,
 576                CompareCase case_sensitivity) {
 577   if (search_for.size() > str.size())
 578     return false;
 579
 580   BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
 581                                             search_for.size());
 582
 583   switch (case_sensitivity) {
 584     case CompareCase::SENSITIVE:
 585       return source == search_for;
 586
 587     case CompareCase::INSENSITIVE_ASCII:
 588       return std::equal(
 589           source.begin(), source.end(),
 590           search_for.begin(),
 591           base::CaseInsensitiveCompareASCII<typename Str::value_type>());
 592
 593     default:
 594       NOTREACHED();
 595       return false;
 596   }
 597 }
 598
 599 bool EndsWith(StringPiece str,
 600               StringPiece search_for,
 601               CompareCase case_sensitivity) {
 602   return EndsWithT<std::string>(str, search_for, case_sensitivity);
 603 }
 604
 605 bool EndsWith(StringPiece16 str,
 606               StringPiece16 search_for,
 607                           CompareCase case_sensitivity) {
 608   return EndsWithT<string16>(str, search_for, case_sensitivity);
 609 }
 610
 611 bool EndsWith(const string16& str,
 612               const string16& search,
 613               bool case_sensitive) {
 614   if (!case_sensitive) {
 615     // This function was originally written using the current locale functions
 616     // for case-insensitive comparisons. Emulate this behavior until callers
 617     // can be converted either to use the case-insensitive ASCII one (most
 618     // callers) or ICU functions in base_i18n.
 619     if (search.size() > str.size())
 620       return false;
 621     return std::equal(search.begin(), search.end(),
 622                       str.begin() + (str.size() - search.size()),
 623                       CaseInsensitiveCompare<char16>());
 624   }
 625   return EndsWith(StringPiece16(str), StringPiece16(search),
 626                     CompareCase::SENSITIVE);
 627 }
 628
 629 char HexDigitToInt(wchar_t c) {
 630   DCHECK(IsHexDigit(c));
 631   if (c >= '0' && c <= '9')
 632     return static_cast<char>(c - '0');
 633   if (c >= 'A' && c <= 'F')
 634     return static_cast<char>(c - 'A' + 10);
 635   if (c >= 'a' && c <= 'f')
 636     return static_cast<char>(c - 'a' + 10);
 637   return 0;
 638 }
 639
 640 static const char* const kByteStringsUnlocalized[] = {
 641   " B",
 642   " kB",
 643   " MB",
 644   " GB",
 645   " TB",
 646   " PB"
 647 };
 648
 649 string16 FormatBytesUnlocalized(int64 bytes) {
 650   double unit_amount = static_cast<double>(bytes);
 651   size_t dimension = 0;
 652   const int kKilo = 1024;
 653   while (unit_amount >= kKilo &&
 654          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 655     unit_amount /= kKilo;
 656     dimension++;
 657   }
 658
 659   char buf[64];
 660   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 661     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 662                    kByteStringsUnlocalized[dimension]);
 663   } else {
 664     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 665                    kByteStringsUnlocalized[dimension]);
 666   }
 667
 668   return ASCIIToUTF16(buf);
 669 }
 670
 671 // Runs in O(n) time in the length of |str|.
 672 template<class StringType>
 673 void DoReplaceSubstringsAfterOffset(StringType* str,
 674                                     size_t offset,
 675                                     BasicStringPiece<StringType> find_this,
 676                                     BasicStringPiece<StringType> replace_with,
 677                                     bool replace_all) {
 678   DCHECK(!find_this.empty());
 679
 680   // If the find string doesn't appear, there's nothing to do.
 681   offset = str->find(find_this.data(), offset, find_this.size());
 682   if (offset == StringType::npos)
 683     return;
 684
 685   // If we're only replacing one instance, there's no need to do anything
 686   // complicated.
 687   size_t find_length = find_this.length();
 688   if (!replace_all) {
 689     str->replace(offset, find_length, replace_with.data(), replace_with.size());
 690     return;
 691   }
 692
 693   // If the find and replace strings are the same length, we can simply use
 694   // replace() on each instance, and finish the entire operation in O(n) time.
 695   size_t replace_length = replace_with.length();
 696   if (find_length == replace_length) {
 697     do {
 698       str->replace(offset, find_length,
 699                    replace_with.data(), replace_with.size());
 700       offset = str->find(find_this.data(), offset + replace_length,
 701                          find_this.size());
 702     } while (offset != StringType::npos);
 703     return;
 704   }
 705
 706   // Since the find and replace strings aren't the same length, a loop like the
 707   // one above would be O(n^2) in the worst case, as replace() will shift the
 708   // entire remaining string each time.  We need to be more clever to keep
 709   // things O(n).
 710   //
 711   // If we're shortening the string, we can alternate replacements with shifting
 712   // forward the intervening characters using memmove().
 713   size_t str_length = str->length();
 714   if (find_length > replace_length) {
 715     size_t write_offset = offset;
 716     do {
 717       if (replace_length) {
 718         str->replace(write_offset, replace_length,
 719                      replace_with.data(), replace_with.size());
 720         write_offset += replace_length;
 721       }
 722       size_t read_offset = offset + find_length;
 723       offset = std::min(
 724           str->find(find_this.data(), read_offset, find_this.size()),
 725           str_length);
 726       size_t length = offset - read_offset;
 727       if (length) {
 728         memmove(&(*str)[write_offset], &(*str)[read_offset],
 729                 length * sizeof(typename StringType::value_type));
 730         write_offset += length;
 731       }
 732     } while (offset < str_length);
 733     str->resize(write_offset);
 734     return;
 735   }
 736
 737   // We're lengthening the string.  We can use alternating replacements and
 738   // memmove() calls like above, but we need to precalculate the final string
 739   // length and then expand from back-to-front to avoid overwriting the string
 740   // as we're reading it, needing to shift, or having to copy to a second string
 741   // temporarily.
 742   size_t first_match = offset;
 743
 744   // First, calculate the final length and resize the string.
 745   size_t final_length = str_length;
 746   size_t expansion = replace_length - find_length;
 747   size_t current_match;
 748   do {
 749     final_length += expansion;
 750     // Minor optimization: save this offset into |current_match|, so that on
 751     // exit from the loop, |current_match| will point at the last instance of
 752     // the find string, and we won't need to find() it again immediately.
 753     current_match = offset;
 754     offset = str->find(find_this.data(), offset + find_length,
 755                        find_this.size());
 756   } while (offset != StringType::npos);
 757   str->resize(final_length);
 758
 759   // Now do the replacement loop, working backwards through the string.
 760   for (size_t prev_match = str_length, write_offset = final_length; ;
 761        current_match = str->rfind(find_this.data(), current_match - 1,
 762                                   find_this.size())) {
 763     size_t read_offset = current_match + find_length;
 764     size_t length = prev_match - read_offset;
 765     if (length) {
 766       write_offset -= length;
 767       memmove(&(*str)[write_offset], &(*str)[read_offset],
 768               length * sizeof(typename StringType::value_type));
 769     }
 770     write_offset -= replace_length;
 771     str->replace(write_offset, replace_length,
 772                  replace_with.data(), replace_with.size());
 773     if (current_match == first_match)
 774       return;
 775     prev_match = current_match;
 776   }
 777 }
 778
 779 void ReplaceFirstSubstringAfterOffset(string16* str,
 780                                       size_t start_offset,
 781                                       StringPiece16 find_this,
 782                                       StringPiece16 replace_with) {
 783   DoReplaceSubstringsAfterOffset<string16>(
 784       str, start_offset, find_this, replace_with, false);  // Replace first.
 785 }
 786
 787 void ReplaceFirstSubstringAfterOffset(std::string* str,
 788                                       size_t start_offset,
 789                                       StringPiece find_this,
 790                                       StringPiece replace_with) {
 791   DoReplaceSubstringsAfterOffset<std::string>(
 792       str, start_offset, find_this, replace_with, false);  // Replace first.
 793 }
 794
 795 void ReplaceSubstringsAfterOffset(string16* str,
 796                                   size_t start_offset,
 797                                   StringPiece16 find_this,
 798                                   StringPiece16 replace_with) {
 799   DoReplaceSubstringsAfterOffset<string16>(
 800       str, start_offset, find_this, replace_with, true);  // Replace all.
 801 }
 802
 803 void ReplaceSubstringsAfterOffset(std::string* str,
 804                                   size_t start_offset,
 805                                   StringPiece find_this,
 806                                   StringPiece replace_with) {
 807   DoReplaceSubstringsAfterOffset<std::string>(
 808       str, start_offset, find_this, replace_with, true);  // Replace all.
 809 }
 810
 811 }  // namespace base
 812
 813 template<typename STR>
 814 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
 815   if (parts.empty())
 816     return STR();
 817
 818   STR result(parts[0]);
 819   typename std::vector<STR>::const_iterator iter = parts.begin();
 820   ++iter;
 821
 822   for (; iter != parts.end(); ++iter) {
 823     result += sep;
 824     result += *iter;
 825   }
 826
 827   return result;
 828 }
 829
 830 std::string JoinString(const std::vector<std::string>& parts, char sep) {
 831   return JoinStringT(parts, std::string(1, sep));
 832 }
 833
 834 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
 835   return JoinStringT(parts, string16(1, sep));
 836 }
 837
 838 std::string JoinString(const std::vector<std::string>& parts,
 839                        const std::string& separator) {
 840   return JoinStringT(parts, separator);
 841 }
 842
 843 string16 JoinString(const std::vector<string16>& parts,
 844                     const string16& separator) {
 845   return JoinStringT(parts, separator);
 846 }
 847
 848 template<class FormatStringType, class OutStringType>
 849 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
 850     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
 851   size_t substitutions = subst.size();
 852
 853   size_t sub_length = 0;
 854   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
 855        iter != subst.end(); ++iter) {
 856     sub_length += iter->length();
 857   }
 858
 859   OutStringType formatted;
 860   formatted.reserve(format_string.length() + sub_length);
 861
 862   std::vector<ReplacementOffset> r_offsets;
 863   for (typename FormatStringType::const_iterator i = format_string.begin();
 864        i != format_string.end(); ++i) {
 865     if ('$' == *i) {
 866       if (i + 1 != format_string.end()) {
 867         ++i;
 868         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 869         if ('$' == *i) {
 870           while (i != format_string.end() && '$' == *i) {
 871             formatted.push_back('$');
 872             ++i;
 873           }
 874           --i;
 875         } else {
 876           uintptr_t index = 0;
 877           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 878             index *= 10;
 879             index += *i - '0';
 880             ++i;
 881           }
 882           --i;
 883           index -= 1;
 884           if (offsets) {
 885             ReplacementOffset r_offset(index,
 886                 static_cast<int>(formatted.size()));
 887             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 888                                               r_offsets.end(),
 889                                               r_offset,
 890                                               &CompareParameter),
 891                              r_offset);
 892           }
 893           if (index < substitutions)
 894             formatted.append(subst.at(index));
 895         }
 896       }
 897     } else {
 898       formatted.push_back(*i);
 899     }
 900   }
 901   if (offsets) {
 902     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
 903          i != r_offsets.end(); ++i) {
 904       offsets->push_back(i->offset);
 905     }
 906   }
 907   return formatted;
 908 }
 909
 910 string16 ReplaceStringPlaceholders(const string16& format_string,
 911                                    const std::vector<string16>& subst,
 912                                    std::vector<size_t>* offsets) {
 913   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 914 }
 915
 916 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
 917                                       const std::vector<std::string>& subst,
 918                                       std::vector<size_t>* offsets) {
 919   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 920 }
 921
 922 string16 ReplaceStringPlaceholders(const string16& format_string,
 923                                    const string16& a,
 924                                    size_t* offset) {
 925   std::vector<size_t> offsets;
 926   std::vector<string16> subst;
 927   subst.push_back(a);
 928   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
 929
 930   DCHECK_EQ(1U, offsets.size());
 931   if (offset)
 932     *offset = offsets[0];
 933   return result;
 934 }
 935
 936 // The following code is compatible with the OpenBSD lcpy interface.  See:
 937 //   http://www.gratisoft.us/todd/papers/strlcpy.html
 938 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
 939
 940 namespace {
 941
 942 template <typename CHAR>
 943 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
 944   for (size_t i = 0; i < dst_size; ++i) {
 945     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
 946       return i;
 947   }
 948
 949   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
 950   if (dst_size != 0)
 951     dst[dst_size - 1] = 0;
 952
 953   // Count the rest of the |src|, and return it's length in characters.
 954   while (src[dst_size]) ++dst_size;
 955   return dst_size;
 956 }
 957
 958 }  // namespace
 959
 960 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
 961   return lcpyT<char>(dst, src, dst_size);
 962 }
 963 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
 964   return lcpyT<wchar_t>(dst, src, dst_size);
 965 }