base/strings/string_util.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <ctype.h>
   8 #include <errno.h>
   9 #include <math.h>
  10 #include <stdarg.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <time.h>
  15 #include <wchar.h>
  16 #include <wctype.h>
  17
  18 #include <algorithm>
  19 #include <vector>
  20
  21 #include "base/basictypes.h"
  22 #include "base/logging.h"
  23 #include "base/memory/singleton.h"
  24 #include "base/strings/utf_string_conversion_utils.h"
  25 #include "base/strings/utf_string_conversions.h"
  26 #include "base/third_party/icu/icu_utf.h"
  27 #include "build/build_config.h"
  28
  29 // Remove when this entire file is in the base namespace.
  30 using base::char16;
  31 using base::string16;
  32
  33 namespace {
  34
  35 // Force the singleton used by Empty[W]String[16] to be a unique type. This
  36 // prevents other code that might accidentally use Singleton<string> from
  37 // getting our internal one.
  38 struct EmptyStrings {
  39   EmptyStrings() {}
  40   const std::string s;
  41   const std::wstring ws;
  42   const string16 s16;
  43
  44   static EmptyStrings* GetInstance() {
  45     return Singleton<EmptyStrings>::get();
  46   }
  47 };
  48
  49 // Used by ReplaceStringPlaceholders to track the position in the string of
  50 // replaced parameters.
  51 struct ReplacementOffset {
  52   ReplacementOffset(uintptr_t parameter, size_t offset)
  53       : parameter(parameter),
  54         offset(offset) {}
  55
  56   // Index of the parameter.
  57   uintptr_t parameter;
  58
  59   // Starting position in the string.
  60   size_t offset;
  61 };
  62
  63 static bool CompareParameter(const ReplacementOffset& elem1,
  64                              const ReplacementOffset& elem2) {
  65   return elem1.parameter < elem2.parameter;
  66 }
  67
  68 }  // namespace
  69
  70 namespace base {
  71
  72 bool IsWprintfFormatPortable(const wchar_t* format) {
  73   for (const wchar_t* position = format; *position != '\0'; ++position) {
  74     if (*position == '%') {
  75       bool in_specification = true;
  76       bool modifier_l = false;
  77       while (in_specification) {
  78         // Eat up characters until reaching a known specifier.
  79         if (*++position == '\0') {
  80           // The format string ended in the middle of a specification.  Call
  81           // it portable because no unportable specifications were found.  The
  82           // string is equally broken on all platforms.
  83           return true;
  84         }
  85
  86         if (*position == 'l') {
  87           // 'l' is the only thing that can save the 's' and 'c' specifiers.
  88           modifier_l = true;
  89         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
  90                    *position == 'S' || *position == 'C' || *position == 'F' ||
  91                    *position == 'D' || *position == 'O' || *position == 'U') {
  92           // Not portable.
  93           return false;
  94         }
  95
  96         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
  97           // Portable, keep scanning the rest of the format string.
  98           in_specification = false;
  99         }
 100       }
 101     }
 102   }
 103
 104   return true;
 105 }
 106
 107 const std::string& EmptyString() {
 108   return EmptyStrings::GetInstance()->s;
 109 }
 110
 111 const std::wstring& EmptyWString() {
 112   return EmptyStrings::GetInstance()->ws;
 113 }
 114
 115 const string16& EmptyString16() {
 116   return EmptyStrings::GetInstance()->s16;
 117 }
 118
 119 template<typename STR>
 120 bool ReplaceCharsT(const STR& input,
 121                    const typename STR::value_type replace_chars[],
 122                    const STR& replace_with,
 123                    STR* output) {
 124   bool removed = false;
 125   size_t replace_length = replace_with.length();
 126
 127   *output = input;
 128
 129   size_t found = output->find_first_of(replace_chars);
 130   while (found != STR::npos) {
 131     removed = true;
 132     output->replace(found, 1, replace_with);
 133     found = output->find_first_of(replace_chars, found + replace_length);
 134   }
 135
 136   return removed;
 137 }
 138
 139 bool ReplaceChars(const string16& input,
 140                   const char16 replace_chars[],
 141                   const string16& replace_with,
 142                   string16* output) {
 143   return ReplaceCharsT(input, replace_chars, replace_with, output);
 144 }
 145
 146 bool ReplaceChars(const std::string& input,
 147                   const char replace_chars[],
 148                   const std::string& replace_with,
 149                   std::string* output) {
 150   return ReplaceCharsT(input, replace_chars, replace_with, output);
 151 }
 152
 153 bool RemoveChars(const string16& input,
 154                  const char16 remove_chars[],
 155                  string16* output) {
 156   return ReplaceChars(input, remove_chars, string16(), output);
 157 }
 158
 159 bool RemoveChars(const std::string& input,
 160                  const char remove_chars[],
 161                  std::string* output) {
 162   return ReplaceChars(input, remove_chars, std::string(), output);
 163 }
 164
 165 template<typename STR>
 166 TrimPositions TrimStringT(const STR& input,
 167                           const typename STR::value_type trim_chars[],
 168                           TrimPositions positions,
 169                           STR* output) {
 170   // Find the edges of leading/trailing whitespace as desired.
 171   const typename STR::size_type last_char = input.length() - 1;
 172   const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
 173       input.find_first_not_of(trim_chars) : 0;
 174   const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
 175       input.find_last_not_of(trim_chars) : last_char;
 176
 177   // When the string was all whitespace, report that we stripped off whitespace
 178   // from whichever position the caller was interested in.  For empty input, we
 179   // stripped no whitespace, but we still need to clear |output|.
 180   if (input.empty() ||
 181       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
 182     bool input_was_empty = input.empty();  // in case output == &input
 183     output->clear();
 184     return input_was_empty ? TRIM_NONE : positions;
 185   }
 186
 187   // Trim the whitespace.
 188   *output =
 189       input.substr(first_good_char, last_good_char - first_good_char + 1);
 190
 191   // Return where we trimmed from.
 192   return static_cast<TrimPositions>(
 193       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
 194       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
 195 }
 196
 197 bool TrimString(const string16& input,
 198                 const char16 trim_chars[],
 199                 string16* output) {
 200   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 201 }
 202
 203 bool TrimString(const std::string& input,
 204                 const char trim_chars[],
 205                 std::string* output) {
 206   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 207 }
 208
 209 void TruncateUTF8ToByteSize(const std::string& input,
 210                             const size_t byte_size,
 211                             std::string* output) {
 212   DCHECK(output);
 213   if (byte_size > input.length()) {
 214     *output = input;
 215     return;
 216   }
 217   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
 218   // Note: This cast is necessary because CBU8_NEXT uses int32s.
 219   int32 truncation_length = static_cast<int32>(byte_size);
 220   int32 char_index = truncation_length - 1;
 221   const char* data = input.data();
 222
 223   // Using CBU8, we will move backwards from the truncation point
 224   // to the beginning of the string looking for a valid UTF8
 225   // character.  Once a full UTF8 character is found, we will
 226   // truncate the string to the end of that character.
 227   while (char_index >= 0) {
 228     int32 prev = char_index;
 229     uint32 code_point = 0;
 230     CBU8_NEXT(data, char_index, truncation_length, code_point);
 231     if (!IsValidCharacter(code_point) ||
 232         !IsValidCodepoint(code_point)) {
 233       char_index = prev - 1;
 234     } else {
 235       break;
 236     }
 237   }
 238
 239   if (char_index >= 0 )
 240     *output = input.substr(0, char_index);
 241   else
 242     output->clear();
 243 }
 244
 245 }  // namespace base
 246
 247 TrimPositions TrimWhitespace(const base::string16& input,
 248                              TrimPositions positions,
 249                              base::string16* output) {
 250   return base::TrimStringT(input, base::kWhitespaceUTF16, positions, output);
 251 }
 252
 253 TrimPositions TrimWhitespaceASCII(const std::string& input,
 254                                   TrimPositions positions,
 255                                   std::string* output) {
 256   return base::TrimStringT(input, base::kWhitespaceASCII, positions, output);
 257 }
 258
 259 // This function is only for backward-compatibility.
 260 // To be removed when all callers are updated.
 261 TrimPositions TrimWhitespace(const std::string& input,
 262                              TrimPositions positions,
 263                              std::string* output) {
 264   return TrimWhitespaceASCII(input, positions, output);
 265 }
 266
 267 template<typename STR>
 268 STR CollapseWhitespaceT(const STR& text,
 269                         bool trim_sequences_with_line_breaks) {
 270   STR result;
 271   result.resize(text.size());
 272
 273   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 274   // will trim any leading whitespace.
 275   bool in_whitespace = true;
 276   bool already_trimmed = true;
 277
 278   int chars_written = 0;
 279   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 280     if (IsWhitespace(*i)) {
 281       if (!in_whitespace) {
 282         // Reduce all whitespace sequences to a single space.
 283         in_whitespace = true;
 284         result[chars_written++] = L' ';
 285       }
 286       if (trim_sequences_with_line_breaks && !already_trimmed &&
 287           ((*i == '\n') || (*i == '\r'))) {
 288         // Whitespace sequences containing CR or LF are eliminated entirely.
 289         already_trimmed = true;
 290         --chars_written;
 291       }
 292     } else {
 293       // Non-whitespace chracters are copied straight across.
 294       in_whitespace = false;
 295       already_trimmed = false;
 296       result[chars_written++] = *i;
 297     }
 298   }
 299
 300   if (in_whitespace && !already_trimmed) {
 301     // Any trailing whitespace is eliminated.
 302     --chars_written;
 303   }
 304
 305   result.resize(chars_written);
 306   return result;
 307 }
 308
 309 string16 CollapseWhitespace(const string16& text,
 310                             bool trim_sequences_with_line_breaks) {
 311   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 312 }
 313
 314 std::string CollapseWhitespaceASCII(const std::string& text,
 315                                     bool trim_sequences_with_line_breaks) {
 316   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 317 }
 318
 319 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
 320   for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
 321     if (!IsAsciiWhitespace(*i))
 322       return false;
 323   }
 324   return true;
 325 }
 326
 327 bool ContainsOnlyWhitespace(const base::string16& str) {
 328   return str.find_first_not_of(base::kWhitespaceUTF16) == string16::npos;
 329 }
 330
 331 template<typename STR>
 332 static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
 333   for (typename STR::const_iterator iter = input.begin();
 334        iter != input.end(); ++iter) {
 335     if (characters.find(*iter) == STR::npos)
 336       return false;
 337   }
 338   return true;
 339 }
 340
 341 bool ContainsOnlyChars(const string16& input, const string16& characters) {
 342   return ContainsOnlyCharsT(input, characters);
 343 }
 344
 345 bool ContainsOnlyChars(const std::string& input,
 346                        const std::string& characters) {
 347   return ContainsOnlyCharsT(input, characters);
 348 }
 349
 350 #if !defined(WCHAR_T_IS_UTF16)
 351 bool IsStringASCII(const std::wstring& str);
 352 #endif
 353
 354 std::string WideToASCII(const std::wstring& wide) {
 355   DCHECK(IsStringASCII(wide)) << wide;
 356   return std::string(wide.begin(), wide.end());
 357 }
 358
 359 std::string UTF16ToASCII(const string16& utf16) {
 360   DCHECK(IsStringASCII(utf16)) << utf16;
 361   return std::string(utf16.begin(), utf16.end());
 362 }
 363
 364 template<class STR>
 365 static bool DoIsStringASCII(const STR& str) {
 366   for (size_t i = 0; i < str.length(); i++) {
 367     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
 368     if (c > 0x7F)
 369       return false;
 370   }
 371   return true;
 372 }
 373
 374 #if !defined(WCHAR_T_IS_UTF16)
 375 bool IsStringASCII(const std::wstring& str) {
 376   return DoIsStringASCII(str);
 377 }
 378 #endif
 379
 380 bool IsStringASCII(const string16& str) {
 381   return DoIsStringASCII(str);
 382 }
 383
 384 bool IsStringASCII(const base::StringPiece& str) {
 385   return DoIsStringASCII(str);
 386 }
 387
 388 bool IsStringUTF8(const std::string& str) {
 389   const char *src = str.data();
 390   int32 src_len = static_cast<int32>(str.length());
 391   int32 char_index = 0;
 392
 393   while (char_index < src_len) {
 394     int32 code_point;
 395     CBU8_NEXT(src, char_index, src_len, code_point);
 396     if (!base::IsValidCharacter(code_point))
 397       return false;
 398   }
 399   return true;
 400 }
 401
 402 template<typename Iter>
 403 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
 404                                           Iter a_end,
 405                                           const char* b) {
 406   for (Iter it = a_begin; it != a_end; ++it, ++b) {
 407     if (!*b || base::ToLowerASCII(*it) != *b)
 408       return false;
 409   }
 410   return *b == 0;
 411 }
 412
 413 // Front-ends for LowerCaseEqualsASCII.
 414 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
 415   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 416 }
 417
 418 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
 419   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 420 }
 421
 422 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
 423                           std::string::const_iterator a_end,
 424                           const char* b) {
 425   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 426 }
 427
 428 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
 429                           string16::const_iterator a_end,
 430                           const char* b) {
 431   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 432 }
 433
 434 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
 435 #if !defined(OS_ANDROID)
 436 bool LowerCaseEqualsASCII(const char* a_begin,
 437                           const char* a_end,
 438                           const char* b) {
 439   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 440 }
 441
 442 bool LowerCaseEqualsASCII(const char16* a_begin,
 443                           const char16* a_end,
 444                           const char* b) {
 445   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 446 }
 447
 448 #endif  // !defined(OS_ANDROID)
 449
 450 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
 451   if (a.length() != b.length())
 452     return false;
 453   return std::equal(b.begin(), b.end(), a.begin());
 454 }
 455
 456 bool StartsWithASCII(const std::string& str,
 457                      const std::string& search,
 458                      bool case_sensitive) {
 459   if (case_sensitive)
 460     return str.compare(0, search.length(), search) == 0;
 461   else
 462     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
 463 }
 464
 465 template <typename STR>
 466 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
 467   if (case_sensitive) {
 468     return str.compare(0, search.length(), search) == 0;
 469   } else {
 470     if (search.size() > str.size())
 471       return false;
 472     return std::equal(search.begin(), search.end(), str.begin(),
 473                       base::CaseInsensitiveCompare<typename STR::value_type>());
 474   }
 475 }
 476
 477 bool StartsWith(const string16& str, const string16& search,
 478                 bool case_sensitive) {
 479   return StartsWithT(str, search, case_sensitive);
 480 }
 481
 482 template <typename STR>
 483 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
 484   typename STR::size_type str_length = str.length();
 485   typename STR::size_type search_length = search.length();
 486   if (search_length > str_length)
 487     return false;
 488   if (case_sensitive) {
 489     return str.compare(str_length - search_length, search_length, search) == 0;
 490   } else {
 491     return std::equal(search.begin(), search.end(),
 492                       str.begin() + (str_length - search_length),
 493                       base::CaseInsensitiveCompare<typename STR::value_type>());
 494   }
 495 }
 496
 497 bool EndsWith(const std::string& str, const std::string& search,
 498               bool case_sensitive) {
 499   return EndsWithT(str, search, case_sensitive);
 500 }
 501
 502 bool EndsWith(const string16& str, const string16& search,
 503               bool case_sensitive) {
 504   return EndsWithT(str, search, case_sensitive);
 505 }
 506
 507 static const char* const kByteStringsUnlocalized[] = {
 508   " B",
 509   " kB",
 510   " MB",
 511   " GB",
 512   " TB",
 513   " PB"
 514 };
 515
 516 string16 FormatBytesUnlocalized(int64 bytes) {
 517   double unit_amount = static_cast<double>(bytes);
 518   size_t dimension = 0;
 519   const int kKilo = 1024;
 520   while (unit_amount >= kKilo &&
 521          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 522     unit_amount /= kKilo;
 523     dimension++;
 524   }
 525
 526   char buf[64];
 527   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 528     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 529                    kByteStringsUnlocalized[dimension]);
 530   } else {
 531     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 532                    kByteStringsUnlocalized[dimension]);
 533   }
 534
 535   return base::ASCIIToUTF16(buf);
 536 }
 537
 538 template<class StringType>
 539 void DoReplaceSubstringsAfterOffset(StringType* str,
 540                                     typename StringType::size_type start_offset,
 541                                     const StringType& find_this,
 542                                     const StringType& replace_with,
 543                                     bool replace_all) {
 544   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
 545     return;
 546
 547   DCHECK(!find_this.empty());
 548   for (typename StringType::size_type offs(str->find(find_this, start_offset));
 549       offs != StringType::npos; offs = str->find(find_this, offs)) {
 550     str->replace(offs, find_this.length(), replace_with);
 551     offs += replace_with.length();
 552
 553     if (!replace_all)
 554       break;
 555   }
 556 }
 557
 558 void ReplaceFirstSubstringAfterOffset(string16* str,
 559                                       string16::size_type start_offset,
 560                                       const string16& find_this,
 561                                       const string16& replace_with) {
 562   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 563                                  false);  // replace first instance
 564 }
 565
 566 void ReplaceFirstSubstringAfterOffset(std::string* str,
 567                                       std::string::size_type start_offset,
 568                                       const std::string& find_this,
 569                                       const std::string& replace_with) {
 570   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 571                                  false);  // replace first instance
 572 }
 573
 574 void ReplaceSubstringsAfterOffset(string16* str,
 575                                   string16::size_type start_offset,
 576                                   const string16& find_this,
 577                                   const string16& replace_with) {
 578   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 579                                  true);  // replace all instances
 580 }
 581
 582 void ReplaceSubstringsAfterOffset(std::string* str,
 583                                   std::string::size_type start_offset,
 584                                   const std::string& find_this,
 585                                   const std::string& replace_with) {
 586   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 587                                  true);  // replace all instances
 588 }
 589
 590
 591 template<typename STR>
 592 static size_t TokenizeT(const STR& str,
 593                         const STR& delimiters,
 594                         std::vector<STR>* tokens) {
 595   tokens->clear();
 596
 597   typename STR::size_type start = str.find_first_not_of(delimiters);
 598   while (start != STR::npos) {
 599     typename STR::size_type end = str.find_first_of(delimiters, start + 1);
 600     if (end == STR::npos) {
 601       tokens->push_back(str.substr(start));
 602       break;
 603     } else {
 604       tokens->push_back(str.substr(start, end - start));
 605       start = str.find_first_not_of(delimiters, end + 1);
 606     }
 607   }
 608
 609   return tokens->size();
 610 }
 611
 612 size_t Tokenize(const string16& str,
 613                 const string16& delimiters,
 614                 std::vector<string16>* tokens) {
 615   return TokenizeT(str, delimiters, tokens);
 616 }
 617
 618 size_t Tokenize(const std::string& str,
 619                 const std::string& delimiters,
 620                 std::vector<std::string>* tokens) {
 621   return TokenizeT(str, delimiters, tokens);
 622 }
 623
 624 size_t Tokenize(const base::StringPiece& str,
 625                 const base::StringPiece& delimiters,
 626                 std::vector<base::StringPiece>* tokens) {
 627   return TokenizeT(str, delimiters, tokens);
 628 }
 629
 630 template<typename STR>
 631 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
 632   if (parts.empty())
 633     return STR();
 634
 635   STR result(parts[0]);
 636   typename std::vector<STR>::const_iterator iter = parts.begin();
 637   ++iter;
 638
 639   for (; iter != parts.end(); ++iter) {
 640     result += sep;
 641     result += *iter;
 642   }
 643
 644   return result;
 645 }
 646
 647 std::string JoinString(const std::vector<std::string>& parts, char sep) {
 648   return JoinStringT(parts, std::string(1, sep));
 649 }
 650
 651 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
 652   return JoinStringT(parts, string16(1, sep));
 653 }
 654
 655 std::string JoinString(const std::vector<std::string>& parts,
 656                        const std::string& separator) {
 657   return JoinStringT(parts, separator);
 658 }
 659
 660 string16 JoinString(const std::vector<string16>& parts,
 661                     const string16& separator) {
 662   return JoinStringT(parts, separator);
 663 }
 664
 665 template<class FormatStringType, class OutStringType>
 666 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
 667     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
 668   size_t substitutions = subst.size();
 669
 670   size_t sub_length = 0;
 671   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
 672        iter != subst.end(); ++iter) {
 673     sub_length += iter->length();
 674   }
 675
 676   OutStringType formatted;
 677   formatted.reserve(format_string.length() + sub_length);
 678
 679   std::vector<ReplacementOffset> r_offsets;
 680   for (typename FormatStringType::const_iterator i = format_string.begin();
 681        i != format_string.end(); ++i) {
 682     if ('$' == *i) {
 683       if (i + 1 != format_string.end()) {
 684         ++i;
 685         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 686         if ('$' == *i) {
 687           while (i != format_string.end() && '$' == *i) {
 688             formatted.push_back('$');
 689             ++i;
 690           }
 691           --i;
 692         } else {
 693           uintptr_t index = 0;
 694           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 695             index *= 10;
 696             index += *i - '0';
 697             ++i;
 698           }
 699           --i;
 700           index -= 1;
 701           if (offsets) {
 702             ReplacementOffset r_offset(index,
 703                 static_cast<int>(formatted.size()));
 704             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 705                                               r_offsets.end(),
 706                                               r_offset,
 707                                               &CompareParameter),
 708                              r_offset);
 709           }
 710           if (index < substitutions)
 711             formatted.append(subst.at(index));
 712         }
 713       }
 714     } else {
 715       formatted.push_back(*i);
 716     }
 717   }
 718   if (offsets) {
 719     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
 720          i != r_offsets.end(); ++i) {
 721       offsets->push_back(i->offset);
 722     }
 723   }
 724   return formatted;
 725 }
 726
 727 string16 ReplaceStringPlaceholders(const string16& format_string,
 728                                    const std::vector<string16>& subst,
 729                                    std::vector<size_t>* offsets) {
 730   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 731 }
 732
 733 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
 734                                       const std::vector<std::string>& subst,
 735                                       std::vector<size_t>* offsets) {
 736   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 737 }
 738
 739 string16 ReplaceStringPlaceholders(const string16& format_string,
 740                                    const string16& a,
 741                                    size_t* offset) {
 742   std::vector<size_t> offsets;
 743   std::vector<string16> subst;
 744   subst.push_back(a);
 745   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
 746
 747   DCHECK_EQ(1U, offsets.size());
 748   if (offset)
 749     *offset = offsets[0];
 750   return result;
 751 }
 752
 753 static bool IsWildcard(base_icu::UChar32 character) {
 754   return character == '*' || character == '?';
 755 }
 756
 757 // Move the strings pointers to the point where they start to differ.
 758 template <typename CHAR, typename NEXT>
 759 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
 760                          const CHAR** string, const CHAR* string_end,
 761                          NEXT next) {
 762   const CHAR* escape = NULL;
 763   while (*pattern != pattern_end && *string != string_end) {
 764     if (!escape && IsWildcard(**pattern)) {
 765       // We don't want to match wildcard here, except if it's escaped.
 766       return;
 767     }
 768
 769     // Check if the escapement char is found. If so, skip it and move to the
 770     // next character.
 771     if (!escape && **pattern == '\\') {
 772       escape = *pattern;
 773       next(pattern, pattern_end);
 774       continue;
 775     }
 776
 777     // Check if the chars match, if so, increment the ptrs.
 778     const CHAR* pattern_next = *pattern;
 779     const CHAR* string_next = *string;
 780     base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
 781     if (pattern_char == next(&string_next, string_end) &&
 782         pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
 783       *pattern = pattern_next;
 784       *string = string_next;
 785     } else {
 786       // Uh ho, it did not match, we are done. If the last char was an
 787       // escapement, that means that it was an error to advance the ptr here,
 788       // let's put it back where it was. This also mean that the MatchPattern
 789       // function will return false because if we can't match an escape char
 790       // here, then no one will.
 791       if (escape) {
 792         *pattern = escape;
 793       }
 794       return;
 795     }
 796
 797     escape = NULL;
 798   }
 799 }
 800
 801 template <typename CHAR, typename NEXT>
 802 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
 803   while (*pattern != end) {
 804     if (!IsWildcard(**pattern))
 805       return;
 806     next(pattern, end);
 807   }
 808 }
 809
 810 template <typename CHAR, typename NEXT>
 811 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
 812                           const CHAR* pattern, const CHAR* pattern_end,
 813                           int depth,
 814                           NEXT next) {
 815   const int kMaxDepth = 16;
 816   if (depth > kMaxDepth)
 817     return false;
 818
 819   // Eat all the matching chars.
 820   EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
 821
 822   // If the string is empty, then the pattern must be empty too, or contains
 823   // only wildcards.
 824   if (eval == eval_end) {
 825     EatWildcard(&pattern, pattern_end, next);
 826     return pattern == pattern_end;
 827   }
 828
 829   // Pattern is empty but not string, this is not a match.
 830   if (pattern == pattern_end)
 831     return false;
 832
 833   // If this is a question mark, then we need to compare the rest with
 834   // the current string or the string with one character eaten.
 835   const CHAR* next_pattern = pattern;
 836   next(&next_pattern, pattern_end);
 837   if (pattern[0] == '?') {
 838     if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
 839                       depth + 1, next))
 840       return true;
 841     const CHAR* next_eval = eval;
 842     next(&next_eval, eval_end);
 843     if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
 844                       depth + 1, next))
 845       return true;
 846   }
 847
 848   // This is a *, try to match all the possible substrings with the remainder
 849   // of the pattern.
 850   if (pattern[0] == '*') {
 851     // Collapse duplicate wild cards (********** into *) so that the
 852     // method does not recurse unnecessarily. http://crbug.com/52839
 853     EatWildcard(&next_pattern, pattern_end, next);
 854
 855     while (eval != eval_end) {
 856       if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
 857                         depth + 1, next))
 858         return true;
 859       eval++;
 860     }
 861
 862     // We reached the end of the string, let see if the pattern contains only
 863     // wildcards.
 864     if (eval == eval_end) {
 865       EatWildcard(&pattern, pattern_end, next);
 866       if (pattern != pattern_end)
 867         return false;
 868       return true;
 869     }
 870   }
 871
 872   return false;
 873 }
 874
 875 struct NextCharUTF8 {
 876   base_icu::UChar32 operator()(const char** p, const char* end) {
 877     base_icu::UChar32 c;
 878     int offset = 0;
 879     CBU8_NEXT(*p, offset, end - *p, c);
 880     *p += offset;
 881     return c;
 882   }
 883 };
 884
 885 struct NextCharUTF16 {
 886   base_icu::UChar32 operator()(const char16** p, const char16* end) {
 887     base_icu::UChar32 c;
 888     int offset = 0;
 889     CBU16_NEXT(*p, offset, end - *p, c);
 890     *p += offset;
 891     return c;
 892   }
 893 };
 894
 895 bool MatchPattern(const base::StringPiece& eval,
 896                   const base::StringPiece& pattern) {
 897   return MatchPatternT(eval.data(), eval.data() + eval.size(),
 898                        pattern.data(), pattern.data() + pattern.size(),
 899                        0, NextCharUTF8());
 900 }
 901
 902 bool MatchPattern(const string16& eval, const string16& pattern) {
 903   return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
 904                        pattern.c_str(), pattern.c_str() + pattern.size(),
 905                        0, NextCharUTF16());
 906 }
 907
 908 // The following code is compatible with the OpenBSD lcpy interface.  See:
 909 //   http://www.gratisoft.us/todd/papers/strlcpy.html
 910 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
 911
 912 namespace {
 913
 914 template <typename CHAR>
 915 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
 916   for (size_t i = 0; i < dst_size; ++i) {
 917     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
 918       return i;
 919   }
 920
 921   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
 922   if (dst_size != 0)
 923     dst[dst_size - 1] = 0;
 924
 925   // Count the rest of the |src|, and return it's length in characters.
 926   while (src[dst_size]) ++dst_size;
 927   return dst_size;
 928 }
 929
 930 }  // namespace
 931
 932 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
 933   return lcpyT<char>(dst, src, dst_size);
 934 }
 935 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
 936   return lcpyT<wchar_t>(dst, src, dst_size);
 937 }