base/string_util.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/string_util.h"
   6
   7 #include "build/build_config.h"
   8
   9 #include <ctype.h>
  10 #include <errno.h>
  11 #include <math.h>
  12 #include <stdarg.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15 #include <string.h>
  16 #include <time.h>
  17 #include <wchar.h>
  18 #include <wctype.h>
  19
  20 #include <algorithm>
  21 #include <vector>
  22
  23 #include "base/basictypes.h"
  24 #include "base/logging.h"
  25 #include "base/memory/singleton.h"
  26 #include "base/strings/utf_string_conversion_utils.h"
  27 #include "base/strings/utf_string_conversions.h"
  28 #include "base/third_party/icu/icu_utf.h"
  29
  30 namespace {
  31
  32 // Force the singleton used by Empty[W]String[16] to be a unique type. This
  33 // prevents other code that might accidentally use Singleton<string> from
  34 // getting our internal one.
  35 struct EmptyStrings {
  36   EmptyStrings() {}
  37   const std::string s;
  38   const std::wstring ws;
  39   const string16 s16;
  40
  41   static EmptyStrings* GetInstance() {
  42     return Singleton<EmptyStrings>::get();
  43   }
  44 };
  45
  46 // Used by ReplaceStringPlaceholders to track the position in the string of
  47 // replaced parameters.
  48 struct ReplacementOffset {
  49   ReplacementOffset(uintptr_t parameter, size_t offset)
  50       : parameter(parameter),
  51         offset(offset) {}
  52
  53   // Index of the parameter.
  54   uintptr_t parameter;
  55
  56   // Starting position in the string.
  57   size_t offset;
  58 };
  59
  60 static bool CompareParameter(const ReplacementOffset& elem1,
  61                              const ReplacementOffset& elem2) {
  62   return elem1.parameter < elem2.parameter;
  63 }
  64
  65 }  // namespace
  66
  67 namespace base {
  68
  69 bool IsWprintfFormatPortable(const wchar_t* format) {
  70   for (const wchar_t* position = format; *position != '\0'; ++position) {
  71     if (*position == '%') {
  72       bool in_specification = true;
  73       bool modifier_l = false;
  74       while (in_specification) {
  75         // Eat up characters until reaching a known specifier.
  76         if (*++position == '\0') {
  77           // The format string ended in the middle of a specification.  Call
  78           // it portable because no unportable specifications were found.  The
  79           // string is equally broken on all platforms.
  80           return true;
  81         }
  82
  83         if (*position == 'l') {
  84           // 'l' is the only thing that can save the 's' and 'c' specifiers.
  85           modifier_l = true;
  86         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
  87                    *position == 'S' || *position == 'C' || *position == 'F' ||
  88                    *position == 'D' || *position == 'O' || *position == 'U') {
  89           // Not portable.
  90           return false;
  91         }
  92
  93         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
  94           // Portable, keep scanning the rest of the format string.
  95           in_specification = false;
  96         }
  97       }
  98     }
  99   }
 100
 101   return true;
 102 }
 103
 104 }  // namespace base
 105
 106
 107 const std::string& EmptyString() {
 108   return EmptyStrings::GetInstance()->s;
 109 }
 110
 111 const std::wstring& EmptyWString() {
 112   return EmptyStrings::GetInstance()->ws;
 113 }
 114
 115 const string16& EmptyString16() {
 116   return EmptyStrings::GetInstance()->s16;
 117 }
 118
 119 template<typename STR>
 120 bool ReplaceCharsT(const STR& input,
 121                    const typename STR::value_type replace_chars[],
 122                    const STR& replace_with,
 123                    STR* output) {
 124   bool removed = false;
 125   size_t replace_length = replace_with.length();
 126
 127   *output = input;
 128
 129   size_t found = output->find_first_of(replace_chars);
 130   while (found != STR::npos) {
 131     removed = true;
 132     output->replace(found, 1, replace_with);
 133     found = output->find_first_of(replace_chars, found + replace_length);
 134   }
 135
 136   return removed;
 137 }
 138
 139 bool ReplaceChars(const string16& input,
 140                   const char16 replace_chars[],
 141                   const string16& replace_with,
 142                   string16* output) {
 143   return ReplaceCharsT(input, replace_chars, replace_with, output);
 144 }
 145
 146 bool ReplaceChars(const std::string& input,
 147                   const char replace_chars[],
 148                   const std::string& replace_with,
 149                   std::string* output) {
 150   return ReplaceCharsT(input, replace_chars, replace_with, output);
 151 }
 152
 153 bool RemoveChars(const string16& input,
 154                  const char16 remove_chars[],
 155                  string16* output) {
 156   return ReplaceChars(input, remove_chars, string16(), output);
 157 }
 158
 159 bool RemoveChars(const std::string& input,
 160                  const char remove_chars[],
 161                  std::string* output) {
 162   return ReplaceChars(input, remove_chars, std::string(), output);
 163 }
 164
 165 template<typename STR>
 166 TrimPositions TrimStringT(const STR& input,
 167                           const typename STR::value_type trim_chars[],
 168                           TrimPositions positions,
 169                           STR* output) {
 170   // Find the edges of leading/trailing whitespace as desired.
 171   const typename STR::size_type last_char = input.length() - 1;
 172   const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
 173       input.find_first_not_of(trim_chars) : 0;
 174   const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
 175       input.find_last_not_of(trim_chars) : last_char;
 176
 177   // When the string was all whitespace, report that we stripped off whitespace
 178   // from whichever position the caller was interested in.  For empty input, we
 179   // stripped no whitespace, but we still need to clear |output|.
 180   if (input.empty() ||
 181       (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
 182     bool input_was_empty = input.empty();  // in case output == &input
 183     output->clear();
 184     return input_was_empty ? TRIM_NONE : positions;
 185   }
 186
 187   // Trim the whitespace.
 188   *output =
 189       input.substr(first_good_char, last_good_char - first_good_char + 1);
 190
 191   // Return where we trimmed from.
 192   return static_cast<TrimPositions>(
 193       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
 194       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
 195 }
 196
 197 bool TrimString(const std::wstring& input,
 198                 const wchar_t trim_chars[],
 199                 std::wstring* output) {
 200   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 201 }
 202
 203 #if !defined(WCHAR_T_IS_UTF16)
 204 bool TrimString(const string16& input,
 205                 const char16 trim_chars[],
 206                 string16* output) {
 207   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 208 }
 209 #endif
 210
 211 bool TrimString(const std::string& input,
 212                 const char trim_chars[],
 213                 std::string* output) {
 214   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 215 }
 216
 217 void TruncateUTF8ToByteSize(const std::string& input,
 218                             const size_t byte_size,
 219                             std::string* output) {
 220   DCHECK(output);
 221   if (byte_size > input.length()) {
 222     *output = input;
 223     return;
 224   }
 225   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
 226   // Note: This cast is necessary because CBU8_NEXT uses int32s.
 227   int32 truncation_length = static_cast<int32>(byte_size);
 228   int32 char_index = truncation_length - 1;
 229   const char* data = input.data();
 230
 231   // Using CBU8, we will move backwards from the truncation point
 232   // to the beginning of the string looking for a valid UTF8
 233   // character.  Once a full UTF8 character is found, we will
 234   // truncate the string to the end of that character.
 235   while (char_index >= 0) {
 236     int32 prev = char_index;
 237     uint32 code_point = 0;
 238     CBU8_NEXT(data, char_index, truncation_length, code_point);
 239     if (!base::IsValidCharacter(code_point) ||
 240         !base::IsValidCodepoint(code_point)) {
 241       char_index = prev - 1;
 242     } else {
 243       break;
 244     }
 245   }
 246
 247   if (char_index >= 0 )
 248     *output = input.substr(0, char_index);
 249   else
 250     output->clear();
 251 }
 252
 253 TrimPositions TrimWhitespace(const string16& input,
 254                              TrimPositions positions,
 255                              string16* output) {
 256   return TrimStringT(input, kWhitespaceUTF16, positions, output);
 257 }
 258
 259 TrimPositions TrimWhitespaceASCII(const std::string& input,
 260                                   TrimPositions positions,
 261                                   std::string* output) {
 262   return TrimStringT(input, kWhitespaceASCII, positions, output);
 263 }
 264
 265 // This function is only for backward-compatibility.
 266 // To be removed when all callers are updated.
 267 TrimPositions TrimWhitespace(const std::string& input,
 268                              TrimPositions positions,
 269                              std::string* output) {
 270   return TrimWhitespaceASCII(input, positions, output);
 271 }
 272
 273 template<typename STR>
 274 STR CollapseWhitespaceT(const STR& text,
 275                         bool trim_sequences_with_line_breaks) {
 276   STR result;
 277   result.resize(text.size());
 278
 279   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 280   // will trim any leading whitespace.
 281   bool in_whitespace = true;
 282   bool already_trimmed = true;
 283
 284   int chars_written = 0;
 285   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 286     if (IsWhitespace(*i)) {
 287       if (!in_whitespace) {
 288         // Reduce all whitespace sequences to a single space.
 289         in_whitespace = true;
 290         result[chars_written++] = L' ';
 291       }
 292       if (trim_sequences_with_line_breaks && !already_trimmed &&
 293           ((*i == '\n') || (*i == '\r'))) {
 294         // Whitespace sequences containing CR or LF are eliminated entirely.
 295         already_trimmed = true;
 296         --chars_written;
 297       }
 298     } else {
 299       // Non-whitespace chracters are copied straight across.
 300       in_whitespace = false;
 301       already_trimmed = false;
 302       result[chars_written++] = *i;
 303     }
 304   }
 305
 306   if (in_whitespace && !already_trimmed) {
 307     // Any trailing whitespace is eliminated.
 308     --chars_written;
 309   }
 310
 311   result.resize(chars_written);
 312   return result;
 313 }
 314
 315 std::wstring CollapseWhitespace(const std::wstring& text,
 316                                 bool trim_sequences_with_line_breaks) {
 317   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 318 }
 319
 320 #if !defined(WCHAR_T_IS_UTF16)
 321 string16 CollapseWhitespace(const string16& text,
 322                             bool trim_sequences_with_line_breaks) {
 323   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 324 }
 325 #endif
 326
 327 std::string CollapseWhitespaceASCII(const std::string& text,
 328                                     bool trim_sequences_with_line_breaks) {
 329   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 330 }
 331
 332 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
 333   for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
 334     if (!IsAsciiWhitespace(*i))
 335       return false;
 336   }
 337   return true;
 338 }
 339
 340 bool ContainsOnlyWhitespace(const string16& str) {
 341   return str.find_first_not_of(kWhitespaceUTF16) == string16::npos;
 342 }
 343
 344 template<typename STR>
 345 static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
 346   for (typename STR::const_iterator iter = input.begin();
 347        iter != input.end(); ++iter) {
 348     if (characters.find(*iter) == STR::npos)
 349       return false;
 350   }
 351   return true;
 352 }
 353
 354 bool ContainsOnlyChars(const std::wstring& input,
 355                        const std::wstring& characters) {
 356   return ContainsOnlyCharsT(input, characters);
 357 }
 358
 359 #if !defined(WCHAR_T_IS_UTF16)
 360 bool ContainsOnlyChars(const string16& input, const string16& characters) {
 361   return ContainsOnlyCharsT(input, characters);
 362 }
 363 #endif
 364
 365 bool ContainsOnlyChars(const std::string& input,
 366                        const std::string& characters) {
 367   return ContainsOnlyCharsT(input, characters);
 368 }
 369
 370 std::string WideToASCII(const std::wstring& wide) {
 371   DCHECK(IsStringASCII(wide)) << wide;
 372   return std::string(wide.begin(), wide.end());
 373 }
 374
 375 std::string UTF16ToASCII(const string16& utf16) {
 376   DCHECK(IsStringASCII(utf16)) << utf16;
 377   return std::string(utf16.begin(), utf16.end());
 378 }
 379
 380 // Latin1 is just the low range of Unicode, so we can copy directly to convert.
 381 bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
 382   std::string output;
 383   output.resize(wide.size());
 384   latin1->clear();
 385   for (size_t i = 0; i < wide.size(); i++) {
 386     if (wide[i] > 255)
 387       return false;
 388     output[i] = static_cast<char>(wide[i]);
 389   }
 390   latin1->swap(output);
 391   return true;
 392 }
 393
 394 template<class STR>
 395 static bool DoIsStringASCII(const STR& str) {
 396   for (size_t i = 0; i < str.length(); i++) {
 397     typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
 398     if (c > 0x7F)
 399       return false;
 400   }
 401   return true;
 402 }
 403
 404 bool IsStringASCII(const std::wstring& str) {
 405   return DoIsStringASCII(str);
 406 }
 407
 408 #if !defined(WCHAR_T_IS_UTF16)
 409 bool IsStringASCII(const string16& str) {
 410   return DoIsStringASCII(str);
 411 }
 412 #endif
 413
 414 bool IsStringASCII(const base::StringPiece& str) {
 415   return DoIsStringASCII(str);
 416 }
 417
 418 bool IsStringUTF8(const std::string& str) {
 419   const char *src = str.data();
 420   int32 src_len = static_cast<int32>(str.length());
 421   int32 char_index = 0;
 422
 423   while (char_index < src_len) {
 424     int32 code_point;
 425     CBU8_NEXT(src, char_index, src_len, code_point);
 426     if (!base::IsValidCharacter(code_point))
 427       return false;
 428   }
 429   return true;
 430 }
 431
 432 template<typename Iter>
 433 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
 434                                           Iter a_end,
 435                                           const char* b) {
 436   for (Iter it = a_begin; it != a_end; ++it, ++b) {
 437     if (!*b || base::ToLowerASCII(*it) != *b)
 438       return false;
 439   }
 440   return *b == 0;
 441 }
 442
 443 // Front-ends for LowerCaseEqualsASCII.
 444 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
 445   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 446 }
 447
 448 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
 449   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 450 }
 451
 452 #if !defined(WCHAR_T_IS_UTF16)
 453 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
 454   return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
 455 }
 456 #endif
 457
 458 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
 459                           std::string::const_iterator a_end,
 460                           const char* b) {
 461   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 462 }
 463
 464 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
 465                           std::wstring::const_iterator a_end,
 466                           const char* b) {
 467   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 468 }
 469
 470 #if !defined(WCHAR_T_IS_UTF16)
 471 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
 472                           string16::const_iterator a_end,
 473                           const char* b) {
 474   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 475 }
 476 #endif
 477
 478 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
 479 #if !defined(OS_ANDROID)
 480 bool LowerCaseEqualsASCII(const char* a_begin,
 481                           const char* a_end,
 482                           const char* b) {
 483   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 484 }
 485
 486 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
 487                           const wchar_t* a_end,
 488                           const char* b) {
 489   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 490 }
 491
 492 #if !defined(WCHAR_T_IS_UTF16)
 493 bool LowerCaseEqualsASCII(const char16* a_begin,
 494                           const char16* a_end,
 495                           const char* b) {
 496   return DoLowerCaseEqualsASCII(a_begin, a_end, b);
 497 }
 498 #endif
 499
 500 #endif  // !defined(OS_ANDROID)
 501
 502 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
 503   if (a.length() != b.length())
 504     return false;
 505   return std::equal(b.begin(), b.end(), a.begin());
 506 }
 507
 508 bool StartsWithASCII(const std::string& str,
 509                      const std::string& search,
 510                      bool case_sensitive) {
 511   if (case_sensitive)
 512     return str.compare(0, search.length(), search) == 0;
 513   else
 514     return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
 515 }
 516
 517 template <typename STR>
 518 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
 519   if (case_sensitive) {
 520     return str.compare(0, search.length(), search) == 0;
 521   } else {
 522     if (search.size() > str.size())
 523       return false;
 524     return std::equal(search.begin(), search.end(), str.begin(),
 525                       base::CaseInsensitiveCompare<typename STR::value_type>());
 526   }
 527 }
 528
 529 bool StartsWith(const std::wstring& str, const std::wstring& search,
 530                 bool case_sensitive) {
 531   return StartsWithT(str, search, case_sensitive);
 532 }
 533
 534 #if !defined(WCHAR_T_IS_UTF16)
 535 bool StartsWith(const string16& str, const string16& search,
 536                 bool case_sensitive) {
 537   return StartsWithT(str, search, case_sensitive);
 538 }
 539 #endif
 540
 541 template <typename STR>
 542 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
 543   typename STR::size_type str_length = str.length();
 544   typename STR::size_type search_length = search.length();
 545   if (search_length > str_length)
 546     return false;
 547   if (case_sensitive) {
 548     return str.compare(str_length - search_length, search_length, search) == 0;
 549   } else {
 550     return std::equal(search.begin(), search.end(),
 551                       str.begin() + (str_length - search_length),
 552                       base::CaseInsensitiveCompare<typename STR::value_type>());
 553   }
 554 }
 555
 556 bool EndsWith(const std::string& str, const std::string& search,
 557               bool case_sensitive) {
 558   return EndsWithT(str, search, case_sensitive);
 559 }
 560
 561 bool EndsWith(const std::wstring& str, const std::wstring& search,
 562               bool case_sensitive) {
 563   return EndsWithT(str, search, case_sensitive);
 564 }
 565
 566 #if !defined(WCHAR_T_IS_UTF16)
 567 bool EndsWith(const string16& str, const string16& search,
 568               bool case_sensitive) {
 569   return EndsWithT(str, search, case_sensitive);
 570 }
 571 #endif
 572
 573 static const char* const kByteStringsUnlocalized[] = {
 574   " B",
 575   " kB",
 576   " MB",
 577   " GB",
 578   " TB",
 579   " PB"
 580 };
 581
 582 string16 FormatBytesUnlocalized(int64 bytes) {
 583   double unit_amount = static_cast<double>(bytes);
 584   size_t dimension = 0;
 585   const int kKilo = 1024;
 586   while (unit_amount >= kKilo &&
 587          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 588     unit_amount /= kKilo;
 589     dimension++;
 590   }
 591
 592   char buf[64];
 593   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 594     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 595                    kByteStringsUnlocalized[dimension]);
 596   } else {
 597     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 598                    kByteStringsUnlocalized[dimension]);
 599   }
 600
 601   return ASCIIToUTF16(buf);
 602 }
 603
 604 template<class StringType>
 605 void DoReplaceSubstringsAfterOffset(StringType* str,
 606                                     typename StringType::size_type start_offset,
 607                                     const StringType& find_this,
 608                                     const StringType& replace_with,
 609                                     bool replace_all) {
 610   if ((start_offset == StringType::npos) || (start_offset >= str->length()))
 611     return;
 612
 613   DCHECK(!find_this.empty());
 614   for (typename StringType::size_type offs(str->find(find_this, start_offset));
 615       offs != StringType::npos; offs = str->find(find_this, offs)) {
 616     str->replace(offs, find_this.length(), replace_with);
 617     offs += replace_with.length();
 618
 619     if (!replace_all)
 620       break;
 621   }
 622 }
 623
 624 void ReplaceFirstSubstringAfterOffset(string16* str,
 625                                       string16::size_type start_offset,
 626                                       const string16& find_this,
 627                                       const string16& replace_with) {
 628   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 629                                  false);  // replace first instance
 630 }
 631
 632 void ReplaceFirstSubstringAfterOffset(std::string* str,
 633                                       std::string::size_type start_offset,
 634                                       const std::string& find_this,
 635                                       const std::string& replace_with) {
 636   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 637                                  false);  // replace first instance
 638 }
 639
 640 void ReplaceSubstringsAfterOffset(string16* str,
 641                                   string16::size_type start_offset,
 642                                   const string16& find_this,
 643                                   const string16& replace_with) {
 644   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 645                                  true);  // replace all instances
 646 }
 647
 648 void ReplaceSubstringsAfterOffset(std::string* str,
 649                                   std::string::size_type start_offset,
 650                                   const std::string& find_this,
 651                                   const std::string& replace_with) {
 652   DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
 653                                  true);  // replace all instances
 654 }
 655
 656
 657 template<typename STR>
 658 static size_t TokenizeT(const STR& str,
 659                         const STR& delimiters,
 660                         std::vector<STR>* tokens) {
 661   tokens->clear();
 662
 663   typename STR::size_type start = str.find_first_not_of(delimiters);
 664   while (start != STR::npos) {
 665     typename STR::size_type end = str.find_first_of(delimiters, start + 1);
 666     if (end == STR::npos) {
 667       tokens->push_back(str.substr(start));
 668       break;
 669     } else {
 670       tokens->push_back(str.substr(start, end - start));
 671       start = str.find_first_not_of(delimiters, end + 1);
 672     }
 673   }
 674
 675   return tokens->size();
 676 }
 677
 678 size_t Tokenize(const std::wstring& str,
 679                 const std::wstring& delimiters,
 680                 std::vector<std::wstring>* tokens) {
 681   return TokenizeT(str, delimiters, tokens);
 682 }
 683
 684 #if !defined(WCHAR_T_IS_UTF16)
 685 size_t Tokenize(const string16& str,
 686                 const string16& delimiters,
 687                 std::vector<string16>* tokens) {
 688   return TokenizeT(str, delimiters, tokens);
 689 }
 690 #endif
 691
 692 size_t Tokenize(const std::string& str,
 693                 const std::string& delimiters,
 694                 std::vector<std::string>* tokens) {
 695   return TokenizeT(str, delimiters, tokens);
 696 }
 697
 698 size_t Tokenize(const base::StringPiece& str,
 699                 const base::StringPiece& delimiters,
 700                 std::vector<base::StringPiece>* tokens) {
 701   return TokenizeT(str, delimiters, tokens);
 702 }
 703
 704 template<typename STR>
 705 static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
 706   if (parts.empty())
 707     return STR();
 708
 709   STR result(parts[0]);
 710   typename std::vector<STR>::const_iterator iter = parts.begin();
 711   ++iter;
 712
 713   for (; iter != parts.end(); ++iter) {
 714     result += sep;
 715     result += *iter;
 716   }
 717
 718   return result;
 719 }
 720
 721 std::string JoinString(const std::vector<std::string>& parts, char sep) {
 722   return JoinStringT(parts, std::string(1, sep));
 723 }
 724
 725 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
 726   return JoinStringT(parts, string16(1, sep));
 727 }
 728
 729 std::string JoinString(const std::vector<std::string>& parts,
 730                        const std::string& separator) {
 731   return JoinStringT(parts, separator);
 732 }
 733
 734 string16 JoinString(const std::vector<string16>& parts,
 735                     const string16& separator) {
 736   return JoinStringT(parts, separator);
 737 }
 738
 739 template<class FormatStringType, class OutStringType>
 740 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
 741     const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
 742   size_t substitutions = subst.size();
 743
 744   size_t sub_length = 0;
 745   for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
 746        iter != subst.end(); ++iter) {
 747     sub_length += iter->length();
 748   }
 749
 750   OutStringType formatted;
 751   formatted.reserve(format_string.length() + sub_length);
 752
 753   std::vector<ReplacementOffset> r_offsets;
 754   for (typename FormatStringType::const_iterator i = format_string.begin();
 755        i != format_string.end(); ++i) {
 756     if ('$' == *i) {
 757       if (i + 1 != format_string.end()) {
 758         ++i;
 759         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 760         if ('$' == *i) {
 761           while (i != format_string.end() && '$' == *i) {
 762             formatted.push_back('$');
 763             ++i;
 764           }
 765           --i;
 766         } else {
 767           uintptr_t index = 0;
 768           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 769             index *= 10;
 770             index += *i - '0';
 771             ++i;
 772           }
 773           --i;
 774           index -= 1;
 775           if (offsets) {
 776             ReplacementOffset r_offset(index,
 777                 static_cast<int>(formatted.size()));
 778             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 779                                               r_offsets.end(),
 780                                               r_offset,
 781                                               &CompareParameter),
 782                              r_offset);
 783           }
 784           if (index < substitutions)
 785             formatted.append(subst.at(index));
 786         }
 787       }
 788     } else {
 789       formatted.push_back(*i);
 790     }
 791   }
 792   if (offsets) {
 793     for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
 794          i != r_offsets.end(); ++i) {
 795       offsets->push_back(i->offset);
 796     }
 797   }
 798   return formatted;
 799 }
 800
 801 string16 ReplaceStringPlaceholders(const string16& format_string,
 802                                    const std::vector<string16>& subst,
 803                                    std::vector<size_t>* offsets) {
 804   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 805 }
 806
 807 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
 808                                       const std::vector<std::string>& subst,
 809                                       std::vector<size_t>* offsets) {
 810   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 811 }
 812
 813 string16 ReplaceStringPlaceholders(const string16& format_string,
 814                                    const string16& a,
 815                                    size_t* offset) {
 816   std::vector<size_t> offsets;
 817   std::vector<string16> subst;
 818   subst.push_back(a);
 819   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
 820
 821   DCHECK(offsets.size() == 1);
 822   if (offset) {
 823     *offset = offsets[0];
 824   }
 825   return result;
 826 }
 827
 828 static bool IsWildcard(base_icu::UChar32 character) {
 829   return character == '*' || character == '?';
 830 }
 831
// Advances |*pattern| and |*string| past their common prefix, i.e. to the
// point where they start to differ, or until either reaches its end.
//
// Scanning stops early at an unescaped wildcard in the pattern. A backslash in
// the pattern escapes the following character, so that character is compared
// literally even if it is a wildcard.
//
// |next| is invoked as next(&ptr, end) and its result is compared as a
// base_icu::UChar32; presumably it decodes one character and advances the
// pointer -- NOTE(review): confirm against the NEXT functors used by callers.
template <typename CHAR, typename NEXT>
static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
                         const CHAR** string, const CHAR* string_end,
                         NEXT next) {
  // Position of a pending escape character in the pattern, or NULL if the
  // current pattern character is not escaped.
  const CHAR* escape = NULL;
  while (*pattern != pattern_end && *string != string_end) {
    if (!escape && IsWildcard(**pattern)) {
      // We don't want to match a wildcard here, unless it is escaped.
      return;
    }

    // Check whether the escape character is found. If so, remember where it
    // was, skip it, and move on to the next character.
    if (!escape && **pattern == '\\') {
      escape = *pattern;
      next(pattern, pattern_end);
      continue;
    }

    // Check whether the characters match; if so, advance both pointers. The
    // comparison works on copies so that nothing moves on a mismatch.
    const CHAR* pattern_next = *pattern;
    const CHAR* string_next = *string;
    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
    if (pattern_char == next(&string_next, string_end) &&
        pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
      *pattern = pattern_next;
      *string = string_next;
    } else {
      // The characters did not match, so we are done. If the previous
      // character was an escape, advancing past it was premature: put the
      // pattern pointer back on the escape character. This also means that
      // the MatchPattern function will return false, because if an escaped
      // character cannot be matched here, nothing else will match it.
      if (escape) {
        *pattern = escape;
      }
      return;
    }

    // The escaped character (if any) has been consumed.
    escape = NULL;
  }
}
 875
 876 template <typename CHAR, typename NEXT>
 877 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
 878   while (*pattern != end) {
 879     if (!IsWildcard(**pattern))
 880       return;
 881     next(pattern, end);
 882   }
 883 }
 884
 885 template <typename CHAR, typename NEXT>
 886 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
 887                           const CHAR* pattern, const CHAR* pattern_end,
 888                           int depth,
 889                           NEXT next) {
 890   const int kMaxDepth = 16;
 891   if (depth > kMaxDepth)
 892     return false;
 893
 894   // Eat all the matching chars.
 895   EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
 896
 897   // If the string is empty, then the pattern must be empty too, or contains
 898   // only wildcards.
 899   if (eval == eval_end) {
 900     EatWildcard(&pattern, pattern_end, next);
 901     return pattern == pattern_end;
 902   }
 903
 904   // Pattern is empty but not string, this is not a match.
 905   if (pattern == pattern_end)
 906     return false;
 907
 908   // If this is a question mark, then we need to compare the rest with
 909   // the current string or the string with one character eaten.
 910   const CHAR* next_pattern = pattern;
 911   next(&next_pattern, pattern_end);
 912   if (pattern[0] == '?') {
 913     if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
 914                       depth + 1, next))
 915       return true;
 916     const CHAR* next_eval = eval;
 917     next(&next_eval, eval_end);
 918     if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
 919                       depth + 1, next))
 920       return true;
 921   }
 922
 923   // This is a *, try to match all the possible substrings with the remainder
 924   // of the pattern.
 925   if (pattern[0] == '*') {
 926     // Collapse duplicate wild cards (********** into *) so that the
 927     // method does not recurse unnecessarily. http://crbug.com/52839
 928     EatWildcard(&next_pattern, pattern_end, next);
 929
 930     while (eval != eval_end) {
 931       if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
 932                         depth + 1, next))
 933         return true;
 934       eval++;
 935     }
 936
 937     // We reached the end of the string, let see if the pattern contains only
 938     // wildcards.
 939     if (eval == eval_end) {
 940       EatWildcard(&pattern, pattern_end, next);
 941       if (pattern != pattern_end)
 942         return false;
 943       return true;
 944     }
 945   }
 946
 947   return false;
 948 }
 949
 950 struct NextCharUTF8 {
 951   base_icu::UChar32 operator()(const char** p, const char* end) {
 952     base_icu::UChar32 c;
 953     int offset = 0;
 954     CBU8_NEXT(*p, offset, end - *p, c);
 955     *p += offset;
 956     return c;
 957   }
 958 };
 959
 960 struct NextCharUTF16 {
 961   base_icu::UChar32 operator()(const char16** p, const char16* end) {
 962     base_icu::UChar32 c;
 963     int offset = 0;
 964     CBU16_NEXT(*p, offset, end - *p, c);
 965     *p += offset;
 966     return c;
 967   }
 968 };
 969
 970 bool MatchPattern(const base::StringPiece& eval,
 971                   const base::StringPiece& pattern) {
 972   return MatchPatternT(eval.data(), eval.data() + eval.size(),
 973                        pattern.data(), pattern.data() + pattern.size(),
 974                        0, NextCharUTF8());
 975 }
 976
 977 bool MatchPattern(const string16& eval, const string16& pattern) {
 978   return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
 979                        pattern.c_str(), pattern.c_str() + pattern.size(),
 980                        0, NextCharUTF16());
 981 }
 982
 983 // The following code is compatible with the OpenBSD lcpy interface.  See:
 984 //   http://www.gratisoft.us/todd/papers/strlcpy.html
 985 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
 986
 987 namespace {
 988
 989 template <typename CHAR>
 990 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
 991   for (size_t i = 0; i < dst_size; ++i) {
 992     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
 993       return i;
 994   }
 995
 996   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
 997   if (dst_size != 0)
 998     dst[dst_size - 1] = 0;
 999
1000   // Count the rest of the |src|, and return it's length in characters.
1001   while (src[dst_size]) ++dst_size;
1002   return dst_size;
1003 }
1004
1005 }  // namespace
1006
1007 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1008   return lcpyT<char>(dst, src, dst_size);
1009 }
1010 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1011   return lcpyT<wchar_t>(dst, src, dst_size);
1012 }