base/strings/string_util.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <ctype.h>
   8 #include <errno.h>
   9 #include <math.h>
  10 #include <stdarg.h>
  11 #include <stdio.h>
  12 #include <stdlib.h>
  13 #include <string.h>
  14 #include <time.h>
  15 #include <wchar.h>
  16 #include <wctype.h>
  17
  18 #include <algorithm>
  19 #include <vector>
  20
  21 #include "base/basictypes.h"
  22 #include "base/logging.h"
  23 #include "base/memory/singleton.h"
  24 #include "base/strings/string_split.h"
  25 #include "base/strings/utf_string_conversion_utils.h"
  26 #include "base/strings/utf_string_conversions.h"
  27 #include "base/third_party/icu/icu_utf.h"
  28 #include "build/build_config.h"
  29
  30 namespace base {
  31
  32 namespace {
  33
  34 // Force the singleton used by EmptyString[16] to be a unique type. This
  35 // prevents other code that might accidentally use Singleton<string> from
  36 // getting our internal one.
  37 struct EmptyStrings {
  38   EmptyStrings() {}
  39   const std::string s;
  40   const string16 s16;
  41
  42   static EmptyStrings* GetInstance() {
  43     return Singleton<EmptyStrings>::get();
  44   }
  45 };
  46
  47 // Used by ReplaceStringPlaceholders to track the position in the string of
  48 // replaced parameters.
  49 struct ReplacementOffset {
  50   ReplacementOffset(uintptr_t parameter, size_t offset)
  51       : parameter(parameter),
  52         offset(offset) {}
  53
  54   // Index of the parameter.
  55   uintptr_t parameter;
  56
  57   // Starting position in the string.
  58   size_t offset;
  59 };
  60
  61 static bool CompareParameter(const ReplacementOffset& elem1,
  62                              const ReplacementOffset& elem2) {
  63   return elem1.parameter < elem2.parameter;
  64 }
  65
  66 // Assuming that a pointer is the size of a "machine word", then
  67 // uintptr_t is an integer type that is also a machine word.
  68 typedef uintptr_t MachineWord;
  69 const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
  70
  71 inline bool IsAlignedToMachineWord(const void* pointer) {
  72   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
  73 }
  74
  75 template<typename T> inline T* AlignToMachineWord(T* pointer) {
  76   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
  77                               ~kMachineWordAlignmentMask);
  78 }
  79
  80 template<size_t size, typename CharacterType> struct NonASCIIMask;
  81 template<> struct NonASCIIMask<4, char16> {
  82     static inline uint32_t value() { return 0xFF80FF80U; }
  83 };
  84 template<> struct NonASCIIMask<4, char> {
  85     static inline uint32_t value() { return 0x80808080U; }
  86 };
  87 template<> struct NonASCIIMask<8, char16> {
  88     static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
  89 };
  90 template<> struct NonASCIIMask<8, char> {
  91     static inline uint64_t value() { return 0x8080808080808080ULL; }
  92 };
  93 #if defined(WCHAR_T_IS_UTF32)
  94 template<> struct NonASCIIMask<4, wchar_t> {
  95     static inline uint32_t value() { return 0xFFFFFF80U; }
  96 };
  97 template<> struct NonASCIIMask<8, wchar_t> {
  98     static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
  99 };
 100 #endif  // WCHAR_T_IS_UTF32
 101
 102 // DO NOT USE. http://crbug.com/24917
 103 //
 104 // tolower() will given incorrect results for non-ASCII characters. Use the
 105 // ASCII version, base::i18n::ToLower, or base::i18n::FoldCase. This is here
 106 // for backwards-compat for StartsWith until such calls can be updated.
 107 struct CaseInsensitiveCompareDeprecated {
 108  public:
 109   bool operator()(char16 x, char16 y) const {
 110     return tolower(x) == tolower(y);
 111   }
 112 };
 113
 114 }  // namespace
 115
 116 bool IsWprintfFormatPortable(const wchar_t* format) {
 117   for (const wchar_t* position = format; *position != '\0'; ++position) {
 118     if (*position == '%') {
 119       bool in_specification = true;
 120       bool modifier_l = false;
 121       while (in_specification) {
 122         // Eat up characters until reaching a known specifier.
 123         if (*++position == '\0') {
 124           // The format string ended in the middle of a specification.  Call
 125           // it portable because no unportable specifications were found.  The
 126           // string is equally broken on all platforms.
 127           return true;
 128         }
 129
 130         if (*position == 'l') {
 131           // 'l' is the only thing that can save the 's' and 'c' specifiers.
 132           modifier_l = true;
 133         } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
 134                    *position == 'S' || *position == 'C' || *position == 'F' ||
 135                    *position == 'D' || *position == 'O' || *position == 'U') {
 136           // Not portable.
 137           return false;
 138         }
 139
 140         if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
 141           // Portable, keep scanning the rest of the format string.
 142           in_specification = false;
 143         }
 144       }
 145     }
 146   }
 147
 148   return true;
 149 }
 150
 151 namespace {
 152
 153 template<typename StringType>
 154 StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
 155   StringType ret;
 156   ret.reserve(str.size());
 157   for (size_t i = 0; i < str.size(); i++)
 158     ret.push_back(ToLowerASCII(str[i]));
 159   return ret;
 160 }
 161
 162 template<typename StringType>
 163 StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
 164   StringType ret;
 165   ret.reserve(str.size());
 166   for (size_t i = 0; i < str.size(); i++)
 167     ret.push_back(ToUpperASCII(str[i]));
 168   return ret;
 169 }
 170
 171 }  // namespace
 172
 173 std::string ToLowerASCII(StringPiece str) {
 174   return ToLowerASCIIImpl<std::string>(str);
 175 }
 176
 177 string16 ToLowerASCII(StringPiece16 str) {
 178   return ToLowerASCIIImpl<string16>(str);
 179 }
 180
 181 std::string ToUpperASCII(StringPiece str) {
 182   return ToUpperASCIIImpl<std::string>(str);
 183 }
 184
 185 string16 ToUpperASCII(StringPiece16 str) {
 186   return ToUpperASCIIImpl<string16>(str);
 187 }
 188
 189 template<class StringType>
 190 int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
 191                                  BasicStringPiece<StringType> b) {
 192   // Find the first characters that aren't equal and compare them.  If the end
 193   // of one of the strings is found before a nonequal character, the lengths
 194   // of the strings are compared.
 195   size_t i = 0;
 196   while (i < a.length() && i < b.length()) {
 197     typename StringType::value_type lower_a = ToLowerASCII(a[i]);
 198     typename StringType::value_type lower_b = ToLowerASCII(b[i]);
 199     if (lower_a < lower_b)
 200       return -1;
 201     if (lower_a > lower_b)
 202       return 1;
 203     i++;
 204   }
 205
 206   // End of one string hit before finding a different character. Expect the
 207   // common case to be "strings equal" at this point so check that first.
 208   if (a.length() == b.length())
 209     return 0;
 210
 211   if (a.length() < b.length())
 212     return -1;
 213   return 1;
 214 }
 215
 216 int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
 217   return CompareCaseInsensitiveASCIIT<std::string>(a, b);
 218 }
 219
 220 int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
 221   return CompareCaseInsensitiveASCIIT<string16>(a, b);
 222 }
 223
 224 bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
 225   if (a.length() != b.length())
 226     return false;
 227   return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0;
 228 }
 229
 230 bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
 231   if (a.length() != b.length())
 232     return false;
 233   return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0;
 234 }
 235
 236 const std::string& EmptyString() {
 237   return EmptyStrings::GetInstance()->s;
 238 }
 239
 240 const string16& EmptyString16() {
 241   return EmptyStrings::GetInstance()->s16;
 242 }
 243
 244 template<typename STR>
 245 bool ReplaceCharsT(const STR& input,
 246                    const STR& replace_chars,
 247                    const STR& replace_with,
 248                    STR* output) {
 249   bool removed = false;
 250   size_t replace_length = replace_with.length();
 251
 252   *output = input;
 253
 254   size_t found = output->find_first_of(replace_chars);
 255   while (found != STR::npos) {
 256     removed = true;
 257     output->replace(found, 1, replace_with);
 258     found = output->find_first_of(replace_chars, found + replace_length);
 259   }
 260
 261   return removed;
 262 }
 263
 264 bool ReplaceChars(const string16& input,
 265                   const StringPiece16& replace_chars,
 266                   const string16& replace_with,
 267                   string16* output) {
 268   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
 269 }
 270
 271 bool ReplaceChars(const std::string& input,
 272                   const StringPiece& replace_chars,
 273                   const std::string& replace_with,
 274                   std::string* output) {
 275   return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
 276 }
 277
 278 bool RemoveChars(const string16& input,
 279                  const StringPiece16& remove_chars,
 280                  string16* output) {
 281   return ReplaceChars(input, remove_chars.as_string(), string16(), output);
 282 }
 283
 284 bool RemoveChars(const std::string& input,
 285                  const StringPiece& remove_chars,
 286                  std::string* output) {
 287   return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
 288 }
 289
 290 template<typename Str>
 291 TrimPositions TrimStringT(const Str& input,
 292                           BasicStringPiece<Str> trim_chars,
 293                           TrimPositions positions,
 294                           Str* output) {
 295   // Find the edges of leading/trailing whitespace as desired. Need to use
 296   // a StringPiece version of input to be able to call find* on it with the
 297   // StringPiece version of trim_chars (normally the trim_chars will be a
 298   // constant so avoid making a copy).
 299   BasicStringPiece<Str> input_piece(input);
 300   const size_t last_char = input.length() - 1;
 301   const size_t first_good_char = (positions & TRIM_LEADING) ?
 302       input_piece.find_first_not_of(trim_chars) : 0;
 303   const size_t last_good_char = (positions & TRIM_TRAILING) ?
 304       input_piece.find_last_not_of(trim_chars) : last_char;
 305
 306   // When the string was all trimmed, report that we stripped off characters
 307   // from whichever position the caller was interested in. For empty input, we
 308   // stripped no characters, but we still need to clear |output|.
 309   if (input.empty() ||
 310       (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
 311     bool input_was_empty = input.empty();  // in case output == &input
 312     output->clear();
 313     return input_was_empty ? TRIM_NONE : positions;
 314   }
 315
 316   // Trim.
 317   *output =
 318       input.substr(first_good_char, last_good_char - first_good_char + 1);
 319
 320   // Return where we trimmed from.
 321   return static_cast<TrimPositions>(
 322       ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
 323       ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
 324 }
 325
 326 bool TrimString(const string16& input,
 327                 StringPiece16 trim_chars,
 328                 string16* output) {
 329   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 330 }
 331
 332 bool TrimString(const std::string& input,
 333                 StringPiece trim_chars,
 334                 std::string* output) {
 335   return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
 336 }
 337
 338 template<typename Str>
 339 BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
 340                                        BasicStringPiece<Str> trim_chars,
 341                                        TrimPositions positions) {
 342   size_t begin = (positions & TRIM_LEADING) ?
 343       input.find_first_not_of(trim_chars) : 0;
 344   size_t end = (positions & TRIM_TRAILING) ?
 345       input.find_last_not_of(trim_chars) + 1 : input.size();
 346   return input.substr(begin, end - begin);
 347 }
 348
 349 StringPiece16 TrimString(StringPiece16 input,
 350                          const StringPiece16& trim_chars,
 351                          TrimPositions positions) {
 352   return TrimStringPieceT(input, trim_chars, positions);
 353 }
 354
 355 StringPiece TrimString(StringPiece input,
 356                        const StringPiece& trim_chars,
 357                        TrimPositions positions) {
 358   return TrimStringPieceT(input, trim_chars, positions);
 359 }
 360
 361 void TruncateUTF8ToByteSize(const std::string& input,
 362                             const size_t byte_size,
 363                             std::string* output) {
 364   DCHECK(output);
 365   if (byte_size > input.length()) {
 366     *output = input;
 367     return;
 368   }
 369   DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
 370   // Note: This cast is necessary because CBU8_NEXT uses int32s.
 371   int32 truncation_length = static_cast<int32>(byte_size);
 372   int32 char_index = truncation_length - 1;
 373   const char* data = input.data();
 374
 375   // Using CBU8, we will move backwards from the truncation point
 376   // to the beginning of the string looking for a valid UTF8
 377   // character.  Once a full UTF8 character is found, we will
 378   // truncate the string to the end of that character.
 379   while (char_index >= 0) {
 380     int32 prev = char_index;
 381     base_icu::UChar32 code_point = 0;
 382     CBU8_NEXT(data, char_index, truncation_length, code_point);
 383     if (!IsValidCharacter(code_point) ||
 384         !IsValidCodepoint(code_point)) {
 385       char_index = prev - 1;
 386     } else {
 387       break;
 388     }
 389   }
 390
 391   if (char_index >= 0 )
 392     *output = input.substr(0, char_index);
 393   else
 394     output->clear();
 395 }
 396
 397 TrimPositions TrimWhitespace(const string16& input,
 398                              TrimPositions positions,
 399                              string16* output) {
 400   return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
 401 }
 402
 403 StringPiece16 TrimWhitespace(StringPiece16 input,
 404                              TrimPositions positions) {
 405   return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
 406 }
 407
 408 TrimPositions TrimWhitespaceASCII(const std::string& input,
 409                                   TrimPositions positions,
 410                                   std::string* output) {
 411   return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
 412 }
 413
 414 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
 415   return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
 416 }
 417
 418 // This function is only for backward-compatibility.
 419 // To be removed when all callers are updated.
 420 TrimPositions TrimWhitespace(const std::string& input,
 421                              TrimPositions positions,
 422                              std::string* output) {
 423   return TrimWhitespaceASCII(input, positions, output);
 424 }
 425
 426 template<typename STR>
 427 STR CollapseWhitespaceT(const STR& text,
 428                         bool trim_sequences_with_line_breaks) {
 429   STR result;
 430   result.resize(text.size());
 431
 432   // Set flags to pretend we're already in a trimmed whitespace sequence, so we
 433   // will trim any leading whitespace.
 434   bool in_whitespace = true;
 435   bool already_trimmed = true;
 436
 437   int chars_written = 0;
 438   for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
 439     if (IsUnicodeWhitespace(*i)) {
 440       if (!in_whitespace) {
 441         // Reduce all whitespace sequences to a single space.
 442         in_whitespace = true;
 443         result[chars_written++] = L' ';
 444       }
 445       if (trim_sequences_with_line_breaks && !already_trimmed &&
 446           ((*i == '\n') || (*i == '\r'))) {
 447         // Whitespace sequences containing CR or LF are eliminated entirely.
 448         already_trimmed = true;
 449         --chars_written;
 450       }
 451     } else {
 452       // Non-whitespace chracters are copied straight across.
 453       in_whitespace = false;
 454       already_trimmed = false;
 455       result[chars_written++] = *i;
 456     }
 457   }
 458
 459   if (in_whitespace && !already_trimmed) {
 460     // Any trailing whitespace is eliminated.
 461     --chars_written;
 462   }
 463
 464   result.resize(chars_written);
 465   return result;
 466 }
 467
 468 string16 CollapseWhitespace(const string16& text,
 469                             bool trim_sequences_with_line_breaks) {
 470   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 471 }
 472
 473 std::string CollapseWhitespaceASCII(const std::string& text,
 474                                     bool trim_sequences_with_line_breaks) {
 475   return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
 476 }
 477
 478 bool ContainsOnlyChars(const StringPiece& input,
 479                        const StringPiece& characters) {
 480   return input.find_first_not_of(characters) == StringPiece::npos;
 481 }
 482
 483 bool ContainsOnlyChars(const StringPiece16& input,
 484                        const StringPiece16& characters) {
 485   return input.find_first_not_of(characters) == StringPiece16::npos;
 486 }
 487
 488 template <class Char>
 489 inline bool DoIsStringASCII(const Char* characters, size_t length) {
 490   MachineWord all_char_bits = 0;
 491   const Char* end = characters + length;
 492
 493   // Prologue: align the input.
 494   while (!IsAlignedToMachineWord(characters) && characters != end) {
 495     all_char_bits |= *characters;
 496     ++characters;
 497   }
 498
 499   // Compare the values of CPU word size.
 500   const Char* word_end = AlignToMachineWord(end);
 501   const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
 502   while (characters < word_end) {
 503     all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
 504     characters += loop_increment;
 505   }
 506
 507   // Process the remaining bytes.
 508   while (characters != end) {
 509     all_char_bits |= *characters;
 510     ++characters;
 511   }
 512
 513   MachineWord non_ascii_bit_mask =
 514       NonASCIIMask<sizeof(MachineWord), Char>::value();
 515   return !(all_char_bits & non_ascii_bit_mask);
 516 }
 517
 518 bool IsStringASCII(const StringPiece& str) {
 519   return DoIsStringASCII(str.data(), str.length());
 520 }
 521
 522 bool IsStringASCII(const StringPiece16& str) {
 523   return DoIsStringASCII(str.data(), str.length());
 524 }
 525
 526 bool IsStringASCII(const string16& str) {
 527   return DoIsStringASCII(str.data(), str.length());
 528 }
 529
 530 #if defined(WCHAR_T_IS_UTF32)
 531 bool IsStringASCII(const std::wstring& str) {
 532   return DoIsStringASCII(str.data(), str.length());
 533 }
 534 #endif
 535
 536 bool IsStringUTF8(const StringPiece& str) {
 537   const char *src = str.data();
 538   int32 src_len = static_cast<int32>(str.length());
 539   int32 char_index = 0;
 540
 541   while (char_index < src_len) {
 542     int32 code_point;
 543     CBU8_NEXT(src, char_index, src_len, code_point);
 544     if (!IsValidCharacter(code_point))
 545       return false;
 546   }
 547   return true;
 548 }
 549
 550 // Implementation note: Normally this function will be called with a hardcoded
 551 // constant for the lowercase_ascii parameter. Constructing a StringPiece from
 552 // a C constant requires running strlen, so the result will be two passes
 553 // through the buffers, one to file the length of lowercase_ascii, and one to
 554 // compare each letter.
 555 //
 556 // This function could have taken a const char* to avoid this and only do one
 557 // pass through the string. But the strlen is faster than the case-insensitive
 558 // compares and lets us early-exit in the case that the strings are different
 559 // lengths (will often be the case for non-matches). So whether one approach or
 560 // the other will be faster depends on the case.
 561 //
 562 // The hardcoded strings are typically very short so it doesn't matter, and the
 563 // string piece gives additional flexibility for the caller (doesn't have to be
 564 // null terminated) so we choose the StringPiece route.
 565 template<typename Str>
 566 static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
 567                                           StringPiece lowercase_ascii) {
 568   if (str.size() != lowercase_ascii.size())
 569     return false;
 570   for (size_t i = 0; i < str.size(); i++) {
 571     if (ToLowerASCII(str[i]) != lowercase_ascii[i])
 572       return false;
 573   }
 574   return true;
 575 }
 576
 577 bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
 578   return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
 579 }
 580
 581 bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
 582   return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
 583 }
 584
 585 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
 586   if (str.length() != ascii.length())
 587     return false;
 588   return std::equal(ascii.begin(), ascii.end(), str.begin());
 589 }
 590
 591 template<typename Str>
 592 bool StartsWithT(BasicStringPiece<Str> str,
 593                  BasicStringPiece<Str> search_for,
 594                  CompareCase case_sensitivity) {
 595   if (search_for.size() > str.size())
 596     return false;
 597
 598   BasicStringPiece<Str> source = str.substr(0, search_for.size());
 599
 600   switch (case_sensitivity) {
 601     case CompareCase::SENSITIVE:
 602       return source == search_for;
 603
 604     case CompareCase::INSENSITIVE_ASCII:
 605       return std::equal(
 606           search_for.begin(), search_for.end(),
 607           source.begin(),
 608           CaseInsensitiveCompareASCII<typename Str::value_type>());
 609
 610     default:
 611       NOTREACHED();
 612       return false;
 613   }
 614 }
 615
 616 bool StartsWith(StringPiece str,
 617                 StringPiece search_for,
 618                 CompareCase case_sensitivity) {
 619   return StartsWithT<std::string>(str, search_for, case_sensitivity);
 620 }
 621
 622 bool StartsWith(StringPiece16 str,
 623                 StringPiece16 search_for,
 624                 CompareCase case_sensitivity) {
 625   return StartsWithT<string16>(str, search_for, case_sensitivity);
 626 }
 627
 628 template <typename Str>
 629 bool EndsWithT(BasicStringPiece<Str> str,
 630                BasicStringPiece<Str> search_for,
 631                CompareCase case_sensitivity) {
 632   if (search_for.size() > str.size())
 633     return false;
 634
 635   BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
 636                                             search_for.size());
 637
 638   switch (case_sensitivity) {
 639     case CompareCase::SENSITIVE:
 640       return source == search_for;
 641
 642     case CompareCase::INSENSITIVE_ASCII:
 643       return std::equal(
 644           source.begin(), source.end(),
 645           search_for.begin(),
 646           CaseInsensitiveCompareASCII<typename Str::value_type>());
 647
 648     default:
 649       NOTREACHED();
 650       return false;
 651   }
 652 }
 653
 654 bool EndsWith(StringPiece str,
 655               StringPiece search_for,
 656               CompareCase case_sensitivity) {
 657   return EndsWithT<std::string>(str, search_for, case_sensitivity);
 658 }
 659
 660 bool EndsWith(StringPiece16 str,
 661               StringPiece16 search_for,
 662               CompareCase case_sensitivity) {
 663   return EndsWithT<string16>(str, search_for, case_sensitivity);
 664 }
 665
 666 char HexDigitToInt(wchar_t c) {
 667   DCHECK(IsHexDigit(c));
 668   if (c >= '0' && c <= '9')
 669     return static_cast<char>(c - '0');
 670   if (c >= 'A' && c <= 'F')
 671     return static_cast<char>(c - 'A' + 10);
 672   if (c >= 'a' && c <= 'f')
 673     return static_cast<char>(c - 'a' + 10);
 674   return 0;
 675 }
 676
 677 static const char* const kByteStringsUnlocalized[] = {
 678   " B",
 679   " kB",
 680   " MB",
 681   " GB",
 682   " TB",
 683   " PB"
 684 };
 685
 686 string16 FormatBytesUnlocalized(int64 bytes) {
 687   double unit_amount = static_cast<double>(bytes);
 688   size_t dimension = 0;
 689   const int kKilo = 1024;
 690   while (unit_amount >= kKilo &&
 691          dimension < arraysize(kByteStringsUnlocalized) - 1) {
 692     unit_amount /= kKilo;
 693     dimension++;
 694   }
 695
 696   char buf[64];
 697   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
 698     base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
 699                    kByteStringsUnlocalized[dimension]);
 700   } else {
 701     base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
 702                    kByteStringsUnlocalized[dimension]);
 703   }
 704
 705   return ASCIIToUTF16(buf);
 706 }
 707
 708 // Runs in O(n) time in the length of |str|.
 709 template<class StringType>
 710 void DoReplaceSubstringsAfterOffset(StringType* str,
 711                                     size_t offset,
 712                                     BasicStringPiece<StringType> find_this,
 713                                     BasicStringPiece<StringType> replace_with,
 714                                     bool replace_all) {
 715   DCHECK(!find_this.empty());
 716
 717   // If the find string doesn't appear, there's nothing to do.
 718   offset = str->find(find_this.data(), offset, find_this.size());
 719   if (offset == StringType::npos)
 720     return;
 721
 722   // If we're only replacing one instance, there's no need to do anything
 723   // complicated.
 724   size_t find_length = find_this.length();
 725   if (!replace_all) {
 726     str->replace(offset, find_length, replace_with.data(), replace_with.size());
 727     return;
 728   }
 729
 730   // If the find and replace strings are the same length, we can simply use
 731   // replace() on each instance, and finish the entire operation in O(n) time.
 732   size_t replace_length = replace_with.length();
 733   if (find_length == replace_length) {
 734     do {
 735       str->replace(offset, find_length,
 736                    replace_with.data(), replace_with.size());
 737       offset = str->find(find_this.data(), offset + replace_length,
 738                          find_this.size());
 739     } while (offset != StringType::npos);
 740     return;
 741   }
 742
 743   // Since the find and replace strings aren't the same length, a loop like the
 744   // one above would be O(n^2) in the worst case, as replace() will shift the
 745   // entire remaining string each time.  We need to be more clever to keep
 746   // things O(n).
 747   //
 748   // If we're shortening the string, we can alternate replacements with shifting
 749   // forward the intervening characters using memmove().
 750   size_t str_length = str->length();
 751   if (find_length > replace_length) {
 752     size_t write_offset = offset;
 753     do {
 754       if (replace_length) {
 755         str->replace(write_offset, replace_length,
 756                      replace_with.data(), replace_with.size());
 757         write_offset += replace_length;
 758       }
 759       size_t read_offset = offset + find_length;
 760       offset = std::min(
 761           str->find(find_this.data(), read_offset, find_this.size()),
 762           str_length);
 763       size_t length = offset - read_offset;
 764       if (length) {
 765         memmove(&(*str)[write_offset], &(*str)[read_offset],
 766                 length * sizeof(typename StringType::value_type));
 767         write_offset += length;
 768       }
 769     } while (offset < str_length);
 770     str->resize(write_offset);
 771     return;
 772   }
 773
 774   // We're lengthening the string.  We can use alternating replacements and
 775   // memmove() calls like above, but we need to precalculate the final string
 776   // length and then expand from back-to-front to avoid overwriting the string
 777   // as we're reading it, needing to shift, or having to copy to a second string
 778   // temporarily.
 779   size_t first_match = offset;
 780
 781   // First, calculate the final length and resize the string.
 782   size_t final_length = str_length;
 783   size_t expansion = replace_length - find_length;
 784   size_t current_match;
 785   do {
 786     final_length += expansion;
 787     // Minor optimization: save this offset into |current_match|, so that on
 788     // exit from the loop, |current_match| will point at the last instance of
 789     // the find string, and we won't need to find() it again immediately.
 790     current_match = offset;
 791     offset = str->find(find_this.data(), offset + find_length,
 792                        find_this.size());
 793   } while (offset != StringType::npos);
 794   str->resize(final_length);
 795
 796   // Now do the replacement loop, working backwards through the string.
 797   for (size_t prev_match = str_length, write_offset = final_length; ;
 798        current_match = str->rfind(find_this.data(), current_match - 1,
 799                                   find_this.size())) {
 800     size_t read_offset = current_match + find_length;
 801     size_t length = prev_match - read_offset;
 802     if (length) {
 803       write_offset -= length;
 804       memmove(&(*str)[write_offset], &(*str)[read_offset],
 805               length * sizeof(typename StringType::value_type));
 806     }
 807     write_offset -= replace_length;
 808     str->replace(write_offset, replace_length,
 809                  replace_with.data(), replace_with.size());
 810     if (current_match == first_match)
 811       return;
 812     prev_match = current_match;
 813   }
 814 }
 815
 816 void ReplaceFirstSubstringAfterOffset(string16* str,
 817                                       size_t start_offset,
 818                                       StringPiece16 find_this,
 819                                       StringPiece16 replace_with) {
 820   DoReplaceSubstringsAfterOffset<string16>(
 821       str, start_offset, find_this, replace_with, false);  // Replace first.
 822 }
 823
 824 void ReplaceFirstSubstringAfterOffset(std::string* str,
 825                                       size_t start_offset,
 826                                       StringPiece find_this,
 827                                       StringPiece replace_with) {
 828   DoReplaceSubstringsAfterOffset<std::string>(
 829       str, start_offset, find_this, replace_with, false);  // Replace first.
 830 }
 831
 832 void ReplaceSubstringsAfterOffset(string16* str,
 833                                   size_t start_offset,
 834                                   StringPiece16 find_this,
 835                                   StringPiece16 replace_with) {
 836   DoReplaceSubstringsAfterOffset<string16>(
 837       str, start_offset, find_this, replace_with, true);  // Replace all.
 838 }
 839
 840 void ReplaceSubstringsAfterOffset(std::string* str,
 841                                   size_t start_offset,
 842                                   StringPiece find_this,
 843                                   StringPiece replace_with) {
 844   DoReplaceSubstringsAfterOffset<std::string>(
 845       str, start_offset, find_this, replace_with, true);  // Replace all.
 846 }
 847
 848 template <class string_type>
 849 inline typename string_type::value_type* WriteIntoT(string_type* str,
 850                                                     size_t length_with_null) {
 851   DCHECK_GT(length_with_null, 1u);
 852   str->reserve(length_with_null);
 853   str->resize(length_with_null - 1);
 854   return &((*str)[0]);
 855 }
 856
 857 char* WriteInto(std::string* str, size_t length_with_null) {
 858   return WriteIntoT(str, length_with_null);
 859 }
 860
 861 char16* WriteInto(string16* str, size_t length_with_null) {
 862   return WriteIntoT(str, length_with_null);
 863 }
 864
 865 template<typename STR>
 866 static STR JoinStringT(const std::vector<STR>& parts,
 867                        BasicStringPiece<STR> sep) {
 868   if (parts.empty())
 869     return STR();
 870
 871   STR result(parts[0]);
 872   auto iter = parts.begin();
 873   ++iter;
 874
 875   for (; iter != parts.end(); ++iter) {
 876     sep.AppendToString(&result);
 877     result += *iter;
 878   }
 879
 880   return result;
 881 }
 882
 883 std::string JoinString(const std::vector<std::string>& parts,
 884                        StringPiece separator) {
 885   return JoinStringT(parts, separator);
 886 }
 887
 888 string16 JoinString(const std::vector<string16>& parts,
 889                     StringPiece16 separator) {
 890   return JoinStringT(parts, separator);
 891 }
 892
 893 template<class FormatStringType, class OutStringType>
 894 OutStringType DoReplaceStringPlaceholders(
 895     const FormatStringType& format_string,
 896     const std::vector<OutStringType>& subst,
 897     std::vector<size_t>* offsets) {
 898   size_t substitutions = subst.size();
 899
 900   size_t sub_length = 0;
 901   for (const auto& cur : subst)
 902     sub_length += cur.length();
 903
 904   OutStringType formatted;
 905   formatted.reserve(format_string.length() + sub_length);
 906
 907   std::vector<ReplacementOffset> r_offsets;
 908   for (auto i = format_string.begin(); i != format_string.end(); ++i) {
 909     if ('$' == *i) {
 910       if (i + 1 != format_string.end()) {
 911         ++i;
 912         DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
 913         if ('$' == *i) {
 914           while (i != format_string.end() && '$' == *i) {
 915             formatted.push_back('$');
 916             ++i;
 917           }
 918           --i;
 919         } else {
 920           uintptr_t index = 0;
 921           while (i != format_string.end() && '0' <= *i && *i <= '9') {
 922             index *= 10;
 923             index += *i - '0';
 924             ++i;
 925           }
 926           --i;
 927           index -= 1;
 928           if (offsets) {
 929             ReplacementOffset r_offset(index,
 930                 static_cast<int>(formatted.size()));
 931             r_offsets.insert(std::lower_bound(r_offsets.begin(),
 932                                               r_offsets.end(),
 933                                               r_offset,
 934                                               &CompareParameter),
 935                              r_offset);
 936           }
 937           if (index < substitutions)
 938             formatted.append(subst.at(index));
 939         }
 940       }
 941     } else {
 942       formatted.push_back(*i);
 943     }
 944   }
 945   if (offsets) {
 946     for (const auto& cur : r_offsets)
 947       offsets->push_back(cur.offset);
 948   }
 949   return formatted;
 950 }
 951
 952 string16 ReplaceStringPlaceholders(const string16& format_string,
 953                                    const std::vector<string16>& subst,
 954                                    std::vector<size_t>* offsets) {
 955   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 956 }
 957
 958 std::string ReplaceStringPlaceholders(const StringPiece& format_string,
 959                                       const std::vector<std::string>& subst,
 960                                       std::vector<size_t>* offsets) {
 961   return DoReplaceStringPlaceholders(format_string, subst, offsets);
 962 }
 963
 964 string16 ReplaceStringPlaceholders(const string16& format_string,
 965                                    const string16& a,
 966                                    size_t* offset) {
 967   std::vector<size_t> offsets;
 968   std::vector<string16> subst;
 969   subst.push_back(a);
 970   string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
 971
 972   DCHECK_EQ(1U, offsets.size());
 973   if (offset)
 974     *offset = offsets[0];
 975   return result;
 976 }
 977
 978 // The following code is compatible with the OpenBSD lcpy interface.  See:
 979 //   http://www.gratisoft.us/todd/papers/strlcpy.html
 980 //   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
 981
 982 namespace {
 983
 984 template <typename CHAR>
 985 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
 986   for (size_t i = 0; i < dst_size; ++i) {
 987     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
 988       return i;
 989   }
 990
 991   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
 992   if (dst_size != 0)
 993     dst[dst_size - 1] = 0;
 994
 995   // Count the rest of the |src|, and return it's length in characters.
 996   while (src[dst_size]) ++dst_size;
 997   return dst_size;
 998 }
 999
1000 }  // namespace
1001
1002 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
1003   return lcpyT<char>(dst, src, dst_size);
1004 }
1005 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1006   return lcpyT<wchar_t>(dst, src, dst_size);
1007 }
1008
1009 }  // namespace base