base/string_util_unittest.cc

   1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6 #include <stdarg.h>
   7
   8 #include <limits>
   9 #include <sstream>
  10
  11 #include "base/basictypes.h"
  12 #include "base/string_util.h"
  13 #include "base/utf_string_conversions.h"
  14 #include "testing/gmock/include/gmock/gmock.h"
  15 #include "testing/gtest/include/gtest/gtest.h"
  16
  17 using ::testing::ElementsAre;
  18
  19 namespace base {
  20
  21 namespace {
  22
  23 // Given a null-terminated string of wchar_t with each wchar_t representing
  24 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
  25 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
  26 // should be represented as a surrogate pair (two UTF-16 units)
  27 // *even* where wchar_t is 32-bit (Linux and Mac).
  28 //
  29 // This is to help write tests for functions with string16 params until
  30 // the C++ 0x UTF-16 literal is well-supported by compilers.
  31 string16 BuildString16(const wchar_t* s) {
  32 #if defined(WCHAR_T_IS_UTF16)
  33   return string16(s);
  34 #elif defined(WCHAR_T_IS_UTF32)
  35   string16 u16;
  36   while (*s != 0) {
  37     DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu);
  38     u16.push_back(*s++);
  39   }
  40   return u16;
  41 #endif
  42 }
  43
  44 }  // namespace
  45
  46 static const struct trim_case {
  47   const wchar_t* input;
  48   const TrimPositions positions;
  49   const wchar_t* output;
  50   const TrimPositions return_value;
  51 } trim_cases[] = {
  52   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  53   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  54   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  55   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  56   {L"", TRIM_ALL, L"", TRIM_NONE},
  57   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  58   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  59   {L"  ", TRIM_ALL, L"", TRIM_ALL},
  60   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  61   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
  62 };
  63
  64 static const struct trim_case_ascii {
  65   const char* input;
  66   const TrimPositions positions;
  67   const char* output;
  68   const TrimPositions return_value;
  69 } trim_cases_ascii[] = {
  70   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  71   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  72   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  73   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  74   {"", TRIM_ALL, "", TRIM_NONE},
  75   {"  ", TRIM_LEADING, "", TRIM_LEADING},
  76   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  77   {"  ", TRIM_ALL, "", TRIM_ALL},
  78   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
  79 };
  80
  81 namespace {
  82
  83 // Helper used to test TruncateUTF8ToByteSize.
  84 bool Truncated(const std::string& input, const size_t byte_size,
  85                std::string* output) {
  86     size_t prev = input.length();
  87     TruncateUTF8ToByteSize(input, byte_size, output);
  88     return prev != output->length();
  89 }
  90
  91 }  // namespace
  92
  93 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  94   std::string output;
  95
  96   // Empty strings and invalid byte_size arguments
  97   EXPECT_FALSE(Truncated("", 0, &output));
  98   EXPECT_EQ(output, "");
  99   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
 100   EXPECT_EQ(output, "");
 101   EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
 102   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
 103
 104   // Testing the truncation of valid UTF8 correctly
 105   EXPECT_TRUE(Truncated("abc", 2, &output));
 106   EXPECT_EQ(output, "ab");
 107   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
 108   EXPECT_EQ(output.compare("\xc2\x81"), 0);
 109   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
 110   EXPECT_EQ(output.compare("\xc2\x81"), 0);
 111   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
 112   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
 113
 114   {
 115     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
 116     const std::string array_string(array, arraysize(array));
 117     EXPECT_TRUE(Truncated(array_string, 4, &output));
 118     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
 119   }
 120
 121   {
 122     const char array[] = "\x00\xc2\x81\xc2\x81";
 123     const std::string array_string(array, arraysize(array));
 124     EXPECT_TRUE(Truncated(array_string, 4, &output));
 125     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
 126   }
 127
 128   // Testing invalid UTF8
 129   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
 130   EXPECT_EQ(output.compare(""), 0);
 131   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
 132   EXPECT_EQ(output.compare(""), 0);
 133   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
 134   EXPECT_EQ(output.compare(""), 0);
 135
 136   // Testing invalid UTF8 mixed with valid UTF8
 137   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
 138   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
 139   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
 140   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
 141   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
 142               10, &output));
 143   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
 144   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
 145               10, &output));
 146   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
 147   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
 148   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
 149
 150   // Overlong sequences
 151   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
 152   EXPECT_EQ(output.compare(""), 0);
 153   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
 154   EXPECT_EQ(output.compare(""), 0);
 155   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
 156   EXPECT_EQ(output.compare(""), 0);
 157   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
 158   EXPECT_EQ(output.compare(""), 0);
 159   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
 160   EXPECT_EQ(output.compare(""), 0);
 161   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
 162   EXPECT_EQ(output.compare(""), 0);
 163   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
 164   EXPECT_EQ(output.compare(""), 0);
 165   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
 166   EXPECT_EQ(output.compare(""), 0);
 167   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
 168   EXPECT_EQ(output.compare(""), 0);
 169   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
 170   EXPECT_EQ(output.compare(""), 0);
 171   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
 172   EXPECT_EQ(output.compare(""), 0);
 173
 174   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 175   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
 176   EXPECT_EQ(output.compare(""), 0);
 177   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
 178   EXPECT_EQ(output.compare(""), 0);
 179   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
 180   EXPECT_EQ(output.compare(""), 0);
 181
 182   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 183   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
 184   EXPECT_EQ(output.compare(""), 0);
 185   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
 186   EXPECT_EQ(output.compare(""), 0);
 187
 188   {
 189     const char array[] = "\x00\x00\xfe\xff";
 190     const std::string array_string(array, arraysize(array));
 191     EXPECT_TRUE(Truncated(array_string, 4, &output));
 192     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
 193   }
 194
 195   // Variants on the previous test
 196   {
 197     const char array[] = "\xff\xfe\x00\x00";
 198     const std::string array_string(array, 4);
 199     EXPECT_FALSE(Truncated(array_string, 4, &output));
 200     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
 201   }
 202   {
 203     const char array[] = "\xff\x00\x00\xfe";
 204     const std::string array_string(array, arraysize(array));
 205     EXPECT_TRUE(Truncated(array_string, 4, &output));
 206     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
 207   }
 208
 209   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 210   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
 211   EXPECT_EQ(output.compare(""), 0);
 212   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
 213   EXPECT_EQ(output.compare(""), 0);
 214   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
 215   EXPECT_EQ(output.compare(""), 0);
 216   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
 217   EXPECT_EQ(output.compare(""), 0);
 218   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
 219   EXPECT_EQ(output.compare(""), 0);
 220
 221   // Strings in legacy encodings that are valid in UTF-8, but
 222   // are invalid as UTF-8 in real data.
 223   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
 224   EXPECT_EQ(output.compare("caf"), 0);
 225   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
 226   EXPECT_EQ(output.compare(""), 0);
 227   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
 228   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 229   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
 230               &output));
 231   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 232
 233   // Testing using the same string as input and output.
 234   EXPECT_FALSE(Truncated(output, 4, &output));
 235   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 236   EXPECT_TRUE(Truncated(output, 3, &output));
 237   EXPECT_EQ(output.compare("\xa7\x41"), 0);
 238
 239   // "abc" with U+201[CD] in windows-125[0-8]
 240   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
 241   EXPECT_EQ(output.compare("\x93" "abc"), 0);
 242
 243   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 244   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
 245   EXPECT_EQ(output.compare(""), 0);
 246
 247   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 248   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
 249   EXPECT_EQ(output.compare(""), 0);
 250 }
 251
 252 TEST(StringUtilTest, TrimWhitespace) {
 253   std::wstring output;  // Allow contents to carry over to next testcase
 254   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
 255     const trim_case& value = trim_cases[i];
 256     EXPECT_EQ(value.return_value,
 257               TrimWhitespace(value.input, value.positions, &output));
 258     EXPECT_EQ(value.output, output);
 259   }
 260
 261   // Test that TrimWhitespace() can take the same string for input and output
 262   output = L"  This is a test \r\n";
 263   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 264   EXPECT_EQ(L"This is a test", output);
 265
 266   // Once more, but with a string of whitespace
 267   output = L"  \r\n";
 268   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 269   EXPECT_EQ(L"", output);
 270
 271   std::string output_ascii;
 272   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
 273     const trim_case_ascii& value = trim_cases_ascii[i];
 274     EXPECT_EQ(value.return_value,
 275               TrimWhitespace(value.input, value.positions, &output_ascii));
 276     EXPECT_EQ(value.output, output_ascii);
 277   }
 278 }
 279
 280 static const struct collapse_case {
 281   const wchar_t* input;
 282   const bool trim;
 283   const wchar_t* output;
 284 } collapse_cases[] = {
 285   {L" Google Video ", false, L"Google Video"},
 286   {L"Google Video", false, L"Google Video"},
 287   {L"", false, L""},
 288   {L"  ", false, L""},
 289   {L"\t\rTest String\n", false, L"Test String"},
 290   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
 291   {L"    Test     \n  \t String    ", false, L"Test String"},
 292   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
 293   {L"   Test String", false, L"Test String"},
 294   {L"Test String    ", false, L"Test String"},
 295   {L"Test String", false, L"Test String"},
 296   {L"", true, L""},
 297   {L"\n", true, L""},
 298   {L"  \r  ", true, L""},
 299   {L"\nFoo", true, L"Foo"},
 300   {L"\r  Foo  ", true, L"Foo"},
 301   {L" Foo bar ", true, L"Foo bar"},
 302   {L"  \tFoo  bar  \n", true, L"Foo bar"},
 303   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
 304 };
 305
 306 TEST(StringUtilTest, CollapseWhitespace) {
 307   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
 308     const collapse_case& value = collapse_cases[i];
 309     EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
 310   }
 311 }
 312
 313 static const struct collapse_case_ascii {
 314   const char* input;
 315   const bool trim;
 316   const char* output;
 317 } collapse_cases_ascii[] = {
 318   {" Google Video ", false, "Google Video"},
 319   {"Google Video", false, "Google Video"},
 320   {"", false, ""},
 321   {"  ", false, ""},
 322   {"\t\rTest String\n", false, "Test String"},
 323   {"    Test     \n  \t String    ", false, "Test String"},
 324   {"   Test String", false, "Test String"},
 325   {"Test String    ", false, "Test String"},
 326   {"Test String", false, "Test String"},
 327   {"", true, ""},
 328   {"\n", true, ""},
 329   {"  \r  ", true, ""},
 330   {"\nFoo", true, "Foo"},
 331   {"\r  Foo  ", true, "Foo"},
 332   {" Foo bar ", true, "Foo bar"},
 333   {"  \tFoo  bar  \n", true, "Foo bar"},
 334   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
 335 };
 336
 337 TEST(StringUtilTest, CollapseWhitespaceASCII) {
 338   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
 339     const collapse_case_ascii& value = collapse_cases_ascii[i];
 340     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
 341   }
 342 }
 343
 344 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
 345   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
 346   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
 347   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
 348   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
 349   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
 350   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
 351 }
 352
 353 TEST(StringUtilTest, ContainsOnlyWhitespace) {
 354   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
 355   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
 356   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
 357   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
 358   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
 359   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
 360 }
 361
 362 TEST(StringUtilTest, IsStringUTF8) {
 363   EXPECT_TRUE(IsStringUTF8("abc"));
 364   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
 365   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
 366   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
 367   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
 368   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
 369
 370   // surrogate code points
 371   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
 372   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
 373   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
 374
 375   // overlong sequences
 376   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
 377   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
 378   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
 379   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
 380   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
 381   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
 382   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
 383   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
 384   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
 385   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
 386   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
 387
 388   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 389   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
 390   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
 391   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
 392
 393   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 394   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
 395   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
 396   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
 397   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
 398
 399   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 400   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
 401   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
 402   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
 403   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
 404   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
 405   // Strings in legacy encodings. We can certainly make up strings
 406   // in a legacy encoding that are valid in UTF-8, but in real data,
 407   // most of them are invalid as UTF-8.
 408   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
 409   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
 410   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
 411   // "abc" with U+201[CD] in windows-125[0-8]
 412   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
 413   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 414   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
 415   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 416   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 417 }
 418
 419 TEST(StringUtilTest, ConvertASCII) {
 420   static const char* char_cases[] = {
 421     "Google Video",
 422     "Hello, world\n",
 423     "0123ABCDwxyz \a\b\t\r\n!+,.~"
 424   };
 425
 426   static const wchar_t* const wchar_cases[] = {
 427     L"Google Video",
 428     L"Hello, world\n",
 429     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
 430   };
 431
 432   for (size_t i = 0; i < arraysize(char_cases); ++i) {
 433     EXPECT_TRUE(IsStringASCII(char_cases[i]));
 434     std::wstring wide = ASCIIToWide(char_cases[i]);
 435     EXPECT_EQ(wchar_cases[i], wide);
 436
 437     EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
 438     std::string ascii = WideToASCII(wchar_cases[i]);
 439     EXPECT_EQ(char_cases[i], ascii);
 440   }
 441
 442   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
 443   EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
 444
 445   // Convert empty strings.
 446   std::wstring wempty;
 447   std::string empty;
 448   EXPECT_EQ(empty, WideToASCII(wempty));
 449   EXPECT_EQ(wempty, ASCIIToWide(empty));
 450
 451   // Convert strings with an embedded NUL character.
 452   const char chars_with_nul[] = "test\0string";
 453   const int length_with_nul = arraysize(chars_with_nul) - 1;
 454   std::string string_with_nul(chars_with_nul, length_with_nul);
 455   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
 456   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
 457             wide_with_nul.length());
 458   std::string narrow_with_nul = WideToASCII(wide_with_nul);
 459   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
 460             narrow_with_nul.length());
 461   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 462 }
 463
 464 TEST(StringUtilTest, ToUpperASCII) {
 465   EXPECT_EQ('C', ToUpperASCII('C'));
 466   EXPECT_EQ('C', ToUpperASCII('c'));
 467   EXPECT_EQ('2', ToUpperASCII('2'));
 468
 469   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
 470   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
 471   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
 472
 473   std::string in_place_a("Cc2");
 474   StringToUpperASCII(&in_place_a);
 475   EXPECT_EQ("CC2", in_place_a);
 476
 477   std::wstring in_place_w(L"Cc2");
 478   StringToUpperASCII(&in_place_w);
 479   EXPECT_EQ(L"CC2", in_place_w);
 480
 481   std::string original_a("Cc2");
 482   std::string upper_a = StringToUpperASCII(original_a);
 483   EXPECT_EQ("CC2", upper_a);
 484
 485   std::wstring original_w(L"Cc2");
 486   std::wstring upper_w = StringToUpperASCII(original_w);
 487   EXPECT_EQ(L"CC2", upper_w);
 488 }
 489
 490 static const struct {
 491   const wchar_t* src_w;
 492   const char*    src_a;
 493   const char*    dst;
 494 } lowercase_cases[] = {
 495   {L"FoO", "FoO", "foo"},
 496   {L"foo", "foo", "foo"},
 497   {L"FOO", "FOO", "foo"},
 498 };
 499
 500 TEST(StringUtilTest, LowerCaseEqualsASCII) {
 501   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
 502     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
 503                                      lowercase_cases[i].dst));
 504     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
 505                                      lowercase_cases[i].dst));
 506   }
 507 }
 508
 509 TEST(StringUtilTest, GetByteDisplayUnits) {
 510   static const struct {
 511     int64 bytes;
 512     DataUnits expected;
 513   } cases[] = {
 514     {0, DATA_UNITS_BYTE},
 515     {512, DATA_UNITS_BYTE},
 516     {10*1024, DATA_UNITS_KIBIBYTE},
 517     {10*1024*1024, DATA_UNITS_MEBIBYTE},
 518     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
 519     {~(1LL<<63), DATA_UNITS_GIBIBYTE},
 520 #ifdef NDEBUG
 521     {-1, DATA_UNITS_BYTE},
 522 #endif
 523   };
 524
 525   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
 526     EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
 527 }
 528
 529 TEST(StringUtilTest, FormatBytes) {
 530   static const struct {
 531     int64 bytes;
 532     DataUnits units;
 533     const wchar_t* expected;
 534     const wchar_t* expected_with_units;
 535   } cases[] = {
 536     // Expected behavior: we show one post-decimal digit when we have
 537     // under two pre-decimal digits, except in cases where it makes no
 538     // sense (zero or bytes).
 539     // Since we switch units once we cross the 1000 mark, this keeps
 540     // the display of file sizes or bytes consistently around three
 541     // digits.
 542     {0, DATA_UNITS_BYTE, L"0", L"0 B"},
 543     {512, DATA_UNITS_BYTE, L"512", L"512 B"},
 544     {512, DATA_UNITS_KIBIBYTE, L"0.5", L"0.5 kB"},
 545     {1024*1024, DATA_UNITS_KIBIBYTE, L"1024", L"1024 kB"},
 546     {1024*1024, DATA_UNITS_MEBIBYTE, L"1.0", L"1.0 MB"},
 547     {1024*1024*1024, DATA_UNITS_GIBIBYTE, L"1.0", L"1.0 GB"},
 548     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"10.0", L"10.0 GB"},
 549     {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"99.0", L"99.0 GB"},
 550     {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"105", L"105 GB"},
 551     {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
 552      L"105", L"105 GB"},
 553     {~(1LL<<63), DATA_UNITS_GIBIBYTE, L"8589934592", L"8589934592 GB"},
 554
 555     {99*1024 + 103, DATA_UNITS_KIBIBYTE, L"99.1", L"99.1 kB"},
 556     {1024*1024 + 103, DATA_UNITS_KIBIBYTE, L"1024", L"1024 kB"},
 557     {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, L"1.2", L"1.2 MB"},
 558     {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
 559      L"1.9", L"1.9 GB"},
 560     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"10.0", L"10.0 GB"},
 561     {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"100", L"100 GB"},
 562 #ifdef NDEBUG
 563     {-1, DATA_UNITS_BYTE, L"", L""},
 564 #endif
 565   };
 566
 567   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 568     EXPECT_EQ(cases[i].expected,
 569               FormatBytes(cases[i].bytes, cases[i].units, false));
 570     EXPECT_EQ(cases[i].expected_with_units,
 571               FormatBytes(cases[i].bytes, cases[i].units, true));
 572   }
 573 }
 574
 575 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
 576   static const struct {
 577     const char* str;
 578     string16::size_type start_offset;
 579     const char* find_this;
 580     const char* replace_with;
 581     const char* expected;
 582   } cases[] = {
 583     {"aaa", 0, "a", "b", "bbb"},
 584     {"abb", 0, "ab", "a", "ab"},
 585     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
 586     {"Not found", 0, "x", "0", "Not found"},
 587     {"Not found again", 5, "x", "0", "Not found again"},
 588     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 589      "Four score and seven years agoMakingFour score and seven years agoit"
 590      "Four score and seven years agomuchFour score and seven years agolonger"
 591      "Four score and seven years ago"},
 592     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 593     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
 594     {"abababab", 2, "ab", "c", "abccc"},
 595   };
 596
 597   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 598     string16 str = ASCIIToUTF16(cases[i].str);
 599     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
 600                                  ASCIIToUTF16(cases[i].find_this),
 601                                  ASCIIToUTF16(cases[i].replace_with));
 602     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 603   }
 604 }
 605
 606 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
 607   static const struct {
 608     const char* str;
 609     string16::size_type start_offset;
 610     const char* find_this;
 611     const char* replace_with;
 612     const char* expected;
 613   } cases[] = {
 614     {"aaa", 0, "a", "b", "baa"},
 615     {"abb", 0, "ab", "a", "ab"},
 616     {"Removing some substrings inging", 0, "ing", "",
 617       "Remov some substrings inging"},
 618     {"Not found", 0, "x", "0", "Not found"},
 619     {"Not found again", 5, "x", "0", "Not found again"},
 620     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 621      "Four score and seven years agoMaking it much longer "},
 622     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 623     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
 624     {"abababab", 2, "ab", "c", "abcabab"},
 625   };
 626
 627   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 628     string16 str = ASCIIToUTF16(cases[i].str);
 629     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
 630                                      ASCIIToUTF16(cases[i].find_this),
 631                                      ASCIIToUTF16(cases[i].replace_with));
 632     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 633   }
 634 }
 635
 636 namespace {
 637
 638 template <typename INT>
 639 struct IntToStringTest {
 640   INT num;
 641   const char* sexpected;
 642   const char* uexpected;
 643 };
 644
 645 }  // namespace
 646
 647 TEST(StringUtilTest, IntToString) {
 648   static const IntToStringTest<int> int_tests[] = {
 649       { 0, "0", "0" },
 650       { -1, "-1", "4294967295" },
 651       { std::numeric_limits<int>::max(), "2147483647", "2147483647" },
 652       { std::numeric_limits<int>::min(), "-2147483648", "2147483648" },
 653   };
 654   static const IntToStringTest<int64> int64_tests[] = {
 655       { 0, "0", "0" },
 656       { -1, "-1", "18446744073709551615" },
 657       { std::numeric_limits<int64>::max(),
 658         "9223372036854775807",
 659         "9223372036854775807", },
 660       { std::numeric_limits<int64>::min(),
 661         "-9223372036854775808",
 662         "9223372036854775808" },
 663   };
 664
 665   for (size_t i = 0; i < arraysize(int_tests); ++i) {
 666     const IntToStringTest<int>* test = &int_tests[i];
 667     EXPECT_EQ(IntToString(test->num), test->sexpected);
 668     EXPECT_EQ(IntToWString(test->num), UTF8ToWide(test->sexpected));
 669     EXPECT_EQ(UintToString(test->num), test->uexpected);
 670     EXPECT_EQ(UintToWString(test->num), UTF8ToWide(test->uexpected));
 671   }
 672   for (size_t i = 0; i < arraysize(int64_tests); ++i) {
 673     const IntToStringTest<int64>* test = &int64_tests[i];
 674     EXPECT_EQ(Int64ToString(test->num), test->sexpected);
 675     EXPECT_EQ(Int64ToWString(test->num), UTF8ToWide(test->sexpected));
 676     EXPECT_EQ(Uint64ToString(test->num), test->uexpected);
 677     EXPECT_EQ(Uint64ToWString(test->num), UTF8ToWide(test->uexpected));
 678   }
 679 }
 680
 681 TEST(StringUtilTest, Uint64ToString) {
 682   static const struct {
 683     uint64 input;
 684     std::string output;
 685   } cases[] = {
 686     {0, "0"},
 687     {42, "42"},
 688     {INT_MAX, "2147483647"},
 689     {kuint64max, "18446744073709551615"},
 690   };
 691
 692   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
 693     EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input));
 694 }
 695
 696 TEST(StringUtilTest, StringToInt) {
 697   static const struct {
 698     std::string input;
 699     int output;
 700     bool success;
 701   } cases[] = {
 702     {"0", 0, true},
 703     {"42", 42, true},
 704     {"-2147483648", INT_MIN, true},
 705     {"2147483647", INT_MAX, true},
 706     {"", 0, false},
 707     {" 42", 42, false},
 708     {"42 ", 42, false},
 709     {"\t\n\v\f\r 42", 42, false},
 710     {"blah42", 0, false},
 711     {"42blah", 42, false},
 712     {"blah42blah", 0, false},
 713     {"-273.15", -273, false},
 714     {"+98.6", 98, false},
 715     {"--123", 0, false},
 716     {"++123", 0, false},
 717     {"-+123", 0, false},
 718     {"+-123", 0, false},
 719     {"-", 0, false},
 720     {"-2147483649", INT_MIN, false},
 721     {"-99999999999", INT_MIN, false},
 722     {"2147483648", INT_MAX, false},
 723     {"99999999999", INT_MAX, false},
 724   };
 725
 726   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 727     EXPECT_EQ(cases[i].output, StringToInt(cases[i].input));
 728     int output;
 729     EXPECT_EQ(cases[i].success, StringToInt(cases[i].input, &output));
 730     EXPECT_EQ(cases[i].output, output);
 731
 732     std::wstring wide_input = ASCIIToWide(cases[i].input);
 733     EXPECT_EQ(cases[i].output, StringToInt(WideToUTF16Hack(wide_input)));
 734     EXPECT_EQ(cases[i].success, StringToInt(WideToUTF16Hack(wide_input),
 735                                             &output));
 736     EXPECT_EQ(cases[i].output, output);
 737   }
 738
 739   // One additional test to verify that conversion of numbers in strings with
 740   // embedded NUL characters.  The NUL and extra data after it should be
 741   // interpreted as junk after the number.
 742   const char input[] = "6\06";
 743   std::string input_string(input, arraysize(input) - 1);
 744   int output;
 745   EXPECT_FALSE(StringToInt(input_string, &output));
 746   EXPECT_EQ(6, output);
 747
 748   std::wstring wide_input = ASCIIToWide(input_string);
 749   EXPECT_FALSE(StringToInt(WideToUTF16Hack(wide_input), &output));
 750   EXPECT_EQ(6, output);
 751 }
 752
 753 TEST(StringUtilTest, StringToInt64) {
 754   static const struct {
 755     std::string input;
 756     int64 output;
 757     bool success;
 758   } cases[] = {
 759     {"0", 0, true},
 760     {"42", 42, true},
 761     {"-2147483648", INT_MIN, true},
 762     {"2147483647", INT_MAX, true},
 763     {"-2147483649", GG_INT64_C(-2147483649), true},
 764     {"-99999999999", GG_INT64_C(-99999999999), true},
 765     {"2147483648", GG_INT64_C(2147483648), true},
 766     {"99999999999", GG_INT64_C(99999999999), true},
 767     {"9223372036854775807", kint64max, true},
 768     {"-9223372036854775808", kint64min, true},
 769     {"09", 9, true},
 770     {"-09", -9, true},
 771     {"", 0, false},
 772     {" 42", 42, false},
 773     {"42 ", 42, false},
 774     {"\t\n\v\f\r 42", 42, false},
 775     {"blah42", 0, false},
 776     {"42blah", 42, false},
 777     {"blah42blah", 0, false},
 778     {"-273.15", -273, false},
 779     {"+98.6", 98, false},
 780     {"--123", 0, false},
 781     {"++123", 0, false},
 782     {"-+123", 0, false},
 783     {"+-123", 0, false},
 784     {"-", 0, false},
 785     {"-9223372036854775809", kint64min, false},
 786     {"-99999999999999999999", kint64min, false},
 787     {"9223372036854775808", kint64max, false},
 788     {"99999999999999999999", kint64max, false},
 789   };
 790
 791   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 792     EXPECT_EQ(cases[i].output, StringToInt64(cases[i].input));
 793     int64 output;
 794     EXPECT_EQ(cases[i].success, StringToInt64(cases[i].input, &output));
 795     EXPECT_EQ(cases[i].output, output);
 796
 797     std::wstring wide_input = ASCIIToWide(cases[i].input);
 798     EXPECT_EQ(cases[i].output, StringToInt64(WideToUTF16Hack(wide_input)));
 799     EXPECT_EQ(cases[i].success, StringToInt64(WideToUTF16Hack(wide_input),
 800                                               &output));
 801     EXPECT_EQ(cases[i].output, output);
 802   }
 803
 804   // One additional test to verify that conversion of numbers in strings with
 805   // embedded NUL characters.  The NUL and extra data after it should be
 806   // interpreted as junk after the number.
 807   const char input[] = "6\06";
 808   std::string input_string(input, arraysize(input) - 1);
 809   int64 output;
 810   EXPECT_FALSE(StringToInt64(input_string, &output));
 811   EXPECT_EQ(6, output);
 812
 813   std::wstring wide_input = ASCIIToWide(input_string);
 814   EXPECT_FALSE(StringToInt64(WideToUTF16Hack(wide_input), &output));
 815   EXPECT_EQ(6, output);
 816 }
 817
 818 TEST(StringUtilTest, HexStringToInt) {
 819   static const struct {
 820     std::string input;
 821     int output;
 822     bool success;
 823   } cases[] = {
 824     {"0", 0, true},
 825     {"42", 66, true},
 826     {"-42", -66, true},
 827     {"+42", 66, true},
 828     {"7fffffff", INT_MAX, true},
 829     {"80000000", INT_MIN, true},
 830     {"ffffffff", -1, true},
 831     {"DeadBeef", 0xdeadbeef, true},
 832     {"0x42", 66, true},
 833     {"-0x42", -66, true},
 834     {"+0x42", 66, true},
 835     {"0x7fffffff", INT_MAX, true},
 836     {"0x80000000", INT_MIN, true},
 837     {"0xffffffff", -1, true},
 838     {"0XDeadBeef", 0xdeadbeef, true},
 839     {"0x0f", 15, true},
 840     {"0f", 15, true},
 841     {" 45", 0x45, false},
 842     {"\t\n\v\f\r 0x45", 0x45, false},
 843     {" 45", 0x45, false},
 844     {"45 ", 0x45, false},
 845     {"efgh", 0xef, false},
 846     {"0xefgh", 0xef, false},
 847     {"hgfe", 0, false},
 848     {"100000000", -1, false},  // don't care about |output|, just |success|
 849     {"-", 0, false},
 850     {"", 0, false},
 851   };
 852
 853   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 854     EXPECT_EQ(cases[i].output, HexStringToInt(cases[i].input));
 855     int output;
 856     EXPECT_EQ(cases[i].success, HexStringToInt(cases[i].input, &output));
 857     EXPECT_EQ(cases[i].output, output);
 858
 859     std::wstring wide_input = ASCIIToWide(cases[i].input);
 860     EXPECT_EQ(cases[i].output, HexStringToInt(WideToUTF16Hack(wide_input)));
 861     EXPECT_EQ(cases[i].success, HexStringToInt(WideToUTF16Hack(wide_input),
 862                                                &output));
 863     EXPECT_EQ(cases[i].output, output);
 864   }
 865   // One additional test to verify that conversion of numbers in strings with
 866   // embedded NUL characters.  The NUL and extra data after it should be
 867   // interpreted as junk after the number.
 868   const char input[] = "0xc0ffee\09";
 869   std::string input_string(input, arraysize(input) - 1);
 870   int output;
 871   EXPECT_FALSE(HexStringToInt(input_string, &output));
 872   EXPECT_EQ(0xc0ffee, output);
 873
 874   std::wstring wide_input = ASCIIToWide(input_string);
 875   EXPECT_FALSE(HexStringToInt(WideToUTF16Hack(wide_input), &output));
 876   EXPECT_EQ(0xc0ffee, output);
 877 }
 878
 879 TEST(StringUtilTest, HexStringToBytes) {
 880   static const struct {
 881     const std::string input;
 882     const char* output;
 883     size_t output_len;
 884     bool success;
 885   } cases[] = {
 886     {"0", "", 0, false},  // odd number of characters fails
 887     {"00", "\0", 1, true},
 888     {"42", "\x42", 1, true},
 889     {"-42", "", 0, false},  // any non-hex value fails
 890     {"+42", "", 0, false},
 891     {"7fffffff", "\x7f\xff\xff\xff", 4, true},
 892     {"80000000", "\x80\0\0\0", 4, true},
 893     {"deadbeef", "\xde\xad\xbe\xef", 4, true},
 894     {"DeadBeef", "\xde\xad\xbe\xef", 4, true},
 895     {"0x42", "", 0, false},  // leading 0x fails (x is not hex)
 896     {"0f", "\xf", 1, true},
 897     {"45  ", "\x45", 1, false},
 898     {"efgh", "\xef", 1, false},
 899     {"", "", 0, false},
 900     {"0123456789ABCDEF", "\x01\x23\x45\x67\x89\xAB\xCD\xEF", 8, true},
 901     {"0123456789ABCDEF012345",
 902      "\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45", 11, true},
 903   };
 904
 905
 906   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 907     std::vector<uint8> output;
 908     std::vector<uint8> compare;
 909     EXPECT_EQ(cases[i].success, HexStringToBytes(cases[i].input, &output)) <<
 910         i << ": " << cases[i].input;
 911     for (size_t j = 0; j < cases[i].output_len; ++j)
 912       compare.push_back(static_cast<uint8>(cases[i].output[j]));
 913     ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input;
 914     EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) <<
 915         i << ": " << cases[i].input;
 916
 917     output.clear();
 918     compare.clear();
 919
 920     std::wstring wide_input = ASCIIToWide(cases[i].input);
 921     EXPECT_EQ(cases[i].success,
 922               HexStringToBytes(WideToUTF16Hack(wide_input), &output)) <<
 923         i << ": " << cases[i].input;
 924     for (size_t j = 0; j < cases[i].output_len; ++j)
 925       compare.push_back(static_cast<uint8>(cases[i].output[j]));
 926     ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input;
 927     EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) <<
 928         i << ": " << cases[i].input;
 929   }
 930 }
 931
 932 TEST(StringUtilTest, StringToDouble) {
 933   static const struct {
 934     std::string input;
 935     double output;
 936     bool success;
 937   } cases[] = {
 938     {"0", 0.0, true},
 939     {"42", 42.0, true},
 940     {"-42", -42.0, true},
 941     {"123.45", 123.45, true},
 942     {"-123.45", -123.45, true},
 943     {"+123.45", 123.45, true},
 944     {"2.99792458e8", 299792458.0, true},
 945     {"149597870.691E+3", 149597870691.0, true},
 946     {"6.", 6.0, true},
 947     {"9e99999999999999999999", HUGE_VAL, false},
 948     {"-9e99999999999999999999", -HUGE_VAL, false},
 949     {"1e-2", 0.01, true},
 950     {" 1e-2", 0.01, false},
 951     {"1e-2 ", 0.01, false},
 952     {"-1E-7", -0.0000001, true},
 953     {"01e02", 100, true},
 954     {"2.3e15", 2.3e15, true},
 955     {"\t\n\v\f\r -123.45e2", -12345.0, false},
 956     {"+123 e4", 123.0, false},
 957     {"123e ", 123.0, false},
 958     {"123e", 123.0, false},
 959     {" 2.99", 2.99, false},
 960     {"1e3.4", 1000.0, false},
 961     {"nothing", 0.0, false},
 962     {"-", 0.0, false},
 963     {"+", 0.0, false},
 964     {"", 0.0, false},
 965   };
 966
 967   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 968     EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(cases[i].input));
 969     double output;
 970     EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output));
 971     EXPECT_DOUBLE_EQ(cases[i].output, output);
 972
 973     std::wstring wide_input = ASCIIToWide(cases[i].input);
 974     EXPECT_DOUBLE_EQ(cases[i].output,
 975                      StringToDouble(WideToUTF16Hack(wide_input)));
 976     EXPECT_EQ(cases[i].success, StringToDouble(WideToUTF16Hack(wide_input),
 977                                                &output));
 978     EXPECT_DOUBLE_EQ(cases[i].output, output);
 979   }
 980
 981   // One additional test to verify that conversion of numbers in strings with
 982   // embedded NUL characters.  The NUL and extra data after it should be
 983   // interpreted as junk after the number.
 984   const char input[] = "3.14\0159";
 985   std::string input_string(input, arraysize(input) - 1);
 986   double output;
 987   EXPECT_FALSE(StringToDouble(input_string, &output));
 988   EXPECT_DOUBLE_EQ(3.14, output);
 989
 990   std::wstring wide_input = ASCIIToWide(input_string);
 991   EXPECT_FALSE(StringToDouble(WideToUTF16Hack(wide_input), &output));
 992   EXPECT_DOUBLE_EQ(3.14, output);
 993 }
 994
 995 // This checks where we can use the assignment operator for a va_list. We need
 996 // a way to do this since Visual C doesn't support va_copy, but assignment on
 997 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
 998 // capability.
 999 static void VariableArgsFunc(const char* format, ...) {
1000   va_list org;
1001   va_start(org, format);
1002
1003   va_list dup;
1004   GG_VA_COPY(dup, org);
1005   int i1 = va_arg(org, int);
1006   int j1 = va_arg(org, int);
1007   char* s1 = va_arg(org, char*);
1008   double d1 = va_arg(org, double);
1009   va_end(org);
1010
1011   int i2 = va_arg(dup, int);
1012   int j2 = va_arg(dup, int);
1013   char* s2 = va_arg(dup, char*);
1014   double d2 = va_arg(dup, double);
1015
1016   EXPECT_EQ(i1, i2);
1017   EXPECT_EQ(j1, j2);
1018   EXPECT_STREQ(s1, s2);
1019   EXPECT_EQ(d1, d2);
1020
1021   va_end(dup);
1022 }
1023
1024 TEST(StringUtilTest, VAList) {
1025   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
1026 }
1027
1028 TEST(StringUtilTest, StringPrintfEmpty) {
1029   EXPECT_EQ("", StringPrintf("%s", ""));
1030 }
1031
1032 TEST(StringUtilTest, StringPrintfMisc) {
1033   EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w'));
1034   EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w'));
1035 }
1036
1037 TEST(StringUtilTest, StringAppendfEmptyString) {
1038   std::string value("Hello");
1039   StringAppendF(&value, "%s", "");
1040   EXPECT_EQ("Hello", value);
1041
1042   std::wstring valuew(L"Hello");
1043   StringAppendF(&valuew, L"%ls", L"");
1044   EXPECT_EQ(L"Hello", valuew);
1045 }
1046
1047 TEST(StringUtilTest, StringAppendfString) {
1048   std::string value("Hello");
1049   StringAppendF(&value, " %s", "World");
1050   EXPECT_EQ("Hello World", value);
1051
1052   std::wstring valuew(L"Hello");
1053   StringAppendF(&valuew, L" %ls", L"World");
1054   EXPECT_EQ(L"Hello World", valuew);
1055 }
1056
1057 TEST(StringUtilTest, StringAppendfInt) {
1058   std::string value("Hello");
1059   StringAppendF(&value, " %d", 123);
1060   EXPECT_EQ("Hello 123", value);
1061
1062   std::wstring valuew(L"Hello");
1063   StringAppendF(&valuew, L" %d", 123);
1064   EXPECT_EQ(L"Hello 123", valuew);
1065 }
1066
1067 // Make sure that lengths exactly around the initial buffer size are handled
1068 // correctly.
1069 TEST(StringUtilTest, StringPrintfBounds) {
1070   const int kSrcLen = 1026;
1071   char src[kSrcLen];
1072   for (size_t i = 0; i < arraysize(src); i++)
1073     src[i] = 'A';
1074
1075   wchar_t srcw[kSrcLen];
1076   for (size_t i = 0; i < arraysize(srcw); i++)
1077     srcw[i] = 'A';
1078
1079   for (int i = 1; i < 3; i++) {
1080     src[kSrcLen - i] = 0;
1081     std::string out;
1082     SStringPrintf(&out, "%s", src);
1083     EXPECT_STREQ(src, out.c_str());
1084
1085     srcw[kSrcLen - i] = 0;
1086     std::wstring outw;
1087     SStringPrintf(&outw, L"%ls", srcw);
1088     EXPECT_STREQ(srcw, outw.c_str());
1089   }
1090 }
1091
1092 // Test very large sprintfs that will cause the buffer to grow.
1093 TEST(StringUtilTest, Grow) {
1094   char src[1026];
1095   for (size_t i = 0; i < arraysize(src); i++)
1096     src[i] = 'A';
1097   src[1025] = 0;
1098
1099   const char* fmt = "%sB%sB%sB%sB%sB%sB%s";
1100
1101   std::string out;
1102   SStringPrintf(&out, fmt, src, src, src, src, src, src, src);
1103
1104   const int kRefSize = 320000;
1105   char* ref = new char[kRefSize];
1106 #if defined(OS_WIN)
1107   sprintf_s(ref, kRefSize, fmt, src, src, src, src, src, src, src);
1108 #elif defined(OS_POSIX)
1109   snprintf(ref, kRefSize, fmt, src, src, src, src, src, src, src);
1110 #endif
1111
1112   EXPECT_STREQ(ref, out.c_str());
1113   delete[] ref;
1114 }
1115
1116 // A helper for the StringAppendV test that follows.
1117 // Just forwards its args to StringAppendV.
1118 static void StringAppendVTestHelper(std::string* out,
1119                                     const char* format,
1120                                     ...) PRINTF_FORMAT(2, 3);
1121
1122 static void StringAppendVTestHelper(std::string* out, const char* format, ...) {
1123   va_list ap;
1124   va_start(ap, format);
1125   StringAppendV(out, format, ap);
1126   va_end(ap);
1127 }
1128
1129 TEST(StringUtilTest, StringAppendV) {
1130   std::string out;
1131   StringAppendVTestHelper(&out, "%d foo %s", 1, "bar");
1132   EXPECT_EQ("1 foo bar", out);
1133 }
1134
1135 // Test the boundary condition for the size of the string_util's
1136 // internal buffer.
1137 TEST(StringUtilTest, GrowBoundary) {
1138   const int string_util_buf_len = 1024;
1139   // Our buffer should be one larger than the size of StringAppendVT's stack
1140   // buffer.
1141   const int buf_len = string_util_buf_len + 1;
1142   char src[buf_len + 1];  // Need extra one for NULL-terminator.
1143   for (int i = 0; i < buf_len; ++i)
1144     src[i] = 'a';
1145   src[buf_len] = 0;
1146
1147   std::string out;
1148   SStringPrintf(&out, "%s", src);
1149
1150   EXPECT_STREQ(src, out.c_str());
1151 }
1152
// TODO(evanm): what's the proper cross-platform test here?
#if defined(OS_WIN)
// sprintf in Visual Studio fails when given U+FFFF. This tests that the
// failure case is gracefully handled: the output is expected to stay empty.
TEST(StringUtilTest, Invalid) {
  // A one-character wide string holding only U+FFFF.
  wchar_t bad_input[2];
  bad_input[1] = 0;
  bad_input[0] = 0xffff;

  std::wstring formatted;
  SStringPrintf(&formatted, L"%ls", bad_input);
  EXPECT_STREQ(L"", formatted.c_str());
}
#endif
1167
1168 // Test for SplitString
1169 TEST(StringUtilTest, SplitString) {
1170   std::vector<std::wstring> r;
1171
1172   SplitString(L"", L',', &r);
1173   ASSERT_EQ(1U, r.size());
1174   EXPECT_EQ(r[0], L"");
1175   r.clear();
1176
1177   SplitString(L"a,b,c", L',', &r);
1178   ASSERT_EQ(3U, r.size());
1179   EXPECT_EQ(r[0], L"a");
1180   EXPECT_EQ(r[1], L"b");
1181   EXPECT_EQ(r[2], L"c");
1182   r.clear();
1183
1184   SplitString(L"a, b, c", L',', &r);
1185   ASSERT_EQ(3U, r.size());
1186   EXPECT_EQ(r[0], L"a");
1187   EXPECT_EQ(r[1], L"b");
1188   EXPECT_EQ(r[2], L"c");
1189   r.clear();
1190
1191   SplitString(L"a,,c", L',', &r);
1192   ASSERT_EQ(3U, r.size());
1193   EXPECT_EQ(r[0], L"a");
1194   EXPECT_EQ(r[1], L"");
1195   EXPECT_EQ(r[2], L"c");
1196   r.clear();
1197
1198   SplitString(L"", L'*', &r);
1199   ASSERT_EQ(1U, r.size());
1200   EXPECT_EQ(r[0], L"");
1201   r.clear();
1202
1203   SplitString(L"foo", L'*', &r);
1204   ASSERT_EQ(1U, r.size());
1205   EXPECT_EQ(r[0], L"foo");
1206   r.clear();
1207
1208   SplitString(L"foo ,", L',', &r);
1209   ASSERT_EQ(2U, r.size());
1210   EXPECT_EQ(r[0], L"foo");
1211   EXPECT_EQ(r[1], L"");
1212   r.clear();
1213
1214   SplitString(L",", L',', &r);
1215   ASSERT_EQ(2U, r.size());
1216   EXPECT_EQ(r[0], L"");
1217   EXPECT_EQ(r[1], L"");
1218   r.clear();
1219
1220   SplitString(L"\t\ta\t", L'\t', &r);
1221   ASSERT_EQ(4U, r.size());
1222   EXPECT_EQ(r[0], L"");
1223   EXPECT_EQ(r[1], L"");
1224   EXPECT_EQ(r[2], L"a");
1225   EXPECT_EQ(r[3], L"");
1226   r.clear();
1227
1228   SplitStringDontTrim(L"\t\ta\t", L'\t', &r);
1229   ASSERT_EQ(4U, r.size());
1230   EXPECT_EQ(r[0], L"");
1231   EXPECT_EQ(r[1], L"");
1232   EXPECT_EQ(r[2], L"a");
1233   EXPECT_EQ(r[3], L"");
1234   r.clear();
1235
1236   SplitString(L"\ta\t\nb\tcc", L'\n', &r);
1237   ASSERT_EQ(2U, r.size());
1238   EXPECT_EQ(r[0], L"a");
1239   EXPECT_EQ(r[1], L"b\tcc");
1240   r.clear();
1241
1242   SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r);
1243   ASSERT_EQ(2U, r.size());
1244   EXPECT_EQ(r[0], L"\ta\t");
1245   EXPECT_EQ(r[1], L"b\tcc");
1246   r.clear();
1247 }
1248
1249 // Test for Tokenize
1250 template <typename STR>
1251 void TokenizeTest() {
1252   std::vector<STR> r;
1253   size_t size;
1254
1255   size = Tokenize(STR("This is a string"), STR(" "), &r);
1256   EXPECT_EQ(4U, size);
1257   ASSERT_EQ(4U, r.size());
1258   EXPECT_EQ(r[0], STR("This"));
1259   EXPECT_EQ(r[1], STR("is"));
1260   EXPECT_EQ(r[2], STR("a"));
1261   EXPECT_EQ(r[3], STR("string"));
1262   r.clear();
1263
1264   size = Tokenize(STR("one,two,three"), STR(","), &r);
1265   EXPECT_EQ(3U, size);
1266   ASSERT_EQ(3U, r.size());
1267   EXPECT_EQ(r[0], STR("one"));
1268   EXPECT_EQ(r[1], STR("two"));
1269   EXPECT_EQ(r[2], STR("three"));
1270   r.clear();
1271
1272   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
1273   EXPECT_EQ(3U, size);
1274   ASSERT_EQ(3U, r.size());
1275   EXPECT_EQ(r[0], STR("one"));
1276   EXPECT_EQ(r[1], STR("two"));
1277   EXPECT_EQ(r[2], STR("three;four"));
1278   r.clear();
1279
1280   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
1281   EXPECT_EQ(4U, size);
1282   ASSERT_EQ(4U, r.size());
1283   EXPECT_EQ(r[0], STR("one"));
1284   EXPECT_EQ(r[1], STR("two"));
1285   EXPECT_EQ(r[2], STR("three"));
1286   EXPECT_EQ(r[3], STR("four"));
1287   r.clear();
1288
1289   size = Tokenize(STR("one, two, three"), STR(","), &r);
1290   EXPECT_EQ(3U, size);
1291   ASSERT_EQ(3U, r.size());
1292   EXPECT_EQ(r[0], STR("one"));
1293   EXPECT_EQ(r[1], STR(" two"));
1294   EXPECT_EQ(r[2], STR(" three"));
1295   r.clear();
1296
1297   size = Tokenize(STR("one, two, three, "), STR(","), &r);
1298   EXPECT_EQ(4U, size);
1299   ASSERT_EQ(4U, r.size());
1300   EXPECT_EQ(r[0], STR("one"));
1301   EXPECT_EQ(r[1], STR(" two"));
1302   EXPECT_EQ(r[2], STR(" three"));
1303   EXPECT_EQ(r[3], STR(" "));
1304   r.clear();
1305
1306   size = Tokenize(STR("one, two, three,"), STR(","), &r);
1307   EXPECT_EQ(3U, size);
1308   ASSERT_EQ(3U, r.size());
1309   EXPECT_EQ(r[0], STR("one"));
1310   EXPECT_EQ(r[1], STR(" two"));
1311   EXPECT_EQ(r[2], STR(" three"));
1312   r.clear();
1313
1314   size = Tokenize(STR(""), STR(","), &r);
1315   EXPECT_EQ(0U, size);
1316   ASSERT_EQ(0U, r.size());
1317   r.clear();
1318
1319   size = Tokenize(STR(","), STR(","), &r);
1320   EXPECT_EQ(0U, size);
1321   ASSERT_EQ(0U, r.size());
1322   r.clear();
1323
1324   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
1325   EXPECT_EQ(0U, size);
1326   ASSERT_EQ(0U, r.size());
1327   r.clear();
1328
1329   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
1330   EXPECT_EQ(1U, size);
1331   ASSERT_EQ(1U, r.size());
1332   EXPECT_EQ(r[0], STR("a"));
1333   r.clear();
1334
1335   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
1336   EXPECT_EQ(2U, size);
1337   ASSERT_EQ(2U, r.size());
1338   EXPECT_EQ(r[0], STR("\ta\t"));
1339   EXPECT_EQ(r[1], STR("b\tcc"));
1340   r.clear();
1341 }
1342
1343 TEST(StringUtilTest, TokenizeStdString) {
1344   TokenizeTest<std::string>();
1345 }
1346
1347 TEST(StringUtilTest, TokenizeStringPiece) {
1348   TokenizeTest<base::StringPiece>();
1349 }
1350
1351 // Test for JoinString
1352 TEST(StringUtilTest, JoinString) {
1353   std::vector<std::string> in;
1354   EXPECT_EQ("", JoinString(in, ','));
1355
1356   in.push_back("a");
1357   EXPECT_EQ("a", JoinString(in, ','));
1358
1359   in.push_back("b");
1360   in.push_back("c");
1361   EXPECT_EQ("a,b,c", JoinString(in, ','));
1362
1363   in.push_back("");
1364   EXPECT_EQ("a,b,c,", JoinString(in, ','));
1365   in.push_back(" ");
1366   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
1367 }
1368
1369 TEST(StringUtilTest, StartsWith) {
1370   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
1371   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
1372   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
1373   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
1374   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
1375   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
1376   EXPECT_FALSE(StartsWithASCII("", "javascript", false));
1377   EXPECT_FALSE(StartsWithASCII("", "javascript", true));
1378   EXPECT_TRUE(StartsWithASCII("java", "", false));
1379   EXPECT_TRUE(StartsWithASCII("java", "", true));
1380
1381   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
1382   EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
1383   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
1384   EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
1385   EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
1386   EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
1387   EXPECT_FALSE(StartsWith(L"", L"javascript", false));
1388   EXPECT_FALSE(StartsWith(L"", L"javascript", true));
1389   EXPECT_TRUE(StartsWith(L"java", L"", false));
1390   EXPECT_TRUE(StartsWith(L"java", L"", true));
1391 }
1392
1393 TEST(StringUtilTest, EndsWith) {
1394   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
1395   EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
1396   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
1397   EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
1398   EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
1399   EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
1400   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
1401   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
1402   EXPECT_FALSE(EndsWith(L"", L".plugin", false));
1403   EXPECT_FALSE(EndsWith(L"", L".plugin", true));
1404   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
1405   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
1406   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
1407   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
1408   EXPECT_TRUE(EndsWith(L"", L"", false));
1409   EXPECT_TRUE(EndsWith(L"", L"", true));
1410 }
1411
1412 TEST(StringUtilTest, GetStringFWithOffsets) {
1413   std::vector<string16> subst;
1414   subst.push_back(ASCIIToUTF16("1"));
1415   subst.push_back(ASCIIToUTF16("2"));
1416   std::vector<size_t> offsets;
1417
1418   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
1419                             subst,
1420                             &offsets);
1421   EXPECT_EQ(2U, offsets.size());
1422   EXPECT_EQ(7U, offsets[0]);
1423   EXPECT_EQ(25U, offsets[1]);
1424   offsets.clear();
1425
1426   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
1427                             subst,
1428                             &offsets);
1429   EXPECT_EQ(2U, offsets.size());
1430   EXPECT_EQ(25U, offsets[0]);
1431   EXPECT_EQ(7U, offsets[1]);
1432   offsets.clear();
1433 }
1434
1435 TEST(StringUtilTest, ReplaceStringPlaceholders) {
1436   std::vector<string16> subst;
1437   subst.push_back(ASCIIToUTF16("9a"));
1438   subst.push_back(ASCIIToUTF16("8b"));
1439   subst.push_back(ASCIIToUTF16("7c"));
1440   subst.push_back(ASCIIToUTF16("6d"));
1441   subst.push_back(ASCIIToUTF16("5e"));
1442   subst.push_back(ASCIIToUTF16("4f"));
1443   subst.push_back(ASCIIToUTF16("3g"));
1444   subst.push_back(ASCIIToUTF16("2h"));
1445   subst.push_back(ASCIIToUTF16("1i"));
1446
1447   string16 formatted =
1448       ReplaceStringPlaceholders(
1449           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
1450
1451   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
1452 }
1453
1454 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
1455   // Test whether replacestringplaceholders works as expected when there
1456   // are fewer inputs than outputs.
1457   std::vector<string16> subst;
1458   subst.push_back(ASCIIToUTF16("9a"));
1459   subst.push_back(ASCIIToUTF16("8b"));
1460   subst.push_back(ASCIIToUTF16("7c"));
1461
1462   string16 formatted =
1463       ReplaceStringPlaceholders(
1464           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
1465
1466   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
1467 }
1468
1469 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
1470   std::vector<std::string> subst;
1471   subst.push_back("9a");
1472   subst.push_back("8b");
1473   subst.push_back("7c");
1474   subst.push_back("6d");
1475   subst.push_back("5e");
1476   subst.push_back("4f");
1477   subst.push_back("3g");
1478   subst.push_back("2h");
1479   subst.push_back("1i");
1480
1481   std::string formatted =
1482       ReplaceStringPlaceholders(
1483           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
1484
1485   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
1486 }
1487
1488 TEST(StringUtilTest, SplitStringAlongWhitespace) {
1489   struct TestData {
1490     const std::wstring input;
1491     const size_t expected_result_count;
1492     const std::wstring output1;
1493     const std::wstring output2;
1494   } data[] = {
1495     { L"a",       1, L"a",  L""   },
1496     { L" ",       0, L"",   L""   },
1497     { L" a",      1, L"a",  L""   },
1498     { L" ab ",    1, L"ab", L""   },
1499     { L" ab c",   2, L"ab", L"c"  },
1500     { L" ab c ",  2, L"ab", L"c"  },
1501     { L" ab cd",  2, L"ab", L"cd" },
1502     { L" ab cd ", 2, L"ab", L"cd" },
1503     { L" \ta\t",  1, L"a",  L""   },
1504     { L" b\ta\t", 2, L"b",  L"a"  },
1505     { L" b\tat",  2, L"b",  L"at" },
1506     { L"b\tat",   2, L"b",  L"at" },
1507     { L"b\t at",  2, L"b",  L"at" },
1508   };
1509   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
1510     std::vector<std::wstring> results;
1511     SplitStringAlongWhitespace(data[i].input, &results);
1512     ASSERT_EQ(data[i].expected_result_count, results.size());
1513     if (data[i].expected_result_count > 0)
1514       ASSERT_EQ(data[i].output1, results[0]);
1515     if (data[i].expected_result_count > 1)
1516       ASSERT_EQ(data[i].output2, results[1]);
1517   }
1518 }
1519
1520 TEST(StringUtilTest, MatchPatternTest) {
1521   EXPECT_EQ(MatchPatternASCII("www.google.com", "*.com"), true);
1522   EXPECT_EQ(MatchPatternASCII("www.google.com", "*"), true);
1523   EXPECT_EQ(MatchPatternASCII("www.google.com", "www*.g*.org"), false);
1524   EXPECT_EQ(MatchPatternASCII("Hello", "H?l?o"), true);
1525   EXPECT_EQ(MatchPatternASCII("www.google.com", "http://*)"), false);
1526   EXPECT_EQ(MatchPatternASCII("www.msn.com", "*.COM"), false);
1527   EXPECT_EQ(MatchPatternASCII("Hello*1234", "He??o\\*1*"), true);
1528   EXPECT_EQ(MatchPatternASCII("", "*.*"), false);
1529   EXPECT_EQ(MatchPatternASCII("", "*"), true);
1530   EXPECT_EQ(MatchPatternASCII("", "?"), true);
1531   EXPECT_EQ(MatchPatternASCII("", ""), true);
1532   EXPECT_EQ(MatchPatternASCII("Hello", ""), false);
1533   EXPECT_EQ(MatchPatternASCII("Hello*", "Hello*"), true);
1534   // Stop after a certain recursion depth.
1535   EXPECT_EQ(MatchPatternASCII("12345678901234567890", "???????????????????*"),
1536                               false);
1537 }
1538
1539 TEST(StringUtilTest, LcpyTest) {
1540   // Test the normal case where we fit in our buffer.
1541   {
1542     char dst[10];
1543     wchar_t wdst[10];
1544     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1545     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1546     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1547     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1548   }
1549
1550   // Test dst_size == 0, nothing should be written to |dst| and we should
1551   // have the equivalent of strlen(src).
1552   {
1553     char dst[2] = {1, 2};
1554     wchar_t wdst[2] = {1, 2};
1555     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1556     EXPECT_EQ(1, dst[0]);
1557     EXPECT_EQ(2, dst[1]);
1558     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1559 #if defined(WCHAR_T_IS_UNSIGNED)
1560     EXPECT_EQ(1U, wdst[0]);
1561     EXPECT_EQ(2U, wdst[1]);
1562 #else
1563     EXPECT_EQ(1, wdst[0]);
1564     EXPECT_EQ(2, wdst[1]);
1565 #endif
1566   }
1567
1568   // Test the case were we _just_ competely fit including the null.
1569   {
1570     char dst[8];
1571     wchar_t wdst[8];
1572     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1573     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1574     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1575     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1576   }
1577
1578   // Test the case were we we are one smaller, so we can't fit the null.
1579   {
1580     char dst[7];
1581     wchar_t wdst[7];
1582     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1583     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1584     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1585     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1586   }
1587
1588   // Test the case were we are just too small.
1589   {
1590     char dst[3];
1591     wchar_t wdst[3];
1592     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1593     EXPECT_EQ(0, memcmp(dst, "ab", 3));
1594     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1595     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1596   }
1597 }
1598
1599 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1600   struct TestData {
1601     const wchar_t* input;
1602     bool portable;
1603   } cases[] = {
1604     { L"%ls", true },
1605     { L"%s", false },
1606     { L"%S", false },
1607     { L"%lS", false },
1608     { L"Hello, %s", false },
1609     { L"%lc", true },
1610     { L"%c", false },
1611     { L"%C", false },
1612     { L"%lC", false },
1613     { L"%ls %s", false },
1614     { L"%s %ls", false },
1615     { L"%s %ls %s", false },
1616     { L"%f", true },
1617     { L"%f %F", false },
1618     { L"%d %D", false },
1619     { L"%o %O", false },
1620     { L"%u %U", false },
1621     { L"%f %d %o %u", true },
1622     { L"%-8d (%02.1f%)", true },
1623     { L"% 10s", false },
1624     { L"% 10ls", true }
1625   };
1626   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1627     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1628   }
1629 }
1630
1631 TEST(StringUtilTest, ElideString) {
1632   struct TestData {
1633     const wchar_t* input;
1634     int max_len;
1635     bool result;
1636     const wchar_t* output;
1637   } cases[] = {
1638     { L"Hello", 0, true, L"" },
1639     { L"", 0, false, L"" },
1640     { L"Hello, my name is Tom", 1, true, L"H" },
1641     { L"Hello, my name is Tom", 2, true, L"He" },
1642     { L"Hello, my name is Tom", 3, true, L"H.m" },
1643     { L"Hello, my name is Tom", 4, true, L"H..m" },
1644     { L"Hello, my name is Tom", 5, true, L"H...m" },
1645     { L"Hello, my name is Tom", 6, true, L"He...m" },
1646     { L"Hello, my name is Tom", 7, true, L"He...om" },
1647     { L"Hello, my name is Tom", 10, true, L"Hell...Tom" },
1648     { L"Hello, my name is Tom", 100, false, L"Hello, my name is Tom" }
1649   };
1650   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1651     std::wstring output;
1652     EXPECT_EQ(cases[i].result,
1653               ElideString(cases[i].input, cases[i].max_len, &output));
1654     EXPECT_TRUE(output == cases[i].output);
1655   }
1656 }
1657
1658 TEST(StringUtilTest, HexEncode) {
1659   std::string hex(HexEncode(NULL, 0));
1660   EXPECT_EQ(hex.length(), 0U);
1661   unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81};
1662   hex = HexEncode(bytes, sizeof(bytes));
1663   EXPECT_EQ(hex.compare("01FF02FE038081"), 0);
1664 }
1665
1666 TEST(StringUtilTest, RemoveChars) {
1667   const char* kRemoveChars = "-/+*";
1668   std::string input = "A-+bc/d!*";
1669   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1670   EXPECT_EQ("Abcd!", input);
1671
1672   // No characters match kRemoveChars.
1673   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1674   EXPECT_EQ("Abcd!", input);
1675
1676   // Empty string.
1677   input.clear();
1678   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1679   EXPECT_EQ(std::string(), input);
1680 }
1681
1682 TEST(StringUtilTest, ContainsOnlyChars) {
1683   // Providing an empty list of characters should return false but for the empty
1684   // string.
1685   EXPECT_TRUE(ContainsOnlyChars("", ""));
1686   EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1687
1688   EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1689   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1690   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1691   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1692   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1693 }
1694
1695 TEST(SplitStringUsingSubstrTest, EmptyString) {
1696   std::vector<std::string> results;
1697   SplitStringUsingSubstr("", "DELIMITER", &results);
1698   ASSERT_EQ(1u, results.size());
1699   EXPECT_THAT(results, ElementsAre(""));
1700 }
1701
1702 TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
1703   std::vector<std::string> results;
1704   SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results);
1705   ASSERT_EQ(1u, results.size());
1706   EXPECT_THAT(results, ElementsAre("alongwordwithnodelimiter"));
1707 }
1708
1709 TEST(SplitStringUsingSubstrTest, LeadingDelimitersSkipped) {
1710   std::vector<std::string> results;
1711   SplitStringUsingSubstr(
1712       "DELIMITERDELIMITERDELIMITERoneDELIMITERtwoDELIMITERthree",
1713       "DELIMITER",
1714       &results);
1715   ASSERT_EQ(6u, results.size());
1716   EXPECT_THAT(results, ElementsAre("", "", "", "one", "two", "three"));
1717 }
1718
1719 TEST(SplitStringUsingSubstrTest, ConsecutiveDelimitersSkipped) {
1720   std::vector<std::string> results;
1721   SplitStringUsingSubstr(
1722       "unoDELIMITERDELIMITERDELIMITERdosDELIMITERtresDELIMITERDELIMITERcuatro",
1723       "DELIMITER",
1724       &results);
1725   ASSERT_EQ(7u, results.size());
1726   EXPECT_THAT(results, ElementsAre("uno", "", "", "dos", "tres", "", "cuatro"));
1727 }
1728
1729 TEST(SplitStringUsingSubstrTest, TrailingDelimitersSkipped) {
1730   std::vector<std::string> results;
1731   SplitStringUsingSubstr(
1732       "unDELIMITERdeuxDELIMITERtroisDELIMITERquatreDELIMITERDELIMITERDELIMITER",
1733       "DELIMITER",
1734       &results);
1735   ASSERT_EQ(7u, results.size());
1736   EXPECT_THAT(
1737       results, ElementsAre("un", "deux", "trois", "quatre", "", "", ""));
1738 }
1739
1740 }  // namespace base