Roll skia to r1241.
[chromium-blink-merge.git] / base / string_util_unittest.cc
blobcd456429f0a992673309f9fac0ca312d494a5379
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <math.h>
6 #include <stdarg.h>
8 #include <limits>
9 #include <sstream>
11 #include "base/basictypes.h"
12 #include "base/string_util.h"
13 #include "base/utf_string_conversions.h"
14 #include "testing/gmock/include/gmock/gmock.h"
15 #include "testing/gtest/include/gtest/gtest.h"
17 using ::testing::ElementsAre;
19 namespace base {
// Wide-string table for the TrimWhitespace test. Each row gives the input
// text, which end(s) to trim, the text expected afterwards, and the
// TrimPositions value that TrimWhitespace() is expected to return.
// NOTE(review): this listing comes from a web code viewer (its line numbers
// are fused into each line and brace-only lines are missing). Runs of spaces
// inside the literals may have been collapsed -- confirm the exact literals
// against the repository copy.
21 static const struct trim_case {
22 const wchar_t* input;
23 const TrimPositions positions;
24 const wchar_t* output;
25 const TrimPositions return_value;
26 } trim_cases[] = {
27 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
28 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
29 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
// Nothing to trim: the expected return value is TRIM_NONE.
30 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
31 {L"", TRIM_ALL, L"", TRIM_NONE},
// Whitespace-only input trims down to the empty string.
32 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
33 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
34 {L" ", TRIM_ALL, L"", TRIM_ALL},
35 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
// Non-ASCII spaces: U+2002 (en space), U+00A0 (no-break space) and
// U+3000 (ideographic space).
36 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
// Narrow-string counterpart of trim_cases, used by the second loop of the
// TrimWhitespace test. Same column layout: input, positions to trim, expected
// text, expected TrimPositions return value.
// NOTE(review): runs of spaces inside the literals may have been collapsed by
// the page extraction -- confirm against the repository copy.
39 static const struct trim_case_ascii {
40 const char* input;
41 const TrimPositions positions;
42 const char* output;
43 const TrimPositions return_value;
44 } trim_cases_ascii[] = {
45 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
46 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
47 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
// Nothing to trim: the expected return value is TRIM_NONE.
48 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
49 {"", TRIM_ALL, "", TRIM_NONE},
// Whitespace-only input trims down to the empty string.
50 {" ", TRIM_LEADING, "", TRIM_LEADING},
51 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
52 {" ", TRIM_ALL, "", TRIM_ALL},
53 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
56 namespace {
58 // Helper used to test TruncateUTF8ToByteSize.
// Calls TruncateUTF8ToByteSize(input, byte_size, output) and returns true
// when the length of |*output| afterwards differs from the length |input|
// had before the call, i.e. when something was cut off.
59 bool Truncated(const std::string& input, const size_t byte_size,
60 std::string* output) {
// Capture the length before the call: the TruncateUTF8ToByteSize test also
// passes the same string object as both |input| and |output|.
61 size_t prev = input.length();
62 TruncateUTF8ToByteSize(input, byte_size, output);
63 return prev != output->length();
66 } // namespace
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper: each
// EXPECT_TRUE/EXPECT_FALSE states whether the output length differs from the
// input length, and the following EXPECT_EQ pins the exact bytes left in
// |output|.
// NOTE(review): `array` / `array_string` are declared several times below;
// the braces that scoped each declaration appear to have been dropped by the
// page extraction (the embedded viewer line numbers skip at those points).
68 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
69 std::string output;
71 // Empty strings and invalid byte_size arguments
72 EXPECT_FALSE(Truncated("", 0, &output));
73 EXPECT_EQ(output, "");
74 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
75 EXPECT_EQ(output, "");
// -1 is converted to a very large size_t by Truncated()'s byte_size
// parameter, so it acts as an over-long limit.
76 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
77 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
79 // Testing the truncation of valid UTF8 correctly
80 EXPECT_TRUE(Truncated("abc", 2, &output));
81 EXPECT_EQ(output, "ab");
82 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
83 EXPECT_EQ(output.compare("\xc2\x81"), 0);
// A limit of 3 falls in the middle of the second two-byte character, so
// only the first character is expected to remain.
84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
85 EXPECT_EQ(output.compare("\xc2\x81"), 0);
86 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
87 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
// Inputs with embedded NUL bytes. The strings are built with an explicit
// length so the NULs are part of the data (assuming arraysize() yields the
// element count, that length also includes the literal's terminating NUL).
90 const char array[] = "\x00\x00\xc2\x81\xc2\x81";
91 const std::string array_string(array, arraysize(array));
92 EXPECT_TRUE(Truncated(array_string, 4, &output));
93 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
97 const char array[] = "\x00\xc2\x81\xc2\x81";
98 const std::string array_string(array, arraysize(array));
99 EXPECT_TRUE(Truncated(array_string, 4, &output));
100 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
103 // Testing invalid UTF8
// (UTF-8 encoded surrogate code points.)
104 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
105 EXPECT_EQ(output.compare(""), 0);
106 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
107 EXPECT_EQ(output.compare(""), 0);
108 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
109 EXPECT_EQ(output.compare(""), 0);
111 // Testing invalid UTF8 mixed with valid UTF8
112 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
113 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
114 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
115 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
116 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
117 10, &output));
118 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
// The literal is split around "a" so that the hex escape \xf1 does not
// absorb the following letter.
119 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
120 10, &output));
121 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
// UTF-8 BOM followed by ASCII.
122 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
123 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
125 // Overlong sequences
126 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
127 EXPECT_EQ(output.compare(""), 0);
128 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
129 EXPECT_EQ(output.compare(""), 0);
130 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
131 EXPECT_EQ(output.compare(""), 0);
132 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
133 EXPECT_EQ(output.compare(""), 0);
134 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
135 EXPECT_EQ(output.compare(""), 0);
136 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
137 EXPECT_EQ(output.compare(""), 0);
138 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
139 EXPECT_EQ(output.compare(""), 0);
140 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
141 EXPECT_EQ(output.compare(""), 0);
142 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
143 EXPECT_EQ(output.compare(""), 0);
144 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
145 EXPECT_EQ(output.compare(""), 0);
146 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
147 EXPECT_EQ(output.compare(""), 0);
149 // Beyond U+10FFFF (the upper limit of Unicode codespace)
150 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
151 EXPECT_EQ(output.compare(""), 0);
152 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
153 EXPECT_EQ(output.compare(""), 0);
154 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
155 EXPECT_EQ(output.compare(""), 0);
157 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
158 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
159 EXPECT_EQ(output.compare(""), 0);
160 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
161 EXPECT_EQ(output.compare(""), 0);
164 const char array[] = "\x00\x00\xfe\xff";
165 const std::string array_string(array, arraysize(array));
166 EXPECT_TRUE(Truncated(array_string, 4, &output));
167 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
170 // Variants on the previous test
// This variant builds the string with an explicit length of 4 (no
// terminating NUL), so the 4-byte limit equals the input length.
172 const char array[] = "\xff\xfe\x00\x00";
173 const std::string array_string(array, 4);
174 EXPECT_FALSE(Truncated(array_string, 4, &output));
175 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
178 const char array[] = "\xff\x00\x00\xfe";
179 const std::string array_string(array, arraysize(array));
180 EXPECT_TRUE(Truncated(array_string, 4, &output));
181 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
184 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
185 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
186 EXPECT_EQ(output.compare(""), 0);
// NOTE(review): F0 8F BF BE decodes as an overlong four-byte form of U+FFFE,
// not as a supplementary-plane noncharacter (U+1FFFE would be F0 9F BF BE).
// Confirm which of the two this case was meant to cover.
187 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
188 EXPECT_EQ(output.compare(""), 0);
// F3 BF BF BF is U+FFFFF.
189 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
190 EXPECT_EQ(output.compare(""), 0);
// EF B7 90 is U+FDD0 and EF B7 AF is U+FDEF.
191 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
192 EXPECT_EQ(output.compare(""), 0);
193 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
194 EXPECT_EQ(output.compare(""), 0);
196 // Strings in legacy encodings. Byte strings like these can happen to be
197 // valid UTF-8, but in real data most of them are invalid as UTF-8.
198 EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
199 EXPECT_EQ(output.compare("caf"), 0);
200 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
201 EXPECT_EQ(output.compare(""), 0);
202 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
203 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
204 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
205 &output));
206 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
208 // Testing using the same string as input and output.
// |output| still holds "\xa7\x41\xa6\x6e" from the previous case.
209 EXPECT_FALSE(Truncated(output, 4, &output));
210 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
211 EXPECT_TRUE(Truncated(output, 3, &output));
212 EXPECT_EQ(output.compare("\xa7\x41"), 0);
214 // "abc" with U+201[CD] in windows-125[0-8]
215 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
216 EXPECT_EQ(output.compare("\x93" "abc"), 0);
218 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
219 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
220 EXPECT_EQ(output.compare(""), 0);
222 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
223 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
224 EXPECT_EQ(output.compare(""), 0);
// Runs TrimWhitespace() over the trim_cases (wide) and trim_cases_ascii
// (narrow) tables, checking both the returned TrimPositions value and the
// trimmed text, and additionally checks that the same string object can be
// used as input and output.
// NOTE(review): the braces closing the loops and the test body are missing
// from this listing (page extraction); runs of spaces inside the literals
// below may also have been collapsed -- confirm against the repository copy.
227 TEST(StringUtilTest, TrimWhitespace) {
228 std::wstring output; // Allow contents to carry over to next testcase
229 for (size_t i = 0; i < arraysize(trim_cases); ++i) {
230 const trim_case& value = trim_cases[i];
231 EXPECT_EQ(value.return_value,
232 TrimWhitespace(value.input, value.positions, &output));
233 EXPECT_EQ(value.output, output);
236 // Test that TrimWhitespace() can take the same string for input and output
237 output = L" This is a test \r\n";
238 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
239 EXPECT_EQ(L"This is a test", output);
241 // Once more, but with a string of whitespace
242 output = L" \r\n";
243 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
244 EXPECT_EQ(L"", output);
// Same table-driven check for the narrow-string overload.
246 std::string output_ascii;
247 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
248 const trim_case_ascii& value = trim_cases_ascii[i];
249 EXPECT_EQ(value.return_value,
250 TrimWhitespace(value.input, value.positions, &output_ascii));
251 EXPECT_EQ(value.output, output_ascii);
// Wide-string table for the CollapseWhitespace test. Each row gives the
// input, the |trim| flag passed as CollapseWhitespace()'s second argument,
// and the expected result.
// NOTE(review): runs of spaces inside these literals look collapsed by the
// page extraction (several rows would otherwise make little sense for a
// whitespace-collapsing test) -- take the exact literals from the repository
// copy.
255 static const struct collapse_case {
256 const wchar_t* input;
257 const bool trim;
258 const wchar_t* output;
259 } collapse_cases[] = {
// Rows with |trim| == false.
260 {L" Google Video ", false, L"Google Video"},
261 {L"Google Video", false, L"Google Video"},
262 {L"", false, L""},
263 {L" ", false, L""},
264 {L"\t\rTest String\n", false, L"Test String"},
// Non-ASCII whitespace: U+2002, U+00A0, U+3000, U+1680 and U+2028.
265 {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
266 {L" Test \n \t String ", false, L"Test String"},
267 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
268 {L" Test String", false, L"Test String"},
269 {L"Test String ", false, L"Test String"},
270 {L"Test String", false, L"Test String"},
// Rows with |trim| == true.
271 {L"", true, L""},
272 {L"\n", true, L""},
273 {L" \r ", true, L""},
274 {L"\nFoo", true, L"Foo"},
275 {L"\r Foo ", true, L"Foo"},
276 {L" Foo bar ", true, L"Foo bar"},
277 {L" \tFoo bar \n", true, L"Foo bar"},
278 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
281 TEST(StringUtilTest, CollapseWhitespace) {
282 for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
283 const collapse_case& value = collapse_cases[i];
284 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
// Narrow-string table for the CollapseWhitespaceASCII test. Each row gives
// the input, the |trim| flag passed as the second argument, and the expected
// result.
// NOTE(review): runs of spaces inside these literals look collapsed by the
// page extraction -- take the exact literals from the repository copy.
288 static const struct collapse_case_ascii {
289 const char* input;
290 const bool trim;
291 const char* output;
292 } collapse_cases_ascii[] = {
// Rows with |trim| == false.
293 {" Google Video ", false, "Google Video"},
294 {"Google Video", false, "Google Video"},
295 {"", false, ""},
296 {" ", false, ""},
297 {"\t\rTest String\n", false, "Test String"},
298 {" Test \n \t String ", false, "Test String"},
299 {" Test String", false, "Test String"},
300 {"Test String ", false, "Test String"},
301 {"Test String", false, "Test String"},
// Rows with |trim| == true.
302 {"", true, ""},
303 {"\n", true, ""},
304 {" \r ", true, ""},
305 {"\nFoo", true, "Foo"},
306 {"\r Foo ", true, "Foo"},
307 {" Foo bar ", true, "Foo bar"},
308 {" \tFoo bar \n", true, "Foo bar"},
309 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
312 TEST(StringUtilTest, CollapseWhitespaceASCII) {
313 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
314 const collapse_case_ascii& value = collapse_cases_ascii[i];
315 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
319 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
320 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
321 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
322 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
323 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n "));
324 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
325 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n "));
328 TEST(StringUtilTest, ContainsOnlyWhitespace) {
329 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
330 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
331 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
332 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n ")));
333 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
334 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n ")));
337 TEST(StringUtilTest, IsStringUTF8) {
338 EXPECT_TRUE(IsStringUTF8("abc"));
339 EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
340 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
341 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
342 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
343 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM
345 // surrogate code points
346 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
347 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
348 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
350 // overlong sequences
351 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000
352 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB"
353 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000
354 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080
355 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff
356 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D
357 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091
358 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800
359 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM)
360 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F
361 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5
363 // Beyond U+10FFFF (the upper limit of Unicode codespace)
364 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000
365 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes
366 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes
368 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
369 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
370 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
371 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
372 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
374 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
375 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE)
376 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
377 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
378 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
379 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
380 // Strings in legacy encodings. We can certainly make up strings
381 // in a legacy encoding that are valid in UTF-8, but in real data,
382 // most of them are invalid as UTF-8.
383 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
384 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR
385 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
386 // "abc" with U+201[CD] in windows-125[0-8]
387 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
388 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
389 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
390 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
391 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
393 // Check that we support Embedded Nulls. The first uses the canonical UTF-8
394 // representation, and the second uses a 2-byte sequence. The second version
395 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
396 // given codepoint must be used.
397 static const char kEmbeddedNull[] = "embedded\0null";
398 EXPECT_TRUE(IsStringUTF8(
399 std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
400 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
// Checks IsStringASCII() and the ASCIIToWide() / WideToASCII() conversions
// using two parallel tables (narrow and wide spellings of the same text),
// then covers a non-ASCII character, empty strings, and a string containing
// an embedded NUL.
// NOTE(review): the lines that closed the two tables, the loop and the test
// body are missing from this listing (page extraction).
403 TEST(StringUtilTest, ConvertASCII) {
404 static const char* char_cases[] = {
405 "Google Video",
406 "Hello, world\n",
407 "0123ABCDwxyz \a\b\t\r\n!+,.~"
// Must stay index-aligned with char_cases: the loop below compares the two
// tables entry by entry.
410 static const wchar_t* const wchar_cases[] = {
411 L"Google Video",
412 L"Hello, world\n",
413 L"0123ABCDwxyz \a\b\t\r\n!+,.~"
416 for (size_t i = 0; i < arraysize(char_cases); ++i) {
// Narrow -> wide.
417 EXPECT_TRUE(IsStringASCII(char_cases[i]));
418 std::wstring wide = ASCIIToWide(char_cases[i]);
419 EXPECT_EQ(wchar_cases[i], wide);
// Wide -> narrow.
421 EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
422 std::string ascii = WideToASCII(wchar_cases[i]);
423 EXPECT_EQ(char_cases[i], ascii);
// 0x80 is outside the 7-bit ASCII range.
426 EXPECT_FALSE(IsStringASCII("Google \x80Video"));
427 EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
429 // Convert empty strings.
430 std::wstring wempty;
431 std::string empty;
432 EXPECT_EQ(empty, WideToASCII(wempty));
433 EXPECT_EQ(wempty, ASCIIToWide(empty));
435 // Convert strings with an embedded NUL character.
436 const char chars_with_nul[] = "test\0string";
// The "- 1" leaves out the literal's terminating NUL; the embedded NUL stays
// part of the data (assuming arraysize() yields the element count).
437 const int length_with_nul = arraysize(chars_with_nul) - 1;
438 std::string string_with_nul(chars_with_nul, length_with_nul);
439 std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
440 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
441 wide_with_nul.length());
442 std::string narrow_with_nul = WideToASCII(wide_with_nul);
443 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
444 narrow_with_nul.length());
445 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
448 TEST(StringUtilTest, ToUpperASCII) {
449 EXPECT_EQ('C', ToUpperASCII('C'));
450 EXPECT_EQ('C', ToUpperASCII('c'));
451 EXPECT_EQ('2', ToUpperASCII('2'));
453 EXPECT_EQ(L'C', ToUpperASCII(L'C'));
454 EXPECT_EQ(L'C', ToUpperASCII(L'c'));
455 EXPECT_EQ(L'2', ToUpperASCII(L'2'));
457 std::string in_place_a("Cc2");
458 StringToUpperASCII(&in_place_a);
459 EXPECT_EQ("CC2", in_place_a);
461 std::wstring in_place_w(L"Cc2");
462 StringToUpperASCII(&in_place_w);
463 EXPECT_EQ(L"CC2", in_place_w);
465 std::string original_a("Cc2");
466 std::string upper_a = StringToUpperASCII(original_a);
467 EXPECT_EQ("CC2", upper_a);
469 std::wstring original_w(L"Cc2");
470 std::wstring upper_w = StringToUpperASCII(original_w);
471 EXPECT_EQ(L"CC2", upper_w);
// Table for the LowerCaseEqualsASCII test: the same source text in wide
// (src_w) and narrow (src_a) form, plus the lower-case ASCII text (dst) that
// it is compared against.
// The struct type is unnamed, which is presumably why the test sizes this
// table with ARRAYSIZE_UNSAFE rather than arraysize -- confirm.
474 static const struct {
475 const wchar_t* src_w;
476 const char* src_a;
477 const char* dst;
478 } lowercase_cases[] = {
479 {L"FoO", "FoO", "foo"},
480 {L"foo", "foo", "foo"},
481 {L"FOO", "FOO", "foo"},
484 TEST(StringUtilTest, LowerCaseEqualsASCII) {
485 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
486 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
487 lowercase_cases[i].dst));
488 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
489 lowercase_cases[i].dst));
// Table-driven check of GetByteDisplayUnits(): each row pairs a byte count
// with the DataUnits value the function is expected to pick for it.
// NOTE(review): the lines closing the table and the test body are missing
// from this listing (page extraction).
493 TEST(StringUtilTest, GetByteDisplayUnits) {
494 static const struct {
495 int64 bytes;
496 DataUnits expected;
497 } cases[] = {
498 {0, DATA_UNITS_BYTE},
499 {512, DATA_UNITS_BYTE},
500 {10*1024, DATA_UNITS_KIBIBYTE},
501 {10*1024*1024, DATA_UNITS_MEBIBYTE},
// The LL suffix keeps the product in 64-bit arithmetic.
502 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
// Every bit set except bit 63.
503 {~(1LL<<63), DATA_UNITS_GIBIBYTE},
// The negative input is only compiled in when NDEBUG is defined --
// presumably because debug builds assert on it; confirm.
504 #ifdef NDEBUG
505 {-1, DATA_UNITS_BYTE},
506 #endif
509 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
510 EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
// Table-driven check of FormatBytes(): each row gives a byte count, the unit
// to format it in, and the expected text without and with the unit suffix
// (third argument false / true respectively).
// NOTE(review): the lines closing the table, the loop and the test body are
// missing from this listing (page extraction).
513 TEST(StringUtilTest, FormatBytes) {
514 static const struct {
515 int64 bytes;
516 DataUnits units;
517 const char* expected;
518 const char* expected_with_units;
519 } cases[] = {
520 // Expected behavior: we show one post-decimal digit when we have
521 // under two pre-decimal digits, except in cases where it makes no
522 // sense (zero or bytes).
523 // Since we switch units once we cross the 1000 mark, this keeps
524 // the display of file sizes or bytes consistently around three
525 // digits.
526 {0, DATA_UNITS_BYTE, "0", "0 B"},
527 {512, DATA_UNITS_BYTE, "512", "512 B"},
528 {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"},
529 {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
530 {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"},
531 {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"},
532 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
533 {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"},
534 {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"},
535 {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
536 "105", "105 GB"},
// Every bit set except bit 63.
537 {~(1LL<<63), DATA_UNITS_GIBIBYTE, "8589934592", "8589934592 GB"},
// Values that are not exact multiples of the unit.
539 {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"},
540 {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
541 {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"},
542 {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
543 "1.9", "1.9 GB"},
544 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
545 {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"},
// The negative input is only compiled in when NDEBUG is defined --
// presumably because debug builds assert on it; confirm.
546 #ifdef NDEBUG
547 {-1, DATA_UNITS_BYTE, "", ""},
548 #endif
551 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
552 EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
553 FormatBytes(cases[i].bytes, cases[i].units, false));
554 EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units),
555 FormatBytes(cases[i].bytes, cases[i].units, true));
// Table-driven check of ReplaceSubstringsAfterOffset(): each row gives the
// original text, the offset passed as the second argument, the substring to
// find, its replacement, and the expected text afterwards. Strings are
// converted with ASCIIToUTF16() before the call.
// NOTE(review): runs of spaces inside the literals may have been collapsed
// by the page extraction, and the lines closing the table, the loop and the
// test body are missing -- confirm against the repository copy.
559 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
560 static const struct {
561 const char* str;
562 string16::size_type start_offset;
563 const char* find_this;
564 const char* replace_with;
565 const char* expected;
566 } cases[] = {
567 {"aaa", 0, "a", "b", "bbb"},
568 {"abb", 0, "ab", "a", "ab"},
// Empty replacement: every match is removed.
569 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
570 {"Not found", 0, "x", "0", "Not found"},
571 {"Not found again", 5, "x", "0", "Not found again"},
// Replacement much longer than the match.
572 {" Making it much longer ", 0, " ", "Four score and seven years ago",
573 "Four score and seven years agoMakingFour score and seven years agoit"
574 "Four score and seven years agomuchFour score and seven years agolonger"
575 "Four score and seven years ago"},
// Offset beyond the end of the string: expected to leave it unchanged.
576 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
// Matches before the start offset are expected to be left alone.
577 {"Replace me only me once", 9, "me ", "", "Replace me only once"},
578 {"abababab", 2, "ab", "c", "abccc"},
581 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
582 string16 str = ASCIIToUTF16(cases[i].str);
583 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
584 ASCIIToUTF16(cases[i].find_this),
585 ASCIIToUTF16(cases[i].replace_with));
586 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
// Table-driven check of ReplaceFirstSubstringAfterOffset(): same column
// layout as the ReplaceSubstringsAfterOffset test, but the expected text has
// only the first match at or after the offset replaced.
// NOTE(review): runs of spaces inside the literals may have been collapsed
// by the page extraction, and the lines closing the table, the loop and the
// test body are missing -- confirm against the repository copy.
590 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
591 static const struct {
592 const char* str;
593 string16::size_type start_offset;
594 const char* find_this;
595 const char* replace_with;
596 const char* expected;
597 } cases[] = {
598 {"aaa", 0, "a", "b", "baa"},
599 {"abb", 0, "ab", "a", "ab"},
600 {"Removing some substrings inging", 0, "ing", "",
601 "Remov some substrings inging"},
602 {"Not found", 0, "x", "0", "Not found"},
603 {"Not found again", 5, "x", "0", "Not found again"},
604 {" Making it much longer ", 0, " ", "Four score and seven years ago",
605 "Four score and seven years agoMaking it much longer "},
// Offset beyond the end of the string: expected to leave it unchanged.
606 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
607 {"Replace me only me once", 4, "me ", "", "Replace only me once"},
608 {"abababab", 2, "ab", "c", "abcabab"},
611 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
612 string16 str = ASCIIToUTF16(cases[i].str);
613 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
614 ASCIIToUTF16(cases[i].find_this),
615 ASCIIToUTF16(cases[i].replace_with));
616 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
620 TEST(StringUtilTest, HexDigitToInt) {
621 EXPECT_EQ(0, HexDigitToInt('0'));
622 EXPECT_EQ(1, HexDigitToInt('1'));
623 EXPECT_EQ(2, HexDigitToInt('2'));
624 EXPECT_EQ(3, HexDigitToInt('3'));
625 EXPECT_EQ(4, HexDigitToInt('4'));
626 EXPECT_EQ(5, HexDigitToInt('5'));
627 EXPECT_EQ(6, HexDigitToInt('6'));
628 EXPECT_EQ(7, HexDigitToInt('7'));
629 EXPECT_EQ(8, HexDigitToInt('8'));
630 EXPECT_EQ(9, HexDigitToInt('9'));
631 EXPECT_EQ(10, HexDigitToInt('A'));
632 EXPECT_EQ(11, HexDigitToInt('B'));
633 EXPECT_EQ(12, HexDigitToInt('C'));
634 EXPECT_EQ(13, HexDigitToInt('D'));
635 EXPECT_EQ(14, HexDigitToInt('E'));
636 EXPECT_EQ(15, HexDigitToInt('F'));
638 // Verify the lower case as well.
639 EXPECT_EQ(10, HexDigitToInt('a'));
640 EXPECT_EQ(11, HexDigitToInt('b'));
641 EXPECT_EQ(12, HexDigitToInt('c'));
642 EXPECT_EQ(13, HexDigitToInt('d'));
643 EXPECT_EQ(14, HexDigitToInt('e'));
644 EXPECT_EQ(15, HexDigitToInt('f'));
647 // This checks whether we can use the assignment operator for a va_list. We need
648 // a way to do this since Visual C doesn't support va_copy, but assignment on
649 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
650 // capability.
// The function copies its va_list with GG_VA_COPY, reads the same four
// arguments (int, int, char*, double) from the original and from the copy,
// and expects both reads to agree. |format| is only used as the anchor for
// va_start; the caller must pass arguments of exactly those four types.
651 static void VariableArgsFunc(const char* format, ...) {
652 va_list org;
653 va_start(org, format);
// Take the copy before anything is consumed from |org|, so both lists start
// at the first variadic argument.
655 va_list dup;
656 GG_VA_COPY(dup, org);
657 int i1 = va_arg(org, int);
658 int j1 = va_arg(org, int);
659 char* s1 = va_arg(org, char*);
660 double d1 = va_arg(org, double);
661 va_end(org);
// |dup| is read after |org| has been ended, so the copy is exercised on its
// own.
663 int i2 = va_arg(dup, int);
664 int j2 = va_arg(dup, int);
665 char* s2 = va_arg(dup, char*);
666 double d2 = va_arg(dup, double);
668 EXPECT_EQ(i1, i2);
669 EXPECT_EQ(j1, j2);
// EXPECT_STREQ compares the characters, not the pointer values.
670 EXPECT_STREQ(s1, s2);
671 EXPECT_EQ(d1, d2);
673 va_end(dup);
676 TEST(StringUtilTest, VAList) {
677 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
680 // Test for Tokenize
// Shared test body, instantiated once per string type STR. Each group calls
// Tokenize(text, delimiters, &r), then checks the returned count, the size of
// |r|, and every token. ASSERT_EQ is used on r.size() so that the indexed
// reads that follow are not reached when the size is wrong. STR must be
// constructible from a string literal and comparable with EXPECT_EQ.
// NOTE(review): runs of spaces inside the literals may have been collapsed
// by the page extraction -- confirm against the repository copy.
681 template <typename STR>
682 void TokenizeTest() {
683 std::vector<STR> r;
684 size_t size;
686 size = Tokenize(STR("This is a string"), STR(" "), &r);
687 EXPECT_EQ(4U, size);
688 ASSERT_EQ(4U, r.size());
689 EXPECT_EQ(r[0], STR("This"));
690 EXPECT_EQ(r[1], STR("is"));
691 EXPECT_EQ(r[2], STR("a"));
692 EXPECT_EQ(r[3], STR("string"));
693 r.clear();
695 size = Tokenize(STR("one,two,three"), STR(","), &r);
696 EXPECT_EQ(3U, size);
697 ASSERT_EQ(3U, r.size());
698 EXPECT_EQ(r[0], STR("one"));
699 EXPECT_EQ(r[1], STR("two"));
700 EXPECT_EQ(r[2], STR("three"));
701 r.clear();
// Several delimiter characters: ';' is not one of them here, so
// "three;four" stays a single token.
703 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
704 EXPECT_EQ(3U, size);
705 ASSERT_EQ(3U, r.size());
706 EXPECT_EQ(r[0], STR("one"));
707 EXPECT_EQ(r[1], STR("two"));
708 EXPECT_EQ(r[2], STR("three;four"));
709 r.clear();
711 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
712 EXPECT_EQ(4U, size);
713 ASSERT_EQ(4U, r.size());
714 EXPECT_EQ(r[0], STR("one"));
715 EXPECT_EQ(r[1], STR("two"));
716 EXPECT_EQ(r[2], STR("three"));
717 EXPECT_EQ(r[3], STR("four"));
718 r.clear();
// Spaces are not delimiters here, so they stay inside the tokens.
720 size = Tokenize(STR("one, two, three"), STR(","), &r);
721 EXPECT_EQ(3U, size);
722 ASSERT_EQ(3U, r.size());
723 EXPECT_EQ(r[0], STR("one"));
724 EXPECT_EQ(r[1], STR(" two"));
725 EXPECT_EQ(r[2], STR(" three"));
726 r.clear();
// A trailing space after the last delimiter is expected as its own token.
728 size = Tokenize(STR("one, two, three, "), STR(","), &r);
729 EXPECT_EQ(4U, size);
730 ASSERT_EQ(4U, r.size());
731 EXPECT_EQ(r[0], STR("one"));
732 EXPECT_EQ(r[1], STR(" two"));
733 EXPECT_EQ(r[2], STR(" three"));
734 EXPECT_EQ(r[3], STR(" "));
735 r.clear();
// A trailing delimiter with nothing after it is expected to add no token.
737 size = Tokenize(STR("one, two, three,"), STR(","), &r);
738 EXPECT_EQ(3U, size);
739 ASSERT_EQ(3U, r.size());
740 EXPECT_EQ(r[0], STR("one"));
741 EXPECT_EQ(r[1], STR(" two"));
742 EXPECT_EQ(r[2], STR(" three"));
743 r.clear();
// Empty input and delimiter-only inputs are expected to yield no tokens.
745 size = Tokenize(STR(""), STR(","), &r);
746 EXPECT_EQ(0U, size);
747 ASSERT_EQ(0U, r.size());
748 r.clear();
750 size = Tokenize(STR(","), STR(","), &r);
751 EXPECT_EQ(0U, size);
752 ASSERT_EQ(0U, r.size());
753 r.clear();
755 size = Tokenize(STR(",;:."), STR(".:;,"), &r);
756 EXPECT_EQ(0U, size);
757 ASSERT_EQ(0U, r.size());
758 r.clear();
// Leading, repeated and trailing delimiters around a single token.
760 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
761 EXPECT_EQ(1U, size);
762 ASSERT_EQ(1U, r.size());
763 EXPECT_EQ(r[0], STR("a"));
764 r.clear();
// Only '\n' is a delimiter here, so the tabs stay inside the tokens.
766 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
767 EXPECT_EQ(2U, size);
768 ASSERT_EQ(2U, r.size());
769 EXPECT_EQ(r[0], STR("\ta\t"));
770 EXPECT_EQ(r[1], STR("b\tcc"));
771 r.clear();
// Runs the shared TokenizeTest body with std::string as the string type.
774 TEST(StringUtilTest, TokenizeStdString) {
775 TokenizeTest<std::string>();
// Runs the shared TokenizeTest body with base::StringPiece as the string
// type.
778 TEST(StringUtilTest, TokenizeStringPiece) {
779 TokenizeTest<base::StringPiece>();
782 // Test for JoinString
783 TEST(StringUtilTest, JoinString) {
784 std::vector<std::string> in;
785 EXPECT_EQ("", JoinString(in, ','));
787 in.push_back("a");
788 EXPECT_EQ("a", JoinString(in, ','));
790 in.push_back("b");
791 in.push_back("c");
792 EXPECT_EQ("a,b,c", JoinString(in, ','));
794 in.push_back("");
795 EXPECT_EQ("a,b,c,", JoinString(in, ','));
796 in.push_back(" ");
797 EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
800 TEST(StringUtilTest, StartsWith) {
801 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
802 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
803 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
804 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
805 EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
806 EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
807 EXPECT_FALSE(StartsWithASCII("", "javascript", false));
808 EXPECT_FALSE(StartsWithASCII("", "javascript", true));
809 EXPECT_TRUE(StartsWithASCII("java", "", false));
810 EXPECT_TRUE(StartsWithASCII("java", "", true));
812 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
813 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
814 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
815 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
816 EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
817 EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
818 EXPECT_FALSE(StartsWith(L"", L"javascript", false));
819 EXPECT_FALSE(StartsWith(L"", L"javascript", true));
820 EXPECT_TRUE(StartsWith(L"java", L"", false));
821 EXPECT_TRUE(StartsWith(L"java", L"", true));
824 TEST(StringUtilTest, EndsWith) {
825 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
826 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
827 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
828 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
829 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
830 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
831 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
832 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
833 EXPECT_FALSE(EndsWith(L"", L".plugin", false));
834 EXPECT_FALSE(EndsWith(L"", L".plugin", true));
835 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
836 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
837 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
838 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
839 EXPECT_TRUE(EndsWith(L"", L"", false));
840 EXPECT_TRUE(EndsWith(L"", L"", true));
843 TEST(StringUtilTest, GetStringFWithOffsets) {
844 std::vector<string16> subst;
845 subst.push_back(ASCIIToUTF16("1"));
846 subst.push_back(ASCIIToUTF16("2"));
847 std::vector<size_t> offsets;
849 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
850 subst,
851 &offsets);
852 EXPECT_EQ(2U, offsets.size());
853 EXPECT_EQ(7U, offsets[0]);
854 EXPECT_EQ(25U, offsets[1]);
855 offsets.clear();
857 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
858 subst,
859 &offsets);
860 EXPECT_EQ(2U, offsets.size());
861 EXPECT_EQ(25U, offsets[0]);
862 EXPECT_EQ(7U, offsets[1]);
863 offsets.clear();
866 TEST(StringUtilTest, ReplaceStringPlaceholders) {
867 std::vector<string16> subst;
868 subst.push_back(ASCIIToUTF16("9a"));
869 subst.push_back(ASCIIToUTF16("8b"));
870 subst.push_back(ASCIIToUTF16("7c"));
871 subst.push_back(ASCIIToUTF16("6d"));
872 subst.push_back(ASCIIToUTF16("5e"));
873 subst.push_back(ASCIIToUTF16("4f"));
874 subst.push_back(ASCIIToUTF16("3g"));
875 subst.push_back(ASCIIToUTF16("2h"));
876 subst.push_back(ASCIIToUTF16("1i"));
878 string16 formatted =
879 ReplaceStringPlaceholders(
880 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
882 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
885 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
886 // Test whether replacestringplaceholders works as expected when there
887 // are fewer inputs than outputs.
888 std::vector<string16> subst;
889 subst.push_back(ASCIIToUTF16("9a"));
890 subst.push_back(ASCIIToUTF16("8b"));
891 subst.push_back(ASCIIToUTF16("7c"));
893 string16 formatted =
894 ReplaceStringPlaceholders(
895 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
897 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
900 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
901 std::vector<std::string> subst;
902 subst.push_back("9a");
903 subst.push_back("8b");
904 subst.push_back("7c");
905 subst.push_back("6d");
906 subst.push_back("5e");
907 subst.push_back("4f");
908 subst.push_back("3g");
909 subst.push_back("2h");
910 subst.push_back("1i");
912 std::string formatted =
913 ReplaceStringPlaceholders(
914 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
916 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
919 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
920 std::vector<std::string> subst;
921 subst.push_back("a");
922 subst.push_back("b");
923 subst.push_back("c");
924 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
925 "$1 $$2 $$$3");
928 TEST(StringUtilTest, MatchPatternTest) {
929 EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
930 EXPECT_TRUE(MatchPattern("www.google.com", "*"));
931 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
932 EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
933 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
934 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
935 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
936 EXPECT_FALSE(MatchPattern("", "*.*"));
937 EXPECT_TRUE(MatchPattern("", "*"));
938 EXPECT_TRUE(MatchPattern("", "?"));
939 EXPECT_TRUE(MatchPattern("", ""));
940 EXPECT_FALSE(MatchPattern("Hello", ""));
941 EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
942 // Stop after a certain recursion depth.
943 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
945 // Test UTF8 matching.
946 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
947 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
948 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
949 // Invalid sequences should be handled as a single invalid character.
950 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
951 // If the pattern has invalid characters, it shouldn't match anything.
952 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
954 // Test UTF16 character matching.
955 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
956 UTF8ToUTF16("*.com")));
957 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
958 UTF8ToUTF16("He??o\\*1*")));
960 // This test verifies that consecutive wild cards are collapsed into 1
961 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
962 // recursion depth).
963 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
964 UTF8ToUTF16("He********************************o")));
967 TEST(StringUtilTest, LcpyTest) {
968 // Test the normal case where we fit in our buffer.
970 char dst[10];
971 wchar_t wdst[10];
972 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
973 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
974 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
975 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
978 // Test dst_size == 0, nothing should be written to |dst| and we should
979 // have the equivalent of strlen(src).
981 char dst[2] = {1, 2};
982 wchar_t wdst[2] = {1, 2};
983 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
984 EXPECT_EQ(1, dst[0]);
985 EXPECT_EQ(2, dst[1]);
986 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
987 #if defined(WCHAR_T_IS_UNSIGNED)
988 EXPECT_EQ(1U, wdst[0]);
989 EXPECT_EQ(2U, wdst[1]);
990 #else
991 EXPECT_EQ(1, wdst[0]);
992 EXPECT_EQ(2, wdst[1]);
993 #endif
996 // Test the case were we _just_ competely fit including the null.
998 char dst[8];
999 wchar_t wdst[8];
1000 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1001 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1002 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1003 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1006 // Test the case were we we are one smaller, so we can't fit the null.
1008 char dst[7];
1009 wchar_t wdst[7];
1010 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1011 EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1012 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1013 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1016 // Test the case were we are just too small.
1018 char dst[3];
1019 wchar_t wdst[3];
1020 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1021 EXPECT_EQ(0, memcmp(dst, "ab", 3));
1022 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1023 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1027 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1028 struct TestData {
1029 const wchar_t* input;
1030 bool portable;
1031 } cases[] = {
1032 { L"%ls", true },
1033 { L"%s", false },
1034 { L"%S", false },
1035 { L"%lS", false },
1036 { L"Hello, %s", false },
1037 { L"%lc", true },
1038 { L"%c", false },
1039 { L"%C", false },
1040 { L"%lC", false },
1041 { L"%ls %s", false },
1042 { L"%s %ls", false },
1043 { L"%s %ls %s", false },
1044 { L"%f", true },
1045 { L"%f %F", false },
1046 { L"%d %D", false },
1047 { L"%o %O", false },
1048 { L"%u %U", false },
1049 { L"%f %d %o %u", true },
1050 { L"%-8d (%02.1f%)", true },
1051 { L"% 10s", false },
1052 { L"% 10ls", true }
1054 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1055 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1059 TEST(StringUtilTest, RemoveChars) {
1060 const char* kRemoveChars = "-/+*";
1061 std::string input = "A-+bc/d!*";
1062 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1063 EXPECT_EQ("Abcd!", input);
1065 // No characters match kRemoveChars.
1066 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1067 EXPECT_EQ("Abcd!", input);
1069 // Empty string.
1070 input.clear();
1071 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1072 EXPECT_EQ(std::string(), input);
1075 TEST(StringUtilTest, ContainsOnlyChars) {
1076 // Providing an empty list of characters should return false but for the empty
1077 // string.
1078 EXPECT_TRUE(ContainsOnlyChars("", ""));
1079 EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1081 EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1082 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1083 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1084 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1085 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1088 } // namespace base