1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "base/i18n/rtl.h"
8 #include "base/i18n/string_search.h"
9 #include "base/strings/string16.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12 #include "third_party/icu/source/i18n/unicode/usearch.h"
17 // Note on setting default locale for testing: The current default locale on
18 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
19 // string search is case-sensitive, when normally it should be
20 // case-insensitive. In other locales (including en_US which English speakers
21 // in the U.S. use), this search would be case-insensitive as expected.
// Exercises StringSearchIgnoringCaseAndAccents() with ASCII-only inputs
// (ASCIIToUTF16 literals and default-constructed string16 values).
// NOTE(review): this listing appears to be missing lines -- the declarations
// of `index` and `length`, the tails of some calls, the statement that
// consumes `locale_is_posix`, and the closing brace are not visible. Confirm
// against the complete file before changing any code here.
23 TEST(StringSearchTest
, ASCII
) {
// Copy the current ICU default locale name so it can be handed back to
// SetICUDefaultLocale() at the end of the test.
24 std::string
default_locale(uloc_getDefault());
// True when the test started under "en_US_POSIX" (see the file-level note on
// the Mac trybot locale). Presumably this guards the locale switch below --
// TODO confirm; the guarding statement is not visible in this listing.
25 bool locale_is_posix
= (default_locale
== "en_US_POSIX");
27 SetICUDefaultLocale("en_US");
// "hello" searched in "hello world": a match is expected and the reported
// match length is 5.
32 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
33 ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index
, &length
));
35 EXPECT_EQ(5U, length
);
// A miss is expected here.
// NOTE(review): the two literals look identical in this listing even though
// EXPECT_FALSE is used; whitespace inside the literals may have been collapsed
// during extraction -- verify against the original file.
37 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
38 ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"),
// "aabaaa" searched in "aaabaabaaa": a match is expected with length 6.
41 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
42 ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index
, &length
));
44 EXPECT_EQ(6U, length
);
// Non-empty first argument, empty second argument: no match is expected.
46 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
47 ASCIIToUTF16("searching within empty string"), string16(),
// Empty first argument, non-empty second argument: a match is expected and
// the reported length is 0.
50 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
51 string16(), ASCIIToUTF16("searching for empty string"), &index
, &length
));
53 EXPECT_EQ(0U, length
);
// Same words in different letter case: a match is expected and the reported
// length is 18, the length of "case insensitivity".
55 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
56 ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
59 EXPECT_EQ(18U, length
);
// Put back the locale name captured at the top of the test.
62 SetICUDefaultLocale(default_locale
.data());
// Exercises StringSearchIgnoringCaseAndAccents() with accented letters given
// both as single precomposed code points and as a base letter followed by a
// combining mark. In every visible positive case the expected index is 0 and
// the expected length equals the size() of the second argument.
// NOTE(review): this listing appears to be missing lines -- the declarations
// of `index` and `length`, the tails of several calls (those ending in a bare
// comma), the statement that consumes `locale_is_posix`, and the closing brace
// are not visible. Confirm against the complete file before changing code.
65 TEST(StringSearchTest
, UnicodeLocaleIndependent
) {
// Unaccented base letters.
// NOTE(review): no use of `a_base` is visible in this test's listing.
67 const string16 e_base
= WideToUTF16(L
"e");
68 const string16 E_base
= WideToUTF16(L
"E");
69 const string16 a_base
= WideToUTF16(L
"a");
71 // Composed characters
// Each of these is a single code point: U+00E9, U+00C9, U+00E8, U+00C8 and
// U+00E1 respectively.
72 const string16 e_with_acute_accent
= WideToUTF16(L
"\u00e9");
73 const string16 E_with_acute_accent
= WideToUTF16(L
"\u00c9");
74 const string16 e_with_grave_accent
= WideToUTF16(L
"\u00e8");
75 const string16 E_with_grave_accent
= WideToUTF16(L
"\u00c8");
76 const string16 a_with_acute_accent
= WideToUTF16(L
"\u00e1");
78 // Decomposed characters
// Each of these is a base letter followed by U+0301 (combining acute) or
// U+0300 (combining grave), i.e. two code points.
79 const string16 e_with_acute_combining_mark
= WideToUTF16(L
"e\u0301");
80 const string16 E_with_acute_combining_mark
= WideToUTF16(L
"E\u0301");
81 const string16 e_with_grave_combining_mark
= WideToUTF16(L
"e\u0300");
82 const string16 E_with_grave_combining_mark
= WideToUTF16(L
"E\u0300");
83 const string16 a_with_acute_combining_mark
= WideToUTF16(L
"a\u0301");
// Copy the current ICU default locale name so it can be handed back to
// SetICUDefaultLocale() at the end of the test.
85 std::string
default_locale(uloc_getDefault());
// True when the test started under "en_US_POSIX". Presumably this guards the
// locale switch below -- TODO confirm; the guard is not visible here.
86 bool locale_is_posix
= (default_locale
== "en_US_POSIX");
88 SetICUDefaultLocale("en_US");
// Plain "e" versus precomposed e-acute, in both argument orders.
93 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
94 e_base
, e_with_acute_accent
, &index
, &length
));
96 EXPECT_EQ(e_with_acute_accent
.size(), length
);
98 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
99 e_with_acute_accent
, e_base
, &index
, &length
));
100 EXPECT_EQ(0U, index
);
101 EXPECT_EQ(e_base
.size(), length
);
// Plain "e" versus decomposed e + combining acute, in both argument orders.
// When the decomposed form is the second argument, the expected length is its
// full size(), i.e. it includes the combining mark.
103 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
104 e_base
, e_with_acute_combining_mark
, &index
, &length
));
105 EXPECT_EQ(0U, index
);
106 EXPECT_EQ(e_with_acute_combining_mark
.size(), length
);
108 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
109 e_with_acute_combining_mark
, e_base
, &index
, &length
));
110 EXPECT_EQ(0U, index
);
111 EXPECT_EQ(e_base
.size(), length
);
// Decomposed versus precomposed spellings of e-acute, in both argument orders.
113 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
114 e_with_acute_combining_mark
, e_with_acute_accent
,
116 EXPECT_EQ(0U, index
);
117 EXPECT_EQ(e_with_acute_accent
.size(), length
);
119 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
120 e_with_acute_accent
, e_with_acute_combining_mark
,
122 EXPECT_EQ(0U, index
);
123 EXPECT_EQ(e_with_acute_combining_mark
.size(), length
);
// Different accents (acute versus grave), both decomposed, in both orders.
125 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
126 e_with_acute_combining_mark
, e_with_grave_combining_mark
,
128 EXPECT_EQ(0U, index
);
129 EXPECT_EQ(e_with_grave_combining_mark
.size(), length
);
131 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
132 e_with_grave_combining_mark
, e_with_acute_combining_mark
,
134 EXPECT_EQ(0U, index
);
135 EXPECT_EQ(e_with_acute_combining_mark
.size(), length
);
// Different accents with one side decomposed (acute) and the other precomposed
// (grave), in both argument orders.
137 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
138 e_with_acute_combining_mark
, e_with_grave_accent
, &index
, &length
));
139 EXPECT_EQ(0U, index
);
140 EXPECT_EQ(e_with_grave_accent
.size(), length
);
142 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
143 e_with_grave_accent
, e_with_acute_combining_mark
, &index
, &length
));
144 EXPECT_EQ(0U, index
);
145 EXPECT_EQ(e_with_acute_combining_mark
.size(), length
);
// Upper-case first argument against lower-case second argument, with the same
// accent and then with a different accent (precomposed forms).
147 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
148 E_with_acute_accent
, e_with_acute_accent
, &index
, &length
));
149 EXPECT_EQ(0U, index
);
150 EXPECT_EQ(e_with_acute_accent
.size(), length
);
152 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
153 E_with_grave_accent
, e_with_acute_accent
, &index
, &length
));
154 EXPECT_EQ(0U, index
);
155 EXPECT_EQ(e_with_acute_accent
.size(), length
);
// Upper-case decomposed first argument against lower-case precomposed second
// argument carrying the other accent.
157 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
158 E_with_acute_combining_mark
, e_with_grave_accent
, &index
, &length
));
159 EXPECT_EQ(0U, index
);
160 EXPECT_EQ(e_with_grave_accent
.size(), length
);
162 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
163 E_with_grave_combining_mark
, e_with_acute_accent
, &index
, &length
));
164 EXPECT_EQ(0U, index
);
165 EXPECT_EQ(e_with_acute_accent
.size(), length
);
// Unaccented upper-case "E" against lower-case precomposed e-grave.
167 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
168 E_base
, e_with_grave_accent
, &index
, &length
));
169 EXPECT_EQ(0U, index
);
170 EXPECT_EQ(e_with_grave_accent
.size(), length
);
// Negative cases: an accented "a" is not expected to match an accented "e",
// whether both are precomposed or both are decomposed.
172 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
173 a_with_acute_accent
, e_with_acute_accent
, &index
, &length
));
175 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
176 a_with_acute_combining_mark
, e_with_acute_combining_mark
,
// Put back the locale name captured before the switch to "en_US".
180 SetICUDefaultLocale(default_locale
.data());
// Shows that the result of StringSearchIgnoringCaseAndAccents() changes with
// the ICU default locale: the same pair of strings is expected to match before
// the locale is switched to "da" and expected not to match afterwards.
// NOTE(review): the closing brace of this test is not visible in this listing.
183 TEST(StringSearchTest
, UnicodeLocaleDependent
) {
185 const string16 a_base
= WideToUTF16(L
"a");
187 // Composed characters
// U+00E5, a single precomposed code point ("a" with ring above).
188 const string16 a_with_ring
= WideToUTF16(L
"\u00e5");
// Runs under whatever default locale is active when the test starts; NULL is
// passed in the two trailing positions where the other tests pass &index and
// &length.
// NOTE(review): this expectation presumably assumes the starting locale is not
// one like "da" -- confirm, since no locale is set before this call.
190 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
191 a_base
, a_with_ring
, NULL
, NULL
));
// Remember the current default locale, then switch to "da".
// NOTE(review): unlike the sibling tests, the name is kept as the raw pointer
// returned by uloc_getDefault() rather than copied into a std::string --
// confirm against the ICU documentation that it stays valid after the default
// locale is changed.
193 const char* default_locale
= uloc_getDefault();
194 SetICUDefaultLocale("da");
// Same arguments as above, now with "da" as the default locale: no match is
// expected.
196 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
197 a_base
, a_with_ring
, NULL
, NULL
));
// Put back the locale that was active before the switch to "da".
199 SetICUDefaultLocale(default_locale
);
// Constructs one FixedPatternStringSearchIgnoringCaseAndAccents from "hello"
// and calls Search() on it with three different texts, checking the reported
// result, index and length for each.
// NOTE(review): the declarations of `index` and `length`, the statement that
// consumes `locale_is_posix`, and the closing brace are not visible in this
// listing -- confirm against the complete file before changing code.
202 TEST(StringSearchTest
, FixedPatternMultipleSearch
) {
// Copy the current ICU default locale name so it can be handed back to
// SetICUDefaultLocale() at the end of the test.
203 std::string
default_locale(uloc_getDefault());
// True when the test started under "en_US_POSIX". Presumably this guards the
// locale switch below -- TODO confirm; the guard is not visible here.
204 bool locale_is_posix
= (default_locale
== "en_US_POSIX");
206 SetICUDefaultLocale("en_US");
211 // Search "hello" over multiple texts.
212 FixedPatternStringSearchIgnoringCaseAndAccents
query(ASCIIToUTF16("hello"));
// Match in the middle of the text: expected at index 2 with length 5.
213 EXPECT_TRUE(query
.Search(ASCIIToUTF16("12hello34"), &index
, &length
));
214 EXPECT_EQ(2U, index
);
215 EXPECT_EQ(5U, length
);
// Text that does not contain the pattern: no match is expected.
216 EXPECT_FALSE(query
.Search(ASCIIToUTF16("bye"), &index
, &length
));
// Text that differs from the pattern only in letter case: a match is expected
// at index 0 with length 5, using the same `query` object as above.
217 EXPECT_TRUE(query
.Search(ASCIIToUTF16("hELLo"), &index
, &length
));
218 EXPECT_EQ(0U, index
);
219 EXPECT_EQ(5U, length
);
// Put back the locale name captured before the switch to "en_US".
222 SetICUDefaultLocale(default_locale
.data());