Another take on menu's. This uses the hosting menu scroll view container as a menuba...
[chromium-blink-merge.git] / app / l10n_util.cc
blob1ec4b1997a2ea2a7ad0b1f0cd17f1557797c436a
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "app/l10n_util.h"
7 #include <cstdlib>
9 #include "app/app_paths.h"
10 #include "app/l10n_util_collator.h"
11 #include "app/resource_bundle.h"
12 #include "base/command_line.h"
13 #include "base/file_util.h"
14 #include "base/i18n/file_util_icu.h"
15 #include "base/i18n/rtl.h"
16 #include "base/path_service.h"
17 #include "base/scoped_ptr.h"
18 #include "base/string16.h"
19 #include "base/string_number_conversions.h"
20 #include "base/sys_string_conversions.h"
21 #include "base/utf_string_conversions.h"
22 #include "build/build_config.h"
23 #include "gfx/canvas.h"
24 #include "unicode/rbbi.h"
26 #if defined(OS_MACOSX)
27 #include "app/l10n_util_mac.h"
28 #endif
30 // TODO(playmobil): remove this undef once SkPostConfig.h is fixed.
31 // skia/include/corecg/SkPostConfig.h #defines strcasecmp() so we can't use
32 // base::strcasecmp() without #undefing it here.
33 #undef strcasecmp
35 namespace {
// Extension of the per-locale data file that IsLocaleAvailable() probes for.
// The value differs per platform: a wide-character ".dll" on Windows and a
// narrow ".pak" on POSIX systems.
#if defined(OS_WIN)
const FilePath::CharType kLocaleFileExtension[] = L".dll";
#elif defined(OS_POSIX)
const FilePath::CharType kLocaleFileExtension[] = ".pak";
#endif
// Ellipsis (U+2026) appended by TruncateString() to strings that had to be
// shortened.
const wchar_t* const kElideString = L"\x2026";
// Language codes offered by GetAcceptLanguagesForLocale(), in alphabetical
// order of the code. Each entry is annotated with its English name.
static const char* const kAcceptLanguageList[] = {
  "af",     // Afrikaans
  "am",     // Amharic
  "ar",     // Arabic
  "az",     // Azerbaijani
  "be",     // Belarusian
  "bg",     // Bulgarian
  "bh",     // Bihari
  "bn",     // Bengali
  "br",     // Breton
  "bs",     // Bosnian
  "ca",     // Catalan
  "co",     // Corsican
  "cs",     // Czech
  "cy",     // Welsh
  "da",     // Danish
  "de",     // German
  "de-AT",  // German (Austria)
  "de-CH",  // German (Switzerland)
  "de-DE",  // German (Germany)
  "el",     // Greek
  "en",     // English
  "en-AU",  // English (Australia)
  "en-CA",  // English (Canada)
  "en-GB",  // English (UK)
  "en-NZ",  // English (New Zealand)
  "en-US",  // English (US)
  "en-ZA",  // English (South Africa)
  "eo",     // Esperanto
  // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
  // Spanish speaking countries?
  "es",     // Spanish
  "et",     // Estonian
  "eu",     // Basque
  "fa",     // Persian
  "fi",     // Finnish
  "fil",    // Filipino
  "fo",     // Faroese
  "fr",     // French
  "fr-CA",  // French (Canada)
  "fr-CH",  // French (Switzerland)
  "fr-FR",  // French (France)
  "fy",     // Frisian
  "ga",     // Irish
  "gd",     // Scots Gaelic
  "gl",     // Galician
  "gn",     // Guarani
  "gu",     // Gujarati
  "ha",     // Hausa
  "haw",    // Hawaiian
  "he",     // Hebrew
  "hi",     // Hindi
  "hr",     // Croatian
  "hu",     // Hungarian
  "hy",     // Armenian
  "ia",     // Interlingua
  "id",     // Indonesian
  "is",     // Icelandic
  "it",     // Italian
  "it-CH",  // Italian (Switzerland)
  "it-IT",  // Italian (Italy)
  "ja",     // Japanese
  "jw",     // Javanese
  "ka",     // Georgian
  "kk",     // Kazakh
  "km",     // Cambodian
  "kn",     // Kannada
  "ko",     // Korean
  "ku",     // Kurdish
  "ky",     // Kyrgyz
  "la",     // Latin
  "ln",     // Lingala
  "lo",     // Laothian
  "lt",     // Lithuanian
  "lv",     // Latvian
  "mk",     // Macedonian
  "ml",     // Malayalam
  "mn",     // Mongolian
  "mo",     // Moldavian
  "mr",     // Marathi
  "ms",     // Malay
  "mt",     // Maltese
  "nb",     // Norwegian (Bokmal)
  "ne",     // Nepali
  "nl",     // Dutch
  "nn",     // Norwegian (Nynorsk)
  "no",     // Norwegian
  "oc",     // Occitan
  "om",     // Oromo
  "or",     // Oriya
  "pa",     // Punjabi
  "pl",     // Polish
  "ps",     // Pashto
  "pt",     // Portuguese
  "pt-BR",  // Portuguese (Brazil)
  "pt-PT",  // Portuguese (Portugal)
  "qu",     // Quechua
  "rm",     // Romansh
  "ro",     // Romanian
  "ru",     // Russian
  "sd",     // Sindhi
  "sh",     // Serbo-Croatian
  "si",     // Sinhalese
  "sk",     // Slovak
  "sl",     // Slovenian
  "sn",     // Shona
  "so",     // Somali
  "sq",     // Albanian
  "sr",     // Serbian
  "st",     // Sesotho
  "su",     // Sundanese
  "sv",     // Swedish
  "sw",     // Swahili
  "ta",     // Tamil
  "te",     // Telugu
  "tg",     // Tajik
  "th",     // Thai
  "ti",     // Tigrinya
  "tk",     // Turkmen
  "to",     // Tonga
  "tr",     // Turkish
  "tt",     // Tatar
  "tw",     // Twi
  "ug",     // Uighur
  "uk",     // Ukrainian
  "ur",     // Urdu
  "uz",     // Uzbek
  "vi",     // Vietnamese
  "xh",     // Xhosa
  "yi",     // Yiddish
  "yo",     // Yoruba
  "zh",     // Chinese
  "zh-CN",  // Chinese (Simplified)
  "zh-TW",  // Chinese (Traditional)
  "zu",     // Zulu
};
183 // Returns true if |locale_name| has an alias in the ICU data file.
184 bool IsDuplicateName(const std::string& locale_name) {
185 static const char* const kDuplicateNames[] = {
186 "en",
187 "pt",
188 "zh",
189 "zh_hans_cn",
190 "zh_hant_tw"
193 // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
194 // 'es-419' (Spanish in Latin America) is not available in ICU so that it
195 // has to be added manually in GetAvailableLocales().
196 if (LowerCaseEqualsASCII(locale_name.substr(0, 3), "es_"))
197 return true;
198 for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
199 if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
200 return true;
202 return false;
205 bool IsLocaleNameTranslated(const char* locale,
206 const std::string& display_locale) {
207 string16 display_name =
208 l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
209 // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
210 // uloc_getDisplayName returns the actual translation or the default
211 // value (locale code), we have to rely on this hack to tell whether
212 // the translation is available or not. If ICU doesn't have a translated
213 // name for this locale, GetDisplayNameForLocale will just return the
214 // locale code.
215 return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale;
218 // We added 30+ minimally populated locales with only a few entries
219 // (exemplar character set, script, writing direction and its own
220 // lanaguage name). These locales have to be distinguished from the
221 // fully populated locales to which Chrome is localized.
222 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
223 // For partially populated locales, even the translation for "English"
224 // is not available. A more robust/elegant way to check is to add a special
225 // field (say, 'isPartial' to our version of ICU locale files) and
226 // check its value, but this hack seems to work well.
227 return !IsLocaleNameTranslated("en", locale_name);
230 bool IsLocaleAvailable(const std::string& locale,
231 const FilePath& locale_path) {
232 // If locale has any illegal characters in it, we don't want to try to
233 // load it because it may be pointing outside the locale data file directory.
234 if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale)))
235 return false;
237 // IsLocalePartiallyPopulated() can be called here for an early return w/o
238 // checking the resource availability below. It'd help when Chrome is run
239 // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
240 // but it'd slow down the start up time a little bit for locales Chrome is
241 // localized to. So, we don't call it here.
242 if (!l10n_util::IsLocaleSupportedByOS(locale))
243 return false;
245 FilePath test_path = locale_path;
246 test_path =
247 test_path.AppendASCII(locale).ReplaceExtension(kLocaleFileExtension);
248 return file_util::PathExists(test_path);
251 bool CheckAndResolveLocale(const std::string& locale,
252 const FilePath& locale_path,
253 std::string* resolved_locale) {
254 if (IsLocaleAvailable(locale, locale_path)) {
255 *resolved_locale = locale;
256 return true;
258 // If the locale matches language but not country, use that instead.
259 // TODO(jungshik) : Nothing is done about languages that Chrome
260 // does not support but available on Windows. We fall
261 // back to en-US in GetApplicationLocale so that it's a not critical,
262 // but we can do better.
263 std::string::size_type hyphen_pos = locale.find('-');
264 if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
265 std::string lang(locale, 0, hyphen_pos);
266 std::string region(locale, hyphen_pos + 1);
267 std::string tmp_locale(lang);
268 // Map es-RR other than es-ES to es-419 (Chrome's Latin American
269 // Spanish locale).
270 if (LowerCaseEqualsASCII(lang, "es") && !LowerCaseEqualsASCII(region, "es"))
271 tmp_locale.append("-419");
272 else if (LowerCaseEqualsASCII(lang, "zh")) {
273 // Map zh-HK and zh-MK to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
274 if (LowerCaseEqualsASCII(region, "hk") ||
275 LowerCaseEqualsASCII(region, "mk")) {
276 tmp_locale.append("-TW");
277 } else {
278 tmp_locale.append("-CN");
281 if (IsLocaleAvailable(tmp_locale, locale_path)) {
282 resolved_locale->swap(tmp_locale);
283 return true;
287 // Google updater uses no, iw and en for our nb, he, and en-US.
288 // We need to map them to our codes.
289 struct {
290 const char* source;
291 const char* dest;
292 } alias_map[] = {
293 {"no", "nb"},
294 {"tl", "fil"},
295 {"iw", "he"},
296 {"en", "en-US"},
299 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
300 if (LowerCaseEqualsASCII(locale, alias_map[i].source)) {
301 std::string tmp_locale(alias_map[i].dest);
302 if (IsLocaleAvailable(tmp_locale, locale_path)) {
303 resolved_locale->swap(tmp_locale);
304 return true;
309 return false;
312 // Get the locale of the operating system. The return value is of the form
313 // language[-country] (e.g., en-US) where the language is the 2 letter code from
314 // ISO-639.
315 std::string GetSystemLocale() {
316 std::string language, region;
317 base::i18n::GetLanguageAndRegionFromOS(&language, &region);
318 std::string ret;
319 if (!language.empty())
320 ret.append(language);
321 if (!region.empty()) {
322 ret.append("-");
323 ret.append(region);
325 return ret;
#if defined(OS_POSIX) && !defined(OS_MACOSX)
// Splits and normalizes the value of the LANGUAGE environment variable.
// LANGUAGE holds a ':'-separated priority list of the user's preferred
// locales for application UI messages, each of the form
//   language[_territory[.codeset]][@modifier]
//
// Every non-empty entry is appended to |result| in language[-territory]
// form (e.g. fr, zh-CN): the codeset and modifier are dropped, the language
// is lower-cased and the territory is upper-cased.
void SplitAndNormalizeLanguageList(const std::string& env_language,
                                   std::vector<std::string>* result) {
  std::vector<std::string> entries;
  SplitString(env_language, ':', &entries);

  for (size_t n = 0; n < entries.size(); ++n) {
    std::string& entry = entries[n];

    // Drop the encoding and modifier part, if any.
    const size_t suffix_pos = entry.find_first_of(".@");
    if (suffix_pos != std::string::npos)
      entry.erase(suffix_pos);

    if (entry.empty())
      continue;

    const size_t separator = entry.find_first_of("_-");
    if (separator == std::string::npos) {
      // Language only.
      result->push_back(StringToLowerASCII(entry));
      continue;
    }

    // Language is always lower case, territory always upper case.
    std::string normalized = StringToLowerASCII(entry.substr(0, separator));
    normalized.append("-");
    normalized.append(StringToUpperASCII(entry.substr(separator + 1)));
    result->push_back(normalized);
  }
}
#endif
365 } // namespace
367 namespace l10n_util {
369 std::string GetApplicationLocale(const std::string& pref_locale) {
370 #if !defined(OS_MACOSX)
371 FilePath locale_path;
372 PathService::Get(app::DIR_LOCALES, &locale_path);
373 std::string resolved_locale;
374 std::vector<std::string> candidates;
375 const std::string system_locale = GetSystemLocale();
377 // We only use --lang and the app pref on Windows. On Linux, we only
378 // look at the LC_*/LANG environment variables. We do, however, pass --lang
379 // to renderer and plugin processes so they know what language the parent
380 // process decided to use.
381 #if defined(OS_WIN)
382 // First, try the preference value.
383 if (!pref_locale.empty())
384 candidates.push_back(pref_locale);
386 // Next, try the system locale.
387 candidates.push_back(system_locale);
389 #elif defined(OS_CHROMEOS)
390 // On ChromeOS, use the application locale preference.
391 if (!pref_locale.empty())
392 candidates.push_back(pref_locale);
394 #elif defined(OS_POSIX)
395 // On POSIX, we also check LANGUAGE environment variable, which is supported
396 // by gettext to specify a priority list of prefered languages.
397 const char* env_language = ::getenv("LANGUAGE");
398 if (env_language)
399 SplitAndNormalizeLanguageList(env_language, &candidates);
401 // Only fallback to the system locale if LANGUAGE is not specified.
402 // We emulate gettext's behavior here, which ignores LANG/LC_MESSAGES/LC_ALL
403 // when LANGUAGE is specified. If no language specified in LANGUAGE is valid,
404 // then just fallback to the locale based on LC_ALL/LANG.
405 if (candidates.empty())
406 candidates.push_back(system_locale);
407 #endif
409 std::vector<std::string>::const_iterator i = candidates.begin();
410 for (; i != candidates.end(); ++i) {
411 if (CheckAndResolveLocale(*i, locale_path, &resolved_locale)) {
412 base::i18n::SetICUDefaultLocale(resolved_locale);
413 return resolved_locale;
417 // Fallback on en-US.
418 const std::string fallback_locale("en-US");
419 if (IsLocaleAvailable(fallback_locale, locale_path)) {
420 base::i18n::SetICUDefaultLocale(fallback_locale);
421 return fallback_locale;
424 // No locale data file was found; we shouldn't get here.
425 NOTREACHED();
427 return std::string();
429 #else // !defined(OS_MACOSX)
431 // Use any override (Cocoa for the browser), otherwise use the preference
432 // passed to the function.
433 std::string app_locale = l10n_util::GetLocaleOverride();
434 if (app_locale.empty())
435 app_locale = pref_locale;
437 // The above should handle all of the cases Chrome normally hits, but for some
438 // unit tests, we need something to fall back too.
439 if (app_locale.empty())
440 app_locale = "en-US";
442 // Windows/Linux call SetICUDefaultLocale after determining the actual locale
443 // with CheckAndResolveLocal to make ICU APIs work in that locale.
444 // Mac doesn't use a locale directory tree of resources (it uses Mac style
445 // resources), so mirror the Windows/Linux behavior of calling
446 // SetICUDefaultLocale.
447 base::i18n::SetICUDefaultLocale(app_locale);
448 return app_locale;
449 #endif // !defined(OS_MACOSX)
452 string16 GetDisplayNameForLocale(const std::string& locale,
453 const std::string& display_locale,
454 bool is_for_ui) {
455 std::string locale_code = locale;
456 // Internally, we use the language code of zh-CN and zh-TW, but we want the
457 // display names to be Chinese (Simplified) and Chinese (Traditional) instead
458 // of Chinese (China) and Chinese (Taiwan). To do that, we pass zh-Hans
459 // and zh-Hant to ICU. Even with this mapping, we'd get
460 // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
461 // even longer results in other languages. Arguably, they're better than
462 // the current results : Chinese (China) / Chinese (Taiwan).
463 // TODO(jungshik): Do one of the following:
464 // 1. Special-case Chinese by getting the custom-translation for them
465 // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
466 // 3. Get translations for two directly from the ICU resouce bundle
467 // because they're not accessible with other any API.
468 // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
469 // #1 and #2 wouldn't work if display_locale != current UI locale although
470 // we can think of additional hack to work around the problem.
471 // #3 can be potentially expensive.
472 if (locale_code == "zh-CN")
473 locale_code = "zh-Hans";
474 else if (locale_code == "zh-TW")
475 locale_code = "zh-Hant";
477 UErrorCode error = U_ZERO_ERROR;
478 const int buffer_size = 1024;
480 string16 display_name;
481 int actual_size = uloc_getDisplayName(locale_code.c_str(),
482 display_locale.c_str(),
483 WriteInto(&display_name, buffer_size + 1), buffer_size, &error);
484 DCHECK(U_SUCCESS(error));
485 display_name.resize(actual_size);
486 // Add an RTL mark so parentheses are properly placed.
487 if (is_for_ui && base::i18n::IsRTL())
488 display_name.push_back(static_cast<char16>(base::i18n::kRightToLeftMark));
489 return display_name;
492 std::wstring GetString(int message_id) {
493 ResourceBundle& rb = ResourceBundle::GetSharedInstance();
494 return UTF16ToWide(rb.GetLocalizedString(message_id));
497 std::string GetStringUTF8(int message_id) {
498 ResourceBundle& rb = ResourceBundle::GetSharedInstance();
499 return UTF16ToUTF8(rb.GetLocalizedString(message_id));
502 string16 GetStringUTF16(int message_id) {
503 ResourceBundle& rb = ResourceBundle::GetSharedInstance();
504 return rb.GetLocalizedString(message_id);
507 static string16 GetStringF(int message_id,
508 const string16& a,
509 const string16& b,
510 const string16& c,
511 const string16& d,
512 std::vector<size_t>* offsets) {
513 // TODO(tc): We could save a string copy if we got the raw string as
514 // a StringPiece and were able to call ReplaceStringPlaceholders with
515 // a StringPiece format string and string16 substitution strings. In
516 // practice, the strings should be relatively short.
517 ResourceBundle& rb = ResourceBundle::GetSharedInstance();
518 const string16& format_string = rb.GetLocalizedString(message_id);
519 std::vector<string16> subst;
520 subst.push_back(a);
521 subst.push_back(b);
522 subst.push_back(c);
523 subst.push_back(d);
524 string16 formatted = ReplaceStringPlaceholders(format_string, subst,
525 offsets);
526 return formatted;
529 #if !defined(WCHAR_T_IS_UTF16)
530 std::wstring GetStringF(int message_id, const std::wstring& a) {
531 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), string16(),
532 string16(), string16(), NULL));
535 std::wstring GetStringF(int message_id,
536 const std::wstring& a,
537 const std::wstring& b) {
538 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b),
539 string16(), string16(), NULL));
542 std::wstring GetStringF(int message_id,
543 const std::wstring& a,
544 const std::wstring& b,
545 const std::wstring& c) {
546 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b),
547 WideToUTF16(c), string16(), NULL));
550 std::wstring GetStringF(int message_id,
551 const std::wstring& a,
552 const std::wstring& b,
553 const std::wstring& c,
554 const std::wstring& d) {
555 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b),
556 WideToUTF16(c), WideToUTF16(d), NULL));
558 #endif
560 std::string GetStringFUTF8(int message_id,
561 const string16& a) {
562 return UTF16ToUTF8(GetStringF(message_id, a, string16(), string16(),
563 string16(), NULL));
566 std::string GetStringFUTF8(int message_id,
567 const string16& a,
568 const string16& b) {
569 return UTF16ToUTF8(GetStringF(message_id, a, b, string16(), string16(),
570 NULL));
573 std::string GetStringFUTF8(int message_id,
574 const string16& a,
575 const string16& b,
576 const string16& c) {
577 return UTF16ToUTF8(GetStringF(message_id, a, b, c, string16(), NULL));
580 std::string GetStringFUTF8(int message_id,
581 const string16& a,
582 const string16& b,
583 const string16& c,
584 const string16& d) {
585 return UTF16ToUTF8(GetStringF(message_id, a, b, c, d, NULL));
588 string16 GetStringFUTF16(int message_id,
589 const string16& a) {
590 return GetStringF(message_id, a, string16(), string16(), string16(), NULL);
593 string16 GetStringFUTF16(int message_id,
594 const string16& a,
595 const string16& b) {
596 return GetStringF(message_id, a, b, string16(), string16(), NULL);
599 string16 GetStringFUTF16(int message_id,
600 const string16& a,
601 const string16& b,
602 const string16& c) {
603 return GetStringF(message_id, a, b, c, string16(), NULL);
606 string16 GetStringFUTF16(int message_id,
607 const string16& a,
608 const string16& b,
609 const string16& c,
610 const string16& d) {
611 return GetStringF(message_id, a, b, c, d, NULL);
614 std::wstring GetStringF(int message_id, const std::wstring& a, size_t* offset) {
615 DCHECK(offset);
616 std::vector<size_t> offsets;
617 string16 result = GetStringF(message_id, WideToUTF16(a), string16(),
618 string16(), string16(), &offsets);
619 DCHECK(offsets.size() == 1);
620 *offset = offsets[0];
621 return UTF16ToWide(result);
624 std::wstring GetStringF(int message_id,
625 const std::wstring& a,
626 const std::wstring& b,
627 std::vector<size_t>* offsets) {
628 return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b),
629 string16(), string16(), offsets));
632 string16 GetStringFUTF16(int message_id, const string16& a, size_t* offset) {
633 DCHECK(offset);
634 std::vector<size_t> offsets;
635 string16 result = GetStringFUTF16(message_id, a, string16(), &offsets);
636 DCHECK(offsets.size() == 1);
637 *offset = offsets[0];
638 return result;
641 string16 GetStringFUTF16(int message_id,
642 const string16& a,
643 const string16& b,
644 std::vector<size_t>* offsets) {
645 return GetStringF(message_id, a, b, string16(), string16(), offsets);
648 std::wstring GetStringF(int message_id, int a) {
649 return GetStringF(message_id, UTF8ToWide(base::IntToString(a)));
652 std::wstring GetStringF(int message_id, int64 a) {
653 return GetStringF(message_id, UTF8ToWide(base::Int64ToString(a)));
656 std::wstring TruncateString(const std::wstring& string, size_t length) {
657 if (string.size() <= length)
658 // String fits, return it.
659 return string;
661 if (length == 0) {
662 // No room for the ellide string, return an empty string.
663 return std::wstring(L"");
665 size_t max = length - 1;
667 if (max == 0) {
668 // Just enough room for the elide string.
669 return kElideString;
672 #if defined(WCHAR_T_IS_UTF32)
673 const string16 string_utf16 = WideToUTF16(string);
674 #else
675 const std::wstring &string_utf16 = string;
676 #endif
677 // Use a line iterator to find the first boundary.
678 UErrorCode status = U_ZERO_ERROR;
679 scoped_ptr<icu::RuleBasedBreakIterator> bi(
680 static_cast<icu::RuleBasedBreakIterator*>(
681 icu::RuleBasedBreakIterator::createLineInstance(
682 icu::Locale::getDefault(), status)));
683 if (U_FAILURE(status))
684 return string.substr(0, max) + kElideString;
685 bi->setText(string_utf16.c_str());
686 int32_t index = bi->preceding(static_cast<int32_t>(max));
687 if (index == icu::BreakIterator::DONE) {
688 index = static_cast<int32_t>(max);
689 } else {
690 // Found a valid break (may be the beginning of the string). Now use
691 // a character iterator to find the previous non-whitespace character.
692 icu::StringCharacterIterator char_iterator(string_utf16.c_str());
693 if (index == 0) {
694 // No valid line breaks. Start at the end again. This ensures we break
695 // on a valid character boundary.
696 index = static_cast<int32_t>(max);
698 char_iterator.setIndex(index);
699 while (char_iterator.hasPrevious()) {
700 char_iterator.previous();
701 if (!(u_isspace(char_iterator.current()) ||
702 u_charType(char_iterator.current()) == U_CONTROL_CHAR ||
703 u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) {
704 // Not a whitespace character. Advance the iterator so that we
705 // include the current character in the truncated string.
706 char_iterator.next();
707 break;
710 if (char_iterator.hasPrevious()) {
711 // Found a valid break point.
712 index = char_iterator.getIndex();
713 } else {
714 // String has leading whitespace, return the elide string.
715 return kElideString;
718 return string.substr(0, index) + kElideString;
721 string16 ToLower(const string16& string) {
722 icu::UnicodeString lower_u_str(
723 icu::UnicodeString(string.c_str()).toLower(icu::Locale::getDefault()));
724 string16 result;
725 lower_u_str.extract(0, lower_u_str.length(),
726 WriteInto(&result, lower_u_str.length() + 1));
727 return result;
730 string16 ToUpper(const string16& string) {
731 icu::UnicodeString upper_u_str(
732 icu::UnicodeString(string.c_str()).toUpper(icu::Locale::getDefault()));
733 string16 result;
734 upper_u_str.extract(0, upper_u_str.length(),
735 WriteInto(&result, upper_u_str.length() + 1));
736 return result;
739 // Compares the character data stored in two different string16 strings by
740 // specified Collator instance.
741 UCollationResult CompareString16WithCollator(const icu::Collator* collator,
742 const string16& lhs,
743 const string16& rhs) {
744 DCHECK(collator);
745 UErrorCode error = U_ZERO_ERROR;
746 UCollationResult result = collator->compare(
747 static_cast<const UChar*>(lhs.c_str()), static_cast<int>(lhs.length()),
748 static_cast<const UChar*>(rhs.c_str()), static_cast<int>(rhs.length()),
749 error);
750 DCHECK(U_SUCCESS(error));
751 return result;
754 // Compares the character data stored in two different std:wstring strings by
755 // specified Collator instance.
756 UCollationResult CompareStringWithCollator(const icu::Collator* collator,
757 const std::wstring& lhs,
758 const std::wstring& rhs) {
759 DCHECK(collator);
760 UCollationResult result;
761 #if defined(WCHAR_T_IS_UTF32)
762 // Need to convert to UTF-16 to be compatible with UnicodeString's
763 // constructor.
764 string16 lhs_utf16 = WideToUTF16(lhs);
765 string16 rhs_utf16 = WideToUTF16(rhs);
767 result = CompareString16WithCollator(collator, lhs_utf16, rhs_utf16);
768 #else
769 result = CompareString16WithCollator(collator, lhs, rhs);
770 #endif
771 return result;
774 // Specialization of operator() method for std::wstring version.
775 template <>
776 bool StringComparator<std::wstring>::operator()(const std::wstring& lhs,
777 const std::wstring& rhs) {
778 // If we can not get collator instance for specified locale, just do simple
779 // string compare.
780 if (!collator_)
781 return lhs < rhs;
782 return CompareStringWithCollator(collator_, lhs, rhs) == UCOL_LESS;
785 #if !defined(WCHAR_T_IS_UTF16)
786 // Specialization of operator() method for string16 version.
787 template <>
788 bool StringComparator<string16>::operator()(const string16& lhs,
789 const string16& rhs) {
790 // If we can not get collator instance for specified locale, just do simple
791 // string compare.
792 if (!collator_)
793 return lhs < rhs;
794 return CompareString16WithCollator(collator_, lhs, rhs) == UCOL_LESS;
796 #endif // !defined(WCHAR_T_IS_UTF16)
798 void SortStrings(const std::string& locale,
799 std::vector<std::wstring>* strings) {
800 SortVectorWithStringKey(locale, strings, false);
803 void SortStrings16(const std::string& locale,
804 std::vector<string16>* strings) {
805 SortVectorWithStringKey(locale, strings, false);
808 const std::vector<std::string>& GetAvailableLocales() {
809 static std::vector<std::string> locales;
810 if (locales.empty()) {
811 int num_locales = uloc_countAvailable();
812 for (int i = 0; i < num_locales; ++i) {
813 std::string locale_name = uloc_getAvailable(i);
814 // Filter out the names that have aliases.
815 if (IsDuplicateName(locale_name))
816 continue;
817 // Filter out locales for which we have only partially populated data
818 // and to which Chrome is not localized.
819 if (IsLocalePartiallyPopulated(locale_name))
820 continue;
821 if (!IsLocaleSupportedByOS(locale_name))
822 continue;
823 // Normalize underscores to hyphens because that's what our locale files
824 // use.
825 std::replace(locale_name.begin(), locale_name.end(), '_', '-');
827 // Map the Chinese locale names over to zh-CN and zh-TW.
828 if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
829 locale_name = "zh-CN";
830 } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
831 locale_name = "zh-TW";
833 locales.push_back(locale_name);
836 // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
837 locales.push_back("es-419");
839 return locales;
842 void GetAcceptLanguagesForLocale(const std::string& display_locale,
843 std::vector<std::string>* locale_codes) {
844 for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
845 if (!IsLocaleNameTranslated(kAcceptLanguageList[i], display_locale))
846 // TODO(jungshik) : Put them at the of the list with language codes
847 // enclosed by brackets instead of skipping.
848 continue;
849 locale_codes->push_back(kAcceptLanguageList[i]);
853 } // namespace l10n_util