app/l10n_util.cc

   1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "app/l10n_util.h"
   6
   7 #include <cstdlib>
   8
   9 #include "app/app_paths.h"
  10 #include "app/l10n_util_collator.h"
  11 #include "app/resource_bundle.h"
  12 #include "base/command_line.h"
  13 #include "base/file_util.h"
  14 #include "base/i18n/file_util_icu.h"
  15 #include "base/i18n/rtl.h"
  16 #include "base/path_service.h"
  17 #include "base/scoped_ptr.h"
  18 #include "base/string16.h"
  19 #include "base/string_number_conversions.h"
  20 #include "base/sys_string_conversions.h"
  21 #include "base/utf_string_conversions.h"
  22 #include "build/build_config.h"
  23 #include "gfx/canvas.h"
  24 #include "unicode/rbbi.h"
  25
  26 #if defined(OS_MACOSX)
  27 #include "app/l10n_util_mac.h"
  28 #endif
  29
  30 // TODO(playmobil): remove this undef once SkPostConfig.h is fixed.
  31 // skia/include/corecg/SkPostConfig.h #defines strcasecmp() so we can't use
  32 // base::strcasecmp() without #undefing it here.
  33 #undef strcasecmp
  34
  35 namespace {
  36
// Extension of the per-locale data file that IsLocaleAvailable() probes for:
// a .dll on Windows and a .pak file on POSIX platforms.
#if defined(OS_WIN)
static const FilePath::CharType kLocaleFileExtension[] = L".dll";
#elif defined(OS_POSIX)
static const FilePath::CharType kLocaleFileExtension[] = ".pak";
#endif

// Added to the end of strings that are too big in TruncateString.
// U+2026 is the HORIZONTAL ELLIPSIS character.
static const wchar_t* const kElideString = L"\x2026";
  45
// Language codes that GetAcceptLanguagesForLocale() may return. Entries whose
// display name is not translated in the requested display locale are skipped
// by that function.
static const char* const kAcceptLanguageList[] = {
  "af",     // Afrikaans
  "am",     // Amharic
  "ar",     // Arabic
  "az",     // Azerbaijani
  "be",     // Belarusian
  "bg",     // Bulgarian
  "bh",     // Bihari
  "bn",     // Bengali
  "br",     // Breton
  "bs",     // Bosnian
  "ca",     // Catalan
  "co",     // Corsican
  "cs",     // Czech
  "cy",     // Welsh
  "da",     // Danish
  "de",     // German
  "de-AT",  // German (Austria)
  "de-CH",  // German (Switzerland)
  "de-DE",  // German (Germany)
  "el",     // Greek
  "en",     // English
  "en-AU",  // English (Australia)
  "en-CA",  // English (Canada)
  "en-GB",  // English (UK)
  "en-NZ",  // English (New Zealand)
  "en-US",  // English (US)
  "en-ZA",  // English (South Africa)
  "eo",     // Esperanto
  // TODO(jungshik) : Do we want to list all es-Foo for Latin-American
  // Spanish speaking countries?
  "es",     // Spanish
  "et",     // Estonian
  "eu",     // Basque
  "fa",     // Persian
  "fi",     // Finnish
  "fil",    // Filipino
  "fo",     // Faroese
  "fr",     // French
  "fr-CA",  // French (Canada)
  "fr-CH",  // French (Switzerland)
  "fr-FR",  // French (France)
  "fy",     // Frisian
  "ga",     // Irish
  "gd",     // Scots Gaelic
  "gl",     // Galician
  "gn",     // Guarani
  "gu",     // Gujarati
  "ha",     // Hausa
  "haw",    // Hawaiian
  "he",     // Hebrew
  "hi",     // Hindi
  "hr",     // Croatian
  "hu",     // Hungarian
  "hy",     // Armenian
  "ia",     // Interlingua
  "id",     // Indonesian
  "is",     // Icelandic
  "it",     // Italian
  "it-CH",  // Italian (Switzerland)
  "it-IT",  // Italian (Italy)
  "ja",     // Japanese
  "jw",     // Javanese
  "ka",     // Georgian
  "kk",     // Kazakh
  "km",     // Cambodian
  "kn",     // Kannada
  "ko",     // Korean
  "ku",     // Kurdish
  "ky",     // Kyrgyz
  "la",     // Latin
  "ln",     // Lingala
  "lo",     // Laothian
  "lt",     // Lithuanian
  "lv",     // Latvian
  "mk",     // Macedonian
  "ml",     // Malayalam
  "mn",     // Mongolian
  "mo",     // Moldavian
  "mr",     // Marathi
  "ms",     // Malay
  "mt",     // Maltese
  "nb",     // Norwegian (Bokmal)
  "ne",     // Nepali
  "nl",     // Dutch
  "nn",     // Norwegian (Nynorsk)
  "no",     // Norwegian
  "oc",     // Occitan
  "om",     // Oromo
  "or",     // Oriya
  "pa",     // Punjabi
  "pl",     // Polish
  "ps",     // Pashto
  "pt",     // Portuguese
  "pt-BR",  // Portuguese (Brazil)
  "pt-PT",  // Portuguese (Portugal)
  "qu",     // Quechua
  "rm",     // Romansh
  "ro",     // Romanian
  "ru",     // Russian
  "sd",     // Sindhi
  "sh",     // Serbo-Croatian
  "si",     // Sinhalese
  "sk",     // Slovak
  "sl",     // Slovenian
  "sn",     // Shona
  "so",     // Somali
  "sq",     // Albanian
  "sr",     // Serbian
  "st",     // Sesotho
  "su",     // Sundanese
  "sv",     // Swedish
  "sw",     // Swahili
  "ta",     // Tamil
  "te",     // Telugu
  "tg",     // Tajik
  "th",     // Thai
  "ti",     // Tigrinya
  "tk",     // Turkmen
  "to",     // Tonga
  "tr",     // Turkish
  "tt",     // Tatar
  "tw",     // Twi
  "ug",     // Uighur
  "uk",     // Ukrainian
  "ur",     // Urdu
  "uz",     // Uzbek
  "vi",     // Vietnamese
  "xh",     // Xhosa
  "yi",     // Yiddish
  "yo",     // Yoruba
  "zh",     // Chinese
  "zh-CN",  // Chinese (Simplified)
  "zh-TW",  // Chinese (Traditional)
  "zu",     // Zulu
};
 182
 183 // Returns true if |locale_name| has an alias in the ICU data file.
 184 bool IsDuplicateName(const std::string& locale_name) {
 185   static const char* const kDuplicateNames[] = {
 186     "en",
 187     "pt",
 188     "zh",
 189     "zh_hans_cn",
 190     "zh_hant_tw"
 191   };
 192
 193   // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
 194   // 'es-419' (Spanish in Latin America) is not available in ICU so that it
 195   // has to be added manually in GetAvailableLocales().
 196   if (LowerCaseEqualsASCII(locale_name.substr(0, 3),  "es_"))
 197     return true;
 198   for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
 199     if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
 200       return true;
 201   }
 202   return false;
 203 }
 204
 205 bool IsLocaleNameTranslated(const char* locale,
 206                             const std::string& display_locale) {
 207   string16 display_name =
 208       l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
 209   // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
 210   // uloc_getDisplayName returns the actual translation or the default
 211   // value (locale code), we have to rely on this hack to tell whether
 212   // the translation is available or not.  If ICU doesn't have a translated
 213   // name for this locale, GetDisplayNameForLocale will just return the
 214   // locale code.
 215   return !IsStringASCII(display_name) || UTF16ToASCII(display_name) != locale;
 216 }
 217
 218 // We added 30+ minimally populated locales with only a few entries
 219 // (exemplar character set, script, writing direction and its own
 220 // lanaguage name). These locales have to be distinguished from the
 221 // fully populated locales to which Chrome is localized.
 222 bool IsLocalePartiallyPopulated(const std::string& locale_name) {
 223   // For partially populated locales, even the translation for "English"
 224   // is not available. A more robust/elegant way to check is to add a special
 225   // field (say, 'isPartial' to our version of ICU locale files) and
 226   // check its value, but this hack seems to work well.
 227   return !IsLocaleNameTranslated("en", locale_name);
 228 }
 229
 230 bool IsLocaleAvailable(const std::string& locale,
 231                        const FilePath& locale_path) {
 232   // If locale has any illegal characters in it, we don't want to try to
 233   // load it because it may be pointing outside the locale data file directory.
 234   if (!file_util::IsFilenameLegal(ASCIIToUTF16(locale)))
 235     return false;
 236
 237   // IsLocalePartiallyPopulated() can be called here for an early return w/o
 238   // checking the resource availability below. It'd help when Chrome is run
 239   // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
 240   // but it'd slow down the start up time a little bit for locales Chrome is
 241   // localized to. So, we don't call it here.
 242   if (!l10n_util::IsLocaleSupportedByOS(locale))
 243     return false;
 244
 245   FilePath test_path = locale_path;
 246   test_path =
 247     test_path.AppendASCII(locale).ReplaceExtension(kLocaleFileExtension);
 248   return file_util::PathExists(test_path);
 249 }
 250
 251 bool CheckAndResolveLocale(const std::string& locale,
 252                            const FilePath& locale_path,
 253                            std::string* resolved_locale) {
 254   if (IsLocaleAvailable(locale, locale_path)) {
 255     *resolved_locale = locale;
 256     return true;
 257   }
 258   // If the locale matches language but not country, use that instead.
 259   // TODO(jungshik) : Nothing is done about languages that Chrome
 260   // does not support but available on Windows. We fall
 261   // back to en-US in GetApplicationLocale so that it's a not critical,
 262   // but we can do better.
 263   std::string::size_type hyphen_pos = locale.find('-');
 264   if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
 265     std::string lang(locale, 0, hyphen_pos);
 266     std::string region(locale, hyphen_pos + 1);
 267     std::string tmp_locale(lang);
 268     // Map es-RR other than es-ES to es-419 (Chrome's Latin American
 269     // Spanish locale).
 270     if (LowerCaseEqualsASCII(lang, "es") && !LowerCaseEqualsASCII(region, "es"))
 271       tmp_locale.append("-419");
 272     else if (LowerCaseEqualsASCII(lang, "zh")) {
 273       // Map zh-HK and zh-MK to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
 274      if (LowerCaseEqualsASCII(region, "hk") ||
 275          LowerCaseEqualsASCII(region, "mk")) {
 276        tmp_locale.append("-TW");
 277      } else {
 278        tmp_locale.append("-CN");
 279      }
 280     }
 281     if (IsLocaleAvailable(tmp_locale, locale_path)) {
 282       resolved_locale->swap(tmp_locale);
 283       return true;
 284     }
 285   }
 286
 287   // Google updater uses no, iw and en for our nb, he, and en-US.
 288   // We need to map them to our codes.
 289   struct {
 290     const char* source;
 291     const char* dest;
 292   } alias_map[] = {
 293       {"no", "nb"},
 294       {"tl", "fil"},
 295       {"iw", "he"},
 296       {"en", "en-US"},
 297   };
 298
 299   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
 300     if (LowerCaseEqualsASCII(locale, alias_map[i].source)) {
 301       std::string tmp_locale(alias_map[i].dest);
 302       if (IsLocaleAvailable(tmp_locale, locale_path)) {
 303         resolved_locale->swap(tmp_locale);
 304         return true;
 305       }
 306     }
 307   }
 308
 309   return false;
 310 }
 311
 312 // Get the locale of the operating system.  The return value is of the form
 313 // language[-country] (e.g., en-US) where the language is the 2 letter code from
 314 // ISO-639.
 315 std::string GetSystemLocale() {
 316   std::string language, region;
 317   base::i18n::GetLanguageAndRegionFromOS(&language, &region);
 318   std::string ret;
 319   if (!language.empty())
 320     ret.append(language);
 321   if (!region.empty()) {
 322     ret.append("-");
 323     ret.append(region);
 324   }
 325   return ret;
 326 }
 327
 328 #if defined(OS_POSIX) && !defined(OS_MACOSX)
 329 // Split and normalize the language list specified by LANGUAGE environment.
 330 // LANGUAGE environment specifies a priority list of user prefered locales for
 331 // application UI messages. Locales are separated by ':' character. The format
 332 // of a locale is: language[_territory[.codeset]][@modifier]
 333 //
 334 // This function splits the language list and normalizes each locale into
 335 // language[-territory] format, eg. fr, zh-CN, etc.
 336 void SplitAndNormalizeLanguageList(const std::string& env_language,
 337                                    std::vector<std::string>* result) {
 338   std::vector<std::string> langs;
 339   SplitString(env_language, ':', &langs);
 340   std::vector<std::string>::iterator i = langs.begin();
 341   for (; i != langs.end(); ++i) {
 342     size_t end_pos = i->find_first_of(".@");
 343     // Erase encoding and modifier part.
 344     if (end_pos != std::string::npos)
 345       i->erase(end_pos);
 346
 347     if (!i->empty()) {
 348       std::string locale;
 349       size_t sep = i->find_first_of("_-");
 350       if (sep != std::string::npos) {
 351         // language part is always in lower case.
 352         locale = StringToLowerASCII(i->substr(0, sep));
 353         locale.append("-");
 354         // territory part is always in upper case.
 355         locale.append(StringToUpperASCII(i->substr(sep + 1)));
 356       } else {
 357         locale = StringToLowerASCII(*i);
 358       }
 359       result->push_back(locale);
 360     }
 361   }
 362 }
 363 #endif
 364
 365 }  // namespace
 366
 367 namespace l10n_util {
 368
// Determines the locale the application should use, makes it the ICU default
// locale (base::i18n::SetICUDefaultLocale) and returns it.
//
// |pref_locale| is the locale from the application preferences and may be
// empty. It is only consulted on Windows, ChromeOS and Mac.
//
// On every platform except Mac, a per-platform list of candidate locales is
// built and the first candidate that CheckAndResolveLocale() can map to an
// available locale data file wins, with "en-US" as the last resort. An empty
// string is returned (after NOTREACHED) if not even "en-US" is available.
// On Mac no locale files are probed; see the #else branch.
std::string GetApplicationLocale(const std::string& pref_locale) {
#if !defined(OS_MACOSX)
  FilePath locale_path;
  PathService::Get(app::DIR_LOCALES, &locale_path);
  std::string resolved_locale;
  std::vector<std::string> candidates;
  // Note: not used by the ChromeOS branch below.
  const std::string system_locale = GetSystemLocale();

  // We only use --lang and the app pref on Windows.  On Linux, we only
  // look at the LC_*/LANG environment variables.  We do, however, pass --lang
  // to renderer and plugin processes so they know what language the parent
  // process decided to use.
#if defined(OS_WIN)
  // First, try the preference value.
  if (!pref_locale.empty())
    candidates.push_back(pref_locale);

  // Next, try the system locale.
  candidates.push_back(system_locale);

#elif defined(OS_CHROMEOS)
  // On ChromeOS, use the application locale preference.
  if (!pref_locale.empty())
    candidates.push_back(pref_locale);

#elif defined(OS_POSIX)
  // On POSIX, we also check LANGUAGE environment variable, which is supported
  // by gettext to specify a priority list of preferred languages.
  const char* env_language = ::getenv("LANGUAGE");
  if (env_language)
    SplitAndNormalizeLanguageList(env_language, &candidates);

  // Only fallback to the system locale if LANGUAGE is not specified.
  // We emulate gettext's behavior here, which ignores LANG/LC_MESSAGES/LC_ALL
  // when LANGUAGE is specified. If no language specified in LANGUAGE is valid,
  // then just fallback to the locale based on LC_ALL/LANG.
  if (candidates.empty())
    candidates.push_back(system_locale);
#endif

  // Candidates are tried in priority order; the first one that resolves to an
  // available locale is used.
  std::vector<std::string>::const_iterator i = candidates.begin();
  for (; i != candidates.end(); ++i) {
    if (CheckAndResolveLocale(*i, locale_path, &resolved_locale)) {
      base::i18n::SetICUDefaultLocale(resolved_locale);
      return resolved_locale;
    }
  }

  // Fallback on en-US.
  const std::string fallback_locale("en-US");
  if (IsLocaleAvailable(fallback_locale, locale_path)) {
    base::i18n::SetICUDefaultLocale(fallback_locale);
    return fallback_locale;
  }

  // No locale data file was found; we shouldn't get here.
  NOTREACHED();

  return std::string();

#else  // !defined(OS_MACOSX)

  // Use any override (Cocoa for the browser), otherwise use the preference
  // passed to the function.
  std::string app_locale = l10n_util::GetLocaleOverride();
  if (app_locale.empty())
    app_locale = pref_locale;

  // The above should handle all of the cases Chrome normally hits, but for some
  // unit tests, we need something to fall back to.
  if (app_locale.empty())
    app_locale = "en-US";

  // Windows/Linux call SetICUDefaultLocale after determining the actual locale
  // with CheckAndResolveLocale to make ICU APIs work in that locale.
  // Mac doesn't use a locale directory tree of resources (it uses Mac style
  // resources), so mirror the Windows/Linux behavior of calling
  // SetICUDefaultLocale.
  base::i18n::SetICUDefaultLocale(app_locale);
  return app_locale;
#endif  // !defined(OS_MACOSX)
}
 451
 452 string16 GetDisplayNameForLocale(const std::string& locale,
 453                                  const std::string& display_locale,
 454                                  bool is_for_ui) {
 455   std::string locale_code = locale;
 456   // Internally, we use the language code of zh-CN and zh-TW, but we want the
 457   // display names to be Chinese (Simplified) and Chinese (Traditional) instead
 458   // of Chinese (China) and Chinese (Taiwan).  To do that, we pass zh-Hans
 459   // and zh-Hant to ICU. Even with this mapping, we'd get
 460   // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
 461   // even longer results in other languages. Arguably, they're better than
 462   // the current results : Chinese (China) / Chinese (Taiwan).
 463   // TODO(jungshik): Do one of the following:
 464   // 1. Special-case Chinese by getting the custom-translation for them
 465   // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
 466   // 3. Get translations for two directly from the ICU resouce bundle
 467   // because they're not accessible with other any API.
 468   // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
 469   // #1 and #2 wouldn't work if display_locale != current UI locale although
 470   // we can think of additional hack to work around the problem.
 471   // #3 can be potentially expensive.
 472   if (locale_code == "zh-CN")
 473     locale_code = "zh-Hans";
 474   else if (locale_code == "zh-TW")
 475     locale_code = "zh-Hant";
 476
 477   UErrorCode error = U_ZERO_ERROR;
 478   const int buffer_size = 1024;
 479
 480   string16 display_name;
 481   int actual_size = uloc_getDisplayName(locale_code.c_str(),
 482       display_locale.c_str(),
 483       WriteInto(&display_name, buffer_size + 1), buffer_size, &error);
 484   DCHECK(U_SUCCESS(error));
 485   display_name.resize(actual_size);
 486   // Add an RTL mark so parentheses are properly placed.
 487   if (is_for_ui && base::i18n::IsRTL())
 488     display_name.push_back(static_cast<char16>(base::i18n::kRightToLeftMark));
 489   return display_name;
 490 }
 491
 492 std::wstring GetString(int message_id) {
 493   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 494   return UTF16ToWide(rb.GetLocalizedString(message_id));
 495 }
 496
 497 std::string GetStringUTF8(int message_id) {
 498   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 499   return UTF16ToUTF8(rb.GetLocalizedString(message_id));
 500 }
 501
 502 string16 GetStringUTF16(int message_id) {
 503   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 504   return rb.GetLocalizedString(message_id);
 505 }
 506
 507 static string16 GetStringF(int message_id,
 508                            const string16& a,
 509                            const string16& b,
 510                            const string16& c,
 511                            const string16& d,
 512                            std::vector<size_t>* offsets) {
 513   // TODO(tc): We could save a string copy if we got the raw string as
 514   // a StringPiece and were able to call ReplaceStringPlaceholders with
 515   // a StringPiece format string and string16 substitution strings.  In
 516   // practice, the strings should be relatively short.
 517   ResourceBundle& rb = ResourceBundle::GetSharedInstance();
 518   const string16& format_string = rb.GetLocalizedString(message_id);
 519   std::vector<string16> subst;
 520   subst.push_back(a);
 521   subst.push_back(b);
 522   subst.push_back(c);
 523   subst.push_back(d);
 524   string16 formatted = ReplaceStringPlaceholders(format_string, subst,
 525                                                  offsets);
 526   return formatted;
 527 }
 528
 529 #if !defined(WCHAR_T_IS_UTF16)
 530 std::wstring GetStringF(int message_id, const std::wstring& a) {
 531   return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), string16(),
 532                                 string16(), string16(), NULL));
 533 }
 534
 535 std::wstring GetStringF(int message_id,
 536                         const std::wstring& a,
 537                         const std::wstring& b) {
 538   return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b),
 539                                 string16(), string16(), NULL));
 540 }
 541
 542 std::wstring GetStringF(int message_id,
 543                         const std::wstring& a,
 544                         const std::wstring& b,
 545                         const std::wstring& c) {
 546   return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b),
 547                                 WideToUTF16(c), string16(), NULL));
 548 }
 549
 550 std::wstring GetStringF(int message_id,
 551                         const std::wstring& a,
 552                         const std::wstring& b,
 553                         const std::wstring& c,
 554                         const std::wstring& d) {
 555   return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b),
 556                                 WideToUTF16(c), WideToUTF16(d), NULL));
 557 }
 558 #endif
 559
 560 std::string GetStringFUTF8(int message_id,
 561                            const string16& a) {
 562   return UTF16ToUTF8(GetStringF(message_id, a, string16(), string16(),
 563                                 string16(), NULL));
 564 }
 565
 566 std::string GetStringFUTF8(int message_id,
 567                            const string16& a,
 568                            const string16& b) {
 569   return UTF16ToUTF8(GetStringF(message_id, a, b, string16(), string16(),
 570                                 NULL));
 571 }
 572
 573 std::string GetStringFUTF8(int message_id,
 574                            const string16& a,
 575                            const string16& b,
 576                            const string16& c) {
 577   return UTF16ToUTF8(GetStringF(message_id, a, b, c, string16(), NULL));
 578 }
 579
 580 std::string GetStringFUTF8(int message_id,
 581                            const string16& a,
 582                            const string16& b,
 583                            const string16& c,
 584                            const string16& d) {
 585   return UTF16ToUTF8(GetStringF(message_id, a, b, c, d, NULL));
 586 }
 587
 588 string16 GetStringFUTF16(int message_id,
 589                          const string16& a) {
 590   return GetStringF(message_id, a, string16(), string16(), string16(), NULL);
 591 }
 592
 593 string16 GetStringFUTF16(int message_id,
 594                          const string16& a,
 595                          const string16& b) {
 596   return GetStringF(message_id, a, b, string16(), string16(), NULL);
 597 }
 598
 599 string16 GetStringFUTF16(int message_id,
 600                          const string16& a,
 601                          const string16& b,
 602                          const string16& c) {
 603   return GetStringF(message_id, a, b, c, string16(), NULL);
 604 }
 605
 606 string16 GetStringFUTF16(int message_id,
 607                          const string16& a,
 608                          const string16& b,
 609                          const string16& c,
 610                          const string16& d) {
 611   return GetStringF(message_id, a, b, c, d, NULL);
 612 }
 613
 614 std::wstring GetStringF(int message_id, const std::wstring& a, size_t* offset) {
 615   DCHECK(offset);
 616   std::vector<size_t> offsets;
 617   string16 result = GetStringF(message_id, WideToUTF16(a), string16(),
 618                                string16(), string16(), &offsets);
 619   DCHECK(offsets.size() == 1);
 620   *offset = offsets[0];
 621   return UTF16ToWide(result);
 622 }
 623
 624 std::wstring GetStringF(int message_id,
 625                         const std::wstring& a,
 626                         const std::wstring& b,
 627                         std::vector<size_t>* offsets) {
 628   return UTF16ToWide(GetStringF(message_id, WideToUTF16(a), WideToUTF16(b),
 629                                 string16(), string16(), offsets));
 630 }
 631
 632 string16 GetStringFUTF16(int message_id, const string16& a, size_t* offset) {
 633   DCHECK(offset);
 634   std::vector<size_t> offsets;
 635   string16 result = GetStringFUTF16(message_id, a, string16(), &offsets);
 636   DCHECK(offsets.size() == 1);
 637   *offset = offsets[0];
 638   return result;
 639 }
 640
 641 string16 GetStringFUTF16(int message_id,
 642                         const string16& a,
 643                         const string16& b,
 644                         std::vector<size_t>* offsets) {
 645   return GetStringF(message_id, a, b, string16(), string16(), offsets);
 646 }
 647
 648 std::wstring GetStringF(int message_id, int a) {
 649   return GetStringF(message_id, UTF8ToWide(base::IntToString(a)));
 650 }
 651
 652 std::wstring GetStringF(int message_id, int64 a) {
 653   return GetStringF(message_id, UTF8ToWide(base::Int64ToString(a)));
 654 }
 655
// Shortens |string| so that it fits in |length| characters, marking the cut
// with kElideString.
//
// - If |string| already fits, it is returned unchanged.
// - If |length| is 0, an empty string is returned.
// - If |length| is 1, only kElideString is returned.
// - Otherwise at most |length| - 1 leading characters are kept, followed by
//   kElideString. The cut is placed at a line-break boundary found by ICU
//   (default locale) when there is one, and trailing whitespace, control
//   characters and non-spacing marks before the cut are dropped. If nothing
//   but such characters would remain, only kElideString is returned.
std::wstring TruncateString(const std::wstring& string, size_t length) {
  if (string.size() <= length)
    // String fits, return it.
    return string;

  if (length == 0) {
    // No room for the elide string, return an empty string.
    return std::wstring(L"");
  }
  // Reserve one character for the elide string.
  size_t max = length - 1;

  if (max == 0) {
    // Just enough room for the elide string.
    return kElideString;
  }

  // ICU works on UTF-16 text, so convert when wchar_t is UTF-32.
  // NOTE(review): on WCHAR_T_IS_UTF32 builds |max| and |index| are applied
  // both to this UTF-16 copy and to the wide |string|; the two only disagree
  // when the text contains characters outside the BMP - confirm whether that
  // matters.
#if defined(WCHAR_T_IS_UTF32)
  const string16 string_utf16 = WideToUTF16(string);
#else
  const std::wstring &string_utf16 = string;
#endif
  // Use a line iterator to find the first boundary.
  UErrorCode status = U_ZERO_ERROR;
  scoped_ptr<icu::RuleBasedBreakIterator> bi(
      static_cast<icu::RuleBasedBreakIterator*>(
          icu::RuleBasedBreakIterator::createLineInstance(
              icu::Locale::getDefault(), status)));
  // Without a break iterator, fall back to a hard cut at |max|.
  if (U_FAILURE(status))
    return string.substr(0, max) + kElideString;
  bi->setText(string_utf16.c_str());
  // Last line-break boundary strictly before |max|.
  int32_t index = bi->preceding(static_cast<int32_t>(max));
  if (index == icu::BreakIterator::DONE) {
    index = static_cast<int32_t>(max);
  } else {
    // Found a valid break (may be the beginning of the string). Now use
    // a character iterator to find the previous non-whitespace character.
    icu::StringCharacterIterator char_iterator(string_utf16.c_str());
    if (index == 0) {
      // No valid line breaks. Start at the end again. This ensures we break
      // on a valid character boundary.
      index = static_cast<int32_t>(max);
    }
    char_iterator.setIndex(index);
    // Walk backwards over whitespace, control characters and non-spacing
    // marks so that none of them ends up right before the elide string.
    while (char_iterator.hasPrevious()) {
      char_iterator.previous();
      if (!(u_isspace(char_iterator.current()) ||
            u_charType(char_iterator.current()) == U_CONTROL_CHAR ||
            u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) {
        // Not a whitespace character. Advance the iterator so that we
        // include the current character in the truncated string.
        char_iterator.next();
        break;
      }
    }
    if (char_iterator.hasPrevious()) {
      // Found a valid break point.
      index = char_iterator.getIndex();
    } else {
      // String has leading whitespace, return the elide string.
      return kElideString;
    }
  }
  return string.substr(0, index) + kElideString;
}
 720
 721 string16 ToLower(const string16& string) {
 722   icu::UnicodeString lower_u_str(
 723       icu::UnicodeString(string.c_str()).toLower(icu::Locale::getDefault()));
 724   string16 result;
 725   lower_u_str.extract(0, lower_u_str.length(),
 726                       WriteInto(&result, lower_u_str.length() + 1));
 727   return result;
 728 }
 729
 730 string16 ToUpper(const string16& string) {
 731   icu::UnicodeString upper_u_str(
 732       icu::UnicodeString(string.c_str()).toUpper(icu::Locale::getDefault()));
 733   string16 result;
 734   upper_u_str.extract(0, upper_u_str.length(),
 735                       WriteInto(&result, upper_u_str.length() + 1));
 736   return result;
 737 }
 738
 739 // Compares the character data stored in two different string16 strings by
 740 // specified Collator instance.
 741 UCollationResult CompareString16WithCollator(const icu::Collator* collator,
 742                                              const string16& lhs,
 743                                              const string16& rhs) {
 744   DCHECK(collator);
 745   UErrorCode error = U_ZERO_ERROR;
 746   UCollationResult result = collator->compare(
 747       static_cast<const UChar*>(lhs.c_str()), static_cast<int>(lhs.length()),
 748       static_cast<const UChar*>(rhs.c_str()), static_cast<int>(rhs.length()),
 749       error);
 750   DCHECK(U_SUCCESS(error));
 751   return result;
 752 }
 753
 754 // Compares the character data stored in two different std:wstring strings by
 755 // specified Collator instance.
 756 UCollationResult CompareStringWithCollator(const icu::Collator* collator,
 757                                            const std::wstring& lhs,
 758                                            const std::wstring& rhs) {
 759   DCHECK(collator);
 760   UCollationResult result;
 761 #if defined(WCHAR_T_IS_UTF32)
 762   // Need to convert to UTF-16 to be compatible with UnicodeString's
 763   // constructor.
 764   string16 lhs_utf16 = WideToUTF16(lhs);
 765   string16 rhs_utf16 = WideToUTF16(rhs);
 766
 767   result = CompareString16WithCollator(collator, lhs_utf16, rhs_utf16);
 768 #else
 769   result = CompareString16WithCollator(collator, lhs, rhs);
 770 #endif
 771   return result;
 772 }
 773
 774 // Specialization of operator() method for std::wstring version.
 775 template <>
 776 bool StringComparator<std::wstring>::operator()(const std::wstring& lhs,
 777                                                 const std::wstring& rhs) {
 778   // If we can not get collator instance for specified locale, just do simple
 779   // string compare.
 780   if (!collator_)
 781     return lhs < rhs;
 782   return CompareStringWithCollator(collator_, lhs, rhs) == UCOL_LESS;
 783 };
 784
 785 #if !defined(WCHAR_T_IS_UTF16)
 786 // Specialization of operator() method for string16 version.
 787 template <>
 788 bool StringComparator<string16>::operator()(const string16& lhs,
 789                                             const string16& rhs) {
 790   // If we can not get collator instance for specified locale, just do simple
 791   // string compare.
 792   if (!collator_)
 793     return lhs < rhs;
 794   return CompareString16WithCollator(collator_, lhs, rhs) == UCOL_LESS;
 795 };
 796 #endif  // !defined(WCHAR_T_IS_UTF16)
 797
 798 void SortStrings(const std::string& locale,
 799                  std::vector<std::wstring>* strings) {
 800   SortVectorWithStringKey(locale, strings, false);
 801 }
 802
 803 void SortStrings16(const std::string& locale,
 804                    std::vector<string16>* strings) {
 805   SortVectorWithStringKey(locale, strings, false);
 806 }
 807
 808 const std::vector<std::string>& GetAvailableLocales() {
 809   static std::vector<std::string> locales;
 810   if (locales.empty()) {
 811     int num_locales = uloc_countAvailable();
 812     for (int i = 0; i < num_locales; ++i) {
 813       std::string locale_name = uloc_getAvailable(i);
 814       // Filter out the names that have aliases.
 815       if (IsDuplicateName(locale_name))
 816         continue;
 817       // Filter out locales for which we have only partially populated data
 818       // and to which Chrome is not localized.
 819       if (IsLocalePartiallyPopulated(locale_name))
 820         continue;
 821       if (!IsLocaleSupportedByOS(locale_name))
 822         continue;
 823       // Normalize underscores to hyphens because that's what our locale files
 824       // use.
 825       std::replace(locale_name.begin(), locale_name.end(), '_', '-');
 826
 827       // Map the Chinese locale names over to zh-CN and zh-TW.
 828       if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
 829         locale_name = "zh-CN";
 830       } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
 831         locale_name = "zh-TW";
 832       }
 833       locales.push_back(locale_name);
 834     }
 835
 836     // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
 837     locales.push_back("es-419");
 838   }
 839   return locales;
 840 }
 841
 842 void GetAcceptLanguagesForLocale(const std::string& display_locale,
 843                                  std::vector<std::string>* locale_codes) {
 844   for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
 845     if (!IsLocaleNameTranslated(kAcceptLanguageList[i], display_locale))
 846       // TODO(jungshik) : Put them at the of the list with language codes
 847       // enclosed by brackets instead of skipping.
 848         continue;
 849     locale_codes->push_back(kAcceptLanguageList[i]);
 850   }
 851 }
 852
 853 }  // namespace l10n_util