base/utf_string_conversions.cc

   1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/utf_string_conversions.h"
   6
   7 #include "base/string_piece.h"
   8 #include "base/utf_string_conversion_utils.h"
   9
  10 using base::PrepareForUTF8Output;
  11 using base::PrepareForUTF16Or32Output;
  12 using base::ReadUnicodeCharacter;
  13 using base::WriteUnicodeCharacter;
  14
  15 namespace {
  16
  17 // Generalized Unicode converter -----------------------------------------------
  18
  19 // Converts the given source Unicode character type to the given destination
  20 // Unicode character type as a STL string. The given input buffer and size
  21 // determine the source, and the given output STL string will be replaced by
  22 // the result.
  23 template<typename SRC_CHAR, typename DEST_STRING>
  24 bool ConvertUnicode(const SRC_CHAR* src,
  25                     size_t src_len,
  26                     DEST_STRING* output) {
  27   // ICU requires 32-bit numbers.
  28   bool success = true;
  29   int32 src_len32 = static_cast<int32>(src_len);
  30   for (int32 i = 0; i < src_len32; i++) {
  31     uint32 code_point;
  32     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
  33       WriteUnicodeCharacter(code_point, output);
  34     } else {
  35       WriteUnicodeCharacter(0xFFFD, output);
  36       success = false;
  37     }
  38   }
  39
  40   return success;
  41 }
  42
  43 }  // namespace
  44
  45 // UTF-8 <-> Wide --------------------------------------------------------------
  46
  47 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
  48   PrepareForUTF8Output(src, src_len, output);
  49   return ConvertUnicode(src, src_len, output);
  50 }
  51
  52 std::string WideToUTF8(const std::wstring& wide) {
  53   std::string ret;
  54   // Ignore the success flag of this call, it will do the best it can for
  55   // invalid input, which is what we want here.
  56   WideToUTF8(wide.data(), wide.length(), &ret);
  57   return ret;
  58 }
  59
  60 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
  61   PrepareForUTF16Or32Output(src, src_len, output);
  62   return ConvertUnicode(src, src_len, output);
  63 }
  64
  65 std::wstring UTF8ToWide(const base::StringPiece& utf8) {
  66   std::wstring ret;
  67   UTF8ToWide(utf8.data(), utf8.length(), &ret);
  68   return ret;
  69 }
  70
  71 // UTF-16 <-> Wide -------------------------------------------------------------
  72
  73 #if defined(WCHAR_T_IS_UTF16)
  74
  75 // When wide == UTF-16, then conversions are a NOP.
  76 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
  77   output->assign(src, src_len);
  78   return true;
  79 }
  80
  81 string16 WideToUTF16(const std::wstring& wide) {
  82   return wide;
  83 }
  84
  85 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
  86   output->assign(src, src_len);
  87   return true;
  88 }
  89
  90 std::wstring UTF16ToWide(const string16& utf16) {
  91   return utf16;
  92 }
  93
  94 #elif defined(WCHAR_T_IS_UTF32)
  95
  96 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
  97   output->clear();
  98   // Assume that normally we won't have any non-BMP characters so the counts
  99   // will be the same.
 100   output->reserve(src_len);
 101   return ConvertUnicode(src, src_len, output);
 102 }
 103
 104 string16 WideToUTF16(const std::wstring& wide) {
 105   string16 ret;
 106   WideToUTF16(wide.data(), wide.length(), &ret);
 107   return ret;
 108 }
 109
 110 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
 111   output->clear();
 112   // Assume that normally we won't have any non-BMP characters so the counts
 113   // will be the same.
 114   output->reserve(src_len);
 115   return ConvertUnicode(src, src_len, output);
 116 }
 117
 118 std::wstring UTF16ToWide(const string16& utf16) {
 119   std::wstring ret;
 120   UTF16ToWide(utf16.data(), utf16.length(), &ret);
 121   return ret;
 122 }
 123
 124 #endif  // defined(WCHAR_T_IS_UTF32)
 125
 126 // UTF16 <-> UTF8 --------------------------------------------------------------
 127
 128 #if defined(WCHAR_T_IS_UTF32)
 129
 130 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
 131   PrepareForUTF16Or32Output(src, src_len, output);
 132   return ConvertUnicode(src, src_len, output);
 133 }
 134
 135 string16 UTF8ToUTF16(const std::string& utf8) {
 136   string16 ret;
 137   // Ignore the success flag of this call, it will do the best it can for
 138   // invalid input, which is what we want here.
 139   UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
 140   return ret;
 141 }
 142
 143 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
 144   PrepareForUTF8Output(src, src_len, output);
 145   return ConvertUnicode(src, src_len, output);
 146 }
 147
 148 std::string UTF16ToUTF8(const string16& utf16) {
 149   std::string ret;
 150   // Ignore the success flag of this call, it will do the best it can for
 151   // invalid input, which is what we want here.
 152   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
 153   return ret;
 154 }
 155
 156 #elif defined(WCHAR_T_IS_UTF16)
 157 // Easy case since we can use the "wide" versions we already wrote above.
 158
 159 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
 160   return UTF8ToWide(src, src_len, output);
 161 }
 162
 163 string16 UTF8ToUTF16(const std::string& utf8) {
 164   return UTF8ToWide(utf8);
 165 }
 166
 167 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
 168   return WideToUTF8(src, src_len, output);
 169 }
 170
 171 std::string UTF16ToUTF8(const string16& utf16) {
 172   return WideToUTF8(utf16);
 173 }
 174
 175 #endif