1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/utf_string_conversions.h"
7 #include "base/string_piece.h"
8 #include "base/utf_string_conversion_utils.h"
10 using base::PrepareForUTF8Output
;
11 using base::PrepareForUTF16Or32Output
;
12 using base::ReadUnicodeCharacter
;
13 using base::WriteUnicodeCharacter
;
17 // Generalized Unicode converter -----------------------------------------------
19 // Converts the given source Unicode character type to the given destination
20 // Unicode character type as a STL string. The given input buffer and size
21 // determine the source, and the given output STL string will be replaced by
23 template<typename SRC_CHAR
, typename DEST_STRING
>
24 bool ConvertUnicode(const SRC_CHAR
* src
,
26 DEST_STRING
* output
) {
27 // ICU requires 32-bit numbers.
29 int32 src_len32
= static_cast<int32
>(src_len
);
30 for (int32 i
= 0; i
< src_len32
; i
++) {
32 if (ReadUnicodeCharacter(src
, src_len32
, &i
, &code_point
)) {
33 WriteUnicodeCharacter(code_point
, output
);
35 WriteUnicodeCharacter(0xFFFD, output
);
45 // UTF-8 <-> Wide --------------------------------------------------------------
47 bool WideToUTF8(const wchar_t* src
, size_t src_len
, std::string
* output
) {
48 PrepareForUTF8Output(src
, src_len
, output
);
49 return ConvertUnicode(src
, src_len
, output
);
52 std::string
WideToUTF8(const std::wstring
& wide
) {
54 // Ignore the success flag of this call, it will do the best it can for
55 // invalid input, which is what we want here.
56 WideToUTF8(wide
.data(), wide
.length(), &ret
);
60 bool UTF8ToWide(const char* src
, size_t src_len
, std::wstring
* output
) {
61 PrepareForUTF16Or32Output(src
, src_len
, output
);
62 return ConvertUnicode(src
, src_len
, output
);
65 std::wstring
UTF8ToWide(const base::StringPiece
& utf8
) {
67 UTF8ToWide(utf8
.data(), utf8
.length(), &ret
);
71 // UTF-16 <-> Wide -------------------------------------------------------------
73 #if defined(WCHAR_T_IS_UTF16)
75 // When wide == UTF-16, then conversions are a NOP.
76 bool WideToUTF16(const wchar_t* src
, size_t src_len
, string16
* output
) {
77 output
->assign(src
, src_len
);
81 string16
WideToUTF16(const std::wstring
& wide
) {
85 bool UTF16ToWide(const char16
* src
, size_t src_len
, std::wstring
* output
) {
86 output
->assign(src
, src_len
);
90 std::wstring
UTF16ToWide(const string16
& utf16
) {
94 #elif defined(WCHAR_T_IS_UTF32)
96 bool WideToUTF16(const wchar_t* src
, size_t src_len
, string16
* output
) {
98 // Assume that normally we won't have any non-BMP characters so the counts
100 output
->reserve(src_len
);
101 return ConvertUnicode(src
, src_len
, output
);
104 string16
WideToUTF16(const std::wstring
& wide
) {
106 WideToUTF16(wide
.data(), wide
.length(), &ret
);
110 bool UTF16ToWide(const char16
* src
, size_t src_len
, std::wstring
* output
) {
112 // Assume that normally we won't have any non-BMP characters so the counts
114 output
->reserve(src_len
);
115 return ConvertUnicode(src
, src_len
, output
);
118 std::wstring
UTF16ToWide(const string16
& utf16
) {
120 UTF16ToWide(utf16
.data(), utf16
.length(), &ret
);
124 #endif // defined(WCHAR_T_IS_UTF32)
126 // UTF16 <-> UTF8 --------------------------------------------------------------
128 #if defined(WCHAR_T_IS_UTF32)
130 bool UTF8ToUTF16(const char* src
, size_t src_len
, string16
* output
) {
131 PrepareForUTF16Or32Output(src
, src_len
, output
);
132 return ConvertUnicode(src
, src_len
, output
);
135 string16
UTF8ToUTF16(const std::string
& utf8
) {
137 // Ignore the success flag of this call, it will do the best it can for
138 // invalid input, which is what we want here.
139 UTF8ToUTF16(utf8
.data(), utf8
.length(), &ret
);
143 bool UTF16ToUTF8(const char16
* src
, size_t src_len
, std::string
* output
) {
144 PrepareForUTF8Output(src
, src_len
, output
);
145 return ConvertUnicode(src
, src_len
, output
);
148 std::string
UTF16ToUTF8(const string16
& utf16
) {
150 // Ignore the success flag of this call, it will do the best it can for
151 // invalid input, which is what we want here.
152 UTF16ToUTF8(utf16
.data(), utf16
.length(), &ret
);
156 #elif defined(WCHAR_T_IS_UTF16)
157 // Easy case since we can use the "wide" versions we already wrote above.
159 bool UTF8ToUTF16(const char* src
, size_t src_len
, string16
* output
) {
160 return UTF8ToWide(src
, src_len
, output
);
163 string16
UTF8ToUTF16(const std::string
& utf8
) {
164 return UTF8ToWide(utf8
);
167 bool UTF16ToUTF8(const char16
* src
, size_t src_len
, std::string
* output
) {
168 return WideToUTF8(src
, src_len
, output
);
171 std::string
UTF16ToUTF8(const string16
& utf16
) {
172 return WideToUTF8(utf16
);