Port PluginObject fix downstream. See http://trac.webkit.org/changeset/61415/ for...
[chromium-blink-merge.git] / base / i18n / icu_string_conversions.cc
blob9014a7ba67cd5a2d615b232bb0b9b75494942a71
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/i18n/icu_string_conversions.h"
7 #include <vector>
9 #include "base/basictypes.h"
10 #include "base/logging.h"
11 #include "base/string_util.h"
12 #include "unicode/ucnv.h"
13 #include "unicode/ucnv_cb.h"
14 #include "unicode/ucnv_err.h"
15 #include "unicode/ustring.h"
17 namespace base {
19 namespace {
20 // ToUnicodeCallbackSubstitute() is based on UCNV_TO_U_CALLBACK_SUBSTITUTE
21 // in source/common/ucnv_err.c.
23 // Copyright (c) 1995-2006 International Business Machines Corporation
24 // and others
26 // All rights reserved.
29 // Permission is hereby granted, free of charge, to any person obtaining a
30 // copy of this software and associated documentation files (the "Software"),
31 // to deal in the Software without restriction, including without limitation
32 // the rights to use, copy, modify, merge, publish, distribute, and/or
33 // sell copies of the Software, and to permit persons to whom the Software
34 // is furnished to do so, provided that the above copyright notice(s) and
35 // this permission notice appear in all copies of the Software and that
36 // both the above copyright notice(s) and this permission notice appear in
37 // supporting documentation.
39 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
40 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
41 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
42 // OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
43 // INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT
44 // OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
45 // OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
46 // OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
47 // OR PERFORMANCE OF THIS SOFTWARE.
49 // Except as contained in this notice, the name of a copyright holder
50 // shall not be used in advertising or otherwise to promote the sale, use
51 // or other dealings in this Software without prior written authorization
52 // of the copyright holder.
54 // ___________________________________________________________________________
56 // All trademarks and registered trademarks mentioned herein are the property
57 // of their respective owners.
59 void ToUnicodeCallbackSubstitute(const void* context,
60 UConverterToUnicodeArgs *to_args,
61 const char* code_units,
62 int32_t length,
63 UConverterCallbackReason reason,
64 UErrorCode * err) {
65 static const UChar kReplacementChar = 0xFFFD;
66 if (reason <= UCNV_IRREGULAR) {
67 if (context == NULL ||
68 (*(reinterpret_cast<const char*>(context)) == 'i' &&
69 reason == UCNV_UNASSIGNED)) {
70 *err = U_ZERO_ERROR;
71 ucnv_cbToUWriteUChars(to_args, &kReplacementChar, 1, 0, err);
73 // else the caller must have set the error code accordingly.
75 // else ignore the reset, close and clone calls.
78 bool ConvertFromUTF16(UConverter* converter, const UChar* uchar_src,
79 int uchar_len, OnStringConversionError::Type on_error,
80 std::string* encoded) {
81 int encoded_max_length = UCNV_GET_MAX_BYTES_FOR_STRING(uchar_len,
82 ucnv_getMaxCharSize(converter));
83 encoded->resize(encoded_max_length);
85 UErrorCode status = U_ZERO_ERROR;
87 // Setup our error handler.
88 switch (on_error) {
89 case OnStringConversionError::FAIL:
90 ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0,
91 NULL, NULL, &status);
92 break;
93 case OnStringConversionError::SKIP:
94 case OnStringConversionError::SUBSTITUTE:
95 ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0,
96 NULL, NULL, &status);
97 break;
98 default:
99 NOTREACHED();
102 // ucnv_fromUChars returns size not including terminating null
103 int actual_size = ucnv_fromUChars(converter, &(*encoded)[0],
104 encoded_max_length, uchar_src, uchar_len, &status);
105 encoded->resize(actual_size);
106 ucnv_close(converter);
107 if (U_SUCCESS(status))
108 return true;
109 encoded->clear(); // Make sure the output is empty on error.
110 return false;
113 // Set up our error handler for ToUTF-16 converters
114 void SetUpErrorHandlerForToUChars(OnStringConversionError::Type on_error,
115 UConverter* converter, UErrorCode* status) {
116 switch (on_error) {
117 case OnStringConversionError::FAIL:
118 ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_STOP, 0,
119 NULL, NULL, status);
120 break;
121 case OnStringConversionError::SKIP:
122 ucnv_setToUCallBack(converter, UCNV_TO_U_CALLBACK_SKIP, 0,
123 NULL, NULL, status);
124 break;
125 case OnStringConversionError::SUBSTITUTE:
126 ucnv_setToUCallBack(converter, ToUnicodeCallbackSubstitute, 0,
127 NULL, NULL, status);
128 break;
129 default:
130 NOTREACHED();
134 inline UConverterType utf32_platform_endian() {
135 #if U_IS_BIG_ENDIAN
136 return UCNV_UTF32_BigEndian;
137 #else
138 return UCNV_UTF32_LittleEndian;
139 #endif
142 } // namespace
// Charset names for a few frequently used encodings. Presumably these are
// meant to be passed as the |codepage_name| argument of the conversion
// functions in this file -- confirm against the header.
144 const char kCodepageLatin1[] = "ISO-8859-1";  // Latin-1.
145 const char kCodepageUTF8[] = "UTF-8";
146 const char kCodepageUTF16BE[] = "UTF-16BE";  // UTF-16, big-endian.
147 const char kCodepageUTF16LE[] = "UTF-16LE";  // UTF-16, little-endian.
149 // Codepage <-> Wide/UTF-16 ---------------------------------------------------
151 bool UTF16ToCodepage(const string16& utf16,
152 const char* codepage_name,
153 OnStringConversionError::Type on_error,
154 std::string* encoded) {
155 encoded->clear();
157 UErrorCode status = U_ZERO_ERROR;
158 UConverter* converter = ucnv_open(codepage_name, &status);
159 if (!U_SUCCESS(status))
160 return false;
162 return ConvertFromUTF16(converter, utf16.c_str(),
163 static_cast<int>(utf16.length()), on_error, encoded);
166 bool CodepageToUTF16(const std::string& encoded,
167 const char* codepage_name,
168 OnStringConversionError::Type on_error,
169 string16* utf16) {
170 utf16->clear();
172 UErrorCode status = U_ZERO_ERROR;
173 UConverter* converter = ucnv_open(codepage_name, &status);
174 if (!U_SUCCESS(status))
175 return false;
177 // Even in the worst case, the maximum length in 2-byte units of UTF-16
178 // output would be at most the same as the number of bytes in input. There
179 // is no single-byte encoding in which a character is mapped to a
180 // non-BMP character requiring two 2-byte units.
182 // Moreover, non-BMP characters in legacy multibyte encodings
183 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are
184 // BOCU and SCSU, but we don't care about them.
185 size_t uchar_max_length = encoded.length() + 1;
187 SetUpErrorHandlerForToUChars(on_error, converter, &status);
188 int actual_size = ucnv_toUChars(converter, WriteInto(utf16, uchar_max_length),
189 static_cast<int>(uchar_max_length), encoded.data(),
190 static_cast<int>(encoded.length()), &status);
191 ucnv_close(converter);
192 if (!U_SUCCESS(status)) {
193 utf16->clear(); // Make sure the output is empty on error.
194 return false;
197 utf16->resize(actual_size);
198 return true;
201 bool WideToCodepage(const std::wstring& wide,
202 const char* codepage_name,
203 OnStringConversionError::Type on_error,
204 std::string* encoded) {
205 #if defined(WCHAR_T_IS_UTF16)
206 return UTF16ToCodepage(wide, codepage_name, on_error, encoded);
207 #elif defined(WCHAR_T_IS_UTF32)
208 encoded->clear();
210 UErrorCode status = U_ZERO_ERROR;
211 UConverter* converter = ucnv_open(codepage_name, &status);
212 if (!U_SUCCESS(status))
213 return false;
215 int utf16_len;
216 // When wchar_t is wider than UChar (16 bits), transform |wide| into a
217 // UChar* string. Size the UChar* buffer to be large enough to hold twice
218 // as many UTF-16 code units (UChar's) as there are Unicode code points,
219 // in case each code points translates to a UTF-16 surrogate pair,
220 // and leave room for a NUL terminator.
221 std::vector<UChar> utf16(wide.length() * 2 + 1);
222 u_strFromWCS(&utf16[0], utf16.size(), &utf16_len,
223 wide.c_str(), wide.length(), &status);
224 DCHECK(U_SUCCESS(status)) << "failed to convert wstring to UChar*";
226 return ConvertFromUTF16(converter, &utf16[0], utf16_len, on_error, encoded);
227 #endif // defined(WCHAR_T_IS_UTF32)
230 bool CodepageToWide(const std::string& encoded,
231 const char* codepage_name,
232 OnStringConversionError::Type on_error,
233 std::wstring* wide) {
234 #if defined(WCHAR_T_IS_UTF16)
235 return CodepageToUTF16(encoded, codepage_name, on_error, wide);
236 #elif defined(WCHAR_T_IS_UTF32)
237 wide->clear();
239 UErrorCode status = U_ZERO_ERROR;
240 UConverter* converter = ucnv_open(codepage_name, &status);
241 if (!U_SUCCESS(status))
242 return false;
244 // The maximum length in 4 byte unit of UTF-32 output would be
245 // at most the same as the number of bytes in input. In the worst
246 // case of GB18030 (excluding escaped-based encodings like ISO-2022-JP),
247 // this can be 4 times larger than actually needed.
248 size_t wchar_max_length = encoded.length() + 1;
250 SetUpErrorHandlerForToUChars(on_error, converter, &status);
251 int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter,
252 reinterpret_cast<char*>(WriteInto(wide, wchar_max_length)),
253 static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(),
254 static_cast<int>(encoded.length()), &status);
255 ucnv_close(converter);
256 if (!U_SUCCESS(status)) {
257 wide->clear(); // Make sure the output is empty on error.
258 return false;
261 // actual_size is # of bytes.
262 wide->resize(actual_size / sizeof(wchar_t));
263 return true;
264 #endif // defined(WCHAR_T_IS_UTF32)
267 } // namespace base