Clean up VectorEffects::init
[hiphop-php.git] / hphp / runtime / ext / ext_icu_ucnv.cpp
blobb6f78a793a9fdc1982ccb24485ac2c1aac9045cc
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/ext/ext_icu_ucnv.h"
19 #include "hphp/runtime/vm/jit/translator-inline.h"
21 namespace HPHP {
22 ///////////////////////////////////////////////////////////////////////////////
24 #define UCNV_REASON_CONST(v) \
25 const int64_t q_UConverter$$REASON_ ## v = UCNV_ ## v ;
26 #define UCNV_TYPE_CONST(v) \
27 const int64_t q_UConverter$$ ## v = UCNV_ ## v ;
29 UCNV_REASON_CONST(UNASSIGNED);
30 UCNV_REASON_CONST(ILLEGAL);
31 UCNV_REASON_CONST(IRREGULAR);
32 UCNV_REASON_CONST(RESET);
33 UCNV_REASON_CONST(CLOSE);
34 UCNV_REASON_CONST(CLONE);
36 UCNV_TYPE_CONST(UNSUPPORTED_CONVERTER);
37 UCNV_TYPE_CONST(SBCS);
38 UCNV_TYPE_CONST(DBCS);
39 UCNV_TYPE_CONST(MBCS);
40 UCNV_TYPE_CONST(LATIN_1);
41 UCNV_TYPE_CONST(UTF8);
42 UCNV_TYPE_CONST(UTF16_BigEndian);
43 UCNV_TYPE_CONST(UTF16_LittleEndian);
44 UCNV_TYPE_CONST(UTF32_BigEndian);
45 UCNV_TYPE_CONST(UTF32_LittleEndian);
46 UCNV_TYPE_CONST(EBCDIC_STATEFUL);
47 UCNV_TYPE_CONST(ISO_2022);
48 UCNV_TYPE_CONST(LMBCS_1);
49 UCNV_TYPE_CONST(LMBCS_2);
50 UCNV_TYPE_CONST(LMBCS_3);
51 UCNV_TYPE_CONST(LMBCS_4);
52 UCNV_TYPE_CONST(LMBCS_5);
53 UCNV_TYPE_CONST(LMBCS_6);
54 UCNV_TYPE_CONST(LMBCS_8);
55 UCNV_TYPE_CONST(LMBCS_11);
56 UCNV_TYPE_CONST(LMBCS_16);
57 UCNV_TYPE_CONST(LMBCS_17);
58 UCNV_TYPE_CONST(LMBCS_18);
59 UCNV_TYPE_CONST(LMBCS_19);
60 UCNV_TYPE_CONST(LMBCS_LAST);
61 UCNV_TYPE_CONST(HZ);
62 UCNV_TYPE_CONST(SCSU);
63 UCNV_TYPE_CONST(ISCII);
64 UCNV_TYPE_CONST(US_ASCII);
65 UCNV_TYPE_CONST(UTF7);
66 UCNV_TYPE_CONST(BOCU1);
67 UCNV_TYPE_CONST(UTF16);
68 UCNV_TYPE_CONST(UTF32);
69 UCNV_TYPE_CONST(CESU8);
70 UCNV_TYPE_CONST(IMAP_MAILBOX);
72 static StaticString s_toUCallback("toUCallback");
73 static StaticString s_fromUCallback("fromUCallback");
75 #define THROW_UFAILURE(fname, uerr, ierr) throwFailure(uerr, #fname, ierr);
77 c_UConverter::c_UConverter(Class* cb)
78 : ExtObjectData(cb), m_src(NULL), m_dest(NULL) {
79 m_error.code = U_ZERO_ERROR;
80 m_error.custom_error_message = "";
83 c_UConverter::~c_UConverter() { }
85 void c_UConverter::throwFailure(UErrorCode error, const char *fname,
86 intl_error &merror) {
87 char message[1024];
88 snprintf(message, sizeof(message), "%s() returned error %ld: %s",
89 fname, (long)error, u_errorName(error));
90 merror.code = error;
91 merror.custom_error_message = String((const char*)message, CopyString);
94 void c_UConverter::t___construct(CStrRef toEncoding, CStrRef fromEncoding) {
95 setEncoding(toEncoding, &m_dest, m_error);
96 setEncoding(fromEncoding, &m_src, m_error);
97 setCallback(m_dest);
98 setCallback(m_src);
101 Variant c_UConverter::t___destruct() {
102 if (m_src) {
103 ucnv_close(m_src);
105 if (m_dest) {
106 ucnv_close(m_dest);
109 return uninit_null();
112 /* get/set source/dest encodings */
114 #define TARGET_CHECK(args, len) \
115 checkLimits(args->targetLimit - args->target, len)
116 bool c_UConverter::checkLimits(int64_t available, int64_t needed) {
117 if (needed > available) {
118 THROW_UFAILURE(appendUTarget, U_BUFFER_OVERFLOW_ERROR, m_error);
119 return false;
121 return true;
124 void c_UConverter::appendToUTarget(Variant val,
125 UConverterToUnicodeArgs *args) {
126 if (val.isNull()) {
127 // Ignore
128 return;
130 if (val.isInteger()) {
131 int64_t lval = val.toInt64();
132 if (lval < 0 || lval > 0x10FFFF) {
133 THROW_UFAILURE(appendToUTarget, U_ILLEGAL_ARGUMENT_ERROR, m_error);
134 return;
136 if (lval > 0xFFFF) {
137 if (TARGET_CHECK(args, 2)) {
138 *(args->target++) = (UChar)(((lval - 0x10000) >> 10) | 0xD800);
139 *(args->target++) = (UChar)(((lval - 0x10000) & 0x3FF) | 0xDC00);
141 return;
143 if (TARGET_CHECK(args, 1)) {
144 *(args->target++) = (UChar)lval;
146 return;
148 if (val.isString()) {
149 const char *strval = val.toString().data();
150 int32_t i = 0, strlen = val.toString().size();
151 while((i != strlen) && TARGET_CHECK(args, 1)) {
152 UChar c;
153 U8_NEXT(strval, i, strlen, c);
154 *(args->target++) = c;
156 return;
158 if (val.isArray()) {
159 for(ArrayIter it(val.toArray()); it; ++it) {
160 appendToUTarget(it.second(), args);
162 return;
164 THROW_UFAILURE(appendToTarget, U_ILLEGAL_ARGUMENT_ERROR, m_error);
167 void c_UConverter::ucnvToUCallback(c_UConverter *objval,
168 UConverterToUnicodeArgs *args,
169 const char *codeUnits, int32_t length,
170 UConverterCallbackReason reason,
171 UErrorCode *pErrorCode) {
172 String source(args->source, args->sourceLimit - args->source, CopyString);
173 Variant errRef((int64_t)*pErrorCode);
174 Variant ret = objval->o_invoke_few_args(
175 s_toUCallback, 4,
176 reason, source, String(codeUnits, length, CopyString), strongBind(errRef));
177 if (errRef.is(KindOfInt64)) {
178 *pErrorCode = (UErrorCode)errRef.toInt64();
179 } else {
180 throwFailure(U_ILLEGAL_ARGUMENT_ERROR, "ucnvToUCallback()",
181 objval->m_error);
183 objval->appendToUTarget(ret, args);
186 void c_UConverter::appendFromUTarget(Variant val,
187 UConverterFromUnicodeArgs *args) {
188 if (val.isNull()) {
189 // ignore
190 return;
192 if (val.isInteger()) {
193 int64_t lval = val.toInt64();
194 if (lval < 0 || lval > 255) {
195 THROW_UFAILURE(appendFromUTarget, U_ILLEGAL_ARGUMENT_ERROR, m_error);
196 return;
198 if (TARGET_CHECK(args, 1)) {
199 *(args->target++) = (char)lval;
201 return;
203 if (val.isString()) {
204 int32_t strlen = val.toString().size();
205 if (TARGET_CHECK(args, strlen)) {
206 memcpy(args->target, val.toString().data(), strlen);
207 args->target += strlen;
209 return;
211 if (val.isArray()) {
212 for(ArrayIter it(val.toArray()); it; ++it) {
213 appendFromUTarget(it.second(), args);
215 return;
217 THROW_UFAILURE(appendFromUTarget, U_ILLEGAL_ARGUMENT_ERROR, m_error);
220 void c_UConverter::ucnvFromUCallback(c_UConverter *objval,
221 UConverterFromUnicodeArgs *args,
222 const UChar *codeUnits, int32_t length,
223 UChar32 codePoint,
224 UConverterCallbackReason reason,
225 UErrorCode *pErrorCode) {
226 Array source = Array::Create();
227 for(int i = 0; i < length; i++) {
228 UChar32 c;
229 U16_NEXT(codeUnits, i, length, c);
230 source.append((int64_t)c);
232 Variant errRef((int64_t)*pErrorCode);
233 Variant ret =
234 objval->o_invoke_few_args(
235 s_fromUCallback, 4,
236 reason, source, (int64_t)codePoint, strongBind(errRef));
237 if (errRef.is(KindOfInt64)) {
238 *pErrorCode = (UErrorCode)errRef.toInt64();
239 } else {
240 throwFailure(U_ILLEGAL_ARGUMENT_ERROR, "ucnvFromUCallback()",
241 objval->m_error);
243 objval->appendFromUTarget(ret, args);
246 bool c_UConverter::setCallback(UConverter *cnv) {
247 if (o_getClassName().get()->isame(String("UConverter").get())) {
248 return true;
251 UErrorCode error = U_ZERO_ERROR;
252 ucnv_setToUCallBack(cnv, (UConverterToUCallback)ucnvToUCallback,
253 (const void*)this, NULL, NULL, &error);
254 if (U_FAILURE(error)) {
255 THROW_UFAILURE(ucnv_setToUCallback, error, m_error);
256 ucnv_close(cnv);
257 return false;
259 error = U_ZERO_ERROR;
260 ucnv_setFromUCallBack(cnv, (UConverterFromUCallback)ucnvFromUCallback,
261 (const void*)this, NULL, NULL, &error);
262 if (U_FAILURE(error)) {
263 THROW_UFAILURE(ucnv_setFromUCallback, error, m_error);
264 ucnv_close(cnv);
265 return false;
268 return true;
271 bool c_UConverter::setEncoding(CStrRef encoding, UConverter **pcnv,
272 intl_error &err) {
273 UErrorCode error = U_ZERO_ERROR;
274 UConverter *cnv = ucnv_open(encoding.data(), &error);
276 if (error == U_AMBIGUOUS_ALIAS_WARNING) {
277 UErrorCode getname_error = U_ZERO_ERROR;
278 const char *actual_encoding = ucnv_getName(cnv, &getname_error);
279 if (U_FAILURE(getname_error)) {
280 actual_encoding = "(unknown)";
282 raise_warning("Ambiguous encoding specified, using %s", actual_encoding);
283 } else if (U_FAILURE(error)) {
284 THROW_UFAILURE(ucnv_open, error, err);
285 return false;
288 if (*pcnv) {
289 ucnv_close(*pcnv);
291 *pcnv = cnv;
293 return true;
296 void c_UConverter::t_setsourceencoding(CStrRef encoding) {
297 setEncoding(encoding, &m_src, m_error);
300 void c_UConverter::t_setdestinationencoding(CStrRef encoding) {
301 setEncoding(encoding, &m_dest, m_error);
304 String c_UConverter::t_getsourceencoding() {
305 if (!m_src) {
306 return uninit_null();
309 UErrorCode error = U_ZERO_ERROR;
310 const char *name = ucnv_getName(m_src, &error);
311 if (U_FAILURE(error)) {
312 THROW_UFAILURE(ucnv_getName, error, m_error);
313 return uninit_null();
316 return String(name);
319 String c_UConverter::t_getdestinationencoding() {
320 if (!m_dest) {
321 return uninit_null();
324 UErrorCode error = U_ZERO_ERROR;
325 const char *name = ucnv_getName(m_dest, &error);
326 if (U_FAILURE(error)) {
327 THROW_UFAILURE(ucnv_getName, error, m_error);
328 return uninit_null();
331 return String(name);
334 /* Get algorithmic types */
336 int64_t c_UConverter::t_getsourcetype() {
337 if (!m_src) {
338 return UCNV_UNSUPPORTED_CONVERTER;
341 return ucnv_getType(m_src);
344 int64_t c_UConverter::t_getdestinationtype() {
345 if (!m_dest) {
346 return UCNV_UNSUPPORTED_CONVERTER;
349 return ucnv_getType(m_dest);
352 /* Basic substitution */
354 bool c_UConverter::setSubstChars(String chars, UConverter *cnv,
355 intl_error &err) {
356 UErrorCode error = U_ZERO_ERROR;
357 ucnv_setSubstChars(cnv, chars.data(), chars.size(), &error);
358 if (U_FAILURE(error)) {
359 THROW_UFAILURE(ucnv_setSubstChars, error, err);
360 return false;
362 return true;
365 bool c_UConverter::t_setsubstchars(CStrRef chars) {
366 return setSubstChars(chars, m_dest, m_error) &&
367 setSubstChars(chars, m_src, m_error);
370 String c_UConverter::t_getsubstchars() {
371 UErrorCode error = U_ZERO_ERROR;
372 char chars[127];
373 int8_t chars_len = sizeof(chars);
375 ucnv_getSubstChars(m_src, chars, &chars_len, &error);
376 if (U_FAILURE(error)) {
377 THROW_UFAILURE(ucnv_getSubstChars, error, m_error);
378 return uninit_null();
381 return String(chars, chars_len, CopyString);
384 /* Callbacks */
386 Variant c_UConverter::defaultCallback(int64_t reason, VRefParam error) {
387 switch(reason) {
388 case UCNV_UNASSIGNED:
389 case UCNV_ILLEGAL:
390 case UCNV_IRREGULAR:
391 error = U_ZERO_ERROR;
392 return t_getsubstchars();
395 return uninit_null();
398 Variant c_UConverter::t_fromucallback(int64_t reason,
399 CArrRef source, int64_t codepoint,
400 VRefParam error) {
401 return defaultCallback(reason, error);
404 Variant c_UConverter::t_toucallback(int64_t reason,
405 CStrRef source, CStrRef codeunits,
406 VRefParam error) {
407 return defaultCallback(reason, error);
410 /* Main workhorse functions */
412 Variant c_UConverter::t_convert(CStrRef str, bool reverse) {
413 SYNC_VM_REGS_SCOPED();
414 return doConvert(str, reverse ? m_src : m_dest,
415 reverse ? m_dest : m_src, m_error);
418 String c_UConverter::doConvert(CStrRef str,
419 UConverter *toCnv, UConverter *fromCnv,
420 intl_error &err) {
421 UErrorCode error = U_ZERO_ERROR;
423 if (!fromCnv || !toCnv) {
424 err.code = U_INVALID_STATE_ERROR;
425 err.custom_error_message = "Internal converters not initialized";
426 return uninit_null();
429 /* Convert to UChar pivot encoding */
430 int32_t temp_len = ucnv_toUChars(fromCnv, NULL, 0,
431 str.c_str(), str.size(), &error);
432 if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
433 THROW_UFAILURE(ucnv_toUChars, error, err);
434 return uninit_null();
436 // Explicitly include the space for a \u0000 UChar since String
437 // only allocates one extra byte (not the 2 needed)
438 String tempStr(sizeof(UChar) * (temp_len + 1), ReserveString);
439 UChar *temp = (UChar*) tempStr.mutableSlice().ptr;
441 error = U_ZERO_ERROR;
442 temp_len = ucnv_toUChars(fromCnv, temp, temp_len,
443 str.c_str(), str.size(), &error);
444 if (U_FAILURE(error)) {
445 THROW_UFAILURE(ucnv_toUChars, error, err);
446 return uninit_null();
448 temp[temp_len] = 0;
450 /* Convert to final encoding */
451 error = U_ZERO_ERROR;
452 int32_t dest_len = ucnv_fromUChars(toCnv, NULL, 0,
453 temp, temp_len, &error);
454 if (U_FAILURE(error) && error != U_BUFFER_OVERFLOW_ERROR) {
455 THROW_UFAILURE(ucnv_fromUChars, error, err);
456 return uninit_null();
458 String destStr(dest_len, ReserveString);
459 char *dest = (char*) destStr.mutableSlice().ptr;
461 error = U_ZERO_ERROR;
462 dest_len = ucnv_fromUChars(toCnv, dest, dest_len,
463 temp, temp_len, &error);
464 if (U_FAILURE(error)) {
465 THROW_UFAILURE(ucnv_fromUChars, error, err);
466 return uninit_null();
468 return destStr.setSize(dest_len);
471 const StaticString
472 s_from_subst("from_subst"),
473 s_to_subst("to_subst");
475 Variant c_UConverter::ti_transcode(CStrRef str, CStrRef toEncoding,
476 CStrRef fromEncoding, CArrRef options) {
477 UConverter *fromCnv = NULL, *toCnv = NULL;
478 if (!setEncoding(fromEncoding, &fromCnv, s_intl_error->m_error)) {
479 return uninit_null();
481 if (!setEncoding(toEncoding, &toCnv, s_intl_error->m_error)) {
482 return uninit_null();
484 if (options.exists(s_from_subst) &&
485 !setSubstChars(options[s_from_subst].toString(), fromCnv,
486 s_intl_error->m_error)) {
487 return uninit_null();
489 if (options.exists(s_to_subst) &&
490 !setSubstChars(options[s_to_subst].toString(), toCnv,
491 s_intl_error->m_error)) {
492 return uninit_null();
494 Variant ret = doConvert(str, toCnv, fromCnv, s_intl_error->m_error);
495 ucnv_close(toCnv);
496 ucnv_close(fromCnv);
497 return ret;
500 /* ext/intl error handling */
502 int64_t c_UConverter::t_geterrorcode() {
503 return m_error.code;
506 String c_UConverter::t_geterrormessage() {
507 return m_error.custom_error_message;
510 /* Enumerators and lookups */
512 #define UCNV_REASON_CASE(v) case UCNV_ ## v : return String("REASON_" #v );
513 String c_UConverter::ti_reasontext(int64_t reason) {
514 switch (reason) {
515 UCNV_REASON_CASE(UNASSIGNED)
516 UCNV_REASON_CASE(ILLEGAL)
517 UCNV_REASON_CASE(IRREGULAR)
518 UCNV_REASON_CASE(RESET)
519 UCNV_REASON_CASE(CLOSE)
520 UCNV_REASON_CASE(CLONE)
521 default:
522 raise_warning("Unknown UConverterCallbackReason: %ld", (long)reason);
523 return uninit_null();
527 Array c_UConverter::ti_getavailable() {
528 int32_t i, count = ucnv_countAvailable();
529 Array ret = Array::Create();
531 for(i = 0; i < count; ++i) {
532 ret.append(ucnv_getAvailableName(i));
535 return ret;
538 Array c_UConverter::ti_getaliases(CStrRef encoding) {
539 UErrorCode error = U_ZERO_ERROR;
540 int16_t i, count = ucnv_countAliases(encoding.data(), &error);
542 if (U_FAILURE(error)) {
543 THROW_UFAILURE(ucnv_getAliases, error, s_intl_error->m_error);
544 return uninit_null().toArray();
547 Array ret = Array::Create();
548 for(i = 0; i < count; ++i) {
549 error = U_ZERO_ERROR;
550 const char *alias = ucnv_getAlias(encoding.c_str(), i, &error);
551 if (U_FAILURE(error)) {
552 THROW_UFAILURE(ucnv_getAlias, error, s_intl_error->m_error);
553 return uninit_null().toArray();
555 ret.append(alias);
557 return ret;
560 Array c_UConverter::ti_getstandards() {
561 int16_t i, count = ucnv_countStandards();
562 Array ret = Array::Create();
564 for(i = 0; i < count; ++i) {
565 UErrorCode error = U_ZERO_ERROR;
566 const char *name = ucnv_getStandard(i, &error);
567 if (U_FAILURE(error)) {
568 THROW_UFAILURE(ucnv_getStandard, error, s_intl_error->m_error);
569 return uninit_null().toArray();
571 ret.append(name);
573 return ret;
576 String c_UConverter::ti_getstandardname(CStrRef name, CStrRef standard) {
577 UErrorCode error = U_ZERO_ERROR;
578 const char *standard_name = ucnv_getStandardName(name.data(),
579 standard.data(),
580 &error);
582 if (U_FAILURE(error)) {
583 THROW_UFAILURE(ucnv_getStandardName, error, s_intl_error->m_error);
584 return uninit_null();
587 return String(standard_name, CopyString);
590 String c_UConverter::ti_getmimename(CStrRef name) {
591 return ti_getstandardname(name, "MIME");
594 ///////////////////////////////////////////////////////////////////////////////