2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/ext/ext_icu_ucnv.h"
19 #include "hphp/runtime/vm/jit/translator-inline.h"
22 ///////////////////////////////////////////////////////////////////////////////
// Helpers that define an int64_t constant mirroring an ICU enumerator:
//   UCNV_REASON_CONST(X) -> q_UConverter$$REASON_X = UCNV_X
//   UCNV_TYPE_CONST(X)   -> q_UConverter$$X        = UCNV_X
// Both expansions already end in ';'.
#define UCNV_REASON_CONST(name) \
  const int64_t q_UConverter$$REASON_ ## name = UCNV_ ## name ;
#define UCNV_TYPE_CONST(name) \
  const int64_t q_UConverter$$ ## name = UCNV_ ## name ;
29 UCNV_REASON_CONST(UNASSIGNED
);
30 UCNV_REASON_CONST(ILLEGAL
);
31 UCNV_REASON_CONST(IRREGULAR
);
32 UCNV_REASON_CONST(RESET
);
33 UCNV_REASON_CONST(CLOSE
);
34 UCNV_REASON_CONST(CLONE
);
36 UCNV_TYPE_CONST(UNSUPPORTED_CONVERTER
);
37 UCNV_TYPE_CONST(SBCS
);
38 UCNV_TYPE_CONST(DBCS
);
39 UCNV_TYPE_CONST(MBCS
);
40 UCNV_TYPE_CONST(LATIN_1
);
41 UCNV_TYPE_CONST(UTF8
);
42 UCNV_TYPE_CONST(UTF16_BigEndian
);
43 UCNV_TYPE_CONST(UTF16_LittleEndian
);
44 UCNV_TYPE_CONST(UTF32_BigEndian
);
45 UCNV_TYPE_CONST(UTF32_LittleEndian
);
46 UCNV_TYPE_CONST(EBCDIC_STATEFUL
);
47 UCNV_TYPE_CONST(ISO_2022
);
48 UCNV_TYPE_CONST(LMBCS_1
);
49 UCNV_TYPE_CONST(LMBCS_2
);
50 UCNV_TYPE_CONST(LMBCS_3
);
51 UCNV_TYPE_CONST(LMBCS_4
);
52 UCNV_TYPE_CONST(LMBCS_5
);
53 UCNV_TYPE_CONST(LMBCS_6
);
54 UCNV_TYPE_CONST(LMBCS_8
);
55 UCNV_TYPE_CONST(LMBCS_11
);
56 UCNV_TYPE_CONST(LMBCS_16
);
57 UCNV_TYPE_CONST(LMBCS_17
);
58 UCNV_TYPE_CONST(LMBCS_18
);
59 UCNV_TYPE_CONST(LMBCS_19
);
60 UCNV_TYPE_CONST(LMBCS_LAST
);
62 UCNV_TYPE_CONST(SCSU
);
63 UCNV_TYPE_CONST(ISCII
);
64 UCNV_TYPE_CONST(US_ASCII
);
65 UCNV_TYPE_CONST(UTF7
);
66 UCNV_TYPE_CONST(BOCU1
);
67 UCNV_TYPE_CONST(UTF16
);
68 UCNV_TYPE_CONST(UTF32
);
69 UCNV_TYPE_CONST(CESU8
);
70 UCNV_TYPE_CONST(IMAP_MAILBOX
);
72 static StaticString
s_toUCallback("toUCallback");
73 static StaticString
s_fromUCallback("fromUCallback");
// Reports an ICU failure by calling throwFailure(uerr, "<fname>", ierr).
//   fname - bare name of the ICU function that failed (it is stringized)
//   uerr  - the UErrorCode to record
//   ierr  - the error object that receives the code/message
// The expansion is a single expression with no trailing ';' so the macro can
// be used as the body of an unbraced if/else; the caller writes the ';'.
#define THROW_UFAILURE(fname, uerr, ierr) throwFailure((uerr), #fname, (ierr))
77 c_UConverter::c_UConverter(Class
* cb
)
78 : ExtObjectData(cb
), m_src(NULL
), m_dest(NULL
) {
79 m_error
.code
= U_ZERO_ERROR
;
80 m_error
.custom_error_message
= "";
// Destructor: the body is empty, so no cleanup is performed here.
// NOTE(review): m_src / m_dest are presumably released elsewhere (e.g. by
// the __destruct handler) -- confirm.
83 c_UConverter::~c_UConverter() { }
// Formats "<fname>() returned error <code>: <ICU error name>" with snprintf
// (the name comes from u_errorName) and stores a copy of that text as
// merror.custom_error_message.
// NOTE(review): the declarations of `message` and `merror`, and any other
// field assignments on merror, are not visible in this excerpt -- confirm.
85 void c_UConverter::throwFailure(UErrorCode error
, const char *fname
,
88 snprintf(message
, sizeof(message
), "%s() returned error %ld: %s",
89 fname
, (long)error
, u_errorName(error
));
91 merror
.custom_error_message
= String((const char*)message
, CopyString
);
// Handler for __construct: calls setEncoding() for the destination converter
// (toEncoding -> m_dest) and then for the source converter
// (fromEncoding -> m_src), passing m_error to both calls.
// NOTE(review): the rest of the body is not visible in this excerpt.
94 void c_UConverter::t___construct(CStrRef toEncoding
, CStrRef fromEncoding
) {
95 setEncoding(toEncoding
, &m_dest
, m_error
);
96 setEncoding(fromEncoding
, &m_src
, m_error
);
// Handler for __destruct; the visible part returns uninit_null().
// NOTE(review): the statements before the return are not visible in this
// excerpt -- presumably the ICU converters are closed here; confirm.
101 Variant
c_UConverter::t___destruct() {
109 return uninit_null();
112 /* get/set source/dest encodings */
// True when the output buffer described by `args` has room for `len` more
// units: forwards (targetLimit - target) and len to checkLimits().
// Both macro arguments are parenthesized so that expressions such as
// `ptr + 1` or `a + b` expand with the intended precedence.
#define TARGET_CHECK(args, len) \
  checkLimits((args)->targetLimit - (args)->target, (len))
// Compares the space `needed` with the space `available`. When more is
// needed than is available, a U_BUFFER_OVERFLOW_ERROR is reported into
// m_error through THROW_UFAILURE (under the name "appendUTarget").
// NOTE(review): the return statements are not visible in this excerpt --
// presumably false on overflow and true otherwise, since callers use the
// result as a guard; confirm.
116 bool c_UConverter::checkLimits(int64_t available
, int64_t needed
) {
117 if (needed
> available
) {
118 THROW_UFAILURE(appendUTarget
, U_BUFFER_OVERFLOW_ERROR
, m_error
);
// Appends `val` to the UChar output of a to-Unicode conversion
// (args->target), dispatching on the runtime type of val:
//  - integer: treated as a code point; values outside 0..0x10FFFF are
//    reported as U_ILLEGAL_ARGUMENT_ERROR. It is written either as a
//    surrogate pair (requires room for 2 UChars) or as a single UChar.
//  - string: decoded with U8_NEXT, storing one UChar per decoded value for
//    as long as TARGET_CHECK reports room for one more.
//  - array: every element is appended recursively.
// The trailing THROW_UFAILURE reports U_ILLEGAL_ARGUMENT_ERROR (under the
// name "appendToTarget").
// NOTE(review): the conditions/returns between the visible statements (the
// test selecting the surrogate-pair path, the array check, early returns)
// are not visible in this excerpt -- confirm. Also verify how the string
// path treats decoded values that do not fit in one UChar.
124 void c_UConverter::appendToUTarget(Variant val
,
125 UConverterToUnicodeArgs
*args
) {
130 if (val
.isInteger()) {
131 int64_t lval
= val
.toInt64();
132 if (lval
< 0 || lval
> 0x10FFFF) {
133 THROW_UFAILURE(appendToUTarget
, U_ILLEGAL_ARGUMENT_ERROR
, m_error
);
137 if (TARGET_CHECK(args
, 2)) {
// First unit of the pair: top 10 bits of (lval - 0x10000), offset 0xD800.
138 *(args
->target
++) = (UChar
)(((lval
- 0x10000) >> 10) | 0xD800);
// Second unit of the pair: low 10 bits of (lval - 0x10000), offset 0xDC00.
139 *(args
->target
++) = (UChar
)(((lval
- 0x10000) & 0x3FF) | 0xDC00);
143 if (TARGET_CHECK(args
, 1)) {
144 *(args
->target
++) = (UChar
)lval
;
148 if (val
.isString()) {
149 const char *strval
= val
.toString().data();
150 int32_t i
= 0, strlen
= val
.toString().size();
151 while((i
!= strlen
) && TARGET_CHECK(args
, 1)) {
153 U8_NEXT(strval
, i
, strlen
, c
);
154 *(args
->target
++) = c
;
159 for(ArrayIter
it(val
.toArray()); it
; ++it
) {
160 appendToUTarget(it
.second(), args
);
164 THROW_UFAILURE(appendToTarget
, U_ILLEGAL_ARGUMENT_ERROR
, m_error
);
// To-Unicode callback handed to ICU (registered in setCallback via
// ucnv_setToUCallBack, with the c_UConverter object as context `objval`).
// Steps visible here:
//  1. copy the bytes between args->source and args->sourceLimit into a
//     String;
//  2. invoke a method on objval with (reason, source, codeUnits as a String,
//     a reference to the current error code);
//  3. if the error reference holds an integer, write it back to *pErrorCode;
//  4. append the method's return value to the output with appendToUTarget.
// NOTE(review): the method-name argument of o_invoke_few_args and the branch
// that leads to throwFailure(U_ILLEGAL_ARGUMENT_ERROR, "ucnvToUCallback()",
// ...) are not visible in this excerpt -- presumably s_toUCallback and the
// non-integer case respectively; confirm.
167 void c_UConverter::ucnvToUCallback(c_UConverter
*objval
,
168 UConverterToUnicodeArgs
*args
,
169 const char *codeUnits
, int32_t length
,
170 UConverterCallbackReason reason
,
171 UErrorCode
*pErrorCode
) {
172 String
source(args
->source
, args
->sourceLimit
- args
->source
, CopyString
);
173 Variant
errRef((int64_t)*pErrorCode
);
174 Variant ret
= objval
->o_invoke_few_args(
176 reason
, source
, String(codeUnits
, length
, CopyString
), strongBind(errRef
));
177 if (errRef
.is(KindOfInt64
)) {
178 *pErrorCode
= (UErrorCode
)errRef
.toInt64();
180 throwFailure(U_ILLEGAL_ARGUMENT_ERROR
, "ucnvToUCallback()",
183 objval
->appendToUTarget(ret
, args
);
// Appends `val` to the byte output of a from-Unicode conversion
// (args->target), dispatching on the runtime type of val:
//  - integer: must be within 0..255 (otherwise U_ILLEGAL_ARGUMENT_ERROR is
//    reported) and is written as a single char if there is room for one;
//  - string: copied with memcpy only if the whole string fits, then the
//    target pointer is advanced by its length;
//  - array: every element is appended recursively.
// The trailing THROW_UFAILURE reports U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): early returns and the array-type check between the visible
// statements are not visible in this excerpt -- confirm.
186 void c_UConverter::appendFromUTarget(Variant val
,
187 UConverterFromUnicodeArgs
*args
) {
192 if (val
.isInteger()) {
193 int64_t lval
= val
.toInt64();
194 if (lval
< 0 || lval
> 255) {
195 THROW_UFAILURE(appendFromUTarget
, U_ILLEGAL_ARGUMENT_ERROR
, m_error
);
198 if (TARGET_CHECK(args
, 1)) {
199 *(args
->target
++) = (char)lval
;
203 if (val
.isString()) {
204 int32_t strlen
= val
.toString().size();
205 if (TARGET_CHECK(args
, strlen
)) {
206 memcpy(args
->target
, val
.toString().data(), strlen
);
207 args
->target
+= strlen
;
212 for(ArrayIter
it(val
.toArray()); it
; ++it
) {
213 appendFromUTarget(it
.second(), args
);
217 THROW_UFAILURE(appendFromUTarget
, U_ILLEGAL_ARGUMENT_ERROR
, m_error
);
// From-Unicode callback handed to ICU (registered in setCallback via
// ucnv_setFromUCallBack, with the c_UConverter object as context `objval`).
// Steps visible here:
//  1. decode `codeUnits` with U16_NEXT into an Array of integer code points;
//  2. invoke a method on objval with (reason, source array, codePoint, a
//     reference to the current error code);
//  3. if the error reference holds an integer, write it back to *pErrorCode;
//  4. append the method's return value to the output with
//     appendFromUTarget.
// NOTE(review): U16_NEXT advances `i` itself and the for-loop header also
// does `i++`, so it looks like `i` moves twice per iteration and every other
// unit is skipped -- confirm and consider a while-loop.
// NOTE(review): the `codePoint` parameter, the declaration of `ret`, the
// method-name argument and the branch leading to throwFailure are not
// visible in this excerpt -- confirm.
220 void c_UConverter::ucnvFromUCallback(c_UConverter
*objval
,
221 UConverterFromUnicodeArgs
*args
,
222 const UChar
*codeUnits
, int32_t length
,
224 UConverterCallbackReason reason
,
225 UErrorCode
*pErrorCode
) {
226 Array source
= Array::Create();
227 for(int i
= 0; i
< length
; i
++) {
229 U16_NEXT(codeUnits
, i
, length
, c
);
230 source
.append((int64_t)c
);
232 Variant
errRef((int64_t)*pErrorCode
);
234 objval
->o_invoke_few_args(
236 reason
, source
, (int64_t)codePoint
, strongBind(errRef
));
237 if (errRef
.is(KindOfInt64
)) {
238 *pErrorCode
= (UErrorCode
)errRef
.toInt64();
240 throwFailure(U_ILLEGAL_ARGUMENT_ERROR
, "ucnvFromUCallback()",
243 objval
->appendFromUTarget(ret
, args
);
// Registers ucnvToUCallback and ucnvFromUCallback on `cnv`, passing `this`
// as the callback context. A failure from either ICU call is reported into
// m_error through THROW_UFAILURE. The function starts with a
// case-insensitive comparison of the object's class name with "UConverter".
// NOTE(review): what happens when the class name matches, and the return
// statements, are not visible in this excerpt -- presumably registration is
// skipped for the un-subclassed base class; confirm.
246 bool c_UConverter::setCallback(UConverter
*cnv
) {
247 if (o_getClassName().get()->isame(String("UConverter").get())) {
251 UErrorCode error
= U_ZERO_ERROR
;
252 ucnv_setToUCallBack(cnv
, (UConverterToUCallback
)ucnvToUCallback
,
253 (const void*)this, NULL
, NULL
, &error
);
254 if (U_FAILURE(error
)) {
255 THROW_UFAILURE(ucnv_setToUCallback
, error
, m_error
);
259 error
= U_ZERO_ERROR
;
260 ucnv_setFromUCallBack(cnv
, (UConverterFromUCallback
)ucnvFromUCallback
,
261 (const void*)this, NULL
, NULL
, &error
);
262 if (U_FAILURE(error
)) {
263 THROW_UFAILURE(ucnv_setFromUCallback
, error
, m_error
);
// Opens an ICU converter for `encoding` with ucnv_open.
//  - If ICU reports U_AMBIGUOUS_ALIAS_WARNING, a warning is raised that
//    names the converter actually chosen (ucnv_getName), or "(unknown)" if
//    that lookup fails.
//  - If ucnv_open fails, the error is reported through THROW_UFAILURE into
//    `err`.
// NOTE(review): the declaration of `err` (third parameter), the code that
// stores the new converter through `pcnv`, and the return statements are not
// visible in this excerpt -- confirm.
271 bool c_UConverter::setEncoding(CStrRef encoding
, UConverter
**pcnv
,
273 UErrorCode error
= U_ZERO_ERROR
;
274 UConverter
*cnv
= ucnv_open(encoding
.data(), &error
);
276 if (error
== U_AMBIGUOUS_ALIAS_WARNING
) {
277 UErrorCode getname_error
= U_ZERO_ERROR
;
278 const char *actual_encoding
= ucnv_getName(cnv
, &getname_error
);
279 if (U_FAILURE(getname_error
)) {
280 actual_encoding
= "(unknown)";
282 raise_warning("Ambiguous encoding specified, using %s", actual_encoding
);
283 } else if (U_FAILURE(error
)) {
284 THROW_UFAILURE(ucnv_open
, error
, err
);
296 void c_UConverter::t_setsourceencoding(CStrRef encoding
) {
297 setEncoding(encoding
, &m_src
, m_error
);
300 void c_UConverter::t_setdestinationencoding(CStrRef encoding
) {
301 setEncoding(encoding
, &m_dest
, m_error
);
// Handler for getSourceEncoding(): queries the name of the source converter
// (m_src) with ucnv_getName. If that call fails, the error is reported into
// m_error and uninit_null() is returned.
// NOTE(review): the condition guarding the first `return uninit_null()` and
// the final return are not visible in this excerpt -- presumably a check for
// a missing converter and `return name`; confirm.
304 String
c_UConverter::t_getsourceencoding() {
306 return uninit_null();
309 UErrorCode error
= U_ZERO_ERROR
;
310 const char *name
= ucnv_getName(m_src
, &error
);
311 if (U_FAILURE(error
)) {
312 THROW_UFAILURE(ucnv_getName
, error
, m_error
);
313 return uninit_null();
// Handler for getDestinationEncoding(): queries the name of the destination
// converter (m_dest) with ucnv_getName. If that call fails, the error is
// reported into m_error and uninit_null() is returned.
// NOTE(review): the condition guarding the first `return uninit_null()` and
// the final return are not visible in this excerpt -- presumably a check for
// a missing converter and `return name`; confirm.
319 String
c_UConverter::t_getdestinationencoding() {
321 return uninit_null();
324 UErrorCode error
= U_ZERO_ERROR
;
325 const char *name
= ucnv_getName(m_dest
, &error
);
326 if (U_FAILURE(error
)) {
327 THROW_UFAILURE(ucnv_getName
, error
, m_error
);
328 return uninit_null();
334 /* Get algorithmic types */
// Handler for getSourceType(): returns ucnv_getType(m_src), with an
// alternative path that returns UCNV_UNSUPPORTED_CONVERTER.
// NOTE(review): the condition selecting the UCNV_UNSUPPORTED_CONVERTER path
// is not visible in this excerpt -- presumably m_src being unset; confirm.
336 int64_t c_UConverter::t_getsourcetype() {
338 return UCNV_UNSUPPORTED_CONVERTER
;
341 return ucnv_getType(m_src
);
// Handler for getDestinationType(): returns ucnv_getType(m_dest), with an
// alternative path that returns UCNV_UNSUPPORTED_CONVERTER.
// NOTE(review): the condition selecting the UCNV_UNSUPPORTED_CONVERTER path
// is not visible in this excerpt -- presumably m_dest being unset; confirm.
344 int64_t c_UConverter::t_getdestinationtype() {
346 return UCNV_UNSUPPORTED_CONVERTER
;
349 return ucnv_getType(m_dest
);
352 /* Basic substitution */
// Sets the substitution characters of `cnv` to the bytes of `chars` using
// ucnv_setSubstChars; a failure is reported through THROW_UFAILURE into
// `err`.
// NOTE(review): the declaration of `err` (third parameter) and the return
// statements are not visible in this excerpt -- presumably false on failure
// and true on success; confirm.
354 bool c_UConverter::setSubstChars(String chars
, UConverter
*cnv
,
356 UErrorCode error
= U_ZERO_ERROR
;
357 ucnv_setSubstChars(cnv
, chars
.data(), chars
.size(), &error
);
358 if (U_FAILURE(error
)) {
359 THROW_UFAILURE(ucnv_setSubstChars
, error
, err
);
365 bool c_UConverter::t_setsubstchars(CStrRef chars
) {
366 return setSubstChars(chars
, m_dest
, m_error
) &&
367 setSubstChars(chars
, m_src
, m_error
);
// Handler for getSubstChars(): reads the substitution characters of the
// source converter (m_src) with ucnv_getSubstChars into the local buffer
// `chars` and returns them as a copied String of the length ICU reported.
// On failure the error is reported into m_error and uninit_null() is
// returned. Only m_src is queried; m_dest is not consulted here.
// NOTE(review): the declaration of `chars` (and therefore the capacity held
// in the int8_t chars_len) is not visible in this excerpt -- confirm.
370 String
c_UConverter::t_getsubstchars() {
371 UErrorCode error
= U_ZERO_ERROR
;
373 int8_t chars_len
= sizeof(chars
);
375 ucnv_getSubstChars(m_src
, chars
, &chars_len
, &error
);
376 if (U_FAILURE(error
)) {
377 THROW_UFAILURE(ucnv_getSubstChars
, error
, m_error
);
378 return uninit_null();
381 return String(chars
, chars_len
, CopyString
);
// Shared default behaviour for the to/from-Unicode callback handlers.
// For the UCNV_UNASSIGNED reason the by-reference `error` is reset to
// U_ZERO_ERROR and the current substitution characters are returned;
// another path returns uninit_null().
// NOTE(review): the switch header and any further case labels sharing the
// UCNV_UNASSIGNED branch are not visible in this excerpt -- confirm which
// reasons take each path.
386 Variant
c_UConverter::defaultCallback(int64_t reason
, VRefParam error
) {
388 case UCNV_UNASSIGNED
:
391 error
= U_ZERO_ERROR
;
392 return t_getsubstchars();
395 return uninit_null();
// Handler for fromUCallback(): forwards `reason` and `error` to
// defaultCallback(); `source` and `codepoint` are not used by the visible
// code.
// NOTE(review): the declaration of `error` (last parameter) is not visible
// in this excerpt -- confirm.
398 Variant
c_UConverter::t_fromucallback(int64_t reason
,
399 CArrRef source
, int64_t codepoint
,
401 return defaultCallback(reason
, error
);
// Handler for toUCallback(): forwards `reason` and `error` to
// defaultCallback(); `source` and `codeunits` are not used by the visible
// code.
// NOTE(review): the declaration of `error` (last parameter) is not visible
// in this excerpt -- confirm.
404 Variant
c_UConverter::t_toucallback(int64_t reason
,
405 CStrRef source
, CStrRef codeunits
,
407 return defaultCallback(reason
, error
);
410 /* Main workhorse functions */
412 Variant
c_UConverter::t_convert(CStrRef str
, bool reverse
) {
413 SYNC_VM_REGS_SCOPED();
414 return doConvert(str
, reverse
? m_src
: m_dest
,
415 reverse
? m_dest
: m_src
, m_error
);
// Converts `str` from the encoding of fromCnv to the encoding of toCnv,
// using UTF-16 (UChar) as the pivot:
//  1. ucnv_toUChars   : source bytes -> UChar buffer `temp`
//  2. ucnv_fromUChars : `temp`       -> destination bytes `dest`
// Each stage is run twice: once with a NULL buffer to obtain the required
// length, then again into a String-backed buffer of that size.
// If either converter is missing, err is set to U_INVALID_STATE_ERROR with
// the message "Internal converters not initialized". Any ICU failure is
// reported through THROW_UFAILURE into `err`; all of these paths return
// uninit_null(). On success the destination String, resized to the produced
// length, is returned.
// NOTE(review): the declaration of `err` (last parameter) is not visible in
// this excerpt -- confirm.
418 String
c_UConverter::doConvert(CStrRef str
,
419 UConverter
*toCnv
, UConverter
*fromCnv
,
421 UErrorCode error
= U_ZERO_ERROR
;
423 if (!fromCnv
|| !toCnv
) {
424 err
.code
= U_INVALID_STATE_ERROR
;
425 err
.custom_error_message
= "Internal converters not initialized";
426 return uninit_null();
429 /* Convert to UChar pivot encoding */
// Sizing call: NULL buffer with capacity 0. U_BUFFER_OVERFLOW_ERROR is
// tolerated by the check that follows; other failures abort.
430 int32_t temp_len
= ucnv_toUChars(fromCnv
, NULL
, 0,
431 str
.c_str(), str
.size(), &error
);
432 if (U_FAILURE(error
) && error
!= U_BUFFER_OVERFLOW_ERROR
) {
433 THROW_UFAILURE(ucnv_toUChars
, error
, err
);
434 return uninit_null();
436 // Explicitly include the space for a \u0000 UChar since String
437 // only allocates one extra byte (not the 2 needed)
438 String
tempStr(sizeof(UChar
) * (temp_len
+ 1), ReserveString
);
439 UChar
*temp
= (UChar
*) tempStr
.mutableSlice().ptr
;
441 error
= U_ZERO_ERROR
;
442 temp_len
= ucnv_toUChars(fromCnv
, temp
, temp_len
,
443 str
.c_str(), str
.size(), &error
);
444 if (U_FAILURE(error
)) {
445 THROW_UFAILURE(ucnv_toUChars
, error
, err
);
446 return uninit_null();
450 /* Convert to final encoding */
451 error
= U_ZERO_ERROR
;
// Sizing call for the second stage, same pattern as above.
452 int32_t dest_len
= ucnv_fromUChars(toCnv
, NULL
, 0,
453 temp
, temp_len
, &error
);
454 if (U_FAILURE(error
) && error
!= U_BUFFER_OVERFLOW_ERROR
) {
455 THROW_UFAILURE(ucnv_fromUChars
, error
, err
);
456 return uninit_null();
458 String
destStr(dest_len
, ReserveString
);
459 char *dest
= (char*) destStr
.mutableSlice().ptr
;
461 error
= U_ZERO_ERROR
;
462 dest_len
= ucnv_fromUChars(toCnv
, dest
, dest_len
,
463 temp
, temp_len
, &error
);
464 if (U_FAILURE(error
)) {
465 THROW_UFAILURE(ucnv_fromUChars
, error
, err
);
466 return uninit_null();
468 return destStr
.setSize(dest_len
);
472 s_from_subst("from_subst"),
473 s_to_subst("to_subst");
// Static handler for transcode(): one-shot conversion of `str` from
// fromEncoding to toEncoding using temporary converters.
//  - Both converters are opened with setEncoding(); a failure returns
//    uninit_null().
//  - If `options` has "from_subst" / "to_subst", that value is applied as
//    the substitution string of the respective converter; a failure returns
//    uninit_null().
//  - The conversion itself is done by doConvert().
// All errors are reported into s_intl_error->m_error, not into an instance.
// NOTE(review): the early returns shown here do not visibly close a
// converter that was already opened, and the tail of the function (after
// doConvert) is not visible in this excerpt -- confirm that fromCnv/toCnv
// are closed on every path.
475 Variant
c_UConverter::ti_transcode(CStrRef str
, CStrRef toEncoding
,
476 CStrRef fromEncoding
, CArrRef options
) {
477 UConverter
*fromCnv
= NULL
, *toCnv
= NULL
;
478 if (!setEncoding(fromEncoding
, &fromCnv
, s_intl_error
->m_error
)) {
479 return uninit_null();
481 if (!setEncoding(toEncoding
, &toCnv
, s_intl_error
->m_error
)) {
482 return uninit_null();
484 if (options
.exists(s_from_subst
) &&
485 !setSubstChars(options
[s_from_subst
].toString(), fromCnv
,
486 s_intl_error
->m_error
)) {
487 return uninit_null();
489 if (options
.exists(s_to_subst
) &&
490 !setSubstChars(options
[s_to_subst
].toString(), toCnv
,
491 s_intl_error
->m_error
)) {
492 return uninit_null();
494 Variant ret
= doConvert(str
, toCnv
, fromCnv
, s_intl_error
->m_error
);
500 /* ext/intl error handling */
502 int64_t c_UConverter::t_geterrorcode() {
506 String
c_UConverter::t_geterrormessage() {
507 return m_error
.custom_error_message
;
510 /* Enumerators and lookups */
// Expands to a `case` label for the ICU callback reason UCNV_<name> that
// returns the text "REASON_<name>" as a String. Intended for use inside a
// switch over a callback reason.
#define UCNV_REASON_CASE(name) \
  case UCNV_ ## name : return String("REASON_" #name );
// Static handler for reasonText(): maps a callback reason code to its
// constant name ("REASON_UNASSIGNED", "REASON_ILLEGAL", "REASON_IRREGULAR",
// "REASON_RESET", "REASON_CLOSE", "REASON_CLONE") via UCNV_REASON_CASE.
// The remaining path raises the warning "Unknown UConverterCallbackReason"
// and returns uninit_null().
// NOTE(review): the switch header and the label of the fallback path are
// not visible in this excerpt -- presumably `switch (reason)` with a
// `default:`; confirm.
513 String
c_UConverter::ti_reasontext(int64_t reason
) {
515 UCNV_REASON_CASE(UNASSIGNED
)
516 UCNV_REASON_CASE(ILLEGAL
)
517 UCNV_REASON_CASE(IRREGULAR
)
518 UCNV_REASON_CASE(RESET
)
519 UCNV_REASON_CASE(CLOSE
)
520 UCNV_REASON_CASE(CLONE
)
522 raise_warning("Unknown UConverterCallbackReason: %ld", (long)reason
);
523 return uninit_null();
// Static handler for getAvailable(): appends the name of every converter
// ICU reports (ucnv_countAvailable / ucnv_getAvailableName) to an Array, in
// index order.
// NOTE(review): the return statement is not visible in this excerpt --
// presumably `return ret`; confirm.
527 Array
c_UConverter::ti_getavailable() {
528 int32_t i
, count
= ucnv_countAvailable();
529 Array ret
= Array::Create();
531 for(i
= 0; i
< count
; ++i
) {
532 ret
.append(ucnv_getAvailableName(i
));
// Static handler for getAliases(): counts the aliases of `encoding` with
// ucnv_countAliases and then fetches each one with ucnv_getAlias. Any ICU
// failure is reported into s_intl_error->m_error and the function returns
// uninit_null().toArray().
// The count failure is reported under the name "ucnv_getAliases" although
// the failing call is ucnv_countAliases.
// NOTE(review): the statement that stores each alias and the final return
// are not visible in this excerpt -- presumably `ret.append(alias)` and
// `return ret`; confirm.
538 Array
c_UConverter::ti_getaliases(CStrRef encoding
) {
539 UErrorCode error
= U_ZERO_ERROR
;
540 int16_t i
, count
= ucnv_countAliases(encoding
.data(), &error
);
542 if (U_FAILURE(error
)) {
543 THROW_UFAILURE(ucnv_getAliases
, error
, s_intl_error
->m_error
);
544 return uninit_null().toArray();
547 Array ret
= Array::Create();
548 for(i
= 0; i
< count
; ++i
) {
549 error
= U_ZERO_ERROR
;
550 const char *alias
= ucnv_getAlias(encoding
.c_str(), i
, &error
);
551 if (U_FAILURE(error
)) {
552 THROW_UFAILURE(ucnv_getAlias
, error
, s_intl_error
->m_error
);
553 return uninit_null().toArray();
// Static handler for getStandards(): iterates over the standards ICU knows
// (ucnv_countStandards / ucnv_getStandard). A failure while fetching a name
// is reported into s_intl_error->m_error and the function returns
// uninit_null().toArray().
// NOTE(review): the statement that stores each name and the final return
// are not visible in this excerpt -- presumably `ret.append(name)` and
// `return ret`; confirm.
560 Array
c_UConverter::ti_getstandards() {
561 int16_t i
, count
= ucnv_countStandards();
562 Array ret
= Array::Create();
564 for(i
= 0; i
< count
; ++i
) {
565 UErrorCode error
= U_ZERO_ERROR
;
566 const char *name
= ucnv_getStandard(i
, &error
);
567 if (U_FAILURE(error
)) {
568 THROW_UFAILURE(ucnv_getStandard
, error
, s_intl_error
->m_error
);
569 return uninit_null().toArray();
// Static handler for getStandardName(): looks up the name of converter
// `name` with ucnv_getStandardName and returns a copied String of the
// result. A failure is reported into s_intl_error->m_error and
// uninit_null() is returned.
// NOTE(review): the remaining arguments of the ucnv_getStandardName call
// (presumably the `standard` name and &error) and any handling of a NULL
// result are not visible in this excerpt -- confirm.
576 String
c_UConverter::ti_getstandardname(CStrRef name
, CStrRef standard
) {
577 UErrorCode error
= U_ZERO_ERROR
;
578 const char *standard_name
= ucnv_getStandardName(name
.data(),
582 if (U_FAILURE(error
)) {
583 THROW_UFAILURE(ucnv_getStandardName
, error
, s_intl_error
->m_error
);
584 return uninit_null();
587 return String(standard_name
, CopyString
);
590 String
c_UConverter::ti_getmimename(CStrRef name
) {
591 return ti_getstandardname(name
, "MIME");
594 ///////////////////////////////////////////////////////////////////////////////