Rename runtime/base/zend_* to zend-
[hiphop-php.git] / hphp / runtime / ext / ext_intl.cpp
blobc2939a78137415209205c9e21aabc1566be0ded6
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/ext/ext_intl.h"
19 #include "hphp/runtime/ext/ext_array.h" // for throw_bad_array_exception
20 #include "hphp/runtime/base/request_local.h"
21 #include "hphp/runtime/base/intl_convert.h"
22 #include "hphp/runtime/base/zend-collator.h"
23 #include "hphp/runtime/base/zend-qsort.h"
24 #include "unicode/uidna.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h" // icu
27 #include "unicode/uclean.h" // icu
28 #include "unicode/putil.h" // icu
29 #include "unicode/utypes.h"
30 #include "unicode/unorm.h"
32 #include "hphp/system/systemlib.h"
// The UTS #46 API was introduced in ICU 4.6; it is detected here through
// the presence of the UIDNA_INFO_INITIALIZER macro.
#ifdef UIDNA_INFO_INITIALIZER
# define HAVE_46_API 1
#endif
38 namespace HPHP {
39 IMPLEMENT_DEFAULT_EXTENSION(idn);
40 ///////////////////////////////////////////////////////////////////////////////
42 int64_t f_intl_get_error_code() {
43 return s_intl_error->m_error.code;
46 String f_intl_get_error_message() {
47 if (!s_intl_error->m_error.custom_error_message.empty()) {
48 return s_intl_error->m_error.custom_error_message;
50 return String(u_errorName(s_intl_error->m_error.code), CopyString);
53 String f_intl_error_name(int64_t error_code) {
54 return String(u_errorName((UErrorCode)error_code), CopyString);
57 bool f_intl_is_failure(int64_t error_code) {
58 if (U_FAILURE((UErrorCode)error_code)) return true;
59 return false;
62 ///////////////////////////////////////////////////////////////////////////////
64 const int64_t q_Collator$$SORT_REGULAR = 0;
65 const int64_t q_Collator$$SORT_STRING = 1;
66 const int64_t q_Collator$$SORT_NUMERIC = 2;
67 const int64_t q_Collator$$FRENCH_COLLATION = UCOL_FRENCH_COLLATION;
68 const int64_t q_Collator$$ALTERNATE_HANDLING = UCOL_ALTERNATE_HANDLING;
69 const int64_t q_Collator$$CASE_FIRST = UCOL_CASE_FIRST;
70 const int64_t q_Collator$$CASE_LEVEL = UCOL_CASE_LEVEL;
71 const int64_t q_Collator$$NORMALIZATION_MODE = UCOL_NORMALIZATION_MODE;
72 const int64_t q_Collator$$STRENGTH = UCOL_STRENGTH;
73 const int64_t q_Collator$$HIRAGANA_QUATERNARY_MODE = UCOL_HIRAGANA_QUATERNARY_MODE;
74 const int64_t q_Collator$$NUMERIC_COLLATION = UCOL_NUMERIC_COLLATION;
75 const int64_t q_Collator$$DEFAULT_VALUE = UCOL_DEFAULT;
76 const int64_t q_Collator$$PRIMARY = UCOL_PRIMARY;
77 const int64_t q_Collator$$SECONDARY = UCOL_SECONDARY;
78 const int64_t q_Collator$$TERTIARY = UCOL_TERTIARY;
79 const int64_t q_Collator$$DEFAULT_STRENGTH = UCOL_DEFAULT_STRENGTH;
80 const int64_t q_Collator$$QUATERNARY = UCOL_QUATERNARY;
81 const int64_t q_Collator$$IDENTICAL = UCOL_IDENTICAL;
82 const int64_t q_Collator$$OFF = UCOL_OFF;
83 const int64_t q_Collator$$ON = UCOL_ON;
84 const int64_t q_Collator$$SHIFTED = UCOL_SHIFTED;
85 const int64_t q_Collator$$NON_IGNORABLE = UCOL_NON_IGNORABLE;
86 const int64_t q_Collator$$LOWER_FIRST = UCOL_LOWER_FIRST;
87 const int64_t q_Collator$$UPPER_FIRST = UCOL_UPPER_FIRST;
89 ///////////////////////////////////////////////////////////////////////////////
91 c_Collator::c_Collator(Class* cb) :
92 ExtObjectData(cb), m_locale(), m_ucoll(NULL), m_errcode() {
95 c_Collator::~c_Collator() {
96 if (m_ucoll) {
97 ucol_close(m_ucoll);
98 m_ucoll = NULL;
102 void c_Collator::t___construct(CStrRef locale) {
103 if (m_ucoll) {
104 ucol_close(m_ucoll);
105 m_ucoll = NULL;
107 m_errcode.clear();
108 if (!locale.empty()) {
109 m_locale = locale;
110 m_ucoll = ucol_open(locale.data(), &(m_errcode.code));
111 if (!U_FAILURE(m_errcode.code)) {
112 // If the specified locale opened successfully, return
113 s_intl_error->m_error.clear();
114 s_intl_error->m_error.code = m_errcode.code;
115 return;
118 // If the empty string was given or if the specified locale did
119 // not open successfully, so fall back to using the default locale
120 m_errcode.code = U_USING_FALLBACK_WARNING;
121 s_intl_error->m_error.clear();
122 s_intl_error->m_error.code = m_errcode.code;
123 if (m_ucoll) {
124 ucol_close(m_ucoll);
125 m_ucoll = NULL;
127 UErrorCode errcode = U_ZERO_ERROR;
128 m_locale = String(uloc_getDefault(), CopyString);
129 m_ucoll = ucol_open(m_locale.data(), &errcode);
130 if (U_FAILURE(errcode)) {
131 m_errcode.code = errcode;
132 m_errcode.custom_error_message =
133 "collator_create: unable to open ICU collator";
134 s_intl_error->m_error.clear();
135 s_intl_error->m_error.code = m_errcode.code;
136 s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
137 if (m_ucoll) {
138 ucol_close(m_ucoll);
139 m_ucoll = NULL;
144 bool c_Collator::t_asort(VRefParam arr,
145 int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
146 if (!arr.isArray()) {
147 throw_bad_array_exception();
148 return false;
150 if (!m_ucoll) {
151 raise_warning("asort called on uninitialized Collator object");
152 return false;
154 m_errcode.clear();
155 bool ret = collator_asort(arr, sort_flag, true, m_ucoll, &m_errcode);
156 s_intl_error->m_error.clear();
157 s_intl_error->m_error.code = m_errcode.code;
158 s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
159 if (U_FAILURE(m_errcode.code)) {
160 return false;
162 return ret;
165 Variant c_Collator::t_compare(CStrRef str1, CStrRef str2) {
166 if (!m_ucoll) {
167 raise_warning("compare called on uninitialized Collator object");
168 return 0;
170 UChar* ustr1 = NULL;
171 UChar* ustr2 = NULL;
172 int ustr1_len = 0;
173 int ustr2_len = 0;
174 m_errcode.clear();
175 intl_convert_utf8_to_utf16(&ustr1, &ustr1_len,
176 str1.data(), str1.length(),
177 &(m_errcode.code));
178 if (U_FAILURE(m_errcode.code)) {
179 free(ustr1);
180 return false;
182 intl_convert_utf8_to_utf16(&ustr2, &ustr2_len,
183 str2.data(), str2.length(),
184 &(m_errcode.code));
185 if (U_FAILURE(m_errcode.code)) {
186 free(ustr1);
187 free(ustr2);
188 return false;
190 int64_t ret = ucol_strcoll(m_ucoll, ustr1, ustr1_len, ustr2, ustr2_len);
191 free(ustr1);
192 free(ustr2);
193 return ret;
196 Variant c_Collator::ti_create(CStrRef locale) {
197 p_Collator c(NEWOBJ(c_Collator)());
198 c.get()->t___construct(locale);
199 return c;
202 int64_t c_Collator::t_getattribute(int64_t attr) {
203 if (!m_ucoll) {
204 raise_warning("getattribute called on uninitialized Collator object");
205 return 0;
207 m_errcode.clear();
208 int64_t ret = (int64_t)ucol_getAttribute(m_ucoll, (UColAttribute)attr,
209 &(m_errcode.code));
210 s_intl_error->m_error.clear();
211 s_intl_error->m_error.code = m_errcode.code;
212 if (U_FAILURE(m_errcode.code)) {
213 m_errcode.custom_error_message = "Error getting attribute value";
214 s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
215 return 0;
217 return ret;
220 int64_t c_Collator::t_geterrorcode() {
221 return m_errcode.code;
224 String c_Collator::t_geterrormessage() {
225 return String(u_errorName(m_errcode.code), CopyString);
228 String c_Collator::t_getlocale(int64_t type /* = 0 */) {
229 if (!m_ucoll) {
230 raise_warning("getlocale called on uninitialized Collator object");
231 return "";
233 m_errcode.clear();
234 String ret(
235 (char*)ucol_getLocaleByType(m_ucoll, (ULocDataLocaleType)type,
236 &(m_errcode.code)),
237 CopyString);
238 if (U_FAILURE(m_errcode.code)) {
239 m_errcode.custom_error_message = "Error getting locale by type";
240 s_intl_error->m_error.code = m_errcode.code;
241 s_intl_error->m_error.custom_error_message =
242 m_errcode.custom_error_message;
243 return "";
245 return ret;
248 int64_t c_Collator::t_getstrength() {
249 if (!m_ucoll) {
250 raise_warning("getstrength called on uninitialized Collator object");
251 return 0;
253 return ucol_getStrength(m_ucoll);
256 bool c_Collator::t_setattribute(int64_t attr, int64_t val) {
257 if (!m_ucoll) {
258 raise_warning("setattribute called on uninitialized Collator object");
259 return false;
261 m_errcode.clear();
262 ucol_setAttribute(m_ucoll, (UColAttribute)attr,
263 (UColAttributeValue)val, &(m_errcode.code));
264 s_intl_error->m_error.clear();
265 s_intl_error->m_error.code = m_errcode.code;
266 if (U_FAILURE(m_errcode.code)) {
267 m_errcode.custom_error_message = "Error setting attribute value";
268 s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
269 return false;
271 return true;
274 bool c_Collator::t_setstrength(int64_t strength) {
275 if (!m_ucoll) {
276 raise_warning("setstrength called on uninitialized Collator object");
277 return false;
279 ucol_setStrength(m_ucoll, (UCollationStrength)strength);
280 return true;
/* One entry of the index that gets sorted: a sort key together with the
 * position of the array element the key was generated from. */
typedef struct _collator_sort_key_index {
  char* key;      /* pointer to sort key */
  ssize_t valPos; /* position of the original array element */
} collator_sort_key_index_t;

/* Initial size and growth step of the buffer holding all sort keys. */
static const int32_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
static const int32_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;

/* Initial size and growth step of the sort key index buffer. */
static const int32_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
static const int32_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;

/* Initial size of the temporary UTF-16 conversion buffer. */
static const int32_t DEF_UTF16_BUF_SIZE = 1024;

/* {{{ collator_cmp_sort_keys
 * Compare the sort keys of two index entries with strcmp(); the third
 * argument is unused.
 */
static int collator_cmp_sort_keys(const void* p1, const void* p2, const void*) {
  const collator_sort_key_index_t* lhs =
    static_cast<const collator_sort_key_index_t*>(p1);
  const collator_sort_key_index_t* rhs =
    static_cast<const collator_sort_key_index_t*>(p2);
  return strcmp(lhs->key, rhs->key);
}
305 bool c_Collator::t_sortwithsortkeys(VRefParam arr) {
306 char* sortKeyBuf = NULL; /* buffer to store sort keys */
307 int32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE; /* buffer size */
308 ptrdiff_t sortKeyBufOffset = 0; /* pos in buffer to store sort key */
309 int32_t sortKeyLen = 0; /* the length of currently processing key */
310 int32_t bufLeft = 0;
311 int32_t bufIncrement = 0;
313 /* buffer to store 'indexes' which will be passed to 'qsort' */
314 collator_sort_key_index_t* sortKeyIndxBuf = NULL;
315 int32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
316 int32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
318 int32_t sortKeyCount = 0;
319 int32_t j = 0;
321 /* tmp buffer to hold current processing string in utf-16 */
322 UChar* utf16_buf = NULL;
323 /* the length of utf16_buf */
324 int utf16_buf_size = DEF_UTF16_BUF_SIZE;
325 /* length of converted string */
326 int utf16_len = 0;
328 m_errcode.clear();
329 s_intl_error->m_error.clear();
332 * Sort specified array.
334 if (!arr.isArray()) {
335 return true;
337 Array hash = arr.toArray();
338 if (hash.size() == 0) {
339 return true;
342 /* Create bufers */
343 sortKeyBuf = (char*)calloc(sortKeyBufSize, sizeof(char));
344 sortKeyIndxBuf = (collator_sort_key_index_t*)malloc(sortKeyIndxBufSize);
345 utf16_buf = (UChar*)malloc(utf16_buf_size);
347 /* Iterate through input hash and create a sort key for each value. */
348 for (ssize_t pos = hash->iter_begin(); pos != ArrayData::invalid_index;
349 pos = hash->iter_advance(pos)) {
350 /* Convert current hash item from UTF-8 to UTF-16LE and save the result
351 * to utf16_buf. */
352 utf16_len = utf16_buf_size;
353 /* Process string values only. */
354 Variant val(hash->getValue(pos));
355 if (val.isString()) {
356 String str = val.toString();
357 intl_convert_utf8_to_utf16(&utf16_buf, &utf16_len, str.data(),
358 str.size(), &(m_errcode.code));
359 if (U_FAILURE(m_errcode.code)) {
360 m_errcode.custom_error_message = "Sort with sort keys failed";
361 if (utf16_buf) {
362 free(utf16_buf);
364 free(sortKeyIndxBuf);
365 free(sortKeyBuf);
366 return false;
368 } else {
369 /* Set empty string */
370 utf16_len = 0;
371 utf16_buf[utf16_len] = 0;
374 if ((utf16_len + 1) > utf16_buf_size) {
375 utf16_buf_size = utf16_len + 1;
378 /* Get sort key, reallocating the buffer if needed. */
379 bufLeft = sortKeyBufSize - sortKeyBufOffset;
381 sortKeyLen = ucol_getSortKey(m_ucoll,
382 utf16_buf,
383 utf16_len,
384 (uint8_t*)sortKeyBuf + sortKeyBufOffset,
385 bufLeft);
387 /* check for sortKeyBuf overflow, increasing its size of the buffer if
388 needed */
389 if (sortKeyLen > bufLeft) {
390 bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ?
391 sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
392 sortKeyBufSize += bufIncrement;
393 bufLeft += bufIncrement;
394 sortKeyBuf = (char*)realloc(sortKeyBuf, sortKeyBufSize);
395 sortKeyLen = ucol_getSortKey(m_ucoll, utf16_buf, utf16_len,
396 (uint8_t*)sortKeyBuf + sortKeyBufOffset,
397 bufLeft);
400 /* check sortKeyIndxBuf overflow, increasing its size of the buffer if
401 needed */
402 if ((sortKeyCount + 1) * sortKeyIndxSize > sortKeyIndxBufSize) {
403 bufIncrement = (sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT) ?
404 sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
405 sortKeyIndxBufSize += bufIncrement;
406 sortKeyIndxBuf = (collator_sort_key_index_t*)realloc(sortKeyIndxBuf,
407 sortKeyIndxBufSize);
409 sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset;
410 sortKeyIndxBuf[sortKeyCount].valPos = pos;
411 sortKeyBufOffset += sortKeyLen;
412 ++sortKeyCount;
415 /* update ptrs to point to valid keys. */
416 for( j = 0; j < sortKeyCount; j++ )
417 sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
419 /* sort it */
420 zend_qsort(sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize,
421 collator_cmp_sort_keys, NULL);
423 /* for resulting hash we'll assign new hash keys rather then reordering */
424 Array sortedHash = Array::Create();
426 for (j = 0; j < sortKeyCount; j++) {
427 sortedHash.append(hash->getValue(sortKeyIndxBuf[j].valPos));
430 /* Save sorted hash into return variable. */
431 arr = sortedHash;
433 if (utf16_buf)
434 free(utf16_buf);
436 free(sortKeyIndxBuf);
437 free(sortKeyBuf);
439 return true;
442 bool c_Collator::t_sort(VRefParam arr,
443 int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
444 if (!arr.isArray()) {
445 throw_bad_array_exception();
446 return false;
448 if (!m_ucoll) {
449 raise_warning("sort called on uninitialized Collator object");
450 return false;
452 m_errcode.clear();
453 bool ret = collator_sort(arr, sort_flag, true, m_ucoll, &(m_errcode));
454 s_intl_error->m_error.clear();
455 s_intl_error->m_error.code = m_errcode.code;
456 s_intl_error->m_error.custom_error_message = m_errcode.custom_error_message;
457 if (U_FAILURE(m_errcode.code)) {
458 return false;
460 return ret;
463 ///////////////////////////////////////////////////////////////////////////////
// Declares a local `coll` holding the c_Collator behind `obj`. When `obj`
// is not an object, or no collator is obtained from it, a warning is raised
// and the enclosing function returns false.
#define CHECK_COLL(obj)                                        \
  c_Collator *coll = NULL;                                     \
  if (obj.isObject()) {                                        \
    coll = obj.toObject().getTyped<c_Collator>();              \
  }                                                            \
  if (coll == NULL) {                                          \
    raise_warning("Expecting collator object");                \
    return false;                                              \
  }
475 Variant f_collator_asort(CVarRef obj, VRefParam arr,
476 int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
477 CHECK_COLL(obj);
478 return coll->t_asort(ref(arr), sort_flag);
481 Variant f_collator_compare(CVarRef obj, CStrRef str1, CStrRef str2) {
482 CHECK_COLL(obj);
483 return coll->t_compare(str1, str2);
486 Variant f_collator_create(CStrRef locale) {
487 return c_Collator::ti_create(locale);
490 Variant f_collator_get_attribute(CVarRef obj, int64_t attr) {
491 CHECK_COLL(obj);
492 return coll->t_getattribute(attr);
495 Variant f_collator_get_error_code(CVarRef obj) {
496 CHECK_COLL(obj);
497 return coll->t_geterrorcode();
500 Variant f_collator_get_error_message(CVarRef obj) {
501 CHECK_COLL(obj);
502 return coll->t_geterrormessage();
505 Variant f_collator_get_locale(CVarRef obj, int64_t type /* = 0 */) {
506 CHECK_COLL(obj);
507 return coll->t_getlocale(type);
510 Variant f_collator_get_strength(CVarRef obj) {
511 CHECK_COLL(obj);
512 return coll->t_getstrength();
515 Variant f_collator_set_attribute(CVarRef obj, int64_t attr, int64_t val) {
516 CHECK_COLL(obj);
517 return coll->t_setattribute(attr, val);
520 Variant f_collator_set_strength(CVarRef obj, int64_t strength) {
521 CHECK_COLL(obj);
522 return coll->t_setstrength(strength);
525 Variant f_collator_sort_with_sort_keys(CVarRef obj, VRefParam arr) {
526 CHECK_COLL(obj);
527 return coll->t_sortwithsortkeys(ref(arr));
530 Variant f_collator_sort(CVarRef obj, VRefParam arr,
531 int64_t sort_flag /* = q_Collator$$SORT_REGULAR */) {
532 CHECK_COLL(obj);
533 return coll->t_sort(ref(arr), sort_flag);
536 ///////////////////////////////////////////////////////////////////////////////
538 const int64_t q_Locale$$ACTUAL_LOCALE = 0;
539 const int64_t q_Locale$$VALID_LOCALE = 1;
541 ///////////////////////////////////////////////////////////////////////////////
543 c_Locale::c_Locale(Class* cb) : ExtObjectData(cb) {
546 c_Locale::~c_Locale() {
549 void c_Locale::t___construct() {
552 ///////////////////////////////////////////////////////////////////////////////
554 const int64_t q_Normalizer$$NONE = UNORM_NONE;
555 const int64_t q_Normalizer$$FORM_D = UNORM_NFD;
556 const int64_t q_Normalizer$$NFD = UNORM_NFD;
557 const int64_t q_Normalizer$$FORM_KD = UNORM_NFKD;
558 const int64_t q_Normalizer$$NFKD = UNORM_NFKD;
559 const int64_t q_Normalizer$$FORM_C = UNORM_NFC;
560 const int64_t q_Normalizer$$NFC = UNORM_NFC;
561 const int64_t q_Normalizer$$FORM_KC = UNORM_NFKC;
562 const int64_t q_Normalizer$$NFKC = UNORM_NFKC;
564 ///////////////////////////////////////////////////////////////////////////////
566 c_Normalizer::c_Normalizer(Class* cb) : ExtObjectData(cb) {
569 c_Normalizer::~c_Normalizer() {
572 void c_Normalizer::t___construct() {
575 ///////////////////////////////////////////////////////////////////////////////
577 Variant c_Normalizer::ti_isnormalized(CStrRef input,
578 int64_t form /* = q_Normalizer$$FORM_C */) {
579 s_intl_error->m_error.clear();
581 switch (form) {
582 case UNORM_NFD:
583 case UNORM_NFKD:
584 case UNORM_NFC:
585 case UNORM_NFKC:
586 break;
587 default:
588 s_intl_error->m_error.code = U_ILLEGAL_ARGUMENT_ERROR;
589 s_intl_error->m_error.custom_error_message =
590 "normalizer_isnormalized: illegal normalization form";
591 return uninit_null();
594 /* First convert the string to UTF-16. */
595 UChar* uinput = NULL; int uinput_len = 0;
596 UErrorCode status = U_ZERO_ERROR;
597 intl_convert_utf8_to_utf16(&uinput, &uinput_len, input.data(), input.size(),
598 &status);
600 if (U_FAILURE(status)) {
601 s_intl_error->m_error.code = status;
602 s_intl_error->m_error.custom_error_message = "Error converting string to UTF-16.";
603 free(uinput);
604 return false;
607 /* test string */
608 UBool uret = unorm_isNormalizedWithOptions(uinput, uinput_len,
609 (UNormalizationMode)form,
610 (int32_t)0, &status);
611 free(uinput);
613 /* Bail out if an unexpected error occured. */
614 if (U_FAILURE(status)) {
615 s_intl_error->m_error.code = status;
616 s_intl_error->m_error.custom_error_message =
617 "Error testing if string is the given normalization form.";
618 return false;
621 return uret;
624 Variant c_Normalizer::ti_normalize(CStrRef input,
625 int64_t form /* = q_Normalizer$$FORM_C */) {
626 s_intl_error->m_error.clear();
628 int expansion_factor = 1;
629 switch(form) {
630 case UNORM_NONE:
631 case UNORM_NFC:
632 case UNORM_NFKC:
633 break;
634 case UNORM_NFD:
635 case UNORM_NFKD:
636 expansion_factor = 3;
637 break;
638 default:
639 s_intl_error->m_error.code = U_ILLEGAL_ARGUMENT_ERROR;
640 s_intl_error->m_error.custom_error_message =
641 "normalizer_normalize: illegal normalization form";
642 return uninit_null();
645 /* First convert the string to UTF-16. */
646 UChar* uinput = NULL; int uinput_len = 0;
647 UErrorCode status = U_ZERO_ERROR;
648 intl_convert_utf8_to_utf16(&uinput, &uinput_len, input.data(), input.size(),
649 &status);
651 if (U_FAILURE(status)) {
652 s_intl_error->m_error.code = status;
653 s_intl_error->m_error.custom_error_message =
654 "Error converting string to UTF-16.";
655 free(uinput);
656 return uninit_null();
659 /* Allocate memory for the destination buffer for normalization */
660 int uret_len = uinput_len * expansion_factor;
661 UChar *uret_buf = (UChar*)malloc((uret_len + 1) * sizeof(UChar));
663 /* normalize */
664 int size_needed = unorm_normalize(uinput, uinput_len,
665 (UNormalizationMode)form, (int32_t) 0,
666 uret_buf, uret_len, &status);
668 /* Bail out if an unexpected error occured.
669 * (U_BUFFER_OVERFLOW_ERROR means that *target buffer is not large enough).
670 * (U_STRING_NOT_TERMINATED_WARNING usually means that the input string
671 * is empty).
673 if (U_FAILURE(status) &&
674 status != U_BUFFER_OVERFLOW_ERROR &&
675 status != U_STRING_NOT_TERMINATED_WARNING) {
676 free(uret_buf);
677 free(uinput);
678 return uninit_null();
681 if (size_needed > uret_len) {
682 /* realloc does not seem to work properly - memory is corrupted
683 * uret_buf = eurealloc(uret_buf, size_needed + 1); */
684 free(uret_buf);
685 uret_buf = (UChar*)malloc((size_needed + 1) * sizeof(UChar));
686 uret_len = size_needed;
688 status = U_ZERO_ERROR;
690 /* try normalize again */
691 size_needed = unorm_normalize( uinput, uinput_len,
692 (UNormalizationMode)form, (int32_t) 0,
693 uret_buf, uret_len, &status);
695 /* Bail out if an unexpected error occured. */
696 if (U_FAILURE(status)) {
697 /* Set error messages. */
698 s_intl_error->m_error.code = status;
699 s_intl_error->m_error.custom_error_message = "Error normalizing string";
700 free(uret_buf);
701 free(uinput);
702 return uninit_null();
706 free(uinput);
708 /* the buffer we actually used */
709 uret_len = size_needed;
711 /* Convert normalized string from UTF-16 to UTF-8. */
712 char* ret_buf = NULL; int ret_len = 0;
713 intl_convert_utf16_to_utf8(&ret_buf, &ret_len, uret_buf, uret_len, &status);
714 free(uret_buf);
715 if (U_FAILURE(status)) {
716 s_intl_error->m_error.code = status;
717 s_intl_error->m_error.custom_error_message =
718 "normalizer_normalize: error converting normalized text UTF-8";
719 return uninit_null();
722 return String(ret_buf, ret_len, AttachString);
725 ///////////////////////////////////////////////////////////////////////////////
// Which IDNA processing variant to use.
enum IdnVariant {
  INTL_IDN_VARIANT_2003 = 0,
  INTL_IDN_VARIANT_UTS46 = 1
};

// Direction of an IDN conversion.
enum {
  INTL_IDN_TO_ASCII = 0,
  INTL_IDN_TO_UTF8 = 1
};
737 #ifdef HAVE_46_API
739 const StaticString
740 s_result("result"),
741 s_isTransitionalDifferent("isTransitionalDifferent"),
742 s_errors("errors");
744 static Variant php_intl_idn_to_46(CStrRef domain, int64_t options, IdnVariant idn_variant, VRefParam idna_info, int mode) {
745 int32_t converted_capacity;
746 char *converted = NULL;
747 int32_t converted_len;
748 UIDNA *uts46;
749 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
750 UErrorCode status = U_ZERO_ERROR;
752 // Get UIDNA instance which implements UTS #46.
753 uts46 = uidna_openUTS46(options, &status);
754 SCOPE_EXIT { uidna_close(uts46); };
755 if (U_FAILURE(status)) return false;
757 // Call the appropriate IDN function
758 status = U_ZERO_ERROR;
759 converted_capacity = 255; // no domain name may exceed this
760 String result(converted_capacity, ReserveString); // reserves converted_capacity+1 characters.
761 converted = result.mutableSlice().ptr;
762 if (mode == INTL_IDN_TO_ASCII) {
763 converted_len = uidna_nameToASCII_UTF8(uts46, (char*)domain.data(), domain.size(),
764 converted, converted_capacity, &info, &status);
765 } else {
766 converted_len = uidna_nameToUnicodeUTF8(uts46, (char*)domain.data(), domain.size(),
767 converted, converted_capacity, &info, &status);
769 if (U_FAILURE(status) || converted_len > converted_capacity) return false;
770 if (info.errors == 0) {
771 result.setSize(converted_len);
772 } else {
773 result.setSize(0);
776 // Set up the array returned in idna_info.
777 ArrayInit arr(3);
778 arr.set(s_result, result);
779 arr.set(s_isTransitionalDifferent, info.isTransitionalDifferent);
780 arr.set(s_errors, (long)info.errors);
781 // As in Zend, the previous value of idn_variant is overwritten, not modified.
782 idna_info = arr.create();
783 if (info.errors == 0) {
784 return result;
786 return false;
789 #endif
791 static Variant php_intl_idn_to(CStrRef domain, int64_t options, IdnVariant idn_variant, VRefParam idna_info, int mode) {
792 UChar* ustring = NULL;
793 int ustring_len = 0;
794 UErrorCode status;
795 char *converted_utf8 = NULL;
796 int32_t converted_utf8_len;
797 UChar* converted = NULL;
798 int32_t converted_ret_len;
800 if (idn_variant != INTL_IDN_VARIANT_2003) {
801 #ifdef HAVE_46_API
802 if (idn_variant == INTL_IDN_VARIANT_UTS46) {
803 return php_intl_idn_to_46(domain, options, idn_variant, ref(idna_info), mode);
805 #endif
806 return false;
809 // Convert the string to UTF-16
810 status = U_ZERO_ERROR;
811 intl_convert_utf8_to_utf16(&ustring, &ustring_len,
812 (char*)domain.data(), domain.size(), &status);
813 if (U_FAILURE(status)) {
814 free(ustring);
815 return false;
818 // Call the appropriate IDN function
819 int converted_len = (ustring_len > 1) ? ustring_len : 1;
820 for (;;) {
821 UParseError parse_error;
822 status = U_ZERO_ERROR;
823 converted = (UChar*)malloc(sizeof(UChar)*converted_len);
824 // If the malloc failed, bail out
825 if (!converted) {
826 free(ustring);
827 return false;
829 if (mode == INTL_IDN_TO_ASCII) {
830 converted_ret_len = uidna_IDNToASCII(ustring,
831 ustring_len, converted, converted_len,
832 (int32_t)options, &parse_error, &status);
833 } else {
834 converted_ret_len = uidna_IDNToUnicode(ustring,
835 ustring_len, converted, converted_len,
836 (int32_t)options, &parse_error, &status);
838 if (status != U_BUFFER_OVERFLOW_ERROR)
839 break;
840 // If we have a buffer overflow error, try again with a larger buffer
841 free(converted);
842 converted = NULL;
843 converted_len = converted_len * 2;
845 free(ustring);
846 if (U_FAILURE(status)) {
847 free(converted);
848 return false;
851 // Convert the string back to UTF-8
852 status = U_ZERO_ERROR;
853 intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len,
854 converted, converted_ret_len, &status);
855 free(converted);
856 if (U_FAILURE(status)) {
857 free(converted_utf8);
858 return false;
861 // Return the string
862 return String(converted_utf8, converted_utf8_len, AttachString);
865 Variant f_idn_to_ascii(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
866 return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_ASCII);
869 Variant f_idn_to_unicode(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
870 return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_UTF8);
873 Variant f_idn_to_utf8(CStrRef domain, int64_t options /* = 0 */, int64_t variant /* = 0 */, VRefParam idna_info /* = null */) {
874 return php_intl_idn_to(domain, options, (IdnVariant)variant, idna_info, INTL_IDN_TO_UTF8);
877 ///////////////////////////////////////////////////////////////////////////////