7 #include <runtime/base/zend/zend_collator.h>
9 // Avoid dragging in the icu namespace.
10 #ifndef U_USING_ICU_NAMESPACE
11 #define U_USING_ICU_NAMESPACE 0
14 #include <unicode/utypes.h>
15 #include <unicode/ucnv.h>
16 #include <unicode/ustring.h>
22 'name' => "UConverter",
23 'desc' => "ICU UConverter class",
24 'flags' => HasDocComment
,
27 static void throwFailure(UErrorCode error,
30 bool checkLimits(int64_t available, int64_t needed);
31 void appendToUTarget(Variant val, UConverterToUnicodeArgs *args);
32 void appendFromUTarget(Variant val, UConverterFromUnicodeArgs *args);
33 static void ucnvToUCallback(c_UConverter *objval,
34 UConverterToUnicodeArgs *args,
35 const char *codeUnits, int32_t length,
36 UConverterCallbackReason reason,
37 UErrorCode *pErrorCode);
38 static void ucnvFromUCallback(c_UConverter *objval,
39 UConverterFromUnicodeArgs *args,
40 const UChar *codeUnits, int32_t length,
42 UConverterCallbackReason reason,
43 UErrorCode *pErrorCode);
44 static bool setEncoding(CStrRef encoding,
47 static bool setSubstChars(String chars, UConverter *cnv, intl_error &err);
48 bool setCallback(UConverter *cnv);
49 Variant defaultCallback(int64 reason, VRefParam error);
50 static String doConvert(CStrRef str, UConverter *toCnv,
51 UConverter *fromCnv, intl_error &err);
61 'name' => "REASON_UNASSIGNED",
66 'name' => "REASON_ILLEGAL",
71 'name' => "REASON_IRREGULAR",
76 'name' => "REASON_RESET",
81 'name' => "REASON_CLOSE",
86 'name' => "REASON_CLONE",
91 'name' => "UNSUPPORTED_CONVERTER",
100 DefineConstant(array(
105 DefineConstant(array(
110 DefineConstant(array(
115 DefineConstant(array(
120 DefineConstant(array(
121 'name' => "UTF16_BigEndian",
125 DefineConstant(array(
126 'name' => "UTF16_LittleEndian",
130 DefineConstant(array(
131 'name' => "UTF32_BigEndian",
135 DefineConstant(array(
136 'name' => "UTF32_LittleEndian",
140 DefineConstant(array(
141 'name' => "EBCDIC_STATEFUL",
145 DefineConstant(array(
146 'name' => "ISO_2022",
150 DefineConstant(array(
155 DefineConstant(array(
160 DefineConstant(array(
165 DefineConstant(array(
170 DefineConstant(array(
175 DefineConstant(array(
180 DefineConstant(array(
185 DefineConstant(array(
186 'name' => "LMBCS_11",
190 DefineConstant(array(
191 'name' => "LMBCS_16",
195 DefineConstant(array(
196 'name' => "LMBCS_17",
200 DefineConstant(array(
201 'name' => "LMBCS_18",
205 DefineConstant(array(
206 'name' => "LMBCS_19",
210 DefineConstant(array(
211 'name' => "LMBCS_LAST",
215 DefineConstant(array(
220 DefineConstant(array(
225 DefineConstant(array(
230 DefineConstant(array(
231 'name' => "US_ASCII",
235 DefineConstant(array(
240 DefineConstant(array(
245 DefineConstant(array(
250 DefineConstant(array(
255 DefineConstant(array(
260 DefineConstant(array(
261 'name' => "IMAP_MAILBOX",
267 'name' => "__construct",
268 'desc' => "Object constructor",
269 'flags' => HasDocComment
,
272 'name' => "toEncoding",
274 'value' => "\"utf-8\"",
275 'desc' => "Target character encoding",
278 'name' => "fromEncoding",
280 'value' => "\"utf-8\"",
281 'desc' => "Source character encoding",
289 'name' => "__destruct",
290 'desc' => "Object destructor",
297 /* Get/Set Source/Destination encodings */
301 'name' => "getSourceEncoding",
302 'desc' => "Returns the name of the source encoding",
303 'flags' => HasDocComment
,
306 'desc' => "Cannonical name of source encoding",
313 'name' => "setSourceEncoding",
314 'desc' => "Changes the source encoding converter to the named encoding",
315 'flags' => HasDocComment
,
318 'name' => 'encoding',
320 'desc' => "Name of encoding to use",
328 'name' => "getDestinationEncoding",
329 'desc' => "Returns the name of the destination encoding",
330 'flags' => HasDocComment
,
333 'desc' => "Cannonical name of destination encoding",
340 'name' => "setDestinationEncoding",
341 'desc' => "Changes the destination encoding converter to the named encoding",
342 'flags' => HasDocComment
,
345 'name' => 'encoding',
347 'desc' => "Name of encoding to use",
353 /* Get algorithmic types */
357 'name' => "getSourceType",
358 'desc' => "Returns the source algorithmic encoding type (e.g. SBCS, DBCS, LATIN_1, UTF8, etc...)",
359 'flags' => HasDocComment
,
362 'desc' => "Algorithmic encoding type",
369 'name' => "getDestinationType",
370 'desc' => "Returns the destination algorithmic encoding type (e.g. SBCS, DBCS, LATIN_1, UTF8, etc...)",
371 'flags' => HasDocComment
,
374 'desc' => "Algorithmic encoding type",
379 /* Basic character substitution */
383 'name' => "getSubstChars",
384 'desc' => "Returns the current substitution character used for conversion failures",
385 'flags' => HasDocComment
,
388 'desc' => "One or more codeunits representing a single codepoint",
395 'name' => "setSubstChars",
396 'desc' => "Set the substitution character to use for conversion failures",
397 'flags' => HasDocComment
,
400 'desc' => "Whether or not setting substitution characters succeeded",
406 'desc' => "One or more codeunits representing a codepoint",
412 /* Standard callbacks */
416 'name' => "fromUCallback",
417 'desc' => "Issued by the object when converting to the target encoding",
418 'flags' => HasDocComment
,
421 'desc' => "Substitution codeunits for illegal/irregular/unassigned codepoints",
427 'desc' => "Event which caused the callback",
432 'desc' => "Contextual codepoints from the string being converted",
435 'name' => "codepoint",
437 'desc' => "The specific codepoint in question",
443 'desc' => "Reference param, error condition on the way in, should be U_ZERO_ERROR on the way out",
451 'name' => "toUCallback",
452 'desc' => "Issued by the object when converting from the source encoding",
453 'flags' => HasDocComment
,
456 'desc' => "Substitution codepoints for illegal/irregular/unassigned codeunits",
462 'desc' => "Event which caused the callback",
467 'desc' => "Contextual codeunits from the string being converted",
470 'name' => "codeunits",
472 'desc' => "The specific codeunits in question",
478 'desc' => "Reference param, error condition on the way in, should be U_ZERO_ERROR on the way out",
484 /* Primary converter functions */
489 'desc' => "Convert a string between the source/destination encodings",
490 'flags' => HasDocComment
,
493 'desc' => "Transcoded string",
499 'desc' => "String to be transcoded",
505 'desc' => "Convert from destination to source encodings instead",
513 'name' => "transcode",
514 'desc' => "Convert a string between two encodings",
515 'flags' => HasDocComment|IsStatic
,
518 'desc' => "Transcoded string",
524 'desc' => "String to be transcoded",
// toEncoding is the encoding converted TO (destination); fromEncoding is the
// encoding converted FROM (source) -- same convention as __construct.
527 'name' => "toEncoding",
529 'desc' => "Destination encoding",
532 'name' => "fromEncoding",
534 'desc' => "Source encoding",
539 'value' => "null_variant",
540 'desc' => "Optional configuration for converters",
546 /* ext/intl style error handlers */
550 'name' => "getErrorCode",
551 'desc' => "Last UErrorCode associated with this converter",
552 'flags' => HasDocComment
,
555 'desc' => "UErrorCode U_* value",
562 'name' => "getErrorMessage",
563 'desc' => "Last error message associated with this converter",
564 'flags' => HasDocComment
,
567 'desc' => "Descriptive error message",
572 /* Enumerators and lookups */
576 'name' => "reasonText",
577 'desc' => "Name of REASON_* constant",
578 'flags' => HasDocComment|IsStatic
,
581 'desc' => "Printable REASON_* constant name",
587 'desc' => "REASON_* constant",
595 'name' => "getAvailable",
596 'desc' => "Returns list of available encodings",
597 'flags' => HasDocComment|IsStatic
,
600 'desc' => "Available encodings",
607 'name' => "getAliases",
608 'desc' => "Returns valid aliases of the named encoding",
609 'flags' => HasDocComment|IsStatic
,
612 'desc' => "Valid aliases of the named encoding",
616 'name' => "encoding",
618 'desc' => "Encoding to get aliases of",
626 'name' => "getStandards",
627 'desc' => "Returns list supported standards",
628 'flags' => HasDocComment|IsStatic
,
631 'desc' => "Available standards",