Busybox: Upgrade to 1.21.1 (stable). lsof active.
[tomato.git] / release / src / router / php / ext / mysqlnd / mysqlnd_charset.c
blob8766a4b25dfb89181b3d76fc0e8a60917314b97d
1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2006-2013 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Georg Richter <georg@mysql.com> |
16 | Andrey Hristov <andrey@mysql.com> |
17 | Ulf Wendel <uwendel@mysql.com> |
18 +----------------------------------------------------------------------+
20 #include "php.h"
21 #include "php_globals.h"
22 #include "mysqlnd.h"
23 #include "mysqlnd_priv.h"
24 #include "mysqlnd_debug.h"
25 #include "mysqlnd_charset.h"
27 /* {{{ utf8 functions */
/*
  Inspects the byte sequence beginning at `start` (and ending before `end`)
  as 3-byte-max UTF-8.

  Returns the length in bytes of the character found there (1, 2 or 3), or 0
  when the range is empty, the lead byte is not acceptable, the sequence is
  cut short by `end`, or a continuation byte is out of range.
*/
static unsigned int check_mb_utf8mb3_sequence(const char *start, const char *end)
{
	unsigned int lead, second, third;

	if (start >= end) {
		return 0;
	}

	lead = start[0] & 0xFF;

	if (lead < 0x80) {
		return 1;	/* single byte character */
	}
	if (lead < 0xC2 || lead >= 0xF0) {
		return 0;	/* not a lead byte accepted by this 3-byte-max variant */
	}

	if (lead < 0xE0) {
		/* two-byte form: lead + one continuation byte in 80..BF */
		if (end - start < 2) {
			return 0;	/* too small */
		}
		second = start[1] & 0xFF;
		return ((second & 0xC0) == 0x80) ? 2 : 0;
	}

	/* three-byte form: lead + two continuation bytes in 80..BF */
	if (end - start < 3) {
		return 0;	/* too small */
	}
	second = start[1] & 0xFF;
	third = start[2] & 0xFF;
	if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
		return 0;	/* invalid utf8 character */
	}
	if (lead == 0xE0 && second < 0xA0) {
		return 0;	/* E0 must be followed by A0..BF */
	}
	return 3;
}
/*
  Inspects the byte sequence beginning at `start` (and ending before `end`)
  as UTF-8 with up to four bytes per character.

  Returns the length in bytes of the character found there (1..4), or 0 when
  the range is empty, the lead byte is not acceptable, the sequence is cut
  short by `end`, or a continuation byte is out of range.
*/
static unsigned int check_mb_utf8_sequence(const char *start, const char *end)
{
	unsigned int lead, second, third, fourth;

	if (start >= end) {
		return 0;
	}

	lead = start[0] & 0xFF;

	if (lead < 0x80) {
		return 1;	/* single byte character */
	}
	if (lead < 0xC2 || lead >= 0xF5) {
		return 0;	/* not an acceptable lead byte */
	}

	if (lead < 0xE0) {
		/* two-byte form */
		if (end - start < 2) {
			return 0;	/* too small */
		}
		second = start[1] & 0xFF;
		return ((second & 0xC0) == 0x80) ? 2 : 0;
	}

	if (lead < 0xF0) {
		/* three-byte form */
		if (end - start < 3) {
			return 0;	/* too small */
		}
		second = start[1] & 0xFF;
		third = start[2] & 0xFF;
		if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
			return 0;	/* invalid utf8 character */
		}
		if (lead == 0xE0 && second < 0xA0) {
			return 0;	/* E0 must be followed by A0..BF */
		}
		return 3;
	}

	/*
	  Four-byte form, lead byte F0..F4:
	    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	  The encoding could express U+00010000..U+001FFFFF, but the highest
	  character defined by Unicode is U+0010FFFF, so U+00110000..U+001FFFFF
	  are rejected.

	    F0.90.80.80 == U+00010000 (min)
	    F4.8F.BF.BF == U+0010FFFF (max)

	  Valid codes:
	    [F0][90..BF][80..BF][80..BF]
	    [F1][80..BF][80..BF][80..BF]
	    [F2][80..BF][80..BF][80..BF]
	    [F3][80..BF][80..BF][80..BF]
	    [F4][80..8F][80..BF][80..BF]
	*/
	if (end - start < 4) {
		return 0;	/* too small, we need 4 bytes */
	}
	second = start[1] & 0xFF;
	third = start[2] & 0xFF;
	fourth = start[3] & 0xFF;
	if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80 || (fourth & 0xC0) != 0x80) {
		return 0;	/* invalid utf8 character */
	}
	if (lead == 0xF0 && second < 0x90) {
		return 0;	/* below U+00010000 */
	}
	if (lead == 0xF4 && second > 0x8F) {
		return 0;	/* above U+0010FFFF */
	}
	return 4;
}
/*
  Multi-byte validity check for the 3-byte-max UTF-8 variant: returns the
  sequence length only when it is longer than one byte, otherwise 0 (so a
  single-byte character is reported as "not a multi-byte character").
*/
static unsigned int check_mb_utf8mb3_valid(const char *start, const char *end)
{
	const unsigned int seq_len = check_mb_utf8mb3_sequence(start, end);

	if (seq_len <= 1) {
		return 0;
	}
	return seq_len;
}
/*
  Multi-byte validity check for 4-byte-max UTF-8: returns the sequence length
  only when it is longer than one byte, otherwise 0 (so a single-byte
  character is reported as "not a multi-byte character").
*/
static unsigned int check_mb_utf8_valid(const char *start, const char *end)
{
	const unsigned int seq_len = check_mb_utf8_sequence(start, end);

	if (seq_len <= 1) {
		return 0;
	}
	return seq_len;
}
/*
  Maps a lead byte value to the length of the character it introduces in the
  3-byte-max UTF-8 variant: 1, 2 or 3, or 0 when the value is not an
  acceptable lead byte.
*/
static unsigned int mysqlnd_mbcharlen_utf8mb3(unsigned int utf8)
{
	if (utf8 >= 0xF0) {
		return 0;	/* would need four bytes or more */
	}
	if (utf8 >= 0xE0) {
		return 3;	/* triple byte character */
	}
	if (utf8 >= 0xC2) {
		return 2;	/* double byte character */
	}
	if (utf8 >= 0x80) {
		return 0;	/* invalid multibyte header */
	}
	return 1;		/* single byte character */
}
/*
  Maps a lead byte value to the length of the character it introduces in
  4-byte-max UTF-8: 1..4, or 0 when the value is not an acceptable lead byte.
*/
static unsigned int mysqlnd_mbcharlen_utf8(unsigned int utf8)
{
	if (utf8 >= 0xF8) {
		return 0;
	}
	if (utf8 >= 0xF0) {
		return 4;	/* four byte character */
	}
	if (utf8 >= 0xE0) {
		return 3;	/* triple byte character */
	}
	if (utf8 >= 0xC2) {
		return 2;	/* double byte character */
	}
	if (utf8 >= 0x80) {
		return 0;	/* invalid multibyte header */
	}
	return 1;		/* single byte character */
}
189 /* }}} */
192 /* {{{ big5 functions */
/* Range tests on a byte VALUE (0..255); callers must not pass a raw, possibly negative, char. */
#define valid_big5head(c)	(0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xF9)
#define valid_big5tail(c)	((0x40 <= (unsigned int)(c) && (unsigned int)(c) <= 0x7E) || \
							(0xA1 <= (unsigned int)(c) && (unsigned int)(c) <= 0xFE))

/* True when (c, d) form a lead/trail pair. Previously referred to undefined isbig5head/isbig5tail. */
#define isbig5code(c,d) (valid_big5head(c) && valid_big5tail(d))

/*
  Returns 2 when `start` points at a big5 double-byte character that lies
  completely before `end`, otherwise 0.

  The bytes are masked to 0..255 before the range tests: a plain `char` may be
  signed, and a negative value converted to unsigned int would never fall in
  the A1..F9 window, so no double-byte character would ever be recognised.
*/
static unsigned int check_mb_big5(const char *start, const char *end)
{
	if (end - start < 2) {
		return 0;	/* need a lead and a trail byte */
	}
	if (valid_big5head(start[0] & 0xFF) && valid_big5tail(start[1] & 0xFF)) {
		return 2;
	}
	return 0;
}


/* Returns 2 when `big5` is a lead byte value, otherwise 1. */
static unsigned int mysqlnd_mbcharlen_big5(unsigned int big5)
{
	return (valid_big5head(big5)) ? 2 : 1;
}
209 /* }}} */
212 /* {{{ cp932 functions */
/* Range tests on a byte value (0..255). Every use of the argument is parenthesized. */
#define valid_cp932head(c) ((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_cp932tail(c) ((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
  Returns 2 when `start` points at a cp932 double-byte character that lies
  completely before `end`, otherwise 0.
*/
static unsigned int check_mb_cp932(const char *start, const char *end)
{
	if (end - start < 2) {
		return 0;	/* need a lead and a trail byte */
	}
	if (valid_cp932head(start[0] & 0xFF) && valid_cp932tail(start[1] & 0xFF)) {
		return 2;
	}
	return 0;
}


/* Returns 2 when the low 8 bits of `cp932` are a lead byte, otherwise 1. */
static unsigned int mysqlnd_mbcharlen_cp932(unsigned int cp932)
{
	return (valid_cp932head(cp932 & 0xFF)) ? 2 : 1;
}
228 /* }}} */
231 /* {{{ euckr functions */
/* True when the low 8 bits of `c` are in A1..FE. */
#define valid_euckr(c)	((0xA1 <= ((c) & 0xFF) && ((c) & 0xFF) <= 0xFE))

/*
  Returns 2 when `start` points at a two-byte euckr character that lies
  completely before `end`, otherwise 0.

  NOTE(review): the first byte is only required to be >= 0x80, which is
  looser than valid_euckr(); only the second byte gets the A1..FE range test.
*/
static unsigned int check_mb_euckr(const char *start, const char *end)
{
	if (end - start <= 1) {
		return 0;	/* invalid length */
	}
	return ((start[0] & 0xFF) >= 0x80 && valid_euckr(start[1])) ? 2 : 0;
}


/* Returns 2 when the low 8 bits of `kr` are in A1..FE, otherwise 1. */
static unsigned int mysqlnd_mbcharlen_euckr(unsigned int kr)
{
	if (valid_euckr(kr)) {
		return 2;
	}
	return 1;
}
253 /* }}} */
256 /* {{{ eucjpms functions */
/* All tests look only at the low 8 bits of `c`. */
#define valid_eucjpms(c) 		(((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xFE)
#define valid_eucjpms_kata(c)	(((c) & 0xFF) >= 0xA1 && ((c) & 0xFF) <= 0xDF)
#define valid_eucjpms_ss2(c)	(((c) & 0xFF) == 0x8E)
#define valid_eucjpms_ss3(c)	(((c) & 0xFF) == 0x8F)

/*
  Returns the length of the multi-byte eucjpms character at `start`, or 0 when
  there is none that fits completely before `end`:
    - [A1..FE][A1..FE]        -> 2
    - [8E][A1..DF]            -> 2
    - [8F][A1..FE][A1..FE]    -> 3

  The three-byte form used to be reported as 2, which disagreed with
  mysqlnd_mbcharlen_eucjpms() (3 for a 0x8F lead byte) and left the third
  byte to be processed as if it started a new character.
*/
static unsigned int check_mb_eucjpms(const char *start, const char *end)
{
	if (start >= end || (start[0] & 0xFF) < 0x80) {
		return 0;	/* empty input or not a multi-byte lead */
	}
	if (valid_eucjpms(start[0]) && (end - start) > 1 && valid_eucjpms(start[1])) {
		return 2;
	}
	if (valid_eucjpms_ss2(start[0]) && (end - start) > 1 && valid_eucjpms_kata(start[1])) {
		return 2;
	}
	if (valid_eucjpms_ss3(start[0]) && (end - start) > 2 && valid_eucjpms(start[1]) &&
		valid_eucjpms(start[2])) {
		return 3;
	}
	return 0;
}


/* Returns the character length announced by lead byte `jpms`: 2, 3 (0x8F lead) or 1. */
static unsigned int mysqlnd_mbcharlen_eucjpms(unsigned int jpms)
{
	if (valid_eucjpms(jpms) || valid_eucjpms_ss2(jpms)) {
		return 2;
	}
	if (valid_eucjpms_ss3(jpms)) {
		return 3;
	}
	return 1;
}
291 /* }}} */
294 /* {{{ gb2312 functions */
/* Both tests look only at the low 8 bits of `c`. */
#define valid_gb2312_head(c)	(0xA1 <= ((c) & 0xFF) && ((c) & 0xFF) <= 0xF7)
#define valid_gb2312_tail(c)	(0xA1 <= ((c) & 0xFF) && ((c) & 0xFF) <= 0xFE)


/*
  Returns 2 when `start` points at a gb2312 double-byte character that lies
  completely before `end`, otherwise 0.
*/
static unsigned int check_mb_gb2312(const char *start, const char *end)
{
	if (end - start < 2) {
		return 0;	/* need a lead and a trail byte */
	}
	if (!valid_gb2312_head(start[0])) {
		return 0;
	}
	return valid_gb2312_tail(start[1]) ? 2 : 0;
}


/* Returns 2 when the low 8 bits of `gb` are a lead byte (A1..F7), otherwise 1. */
static unsigned int mysqlnd_mbcharlen_gb2312(unsigned int gb)
{
	if (valid_gb2312_head(gb)) {
		return 2;
	}
	return 1;
}
310 /* }}} */
313 /* {{{ gbk functions */
/* Both tests look only at the low 8 bits of `c`. */
#define valid_gbk_head(c)	(0x81 <= ((c) & 0xFF) && ((c) & 0xFF) <= 0xFE)
#define valid_gbk_tail(c)	((0x40 <= ((c) & 0xFF) && ((c) & 0xFF) <= 0x7E) || \
							(0x80 <= ((c) & 0xFF) && ((c) & 0xFF) <= 0xFE))

/*
  Returns 2 when `start` points at a gbk double-byte character that lies
  completely before `end`, otherwise 0.
*/
static unsigned int check_mb_gbk(const char *start, const char *end)
{
	if (end - start < 2) {
		return 0;	/* need a lead and a trail byte */
	}
	if (!valid_gbk_head(start[0])) {
		return 0;
	}
	return valid_gbk_tail(start[1]) ? 2 : 0;
}

/* Returns 2 when the low 8 bits of `gbk` are a lead byte (81..FE), otherwise 1. */
static unsigned int mysqlnd_mbcharlen_gbk(unsigned int gbk)
{
	if (valid_gbk_head(gbk)) {
		return 2;
	}
	return 1;
}
326 /* }}} */
329 /* {{{ sjis functions */
/* Range tests on a byte value (0..255). */
#define valid_sjis_head(c)	((0x81 <= (c) && (c) <= 0x9F) || (0xE0 <= (c) && (c) <= 0xFC))
#define valid_sjis_tail(c)	((0x40 <= (c) && (c) <= 0x7E) || (0x80 <= (c) && (c) <= 0xFC))


/*
  Returns 2 when `start` points at a sjis double-byte character that lies
  completely before `end`, otherwise 0.
*/
static unsigned int check_mb_sjis(const char *start, const char *end)
{
	if (end - start < 2) {
		return 0;	/* need a lead and a trail byte */
	}
	if (!valid_sjis_head(start[0] & 0xFF)) {
		return 0;
	}
	return valid_sjis_tail(start[1] & 0xFF) ? 2 : 0;
}


/* Returns 2 when the low 8 bits of `sjis` are a lead byte, otherwise 1. */
static unsigned int mysqlnd_mbcharlen_sjis(unsigned int sjis)
{
	if (valid_sjis_head(sjis & 0xFF)) {
		return 2;
	}
	return 1;
}
344 /* }}} */
347 /* {{{ ucs2 functions */
/*
  Returns 2 when a complete two-byte UCS-2 unit is available in [start, end),
  otherwise 0.

  The length used to be reported unconditionally; with an odd number of
  remaining bytes a caller that copies the reported length would read one
  byte past `end`.
*/
static unsigned int check_mb_ucs2(const char *start, const char *end)
{
	return (end - start >= 2) ? 2 : 0;
}
/* Character length for UCS-2: the argument is ignored, the answer is always 2. */
static unsigned int mysqlnd_mbcharlen_ucs2(unsigned int ucs2)
{
	(void) ucs2;	/* intentionally unused */

	return 2;
}
357 /* }}} */
360 /* {{{ ujis functions */
/* All tests look only at the low 8 bits of `c`. */
#define valid_ujis(c)     	((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xFE))
#define valid_ujis_kata(c)  ((0xA1 <= ((c)&0xFF) && ((c)&0xFF) <= 0xDF))
#define valid_ujis_ss2(c) 	(((c)&0xFF) == 0x8E)
#define valid_ujis_ss3(c) 	(((c)&0xFF) == 0x8F)

/*
  Returns the length of the multi-byte ujis character at `start`, or 0 when
  there is none that fits completely before `end`:
    - [A1..FE][A1..FE]        -> 2
    - [8E][A1..DF]            -> 2
    - [8F][A1..FE][A1..FE]    -> 3

  The two-byte forms used to inspect start[1] without checking that it lies
  before `end`; the length is now verified first.
*/
static unsigned int check_mb_ujis(const char *start, const char *end)
{
	if (end - start < 2) {
		return 0;	/* every multi-byte form needs at least two bytes */
	}
	if ((start[0] & 0xFF) < 0x80) {
		return 0;	/* invalid ujis character */
	}
	if (valid_ujis(start[0]) && valid_ujis(start[1])) {
		return 2;
	}
	if (valid_ujis_ss2(start[0]) && valid_ujis_kata(start[1])) {
		return 2;
	}
	if (valid_ujis_ss3(start[0]) && (end - start) > 2 && valid_ujis(start[1]) && valid_ujis(start[2])) {
		return 3;
	}
	return 0;
}


/* Returns the character length announced by lead byte `ujis`: 2, 3 (0x8F lead) or 1. */
static unsigned int mysqlnd_mbcharlen_ujis(unsigned int ujis)
{
	if (valid_ujis(ujis) || valid_ujis_ss2(ujis)) {
		return 2;
	}
	if (valid_ujis_ss3(ujis)) {
		return 3;
	}
	return 1;
}
388 /* }}} */
392 /* {{{ utf16 functions */
/* Both tests look at the low 8 bits of `x`: D8..DB is a high-surrogate lead, DC..DF a low-surrogate lead. */
#define UTF16_HIGH_HEAD(x)  ((((x) & 0xFF) & 0xFC) == 0xD8)
#define UTF16_LOW_HEAD(x)   ((((x) & 0xFF) & 0xFC) == 0xDC)

/*
  Returns the length of the UTF-16 character at `start`:
    - 4 for a high-surrogate unit followed by a low-surrogate unit,
    - 2 for a unit that is not a surrogate,
    - 0 when fewer than two bytes are available, a high surrogate is not
      followed by a complete low surrogate, or the unit is a lone low
      surrogate.
  Only the first byte of each two-byte unit is examined.
*/
static unsigned int check_mb_utf16(const char *start, const char *end)
{
	if (end - start < 2) {
		return 0;
	}

	if (UTF16_HIGH_HEAD(start[0])) {
		return (end - start >= 4 && UTF16_LOW_HEAD(start[2])) ? 4 : 0;
	}

	if (UTF16_LOW_HEAD(start[0])) {
		return 0;
	}
	return 2;
}


/*
  Returns 4 when `utf16` is a high-surrogate lead byte, otherwise 2.
  Declared with `unsigned int` (instead of `uint`) to match the other
  mbcharlen handlers in this file.
*/
static unsigned int mysqlnd_mbcharlen_utf16(unsigned int utf16)
{
	return UTF16_HIGH_HEAD(utf16) ? 4 : 2;
}
417 /* }}} */
420 /* {{{ utf32 functions */
/*
  Returns 4 when a complete four-byte UTF-32 unit is available in
  [start, end), otherwise 0.

  The length used to be reported unconditionally; with fewer than four
  remaining bytes a caller that copies the reported length would read past
  `end`. The return type is spelled `unsigned int` (instead of `uint`) to
  match the other handlers in this file.
*/
static unsigned int
check_mb_utf32(const char *start, const char *end)
{
	return (end - start >= 4) ? 4 : 0;
}
/*
  Character length for UTF-32: the argument is ignored, the answer is
  always 4. The return type is spelled `unsigned int` (instead of `uint`) to
  match the other mbcharlen handlers in this file.
*/
static unsigned int
mysqlnd_mbcharlen_utf32(unsigned int utf32 __attribute((unused)))
{
	return 4;
}
433 /* }}} */
/*
  The server is sometimes compiled so that full UTF-8 (the 4-byte variant) is
  called "utf8mb4" and the old 3-byte variant keeps the name "utf8", for BC
  reasons. In other builds the full variant is simply "utf8" and the old
  charsets are "utf8mb3".
  The names live in macros so they are easy to change and could be made
  compilation dependent.
*/
441 #define UTF8_MB4 "utf8mb4"
442 #define UTF8_MB3 "utf8"
444 /* {{{ mysqlnd_charsets */
445 const MYSQLND_CHARSET mysqlnd_charsets[] =
447 { 1, "big5","big5_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_big5, check_mb_big5},
448 { 3, "dec8", "dec8_swedisch_ci", 1, 1, "", NULL, NULL},
449 { 4, "cp850", "cp850_general_ci", 1, 1, "", NULL, NULL},
450 { 6, "hp8", "hp8_english_ci", 1, 1, "", NULL, NULL},
451 { 7, "koi8r", "koi8r_general_ci", 1, 1, "", NULL, NULL},
452 { 8, "latin1", "latin1_swedish_ci", 1, 1, "", NULL, NULL},
453 { 5, "latin1", "latin1_german_ci", 1, 1, "", NULL, NULL}, /* should be after 0x9 because swedish_ci is the default collation */
454 { 9, "latin2", "latin2_general_ci", 1, 1, "", NULL, NULL},
455 { 2, "latin2", "latin2_czech_cs", 1, 1, "", NULL, NULL}, /* should be after 0x9 because general_ci is the default collation */
456 { 10, "swe7", "swe7_swedish_ci", 1, 1, "", NULL, NULL},
457 { 11, "ascii", "ascii_general_ci", 1, 1, "", NULL, NULL},
458 { 12, "ujis", "ujis_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_ujis, check_mb_ujis},
459 { 13, "sjis", "sjis_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_sjis, check_mb_sjis},
460 { 16, "hebrew", "hebrew_general_ci", 1, 1, "", NULL, NULL},
461 { 17, "filename", "filename", 1, 5, "", NULL, NULL},
462 { 18, "tis620", "tis620_thai_ci", 1, 1, "", NULL, NULL},
463 { 19, "euckr", "euckr_korean_ci", 1, 2, "", mysqlnd_mbcharlen_euckr, check_mb_euckr},
464 { 21, "latin2", "latin2_hungarian_ci", 1, 1, "", NULL, NULL},
465 { 27, "latin2", "latin2_croatian_ci", 1, 1, "", NULL, NULL},
466 { 22, "koi8u", "koi8u_general_ci", 1, 1, "", NULL, NULL},
467 { 24, "gb2312", "gb2312_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gb2312, check_mb_gb2312},
468 { 25, "greek", "greek_general_ci", 1, 1, "", NULL, NULL},
469 { 26, "cp1250", "cp1250_general_ci", 1, 1, "", NULL, NULL},
470 { 28, "gbk", "gbk_chinese_ci", 1, 2, "", mysqlnd_mbcharlen_gbk, check_mb_gbk},
471 { 30, "latin5", "latin5_turkish_ci", 1, 1, "", NULL, NULL},
472 { 31, "latin1", "latin1_german2_ci", 1, 1, "", NULL, NULL},
473 { 15, "latin1", "latin1_danish_ci", 1, 1, "", NULL, NULL},
474 { 32, "armscii8", "armscii8_general_ci", 1, 1, "", NULL, NULL},
475 { 33, UTF8_MB3, UTF8_MB3"_general_ci", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
476 { 35, "ucs2", "ucs2_general_ci", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
477 { 36, "cp866", "cp866_general_ci", 1, 1, "", NULL, NULL},
478 { 37, "keybcs2", "keybcs2_general_ci", 1, 1, "", NULL, NULL},
479 { 38, "macce", "macce_general_ci", 1, 1, "", NULL, NULL},
480 { 39, "macroman", "macroman_general_ci", 1, 1, "", NULL, NULL},
481 { 40, "cp852", "cp852_general_ci", 1, 1, "", NULL, NULL},
482 { 41, "latin7", "latin7_general_ci", 1, 1, "", NULL, NULL},
483 { 20, "latin7", "latin7_estonian_cs", 1, 1, "", NULL, NULL},
484 { 57, "cp1256", "cp1256_general_ci", 1, 1, "", NULL, NULL},
485 { 59, "cp1257", "cp1257_general_ci", 1, 1, "", NULL, NULL},
486 { 63, "binary", "binary", 1, 1, "", NULL, NULL},
487 { 97, "eucjpms", "eucjpms_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
488 { 29, "cp1257", "cp1257_lithunian_ci", 1, 1, "", NULL, NULL},
489 { 31, "latin1", "latin1_german2_ci", 1, 1, "", NULL, NULL},
490 { 34, "cp1250", "cp1250_czech_cs", 1, 1, "", NULL, NULL},
491 { 42, "latin7", "latin7_general_cs", 1, 1, "", NULL, NULL},
492 { 43, "macce", "macce_bin", 1, 1, "", NULL, NULL},
493 { 44, "cp1250", "cp1250_croatian_ci", 1, 1, "", NULL, NULL},
494 { 45, UTF8_MB4, UTF8_MB4"_general_ci", 1, 4, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
495 { 46, UTF8_MB4, UTF8_MB4"_bin", 1, 4, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
496 { 47, "latin1", "latin1_bin", 1, 1, "", NULL, NULL},
497 { 48, "latin1", "latin1_general_ci", 1, 1, "", NULL, NULL},
498 { 49, "latin1", "latin1_general_cs", 1, 1, "", NULL, NULL},
499 { 51, "cp1251", "cp1251_general_ci", 1, 1, "", NULL, NULL},
500 { 14, "cp1251", "cp1251_bulgarian_ci", 1, 1, "", NULL, NULL},
501 { 23, "cp1251", "cp1251_ukrainian_ci", 1, 1, "", NULL, NULL},
502 { 50, "cp1251", "cp1251_bin", 1, 1, "", NULL, NULL},
503 { 52, "cp1251", "cp1251_general_cs", 1, 1, "", NULL, NULL},
504 { 53, "macroman", "macroman_bin", 1, 1, "", NULL, NULL},
505 { 54, "utf16", "utf16_general_ci", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
506 { 55, "utf16", "utf16_bin", 2, 4, "UTF-16 Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
507 { 56, "utf16le", "utf16le_general_ci", 2, 4, "UTF-16LE Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
508 { 58, "cp1257", "cp1257_bin", 1, 1, "", NULL, NULL},
509 #ifdef USED_TO_BE_SO_BEFORE_MYSQL_5_5
510 { 60, "armascii8", "armascii8_bin", 1, 1, "", NULL, NULL},
511 #endif
512 /*55*/{ 60, "utf32", "utf32_general_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
513 /*55*/{ 61, "utf32", "utf32_bin", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
514 { 62, "utf16le", "utf16le_bin", 2, 4, "UTF-16LE Unicode", mysqlnd_mbcharlen_utf16, check_mb_utf16},
515 { 65, "ascii", "ascii_bin", 1, 1, "", NULL, NULL},
516 { 66, "cp1250", "cp1250_bin", 1, 1, "", NULL, NULL},
517 { 67, "cp1256", "cp1256_bin", 1, 1, "", NULL, NULL},
518 { 68, "cp866", "cp866_bin", 1, 1, "", NULL, NULL},
519 { 69, "dec8", "dec8_bin", 1, 1, "", NULL, NULL},
520 { 70, "greek", "greek_bin", 1, 1, "", NULL, NULL},
521 { 71, "hebew", "hebrew_bin", 1, 1, "", NULL, NULL},
522 { 72, "hp8", "hp8_bin", 1, 1, "", NULL, NULL},
523 { 73, "keybcs2", "keybcs2_bin", 1, 1, "", NULL, NULL},
524 { 74, "koi8r", "koi8r_bin", 1, 1, "", NULL, NULL},
525 { 75, "koi8u", "koi8u_bin", 1, 1, "", NULL, NULL},
526 { 77, "latin2", "latin2_bin", 1, 1, "", NULL, NULL},
527 { 78, "latin5", "latin5_bin", 1, 1, "", NULL, NULL},
528 { 79, "latin7", "latin7_bin", 1, 1, "", NULL, NULL},
529 { 80, "cp850", "cp850_bin", 1, 1, "", NULL, NULL},
530 { 81, "cp852", "cp852_bin", 1, 1, "", NULL, NULL},
531 { 82, "swe7", "swe7_bin", 1, 1, "", NULL, NULL},
532 { 83, UTF8_MB3, UTF8_MB3"_bin", 1, 3, "UTF-8 Unicode", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
533 { 84, "big5", "big5_bin", 1, 2, "", mysqlnd_mbcharlen_big5, check_mb_big5},
534 { 85, "euckr", "euckr_bin", 1, 2, "", mysqlnd_mbcharlen_euckr, check_mb_euckr},
535 { 86, "gb2312", "gb2312_bin", 1, 2, "", mysqlnd_mbcharlen_gb2312, check_mb_gb2312},
536 { 87, "gbk", "gbk_bin", 1, 2, "", mysqlnd_mbcharlen_gbk, check_mb_gbk},
537 { 88, "sjis", "sjis_bin", 1, 2, "", mysqlnd_mbcharlen_sjis, check_mb_sjis},
538 { 89, "tis620", "tis620_bin", 1, 1, "", NULL, NULL},
539 { 90, "ucs2", "ucs2_bin", 2, 2, "UCS-2 Unicode", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
540 { 91, "ujis", "ujis_bin", 1, 3, "", mysqlnd_mbcharlen_ujis, check_mb_ujis},
541 { 92, "geostd8", "geostd8_general_ci", 1, 1, "", NULL, NULL},
542 { 93, "geostd8", "geostd8_bin", 1, 1, "", NULL, NULL},
543 { 94, "latin1", "latin1_spanish_ci", 1, 1, "", NULL, NULL},
544 { 95, "cp932", "cp932_japanese_ci", 1, 2, "", mysqlnd_mbcharlen_cp932, check_mb_cp932},
545 { 96, "cp932", "cp932_bin", 1, 2, "", mysqlnd_mbcharlen_cp932, check_mb_cp932},
546 { 97, "eucjpms", "eucjpms_japanese_ci", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
547 { 98, "eucjpms", "eucjpms_bin", 1, 3, "", mysqlnd_mbcharlen_eucjpms, check_mb_eucjpms},
548 { 99, "cp1250", "cp1250_polish_ci", 1, 1, "", NULL, NULL},
549 { 128, "ucs2", "ucs2_unicode_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
550 { 129, "ucs2", "ucs2_icelandic_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
551 { 130, "ucs2", "ucs2_latvian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
552 { 131, "ucs2", "ucs2_romanian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
553 { 132, "ucs2", "ucs2_slovenian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
554 { 133, "ucs2", "ucs2_polish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
555 { 134, "ucs2", "ucs2_estonian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
556 { 135, "ucs2", "ucs2_spanish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
557 { 136, "ucs2", "ucs2_swedish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
558 { 137, "ucs2", "ucs2_turkish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
559 { 138, "ucs2", "ucs2_czech_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
560 { 139, "ucs2", "ucs2_danish_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
561 { 140, "ucs2", "ucs2_lithunian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
562 { 141, "ucs2", "ucs2_slovak_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
563 { 142, "ucs2", "ucs2_spanish2_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
564 { 143, "ucs2", "ucs2_roman_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
565 { 144, "ucs2", "ucs2_persian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
566 { 145, "ucs2", "ucs2_esperanto_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
567 { 146, "ucs2", "ucs2_hungarian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
568 { 147, "ucs2", "ucs2_sinhala_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
569 { 148, "ucs2", "ucs2_german2_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
570 { 149, "ucs2", "ucs2_croatian_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
571 { 150, "ucs2", "ucs2_unicode_520_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
572 { 151, "ucs2", "ucs2_vietnamese_ci", 2, 2, "", mysqlnd_mbcharlen_ucs2, check_mb_ucs2},
574 /*56*/{160, "utf32", "utf32_unicode_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
575 /*56*/{161, "utf32", "utf32_icelandic_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
576 /*56*/{162, "utf32", "utf32_latvian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
577 /*56*/{163, "utf32", "utf32_romanian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
578 /*56*/{164, "utf32", "utf32_slovenian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
579 /*56*/{165, "utf32", "utf32_polish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
580 /*56*/{166, "utf32", "utf32_estonian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
581 /*56*/{167, "utf32", "utf32_spanish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
582 /*56*/{168, "utf32", "utf32_swedish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
583 /*56*/{169, "utf32", "utf32_turkish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
584 /*56*/{170, "utf32", "utf32_czech_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
585 /*56*/{171, "utf32", "utf32_danish_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
586 /*56*/{172, "utf32", "utf32_lithuanian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
587 /*56*/{173, "utf32", "utf32_slovak_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
588 /*56*/{174, "utf32", "utf32_spanish2_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
589 /*56*/{175, "utf32", "utf32_roman_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
590 /*56*/{176, "utf32", "utf32_persian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
591 /*56*/{177, "utf32", "utf32_esperanto_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
592 /*56*/{178, "utf32", "utf32_hungarian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
593 /*56*/{179, "utf32", "utf32_sinhala_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
594 /*56*/{180, "utf32", "utf32_german2_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
595 /*56*/{181, "utf32", "utf32_croatian_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
596 /*56*/{182, "utf32", "utf32_unicode_520_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
597 /*56*/{183, "utf32", "utf32_vietnamese_ci", 4, 4, "UTF-32 Unicode", mysqlnd_mbcharlen_utf32, check_mb_utf32},
599 { 192, UTF8_MB3, UTF8_MB3"_general_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
600 { 193, UTF8_MB3, UTF8_MB3"_icelandic_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
601 { 194, UTF8_MB3, UTF8_MB3"_latvian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
602 { 195, UTF8_MB3, UTF8_MB3"_romanian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
603 { 196, UTF8_MB3, UTF8_MB3"_slovenian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
604 { 197, UTF8_MB3, UTF8_MB3"_polish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
605 { 198, UTF8_MB3, UTF8_MB3"_estonian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
606 { 199, UTF8_MB3, UTF8_MB3"_spanish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
607 { 200, UTF8_MB3, UTF8_MB3"_swedish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
608 { 201, UTF8_MB3, UTF8_MB3"_turkish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
609 { 202, UTF8_MB3, UTF8_MB3"_czech_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
610 { 203, UTF8_MB3, UTF8_MB3"_danish_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
611 { 204, UTF8_MB3, UTF8_MB3"_lithunian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid },
612 { 205, UTF8_MB3, UTF8_MB3"_slovak_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
613 { 206, UTF8_MB3, UTF8_MB3"_spanish2_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
614 { 207, UTF8_MB3, UTF8_MB3"_roman_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
615 { 208, UTF8_MB3, UTF8_MB3"_persian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
616 { 209, UTF8_MB3, UTF8_MB3"_esperanto_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
617 { 210, UTF8_MB3, UTF8_MB3"_hungarian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
618 { 211, UTF8_MB3, UTF8_MB3"_sinhala_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
619 { 211, UTF8_MB3, UTF8_MB3"_german2_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
620 { 213, UTF8_MB3, UTF8_MB3"_croatian_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
621 { 214, UTF8_MB3, UTF8_MB3"_unicode_520_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
622 { 215, UTF8_MB3, UTF8_MB3"_vietnamese_ci", 1, 3, "", mysqlnd_mbcharlen_utf8mb3, check_mb_utf8mb3_valid},
624 { 224, UTF8_MB4, UTF8_MB4"_unicode_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
625 { 225, UTF8_MB4, UTF8_MB4"_icelandic_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
626 { 226, UTF8_MB4, UTF8_MB4"_latvian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
627 { 227, UTF8_MB4, UTF8_MB4"_romanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
628 { 228, UTF8_MB4, UTF8_MB4"_slovenian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
629 { 229, UTF8_MB4, UTF8_MB4"_polish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
630 { 230, UTF8_MB4, UTF8_MB4"_estonian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
631 { 231, UTF8_MB4, UTF8_MB4"_spanish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
632 { 232, UTF8_MB4, UTF8_MB4"_swedish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
633 { 233, UTF8_MB4, UTF8_MB4"_turkish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
634 { 234, UTF8_MB4, UTF8_MB4"_czech_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
635 { 235, UTF8_MB4, UTF8_MB4"_danish_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
636 { 236, UTF8_MB4, UTF8_MB4"_lithuanian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
637 { 237, UTF8_MB4, UTF8_MB4"_slovak_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
638 { 238, UTF8_MB4, UTF8_MB4"_spanish2_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
639 { 239, UTF8_MB4, UTF8_MB4"_roman_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
640 { 240, UTF8_MB4, UTF8_MB4"_persian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
641 { 241, UTF8_MB4, UTF8_MB4"_esperanto_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
642 { 242, UTF8_MB4, UTF8_MB4"_hungarian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
643 { 243, UTF8_MB4, UTF8_MB4"_sinhala_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
644 { 244, UTF8_MB4, UTF8_MB4"_german2_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
645 { 245, UTF8_MB4, UTF8_MB4"_croatian_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
646 { 246, UTF8_MB4, UTF8_MB4"_unicode_520_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
647 { 247, UTF8_MB4, UTF8_MB4"_vietnamese_ci", 1, 4, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
649 { 254, UTF8_MB3, UTF8_MB3"_general_cs", 1, 3, "", mysqlnd_mbcharlen_utf8, check_mb_utf8_valid},
650 { 0, NULL, NULL, 0, 0, NULL, NULL, NULL}
652 /* }}} */
655 /* {{{ mysqlnd_find_charset_nr */
656 PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_nr(unsigned int charsetnr)
658 const MYSQLND_CHARSET * c = mysqlnd_charsets;
660 do {
661 if (c->nr == charsetnr) {
662 return c;
664 ++c;
665 } while (c[0].nr != 0);
666 return NULL;
668 /* }}} */
671 /* {{{ mysqlnd_find_charset_name */
672 PHPAPI const MYSQLND_CHARSET * mysqlnd_find_charset_name(const char * const name)
674 if (name) {
675 const MYSQLND_CHARSET * c = mysqlnd_charsets;
676 do {
677 if (!strcasecmp(c->name, name)) {
678 return c;
680 ++c;
681 } while (c[0].nr != 0);
683 return NULL;
685 /* }}} */
688 /* {{{ mysqlnd_cset_escape_quotes */
689 PHPAPI ulong mysqlnd_cset_escape_quotes(const MYSQLND_CHARSET * const cset, char *newstr,
690 const char * escapestr, size_t escapestr_len TSRMLS_DC)
692 const char *newstr_s = newstr;
693 const char *newstr_e = newstr + 2 * escapestr_len;
694 const char *end = escapestr + escapestr_len;
695 zend_bool escape_overflow = FALSE;
697 DBG_ENTER("mysqlnd_cset_escape_quotes");
699 for (;escapestr < end; escapestr++) {
700 unsigned int len = 0;
701 /* check unicode characters */
703 if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
705 /* check possible overflow */
706 if ((newstr + len) > newstr_e) {
707 escape_overflow = TRUE;
708 break;
710 /* copy mb char without escaping it */
711 while (len--) {
712 *newstr++ = *escapestr++;
714 escapestr--;
715 continue;
717 if (*escapestr == '\'') {
718 if (newstr + 2 > newstr_e) {
719 escape_overflow = TRUE;
720 break;
722 *newstr++ = '\'';
723 *newstr++ = '\'';
724 } else {
725 if (newstr + 1 > newstr_e) {
726 escape_overflow = TRUE;
727 break;
729 *newstr++ = *escapestr;
732 *newstr = '\0';
734 if (escape_overflow) {
735 DBG_RETURN((ulong)~0);
737 DBG_RETURN((ulong)(newstr - newstr_s));
739 /* }}} */
742 /* {{{ mysqlnd_cset_escape_slashes */
743 PHPAPI ulong mysqlnd_cset_escape_slashes(const MYSQLND_CHARSET * const cset, char *newstr,
744 const char * escapestr, size_t escapestr_len TSRMLS_DC)
746 const char *newstr_s = newstr;
747 const char *newstr_e = newstr + 2 * escapestr_len;
748 const char *end = escapestr + escapestr_len;
749 zend_bool escape_overflow = FALSE;
751 DBG_ENTER("mysqlnd_cset_escape_slashes");
752 DBG_INF_FMT("charset=%s", cset->name);
754 for (;escapestr < end; escapestr++) {
755 char esc = '\0';
756 unsigned int len = 0;
758 /* check unicode characters */
759 if (cset->char_maxlen > 1 && (len = cset->mb_valid(escapestr, end))) {
760 /* check possible overflow */
761 if ((newstr + len) > newstr_e) {
762 escape_overflow = TRUE;
763 break;
765 /* copy mb char without escaping it */
766 while (len--) {
767 *newstr++ = *escapestr++;
769 escapestr--;
770 continue;
772 if (cset->char_maxlen > 1 && cset->mb_charlen(*escapestr) > 1) {
773 esc = *escapestr;
774 } else {
775 switch (*escapestr) {
776 case 0:
777 esc = '0';
778 break;
779 case '\n':
780 esc = 'n';
781 break;
782 case '\r':
783 esc = 'r';
784 break;
785 case '\\':
786 case '\'':
787 case '"':
788 esc = *escapestr;
789 break;
790 case '\032':
791 esc = 'Z';
792 break;
795 if (esc) {
796 if (newstr + 2 > newstr_e) {
797 escape_overflow = TRUE;
798 break;
800 /* copy escaped character */
801 *newstr++ = '\\';
802 *newstr++ = esc;
803 } else {
804 if (newstr + 1 > newstr_e) {
805 escape_overflow = TRUE;
806 break;
808 /* copy non escaped character */
809 *newstr++ = *escapestr;
812 *newstr = '\0';
814 if (escape_overflow) {
815 DBG_RETURN((ulong)~0);
817 DBG_RETURN((ulong)(newstr - newstr_s));
819 /* }}} */
822 static struct st_mysqlnd_plugin_charsets mysqlnd_plugin_charsets_plugin =
825 MYSQLND_PLUGIN_API_VERSION,
826 "charsets",
827 MYSQLND_VERSION_ID,
828 MYSQLND_VERSION,
829 "PHP License 3.01",
830 "Andrey Hristov <andrey@mysql.com>, Ulf Wendel <uwendel@mysql.com>, Georg Richter <georg@mysql.com>",
832 NULL, /* no statistics , will be filled later if there are some */
833 NULL, /* no statistics */
836 NULL /* plugin shutdown */
839 {/* methods */
840 mysqlnd_find_charset_nr,
841 mysqlnd_find_charset_name,
842 mysqlnd_cset_escape_quotes,
843 mysqlnd_cset_escape_slashes
848 /* {{{ mysqlnd_charsets_plugin_register */
849 void
850 mysqlnd_charsets_plugin_register(TSRMLS_D)
852 mysqlnd_plugin_register_ex((struct st_mysqlnd_plugin_header *) &mysqlnd_plugin_charsets_plugin TSRMLS_CC);
854 /* }}} */
858 * Local variables:
859 * tab-width: 4
860 * c-basic-offset: 4
861 * End:
862 * vim600: noet sw=4 ts=4 fdm=marker
863 * vim<600: noet sw=4 ts=4