Rename runtime/base/zend_* to zend-
[hiphop-php.git] / hphp / runtime / ext / ext_iconv.cpp
blob963a74c5a9baf0e2d5abb8c8a9a655d1c16abfca
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1997-2010 The PHP Group |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 3.01 of the PHP license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.php.net/license/3_01.txt |
12 | If you did not receive a copy of the PHP license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@php.net so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/ext/ext_iconv.h"
19 #include "hphp/runtime/base/string_buffer.h"
20 #include "hphp/runtime/base/request_local.h"
21 #include "hphp/runtime/base/zend-functions.h"
22 #include "hphp/runtime/base/zend-string.h"
24 #define ICONV_SUPPORTS_ERRNO 1
25 #include <iconv.h>
27 /**
28 * libiconv sometimes defines the second parameter of its
29 * main function as (char**), and sometimes as (const char**) but
30 * provides no means to detect this. Let build system determine
31 * which is appropriate and (optionally) define ICONV_CONST=const
32 * if such is needed in the cast.
34 #ifndef ICONV_CONST
35 # define ICONV_CONST
36 #endif
38 namespace HPHP {
39 IMPLEMENT_DEFAULT_EXTENSION(iconv);
40 ///////////////////////////////////////////////////////////////////////////////
/*
 * Returns non-zero when the first `c` bytes at `a` and `b` are identical.
 *
 * Implemented with memcmp() rather than by reinterpreting the operands as
 * unsigned int / unsigned long: the operands are plain char buffers, so a
 * wider typed load through a cast pointer violates strict aliasing and may be
 * misaligned. Compilers lower a small fixed-size memcmp to the same
 * single-word comparison where that is safe.
 */
#define _php_iconv_memequal(a, b, c) \
  (memcmp((a), (b), (c)) == 0)
45 static char _generic_superset_name[] = "UCS-4LE";
46 #define GENERIC_SUPERSET_NAME _generic_superset_name
47 #define GENERIC_SUPERSET_NBYTES 4
49 #define PHP_ICONV_MIME_DECODE_STRICT (1<<0)
50 #define PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR (1<<1)
// Encoding scheme of a MIME encoded-word payload. The MIME decoder in this
// file selects BASE64 for a 'b'/'B' specifier and QPRINT for 'q'/'Q'.
52 typedef enum _php_iconv_enc_scheme_t {
53 PHP_ICONV_ENC_SCHEME_BASE64,
54 PHP_ICONV_ENC_SCHEME_QPRINT
55 } php_iconv_enc_scheme_t;
// Result codes returned by the internal iconv helpers in this file.
// _php_iconv_show_error() turns them into user-visible warnings/notices.
57 typedef enum _php_iconv_err_t {
58 PHP_ICONV_ERR_SUCCESS = 0, // no error
59 PHP_ICONV_ERR_CONVERTER = 1, // iconv_open() failed for a reason other than EINVAL
60 PHP_ICONV_ERR_WRONG_CHARSET = 2, // iconv_open() failed with EINVAL
61 PHP_ICONV_ERR_TOO_BIG = 3, // output or line-length limit exceeded
62 PHP_ICONV_ERR_ILLEGAL_SEQ = 4, // iconv() reported EILSEQ
63 PHP_ICONV_ERR_ILLEGAL_CHAR = 5, // iconv() reported EINVAL (incomplete multibyte input)
64 PHP_ICONV_ERR_UNKNOWN = 6, // any other failure
65 PHP_ICONV_ERR_MALFORMED = 7, // malformed MIME input
66 PHP_ICONV_ERR_ALLOC = 8 // presumably an allocation failure; not produced by the code visible here
67 } php_iconv_err_t;
69 static void _php_iconv_show_error(php_iconv_err_t err, const char *out_charset,
70 const char *in_charset) {
71 switch (err) {
72 case PHP_ICONV_ERR_SUCCESS:
73 break;
74 case PHP_ICONV_ERR_CONVERTER:
75 raise_warning("iconv: Cannot open converter");
76 break;
77 case PHP_ICONV_ERR_WRONG_CHARSET:
78 raise_warning("iconv: Wrong charset, "
79 "conversion from `%s' to `%s' is not allowed",
80 in_charset, out_charset);
81 break;
82 case PHP_ICONV_ERR_ILLEGAL_CHAR:
83 raise_notice("iconv: Detected an incomplete multibyte character "
84 "in input string");
85 break;
86 case PHP_ICONV_ERR_ILLEGAL_SEQ:
87 raise_notice("iconv: Detected an illegal character in input string");
88 break;
89 case PHP_ICONV_ERR_TOO_BIG:
90 // should not happen
91 raise_warning("iconv: Buffer length exceeded");
92 break;
93 case PHP_ICONV_ERR_MALFORMED:
94 raise_notice("iconv: Malformed string");
95 break;
96 default:
97 // other error
98 raise_notice("iconv: Unknown error (%d)", errno);
99 break;
103 class ICONVGlobals : public RequestEventHandler {
104 public:
105 String input_encoding;
106 String output_encoding;
107 String internal_encoding;
109 ICONVGlobals() {}
111 virtual void requestInit() {
112 input_encoding = "ISO-8859-1";
113 output_encoding = "ISO-8859-1";
114 internal_encoding = "ISO-8859-1";
117 virtual void requestShutdown() {
118 input_encoding.reset();
119 output_encoding.reset();
120 internal_encoding.reset();
123 IMPLEMENT_STATIC_REQUEST_LOCAL(ICONVGlobals, s_iconv_globals);
124 #define ICONVG(name) s_iconv_globals->name
126 ///////////////////////////////////////////////////////////////////////////////
127 // helpers
129 #ifndef ICONV_CSNMAXLEN
130 #define ICONV_CSNMAXLEN 64
131 #endif
132 static bool validate_charset(CStrRef charset) {
133 if (charset.size() >= ICONV_CSNMAXLEN) {
134 throw_invalid_argument
135 ("Charset parameter exceeds the maximum allowed "
136 "length of %d characters", ICONV_CSNMAXLEN);
137 return false;
139 return true;
142 static Variant check_charset(CStrRef charset) {
143 if (!validate_charset(charset)) return false;
144 if (charset.empty()) {
145 return ICONVG(internal_encoding);
147 return charset;
150 static php_iconv_err_t _php_iconv_appendl(StringBuffer &d, const char *s,
151 size_t l, iconv_t cd) {
152 const char *in_p = s;
153 size_t in_left = l;
154 size_t out_left = 0;
155 size_t buf_growth = 128;
156 char *out_p;
157 #if !ICONV_SUPPORTS_ERRNO
158 size_t prev_in_left = in_left;
159 #endif
161 if (in_p != NULL) {
162 while (in_left > 0) {
163 out_left = buf_growth - out_left;
164 out_p = d.reserve(out_left);
166 if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **)&out_p, &out_left) ==
167 (size_t)-1) {
168 #if ICONV_SUPPORTS_ERRNO
169 switch (errno) {
170 case EINVAL: return PHP_ICONV_ERR_ILLEGAL_CHAR;
171 case EILSEQ: return PHP_ICONV_ERR_ILLEGAL_SEQ;
172 case E2BIG: break;
173 default:
174 return PHP_ICONV_ERR_UNKNOWN;
176 #else
177 if (prev_in_left == in_left) {
178 return PHP_ICONV_ERR_UNKNOWN;
180 #endif
182 #if !ICONV_SUPPORTS_ERRNO
183 prev_in_left = in_left;
184 #endif
185 d.resize(d.size() + buf_growth - out_left);
186 buf_growth <<= 1;
188 } else {
189 for (;;) {
190 out_left = buf_growth - out_left;
191 out_p = d.reserve(out_left);
193 if (iconv(cd, NULL, NULL, (char **)&out_p, &out_left) == (size_t)0) {
194 d.resize(d.size() + buf_growth - out_left);
195 break;
196 } else {
197 #if ICONV_SUPPORTS_ERRNO
198 if (errno != E2BIG) {
199 return PHP_ICONV_ERR_UNKNOWN;
201 #else
202 if (out_left != 0) {
203 return PHP_ICONV_ERR_UNKNOWN;
205 #endif
207 d.resize(d.size() + buf_growth - out_left);
208 buf_growth <<= 1;
211 return PHP_ICONV_ERR_SUCCESS;
214 static php_iconv_err_t _php_iconv_appendc(StringBuffer &d, const char c,
215 iconv_t cd) {
216 return _php_iconv_appendl(d, &c, 1, cd);
219 static php_iconv_err_t php_iconv_string(const char *in_p, size_t in_len,
220 char **out, size_t *out_len,
221 const char *out_charset,
222 const char *in_charset) {
223 #if !ICONV_SUPPORTS_ERRNO
224 size_t in_size, out_size, out_left;
225 char *out_buffer, *out_p;
226 iconv_t cd;
227 size_t result;
229 *out = NULL;
230 *out_len = 0;
233 * This is not the right way to get output size...
234 * This is not space efficient for large text.
235 * This is also problem for encoding like UTF-7/UTF-8/ISO-2022 which
236 * a single char can be more than 4 bytes.
237 * I added 15 extra bytes for safety. <yohgaki@php.net>
239 out_size = in_len * sizeof(int) + 15;
240 out_left = out_size;
242 in_size = in_len;
244 cd = iconv_open(out_charset, in_charset);
246 if (cd == (iconv_t)(-1)) {
247 return PHP_ICONV_ERR_UNKNOWN;
250 out_buffer = (char *)malloc(out_size + 1);
251 out_p = out_buffer;
253 result = iconv(cd, (char **)&in_p, &in_size, (char **)&out_p, &out_left);
254 if (result == (size_t)(-1)) {
255 free(out_buffer);
256 return PHP_ICONV_ERR_UNKNOWN;
259 if (out_left < 8) {
260 out_buffer = (char *)realloc(out_buffer, out_size + 8);
263 // flush the shift-out sequences
264 result = iconv(cd, NULL, NULL, &out_p, &out_left);
266 if (result == (size_t)(-1)) {
267 free(out_buffer);
268 return PHP_ICONV_ERR_UNKNOWN;
271 *out_len = out_size - out_left;
272 out_buffer[*out_len] = '\0';
273 *out = out_buffer;
275 iconv_close(cd);
277 return PHP_ICONV_ERR_SUCCESS;
279 #else // iconv supports errno. Handle it better way.
281 iconv_t cd;
282 size_t in_left, out_size, out_left;
283 char *out_p, *out_buf, *tmp_buf;
284 size_t bsz, result = 0;
285 php_iconv_err_t retval = PHP_ICONV_ERR_SUCCESS;
287 *out = NULL;
288 *out_len = 0;
290 cd = iconv_open(out_charset, in_charset);
292 if (cd == (iconv_t)(-1)) {
293 if (errno == EINVAL) {
294 return PHP_ICONV_ERR_WRONG_CHARSET;
295 } else {
296 return PHP_ICONV_ERR_CONVERTER;
299 in_left= in_len;
300 out_left = in_len + 32; // Avoid realloc() most cases
301 out_size = 0;
302 bsz = out_left;
303 out_buf = (char *)malloc(bsz + 1);
304 out_p = out_buf;
306 while (in_left > 0) {
307 result = iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **)&out_p, &out_left);
308 out_size = bsz - out_left;
309 if (result == (size_t)(-1)) {
310 if (errno == E2BIG && in_left > 0) {
311 // converted string is longer than out buffer
312 bsz += in_len;
314 tmp_buf = (char*)realloc(out_buf, bsz + 1);
315 out_p = out_buf = tmp_buf;
316 out_p += out_size;
317 out_left = bsz - out_size;
318 continue;
321 break;
324 if (result != (size_t)(-1)) {
325 // flush the shift-out sequences
326 for (;;) {
327 result = iconv(cd, NULL, NULL, (char **)&out_p, &out_left);
328 out_size = bsz - out_left;
330 if (result != (size_t)(-1)) {
331 break;
334 if (errno == E2BIG) {
335 bsz += 16;
336 tmp_buf = (char *)realloc(out_buf, bsz);
338 out_p = out_buf = tmp_buf;
339 out_p += out_size;
340 out_left = bsz - out_size;
341 } else {
342 break;
347 iconv_close(cd);
349 if (result == (size_t)(-1)) {
350 switch (errno) {
351 case EINVAL: retval = PHP_ICONV_ERR_ILLEGAL_CHAR; break;
352 case EILSEQ: retval = PHP_ICONV_ERR_ILLEGAL_SEQ; break;
353 case E2BIG:
354 // should not happen
355 retval = PHP_ICONV_ERR_TOO_BIG;
356 break;
357 default:
358 // other error
359 retval = PHP_ICONV_ERR_UNKNOWN;
360 free(out_buf);
361 return PHP_ICONV_ERR_UNKNOWN;
364 *out_p = '\0';
365 *out = out_buf;
366 *out_len = out_size;
367 return retval;
368 #endif
371 static php_iconv_err_t _php_iconv_strlen(unsigned int *pretval,
372 const char *str, size_t nbytes,
373 const char *enc) {
374 char buf[GENERIC_SUPERSET_NBYTES*2];
375 php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS;
376 iconv_t cd;
377 const char *in_p;
378 size_t in_left;
379 char *out_p;
380 size_t out_left;
381 unsigned int cnt;
383 *pretval = (unsigned int)-1;
385 cd = iconv_open(GENERIC_SUPERSET_NAME, enc);
386 if (cd == (iconv_t)(-1)) {
387 #if ICONV_SUPPORTS_ERRNO
388 if (errno == EINVAL) {
389 return PHP_ICONV_ERR_WRONG_CHARSET;
390 } else {
391 return PHP_ICONV_ERR_CONVERTER;
393 #else
394 return PHP_ICONV_ERR_UNKNOWN;
395 #endif
398 errno = out_left = 0;
400 for (in_p = str, in_left = nbytes, cnt = 0; in_left > 0; cnt+=2) {
401 size_t prev_in_left;
402 out_p = buf;
403 out_left = sizeof(buf);
405 prev_in_left = in_left;
407 if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **) &out_p, &out_left)
408 == (size_t)-1) {
409 if (prev_in_left == in_left) {
410 break;
415 if (out_left > 0) {
416 cnt -= out_left / GENERIC_SUPERSET_NBYTES;
419 #if ICONV_SUPPORTS_ERRNO
420 switch (errno) {
421 case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; break;
422 case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; break;
423 case E2BIG:
424 case 0:
425 *pretval = cnt;
426 break;
427 default:
428 err = PHP_ICONV_ERR_UNKNOWN;
429 break;
431 #else
432 *pretval = cnt;
433 #endif
435 iconv_close(cd);
436 return err;
// Appends to `pretval` the substring of `str` (nbytes bytes, encoded in `enc`)
// that starts at character index `offset` and spans `len` characters.
//
// The total character count is obtained from _php_iconv_strlen() first.
// A negative `len` is adjusted by (total_len - offset) and a negative `offset`
// by total_len; if either is still negative, or the range is empty or starts
// at/after the end, the function returns success without appending anything.
//
// The string is then walked one character at a time: cd1 converts each
// character into a GENERIC_SUPERSET_NBYTES-byte buffer, and once `offset`
// characters have been skipped, cd2 (opened lazily) converts each buffered
// character back to `enc` and appends it to `pretval`.
//
// Returns PHP_ICONV_ERR_SUCCESS or an error code describing why a converter
// could not be opened or how iconv() failed.
439 static php_iconv_err_t _php_iconv_substr(StringBuffer &pretval,
440 const char *str, size_t nbytes,
441 int offset, int len, const char *enc){
442 char buf[GENERIC_SUPERSET_NBYTES];
443 php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS;
444 iconv_t cd1, cd2;
445 const char *in_p;
446 size_t in_left;
447 char *out_p;
448 size_t out_left;
449 unsigned int cnt;
450 unsigned int total_len;
452 err = _php_iconv_strlen(&total_len, str, nbytes, enc);
453 if (err != PHP_ICONV_ERR_SUCCESS) {
454 return err;
// Negative len: adjusted using the offset as passed in, before the offset
// itself is normalized below.
457 if (len < 0) {
458 if ((len += (total_len - offset)) < 0) {
459 return PHP_ICONV_ERR_SUCCESS;
// Negative offset: counted back from the end of the string.
463 if (offset < 0) {
464 if ((offset += total_len) < 0) {
465 return PHP_ICONV_ERR_SUCCESS;
469 if (len > (int)total_len) {
470 len = total_len;
474 if (offset >= (int)total_len) {
475 return PHP_ICONV_ERR_SUCCESS;
478 if ((offset + len) > (int)total_len ) {
479 /* trying to compute the length */
480 len = total_len - offset;
483 if (len == 0) {
484 return PHP_ICONV_ERR_SUCCESS;
// cd1: `enc` -> generic superset, used to step through the input.
487 cd1 = iconv_open(GENERIC_SUPERSET_NAME, enc);
489 if (cd1 == (iconv_t)(-1)) {
490 #if ICONV_SUPPORTS_ERRNO
491 if (errno == EINVAL) {
492 return PHP_ICONV_ERR_WRONG_CHARSET;
493 } else {
494 return PHP_ICONV_ERR_CONVERTER;
496 #else
497 return PHP_ICONV_ERR_UNKNOWN;
498 #endif
// cd2 stays NULL until the first character inside the requested range.
501 cd2 = (iconv_t)NULL;
502 errno = 0;
504 for (in_p = str, in_left = nbytes, cnt = 0; in_left > 0 && len > 0; ++cnt) {
505 size_t prev_in_left;
506 out_p = buf;
507 out_left = sizeof(buf);
509 prev_in_left = in_left;
511 if (iconv(cd1, (ICONV_CONST char **)&in_p, &in_left, (char **) &out_p, &out_left) ==
512 (size_t)-1) {
// Stop only when iconv() failed without consuming any input.
513 if (prev_in_left == in_left) {
514 break;
518 if (cnt >= (unsigned int)offset) {
519 if (cd2 == (iconv_t)NULL) {
520 cd2 = iconv_open(enc, GENERIC_SUPERSET_NAME);
522 if (cd2 == (iconv_t)(-1)) {
523 cd2 = (iconv_t)NULL;
524 #if ICONV_SUPPORTS_ERRNO
525 if (errno == EINVAL) {
526 err = PHP_ICONV_ERR_WRONG_CHARSET;
527 } else {
528 err = PHP_ICONV_ERR_CONVERTER;
530 #else
531 err = PHP_ICONV_ERR_UNKNOWN;
532 #endif
533 break;
// NOTE(review): a failure here ends the loop without updating `err`.
537 if (_php_iconv_appendl(pretval, buf, sizeof(buf), cd2) !=
538 PHP_ICONV_ERR_SUCCESS) {
539 break;
541 --len;
// NOTE(review): errno is inspected after the loop regardless of how the loop
// ended, so it can overwrite an `err` value set above.
545 #if ICONV_SUPPORTS_ERRNO
546 switch (errno) {
547 case EINVAL:
548 err = PHP_ICONV_ERR_ILLEGAL_CHAR;
549 break;
551 case EILSEQ:
552 err = PHP_ICONV_ERR_ILLEGAL_SEQ;
553 break;
555 case E2BIG:
556 break;
558 #endif
// On success, flush cd2's pending shift sequence into the result.
559 if (err == PHP_ICONV_ERR_SUCCESS) {
560 if (cd2 != (iconv_t)NULL) {
561 _php_iconv_appendl(pretval, NULL, 0, cd2);
565 if (cd1 != (iconv_t)NULL) {
566 iconv_close(cd1);
569 if (cd2 != (iconv_t)NULL) {
570 iconv_close(cd2);
572 return err;
// Finds the character position of needle `ndl` inside `haystk`, both encoded
// in `enc`.
//
// The needle is first converted to GENERIC_SUPERSET_NAME with
// php_iconv_string(); the haystack is then converted one character at a time
// and compared against the needle in GENERIC_SUPERSET_NBYTES-byte units.
//
//  - offset >= 0: characters before `offset` are skipped and the scan stops at
//    the first complete match.
//  - offset <  0: the whole haystack is scanned and *pretval is overwritten on
//    every complete match, so the last match found is the one reported.
//
// *pretval is (unsigned int)-1 when no match was found. The return value is
// PHP_ICONV_ERR_SUCCESS or an error code from converter setup / iconv().
575 static php_iconv_err_t _php_iconv_strpos(unsigned int *pretval,
576 const char *haystk,
577 size_t haystk_nbytes,
578 const char *ndl, size_t ndl_nbytes,
579 int offset, const char *enc) {
580 char buf[GENERIC_SUPERSET_NBYTES];
581 php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS;
582 iconv_t cd;
583 const char *in_p;
584 size_t in_left;
585 char *out_p;
586 size_t out_left;
587 unsigned int cnt;
588 char *ndl_buf;
589 const char *ndl_buf_p;
590 size_t ndl_buf_len, ndl_buf_left;
591 unsigned int match_ofs;
593 *pretval = (unsigned int)-1;
// Needle in the generic superset encoding; ndl_buf is owned by this function
// and freed on every exit path.
595 err = php_iconv_string(ndl, ndl_nbytes,
596 &ndl_buf, &ndl_buf_len, GENERIC_SUPERSET_NAME, enc);
598 if (err != PHP_ICONV_ERR_SUCCESS) {
599 if (ndl_buf != NULL) {
600 free(ndl_buf);
602 return err;
605 cd = iconv_open(GENERIC_SUPERSET_NAME, enc);
607 if (cd == (iconv_t)(-1)) {
608 if (ndl_buf != NULL) {
609 free(ndl_buf);
611 #if ICONV_SUPPORTS_ERRNO
612 if (errno == EINVAL) {
613 return PHP_ICONV_ERR_WRONG_CHARSET;
614 } else {
615 return PHP_ICONV_ERR_CONVERTER;
617 #else
618 return PHP_ICONV_ERR_UNKNOWN;
619 #endif
// ndl_buf_p / ndl_buf_left track how much of the needle is matched so far;
// match_ofs is the haystack index where the current candidate match began.
622 ndl_buf_p = ndl_buf;
623 ndl_buf_left = ndl_buf_len;
624 match_ofs = (unsigned int)-1;
626 for (in_p = haystk, in_left = haystk_nbytes, cnt = 0; in_left > 0; ++cnt) {
627 size_t prev_in_left;
628 out_p = buf;
629 out_left = sizeof(buf);
631 prev_in_left = in_left;
633 if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left, (char **) &out_p, &out_left) ==
634 (size_t)-1) {
// Only a failure that consumed no input ends the scan.
635 if (prev_in_left == in_left) {
636 #if ICONV_SUPPORTS_ERRNO
637 switch (errno) {
638 case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; break;
639 case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; break;
640 case E2BIG:
641 break;
642 default:
643 err = PHP_ICONV_ERR_UNKNOWN;
644 break;
646 #endif
647 break;
// Forward search: first match at or after `offset`.
650 if (offset >= 0) {
651 if (cnt >= (unsigned int)offset) {
652 if (_php_iconv_memequal(buf, ndl_buf_p, sizeof(buf))) {
653 if (match_ofs == (unsigned int)-1) {
654 match_ofs = cnt;
656 ndl_buf_p += GENERIC_SUPERSET_NBYTES;
657 ndl_buf_left -= GENERIC_SUPERSET_NBYTES;
658 if (ndl_buf_left == 0) {
659 *pretval = match_ofs;
660 break;
662 } else {
// Mismatch: compute how much of the already-matched needle prefix can be
// kept (appears to be a prefix/failure computation in the style of KMP —
// TODO confirm), then re-test the current character at that position.
663 unsigned int i, j, lim;
665 i = 0;
666 j = GENERIC_SUPERSET_NBYTES;
667 lim = (unsigned int)(ndl_buf_p - ndl_buf);
669 while (j < lim) {
670 if (_php_iconv_memequal(&ndl_buf[j], &ndl_buf[i],
671 GENERIC_SUPERSET_NBYTES)) {
672 i += GENERIC_SUPERSET_NBYTES;
673 } else {
674 j -= i;
675 i = 0;
677 j += GENERIC_SUPERSET_NBYTES;
680 if (_php_iconv_memequal(buf, &ndl_buf[i], sizeof(buf))) {
681 match_ofs += (lim - i) / GENERIC_SUPERSET_NBYTES;
682 i += GENERIC_SUPERSET_NBYTES;
683 ndl_buf_p = &ndl_buf[i];
684 ndl_buf_left = ndl_buf_len - i;
685 } else {
686 match_ofs = (unsigned int)-1;
687 ndl_buf_p = ndl_buf;
688 ndl_buf_left = ndl_buf_len;
// Negative offset: same matching logic, but a complete match is recorded and
// the matcher is reset so scanning continues to the end of the haystack.
692 } else {
693 if (_php_iconv_memequal(buf, ndl_buf_p, sizeof(buf))) {
694 if (match_ofs == (unsigned int)-1) {
695 match_ofs = cnt;
697 ndl_buf_p += GENERIC_SUPERSET_NBYTES;
698 ndl_buf_left -= GENERIC_SUPERSET_NBYTES;
699 if (ndl_buf_left == 0) {
700 *pretval = match_ofs;
701 ndl_buf_p = ndl_buf;
702 ndl_buf_left = ndl_buf_len;
703 match_ofs = (unsigned int)-1;
705 } else {
706 unsigned int i, j, lim;
708 i = 0;
709 j = GENERIC_SUPERSET_NBYTES;
710 lim = (unsigned int)(ndl_buf_p - ndl_buf);
712 while (j < lim) {
713 if (_php_iconv_memequal(&ndl_buf[j], &ndl_buf[i],
714 GENERIC_SUPERSET_NBYTES)) {
715 i += GENERIC_SUPERSET_NBYTES;
716 } else {
717 j -= i;
718 i = 0;
720 j += GENERIC_SUPERSET_NBYTES;
723 if (_php_iconv_memequal(buf, &ndl_buf[i], sizeof(buf))) {
724 match_ofs += (lim - i) / GENERIC_SUPERSET_NBYTES;
725 i += GENERIC_SUPERSET_NBYTES;
726 ndl_buf_p = &ndl_buf[i];
727 ndl_buf_left = ndl_buf_len - i;
728 } else {
729 match_ofs = (unsigned int)-1;
730 ndl_buf_p = ndl_buf;
731 ndl_buf_left = ndl_buf_len;
737 if (ndl_buf) {
738 free(ndl_buf);
741 iconv_close(cd);
742 return err;
// Decodes one MIME header value containing "=?charset?scheme?text?=" encoded
// words and appends the result, converted to `enc`, to `retval`.
//
// Parameters:
//   retval      output buffer the decoded text is appended to.
//   str         input bytes; str_nbytes is their count.
//   enc         charset the output is produced in.
//   next_pos    optional; reset to NULL on entry and set to the position
//               where scanning stopped when the function completes normally.
//   mode        bit set of PHP_ICONV_MIME_DECODE_STRICT and
//               PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR.
//
// Two converters are used: cd_pl (ASCII -> enc) for text outside encoded
// words and for passing malformed chunks through verbatim, and cd
// (word charset -> enc), reopened for each encoded word.
//
// The input is scanned byte by byte with a state machine (scan_stat):
//    0 any character              1 '?' after '='
//    2 charset name               3 encoding scheme letter
//    4 '?' after the scheme       5 encoded text
//    6 closing '='                7 '\n' after '\r'
//    8 start of a possibly folded line
//    9 choice point after an encoded word
//   10 language specifier (skipped)
//   11 run of whitespace         12 non-encoded word
//
// With CONTINUE_ON_ERROR a malformed encoded word is copied through cd_pl and
// scanning continues; without it the function stops with an error code.
// Returns PHP_ICONV_ERR_SUCCESS or the error that stopped decoding.
745 static php_iconv_err_t _php_iconv_mime_decode(StringBuffer &retval,
746 const char *str,
747 size_t str_nbytes,
748 const char *enc,
749 const char **next_pos,
750 int mode) {
751 php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS;
753 iconv_t cd = (iconv_t)(-1), cd_pl = (iconv_t)(-1);
755 const char *p1;
756 size_t str_left;
757 unsigned int scan_stat = 0;
758 const char *csname = NULL;
759 size_t csname_len;
760 const char *encoded_text = NULL;
761 size_t encoded_text_len = 0;
762 const char *encoded_word = NULL;
763 const char *spaces = NULL;
765 php_iconv_enc_scheme_t enc_scheme = PHP_ICONV_ENC_SCHEME_BASE64;
767 if (next_pos != NULL) {
768 *next_pos = NULL;
// Converter for plain (non-encoded) text.
770 cd_pl = iconv_open(enc, "ASCII");
772 if (cd_pl == (iconv_t)(-1)) {
773 #if ICONV_SUPPORTS_ERRNO
774 if (errno == EINVAL) {
775 err = PHP_ICONV_ERR_WRONG_CHARSET;
776 } else {
777 err = PHP_ICONV_ERR_CONVERTER;
779 #else
780 err = PHP_ICONV_ERR_UNKNOWN;
781 #endif
782 goto out;
785 p1 = str;
786 for (str_left = str_nbytes; str_left > 0; str_left--, p1++) {
// eos is set when the closing '=' of an encoded word is the last input byte.
787 int eos = 0;
789 switch (scan_stat) {
790 case 0: /* expecting any character */
791 switch (*p1) {
792 case '\r': /* part of an EOL sequence? */
793 scan_stat = 7;
794 break;
796 case '\n':
797 scan_stat = 8;
798 break;
800 case '=': /* first letter of an encoded chunk */
801 encoded_word = p1;
802 scan_stat = 1;
803 break;
805 case ' ': case '\t': /* a chunk of whitespaces */
806 spaces = p1;
807 scan_stat = 11;
808 break;
810 default: /* first letter of a non-encoded word */
811 _php_iconv_appendc(retval, *p1, cd_pl);
812 encoded_word = NULL;
813 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
814 scan_stat = 12;
816 break;
818 break;
820 case 1: /* expecting a delimiter */
// Not "=?": the text seen since '=' was ordinary text, emit it as-is.
821 if (*p1 != '?') {
822 err = _php_iconv_appendl(retval, encoded_word,
823 (size_t)((p1 + 1) - encoded_word), cd_pl);
824 if (err != PHP_ICONV_ERR_SUCCESS) {
825 goto out;
827 encoded_word = NULL;
828 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
829 scan_stat = 12;
830 } else {
831 scan_stat = 0;
833 break;
835 csname = p1 + 1;
836 scan_stat = 2;
837 break;
839 case 2: /* expecting a charset name */
840 switch (*p1) {
841 case '?': /* normal delimiter: encoding scheme follows */
842 scan_stat = 3;
843 break;
845 case '*': /* new style delimiter: locale id follows */
846 scan_stat = 10;
847 break;
// The charset name just ended: copy it into tmpbuf and open cd for it.
849 if (scan_stat != 2) {
850 char tmpbuf[80];
852 if (csname == NULL) {
853 err = PHP_ICONV_ERR_MALFORMED;
854 goto out;
857 csname_len = (size_t)(p1 - csname);
// Names that do not fit in tmpbuf are treated as malformed input.
859 if (csname_len > sizeof(tmpbuf) - 1) {
860 if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
861 err = _php_iconv_appendl(retval, encoded_word,
862 (size_t)((p1 + 1) - encoded_word), cd_pl);
863 if (err != PHP_ICONV_ERR_SUCCESS) {
864 goto out;
866 encoded_word = NULL;
867 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
868 scan_stat = 12;
869 } else {
870 scan_stat = 0;
872 break;
873 } else {
874 err = PHP_ICONV_ERR_MALFORMED;
875 goto out;
879 memcpy(tmpbuf, csname, csname_len);
880 tmpbuf[csname_len] = '\0';
// Release the converter left over from a previous encoded word.
882 if (cd != (iconv_t)(-1)) {
883 iconv_close(cd);
886 cd = iconv_open(enc, tmpbuf);
888 if (cd == (iconv_t)(-1)) {
889 if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
890 err = _php_iconv_appendl(retval, encoded_word,
891 (size_t)((p1 + 1) - encoded_word), cd_pl);
892 if (err != PHP_ICONV_ERR_SUCCESS) {
893 goto out;
895 encoded_word = NULL;
896 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
897 scan_stat = 12;
898 } else {
899 scan_stat = 0;
901 break;
902 } else {
903 #if ICONV_SUPPORTS_ERRNO
904 if (errno == EINVAL) {
905 err = PHP_ICONV_ERR_WRONG_CHARSET;
906 } else {
907 err = PHP_ICONV_ERR_CONVERTER;
909 #else
910 err = PHP_ICONV_ERR_UNKNOWN;
911 #endif
912 goto out;
916 break;
918 case 3: /* expecting a encoding scheme specifier */
919 switch (*p1) {
920 case 'b':
921 case 'B':
922 enc_scheme = PHP_ICONV_ENC_SCHEME_BASE64;
923 scan_stat = 4;
924 break;
926 case 'q':
927 case 'Q':
928 enc_scheme = PHP_ICONV_ENC_SCHEME_QPRINT;
929 scan_stat = 4;
930 break;
932 default:
933 if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
934 err = _php_iconv_appendl(retval, encoded_word,
935 (size_t)((p1 + 1) - encoded_word), cd_pl);
936 if (err != PHP_ICONV_ERR_SUCCESS) {
937 goto out;
939 encoded_word = NULL;
940 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
941 scan_stat = 12;
942 } else {
943 scan_stat = 0;
945 break;
946 } else {
947 err = PHP_ICONV_ERR_MALFORMED;
948 goto out;
951 break;
953 case 4: /* expecting a delimiter */
954 if (*p1 != '?') {
955 if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
956 /* pass the entire chunk through the converter */
957 err = _php_iconv_appendl(retval, encoded_word,
958 (size_t)((p1 + 1) - encoded_word), cd_pl);
959 if (err != PHP_ICONV_ERR_SUCCESS) {
960 goto out;
962 encoded_word = NULL;
963 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
964 scan_stat = 12;
965 } else {
966 scan_stat = 0;
968 break;
969 } else {
970 err = PHP_ICONV_ERR_MALFORMED;
971 goto out;
974 encoded_text = p1 + 1;
975 scan_stat = 5;
976 break;
978 case 5: /* expecting an encoded portion */
// The payload runs up to the next '?'; only its length is recorded here.
979 if (*p1 == '?') {
980 encoded_text_len = (size_t)(p1 - encoded_text);
981 scan_stat = 6;
983 break;
985 case 7: /* expecting a "\n" character */
986 if (*p1 == '\n') {
987 scan_stat = 8;
988 } else {
989 /* bare CR */
990 _php_iconv_appendc(retval, '\r', cd_pl);
991 _php_iconv_appendc(retval, *p1, cd_pl);
992 scan_stat = 0;
994 break;
996 case 8: /* checking whether the following line is part of a
997 folded header */
// A line that does not start with whitespace is not a continuation:
// step back one byte and force the loop to end.
998 if (*p1 != ' ' && *p1 != '\t') {
999 --p1;
1000 str_left = 1; /* quit_loop */
1001 break;
1003 if (encoded_word == NULL) {
1004 _php_iconv_appendc(retval, ' ', cd_pl);
1006 spaces = NULL;
1007 scan_stat = 11;
1008 break;
1010 case 6: /* expecting a End-Of-Chunk character "=" */
1011 if (*p1 != '=') {
1012 if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
1013 /* pass the entire chunk through the converter */
1014 err = _php_iconv_appendl(retval, encoded_word,
1015 (size_t)((p1 + 1) - encoded_word), cd_pl);
1016 if (err != PHP_ICONV_ERR_SUCCESS) {
1017 goto out;
1019 encoded_word = NULL;
1020 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
1021 scan_stat = 12;
1022 } else {
1023 scan_stat = 0;
1025 break;
1026 } else {
1027 err = PHP_ICONV_ERR_MALFORMED;
1028 goto out;
1031 scan_stat = 9;
// When the closing '=' is the last byte, control continues straight into
// case 9 with eos set; otherwise case 9 runs on the next byte.
1032 if (str_left == 1) {
1033 eos = 1;
1034 } else {
1035 break;
1038 case 9: /* choice point, seeing what to do next.*/
1039 switch (*p1) {
1040 default:
1041 /* Handle non-RFC-compliant formats
1043 * RFC2047 requires the character that comes right
1044 * after an encoded word (chunk) to be a whitespace,
1045 * while there are lots of broken implementations that
1046 * generate such malformed headers that don't fulfill
1047 * that requirement.
1049 if (!eos) {
1050 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
1051 /* pass the entire chunk through the converter */
1052 err = _php_iconv_appendl(retval, encoded_word,
1053 (size_t)((p1 + 1) - encoded_word), cd_pl);
1054 if (err != PHP_ICONV_ERR_SUCCESS) {
1055 goto out;
1057 scan_stat = 12;
1058 break;
1061 /* break is omitted intentionally */
1063 case '\r': case '\n': case ' ': case '\t': {
// Decode the recorded payload with the selected scheme; `decoded` stays null
// when the decoder produced no buffer.
1064 String decoded;
1065 switch (enc_scheme) {
1066 case PHP_ICONV_ENC_SCHEME_BASE64:
1068 int len = encoded_text_len;
1069 char *ret = string_base64_decode(encoded_text, len, false);
1070 decoded = String(ret, len, AttachString);
1072 break;
1073 case PHP_ICONV_ENC_SCHEME_QPRINT:
1075 int len = encoded_text_len;
1076 char *ret = string_quoted_printable_decode(encoded_text, len, true);
1077 decoded = String(ret, len, AttachString);
1079 break;
1080 default:
1081 break;
1084 if (decoded.isNull()) {
1085 if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
1086 /* pass the entire chunk through the converter */
1087 err = _php_iconv_appendl(retval, encoded_word,
1088 (size_t)((p1 + 1) - encoded_word), cd_pl);
1089 if (err != PHP_ICONV_ERR_SUCCESS) {
1090 goto out;
1092 encoded_word = NULL;
1093 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
1094 scan_stat = 12;
1095 } else {
1096 scan_stat = 0;
1098 break;
1099 } else {
1100 err = PHP_ICONV_ERR_UNKNOWN;
1101 goto out;
// Convert the decoded payload from the word's charset into `enc`.
1105 err = _php_iconv_appendl(retval, decoded.data(), decoded.size(), cd);
1106 if (err != PHP_ICONV_ERR_SUCCESS) {
1107 if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
1108 /* pass the entire chunk through the converter */
1109 err = _php_iconv_appendl(retval, encoded_word,
1110 (size_t)(p1 - encoded_word), cd_pl);
1111 if (err != PHP_ICONV_ERR_SUCCESS) {
1112 goto out;
1114 encoded_word = NULL;
1115 } else {
1116 goto out;
1120 if (eos) { /* reached end-of-string. done. */
1121 scan_stat = 0;
1122 break;
// Pick the next state from the byte that followed the encoded word.
1125 switch (*p1) {
1126 case '\r': /* part of an EOL sequence? */
1127 scan_stat = 7;
1128 break;
1130 case '\n':
1131 scan_stat = 8;
1132 break;
1134 case '=': /* first letter of an encoded chunk */
1135 scan_stat = 1;
1136 break;
1138 case ' ': case '\t': /* medial whitespaces */
1139 spaces = p1;
1140 scan_stat = 11;
1141 break;
1143 default: /* first letter of a non-encoded word */
1144 _php_iconv_appendc(retval, *p1, cd_pl);
1145 scan_stat = 12;
1146 break;
1148 } break;
1150 break;
1152 case 10: /* expects a language specifier. dismiss it for now */
1153 if (*p1 == '?') {
1154 scan_stat = 3;
1156 break;
1158 case 11: /* expecting a chunk of whitespaces */
1159 switch (*p1) {
1160 case '\r': /* part of an EOL sequence? */
1161 scan_stat = 7;
1162 break;
1164 case '\n':
1165 scan_stat = 8;
1166 break;
1168 case '=': /* first letter of an encoded chunk */
// Whitespace is emitted only when the previous token was not an encoded
// word (encoded_word == NULL); otherwise it is dropped.
1169 if (spaces != NULL && encoded_word == NULL) {
1170 _php_iconv_appendl(retval, spaces, (size_t)(p1 - spaces), cd_pl);
1171 spaces = NULL;
1173 encoded_word = p1;
1174 scan_stat = 1;
1175 break;
1177 case ' ': case '\t':
1178 break;
1180 default: /* first letter of a non-encoded word */
1181 if (spaces != NULL) {
1182 _php_iconv_appendl(retval, spaces, (size_t)(p1 - spaces), cd_pl);
1183 spaces = NULL;
1185 _php_iconv_appendc(retval, *p1, cd_pl);
1186 encoded_word = NULL;
1187 if ((mode & PHP_ICONV_MIME_DECODE_STRICT)) {
1188 scan_stat = 12;
1189 } else {
1190 scan_stat = 0;
1192 break;
1194 break;
1196 case 12: /* expecting a non-encoded word */
1197 switch (*p1) {
1198 case '\r': /* part of an EOL sequence? */
1199 scan_stat = 7;
1200 break;
1202 case '\n':
1203 scan_stat = 8;
1204 break;
1206 case ' ': case '\t':
1207 spaces = p1;
1208 scan_stat = 11;
1209 break;
1211 case '=': /* first letter of an encoded chunk */
1212 if (!(mode & PHP_ICONV_MIME_DECODE_STRICT)) {
1213 encoded_word = p1;
1214 scan_stat = 1;
1215 break;
1217 /* break is omitted intentionally */
1219 default:
1220 _php_iconv_appendc(retval, *p1, cd_pl);
1221 break;
1223 break;
// End of input: any state other than 0, 8, 11 or 12 means the input stopped
// in the middle of an encoded word.
1226 switch (scan_stat) {
1227 case 0: case 8: case 11: case 12:
1228 break;
1229 default:
1230 if ((mode & PHP_ICONV_MIME_DECODE_CONTINUE_ON_ERROR)) {
1231 if (scan_stat == 1) {
1232 _php_iconv_appendc(retval, '=', cd_pl);
1234 err = PHP_ICONV_ERR_SUCCESS;
1235 } else {
1236 err = PHP_ICONV_ERR_MALFORMED;
1237 goto out;
1241 if (next_pos != NULL) {
1242 *next_pos = p1;
// Common exit: close whichever converters were opened.
1245 out:
1246 if (cd != (iconv_t)(-1)) {
1247 iconv_close(cd);
1249 if (cd_pl != (iconv_t)(-1)) {
1250 iconv_close(cd_pl);
1252 return err;
1255 ///////////////////////////////////////////////////////////////////////////////
1257 const StaticString
1258 s_scheme("scheme"),
1259 s_input_charset("input-charset"),
1260 s_output_charset("output-charset"),
1261 s_line_length("line-length"),
1262 s_line_break_chars("line-break-chars");
1264 Variant f_iconv_mime_encode(CStrRef field_name, CStrRef field_value,
1265 CVarRef preferences /* = null_variant */) {
1266 php_iconv_enc_scheme_t scheme_id = PHP_ICONV_ENC_SCHEME_BASE64;
1267 String in_charset;
1268 String out_charset;
1269 long line_len = 76;
1270 String lfchars = "\r\n";
1271 StringBuffer ret;
1272 char *buf = NULL;
1274 if (!preferences.isNull()) {
1275 Variant scheme = preferences[s_scheme];
1276 if (scheme.isString()) {
1277 String s = scheme.toString();
1278 switch (*s.data()) {
1279 case 'B': case 'b':
1280 scheme_id = PHP_ICONV_ENC_SCHEME_BASE64;
1281 break;
1282 case 'Q': case 'q':
1283 scheme_id = PHP_ICONV_ENC_SCHEME_QPRINT;
1284 break;
1288 Variant input_charset = preferences[s_input_charset];
1289 if (input_charset.isString()) {
1290 in_charset = input_charset.toString();
1291 if (!validate_charset(in_charset)) return false;
1294 Variant output_charset = preferences[s_output_charset];
1295 if (output_charset.isString()) {
1296 out_charset = output_charset.toString();
1297 if (!validate_charset(out_charset)) return false;
1300 Variant line_length = preferences[s_line_length];
1301 if (!line_length.isNull()) {
1302 line_len = line_length.toInt64();
1305 Variant line_break_chars = preferences[s_line_break_chars];
1306 if (!line_break_chars.isNull()) {
1307 lfchars = line_break_chars.toString();
1311 static int qp_table[256] = {
1312 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x00 */
1313 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x10 */
1314 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20 */
1315 1, 1, 1, 1, 1, 1, 1 ,1, 1, 1, 1, 1, 1, 3, 1, 3, /* 0x30 */
1316 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 */
1317 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, /* 0x50 */
1318 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 */
1319 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, /* 0x70 */
1320 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x80 */
1321 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0x90 */
1322 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xA0 */
1323 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xB0 */
1324 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xC0 */
1325 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xD0 */
1326 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xE0 */
1327 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 /* 0xF0 */
1330 php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS;
1331 iconv_t cd = (iconv_t)(-1), cd_pl = (iconv_t)(-1);
1333 if ((field_name.size() + 2) >= line_len ||
1334 (out_charset.size() + 12) >= line_len) {
1335 /* field name is too long */
1336 err = PHP_ICONV_ERR_TOO_BIG;
1337 goto out;
1340 cd_pl = iconv_open("ASCII", in_charset.data());
1341 if (cd_pl == (iconv_t)(-1)) {
1342 #if ICONV_SUPPORTS_ERRNO
1343 if (errno == EINVAL) {
1344 err = PHP_ICONV_ERR_WRONG_CHARSET;
1345 } else {
1346 err = PHP_ICONV_ERR_CONVERTER;
1348 #else
1349 err = PHP_ICONV_ERR_UNKNOWN;
1350 #endif
1351 goto out;
1354 cd = iconv_open(out_charset.data(), in_charset.data());
1355 if (cd == (iconv_t)(-1)) {
1356 #if ICONV_SUPPORTS_ERRNO
1357 if (errno == EINVAL) {
1358 err = PHP_ICONV_ERR_WRONG_CHARSET;
1359 } else {
1360 err = PHP_ICONV_ERR_CONVERTER;
1362 #else
1363 err = PHP_ICONV_ERR_UNKNOWN;
1364 #endif
1365 goto out;
1368 const char *in_p;
1369 size_t in_left;
1370 char *out_p;
1371 size_t out_left;
1373 buf = (char*)malloc(line_len + 5);
1374 unsigned int char_cnt;
1375 char_cnt = line_len;
1377 _php_iconv_appendl(ret, field_name.data(), field_name.size(), cd_pl);
1378 char_cnt -= field_name.size();
1379 ret.append(": ");
1380 char_cnt -= 2;
1382 in_p = field_value.data();
1383 in_left = field_value.size();
1385 do {
1386 size_t prev_in_left;
1387 size_t out_size;
1389 if ((int)char_cnt < (out_charset.size() + 12)) {
1390 ret.append(lfchars); // lfchars must be encoded in ASCII here
1391 ret.append(' ');
1392 char_cnt = line_len - 1;
1395 ret.append("=?");
1396 char_cnt -= 2;
1397 ret.append(out_charset);
1398 char_cnt -= out_charset.size();
1399 ret.append('?');
1400 char_cnt --;
1402 switch (scheme_id) {
1403 case PHP_ICONV_ENC_SCHEME_BASE64:
1405 size_t ini_in_left;
1406 const char *ini_in_p;
1407 size_t out_reserved = 4;
1409 ret.append('B');
1410 char_cnt--;
1411 ret.append('?');
1412 char_cnt--;
1414 prev_in_left = ini_in_left = in_left;
1415 ini_in_p = in_p;
1417 out_size = (char_cnt - 2) / 4 * 3;
1419 for (;;) {
1420 out_p = buf;
1422 if (out_size <= out_reserved) {
1423 err = PHP_ICONV_ERR_TOO_BIG;
1424 goto out;
1427 out_left = out_size - out_reserved;
1429 if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left,
1430 (char **)&out_p, &out_left) == (size_t)-1) {
1431 #if ICONV_SUPPORTS_ERRNO
1432 switch (errno) {
1433 case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; goto out;
1434 case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; goto out;
1435 case E2BIG:
1436 if (prev_in_left == in_left) {
1437 err = PHP_ICONV_ERR_TOO_BIG;
1438 goto out;
1440 break;
1441 default:
1442 err = PHP_ICONV_ERR_UNKNOWN;
1443 goto out;
1445 #else
1446 if (prev_in_left == in_left) {
1447 err = PHP_ICONV_ERR_UNKNOWN;
1448 goto out;
1450 #endif
1453 out_left += out_reserved;
1455 if (iconv(cd, NULL, NULL, (char **)&out_p, &out_left) ==
1456 (size_t)-1) {
1457 #if ICONV_SUPPORTS_ERRNO
1458 if (errno != E2BIG) {
1459 err = PHP_ICONV_ERR_UNKNOWN;
1460 goto out;
1462 #else
1463 if (out_left != 0) {
1464 err = PHP_ICONV_ERR_UNKNOWN;
1465 goto out;
1467 #endif
1468 } else {
1469 break;
1472 if (iconv(cd, NULL, NULL, NULL, NULL) == (size_t)-1) {
1473 err = PHP_ICONV_ERR_UNKNOWN;
1474 goto out;
1477 out_reserved += 4;
1478 in_left = ini_in_left;
1479 in_p = ini_in_p;
1482 prev_in_left = in_left;
1485 int encoded_len = out_size - out_left;
1486 char *encoded_str = string_base64_encode(buf, encoded_len);
1487 String encoded(encoded_str, encoded_len, AttachString);
1488 if ((int)char_cnt < encoded.size()) {
1489 /* something went wrong! */
1490 err = PHP_ICONV_ERR_UNKNOWN;
1491 goto out;
1494 ret.append(encoded);
1495 char_cnt -= encoded.size();
1496 ret.append("?=");
1497 char_cnt -= 2;
1499 break; /* case PHP_ICONV_ENC_SCHEME_BASE64: */
1501 case PHP_ICONV_ENC_SCHEME_QPRINT:
1503 size_t ini_in_left;
1504 const char *ini_in_p;
1505 const unsigned char *p;
1506 size_t nbytes_required;
1508 ret.append('Q');
1509 char_cnt--;
1510 ret.append('?');
1511 char_cnt--;
1513 prev_in_left = ini_in_left = in_left;
1514 ini_in_p = in_p;
1516 for (out_size = char_cnt; out_size > 0;) {
1517 size_t prev_out_left ATTRIBUTE_UNUSED;
1519 nbytes_required = 0;
1521 out_p = buf;
1522 out_left = out_size;
1524 if (iconv(cd, (ICONV_CONST char **)&in_p, &in_left,
1525 (char **)&out_p, &out_left) == (size_t)-1) {
1526 #if ICONV_SUPPORTS_ERRNO
1527 switch (errno) {
1528 case EINVAL: err = PHP_ICONV_ERR_ILLEGAL_CHAR; goto out;
1529 case EILSEQ: err = PHP_ICONV_ERR_ILLEGAL_SEQ; goto out;
1530 case E2BIG:
1531 if (prev_in_left == in_left) {
1532 err = PHP_ICONV_ERR_UNKNOWN;
1533 goto out;
1535 break;
1536 default:
1537 err = PHP_ICONV_ERR_UNKNOWN;
1538 goto out;
1540 #else
1541 if (prev_in_left == in_left) {
1542 err = PHP_ICONV_ERR_UNKNOWN;
1543 goto out;
1545 #endif
1548 prev_out_left = out_left;
1549 if (iconv(cd, NULL, NULL, (char **)&out_p, &out_left) ==
1550 (size_t)-1) {
1551 #if ICONV_SUPPORTS_ERRNO
1552 if (errno != E2BIG) {
1553 err = PHP_ICONV_ERR_UNKNOWN;
1554 goto out;
1556 #else
1557 if (out_left == prev_out_left) {
1558 err = PHP_ICONV_ERR_UNKNOWN;
1559 goto out;
1561 #endif
1564 for (p = (unsigned char *)buf; p < (unsigned char *)out_p; p++) {
1565 nbytes_required += qp_table[*p];
1568 if (nbytes_required <= char_cnt - 2) {
1569 break;
1572 out_size -= ((nbytes_required - (char_cnt - 2)) + 1) / (3 - 1);
1573 in_left = ini_in_left;
1574 in_p = ini_in_p;
1577 for (p = (unsigned char *)buf; p < (unsigned char *)out_p; p++) {
1578 if (qp_table[*p] == 1) {
1579 ret.append(*(char*)p);
1580 char_cnt--;
1581 } else {
1582 static char qp_digits[] = "0123456789ABCDEF";
1583 ret.append('=');
1584 ret.append(qp_digits[(*p >> 4) & 0x0f]);
1585 ret.append(qp_digits[(*p & 0x0f)]);
1586 char_cnt -= 3;
1589 prev_in_left = in_left;
1591 ret.append("?=");
1592 char_cnt -= 2;
1594 if (iconv(cd, NULL, NULL, NULL, NULL) == (size_t)-1) {
1595 err = PHP_ICONV_ERR_UNKNOWN;
1596 goto out;
1599 } break; /* case PHP_ICONV_ENC_SCHEME_QPRINT: */
1601 } while (in_left > 0);
1603 out:
1604 if (cd != (iconv_t)(-1)) {
1605 iconv_close(cd);
1607 if (cd_pl != (iconv_t)(-1)) {
1608 iconv_close(cd_pl);
1610 if (buf != NULL) {
1611 free(buf);
1614 if (err != PHP_ICONV_ERR_SUCCESS) {
1615 return false;
1617 return ret.detach();
1620 Variant f_iconv_mime_decode(CStrRef encoded_string, int mode /* = 0 */,
1621 CStrRef charset /* = null_string */) {
1622 Variant encoded = check_charset(charset);
1623 if (same(encoded, false)) return false;
1624 String enc = encoded.toString();
1625 StringBuffer retval;
1626 php_iconv_err_t err =
1627 _php_iconv_mime_decode(retval, encoded_string.data(),
1628 encoded_string.size(), enc.data(), NULL, mode);
1629 _php_iconv_show_error(err, enc.data(), "???");
1631 if (err == PHP_ICONV_ERR_SUCCESS) {
1632 return retval.detach();
1634 return false;
1637 Variant f_iconv_mime_decode_headers(CStrRef encoded_headers,
1638 int mode /* = 0 */,
1639 CStrRef charset /* = null_string */) {
1640 Variant encoded = check_charset(charset);
1641 if (same(encoded, false)) return false;
1642 String enc = encoded.toString();
1643 Array ret;
1644 php_iconv_err_t err = PHP_ICONV_ERR_SUCCESS;
1645 const char *encoded_str = encoded_headers.data();
1646 int encoded_str_len = encoded_headers.size();
1647 while (encoded_str_len > 0) {
1648 StringBuffer decoded_header;
1650 const char *header_name = NULL;
1651 size_t header_name_len = 0;
1652 const char *header_value = NULL;
1653 size_t header_value_len = 0;
1654 const char *p, *limit;
1656 const char *next_pos;
1657 err = _php_iconv_mime_decode(decoded_header, encoded_str, encoded_str_len,
1658 enc.data(), &next_pos, mode);
1659 if (err != PHP_ICONV_ERR_SUCCESS || decoded_header.data() == NULL) {
1660 break;
1663 limit = decoded_header.data() + decoded_header.size();
1664 for (p = decoded_header.data(); p < limit; p++) {
1665 if (*p == ':') {
1666 *((char*)p) = '\0';
1667 header_name = decoded_header.data();
1668 header_name_len = p - decoded_header.data();
1670 while (++p < limit) {
1671 if (*p != ' ' && *p != '\t') {
1672 break;
1676 header_value = p;
1677 header_value_len = limit - p;
1678 break;
1682 if (header_name != NULL) {
1683 String header(header_name, header_name_len, CopyString);
1684 String value(header_value, header_value_len, CopyString);
1685 if (ret.exists(header)) {
1686 Variant elem = ret[header];
1687 if (!elem.is(KindOfArray)) {
1688 ret.set(header, CREATE_VECTOR2(elem, value));
1689 } else {
1690 elem.append(value);
1691 ret.set(header, elem);
1693 } else {
1694 ret.set(header, value);
1697 encoded_str_len -= next_pos - encoded_str;
1698 encoded_str = next_pos;
1701 if (err != PHP_ICONV_ERR_SUCCESS) {
1702 _php_iconv_show_error(err, enc.data(), "???");
1703 return false;
1705 return ret;
1708 const StaticString
1709 s_input_encoding("input_encoding"),
1710 s_output_encoding("output_encoding"),
1711 s_internal_encoding("internal_encoding"),
1712 s_all("all");
1715 Variant f_iconv_get_encoding(CStrRef type /* = "all" */) {
1716 if (type == s_all) {
1717 Array ret;
1718 ret.set(s_input_encoding, ICONVG(input_encoding));
1719 ret.set(s_output_encoding, ICONVG(output_encoding));
1720 ret.set(s_internal_encoding, ICONVG(internal_encoding));
1721 return ret;
1723 if (type == s_input_encoding) return ICONVG(input_encoding);
1724 if (type == s_output_encoding) return ICONVG(output_encoding);
1725 if (type == s_internal_encoding) return ICONVG(internal_encoding);
1726 return false;
1729 bool f_iconv_set_encoding(CStrRef type, CStrRef charset) {
1730 if (!validate_charset(charset)) return false;
1731 if (type == s_input_encoding) {
1732 ICONVG(input_encoding) = charset;
1733 } else if (type == s_output_encoding) {
1734 ICONVG(output_encoding) = charset;
1735 } else if (type == s_internal_encoding) {
1736 ICONVG(internal_encoding) = charset;
1737 } else {
1738 return false;
1740 return true;
1743 Variant f_iconv(CStrRef in_charset, CStrRef out_charset, CStrRef str) {
1744 if (!validate_charset(in_charset)) return false;
1745 if (!validate_charset(out_charset)) return false;
1747 char *out_buffer;
1748 size_t out_len;
1749 php_iconv_err_t err =
1750 php_iconv_string(str.data(), str.size(), &out_buffer, &out_len,
1751 out_charset.data(), in_charset.data());
1752 _php_iconv_show_error(err, out_charset.data(), in_charset.data());
1753 if (out_buffer != NULL) {
1754 return String(out_buffer, out_len, AttachString);
1756 return false;
1759 Variant f_iconv_strlen(CStrRef str, CStrRef charset /* = null_string */) {
1760 Variant encoded = check_charset(charset);
1761 if (same(encoded, false)) return false;
1762 String enc = encoded.toString();
1763 unsigned int retval;
1764 php_iconv_err_t err = _php_iconv_strlen(&retval, str.data(), str.size(),
1765 enc.data());
1766 _php_iconv_show_error(err, GENERIC_SUPERSET_NAME, enc.data());
1767 if (err == PHP_ICONV_ERR_SUCCESS) {
1768 return (int64_t)retval;
1770 return false;
1773 Variant f_iconv_strpos(CStrRef haystack, CStrRef needle, int offset /* = 0 */,
1774 CStrRef charset /* = null_string */) {
1775 if (offset < 0) {
1776 raise_warning("Offset not contained in string.");
1777 return false;
1779 if (needle.size() < 1) {
1780 return false;
1783 Variant encoded = check_charset(charset);
1784 if (same(encoded, false)) return false;
1785 String enc = encoded.toString();
1786 unsigned int retval;
1787 php_iconv_err_t err =
1788 _php_iconv_strpos(&retval, haystack.data(), haystack.size(),
1789 needle.data(), needle.size(), offset, enc.data());
1790 _php_iconv_show_error(err, GENERIC_SUPERSET_NAME, enc.data());
1791 if (err == PHP_ICONV_ERR_SUCCESS && retval != (unsigned int)-1) {
1792 return (long)retval;
1794 return false;
1797 Variant f_iconv_strrpos(CStrRef haystack, CStrRef needle,
1798 CStrRef charset /* = null_string */) {
1799 if (needle.size() < 1) {
1800 return false;
1803 Variant encoded = check_charset(charset);
1804 if (same(encoded, false)) return false;
1805 String enc = encoded.toString();
1806 unsigned int retval;
1807 php_iconv_err_t err =
1808 _php_iconv_strpos(&retval, haystack.data(), haystack.size(),
1809 needle.data(), needle.size(), -1, enc.data());
1810 _php_iconv_show_error(err, GENERIC_SUPERSET_NAME, enc.data());
1811 if (err == PHP_ICONV_ERR_SUCCESS && retval != (unsigned int)-1) {
1812 return (long)retval;
1814 return false;
1817 Variant f_iconv_substr(CStrRef str, int offset, int length /* = INT_MAX */,
1818 CStrRef charset /* = null_string */) {
1819 Variant encoded = check_charset(charset);
1820 if (same(encoded, false)) return false;
1821 String enc = encoded.toString();
1822 StringBuffer retval;
1823 php_iconv_err_t err = _php_iconv_substr(retval, str.data(), str.size(),
1824 offset, length, enc.data());
1825 _php_iconv_show_error(err, GENERIC_SUPERSET_NAME, enc.data());
1826 if (err == PHP_ICONV_ERR_SUCCESS && !str.empty() && retval.data()) {
1827 return retval.detach();
1829 return false;
1832 String f_ob_iconv_handler(CStrRef contents, int status) {
1833 String mimetype = g_context->getMimeType();
1834 if (!mimetype.empty()) {
1835 char *out_buffer;
1836 size_t out_len;
1837 php_iconv_err_t err =
1838 php_iconv_string(contents.data(), contents.size(), &out_buffer, &out_len,
1839 ICONVG(output_encoding).c_str(),
1840 ICONVG(internal_encoding).c_str());
1841 _php_iconv_show_error(err, ICONVG(output_encoding).c_str(),
1842 ICONVG(internal_encoding).c_str());
1843 if (out_buffer != NULL) {
1844 g_context->setContentType(mimetype, ICONVG(output_encoding));
1845 return String(out_buffer, out_len, AttachString);
1848 return contents;
1851 ///////////////////////////////////////////////////////////////////////////////