2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Jelmer Vernooij 2007
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
24 #include "system/iconv.h"
29 * @brief Character-set conversion routines built on our iconv.
31 * @note Samba's internal character set (at least in the 3.0 series)
32 * is always the same as the one for the Unix filesystem. It is
33 * <b>not</b> necessarily UTF-8 and may be different on machines that
34 * need i18n filenames to be compatible with Unix software. It does
35 * have to be a superset of ASCII. All multibyte sequences must start
36 * with a byte with the high bit set.
42 * Convert string from one encoding to another, with error checking
44 * @param ctx Memory context
45 * @param cd Iconv handle
46 * @param src pointer to source string (multibyte or singlebyte)
47 * @param srclen length of the source string in bytes
48 * @param dst pointer to the destination string pointer (used as void ** internally)
49 * @note destlen is not a parameter: it is a local holding the size of the output allocation
50 * @returns the number of bytes occupied in the destination
/*
 * iconv_talloc: runs the bytes at src through the iconv handle cd (via
 * smb_iconv()) into a buffer obtained with talloc_realloc() on ctx.
 * NOTE(review): several lines of this definition are not visible in this
 * excerpt; the comments here describe only the statements that are shown.
 */
52 _PUBLIC_ ssize_t
iconv_talloc(TALLOC_CTX
*ctx
,
54 void const *src
, size_t srclen
,
/* destlen is a local: it is the size requested from talloc_realloc() below.
   i_len is the length handed to dump_data() on a conversion error and o_len
   is subtracted from the offered space when the final length is computed. */
57 size_t i_len
, o_len
, destlen
;
/* dst arrives as a plain void pointer and is used as void ** from here on. */
58 void **dest
= (void **)dst
;
/* Byte-wise view of the source buffer. */
60 const char *inbuf
= (const char *)src
;
65 /* it is _very_ rare that a conversion increases the size by
70 destlen
= 2 + (destlen
*3);
71 ob
= talloc_realloc(ctx
, outbuf
, char, destlen
);
73 DEBUG(0, ("iconv_talloc: realloc failed!\n"));
80 /* we give iconv 2 less bytes to allow us to terminate at the
84 retval
= smb_iconv(cd
,
87 if(retval
== (size_t)-1) {
88 const char *reason
="unknown error";
91 reason
="Incomplete multibyte sequence";
96 reason
="Illegal multibyte sequence";
99 DEBUG(0,("Conversion error: %s - ",reason
));
100 dump_data(0, (const uint8_t *) inbuf
, i_len
);
105 destlen
= (destlen
-2) - o_len
;
107 /* guarantee null termination in all charsets */
/* SSVAL stores 0 at byte offset destlen in ob; destlen was just recomputed
   as (destlen - 2) - o_len. Presumably a 16-bit store, matching the 2 bytes
   held back from iconv - TODO confirm against the SSVAL definition. */
108 SSVAL(ob
, destlen
, 0);
117 * Convert string from one encoding to another, with error checking
119 * @param src pointer to source string (multibyte or singlebyte)
120 * @param srclen length of the source string in bytes
121 * @param dest pointer to destination string (multibyte or singlebyte)
122 * @param destlen maximal length allowed for string
123 * @returns a bool status rather than a byte count; the number of bytes occupied in the destination is stored in *converted_size
/*
 * convert_string_convenience: converts srclen bytes at src from charset
 * "from" to charset "to" into the caller-supplied buffer dest of destlen
 * bytes, using the iconv handle looked up through ic.
 * NOTE(review): several lines of this definition are not visible in this
 * excerpt; the comments here describe only the statements that are shown.
 */
125 _PUBLIC_
bool convert_string_convenience(struct smb_iconv_convenience
*ic
,
126 charset_t from
, charset_t to
,
127 void const *src
, size_t srclen
,
128 void *dest
, size_t destlen
, size_t *converted_size
,
129 bool allow_badcharcnv
)
/* Byte-wise cursors over the source and destination buffers; their
   addresses are passed to smb_iconv() below. */
133 const char* inbuf
= (const char*)src
;
134 char* outbuf
= (char*)dest
;
135 smb_iconv_t descriptor
;
/* Bad-character conversion is not supported by this routine yet. */
137 if (allow_badcharcnv
) {
138 /* Not implemented yet */
/* A srclen of (size_t)-1 means src is a NUL-terminated string: measure it
   with strlen() and add one so the terminator is included. */
142 if (srclen
== (size_t)-1)
143 srclen
= strlen(inbuf
)+1;
/* Look up the conversion handle for this (from, to) pair. */
145 descriptor
= get_conv_handle(ic
, from
, to
);
/* Both (smb_iconv_t)-1 and (smb_iconv_t)0 are treated as "no handle". */
147 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
148 /* conversion not supported, use as is */
/* Copy at most MIN(srclen, destlen) bytes unchanged and report that count.
   NOTE(review): converted_size is written here without the NULL check that
   guards the write at the end of this function - confirm callers always
   pass a valid pointer on this path. */
149 size_t len
= MIN(srclen
,destlen
);
150 memcpy(dest
,src
,len
);
151 *converted_size
= len
;
/* Perform the conversion; the cursors and length counters are passed by
   address so smb_iconv() can update them. */
157 retval
= smb_iconv(descriptor
, &inbuf
, &i_len
, &outbuf
, &o_len
);
/* (size_t)-1 signals a conversion failure; the visible branches pick a
   reason string for it. */
158 if(retval
==(size_t)-1) {
162 reason
="Incomplete multibyte sequence";
165 reason
="No more room";
/* When the source charset is CH_UNIX the log line has an extra %s field;
   otherwise only the charset names and the two lengths are logged. */
166 if (from
== CH_UNIX
) {
167 DEBUG(0,("E2BIG: convert_string(%s,%s): srclen=%d destlen=%d - '%s'\n",
168 charset_name(ic
, from
), charset_name(ic
, to
),
169 (int)srclen
, (int)destlen
,
172 DEBUG(0,("E2BIG: convert_string(%s,%s): srclen=%d destlen=%d\n",
173 charset_name(ic
, from
), charset_name(ic
, to
),
174 (int)srclen
, (int)destlen
));
178 reason
="Illegal multibyte sequence";
181 /* smb_panic(reason); */
/* Report destlen minus o_len to the caller, but only when a result pointer
   was supplied. Presumably o_len is the output space left over after
   smb_iconv() - TODO confirm. */
183 if (converted_size
!= NULL
)
184 *converted_size
= destlen
-o_len
;
189 * Convert between character sets, allocating a new buffer using talloc for the result.
191 * @param srclen length of source buffer.
192 * @param dst always set at least to NULL
193 * @note -1 is not accepted for srclen.
195 * @returns false on error (the function returns bool, not a size); the size in bytes of the converted string is stored in *converted_size when that pointer is non-NULL.
/*
 * convert_string_talloc_convenience: looks up the iconv handle for the
 * (from, to) charset pair through ic and hands the conversion of src to
 * iconv_talloc(), which allocates the result on ctx.
 * NOTE(review): several lines of this definition are not visible in this
 * excerpt; the comments here describe only the statements that are shown.
 */
198 _PUBLIC_
bool convert_string_talloc_convenience(TALLOC_CTX
*ctx
,
199 struct smb_iconv_convenience
*ic
,
200 charset_t from
, charset_t to
,
201 void const *src
, size_t srclen
,
202 void *dst
, size_t *converted_size
,
203 bool allow_badcharcnv
)
/* dst arrives as a plain void pointer and is used as void ** from here on. */
205 void **dest
= (void **)dst
;
206 smb_iconv_t descriptor
;
/* Bad-character conversion is not supported: fail immediately. */
209 if (allow_badcharcnv
)
210 return false; /* Not implemented yet */
/* Guard against a NULL source, the (size_t)-1 length sentinel and empty
   input. */
214 if (src
== NULL
|| srclen
== (size_t)-1 || srclen
== 0)
/* Look up the conversion handle for this (from, to) pair. */
217 descriptor
= get_conv_handle(ic
, from
, to
);
/* Both (smb_iconv_t)-1 and (smb_iconv_t)0 are treated as "no handle". */
219 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
220 /* conversion not supported; NOTE(review): this function returns bool, so the old wording about returning -1 looks stale */
221 DEBUG(3, ("convert_string_talloc: conversion from %s to %s not supported!\n",
222 charset_name(ic
, from
),
223 charset_name(ic
, to
)));
/* Delegate the conversion and the allocation of the result to
   iconv_talloc(). */
227 ret
= iconv_talloc(ctx
, descriptor
, src
, srclen
, dest
);
/* The converted size is optional output: only stored when the caller
   supplied a pointer. */
230 if (converted_size
!= NULL
)
231 *converted_size
= ret
;