2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Scratch buffer; unix_strupper() and unix_strlower() below reinterpret it
   as smb_ucs2_t storage for their intermediate UCS-2 text. */
25 static pstring cvtbuf
;
/* Table of conversion descriptors, indexed as conv_handles[from][to]. */
27 static smb_iconv_t conv_handles
[NUM_CHARSETS
][NUM_CHARSETS
];
29 /****************************************************************************
30 return the name of a charset to give to iconv()
31 ****************************************************************************/
/*
 * charset_name: map a charset_t value to a charset name string.
 *
 * CH_UCS2 maps to the fixed name "UCS-2LE"; CH_UNIX, CH_DOS and CH_DISPLAY
 * take their names from lp_unix_charset(), lp_dos_charset() and
 * lp_display_charset() respectively.  A NULL or empty name is replaced
 * with "ASCII".
 *
 * NOTE(review): the declaration of `ret` and the return statement are not
 * visible in this listing -- presumably `ret` starts out NULL and is the
 * value returned; confirm against the full source.
 */
32 static char *charset_name(charset_t ch
)
36 if (ch
== CH_UCS2
) ret
= "UCS-2LE";
37 else if (ch
== CH_UNIX
) ret
= lp_unix_charset();
38 else if (ch
== CH_DOS
) ret
= lp_dos_charset();
39 else if (ch
== CH_DISPLAY
) ret
= lp_display_charset();
/* fall back to ASCII when no name was found or the name is empty */
41 if (!ret
|| !*ret
) ret
= "ASCII";
/*
 * lazy_initialize_conv: called at the top of convert_string() and
 * convert_string_allocate() before the descriptor table is consulted.
 * It keeps a function-local static flag `initialized` that starts as False.
 *
 * NOTE(review): the statements that test and set the flag are not visible
 * in this listing; presumably the one-time setup work runs here only on
 * the first call -- confirm against the full source.
 */
46 static void lazy_initialize_conv(void)
48 static int initialized
= False
;
58 /****************************************************************************
59 Initialize iconv conversion descriptors
60 ****************************************************************************/
/*
 * NOTE(review): the function header for this body is not visible in this
 * listing.  It is presumably init_iconv(), which the convert_string()
 * documentation names as the creator of the descriptors -- confirm.
 *
 * Visible steps:
 *  - if the CH_UNIX->CH_UCS2 or CH_UCS2->CH_UNIX handle is unset, open it
 *    as an ASCII<->UCS-2LE conversion;
 *  - for every (c1, c2) charset pair, look up the current names n1 and n2;
 *    a handle whose from_name/to_name already equal n1/n2 is kept as is;
 *  - otherwise any existing handle is closed and a new one is opened with
 *    smb_iconv_open(n2, n1);
 *  - when opening yields (smb_iconv_t)-1, the unsupported pair is logged at
 *    debug level 0 and NULL is stored in the table instead.
 *
 * NOTE(review): the use made of `did_reload` is not visible here.
 */
64 BOOL did_reload
= False
;
66 /* so that charset_name() works we need to get the UNIX<->UCS2 going
68 if (!conv_handles
[CH_UNIX
][CH_UCS2
]) {
69 conv_handles
[CH_UNIX
][CH_UCS2
] = smb_iconv_open("UCS-2LE", "ASCII");
71 if (!conv_handles
[CH_UCS2
][CH_UNIX
]) {
72 conv_handles
[CH_UCS2
][CH_UNIX
] = smb_iconv_open("ASCII", "UCS-2LE");
76 for (c1
=0;c1
<NUM_CHARSETS
;c1
++) {
77 for (c2
=0;c2
<NUM_CHARSETS
;c2
++) {
78 char *n1
= charset_name((charset_t
)c1
);
79 char *n2
= charset_name((charset_t
)c2
);
80 if (conv_handles
[c1
][c2
] &&
81 strcmp(n1
, conv_handles
[c1
][c2
]->from_name
) == 0 &&
82 strcmp(n2
, conv_handles
[c1
][c2
]->to_name
) == 0) continue;
86 if (conv_handles
[c1
][c2
]) {
87 smb_iconv_close(conv_handles
[c1
][c2
]);
89 conv_handles
[c1
][c2
] = smb_iconv_open(n2
,n1
);
90 if (conv_handles
[c1
][c2
] == (smb_iconv_t
)-1) {
91 DEBUG(0,("Conversion from %s to %s not supported\n",
92 charset_name((charset_t
)c1
), charset_name((charset_t
)c2
)));
93 conv_handles
[c1
][c2
] = NULL
;
103 /**
104 * Convert string from one encoding to another, making error checking etc
106 * @param descriptor conversion descriptor, created in init_iconv()
107 * @param src pointer to source string (multibyte or singlebyte)
108 * @param srclen length of the source string in bytes
109 * @param dest pointer to destination string (multibyte or singlebyte)
110 * @param destlen maximal length allowed for string
111 * @retval the number of bytes occupied in the destination
112 **/
/*
 * convert_string: convert srclen bytes at src from charset `from` to
 * charset `to`, writing into dest, which has room for destlen bytes.
 *
 *  - srclen == -1 is replaced by strlen(src)+1, so the terminator is
 *    converted too.
 *  - The descriptor is looked up in conv_handles[from][to] after
 *    lazy_initialize_conv() has run.
 *  - A descriptor of 0 or (smb_iconv_t)-1 means the conversion is not
 *    supported; MIN(srclen, destlen) bytes are then copied unchanged.
 *  - Otherwise smb_iconv() performs the conversion; EINVAL, E2BIG and
 *    EILSEQ are each mapped to a human-readable reason string.
 *  - The visible return value is destlen - o_len.
 *
 * NOTE(review): the declarations of i_len, o_len and retval, the return of
 * the unsupported-conversion branch and the scaffolding around the error
 * switch are not visible in this listing -- confirm against the full source.
 */
113 size_t convert_string(charset_t from
, charset_t to
,
114 void const *src
, size_t srclen
,
115 void *dest
, size_t destlen
)
119 const char* inbuf
= (const char*)src
;
120 char* outbuf
= (char*)dest
;
121 smb_iconv_t descriptor
;
123 if (srclen
== -1) srclen
= strlen(src
)+1;
125 lazy_initialize_conv();
127 descriptor
= conv_handles
[from
][to
];
129 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
130 /* conversion not supported, use as is */
131 int len
= MIN(srclen
,destlen
);
132 memcpy(dest
,src
,len
);
138 retval
= smb_iconv(descriptor
, &inbuf
, &i_len
, &outbuf
, &o_len
);
141 char *reason
="unknown error";
143 { case EINVAL
: reason
="Incomplete multibyte sequence"; break;
144 case E2BIG
: reason
="No more room";
145 DEBUG(0, ("Required %d, available %d\n",
147 /* we are not sure we need srclen bytes,
148 may be more, may be less.
149 We only know we need more than destlen
154 case EILSEQ
: reason
="Illegal myltibyte sequence"; break;
156 /* smb_panic(reason); */
158 return destlen
-o_len
;
161 /**
162 * Convert between character sets, allocating a new buffer for the result.
164 * @param srclen length of source buffer.
165 * @note -1 is not accepted for srclen.
167 * @retval Size in bytes of the converted string; or -1 in case of error.
168 **/
/*
 * convert_string_allocate: convert srclen bytes at src from charset `from`
 * to charset `to` into a heap buffer that is handed back through *dest.
 *
 *  - A NULL src or srclen == -1 is rejected with a return of -1.
 *  - The descriptor is looked up in conv_handles[from][to] after
 *    lazy_initialize_conv() has run; 0 or (smb_iconv_t)-1 is treated as
 *    "conversion not supported" (per the inline comment, -1 is returned).
 *  - The output size estimate starts at MAX(srclen, 512) and is doubled
 *    before the realloc() call that provides the working buffer.
 *  - After smb_iconv(), destlen is reduced by the unused room o_len and
 *    the buffer is resized to that length with Realloc() into *dest.
 *
 * NOTE(review): the declarations of outbuf/ob/retval, the retry structure
 * around realloc()/smb_iconv() and every return statement other than the
 * first are not visible in this listing -- confirm against the full source.
 */
169 size_t convert_string_allocate(charset_t from
, charset_t to
,
170 void const *src
, size_t srclen
, void **dest
)
172 size_t i_len
, o_len
, destlen
;
174 const char *inbuf
= (const char *)src
;
176 smb_iconv_t descriptor
;
180 if (src
== NULL
|| srclen
== -1) return -1;
182 lazy_initialize_conv();
184 descriptor
= conv_handles
[from
][to
];
186 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
187 /* conversion not supported, return -1*/
191 destlen
= MAX(srclen
, 512);
194 destlen
= destlen
* 2;
195 ob
= (char *)realloc(outbuf
, destlen
);
197 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
204 retval
= smb_iconv(descriptor
,
209 char *reason
="unknown error";
213 reason
="Incomplete multibyte sequence";
218 reason
="Illegal myltibyte sequence";
221 DEBUG(0,("Conversion error: %s(%s)\n",reason
,inbuf
));
222 /* smb_panic(reason); */
226 destlen
= destlen
- o_len
;
227 *dest
= (char *)Realloc(ob
,destlen
);
229 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
237 int unix_strupper(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
240 smb_ucs2_t
*buffer
=(smb_ucs2_t
*)cvtbuf
;
241 size
=convert_string(CH_UNIX
, CH_UCS2
, src
, srclen
, buffer
, sizeof(cvtbuf
));
242 if (!strupper_w(buffer
) && (dest
== src
)) return srclen
;
243 return convert_string(CH_UCS2
, CH_UNIX
, buffer
, size
, dest
, destlen
);
246 int unix_strlower(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
249 smb_ucs2_t
*buffer
=(smb_ucs2_t
*)cvtbuf
;
250 size
=convert_string(CH_UNIX
, CH_UCS2
, src
, srclen
, buffer
, sizeof(cvtbuf
));
251 if (!strlower_w(buffer
) && (dest
== src
)) return srclen
;
252 return convert_string(CH_UCS2
, CH_UNIX
, buffer
, size
, dest
, destlen
);
256 int ucs2_align(const void *base_ptr
, const void *p
, int flags
)
258 if (flags
& (STR_NOALIGN
|STR_ASCII
)) return 0;
259 return PTR_DIFF(p
, base_ptr
) & 1;
263 /****************************************************************************
264 copy a string from a char* unix src to a dos codepage string destination
265 return the number of bytes occupied by the string in the destination
267 STR_TERMINATE means include the null termination
268 STR_UPPER means uppercase in the destination
269 dest_len is the maximum length allowed in the destination. If dest_len
270 is -1 then no maximum is used
271 ****************************************************************************/
/*
 * push_ascii: convert src from CH_UNIX to CH_DOS into dest by way of
 * convert_string(), and return that call's result.
 *
 *  - src_len starts as strlen(src), i.e. without the terminator.
 *  - dest_len == -1 is replaced by sizeof(pstring).
 *  - With STR_UPPER set, src is first copied into tmpbuf with pstrcpy().
 *
 * NOTE(review): the declaration of tmpbuf, the rest of the STR_UPPER branch
 * and the body of the STR_TERMINATE branch are not visible in this listing
 * -- confirm against the full source.
 */
272 int push_ascii(void *dest
, const char *src
, int dest_len
, int flags
)
274 int src_len
= strlen(src
);
277 /* treat a pstring as "unlimited" length */
278 if (dest_len
== -1) {
279 dest_len
= sizeof(pstring
);
282 if (flags
& STR_UPPER
) {
283 pstrcpy(tmpbuf
, src
);
288 if (flags
& STR_TERMINATE
) {
292 return convert_string(CH_UNIX
, CH_DOS
, src
, src_len
, dest
, dest_len
);
295 int push_ascii_fstring(void *dest
, const char *src
)
297 return push_ascii(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
300 int push_ascii_pstring(void *dest
, const char *src
)
302 return push_ascii(dest
, src
, sizeof(pstring
), STR_TERMINATE
);
305 int push_pstring(void *dest
, const char *src
)
307 return push_ascii(dest
, src
, sizeof(pstring
), STR_TERMINATE
);
311 /****************************************************************************
312 copy a string from a dos codepage source to a unix char* destination
314 STR_TERMINATE means the string in src is null terminated
315 if STR_TERMINATE is set then src_len is ignored
316 src_len is the length of the source area in bytes
317 return the number of bytes occupied by the string in src
318 the resulting string in "dest" is always null terminated
319 ****************************************************************************/
/*
 * pull_ascii: convert src from CH_DOS to CH_UNIX into dest by way of
 * convert_string().
 *
 *  - dest_len == -1 is replaced by sizeof(pstring).
 *  - With STR_TERMINATE set, src_len is recomputed as strlen(src)+1 and the
 *    caller's src_len is ignored.
 *  - When dest_len is non-zero, a 0 byte is stored at index
 *    MIN(ret, dest_len-1), so dest is always terminated inside its bounds.
 *
 * NOTE(review): the declaration of `ret` and the return statement are not
 * visible in this listing -- confirm against the full source.
 */
320 int pull_ascii(char *dest
, const void *src
, int dest_len
, int src_len
, int flags
)
324 if (dest_len
== -1) {
325 dest_len
= sizeof(pstring
);
328 if (flags
& STR_TERMINATE
) src_len
= strlen(src
)+1;
330 ret
= convert_string(CH_DOS
, CH_UNIX
, src
, src_len
, dest
, dest_len
);
332 if (dest_len
) dest
[MIN(ret
, dest_len
-1)] = 0;
337 int pull_ascii_pstring(char *dest
, const void *src
)
339 return pull_ascii(dest
, src
, sizeof(pstring
), -1, STR_TERMINATE
);
342 int pull_ascii_fstring(char *dest
, const void *src
)
344 return pull_ascii(dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
347 /****************************************************************************
348 copy a string from a char* src to a unicode destination
349 return the number of bytes occupied by the string in the destination
351 STR_TERMINATE means include the null termination
352 STR_UPPER means uppercase in the destination
353 STR_NOALIGN means don't do alignment
354 dest_len is the maximum length allowed in the destination. If dest_len
355 is -1 then no maximum is used
356 ****************************************************************************/
/*
 * push_ucs2: convert src from CH_UNIX to CH_UCS2 into dest by way of
 * convert_string().
 *
 *  - src_len starts as strlen(src), i.e. without the terminator.
 *  - dest_len == -1 is replaced by sizeof(pstring).
 *  - With STR_UPPER set, src is first copied into tmpbuf with pstrcpy().
 *  - When ucs2_align(base_ptr, dest, flags) is non-zero, dest is advanced
 *    by one byte and a non-zero dest_len is decremented to match.
 *  - The result of convert_string() is added to `len`.
 *
 * NOTE(review): the declarations of len and tmpbuf, the rest of the
 * STR_UPPER branch, the body of the STR_TERMINATE branch, the statement
 * that follows the "multiple of 2 bytes" comment and the return statement
 * are not visible in this listing -- confirm against the full source.
 */
357 int push_ucs2(const void *base_ptr
, void *dest
, const char *src
, int dest_len
, int flags
)
360 int src_len
= strlen(src
);
363 /* treat a pstring as "unlimited" length */
364 if (dest_len
== -1) {
365 dest_len
= sizeof(pstring
);
368 if (flags
& STR_UPPER
) {
369 pstrcpy(tmpbuf
, src
);
374 if (flags
& STR_TERMINATE
) {
378 if (ucs2_align(base_ptr
, dest
, flags
)) {
380 dest
= (void *)((char *)dest
+ 1);
381 if (dest_len
) dest_len
--;
385 /* ucs2 is always a multiple of 2 bytes */
388 len
+= convert_string(CH_UNIX
, CH_UCS2
, src
, src_len
, dest
, dest_len
);
393 /****************************************************************************
394 copy a string from a ucs2 source to a unix char* destination
396 STR_TERMINATE means the string in src is null terminated
397 STR_NOALIGN means don't try to align
398 if STR_TERMINATE is set then src_len is ignored
399 src_len is the length of the source area in bytes
400 return the number of bytes occupied by the string in src
401 the resulting string in "dest" is always null terminated
402 ****************************************************************************/
/*
 * pull_ucs2: convert src from CH_UCS2 to CH_UNIX into dest by way of
 * convert_string().
 *
 *  - dest_len == -1 is replaced by sizeof(pstring).
 *  - When ucs2_align(base_ptr, src, flags) is non-zero, src is advanced by
 *    one byte and a positive src_len is decremented to match.
 *  - With STR_TERMINATE set, src_len is recomputed as strlen_w(src)*2+2,
 *    i.e. the string plus its two-byte terminator.
 *  - When dest_len is non-zero, a 0 byte is stored at index
 *    MIN(ret, dest_len-1), so dest is always terminated inside its bounds.
 *
 * NOTE(review): the declaration of `ret`, the statement that follows the
 * "multiple of 2 bytes" comment and the return statement are not visible
 * in this listing -- confirm against the full source.
 */
403 int pull_ucs2(const void *base_ptr
, char *dest
, const void *src
, int dest_len
, int src_len
, int flags
)
407 if (dest_len
== -1) {
408 dest_len
= sizeof(pstring
);
411 if (ucs2_align(base_ptr
, src
, flags
)) {
412 src
= (const void *)((const char *)src
+ 1);
413 if (src_len
> 0) src_len
--;
416 if (flags
& STR_TERMINATE
) src_len
= strlen_w(src
)*2+2;
418 /* ucs2 is always a multiple of 2 bytes */
421 ret
= convert_string(CH_UCS2
, CH_UNIX
, src
, src_len
, dest
, dest_len
);
422 if (dest_len
) dest
[MIN(ret
, dest_len
-1)] = 0;
427 int pull_ucs2_pstring(char *dest
, const void *src
)
429 return pull_ucs2(NULL
, dest
, src
, sizeof(pstring
), -1, STR_TERMINATE
);
432 int pull_ucs2_fstring(char *dest
, const void *src
)
434 return pull_ucs2(NULL
, dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
438 /****************************************************************************
439 copy a string from a char* src to a unicode or ascii
440 dos codepage destination choosing unicode or ascii based on the
441 flags in the SMB buffer starting at base_ptr
442 return the number of bytes occupied by the string in the destination
444 STR_TERMINATE means include the null termination
445 STR_UPPER means uppercase in the destination
446 STR_ASCII use ascii even with unicode packet
447 STR_NOALIGN means don't do alignment
448 dest_len is the maximum length allowed in the destination. If dest_len
449 is -1 then no maxiumum is used
450 ****************************************************************************/
451 int push_string(const void *base_ptr
, void *dest
, const char *src
, int dest_len
, int flags
)
453 if (!(flags
& STR_ASCII
) && \
454 ((flags
& STR_UNICODE
|| \
455 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
456 return push_ucs2(base_ptr
, dest
, src
, dest_len
, flags
);
458 return push_ascii(dest
, src
, dest_len
, flags
);
462 /****************************************************************************
463 copy a string from a unicode or ascii source (depending on
464 the packet flags) to a char* destination
466 STR_TERMINATE means the string in src is null terminated
467 STR_UNICODE means to force as unicode
468 STR_ASCII use ascii even with unicode packet
469 STR_NOALIGN means don't do alignment
470 if STR_TERMINATE is set then src_len is ignored
471 src_len is the length of the source area in bytes
472 return the number of bytes occupied by the string in src
473 the resulting string in "dest" is always null terminated
474 ****************************************************************************/
/*
 * pull_string: dispatch to pull_ucs2() or pull_ascii().
 *
 * pull_ucs2() is used when STR_ASCII is not set and either STR_UNICODE is
 * set or the SMB buffer at base_ptr has FLAGS2_UNICODE_STRINGS in its
 * smb_flg2 field; every other case goes to pull_ascii().  The result of
 * the chosen function is returned unchanged.
 *
 * NOTE(review): the last line of the parameter list is not visible in this
 * listing; the body uses a `flags` value, presumably declared there --
 * confirm against the full source.
 */
475 int pull_string(const void *base_ptr
, char *dest
, const void *src
, int dest_len
, int src_len
,
478 if (!(flags
& STR_ASCII
) && \
479 ((flags
& STR_UNICODE
|| \
480 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
481 return pull_ucs2(base_ptr
, dest
, src
, dest_len
, src_len
, flags
);
483 return pull_ascii(dest
, src
, dest_len
, src_len
, flags
);
/*
 * align_string: report the alignment padding for a string at p.
 *
 * When STR_ASCII is not set and either STR_UNICODE is set or the SMB buffer
 * at base_ptr has FLAGS2_UNICODE_STRINGS in its smb_flg2 field, the answer
 * is delegated to ucs2_align(base_ptr, p, flags).
 *
 * NOTE(review): the return for the non-unicode case is not visible in this
 * listing -- presumably 0 (no padding); confirm against the full source.
 */
486 int align_string(const void *base_ptr
, const char *p
, int flags
)
488 if (!(flags
& STR_ASCII
) && \
489 ((flags
& STR_UNICODE
|| \
490 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
491 return ucs2_align(base_ptr
, p
, flags
);
498 /****************************************************************************
499 convert from ucs2 to unix charset and return the
500 allocated and converted string or NULL if an error occurred.
501 you must provide a zero terminated string.
502 the returning string will be zero terminated.
503 ****************************************************************************/
/*
 * acnv_u2ux: convert a UCS-2 string to the unix charset through
 * convert_string_allocate(CH_UCS2, CH_UNIX, ...).
 *
 * The source length is (strlen_w(src) + 1) * sizeof(smb_ucs2_t), so the
 * terminator is converted as well.  NULL is returned when the conversion
 * reports -1.
 *
 * NOTE(review): the declarations of slen, dlen and dest and the success
 * return are not visible in this listing -- presumably dest is returned;
 * confirm against the full source.
 */
504 char *acnv_u2ux(const smb_ucs2_t
*src
)
510 slen
= (strlen_w(src
) + 1) * sizeof(smb_ucs2_t
);
511 dlen
= convert_string_allocate(CH_UCS2
, CH_UNIX
, src
, slen
, &dest
);
512 if (dlen
== -1) return NULL
;
516 /****************************************************************************
517 convert from unix to ucs2 charset and return the
518 allocated and converted string or NULL if an error occurred.
519 you must provide a zero terminated string.
520 the returning string will be zero terminated.
521 ****************************************************************************/
/*
 * acnv_uxu2: convert a unix-charset string to UCS-2 through
 * convert_string_allocate(CH_UNIX, CH_UCS2, ...).
 *
 * The source length is strlen(src) + 1, so the terminator is converted as
 * well.  NULL is returned when the conversion reports -1.
 *
 * NOTE(review): the declarations of slen, dlen and dest and the success
 * return are not visible in this listing -- presumably dest is returned;
 * confirm against the full source.
 */
522 smb_ucs2_t
*acnv_uxu2(const char *src
)
528 slen
= strlen(src
) + 1;
529 dlen
= convert_string_allocate(CH_UNIX
, CH_UCS2
, src
, slen
, &dest
);
530 if (dlen
== -1) return NULL
;
534 /****************************************************************************
535 convert from ucs2 to dos charset and return the
536 allocated and converted string or NULL if an error occurred.
537 you must provide a zero terminated string.
538 the returning string will be zero terminated.
539 ****************************************************************************/
/*
 * acnv_u2dos: convert a UCS-2 string to the dos charset through
 * convert_string_allocate(CH_UCS2, CH_DOS, ...).
 *
 * The source length is (strlen_w(src) + 1) * sizeof(smb_ucs2_t), so the
 * terminator is converted as well.  NULL is returned when the conversion
 * reports -1.
 *
 * NOTE(review): the declarations of slen, dlen and dest and the success
 * return are not visible in this listing -- presumably dest is returned;
 * confirm against the full source.
 */
540 char *acnv_u2dos(const smb_ucs2_t
*src
)
546 slen
= (strlen_w(src
) + 1) * sizeof(smb_ucs2_t
);
547 dlen
= convert_string_allocate(CH_UCS2
, CH_DOS
, src
, slen
, &dest
);
548 if (dlen
== -1) return NULL
;
552 /****************************************************************************
553 convert from dos to ucs2 charset and return the
554 allocated and converted string or NULL if an error occurred.
555 you must provide a zero terminated string.
556 the returning string will be zero terminated.
557 ****************************************************************************/
/*
 * acnv_dosu2: convert a dos-charset string to UCS-2 through
 * convert_string_allocate(CH_DOS, CH_UCS2, ...).
 *
 * The source length is strlen(src) + 1, so the terminator is converted as
 * well.  NULL is returned when the conversion reports -1.
 *
 * NOTE(review): the declarations of slen, dlen and dest and the success
 * return are not visible in this listing -- presumably dest is returned;
 * confirm against the full source.
 */
558 smb_ucs2_t
*acnv_dosu2(const char *src
)
564 slen
= strlen(src
) + 1;
565 dlen
= convert_string_allocate(CH_DOS
, CH_UCS2
, src
, slen
, &dest
);
566 if (dlen
== -1) return NULL
;