2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 static pstring cvtbuf
;
27 static smb_iconv_t conv_handles
[NUM_CHARSETS
][NUM_CHARSETS
];
29 /****************************************************************************
30 return the name of a charset to give to iconv()
31 ****************************************************************************/
32 static char *charset_name(charset_t ch
)
36 if (ch
== CH_UCS2
) ret
= "UCS-2LE";
37 else if (ch
== CH_UNIX
) ret
= lp_unix_charset();
38 else if (ch
== CH_DOS
) ret
= lp_dos_charset();
39 else if (ch
== CH_DISPLAY
) ret
= lp_display_charset();
40 else if (ch
== CH_UTF8
) ret
= "UTF8";
42 if (!ret
|| !*ret
) ret
= "ASCII";
47 static void lazy_initialize_conv(void)
49 static int initialized
= False
;
59 /****************************************************************************
60 Initialize iconv conversion descriptors
61 ****************************************************************************/
65 BOOL did_reload
= False
;
67 /* so that charset_name() works we need to get the UNIX<->UCS2 going
69 if (!conv_handles
[CH_UNIX
][CH_UCS2
]) {
70 conv_handles
[CH_UNIX
][CH_UCS2
] = smb_iconv_open("UCS-2LE", "ASCII");
72 if (!conv_handles
[CH_UCS2
][CH_UNIX
]) {
73 conv_handles
[CH_UCS2
][CH_UNIX
] = smb_iconv_open("ASCII", "UCS-2LE");
77 for (c1
=0;c1
<NUM_CHARSETS
;c1
++) {
78 for (c2
=0;c2
<NUM_CHARSETS
;c2
++) {
79 char *n1
= charset_name((charset_t
)c1
);
80 char *n2
= charset_name((charset_t
)c2
);
81 if (conv_handles
[c1
][c2
] &&
82 strcmp(n1
, conv_handles
[c1
][c2
]->from_name
) == 0 &&
83 strcmp(n2
, conv_handles
[c1
][c2
]->to_name
) == 0) continue;
87 if (conv_handles
[c1
][c2
]) {
88 smb_iconv_close(conv_handles
[c1
][c2
]);
90 conv_handles
[c1
][c2
] = smb_iconv_open(n2
,n1
);
91 if (conv_handles
[c1
][c2
] == (smb_iconv_t
)-1) {
92 DEBUG(0,("Conversion from %s to %s not supported\n",
93 charset_name((charset_t
)c1
), charset_name((charset_t
)c2
)));
94 conv_handles
[c1
][c2
] = NULL
;
105 * Convert string from one encoding to another, making error checking etc
107 * @param descriptor conversion descriptor, created in init_iconv()
108 * @param src pointer to source string (multibyte or singlebyte)
109 * @param srclen length of the source string in bytes
110 * @param dest pointer to destination string (multibyte or singlebyte)
111 * @param destlen maximal length allowed for string
112 * @retval the number of bytes occupied in the destination
114 size_t convert_string(charset_t from
, charset_t to
,
115 void const *src
, size_t srclen
,
116 void *dest
, size_t destlen
)
120 const char* inbuf
= (const char*)src
;
121 char* outbuf
= (char*)dest
;
122 smb_iconv_t descriptor
;
124 if (srclen
== -1) srclen
= strlen(src
)+1;
126 lazy_initialize_conv();
128 descriptor
= conv_handles
[from
][to
];
130 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
131 /* conversion not supported, use as is */
132 int len
= MIN(srclen
,destlen
);
133 memcpy(dest
,src
,len
);
139 retval
= smb_iconv(descriptor
, &inbuf
, &i_len
, &outbuf
, &o_len
);
142 char *reason
="unknown error";
144 { case EINVAL
: reason
="Incomplete multibyte sequence"; break;
145 case E2BIG
: reason
="No more room";
146 DEBUG(0, ("convert_string: Required %d, available %d\n",
148 /* we are not sure we need srclen bytes,
149 may be more, may be less.
150 We only know we need more than destlen
155 case EILSEQ
: reason
="Illegal multibyte sequence"; break;
157 /* smb_panic(reason); */
159 return destlen
-o_len
;
163 * Convert between character sets, allocating a new buffer for the result.
165 * @param srclen length of source buffer.
166 * @param dest always set at least to NULL
167 * @note -1 is not accepted for srclen.
169 * @retval Size in bytes of the converted string; or -1 in case of error.
171 size_t convert_string_allocate(charset_t from
, charset_t to
,
172 void const *src
, size_t srclen
, void **dest
)
174 size_t i_len
, o_len
, destlen
;
176 const char *inbuf
= (const char *)src
;
178 smb_iconv_t descriptor
;
182 if (src
== NULL
|| srclen
== -1) return -1;
184 lazy_initialize_conv();
186 descriptor
= conv_handles
[from
][to
];
188 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
189 /* conversion not supported, return -1*/
190 DEBUG(3, ("convert_string_allocate: conversion not supported!\n"));
194 destlen
= MAX(srclen
, 512);
197 destlen
= destlen
* 2;
198 ob
= (char *)realloc(outbuf
, destlen
);
200 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
207 retval
= smb_iconv(descriptor
,
212 char *reason
="unknown error";
216 reason
="Incomplete multibyte sequence";
221 reason
="Illegal multibyte sequence";
224 DEBUG(0,("Conversion error: %s(%s)\n",reason
,inbuf
));
225 /* smb_panic(reason); */
229 destlen
= destlen
- o_len
;
230 *dest
= (char *)Realloc(ob
,destlen
);
232 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
241 * Convert between character sets, allocating a new buffer using talloc for the result.
243 * @param srclen length of source buffer.
244 * @param dest always set at least to NULL
245 * @note -1 is not accepted for srclen.
247 * @retval Size in bytes of the converted string; or -1 in case of error.
249 size_t convert_string_talloc(TALLOC_CTX
*ctx
, charset_t from
, charset_t to
,
250 void const *src
, size_t srclen
, void **dest
)
252 void *alloced_string
;
256 dest_len
=convert_string_allocate(from
, to
, src
, srclen
, &alloced_string
);
259 *dest
= talloc_memdup(ctx
, alloced_string
, dest_len
);
260 SAFE_FREE(alloced_string
);
266 int unix_strupper(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
269 smb_ucs2_t
*buffer
=(smb_ucs2_t
*)cvtbuf
;
270 size
=convert_string(CH_UNIX
, CH_UCS2
, src
, srclen
, buffer
, sizeof(cvtbuf
));
271 if (!strupper_w(buffer
) && (dest
== src
)) return srclen
;
272 return convert_string(CH_UCS2
, CH_UNIX
, buffer
, size
, dest
, destlen
);
275 int unix_strlower(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
278 smb_ucs2_t
*buffer
=(smb_ucs2_t
*)cvtbuf
;
279 size
=convert_string(CH_UNIX
, CH_UCS2
, src
, srclen
, buffer
, sizeof(cvtbuf
));
280 if (!strlower_w(buffer
) && (dest
== src
)) return srclen
;
281 return convert_string(CH_UCS2
, CH_UNIX
, buffer
, size
, dest
, destlen
);
285 int ucs2_align(const void *base_ptr
, const void *p
, int flags
)
287 if (flags
& (STR_NOALIGN
|STR_ASCII
)) return 0;
288 return PTR_DIFF(p
, base_ptr
) & 1;
292 /****************************************************************************
293 copy a string from a char* unix src to a dos codepage string destination
294 return the number of bytes occupied by the string in the destination
296 STR_TERMINATE means include the null termination
297 STR_UPPER means uppercase in the destination
298 dest_len is the maximum length allowed in the destination. If dest_len
299 is -1 then no maximum is used
300 ****************************************************************************/
301 int push_ascii(void *dest
, const char *src
, int dest_len
, int flags
)
303 int src_len
= strlen(src
);
306 /* treat a pstring as "unlimited" length */
307 if (dest_len
== -1) {
308 dest_len
= sizeof(pstring
);
311 if (flags
& STR_UPPER
) {
312 pstrcpy(tmpbuf
, src
);
317 if (flags
& STR_TERMINATE
) {
321 return convert_string(CH_UNIX
, CH_DOS
, src
, src_len
, dest
, dest_len
);
324 int push_ascii_fstring(void *dest
, const char *src
)
326 return push_ascii(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
329 int push_ascii_pstring(void *dest
, const char *src
)
331 return push_ascii(dest
, src
, sizeof(pstring
), STR_TERMINATE
);
334 int push_pstring(void *dest
, const char *src
)
336 return push_ascii(dest
, src
, sizeof(pstring
), STR_TERMINATE
);
340 /****************************************************************************
341 copy a string from a dos codepage source to a unix char* destination
343 STR_TERMINATE means the string in src is null terminated
344 if STR_TERMINATE is set then src_len is ignored
345 src_len is the length of the source area in bytes
346 return the number of bytes occupied by the string in src
347 the resulting string in "dest" is always null terminated
348 ****************************************************************************/
349 int pull_ascii(char *dest
, const void *src
, int dest_len
, int src_len
, int flags
)
353 if (dest_len
== -1) {
354 dest_len
= sizeof(pstring
);
357 if (flags
& STR_TERMINATE
) {
359 src_len
= strlen(src
) + 1;
361 int len
= strnlen(src
, src_len
);
362 if (len
< src_len
) len
++;
367 ret
= convert_string(CH_DOS
, CH_UNIX
, src
, src_len
, dest
, dest_len
);
369 if (dest_len
) dest
[MIN(ret
, dest_len
-1)] = 0;
374 int pull_ascii_pstring(char *dest
, const void *src
)
376 return pull_ascii(dest
, src
, sizeof(pstring
), -1, STR_TERMINATE
);
379 int pull_ascii_fstring(char *dest
, const void *src
)
381 return pull_ascii(dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
384 /****************************************************************************
385 copy a string from a char* src to a unicode destination
386 return the number of bytes occupied by the string in the destination
388 STR_TERMINATE means include the null termination
389 STR_UPPER means uppercase in the destination
390 STR_NOALIGN means don't do alignment
391 dest_len is the maximum length allowed in the destination. If dest_len
392 is -1 then no maximum is used
393 ****************************************************************************/
394 int push_ucs2(const void *base_ptr
, void *dest
, const char *src
, int dest_len
, int flags
)
397 int src_len
= strlen(src
);
400 /* treat a pstring as "unlimited" length */
401 if (dest_len
== -1) {
402 dest_len
= sizeof(pstring
);
405 if (flags
& STR_UPPER
) {
406 pstrcpy(tmpbuf
, src
);
411 if (flags
& STR_TERMINATE
) {
415 if (ucs2_align(base_ptr
, dest
, flags
)) {
417 dest
= (void *)((char *)dest
+ 1);
418 if (dest_len
) dest_len
--;
422 /* ucs2 is always a multiple of 2 bytes */
425 len
+= convert_string(CH_UNIX
, CH_UCS2
, src
, src_len
, dest
, dest_len
);
430 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer using talloc
432 * @param dest always set at least to NULL
434 * @retval The number of bytes occupied by the string in the destination
435 * or -1 in case of error.
437 int push_ucs2_talloc(TALLOC_CTX
*ctx
, smb_ucs2_t
**dest
, const char *src
)
439 int src_len
= strlen(src
)+1;
442 return convert_string_talloc(ctx
, CH_UNIX
, CH_UCS2
, src
, src_len
, (void **)dest
);
446 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
448 * @param dest always set at least to NULL
450 * @retval The number of bytes occupied by the string in the destination
451 * or -1 in case of error.
453 int push_ucs2_allocate(smb_ucs2_t
**dest
, const char *src
)
455 int src_len
= strlen(src
)+1;
458 return convert_string_allocate(CH_UNIX
, CH_UCS2
, src
, src_len
, (void **)dest
);
461 /****************************************************************************
462 copy a string from a char* src to a UTF-8 destination
463 return the number of bytes occupied by the string in the destination
465 STR_TERMINATE means include the null termination
466 STR_UPPER means uppercase in the destination
467 dest_len is the maximum length allowed in the destination. If dest_len
468 is -1 then no maximum is used
469 ****************************************************************************/
470 int push_utf8(void *dest
, const char *src
, int dest_len
, int flags
)
472 int src_len
= strlen(src
);
475 /* treat a pstring as "unlimited" length */
476 if (dest_len
== -1) {
477 dest_len
= sizeof(pstring
);
480 if (flags
& STR_UPPER
) {
481 pstrcpy(tmpbuf
, src
);
486 if (flags
& STR_TERMINATE
) {
490 return convert_string(CH_UNIX
, CH_UTF8
, src
, src_len
, dest
, dest_len
);
493 int push_utf8_fstring(void *dest
, const char *src
)
495 return push_utf8(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
498 int push_utf8_pstring(void *dest
, const char *src
)
500 return push_utf8(dest
, src
, sizeof(pstring
), STR_TERMINATE
);
504 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
506 * @param dest always set at least to NULL
508 * @retval The number of bytes occupied by the string in the destination
510 int push_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
)
512 int src_len
= strlen(src
)+1;
515 return convert_string_talloc(ctx
, CH_UNIX
, CH_UTF8
, src
, src_len
, (void**)dest
);
519 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
521 * @param dest always set at least to NULL
523 * @retval The number of bytes occupied by the string in the destination
525 int push_utf8_allocate(void **dest
, const char *src
)
527 int src_len
= strlen(src
)+1;
530 return convert_string_allocate(CH_UNIX
, CH_UTF8
, src
, src_len
, dest
);
533 /****************************************************************************
534 copy a string from a ucs2 source to a unix char* destination
536 STR_TERMINATE means the string in src is null terminated
537 STR_NOALIGN means don't try to align
538 if STR_TERMINATE is set then src_len is ignored if it is -1
539 src_len is the length of the source area in bytes
540 return the number of bytes occupied by the string in src
541 the resulting string in "dest" is always null terminated
542 ****************************************************************************/
543 int pull_ucs2(const void *base_ptr
, char *dest
, const void *src
, int dest_len
, int src_len
, int flags
)
547 if (dest_len
== -1) {
548 dest_len
= sizeof(pstring
);
551 if (ucs2_align(base_ptr
, src
, flags
)) {
552 src
= (const void *)((const char *)src
+ 1);
553 if (src_len
> 0) src_len
--;
556 if (flags
& STR_TERMINATE
) {
558 src_len
= strlen_w(src
)*2 + 2;
560 int len
= strnlen_w(src
, src_len
/2);
561 if (len
< src_len
/2) len
++;
566 /* ucs2 is always a multiple of 2 bytes */
570 ret
= convert_string(CH_UCS2
, CH_UNIX
, src
, src_len
, dest
, dest_len
);
571 if (dest_len
) dest
[MIN(ret
, dest_len
-1)] = 0;
576 int pull_ucs2_pstring(char *dest
, const void *src
)
578 return pull_ucs2(NULL
, dest
, src
, sizeof(pstring
), -1, STR_TERMINATE
);
581 int pull_ucs2_fstring(char *dest
, const void *src
)
583 return pull_ucs2(NULL
, dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
587 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
589 * @param dest always set at least to NULL
591 * @retval The number of bytes occupied by the string in the destination
593 int pull_ucs2_talloc(TALLOC_CTX
*ctx
, void **dest
, const char *src
)
595 int src_len
= strlen(src
)+1;
597 return convert_string_talloc(ctx
, CH_UCS2
, CH_UNIX
, src
, src_len
, dest
);
601 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
603 * @param dest always set at least to NULL
605 * @retval The number of bytes occupied by the string in the destination
607 int pull_ucs2_allocate(void **dest
, const char *src
)
609 int src_len
= strlen(src
)+1;
611 return convert_string_allocate(CH_UCS2
, CH_UNIX
, src
, src_len
, dest
);
614 /****************************************************************************
615 copy a string from a utf-8 source to a unix char* destination
617 STR_TERMINATE means the string in src is null terminated
618 if STR_TERMINATE is set then src_len is ignored
619 src_len is the length of the source area in bytes
620 return the number of bytes occupied by the string in src
621 the resulting string in "dest" is always null terminated
622 ****************************************************************************/
623 int pull_utf8(char *dest
, const void *src
, int dest_len
, int src_len
, int flags
)
627 if (dest_len
== -1) {
628 dest_len
= sizeof(pstring
);
631 if (flags
& STR_TERMINATE
) {
633 src_len
= strlen(src
) + 1;
635 int len
= strnlen(src
, src_len
);
636 if (len
< src_len
) len
++;
641 ret
= convert_string(CH_UTF8
, CH_UNIX
, src
, src_len
, dest
, dest_len
);
642 if (dest_len
) dest
[MIN(ret
, dest_len
-1)] = 0;
647 int pull_utf8_pstring(char *dest
, const void *src
)
649 return pull_utf8(dest
, src
, sizeof(pstring
), -1, STR_TERMINATE
);
652 int pull_utf8_fstring(char *dest
, const void *src
)
654 return pull_utf8(dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
658 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
660 * @param dest always set at least to NULL
662 * @retval The number of bytes occupied by the string in the destination
664 int pull_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
)
666 int src_len
= strlen(src
)+1;
668 return convert_string_talloc(ctx
, CH_UTF8
, CH_UNIX
, src
, src_len
, (void **)dest
);
672 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
674 * @param dest always set at least to NULL
676 * @retval The number of bytes occupied by the string in the destination
678 int pull_utf8_allocate(void **dest
, const char *src
)
680 int src_len
= strlen(src
)+1;
682 return convert_string_allocate(CH_UTF8
, CH_UNIX
, src
, src_len
, dest
);
685 /****************************************************************************
686 copy a string from a char* src to a unicode or ascii
687 dos codepage destination choosing unicode or ascii based on the
688 flags in the SMB buffer starting at base_ptr
689 return the number of bytes occupied by the string in the destination
691 STR_TERMINATE means include the null termination
692 STR_UPPER means uppercase in the destination
693 STR_ASCII use ascii even with unicode packet
694 STR_NOALIGN means don't do alignment
695 dest_len is the maximum length allowed in the destination. If dest_len
696 is -1 then no maxiumum is used
697 ****************************************************************************/
698 int push_string(const void *base_ptr
, void *dest
, const char *src
, int dest_len
, int flags
)
700 if (!(flags
& STR_ASCII
) && \
701 ((flags
& STR_UNICODE
|| \
702 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
703 return push_ucs2(base_ptr
, dest
, src
, dest_len
, flags
);
705 return push_ascii(dest
, src
, dest_len
, flags
);
709 /****************************************************************************
710 copy a string from a unicode or ascii source (depending on
711 the packet flags) to a char* destination
713 STR_TERMINATE means the string in src is null terminated
714 STR_UNICODE means to force as unicode
715 STR_ASCII use ascii even with unicode packet
716 STR_NOALIGN means don't do alignment
717 if STR_TERMINATE is set then src_len is ignored is it is -1
718 src_len is the length of the source area in bytes
719 return the number of bytes occupied by the string in src
720 the resulting string in "dest" is always null terminated
721 ****************************************************************************/
722 int pull_string(const void *base_ptr
, char *dest
, const void *src
, int dest_len
, int src_len
,
725 if (!(flags
& STR_ASCII
) && \
726 ((flags
& STR_UNICODE
|| \
727 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
728 return pull_ucs2(base_ptr
, dest
, src
, dest_len
, src_len
, flags
);
730 return pull_ascii(dest
, src
, dest_len
, src_len
, flags
);
733 int align_string(const void *base_ptr
, const char *p
, int flags
)
735 if (!(flags
& STR_ASCII
) && \
736 ((flags
& STR_UNICODE
|| \
737 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
738 return ucs2_align(base_ptr
, p
, flags
);
745 /****************************************************************************
746 convert from ucs2 to unix charset and return the
747 allocated and converted string or NULL if an error occurred.
748 you must provide a zero terminated string.
749 the returning string will be zero terminated.
750 ****************************************************************************/
751 char *acnv_u2ux(const smb_ucs2_t
*src
)
757 slen
= (strlen_w(src
) + 1) * sizeof(smb_ucs2_t
);
758 dlen
= convert_string_allocate(CH_UCS2
, CH_UNIX
, src
, slen
, &dest
);
759 if (dlen
== -1) return NULL
;
763 /****************************************************************************
764 convert from unix to ucs2 charset and return the
765 allocated and converted string or NULL if an error occurred.
766 you must provide a zero terminated string.
767 the returning string will be zero terminated.
768 ****************************************************************************/
769 smb_ucs2_t
*acnv_uxu2(const char *src
)
775 slen
= strlen(src
) + 1;
776 dlen
= convert_string_allocate(CH_UNIX
, CH_UCS2
, src
, slen
, &dest
);
777 if (dlen
== -1) return NULL
;
781 /****************************************************************************
782 convert from ucs2 to dos charset and return the
783 allocated and converted string or NULL if an error occurred.
784 you must provide a zero terminated string.
785 the returning string will be zero terminated.
786 ****************************************************************************/
787 char *acnv_u2dos(const smb_ucs2_t
*src
)
793 slen
= (strlen_w(src
) + 1) * sizeof(smb_ucs2_t
);
794 dlen
= convert_string_allocate(CH_UCS2
, CH_DOS
, src
, slen
, &dest
);
795 if (dlen
== -1) return NULL
;
799 /****************************************************************************
800 convert from dos to ucs2 charset and return the
801 allocated and converted string or NULL if an error occurred.
802 you must provide a zero terminated string.
803 the returning string will be zero terminated.
804 ****************************************************************************/
805 smb_ucs2_t
*acnv_dosu2(const char *src
)
811 slen
= strlen(src
) + 1;
812 dlen
= convert_string_allocate(CH_DOS
, CH_UCS2
, src
, slen
, &dest
);
813 if (dlen
== -1) return NULL
;