2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 /* We can parameterize this if someone complains.... JRA. */
27 char lp_failed_convert_char(void)
35 * @brief Character-set conversion routines built on our iconv.
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
48 static smb_iconv_t conv_handles
[NUM_CHARSETS
][NUM_CHARSETS
];
49 static bool conv_silent
; /* Should we do a debug if the conversion fails ? */
50 static bool initialized
;
53 * Return the name of a charset to give to iconv().
55 static const char *charset_name(charset_t ch
)
57 const char *ret
= NULL
;
59 if (ch
== CH_UTF16LE
) ret
= "UTF-16LE";
60 else if (ch
== CH_UTF16BE
) ret
= "UTF-16BE";
61 else if (ch
== CH_UNIX
) ret
= lp_unix_charset();
62 else if (ch
== CH_DOS
) ret
= lp_dos_charset();
63 else if (ch
== CH_DISPLAY
) ret
= lp_display_charset();
64 else if (ch
== CH_UTF8
) ret
= "UTF8";
66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67 if (ret
&& !strcmp(ret
, "LOCALE")) {
68 const char *ln
= NULL
;
71 setlocale(LC_ALL
, "");
73 ln
= nl_langinfo(CODESET
);
75 /* Check whether the charset name is supported
77 smb_iconv_t handle
= smb_iconv_open(ln
,"UCS-2LE");
78 if (handle
== (smb_iconv_t
) -1) {
79 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln
));
82 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln
));
83 smb_iconv_close(handle
);
90 if (!ret
|| !*ret
) ret
= "ASCII";
94 void lazy_initialize_conv(void)
104 * Destroy global objects allocated by init_iconv()
106 void gfree_charcnv(void)
110 for (c1
=0;c1
<NUM_CHARSETS
;c1
++) {
111 for (c2
=0;c2
<NUM_CHARSETS
;c2
++) {
112 if ( conv_handles
[c1
][c2
] ) {
113 smb_iconv_close( conv_handles
[c1
][c2
] );
114 conv_handles
[c1
][c2
] = 0;
122 * Initialize iconv conversion descriptors.
124 * This is called the first time it is needed, and also called again
125 * every time the configuration is reloaded, because the charset or
126 * codepage might have changed.
128 void init_iconv(void)
131 bool did_reload
= False
;
133 /* so that charset_name() works we need to get the UNIX<->UCS2 going
135 if (!conv_handles
[CH_UNIX
][CH_UTF16LE
])
136 conv_handles
[CH_UNIX
][CH_UTF16LE
] = smb_iconv_open(charset_name(CH_UTF16LE
), "ASCII");
138 if (!conv_handles
[CH_UTF16LE
][CH_UNIX
])
139 conv_handles
[CH_UTF16LE
][CH_UNIX
] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE
));
141 for (c1
=0;c1
<NUM_CHARSETS
;c1
++) {
142 for (c2
=0;c2
<NUM_CHARSETS
;c2
++) {
143 const char *n1
= charset_name((charset_t
)c1
);
144 const char *n2
= charset_name((charset_t
)c2
);
145 if (conv_handles
[c1
][c2
] &&
146 strcmp(n1
, conv_handles
[c1
][c2
]->from_name
) == 0 &&
147 strcmp(n2
, conv_handles
[c1
][c2
]->to_name
) == 0)
152 if (conv_handles
[c1
][c2
])
153 smb_iconv_close(conv_handles
[c1
][c2
]);
155 conv_handles
[c1
][c2
] = smb_iconv_open(n2
,n1
);
156 if (conv_handles
[c1
][c2
] == (smb_iconv_t
)-1) {
157 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158 charset_name((charset_t
)c1
), charset_name((charset_t
)c2
)));
159 if (c1
!= CH_UTF16LE
&& c1
!= CH_UTF16BE
) {
162 if (c2
!= CH_UTF16LE
&& c2
!= CH_UTF16BE
) {
165 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
167 conv_handles
[c1
][c2
] = smb_iconv_open(n2
,n1
);
168 if (!conv_handles
[c1
][c2
]) {
169 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1
, n2
));
170 smb_panic("init_iconv: conv_handle initialization failed");
177 /* XXX: Does this really get called every time the dos
178 * codepage changes? */
179 /* XXX: Is the did_reload test too strict? */
187 * Convert string from one encoding to another, making error checking etc
188 * Slow path version - uses (slow) iconv.
190 * @param src pointer to source string (multibyte or singlebyte)
191 * @param srclen length of the source string in bytes
192 * @param dest pointer to destination string (multibyte or singlebyte)
193 * @param destlen maximal length allowed for string
194 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195 * @returns the number of bytes occupied in the destination
197 * Ensure the srclen contains the terminating zero.
201 static size_t convert_string_internal(charset_t from
, charset_t to
,
202 void const *src
, size_t srclen
,
203 void *dest
, size_t destlen
, bool allow_bad_conv
)
207 const char* inbuf
= (const char*)src
;
208 char* outbuf
= (char*)dest
;
209 smb_iconv_t descriptor
;
211 lazy_initialize_conv();
213 descriptor
= conv_handles
[from
][to
];
215 if (srclen
== (size_t)-1) {
216 if (from
== CH_UTF16LE
|| from
== CH_UTF16BE
) {
217 srclen
= (strlen_w((const smb_ucs2_t
*)src
)+1) * 2;
219 srclen
= strlen((const char *)src
)+1;
224 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
226 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
235 retval
= smb_iconv(descriptor
, &inbuf
, &i_len
, &outbuf
, &o_len
);
236 if(retval
==(size_t)-1) {
237 const char *reason
="unknown error";
240 reason
="Incomplete multibyte sequence";
242 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
247 reason
="No more room";
249 if (from
== CH_UNIX
) {
250 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251 charset_name(from
), charset_name(to
),
252 (unsigned int)srclen
, (unsigned int)destlen
, (const char *)src
));
254 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255 charset_name(from
), charset_name(to
),
256 (unsigned int)srclen
, (unsigned int)destlen
));
261 reason
="Illegal multibyte sequence";
263 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
269 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
272 /* smb_panic(reason); */
274 return destlen
-o_len
;
279 * Conversion not supported. This is actually an error, but there are so
280 * many misconfigured iconv systems and smb.conf's out there we can't just
281 * fail. Do a very bad conversion instead.... JRA.
285 if (o_len
== 0 || i_len
== 0)
286 return destlen
- o_len
;
288 if (((from
== CH_UTF16LE
)||(from
== CH_UTF16BE
)) &&
289 ((to
!= CH_UTF16LE
)||(to
!= CH_UTF16BE
))) {
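290 /* Can't convert from utf16 any endian to multibyte.
291 Replace with the default fail char.
NOTE(review): the "to" test guarding this branch joins its two != checks with ||, which is always true, so any destination charset lands here; && looks intended - confirm.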
294 return destlen
- o_len
;
296 *outbuf
= lp_failed_convert_char();
305 if (o_len
== 0 || i_len
== 0)
306 return destlen
- o_len
;
308 /* Keep trying with the next char... */
311 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&& to
== CH_UTF16LE
) {
312 /* Can't convert to UTF16LE - just widen by adding the
313 default fail char then zero.
316 return destlen
- o_len
;
318 outbuf
[0] = lp_failed_convert_char();
327 if (o_len
== 0 || i_len
== 0)
328 return destlen
- o_len
;
330 /* Keep trying with the next char... */
333 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&&
334 to
!= CH_UTF16LE
&& to
!= CH_UTF16BE
) {
335 /* Failed multibyte to multibyte. Just copy the default fail char and
337 outbuf
[0] = lp_failed_convert_char();
345 if (o_len
== 0 || i_len
== 0)
346 return destlen
- o_len
;
348 /* Keep trying with the next char... */
352 /* Keep compiler happy.... */
353 return destlen
- o_len
;
359 * Convert string from one encoding to another, making error checking etc
360 * Fast path version - handles ASCII first.
362 * @param src pointer to source string (multibyte or singlebyte)
363 * @param srclen length of the source string in bytes, or -1 for nul terminated.
364 * @param dest pointer to destination string (multibyte or singlebyte)
365 * @param destlen maximal length allowed for string - *NEVER* -1.
366 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367 * @returns the number of bytes occupied in the destination
369 * Ensure the srclen contains the terminating zero.
371 * This function has been hand-tuned to provide a fast path.
372 * Don't change unless you really know what you are doing. JRA.
375 size_t convert_string(charset_t from
, charset_t to
,
376 void const *src
, size_t srclen
,
377 void *dest
, size_t destlen
, bool allow_bad_conv
)
380 * NB. We deliberately don't do a strlen here if srclen == -1.
381 * This is very expensive over millions of calls and is taken
382 * care of in the slow path in convert_string_internal. JRA.
386 SMB_ASSERT(destlen
!= (size_t)-1);
392 if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&& to
!= CH_UTF16LE
&& to
!= CH_UTF16BE
) {
393 const unsigned char *p
= (const unsigned char *)src
;
394 unsigned char *q
= (unsigned char *)dest
;
395 size_t slen
= srclen
;
396 size_t dlen
= destlen
;
397 unsigned char lastp
= '\0';
400 /* If all characters are ascii, fast path here. */
401 while (slen
&& dlen
) {
402 if ((lastp
= *p
) <= 0x7f) {
404 if (slen
!= (size_t)-1) {
412 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
415 return retval
+ convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
420 /* Even if we fast path we should note if we ran out of room. */
421 if (((slen
!= (size_t)-1) && slen
) ||
422 ((slen
== (size_t)-1) && lastp
)) {
427 } else if (from
== CH_UTF16LE
&& to
!= CH_UTF16LE
) {
428 const unsigned char *p
= (const unsigned char *)src
;
429 unsigned char *q
= (unsigned char *)dest
;
431 size_t slen
= srclen
;
432 size_t dlen
= destlen
;
433 unsigned char lastp
= '\0';
435 /* If all characters are ascii, fast path here. */
436 while (((slen
== (size_t)-1) || (slen
>= 2)) && dlen
) {
437 if (((lastp
= *p
) <= 0x7f) && (p
[1] == 0)) {
439 if (slen
!= (size_t)-1) {
448 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
451 return retval
+ convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
456 /* Even if we fast path we should note if we ran out of room. */
457 if (((slen
!= (size_t)-1) && slen
) ||
458 ((slen
== (size_t)-1) && lastp
)) {
463 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&& to
== CH_UTF16LE
) {
464 const unsigned char *p
= (const unsigned char *)src
;
465 unsigned char *q
= (unsigned char *)dest
;
467 size_t slen
= srclen
;
468 size_t dlen
= destlen
;
469 unsigned char lastp
= '\0';
471 /* If all characters are ascii, fast path here. */
472 while (slen
&& (dlen
>= 2)) {
473 if ((lastp
= *p
) <= 0x7F) {
476 if (slen
!= (size_t)-1) {
484 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
487 return retval
+ convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
492 /* Even if we fast path we should note if we ran out of room. */
493 if (((slen
!= (size_t)-1) && slen
) ||
494 ((slen
== (size_t)-1) && lastp
)) {
501 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
504 return convert_string_internal(from
, to
, src
, srclen
, dest
, destlen
, allow_bad_conv
);
508 * Convert between character sets, allocating a new buffer for the result.
510 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
511 * (this is a bad interface and needs fixing. JRA).
512 * @param srclen length of source buffer.
513 * @param dest always set at least to NULL
514 * @param converted_size set to the size of the allocated buffer on return
516 * @note -1 is not accepted for srclen.
518 * @return True if new buffer was correctly allocated, and string was
521 * Ensure the srclen contains the terminating zero.
523 * I hate the gotos in this function. It's embarrassing.....
524 * There has to be a cleaner way to do this. JRA.
527 bool convert_string_allocate(TALLOC_CTX
*ctx
, charset_t from
, charset_t to
,
528 void const *src
, size_t srclen
, void *dst
,
529 size_t *converted_size
, bool allow_bad_conv
)
531 size_t i_len
, o_len
, destlen
= (srclen
* 3) / 2;
533 const char *inbuf
= (const char *)src
;
534 char *outbuf
= NULL
, *ob
= NULL
;
535 smb_iconv_t descriptor
;
536 void **dest
= (void **)dst
;
540 if (!converted_size
) {
545 if (src
== NULL
|| srclen
== (size_t)-1) {
550 ob
= ((ctx
!= NULL
) ? talloc_strdup(ctx
, "") : SMB_STRDUP(""));
560 lazy_initialize_conv();
562 descriptor
= conv_handles
[from
][to
];
564 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
566 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
573 /* +2 is for ucs2 null termination. */
574 if ((destlen
*2)+2 < destlen
) {
575 /* wrapped ! abort. */
577 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
583 destlen
= destlen
* 2;
586 /* +2 is for ucs2 null termination. */
588 ob
= (char *)TALLOC_REALLOC(ctx
, ob
, destlen
+ 2);
590 ob
= (char *)SMB_REALLOC(ob
, destlen
+ 2);
594 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
604 retval
= smb_iconv(descriptor
,
607 if(retval
== (size_t)-1) {
608 const char *reason
="unknown error";
611 reason
="Incomplete multibyte sequence";
613 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason
,inbuf
));
620 reason
="Illegal multibyte sequence";
622 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason
,inbuf
));
628 DEBUG(0,("Conversion error: %s(%s)\n",reason
,inbuf
));
629 /* smb_panic(reason); */
640 destlen
= destlen
- o_len
;
641 /* Don't shrink unless we're reclaiming a lot of
642 * space. This is in the hot codepath and these
643 * reallocs *cost*. JRA.
646 /* We're shrinking here so we know the +2 is safe from wrap. */
648 ob
= (char *)TALLOC_REALLOC(ctx
,ob
,destlen
+ 2);
650 ob
= (char *)SMB_REALLOC(ob
,destlen
+ 2);
654 if (destlen
&& !ob
) {
655 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
662 /* Must ucs2 null terminate in the extra space we allocated. */
664 ob
[destlen
+1] = '\0';
666 *converted_size
= destlen
;
672 * Conversion not supported. This is actually an error, but there are so
673 * many misconfigured iconv systems and smb.conf's out there we can't just
674 * fail. Do a very bad conversion instead.... JRA.
678 if (o_len
== 0 || i_len
== 0)
681 if (((from
== CH_UTF16LE
)||(from
== CH_UTF16BE
)) &&
682 ((to
!= CH_UTF16LE
)||(to
!= CH_UTF16BE
))) {
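683 /* Can't convert from utf16 any endian to multibyte.
684 Replace with the default fail char.
NOTE(review): the "to" test guarding this branch joins its two != checks with ||, which is always true, so any destination charset lands here; && looks intended - confirm.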
691 *outbuf
= lp_failed_convert_char();
700 if (o_len
== 0 || i_len
== 0)
703 /* Keep trying with the next char... */
706 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&& to
== CH_UTF16LE
) {
707 /* Can't convert to UTF16LE - just widen by adding the
708 default fail char then zero.
713 outbuf
[0] = lp_failed_convert_char();
722 if (o_len
== 0 || i_len
== 0)
725 /* Keep trying with the next char... */
728 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&&
729 to
!= CH_UTF16LE
&& to
!= CH_UTF16BE
) {
730 /* Failed multibyte to multibyte. Just copy the default fail char and
732 outbuf
[0] = lp_failed_convert_char();
740 if (o_len
== 0 || i_len
== 0)
743 /* Keep trying with the next char... */
747 /* Keep compiler happy.... */
754 * Convert between character sets, allocating a new buffer using talloc for the result.
756 * @param srclen length of source buffer.
757 * @param dest always set at least to NULL
758 * @note -1 is not accepted for srclen.
760 * @returns Size in bytes of the converted string; or -1 in case of error.
762 size_t convert_string_talloc(TALLOC_CTX
*ctx
, charset_t from
, charset_t to
,
763 void const *src
, size_t srclen
, void *dst
,
766 void **dest
= (void **)dst
;
770 if (!convert_string_allocate(ctx
, from
, to
, src
, srclen
, dest
,
771 &dest_len
, allow_bad_conv
))
778 size_t unix_strupper(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
783 size
= push_ucs2_allocate(&buffer
, src
);
784 if (size
== (size_t)-1) {
787 if (!strupper_w(buffer
) && (dest
== src
)) {
792 size
= convert_string(CH_UTF16LE
, CH_UNIX
, buffer
, size
, dest
, destlen
, True
);
798 strdup() a unix string to upper case.
801 char *strdup_upper(const char *s
)
803 char *out_buffer
= SMB_STRDUP(s
);
804 const unsigned char *p
= (const unsigned char *)s
;
805 unsigned char *q
= (unsigned char *)out_buffer
;
811 /* this is quite a common operation, so we want it to be
812 fast. We optimise for the ascii case, knowing that all our
813 supported multi-byte character sets are ascii-compatible
814 (ie. they match for the first 128 chars) */
819 *q
++ = toupper_ascii_fast(*p
);
826 smb_ucs2_t
*buffer
= NULL
;
828 SAFE_FREE(out_buffer
);
829 if (!convert_string_allocate(NULL
, CH_UNIX
, CH_UTF16LE
, s
,
830 strlen(s
) + 1, (void **)(void *)&buffer
, &size
,
837 if (!convert_string_allocate(NULL
, CH_UTF16LE
, CH_UNIX
, buffer
,
838 size
, (void **)(void *)&out_buffer
, &size2
, True
)) {
843 /* Don't need the intermediate buffer
853 talloc_strdup() a unix string to upper case.
856 char *talloc_strdup_upper(TALLOC_CTX
*ctx
, const char *s
)
858 char *out_buffer
= talloc_strdup(ctx
,s
);
859 const unsigned char *p
= (const unsigned char *)s
;
860 unsigned char *q
= (unsigned char *)out_buffer
;
866 /* this is quite a common operation, so we want it to be
867 fast. We optimise for the ascii case, knowing that all our
868 supported multi-byte character sets are ascii-compatible
869 (ie. they match for the first 128 chars) */
874 *q
++ = toupper_ascii_fast(*p
);
881 smb_ucs2_t
*ubuf
= NULL
;
883 /* We're not using the ascii buffer above. */
884 TALLOC_FREE(out_buffer
);
886 size
= convert_string_talloc(ctx
, CH_UNIX
, CH_UTF16LE
,
890 if (size
== (size_t)-1) {
896 size
= convert_string_talloc(ctx
, CH_UTF16LE
, CH_UNIX
,
901 /* Don't need the intermediate buffer
907 if (size
== (size_t)-1) {
915 size_t unix_strlower(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
918 smb_ucs2_t
*buffer
= NULL
;
920 if (!convert_string_allocate(NULL
, CH_UNIX
, CH_UTF16LE
, src
, srclen
,
921 (void **)(void *)&buffer
, &size
, True
)) {
922 smb_panic("failed to create UCS2 buffer");
924 if (!strlower_w(buffer
) && (dest
== src
)) {
928 size
= convert_string(CH_UTF16LE
, CH_UNIX
, buffer
, size
, dest
, destlen
, True
);
934 strdup() a unix string to lower case.
937 char *strdup_lower(const char *s
)
940 smb_ucs2_t
*buffer
= NULL
;
943 size
= push_ucs2_allocate(&buffer
, s
);
944 if (size
== -1 || !buffer
) {
950 size
= pull_ucs2_allocate(&out_buffer
, buffer
);
953 if (size
== (size_t)-1) {
960 char *talloc_strdup_lower(TALLOC_CTX
*ctx
, const char *s
)
963 smb_ucs2_t
*buffer
= NULL
;
966 size
= push_ucs2_talloc(ctx
, &buffer
, s
);
967 if (size
== -1 || !buffer
) {
974 size
= pull_ucs2_talloc(ctx
, &out_buffer
, buffer
);
977 if (size
== (size_t)-1) {
978 TALLOC_FREE(out_buffer
);
986 size_t ucs2_align(const void *base_ptr
, const void *p
, int flags
)
988 if (flags
& (STR_NOALIGN
|STR_ASCII
))
990 return PTR_DIFF(p
, base_ptr
) & 1;
995 * Copy a string from a char* unix src to a dos codepage string destination.
997 * @return the number of bytes occupied by the string in the destination.
999 * @param flags can include
1001 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1002 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1005 * @param dest_len the maximum length in bytes allowed in the
1008 size_t push_ascii(void *dest
, const char *src
, size_t dest_len
, int flags
)
1010 size_t src_len
= strlen(src
);
1011 char *tmpbuf
= NULL
;
1014 /* No longer allow a length of -1. */
1015 if (dest_len
== (size_t)-1) {
1016 smb_panic("push_ascii - dest_len == -1");
1019 if (flags
& STR_UPPER
) {
1020 tmpbuf
= SMB_STRDUP(src
);
1022 smb_panic("malloc fail");
1028 if (flags
& (STR_TERMINATE
| STR_TERMINATE_ASCII
)) {
1032 ret
= convert_string(CH_UNIX
, CH_DOS
, src
, src_len
, dest
, dest_len
, True
);
1033 if (ret
== (size_t)-1 &&
1034 (flags
& (STR_TERMINATE
| STR_TERMINATE_ASCII
))
1036 ((char *)dest
)[0] = '\0';
1042 size_t push_ascii_fstring(void *dest
, const char *src
)
1044 return push_ascii(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
1047 /********************************************************************
1048 Push an nstring - ensure null terminated. Written by
1049 moriyama@miraclelinux.com (MORIYAMA Masayuki).
1050 ********************************************************************/
1052 size_t push_ascii_nstring(void *dest
, const char *src
)
1054 size_t i
, buffer_len
, dest_len
;
1058 buffer_len
= push_ucs2_allocate(&buffer
, src
);
1059 if (buffer_len
== (size_t)-1) {
1060 smb_panic("failed to create UCS2 buffer");
1063 /* We're using buffer_len below to count ucs2 characters, not bytes. */
1064 buffer_len
/= sizeof(smb_ucs2_t
);
1067 for (i
= 0; buffer
[i
] != 0 && (i
< buffer_len
); i
++) {
1068 unsigned char mb
[10];
1069 /* Convert one smb_ucs2_t character at a time. */
1070 size_t mb_len
= convert_string(CH_UTF16LE
, CH_DOS
, buffer
+i
, sizeof(smb_ucs2_t
), mb
, sizeof(mb
), False
);
1071 if ((mb_len
!= (size_t)-1) && (dest_len
+ mb_len
<= MAX_NETBIOSNAME_LEN
- 1)) {
1072 memcpy((char *)dest
+ dest_len
, mb
, mb_len
);
1079 ((char *)dest
)[dest_len
] = '\0';
1082 conv_silent
= False
;
1086 /********************************************************************
1087 Push and malloc an ascii string. src and dest null terminated.
1088 ********************************************************************/
1090 size_t push_ascii_allocate(char **dest
, const char *src
)
1092 size_t dest_len
, src_len
= strlen(src
)+1;
1095 if (!convert_string_allocate(NULL
, CH_UNIX
, CH_DOS
, src
, src_len
,
1096 (void **)dest
, &dest_len
, True
))
1103 * Copy a string from a dos codepage source to a unix char* destination.
1105 * The resulting string in "dest" is always null terminated.
1107 * @param flags can have:
1109 * <dt>STR_TERMINATE</dt>
1110 * <dd>STR_TERMINATE means the string in @p src
1111 * is null terminated, and src_len is ignored.</dd>
1114 * @param src_len is the length of the source area in bytes.
1115 * @returns the number of bytes occupied by the string in @p src.
1117 size_t pull_ascii(char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
1121 if (dest_len
== (size_t)-1) {
1122 /* No longer allow dest_len of -1. */
1123 smb_panic("pull_ascii - invalid dest_len of -1");
1126 if (flags
& STR_TERMINATE
) {
1127 if (src_len
== (size_t)-1) {
1128 src_len
= strlen((const char *)src
) + 1;
1130 size_t len
= strnlen((const char *)src
, src_len
);
1137 ret
= convert_string(CH_DOS
, CH_UNIX
, src
, src_len
, dest
, dest_len
, True
);
1138 if (ret
== (size_t)-1) {
1143 if (dest_len
&& ret
) {
1144 /* Did we already process the terminating zero ? */
1145 if (dest
[MIN(ret
-1, dest_len
-1)] != 0) {
1146 dest
[MIN(ret
, dest_len
-1)] = 0;
1156 * Copy a string from a dos codepage source to a unix char* destination.
1158 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1161 * The resulting string in "dest" is always null terminated.
1163 * @param flags can have:
1165 * <dt>STR_TERMINATE</dt>
1166 * <dd>STR_TERMINATE means the string in @p src
1167 * is null terminated, and src_len is ignored.</dd>
1170 * @param src_len is the length of the source area in bytes.
1171 * @returns the number of bytes occupied by the string in @p src.
1174 static size_t pull_ascii_base_talloc(TALLOC_CTX
*ctx
,
1181 size_t dest_len
= 0;
1184 /* Ensure we never use the braindead "malloc" variant. */
1186 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1196 if (flags
& STR_TERMINATE
) {
1197 if (src_len
== (size_t)-1) {
1198 src_len
= strlen((const char *)src
) + 1;
1200 size_t len
= strnlen((const char *)src
, src_len
);
1205 /* Ensure we don't use an insane length from the client. */
1206 if (src_len
>= 1024*1024) {
1207 char *msg
= talloc_asprintf(ctx
,
1208 "Bad src length (%u) in "
1209 "pull_ascii_base_talloc",
1210 (unsigned int)src_len
);
1214 /* Can't have an unlimited length
1215 * non STR_TERMINATE'd.
1217 if (src_len
== (size_t)-1) {
1223 /* src_len != -1 here. */
1225 if (!convert_string_allocate(ctx
, CH_DOS
, CH_UNIX
, src
, src_len
, &dest
,
1230 if (dest_len
&& dest
) {
1231 /* Did we already process the terminating zero ? */
1232 if (dest
[dest_len
-1] != 0) {
1233 size_t size
= talloc_get_size(dest
);
1234 /* Have we got space to append the '\0' ? */
1235 if (size
<= dest_len
) {
1237 dest
= TALLOC_REALLOC_ARRAY(ctx
, dest
, char,
1241 dest_len
= (size_t)-1;
1246 dest
[dest_len
] = '\0';
1257 size_t pull_ascii_fstring(char *dest
, const void *src
)
1259 return pull_ascii(dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
1262 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1264 size_t pull_ascii_nstring(char *dest
, size_t dest_len
, const void *src
)
1266 return pull_ascii(dest
, src
, dest_len
, sizeof(nstring
)-1, STR_TERMINATE
);
1270 * Copy a string from a char* src to a unicode destination.
1272 * @returns the number of bytes occupied by the string in the destination.
1274 * @param flags can have:
1277 * <dt>STR_TERMINATE <dd>means include the null termination.
1278 * <dt>STR_UPPER <dd>means uppercase in the destination.
1279 * <dt>STR_NOALIGN <dd>means don't do alignment.
1282 * @param dest_len is the maximum length allowed in the
1286 size_t push_ucs2(const void *base_ptr
, void *dest
, const char *src
, size_t dest_len
, int flags
)
1292 if (dest_len
== (size_t)-1) {
1293 /* No longer allow dest_len of -1. */
1294 smb_panic("push_ucs2 - invalid dest_len of -1");
1297 if (flags
& STR_TERMINATE
)
1298 src_len
= (size_t)-1;
1300 src_len
= strlen(src
);
1302 if (ucs2_align(base_ptr
, dest
, flags
)) {
1304 dest
= (void *)((char *)dest
+ 1);
1310 /* ucs2 is always a multiple of 2 bytes */
1313 ret
= convert_string(CH_UNIX
, CH_UTF16LE
, src
, src_len
, dest
, dest_len
, True
);
1314 if (ret
== (size_t)-1) {
1315 if ((flags
& STR_TERMINATE
) &&
1325 if (flags
& STR_UPPER
) {
1326 smb_ucs2_t
*dest_ucs2
= (smb_ucs2_t
*)dest
;
1329 /* We check for i < (ret / 2) below as the dest string isn't null
1330 terminated if STR_TERMINATE isn't set. */
1332 for (i
= 0; i
< (ret
/ 2) && i
< (dest_len
/ 2) && dest_ucs2
[i
]; i
++) {
1333 smb_ucs2_t v
= toupper_w(dest_ucs2
[i
]);
1334 if (v
!= dest_ucs2
[i
]) {
1345 * Copy a string from a unix char* src to a UCS2 destination,
1346 * allocating a buffer using talloc().
1348 * @param dest always set at least to NULL
1350 * @returns The number of bytes occupied by the string in the destination
1351 * or -1 in case of error.
1353 size_t push_ucs2_talloc(TALLOC_CTX
*ctx
, smb_ucs2_t
**dest
, const char *src
)
1355 size_t src_len
= strlen(src
)+1;
1358 return convert_string_talloc(ctx
, CH_UNIX
, CH_UTF16LE
, src
, src_len
, (void **)dest
, True
);
1363 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1365 * @param dest always set at least to NULL
1367 * @returns The number of bytes occupied by the string in the destination
1368 * or -1 in case of error.
1371 size_t push_ucs2_allocate(smb_ucs2_t
**dest
, const char *src
)
1373 size_t dest_len
, src_len
= strlen(src
)+1;
1376 if (!convert_string_allocate(NULL
, CH_UNIX
, CH_UTF16LE
, src
, src_len
,
1377 (void **)dest
, &dest_len
, True
))
1384 Copy a string from a char* src to a UTF-8 destination.
1385 Return the number of bytes occupied by the string in the destination
1387 STR_TERMINATE means include the null termination
1388 STR_UPPER means uppercase in the destination
1389 dest_len is the maximum length allowed in the destination. A dest_len
1390 of -1 is no longer accepted: the function panics on it.
1393 static size_t push_utf8(void *dest
, const char *src
, size_t dest_len
, int flags
)
1397 char *tmpbuf
= NULL
;
1399 if (dest_len
== (size_t)-1) {
1400 /* No longer allow dest_len of -1. */
1401 smb_panic("push_utf8 - invalid dest_len of -1");
1404 if (flags
& STR_UPPER
) {
1405 tmpbuf
= strdup_upper(src
);
1410 src_len
= strlen(src
);
1413 src_len
= strlen(src
);
1414 if (flags
& STR_TERMINATE
) {
1418 ret
= convert_string(CH_UNIX
, CH_UTF8
, src
, src_len
, dest
, dest_len
, True
);
1423 size_t push_utf8_fstring(void *dest
, const char *src
)
1425 return push_utf8(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
1429 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1431 * @param dest always set at least to NULL
1433 * @returns The number of bytes occupied by the string in the destination
1436 size_t push_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
)
1438 size_t src_len
= strlen(src
)+1;
1441 return convert_string_talloc(ctx
, CH_UNIX
, CH_UTF8
, src
, src_len
, (void**)dest
, True
);
1445 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1447 * @param dest always set at least to NULL
1449 * @returns The number of bytes occupied by the string in the destination
1452 size_t push_utf8_allocate(char **dest
, const char *src
)
1454 size_t dest_len
, src_len
= strlen(src
)+1;
1457 if (!convert_string_allocate(NULL
, CH_UNIX
, CH_UTF8
, src
, src_len
,
1458 (void **)dest
, &dest_len
, True
))
1465 Copy a string from a ucs2 source to a unix char* destination.
1467 STR_TERMINATE means the string in src is null terminated.
1468 STR_NOALIGN means don't try to align.
1469 if STR_TERMINATE is set then src_len is ignored if it is -1.
1470 src_len is the length of the source area in bytes
1471 Return the number of bytes occupied by the string in src.
1472 The resulting string in "dest" is always null terminated.
1475 size_t pull_ucs2(const void *base_ptr
, char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
1479 if (dest_len
== (size_t)-1) {
1480 /* No longer allow dest_len of -1. */
1481 smb_panic("pull_ucs2 - invalid dest_len of -1");
1485 if (dest
&& dest_len
> 0) {
1491 if (ucs2_align(base_ptr
, src
, flags
)) {
1492 src
= (const void *)((const char *)src
+ 1);
1493 if (src_len
!= (size_t)-1)
1497 if (flags
& STR_TERMINATE
) {
1498 /* src_len -1 is the default for null terminated strings. */
1499 if (src_len
!= (size_t)-1) {
1500 size_t len
= strnlen_w((const smb_ucs2_t
*)src
,
1502 if (len
< src_len
/2)
1508 /* ucs2 is always a multiple of 2 bytes */
1509 if (src_len
!= (size_t)-1)
1512 ret
= convert_string(CH_UTF16LE
, CH_UNIX
, src
, src_len
, dest
, dest_len
, True
);
1513 if (ret
== (size_t)-1) {
1518 if (src_len
== (size_t)-1)
1521 if (dest_len
&& ret
) {
1522 /* Did we already process the terminating zero ? */
1523 if (dest
[MIN(ret
-1, dest_len
-1)] != 0) {
1524 dest
[MIN(ret
, dest_len
-1)] = 0;
1534 Copy a string from a ucs2 source to a unix char* destination.
1535 Talloc version with a base pointer.
1536 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1539 STR_TERMINATE means the string in src is null terminated.
1540 STR_NOALIGN means don't try to align.
1541 if STR_TERMINATE is set then src_len is ignored if it is -1.
1542 src_len is the length of the source area in bytes
1543 Return the number of bytes occupied by the string in src.
1544 The resulting string in "dest" is always null terminated.
1547 size_t pull_ucs2_base_talloc(TALLOC_CTX
*ctx
,
1548 const void *base_ptr
,
1560 /* Ensure we never use the braindead "malloc" variant. */
1562 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1570 if (ucs2_align(base_ptr
, src
, flags
)) {
1571 src
= (const void *)((const char *)src
+ 1);
1572 if (src_len
!= (size_t)-1)
1576 if (flags
& STR_TERMINATE
) {
1577 /* src_len -1 is the default for null terminated strings. */
1578 if (src_len
!= (size_t)-1) {
1579 size_t len
= strnlen_w((const smb_ucs2_t
*)src
,
1581 if (len
< src_len
/2)
1586 * src_len == -1 - alloc interface won't take this
1587 * so we must calculate.
1589 src_len
= (strlen_w((const smb_ucs2_t
*)src
)+1)*sizeof(smb_ucs2_t
);
1591 /* Ensure we don't use an insane length from the client. */
1592 if (src_len
>= 1024*1024) {
1593 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1596 /* Can't have an unlimited length
1597 * non STR_TERMINATE'd.
1599 if (src_len
== (size_t)-1) {
1605 /* src_len != -1 here. */
1607 /* ucs2 is always a multiple of 2 bytes */
1610 dest_len
= convert_string_talloc(ctx
,
1617 if (dest_len
== (size_t)-1) {
1622 /* Did we already process the terminating zero ? */
1623 if (dest
[dest_len
-1] != 0) {
1624 size_t size
= talloc_get_size(dest
);
1625 /* Have we got space to append the '\0' ? */
1626 if (size
<= dest_len
) {
1628 dest
= TALLOC_REALLOC_ARRAY(ctx
, dest
, char,
1632 dest_len
= (size_t)-1;
1637 dest
[dest_len
] = '\0';
1648 size_t pull_ucs2_fstring(char *dest
, const void *src
)
1650 return pull_ucs2(NULL
, dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
1654 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1656 * @param dest always set at least to NULL
1658 * @returns The number of bytes occupied by the string in the destination
1661 size_t pull_ucs2_talloc(TALLOC_CTX
*ctx
, char **dest
, const smb_ucs2_t
*src
)
1663 size_t src_len
= (strlen_w(src
)+1) * sizeof(smb_ucs2_t
);
1665 return convert_string_talloc(ctx
, CH_UTF16LE
, CH_UNIX
, src
, src_len
, (void **)dest
, True
);
1669 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1671 * @param dest always set at least to NULL
1673 * @returns The number of bytes occupied by the string in the destination
/*
 * pull_ucs2_allocate: convert a null terminated UCS2 string to CH_UNIX
 * through convert_string_allocate(), called with a NULL first argument.
 *
 * src_len is computed as (strlen_w(src)+1) * sizeof(smb_ucs2_t), so the
 * terminating null character is part of the converted range. The
 * converted length is received in dest_len.
 *
 * NOTE(review): the statements that act on the result of
 * convert_string_allocate() (the return paths) are not visible in this
 * listing; confirm against the full file.
 */
1676 size_t pull_ucs2_allocate(char **dest
, const smb_ucs2_t
*src
)
1678 size_t dest_len
, src_len
= (strlen_w(src
)+1) * sizeof(smb_ucs2_t
);
1680 if (!convert_string_allocate(NULL
, CH_UTF16LE
, CH_UNIX
, src
, src_len
,
1681 (void **)dest
, &dest_len
, True
))
1688 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1690 * @param dest always set at least to NULL
1692 * @returns The number of bytes occupied by the string in the destination
1695 size_t pull_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
)
1697 size_t src_len
= strlen(src
)+1;
1699 return convert_string_talloc(ctx
, CH_UTF8
, CH_UNIX
, src
, src_len
, (void **)dest
, True
);
1703 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1705 * @param dest always set at least to NULL
1707 * @returns The number of bytes occupied by the string in the destination
/*
 * pull_utf8_allocate: convert a null terminated UTF-8 string to CH_UNIX
 * through convert_string_allocate(), called with a NULL first argument.
 *
 * src_len is strlen(src)+1, so the terminating null byte is part of the
 * converted range. The converted length is received in dest_len.
 *
 * NOTE(review): the statements that act on the result of
 * convert_string_allocate() (the return paths) are not visible in this
 * listing; confirm against the full file.
 */
1710 size_t pull_utf8_allocate(char **dest
, const char *src
)
1712 size_t dest_len
, src_len
= strlen(src
)+1;
1714 if (!convert_string_allocate(NULL
, CH_UTF8
, CH_UNIX
, src
, src_len
,
1715 (void **)dest
, &dest_len
, True
))
1722 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1724 * @param dest always set at least to NULL
1726 * @returns The number of bytes occupied by the string in the destination
1729 size_t pull_ascii_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
)
1731 size_t src_len
= strlen(src
)+1;
1733 return convert_string_talloc(ctx
, CH_DOS
, CH_UNIX
, src
, src_len
, (void **)dest
, True
);
1737 Copy a string from a char* src to a unicode or ascii
1738 dos codepage destination choosing unicode or ascii based on the
1739 flags in the SMB buffer starting at base_ptr.
1740 Return the number of bytes occupied by the string in the destination.
1742 STR_TERMINATE means include the null termination.
1743 STR_UPPER means uppercase in the destination.
1744 STR_ASCII use ascii even with unicode packet.
1745 STR_NOALIGN means don't do alignment.
1746 dest_len is the maximum length allowed in the destination. If dest_len
1747 is -1 then no maximum is used.
/*
 * push_string_fn: write src into dest either as UCS-2 or in the DOS
 * codepage, chosen from the flags.
 *
 * Dispatch visible in this block: when STR_ASCII is not set in flags and
 * either STR_UNICODE is set in flags or FLAGS2_UNICODE_STRINGS is set in
 * flags2, the result of push_ucs2(base_ptr, dest, src, dest_len, flags)
 * is returned; otherwise the result of
 * push_ascii(dest, src, dest_len, flags) is returned.
 *
 * Before dispatching, the block contains both a clobber_region() call and
 * a memset() of dest to zero bytes over dest_len.
 * NOTE(review): the existing comment says zero fill is wanted rather than
 * clobbering; any preprocessor conditionals selecting between the two
 * calls are not visible in this listing - confirm against the full file.
 */
1750 size_t push_string_fn(const char *function
, unsigned int line
,
1751 const void *base_ptr
, uint16 flags2
,
1752 void *dest
, const char *src
,
1753 size_t dest_len
, int flags
)
1756 /* We really need to zero fill here, not clobber
1757 * region, as we want to ensure that valgrind thinks
1758 * all of the outgoing buffer has been written to
1759 * so a send() or write() won't trap an error.
1763 clobber_region(function
, line
, dest
, dest_len
);
1765 memset(dest
, '\0', dest_len
);
1769 if (!(flags
& STR_ASCII
) && \
1770 ((flags
& STR_UNICODE
|| \
1771 (flags2
& FLAGS2_UNICODE_STRINGS
)))) {
1772 return push_ucs2(base_ptr
, dest
, src
, dest_len
, flags
);
1774 return push_ascii(dest
, src
, dest_len
, flags
);
1779 Copy a string from a unicode or ascii source (depending on
1780 the packet flags) to a char* destination.
1782 STR_TERMINATE means the string in src is null terminated.
1783 STR_UNICODE means to force as unicode.
1784 STR_ASCII use ascii even with unicode packet.
1785 STR_NOALIGN means don't do alignment.
1786 if STR_TERMINATE is set then src_len is ignored if it is -1
1787 src_len is the length of the source area in bytes.
1788 Return the number of bytes occupied by the string in src.
1789 The resulting string in "dest" is always null terminated.
/*
 * pull_string_fn: read a string from src into dest, treating the source
 * either as UCS-2 or as the DOS codepage depending on the flags.
 *
 * Visible behaviour in this block:
 *  - clobber_region(function, line, dest, dest_len) is called on the
 *    destination;
 *  - smb_panic() is called when base_ptr is NULL and neither STR_ASCII
 *    nor STR_UNICODE is set in flags;
 *  - when STR_ASCII is not set and either STR_UNICODE is set in flags or
 *    FLAGS2_UNICODE_STRINGS is set in smb_flags2, the result of
 *    pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags) is
 *    returned; otherwise the result of
 *    pull_ascii(dest, src, dest_len, src_len, flags) is returned.
 *
 * NOTE(review): most of the parameter list, and any preprocessor guard
 * around clobber_region(), are not visible in this listing; confirm
 * against the full file.
 */
1792 size_t pull_string_fn(const char *function
,
1794 const void *base_ptr
,
1803 clobber_region(function
, line
, dest
, dest_len
);
1806 if ((base_ptr
== NULL
) && ((flags
& (STR_ASCII
|STR_UNICODE
)) == 0)) {
1807 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1811 if (!(flags
& STR_ASCII
) && \
1812 ((flags
& STR_UNICODE
|| \
1813 (smb_flags2
& FLAGS2_UNICODE_STRINGS
)))) {
1814 return pull_ucs2(base_ptr
, dest
, src
, dest_len
, src_len
, flags
);
1816 return pull_ascii(dest
, src
, dest_len
, src_len
, flags
);
1820 Copy a string from a unicode or ascii source (depending on
1821 the packet flags) to a char* destination.
1822 Variant that uses talloc.
1824 STR_TERMINATE means the string in src is null terminated.
1825 STR_UNICODE means to force as unicode.
1826 STR_ASCII use ascii even with unicode packet.
1827 STR_NOALIGN means don't do alignment.
1828 if STR_TERMINATE is set then src_len is ignored if it is -1
1829 src_len is the length of the source area in bytes.
1830 Return the number of bytes occupied by the string in src.
1831 The resulting string in "dest" is always null terminated.
/*
 * pull_string_talloc_fn: talloc variant of the string pull; dispatches
 * to a UCS-2 or an ASCII talloc based helper depending on the flags.
 *
 * Visible behaviour in this block:
 *  - smb_panic() is called when base_ptr is NULL and neither STR_ASCII
 *    nor STR_UNICODE is set in flags;
 *  - when STR_ASCII is not set and either STR_UNICODE is set in flags or
 *    FLAGS2_UNICODE_STRINGS is set in smb_flags2, the result of
 *    pull_ucs2_base_talloc(ctx, ...) is returned; otherwise the result
 *    of pull_ascii_base_talloc(ctx, ...) is returned.
 *
 * NOTE(review): most of the parameter list and the remaining arguments of
 * both helper calls are not visible in this listing; confirm against the
 * full file.
 */
1834 size_t pull_string_talloc_fn(const char *function
,
1837 const void *base_ptr
,
1844 if ((base_ptr
== NULL
) && ((flags
& (STR_ASCII
|STR_UNICODE
)) == 0)) {
1845 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1849 if (!(flags
& STR_ASCII
) && \
1850 ((flags
& STR_UNICODE
|| \
1851 (smb_flags2
& FLAGS2_UNICODE_STRINGS
)))) {
1852 return pull_ucs2_base_talloc(ctx
,
1859 return pull_ascii_base_talloc(ctx
,
/*
 * align_string: report the alignment adjustment for a string at p.
 *
 * When STR_ASCII is not set in flags and either STR_UNICODE is set in
 * flags or FLAGS2_UNICODE_STRINGS is set in the 16 bit value read with
 * SVAL(base_ptr, smb_flg2), the result of ucs2_align(base_ptr, p, flags)
 * is returned. The flags test comes first, so base_ptr is only read
 * through SVAL when STR_UNICODE is not set.
 *
 * NOTE(review): the value returned when the condition is false is not
 * visible in this listing; confirm against the full file.
 */
1867 size_t align_string(const void *base_ptr
, const char *p
, int flags
)
1869 if (!(flags
& STR_ASCII
) && \
1870 ((flags
& STR_UNICODE
|| \
1871 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
1872 return ucs2_align(base_ptr
, p
, flags
);
1878 Return the unicode codepoint for the next multi-byte CH_UNIX character
1879 in the string. The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
1881 Also return the number of bytes consumed (which tells the caller
1882 how many bytes to skip to get to the next CH_UNIX character).
1884 Return INVALID_CODEPOINT if the next character cannot be converted.
/*
 * next_codepoint: decode the next CH_UNIX character of str into a
 * unicode codepoint and report the number of input bytes used via *size.
 *
 * Visible behaviour in this block:
 *  - if the high bit of str[0] is clear, str[0] itself is returned as the
 *    codepoint;
 *  - otherwise at most 5 input bytes are considered (strnlen(str, 5));
 *  - lazy_initialize_conv() is called and the CH_UNIX to CH_UTF16LE
 *    handle is taken from conv_handles; a handle of -1 or 0 yields
 *    INVALID_CODEPOINT;
 *  - smb_iconv() is called with the output pointing at buf and, per the
 *    existing comments, a second time to try a 4 byte UTF16-LE encoding
 *    when a 2 byte conversion did not succeed; INVALID_CODEPOINT is
 *    returned when no bytes were converted;
 *  - *size is set to ilen_orig - ilen, the number of input bytes consumed;
 *  - a 2 byte result is returned as SVAL(buf, 0); a 4 byte result is
 *    combined from w1 = SVAL(buf,0) & ~0xD800 and
 *    w2 = SVAL(buf,2) & ~0xDC00 in an expression starting with 0x10000;
 *  - any other output length yields INVALID_CODEPOINT.
 *
 * NOTE(review): several declarations, the conditions selecting between
 * these branches, the assignments to *size on the early returns and the
 * tail of the 4 byte expression are not visible in this listing; confirm
 * against the full file.
 */
1887 codepoint_t
next_codepoint(const char *str
, size_t *size
)
1889 /* It cannot occupy more than 4 bytes in UTF16 format */
1891 smb_iconv_t descriptor
;
1897 if ((str
[0] & 0x80) == 0) {
1899 return (codepoint_t
)str
[0];
1902 /* We assume that no multi-byte character can take
1903 more than 5 bytes. This is OK as we only
1904 support codepoints up to 1M */
1906 ilen_orig
= strnlen(str
, 5);
1909 lazy_initialize_conv();
1911 descriptor
= conv_handles
[CH_UNIX
][CH_UTF16LE
];
1912 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
1914 return INVALID_CODEPOINT
;
1917 /* This looks a little strange, but it is needed to cope
1918 with codepoints above 64k which are encoded as per RFC2781. */
1920 outbuf
= (char *)buf
;
1921 smb_iconv(descriptor
, &str
, &ilen
, &outbuf
, &olen
);
1923 /* We failed to convert to a 2 byte character.
1924 See if we can convert to a 4 UTF16-LE byte char encoding.
1927 outbuf
= (char *)buf
;
1928 smb_iconv(descriptor
, &str
, &ilen
, &outbuf
, &olen
);
1930 /* We didn't convert any bytes */
1932 return INVALID_CODEPOINT
;
1939 *size
= ilen_orig
- ilen
;
1942 /* 2 byte, UTF16-LE encoded value. */
1943 return (codepoint_t
)SVAL(buf
, 0);
1946 /* Decode a 4 byte UTF16-LE character manually.
1947 See RFC2781 for the encoding mechanism.
1949 codepoint_t w1
= SVAL(buf
,0) & ~0xD800;
1950 codepoint_t w2
= SVAL(buf
,2) & ~0xDC00;
1952 return (codepoint_t
)0x10000 +
1956 /* no other length is valid */
1957 return INVALID_CODEPOINT
;