2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 /* We can parameterize this if someone complains.... JRA. */
27 char lp_failed_convert_char(void)
35 * @brief Character-set conversion routines built on our iconv.
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
48 static smb_iconv_t conv_handles
[NUM_CHARSETS
][NUM_CHARSETS
];
49 static bool conv_silent
; /* Should we do a debug if the conversion fails ? */
50 static bool initialized
;
53 * Return the name of a charset to give to iconv().
55 static const char *charset_name(charset_t ch
)
67 ret
= lp_unix_charset();
70 ret
= lp_dos_charset();
73 ret
= lp_display_charset();
82 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
83 if (ret
&& !strcmp(ret
, "LOCALE")) {
84 const char *ln
= NULL
;
87 setlocale(LC_ALL
, "");
89 ln
= nl_langinfo(CODESET
);
91 /* Check whether the charset name is supported
93 smb_iconv_t handle
= smb_iconv_open(ln
,"UCS-2LE");
94 if (handle
== (smb_iconv_t
) -1) {
95 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln
));
98 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln
));
99 smb_iconv_close(handle
);
106 if (!ret
|| !*ret
) ret
= "ASCII";
110 void lazy_initialize_conv(void)
120 * Destroy global objects allocated by init_iconv()
122 void gfree_charcnv(void)
126 for (c1
=0;c1
<NUM_CHARSETS
;c1
++) {
127 for (c2
=0;c2
<NUM_CHARSETS
;c2
++) {
128 if ( conv_handles
[c1
][c2
] ) {
129 smb_iconv_close( conv_handles
[c1
][c2
] );
130 conv_handles
[c1
][c2
] = 0;
138 * Initialize iconv conversion descriptors.
140 * This is called the first time it is needed, and also called again
141 * every time the configuration is reloaded, because the charset or
142 * codepage might have changed.
144 void init_iconv(void)
147 bool did_reload
= False
;
149 /* so that charset_name() works we need to get the UNIX<->UCS2 going
151 if (!conv_handles
[CH_UNIX
][CH_UTF16LE
])
152 conv_handles
[CH_UNIX
][CH_UTF16LE
] = smb_iconv_open(charset_name(CH_UTF16LE
), "ASCII");
154 if (!conv_handles
[CH_UTF16LE
][CH_UNIX
])
155 conv_handles
[CH_UTF16LE
][CH_UNIX
] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE
));
157 for (c1
=0;c1
<NUM_CHARSETS
;c1
++) {
158 for (c2
=0;c2
<NUM_CHARSETS
;c2
++) {
159 const char *n1
= charset_name((charset_t
)c1
);
160 const char *n2
= charset_name((charset_t
)c2
);
161 if (conv_handles
[c1
][c2
] &&
162 strcmp(n1
, conv_handles
[c1
][c2
]->from_name
) == 0 &&
163 strcmp(n2
, conv_handles
[c1
][c2
]->to_name
) == 0)
168 if (conv_handles
[c1
][c2
])
169 smb_iconv_close(conv_handles
[c1
][c2
]);
171 conv_handles
[c1
][c2
] = smb_iconv_open(n2
,n1
);
172 if (conv_handles
[c1
][c2
] == (smb_iconv_t
)-1) {
173 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
174 charset_name((charset_t
)c1
), charset_name((charset_t
)c2
)));
175 if (c1
!= CH_UTF16LE
&& c1
!= CH_UTF16BE
) {
178 if (c2
!= CH_UTF16LE
&& c2
!= CH_UTF16BE
) {
181 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
183 conv_handles
[c1
][c2
] = smb_iconv_open(n2
,n1
);
184 if (!conv_handles
[c1
][c2
]) {
185 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1
, n2
));
186 smb_panic("init_iconv: conv_handle initialization failed");
193 /* XXX: Does this really get called every time the dos
194 * codepage changes? */
195 /* XXX: Is the did_reload test too strict? */
203 * Convert string from one encoding to another, making error checking etc
204 * Slow path version - uses (slow) iconv.
206 * @param src pointer to source string (multibyte or singlebyte)
207 * @param srclen length of the source string in bytes
208 * @param dest pointer to destination string (multibyte or singlebyte)
209 * @param destlen maximal length allowed for string
210 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
211 * @returns the number of bytes occupied in the destination
213 * Ensure the srclen contains the terminating zero.
217 static size_t convert_string_internal(charset_t from
, charset_t to
,
218 void const *src
, size_t srclen
,
219 void *dest
, size_t destlen
, bool allow_bad_conv
)
223 const char* inbuf
= (const char*)src
;
224 char* outbuf
= (char*)dest
;
225 smb_iconv_t descriptor
;
227 lazy_initialize_conv();
229 descriptor
= conv_handles
[from
][to
];
231 if (srclen
== (size_t)-1) {
232 if (from
== CH_UTF16LE
|| from
== CH_UTF16BE
) {
233 srclen
= (strlen_w((const smb_ucs2_t
*)src
)+1) * 2;
235 srclen
= strlen((const char *)src
)+1;
240 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
242 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
251 retval
= smb_iconv(descriptor
, &inbuf
, &i_len
, &outbuf
, &o_len
);
252 if(retval
==(size_t)-1) {
253 const char *reason
="unknown error";
256 reason
="Incomplete multibyte sequence";
258 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
263 reason
="No more room";
265 if (from
== CH_UNIX
) {
266 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
267 charset_name(from
), charset_name(to
),
268 (unsigned int)srclen
, (unsigned int)destlen
, (const char *)src
));
270 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
271 charset_name(from
), charset_name(to
),
272 (unsigned int)srclen
, (unsigned int)destlen
));
277 reason
="Illegal multibyte sequence";
279 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
286 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
289 /* smb_panic(reason); */
291 return destlen
-o_len
;
296 * Conversion not supported. This is actually an error, but there are so
297 * many misconfigured iconv systems and smb.conf's out there we can't just
298 * fail. Do a very bad conversion instead.... JRA.
302 if (o_len
== 0 || i_len
== 0)
303 return destlen
- o_len
;
305 if (((from
== CH_UTF16LE
)||(from
== CH_UTF16BE
)) &&
306 ((to
!= CH_UTF16LE
)||(to
!= CH_UTF16BE
))) {
307 /* Can't convert from utf16 any endian to multibyte.
308 Replace with the default fail char.
311 return destlen
- o_len
;
313 *outbuf
= lp_failed_convert_char();
322 if (o_len
== 0 || i_len
== 0)
323 return destlen
- o_len
;
325 /* Keep trying with the next char... */
328 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&& to
== CH_UTF16LE
) {
329 /* Can't convert to UTF16LE - just widen by adding the
330 default fail char then zero.
333 return destlen
- o_len
;
335 outbuf
[0] = lp_failed_convert_char();
344 if (o_len
== 0 || i_len
== 0)
345 return destlen
- o_len
;
347 /* Keep trying with the next char... */
350 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&&
351 to
!= CH_UTF16LE
&& to
!= CH_UTF16BE
) {
352 /* Failed multibyte to multibyte. Just copy the default fail char and
354 outbuf
[0] = lp_failed_convert_char();
362 if (o_len
== 0 || i_len
== 0)
363 return destlen
- o_len
;
365 /* Keep trying with the next char... */
369 /* Keep compiler happy.... */
370 return destlen
- o_len
;
376 * Convert string from one encoding to another, making error checking etc
377 * Fast path version - handles ASCII first.
379 * @param src pointer to source string (multibyte or singlebyte)
380 * @param srclen length of the source string in bytes, or -1 for nul terminated.
381 * @param dest pointer to destination string (multibyte or singlebyte)
382 * @param destlen maximal length allowed for string - *NEVER* -1.
383 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
384 * @returns the number of bytes occupied in the destination
386 * Ensure the srclen contains the terminating zero.
388 * This function has been hand-tuned to provide a fast path.
389 * Don't change unless you really know what you are doing. JRA.
392 size_t convert_string(charset_t from
, charset_t to
,
393 void const *src
, size_t srclen
,
394 void *dest
, size_t destlen
, bool allow_bad_conv
)
397 * NB. We deliberately don't do a strlen here if srclen == -1.
398 * This is very expensive over millions of calls and is taken
399 * care of in the slow path in convert_string_internal. JRA.
403 SMB_ASSERT(destlen
!= (size_t)-1);
409 if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&& to
!= CH_UTF16LE
&& to
!= CH_UTF16BE
) {
410 const unsigned char *p
= (const unsigned char *)src
;
411 unsigned char *q
= (unsigned char *)dest
;
412 size_t slen
= srclen
;
413 size_t dlen
= destlen
;
414 unsigned char lastp
= '\0';
417 /* If all characters are ascii, fast path here. */
418 while (slen
&& dlen
) {
419 if ((lastp
= *p
) <= 0x7f) {
421 if (slen
!= (size_t)-1) {
429 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
432 size_t ret
= convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
433 if (ret
== (size_t)-1) {
441 /* Even if we fast path we should note if we ran out of room. */
442 if (((slen
!= (size_t)-1) && slen
) ||
443 ((slen
== (size_t)-1) && lastp
)) {
448 } else if (from
== CH_UTF16LE
&& to
!= CH_UTF16LE
) {
449 const unsigned char *p
= (const unsigned char *)src
;
450 unsigned char *q
= (unsigned char *)dest
;
452 size_t slen
= srclen
;
453 size_t dlen
= destlen
;
454 unsigned char lastp
= '\0';
456 /* If all characters are ascii, fast path here. */
457 while (((slen
== (size_t)-1) || (slen
>= 2)) && dlen
) {
458 if (((lastp
= *p
) <= 0x7f) && (p
[1] == 0)) {
460 if (slen
!= (size_t)-1) {
469 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
472 size_t ret
= convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
473 if (ret
== (size_t)-1) {
481 /* Even if we fast path we should note if we ran out of room. */
482 if (((slen
!= (size_t)-1) && slen
) ||
483 ((slen
== (size_t)-1) && lastp
)) {
488 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&& to
== CH_UTF16LE
) {
489 const unsigned char *p
= (const unsigned char *)src
;
490 unsigned char *q
= (unsigned char *)dest
;
492 size_t slen
= srclen
;
493 size_t dlen
= destlen
;
494 unsigned char lastp
= '\0';
496 /* If all characters are ascii, fast path here. */
497 while (slen
&& (dlen
>= 2)) {
498 if ((lastp
= *p
) <= 0x7F) {
501 if (slen
!= (size_t)-1) {
509 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
512 size_t ret
= convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
513 if (ret
== (size_t)-1) {
521 /* Even if we fast path we should note if we ran out of room. */
522 if (((slen
!= (size_t)-1) && slen
) ||
523 ((slen
== (size_t)-1) && lastp
)) {
530 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
533 return convert_string_internal(from
, to
, src
, srclen
, dest
, destlen
, allow_bad_conv
);
537 * Convert between character sets, allocating a new buffer using talloc for the result.
539 * @param srclen length of source buffer.
540 * @param dest always set at least to NULL
541 * @param converted_size set to the number of bytes occupied by the string in
542 * the destination on success.
543 * @note -1 is not accepted for srclen.
545 * @return true if new buffer was correctly allocated, and string was
548 * Ensure the srclen contains the terminating zero.
550 * I hate the goto's in this function. It's embarrassing.....
551 * There has to be a cleaner way to do this. JRA.
553 bool convert_string_talloc(TALLOC_CTX
*ctx
, charset_t from
, charset_t to
,
554 void const *src
, size_t srclen
, void *dst
,
555 size_t *converted_size
, bool allow_bad_conv
)
558 size_t i_len
, o_len
, destlen
= (srclen
* 3) / 2;
560 const char *inbuf
= (const char *)src
;
561 char *outbuf
= NULL
, *ob
= NULL
;
562 smb_iconv_t descriptor
;
563 void **dest
= (void **)dst
;
567 if (!converted_size
) {
572 if (src
== NULL
|| srclen
== (size_t)-1) {
577 ob
= talloc_strdup(ctx
, "");
587 lazy_initialize_conv();
589 descriptor
= conv_handles
[from
][to
];
591 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
593 DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
600 /* +2 is for ucs2 null termination. */
601 if ((destlen
*2)+2 < destlen
) {
602 /* wrapped ! abort. */
604 DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
609 destlen
= destlen
* 2;
612 /* +2 is for ucs2 null termination. */
613 ob
= (char *)TALLOC_REALLOC(ctx
, ob
, destlen
+ 2);
616 DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
626 retval
= smb_iconv(descriptor
,
629 if(retval
== (size_t)-1) {
630 const char *reason
="unknown error";
633 reason
="Incomplete multibyte sequence";
635 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason
,inbuf
));
642 reason
="Illegal multibyte sequence";
644 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason
,inbuf
));
650 DEBUG(0,("Conversion error: %s(%s)\n",reason
,inbuf
));
651 /* smb_panic(reason); */
658 destlen
= destlen
- o_len
;
659 /* Don't shrink unless we're reclaiming a lot of
660 * space. This is in the hot codepath and these
661 * reallocs *cost*. JRA.
664 /* We're shrinking here so we know the +2 is safe from wrap. */
665 ob
= (char *)TALLOC_REALLOC(ctx
,ob
,destlen
+ 2);
668 if (destlen
&& !ob
) {
669 DEBUG(0, ("convert_string_talloc: out of memory!\n"));
676 /* Must ucs2 null terminate in the extra space we allocated. */
678 ob
[destlen
+1] = '\0';
680 *converted_size
= destlen
;
686 * Conversion not supported. This is actually an error, but there are so
687 * many misconfigured iconv systems and smb.conf's out there we can't just
688 * fail. Do a very bad conversion instead.... JRA.
692 if (o_len
== 0 || i_len
== 0)
695 if (((from
== CH_UTF16LE
)||(from
== CH_UTF16BE
)) &&
696 ((to
!= CH_UTF16LE
)||(to
!= CH_UTF16BE
))) {
697 /* Can't convert from utf16 any endian to multibyte.
698 Replace with the default fail char.
705 *outbuf
= lp_failed_convert_char();
714 if (o_len
== 0 || i_len
== 0)
717 /* Keep trying with the next char... */
720 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&& to
== CH_UTF16LE
) {
721 /* Can't convert to UTF16LE - just widen by adding the
722 default fail char then zero.
727 outbuf
[0] = lp_failed_convert_char();
736 if (o_len
== 0 || i_len
== 0)
739 /* Keep trying with the next char... */
742 } else if (from
!= CH_UTF16LE
&& from
!= CH_UTF16BE
&&
743 to
!= CH_UTF16LE
&& to
!= CH_UTF16BE
) {
744 /* Failed multibyte to multibyte. Just copy the default fail char and
746 outbuf
[0] = lp_failed_convert_char();
754 if (o_len
== 0 || i_len
== 0)
757 /* Keep trying with the next char... */
761 /* Keep compiler happy.... */
767 size_t unix_strupper(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
772 if (!push_ucs2_talloc(talloc_tos(), &buffer
, src
, &size
)) {
776 if (!strupper_w(buffer
) && (dest
== src
)) {
781 size
= convert_string(CH_UTF16LE
, CH_UNIX
, buffer
, size
, dest
, destlen
, True
);
787 talloc_strdup() a unix string to upper case.
790 char *talloc_strdup_upper(TALLOC_CTX
*ctx
, const char *s
)
792 char *out_buffer
= talloc_strdup(ctx
,s
);
793 const unsigned char *p
= (const unsigned char *)s
;
794 unsigned char *q
= (unsigned char *)out_buffer
;
800 /* this is quite a common operation, so we want it to be
801 fast. We optimise for the ascii case, knowing that all our
802 supported multi-byte character sets are ascii-compatible
803 (ie. they match for the first 128 chars) */
808 *q
++ = toupper_ascii_fast(*p
);
814 size_t converted_size
, converted_size2
;
815 smb_ucs2_t
*ubuf
= NULL
;
817 /* We're not using the ascii buffer above. */
818 TALLOC_FREE(out_buffer
);
820 if (!convert_string_talloc(ctx
, CH_UNIX
, CH_UTF16LE
, s
,
821 strlen(s
)+1, (void *)&ubuf
,
822 &converted_size
, True
))
829 if (!convert_string_talloc(ctx
, CH_UTF16LE
, CH_UNIX
, ubuf
,
830 converted_size
, (void *)&out_buffer
,
831 &converted_size2
, True
))
837 /* Don't need the intermediate buffer
846 char *strupper_talloc(TALLOC_CTX
*ctx
, const char *s
) {
847 return talloc_strdup_upper(ctx
, s
);
851 size_t unix_strlower(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
854 smb_ucs2_t
*buffer
= NULL
;
856 if (!convert_string_talloc(talloc_tos(), CH_UNIX
, CH_UTF16LE
, src
, srclen
,
857 (void **)(void *)&buffer
, &size
,
860 smb_panic("failed to create UCS2 buffer");
862 if (!strlower_w(buffer
) && (dest
== src
)) {
866 size
= convert_string(CH_UTF16LE
, CH_UNIX
, buffer
, size
, dest
, destlen
, True
);
872 char *talloc_strdup_lower(TALLOC_CTX
*ctx
, const char *s
)
874 size_t converted_size
;
875 smb_ucs2_t
*buffer
= NULL
;
878 if (!push_ucs2_talloc(ctx
, &buffer
, s
, &converted_size
)) {
884 if (!pull_ucs2_talloc(ctx
, &out_buffer
, buffer
, &converted_size
)) {
894 char *strlower_talloc(TALLOC_CTX
*ctx
, const char *s
) {
895 return talloc_strdup_lower(ctx
, s
);
898 size_t ucs2_align(const void *base_ptr
, const void *p
, int flags
)
900 if (flags
& (STR_NOALIGN
|STR_ASCII
))
902 return PTR_DIFF(p
, base_ptr
) & 1;
907 * Copy a string from a char* unix src to a dos codepage string destination.
909 * @return the number of bytes occupied by the string in the destination.
911 * @param flags can include
913 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
914 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
917 * @param dest_len the maximum length in bytes allowed in the
920 size_t push_ascii(void *dest
, const char *src
, size_t dest_len
, int flags
)
922 size_t src_len
= strlen(src
);
926 /* No longer allow a length of -1. */
927 if (dest_len
== (size_t)-1) {
928 smb_panic("push_ascii - dest_len == -1");
931 if (flags
& STR_UPPER
) {
932 tmpbuf
= SMB_STRDUP(src
);
934 smb_panic("malloc fail");
940 if (flags
& (STR_TERMINATE
| STR_TERMINATE_ASCII
)) {
944 ret
= convert_string(CH_UNIX
, CH_DOS
, src
, src_len
, dest
, dest_len
, True
);
945 if (ret
== (size_t)-1 &&
946 (flags
& (STR_TERMINATE
| STR_TERMINATE_ASCII
))
948 ((char *)dest
)[0] = '\0';
954 size_t push_ascii_fstring(void *dest
, const char *src
)
956 return push_ascii(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
959 /********************************************************************
960 Push an nstring - ensure null terminated. Written by
961 moriyama@miraclelinux.com (MORIYAMA Masayuki).
962 ********************************************************************/
964 size_t push_ascii_nstring(void *dest
, const char *src
)
966 size_t i
, buffer_len
, dest_len
;
970 if (!push_ucs2_talloc(talloc_tos(), &buffer
, src
, &buffer_len
)) {
971 smb_panic("failed to create UCS2 buffer");
974 /* We're using buffer_len below to count ucs2 characters, not bytes. */
975 buffer_len
/= sizeof(smb_ucs2_t
);
978 for (i
= 0; buffer
[i
] != 0 && (i
< buffer_len
); i
++) {
979 unsigned char mb
[10];
980 /* Convert one smb_ucs2_t character at a time. */
981 size_t mb_len
= convert_string(CH_UTF16LE
, CH_DOS
, buffer
+i
, sizeof(smb_ucs2_t
), mb
, sizeof(mb
), False
);
982 if ((mb_len
!= (size_t)-1) && (dest_len
+ mb_len
<= MAX_NETBIOSNAME_LEN
- 1)) {
983 memcpy((char *)dest
+ dest_len
, mb
, mb_len
);
990 ((char *)dest
)[dest_len
] = '\0';
997 /********************************************************************
998 Push and malloc an ascii string. src and dest null terminated.
999 ********************************************************************/
1001 bool push_ascii_talloc(TALLOC_CTX
*mem_ctx
, char **dest
, const char *src
, size_t *converted_size
)
1003 size_t src_len
= strlen(src
)+1;
1006 return convert_string_talloc(mem_ctx
, CH_UNIX
, CH_DOS
, src
, src_len
,
1007 (void **)dest
, converted_size
, True
);
1011 * Copy a string from a dos codepage source to a unix char* destination.
1013 * The resulting string in "dest" is always null terminated.
1015 * @param flags can have:
1017 * <dt>STR_TERMINATE</dt>
1018 * <dd>STR_TERMINATE means the string in @p src
1019 * is null terminated, and src_len is ignored.</dd>
1022 * @param src_len is the length of the source area in bytes.
1023 * @returns the number of bytes occupied by the string in @p src.
1025 size_t pull_ascii(char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
1029 if (dest_len
== (size_t)-1) {
1030 /* No longer allow dest_len of -1. */
1031 smb_panic("pull_ascii - invalid dest_len of -1");
1034 if (flags
& STR_TERMINATE
) {
1035 if (src_len
== (size_t)-1) {
1036 src_len
= strlen((const char *)src
) + 1;
1038 size_t len
= strnlen((const char *)src
, src_len
);
1045 ret
= convert_string(CH_DOS
, CH_UNIX
, src
, src_len
, dest
, dest_len
, True
);
1046 if (ret
== (size_t)-1) {
1051 if (dest_len
&& ret
) {
1052 /* Did we already process the terminating zero ? */
1053 if (dest
[MIN(ret
-1, dest_len
-1)] != 0) {
1054 dest
[MIN(ret
, dest_len
-1)] = 0;
1064 * Copy a string from a dos codepage source to a unix char* destination.
1067 * The resulting string in "dest" is always null terminated.
1069 * @param flags can have:
1071 * <dt>STR_TERMINATE</dt>
1072 * <dd>STR_TERMINATE means the string in @p src
1073 * is null terminated, and src_len is ignored.</dd>
1076 * @param src_len is the length of the source area in bytes.
1077 * @returns the number of bytes occupied by the string in @p src.
1080 static size_t pull_ascii_base_talloc(TALLOC_CTX
*ctx
,
1095 if (flags
& STR_TERMINATE
) {
1096 if (src_len
== (size_t)-1) {
1097 src_len
= strlen((const char *)src
) + 1;
1099 size_t len
= strnlen((const char *)src
, src_len
);
1104 /* Ensure we don't use an insane length from the client. */
1105 if (src_len
>= 1024*1024) {
1106 char *msg
= talloc_asprintf(ctx
,
1107 "Bad src length (%u) in "
1108 "pull_ascii_base_talloc",
1109 (unsigned int)src_len
);
1113 /* Can't have an unlimited length
1114 * non STR_TERMINATE'd.
1116 if (src_len
== (size_t)-1) {
1122 /* src_len != -1 here. */
1124 if (!convert_string_talloc(ctx
, CH_DOS
, CH_UNIX
, src
, src_len
, &dest
,
1129 if (dest_len
&& dest
) {
1130 /* Did we already process the terminating zero ? */
1131 if (dest
[dest_len
-1] != 0) {
1132 size_t size
= talloc_get_size(dest
);
1133 /* Have we got space to append the '\0' ? */
1134 if (size
<= dest_len
) {
1136 dest
= TALLOC_REALLOC_ARRAY(ctx
, dest
, char,
1140 dest_len
= (size_t)-1;
1145 dest
[dest_len
] = '\0';
1156 size_t pull_ascii_fstring(char *dest
, const void *src
)
1158 return pull_ascii(dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
1161 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1163 size_t pull_ascii_nstring(char *dest
, size_t dest_len
, const void *src
)
1165 return pull_ascii(dest
, src
, dest_len
, sizeof(nstring
)-1, STR_TERMINATE
);
1169 * Copy a string from a char* src to a unicode destination.
1171 * @returns the number of bytes occupied by the string in the destination.
1173 * @param flags can have:
1176 * <dt>STR_TERMINATE <dd>means include the null termination.
1177 * <dt>STR_UPPER <dd>means uppercase in the destination.
1178 * <dt>STR_NOALIGN <dd>means don't do alignment.
1181 * @param dest_len is the maximum length allowed in the
1185 size_t push_ucs2(const void *base_ptr
, void *dest
, const char *src
, size_t dest_len
, int flags
)
1191 if (dest_len
== (size_t)-1) {
1192 /* No longer allow dest_len of -1. */
1193 smb_panic("push_ucs2 - invalid dest_len of -1");
1196 if (flags
& STR_TERMINATE
)
1197 src_len
= (size_t)-1;
1199 src_len
= strlen(src
);
1201 if (ucs2_align(base_ptr
, dest
, flags
)) {
1203 dest
= (void *)((char *)dest
+ 1);
1209 /* ucs2 is always a multiple of 2 bytes */
1212 ret
= convert_string(CH_UNIX
, CH_UTF16LE
, src
, src_len
, dest
, dest_len
, True
);
1213 if (ret
== (size_t)-1) {
1214 if ((flags
& STR_TERMINATE
) &&
1224 if (flags
& STR_UPPER
) {
1225 smb_ucs2_t
*dest_ucs2
= (smb_ucs2_t
*)dest
;
1228 /* We check for i < (ret / 2) below as the dest string isn't null
1229 terminated if STR_TERMINATE isn't set. */
1231 for (i
= 0; i
< (ret
/ 2) && i
< (dest_len
/ 2) && dest_ucs2
[i
]; i
++) {
1232 smb_ucs2_t v
= toupper_w(dest_ucs2
[i
]);
1233 if (v
!= dest_ucs2
[i
]) {
1244 * Copy a string from a unix char* src to a UCS2 destination,
1245 * allocating a buffer using talloc().
1247 * @param dest always set at least to NULL
1248 * @param converted_size set to the number of bytes occupied by the string in
1249 * the destination on success.
1251 * @return true if new buffer was correctly allocated, and string was
1254 bool push_ucs2_talloc(TALLOC_CTX
*ctx
, smb_ucs2_t
**dest
, const char *src
,
1255 size_t *converted_size
)
1257 size_t src_len
= strlen(src
)+1;
1260 return convert_string_talloc(ctx
, CH_UNIX
, CH_UTF16LE
, src
, src_len
,
1261 (void **)dest
, converted_size
, True
);
1266 Copy a string from a char* src to a UTF-8 destination.
1267 Return the number of bytes occupied by the string in the destination
1269 STR_TERMINATE means include the null termination
1270 STR_UPPER means uppercase in the destination
1271 dest_len is the maximum length allowed in the destination. A dest_len
1272 of -1 is no longer allowed; the function panics if it is passed.
1275 static size_t push_utf8(void *dest
, const char *src
, size_t dest_len
, int flags
)
1279 char *tmpbuf
= NULL
;
1281 if (dest_len
== (size_t)-1) {
1282 /* No longer allow dest_len of -1. */
1283 smb_panic("push_utf8 - invalid dest_len of -1");
1286 if (flags
& STR_UPPER
) {
1287 tmpbuf
= strupper_talloc(talloc_tos(), src
);
1292 src_len
= strlen(src
);
1295 src_len
= strlen(src
);
1296 if (flags
& STR_TERMINATE
) {
1300 ret
= convert_string(CH_UNIX
, CH_UTF8
, src
, src_len
, dest
, dest_len
, True
);
1301 TALLOC_FREE(tmpbuf
);
1305 size_t push_utf8_fstring(void *dest
, const char *src
)
1307 return push_utf8(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
1311 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1313 * @param dest always set at least to NULL
1314 * @param converted_size set to the number of bytes occupied by the string in
1315 * the destination on success.
1317 * @return true if new buffer was correctly allocated, and string was
1321 bool push_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
,
1322 size_t *converted_size
)
1324 size_t src_len
= strlen(src
)+1;
1327 return convert_string_talloc(ctx
, CH_UNIX
, CH_UTF8
, src
, src_len
,
1328 (void**)dest
, converted_size
, True
);
1332 Copy a string from a ucs2 source to a unix char* destination.
1334 STR_TERMINATE means the string in src is null terminated.
1335 STR_NOALIGN means don't try to align.
1336 if STR_TERMINATE is set then src_len is ignored if it is -1.
1337 src_len is the length of the source area in bytes
1338 Return the number of bytes occupied by the string in src.
1339 The resulting string in "dest" is always null terminated.
1342 size_t pull_ucs2(const void *base_ptr
, char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
1346 if (dest_len
== (size_t)-1) {
1347 /* No longer allow dest_len of -1. */
1348 smb_panic("pull_ucs2 - invalid dest_len of -1");
1352 if (dest
&& dest_len
> 0) {
1358 if (ucs2_align(base_ptr
, src
, flags
)) {
1359 src
= (const void *)((const char *)src
+ 1);
1360 if (src_len
!= (size_t)-1)
1364 if (flags
& STR_TERMINATE
) {
1365 /* src_len -1 is the default for null terminated strings. */
1366 if (src_len
!= (size_t)-1) {
1367 size_t len
= strnlen_w((const smb_ucs2_t
*)src
,
1369 if (len
< src_len
/2)
1375 /* ucs2 is always a multiple of 2 bytes */
1376 if (src_len
!= (size_t)-1)
1379 ret
= convert_string(CH_UTF16LE
, CH_UNIX
, src
, src_len
, dest
, dest_len
, True
);
1380 if (ret
== (size_t)-1) {
1385 if (src_len
== (size_t)-1)
1388 if (dest_len
&& ret
) {
1389 /* Did we already process the terminating zero ? */
1390 if (dest
[MIN(ret
-1, dest_len
-1)] != 0) {
1391 dest
[MIN(ret
, dest_len
-1)] = 0;
1401 Copy a string from a ucs2 source to a unix char* destination.
1402 Talloc version with a base pointer.
1403 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1406 STR_TERMINATE means the string in src is null terminated.
1407 STR_NOALIGN means don't try to align.
1408 if STR_TERMINATE is set then src_len is ignored if it is -1.
1409 src_len is the length of the source area in bytes
1410 Return the number of bytes occupied by the string in src.
1411 The resulting string in "dest" is always null terminated.
1414 size_t pull_ucs2_base_talloc(TALLOC_CTX
*ctx
,
1415 const void *base_ptr
,
1427 /* Ensure we never use the braindead "malloc" varient. */
1429 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1437 if (ucs2_align(base_ptr
, src
, flags
)) {
1438 src
= (const void *)((const char *)src
+ 1);
1439 if (src_len
!= (size_t)-1)
1443 if (flags
& STR_TERMINATE
) {
1444 /* src_len -1 is the default for null terminated strings. */
1445 if (src_len
!= (size_t)-1) {
1446 size_t len
= strnlen_w((const smb_ucs2_t
*)src
,
1448 if (len
< src_len
/2)
1453 * src_len == -1 - alloc interface won't take this
1454 * so we must calculate.
1456 src_len
= (strlen_w((const smb_ucs2_t
*)src
)+1)*sizeof(smb_ucs2_t
);
1458 /* Ensure we don't use an insane length from the client. */
1459 if (src_len
>= 1024*1024) {
1460 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1463 /* Can't have an unlimited length
1464 * non STR_TERMINATE'd.
1466 if (src_len
== (size_t)-1) {
1472 /* src_len != -1 here. */
1474 /* ucs2 is always a multiple of 2 bytes */
1477 if (!convert_string_talloc(ctx
, CH_UTF16LE
, CH_UNIX
, src
, src_len
,
1478 (void *)&dest
, &dest_len
, True
)) {
1483 /* Did we already process the terminating zero ? */
1484 if (dest
[dest_len
-1] != 0) {
1485 size_t size
= talloc_get_size(dest
);
1486 /* Have we got space to append the '\0' ? */
1487 if (size
<= dest_len
) {
1489 dest
= TALLOC_REALLOC_ARRAY(ctx
, dest
, char,
1493 dest_len
= (size_t)-1;
1498 dest
[dest_len
] = '\0';
1509 size_t pull_ucs2_fstring(char *dest
, const void *src
)
1511 return pull_ucs2(NULL
, dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
1515 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1517 * @param dest always set at least to NULL
1518 * @param converted_size set to the number of bytes occupied by the string in
1519 * the destination on success.
1521 * @return true if new buffer was correctly allocated, and string was
1525 bool pull_ucs2_talloc(TALLOC_CTX
*ctx
, char **dest
, const smb_ucs2_t
*src
,
1526 size_t *converted_size
)
1528 size_t src_len
= (strlen_w(src
)+1) * sizeof(smb_ucs2_t
);
1531 return convert_string_talloc(ctx
, CH_UTF16LE
, CH_UNIX
, src
, src_len
,
1532 (void **)dest
, converted_size
, True
);
1536 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1538 * @param dest always set at least to NULL
1539 * @param converted_size set to the number of bytes occupied by the string in
1540 * the destination on success.
1542 * @return true if new buffer was correctly allocated, and string was
1546 bool pull_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
,
1547 size_t *converted_size
)
1549 size_t src_len
= strlen(src
)+1;
1552 return convert_string_talloc(ctx
, CH_UTF8
, CH_UNIX
, src
, src_len
,
1553 (void **)dest
, converted_size
, True
);
1558 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1560 * @param dest always set at least to NULL
1561 * @param converted_size set to the number of bytes occupied by the string in
1562 * the destination on success.
1564 * @return true if new buffer was correctly allocated, and string was
1568 bool pull_ascii_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
,
1569 size_t *converted_size
)
1571 size_t src_len
= strlen(src
)+1;
1574 return convert_string_talloc(ctx
, CH_DOS
, CH_UNIX
, src
, src_len
,
1575 (void **)dest
, converted_size
, True
);
1579 Copy a string from a char* src to a unicode or ascii
1580 dos codepage destination choosing unicode or ascii based on the
1582 Return the number of bytes occupied by the string in the destination.
1584 STR_TERMINATE means include the null termination.
1585 STR_UPPER means uppercase in the destination.
1586 STR_ASCII use ascii even with unicode packet.
1587 STR_NOALIGN means don't do alignment.
1588 dest_len is the maximum length allowed in the destination. If dest_len
1589 is -1 then no maximum is used.
1592 size_t push_string_check_fn(const char *function
, unsigned int line
,
1593 void *dest
, const char *src
,
1594 size_t dest_len
, int flags
)
1597 /* We really need to zero fill here, not clobber
1598 * region, as we want to ensure that valgrind thinks
1599 * all of the outgoing buffer has been written to
1600 * so a send() or write() won't trap an error.
1604 clobber_region(function
, line
, dest
, dest_len
);
1606 memset(dest
, '\0', dest_len
);
1610 if (!(flags
& STR_ASCII
) && (flags
& STR_UNICODE
)) {
1611 return push_ucs2(NULL
, dest
, src
, dest_len
, flags
);
1613 return push_ascii(dest
, src
, dest_len
, flags
);
1618 Copy a string from a char* src to a unicode or ascii
1619 dos codepage destination choosing unicode or ascii based on the
1620 flags in the SMB buffer starting at base_ptr.
1621 Return the number of bytes occupied by the string in the destination.
1623 STR_TERMINATE means include the null termination.
1624 STR_UPPER means uppercase in the destination.
1625 STR_ASCII use ascii even with unicode packet.
1626 STR_NOALIGN means don't do alignment.
1627 dest_len is the maximum length allowed in the destination. If dest_len
1628 is -1 then no maximum is used.
1631 size_t push_string_base(const char *function
, unsigned int line
,
1632 const char *base
, uint16 flags2
,
1633 void *dest
, const char *src
,
1634 size_t dest_len
, int flags
)
1637 /* We really need to zero fill here, not clobber
1638 * region, as we want to ensure that valgrind thinks
1639 * all of the outgoing buffer has been written to
1640 * so a send() or write() won't trap an error.
1644 clobber_region(function
, line
, dest
, dest_len
);
1646 memset(dest
, '\0', dest_len
);
1650 if (!(flags
& STR_ASCII
) && \
1651 ((flags
& STR_UNICODE
|| \
1652 (flags2
& FLAGS2_UNICODE_STRINGS
)))) {
1653 return push_ucs2(base
, dest
, src
, dest_len
, flags
);
1655 return push_ascii(dest
, src
, dest_len
, flags
);
1659 Copy a string from a char* src to a unicode or ascii
1660 dos codepage destination choosing unicode or ascii based on the
1662 Return the number of bytes occupied by the string in the destination.
1664 STR_TERMINATE means include the null termination.
1665 STR_UPPER means uppercase in the destination.
1666 STR_ASCII use ascii even with unicode packet.
1667 STR_NOALIGN means don't do alignment.
1668 dest_len is the maximum length allowed in the destination. If dest_len
1669 is -1 then no maximum is used.
1672 ssize_t
push_string(void *dest
, const char *src
, size_t dest_len
, int flags
)
1676 /* We really need to zero fill here, not clobber
1677 * region, as we want to ensure that valgrind thinks
1678 * all of the outgoing buffer has been written to
1679 * so a send() or write() won't trap an error.
1682 memset(dest
, '\0', dest_len
);
1685 if (!(flags
& STR_ASCII
) && \
1686 (flags
& STR_UNICODE
)) {
1687 ret
= push_ucs2(NULL
, dest
, src
, dest_len
, flags
);
1689 ret
= push_ascii(dest
, src
, dest_len
, flags
);
1691 if (ret
== (size_t)-1) {
1698 Copy a string from a unicode or ascii source (depending on
1699 the packet flags) to a char* destination.
1701 STR_TERMINATE means the string in src is null terminated.
1702 STR_UNICODE means to force as unicode.
1703 STR_ASCII use ascii even with unicode packet.
1704 STR_NOALIGN means don't do alignment.
1705 if STR_TERMINATE is set then src_len is ignored if it is -1
1706 src_len is the length of the source area in bytes.
1707 Return the number of bytes occupied by the string in src.
1708 The resulting string in "dest" is always null terminated.
1711 size_t pull_string_fn(const char *function
,
1713 const void *base_ptr
,
1722 clobber_region(function
, line
, dest
, dest_len
);
1725 if ((base_ptr
== NULL
) && ((flags
& (STR_ASCII
|STR_UNICODE
)) == 0)) {
1726 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1730 if (!(flags
& STR_ASCII
) && \
1731 ((flags
& STR_UNICODE
|| \
1732 (smb_flags2
& FLAGS2_UNICODE_STRINGS
)))) {
1733 return pull_ucs2(base_ptr
, dest
, src
, dest_len
, src_len
, flags
);
1735 return pull_ascii(dest
, src
, dest_len
, src_len
, flags
);
1739 Copy a string from a unicode or ascii source (depending on
1740 the packet flags) to a char* destination.
1741 Variant that uses talloc.
1743 STR_TERMINATE means the string in src is null terminated.
1744 STR_UNICODE means to force as unicode.
1745 STR_ASCII use ascii even with unicode packet.
1746 STR_NOALIGN means don't do alignment.
1747 if STR_TERMINATE is set then src_len is ignored if it is -1
1748 src_len is the length of the source area in bytes.
1749 Return the number of bytes occupied by the string in src.
1750 The resulting string in "dest" is always null terminated.
1753 size_t pull_string_talloc_fn(const char *function
,
1756 const void *base_ptr
,
1763 if ((base_ptr
== NULL
) && ((flags
& (STR_ASCII
|STR_UNICODE
)) == 0)) {
1764 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1768 if (!(flags
& STR_ASCII
) && \
1769 ((flags
& STR_UNICODE
|| \
1770 (smb_flags2
& FLAGS2_UNICODE_STRINGS
)))) {
1771 return pull_ucs2_base_talloc(ctx
,
1778 return pull_ascii_base_talloc(ctx
,
1786 size_t align_string(const void *base_ptr
, const char *p
, int flags
)
1788 if (!(flags
& STR_ASCII
) && \
1789 ((flags
& STR_UNICODE
|| \
1790 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
1791 return ucs2_align(base_ptr
, p
, flags
);
1797 Return the unicode codepoint for the next multi-byte CH_UNIX character
1798 in the string. The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
1800 Also return the number of bytes consumed (which tells the caller
1801 how many bytes to skip to get to the next CH_UNIX character).
1803 Return INVALID_CODEPOINT if the next character cannot be converted.
1806 codepoint_t
next_codepoint(const char *str
, size_t *size
)
1808 /* It cannot occupy more than 4 bytes in UTF16 format */
1810 smb_iconv_t descriptor
;
1816 if ((str
[0] & 0x80) == 0) {
1818 return (codepoint_t
)str
[0];
1821 /* We assume that no multi-byte character can take
1822 more than 5 bytes. This is OK as we only
1823 support codepoints up to 1M */
1825 ilen_orig
= strnlen(str
, 5);
1828 lazy_initialize_conv();
1830 descriptor
= conv_handles
[CH_UNIX
][CH_UTF16LE
];
1831 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
1833 return INVALID_CODEPOINT
;
1836 /* This looks a little strange, but it is needed to cope
1837 with codepoints above 64k which are encoded as per RFC2781. */
1839 outbuf
= (char *)buf
;
1840 smb_iconv(descriptor
, &str
, &ilen
, &outbuf
, &olen
);
1842 /* We failed to convert to a 2 byte character.
1843 See if we can convert to a 4 UTF16-LE byte char encoding.
1846 outbuf
= (char *)buf
;
1847 smb_iconv(descriptor
, &str
, &ilen
, &outbuf
, &olen
);
1849 /* We didn't convert any bytes */
1851 return INVALID_CODEPOINT
;
1858 *size
= ilen_orig
- ilen
;
1861 /* 2 byte, UTF16-LE encoded value. */
1862 return (codepoint_t
)SVAL(buf
, 0);
1865 /* Decode a 4 byte UTF16-LE character manually.
1866 See RFC 2781 for the encoding mechanism.
1868 codepoint_t w1
= SVAL(buf
,0) & ~0xD800;
1869 codepoint_t w2
= SVAL(buf
,2) & ~0xDC00;
1871 return (codepoint_t
)0x10000 +
1875 /* no other length is valid */
1876 return INVALID_CODEPOINT
;
1880 Push a single codepoint into a CH_UNIX string. The target string must
1881 be able to hold the full character, which is guaranteed if it is at
1882 least 5 bytes in size. The caller may pass less than 5 bytes if they
1883 are sure the character will fit (for example, you can assume that
1884 uppercase/lowercase of a character will not add more than 1 byte)
1886 return the number of bytes occupied by the CH_UNIX character, or
1889 _PUBLIC_ ssize_t
push_codepoint(char *str
, codepoint_t c
)
1891 smb_iconv_t descriptor
;
1901 lazy_initialize_conv();
1903 descriptor
= conv_handles
[CH_UNIX
][CH_UTF16LE
];
1904 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
1911 inbuf
= (char *)buf
;
1913 smb_iconv(descriptor
, &inbuf
, &ilen
, &str
, &olen
);
1922 buf
[0] = (c
>>10) & 0xFF;
1923 buf
[1] = (c
>>18) | 0xd8;
1925 buf
[3] = ((c
>>8) & 0x3) | 0xdc;
1929 inbuf
= (char *)buf
;
1931 smb_iconv(descriptor
, &inbuf
, &ilen
, &str
, &olen
);