2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 /* We can parameterize this if someone complains.... JRA. */
28 char lp_failed_convert_char(void)
36 * @brief Character-set conversion routines built on our iconv.
38 * @note Samba's internal character set (at least in the 3.0 series)
39 * is always the same as the one for the Unix filesystem. It is
40 * <b>not</b> necessarily UTF-8 and may be different on machines that
41 * need i18n filenames to be compatible with Unix software. It does
42 * have to be a superset of ASCII. All multibyte sequences must start
43 * with a byte with the high bit set.
49 static smb_iconv_t conv_handles
[NUM_CHARSETS
][NUM_CHARSETS
];
50 static BOOL conv_silent
; /* Should we do a debug if the conversion fails ? */
53 * Return the name of a charset to give to iconv().
55 static const char *charset_name(charset_t ch
)
57 const char *ret
= NULL
;
59 if (ch
== CH_UCS2
) ret
= "UTF-16LE";
60 else if (ch
== CH_UNIX
) ret
= lp_unix_charset();
61 else if (ch
== CH_DOS
) ret
= lp_dos_charset();
62 else if (ch
== CH_DISPLAY
) ret
= lp_display_charset();
63 else if (ch
== CH_UTF8
) ret
= "UTF8";
65 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66 if (ret
&& !strcmp(ret
, "LOCALE")) {
67 const char *ln
= NULL
;
70 setlocale(LC_ALL
, "");
72 ln
= nl_langinfo(CODESET
);
74 /* Check whether the charset name is supported
76 smb_iconv_t handle
= smb_iconv_open(ln
,"UCS-2LE");
77 if (handle
== (smb_iconv_t
) -1) {
78 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln
));
81 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln
));
82 smb_iconv_close(handle
);
88 /* We set the locale back to C to get ASCII-compatible toupper/lower functions.
89 For now we do not need any other POSIX localisations anyway. If we ever
90 really need localized string functions we will have to write our own
93 setlocale(LC_ALL
, "C");
98 if (!ret
|| !*ret
) ret
= "ASCII";
102 void lazy_initialize_conv(void)
104 static int initialized
= False
;
114 * Initialize iconv conversion descriptors.
116 * This is called the first time it is needed, and also called again
117 * every time the configuration is reloaded, because the charset or
118 * codepage might have changed.
120 void init_iconv(void)
123 BOOL did_reload
= False
;
125 /* so that charset_name() works we need to get the UNIX<->UCS2 going
127 if (!conv_handles
[CH_UNIX
][CH_UCS2
])
128 conv_handles
[CH_UNIX
][CH_UCS2
] = smb_iconv_open(charset_name(CH_UCS2
), "ASCII");
130 if (!conv_handles
[CH_UCS2
][CH_UNIX
])
131 conv_handles
[CH_UCS2
][CH_UNIX
] = smb_iconv_open("ASCII", charset_name(CH_UCS2
));
133 for (c1
=0;c1
<NUM_CHARSETS
;c1
++) {
134 for (c2
=0;c2
<NUM_CHARSETS
;c2
++) {
135 const char *n1
= charset_name((charset_t
)c1
);
136 const char *n2
= charset_name((charset_t
)c2
);
137 if (conv_handles
[c1
][c2
] &&
138 strcmp(n1
, conv_handles
[c1
][c2
]->from_name
) == 0 &&
139 strcmp(n2
, conv_handles
[c1
][c2
]->to_name
) == 0)
144 if (conv_handles
[c1
][c2
])
145 smb_iconv_close(conv_handles
[c1
][c2
]);
147 conv_handles
[c1
][c2
] = smb_iconv_open(n2
,n1
);
148 if (conv_handles
[c1
][c2
] == (smb_iconv_t
)-1) {
149 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
150 charset_name((charset_t
)c1
), charset_name((charset_t
)c2
)));
157 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
159 conv_handles
[c1
][c2
] = smb_iconv_open(n2
,n1
);
160 if (!conv_handles
[c1
][c2
]) {
161 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1
, n2
));
162 smb_panic("init_iconv: conv_handle initialization failed.");
169 /* XXX: Does this really get called every time the dos
170 * codepage changes? */
171 /* XXX: Is the did_reload test too strict? */
173 init_doschar_table();
180 * Convert a string from one encoding to another, performing error checking etc.
181 * Slow path version - uses (slow) iconv.
183 * @param src pointer to source string (multibyte or singlebyte)
184 * @param srclen length of the source string in bytes
185 * @param dest pointer to destination string (multibyte or singlebyte)
186 * @param destlen maximal length allowed for string
187 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
188 * @returns the number of bytes occupied in the destination
190 * Ensure the srclen contains the terminating zero.
194 static size_t convert_string_internal(charset_t from
, charset_t to
,
195 void const *src
, size_t srclen
,
196 void *dest
, size_t destlen
, BOOL allow_bad_conv
)
200 const char* inbuf
= (const char*)src
;
201 char* outbuf
= (char*)dest
;
202 smb_iconv_t descriptor
;
204 lazy_initialize_conv();
206 descriptor
= conv_handles
[from
][to
];
208 if (srclen
== (size_t)-1) {
209 if (from
== CH_UCS2
) {
210 srclen
= (strlen_w((const smb_ucs2_t
*)src
)+1) * 2;
212 srclen
= strlen((const char *)src
)+1;
217 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
219 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
228 retval
= smb_iconv(descriptor
, &inbuf
, &i_len
, &outbuf
, &o_len
);
229 if(retval
==(size_t)-1) {
230 const char *reason
="unknown error";
233 reason
="Incomplete multibyte sequence";
235 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
240 reason
="No more room";
242 if (from
== CH_UNIX
) {
243 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
244 charset_name(from
), charset_name(to
),
245 (unsigned int)srclen
, (unsigned int)destlen
, (const char *)src
));
247 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
248 charset_name(from
), charset_name(to
),
249 (unsigned int)srclen
, (unsigned int)destlen
));
254 reason
="Illegal multibyte sequence";
256 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
262 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason
,inbuf
));
265 /* smb_panic(reason); */
267 return destlen
-o_len
;
272 * Conversion not supported. This is actually an error, but there are so
273 * many misconfigured iconv systems and smb.conf's out there we can't just
274 * fail. Do a very bad conversion instead.... JRA.
278 if (o_len
== 0 || i_len
== 0)
279 return destlen
- o_len
;
281 if (from
== CH_UCS2
&& to
!= CH_UCS2
) {
282 /* Can't convert from ucs2 to multibyte. Replace with the default fail char. */
284 return destlen
- o_len
;
286 *outbuf
= lp_failed_convert_char();
295 if (o_len
== 0 || i_len
== 0)
296 return destlen
- o_len
;
298 /* Keep trying with the next char... */
301 } else if (from
!= CH_UCS2
&& to
== CH_UCS2
) {
302 /* Can't convert to ucs2 - just widen by adding the default fail char then zero. */
304 return destlen
- o_len
;
306 outbuf
[0] = lp_failed_convert_char();
315 if (o_len
== 0 || i_len
== 0)
316 return destlen
- o_len
;
318 /* Keep trying with the next char... */
321 } else if (from
!= CH_UCS2
&& to
!= CH_UCS2
) {
322 /* Failed multibyte to multibyte. Just copy the default fail char and
324 outbuf
[0] = lp_failed_convert_char();
332 if (o_len
== 0 || i_len
== 0)
333 return destlen
- o_len
;
335 /* Keep trying with the next char... */
339 /* Keep compiler happy.... */
340 return destlen
- o_len
;
346 * Convert a string from one encoding to another, performing error checking etc.
347 * Fast path version - handles ASCII first.
349 * @param src pointer to source string (multibyte or singlebyte)
350 * @param srclen length of the source string in bytes, or -1 for nul terminated.
351 * @param dest pointer to destination string (multibyte or singlebyte)
352 * @param destlen maximal length allowed for string - *NEVER* -1.
353 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
354 * @returns the number of bytes occupied in the destination
356 * Ensure the srclen contains the terminating zero.
358 * This function has been hand-tuned to provide a fast path.
359 * Don't change unless you really know what you are doing. JRA.
362 size_t convert_string(charset_t from
, charset_t to
,
363 void const *src
, size_t srclen
,
364 void *dest
, size_t destlen
, BOOL allow_bad_conv
)
367 * NB. We deliberately don't do a strlen here if srclen == -1.
368 * This is very expensive over millions of calls and is taken
369 * care of in the slow path in convert_string_internal. JRA.
373 SMB_ASSERT(destlen
!= (size_t)-1);
379 if (from
!= CH_UCS2
&& to
!= CH_UCS2
) {
380 const unsigned char *p
= (const unsigned char *)src
;
381 unsigned char *q
= (unsigned char *)dest
;
382 size_t slen
= srclen
;
383 size_t dlen
= destlen
;
384 unsigned char lastp
= '\0';
387 /* If all characters are ascii, fast path here. */
388 while (slen
&& dlen
) {
389 if ((lastp
= *p
) <= 0x7f) {
391 if (slen
!= (size_t)-1) {
399 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
402 return retval
+ convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
407 /* Even if we fast path we should note if we ran out of room. */
408 if (((slen
!= (size_t)-1) && slen
) ||
409 ((slen
== (size_t)-1) && lastp
)) {
414 } else if (from
== CH_UCS2
&& to
!= CH_UCS2
) {
415 const unsigned char *p
= (const unsigned char *)src
;
416 unsigned char *q
= (unsigned char *)dest
;
418 size_t slen
= srclen
;
419 size_t dlen
= destlen
;
420 unsigned char lastp
= '\0';
422 /* If all characters are ascii, fast path here. */
423 while (((slen
== (size_t)-1) || (slen
>= 2)) && dlen
) {
424 if (((lastp
= *p
) <= 0x7f) && (p
[1] == 0)) {
426 if (slen
!= (size_t)-1) {
435 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
438 return retval
+ convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
443 /* Even if we fast path we should note if we ran out of room. */
444 if (((slen
!= (size_t)-1) && slen
) ||
445 ((slen
== (size_t)-1) && lastp
)) {
450 } else if (from
!= CH_UCS2
&& to
== CH_UCS2
) {
451 const unsigned char *p
= (const unsigned char *)src
;
452 unsigned char *q
= (unsigned char *)dest
;
454 size_t slen
= srclen
;
455 size_t dlen
= destlen
;
456 unsigned char lastp
= '\0';
458 /* If all characters are ascii, fast path here. */
459 while (slen
&& (dlen
>= 2)) {
460 if ((lastp
= *p
) <= 0x7F) {
463 if (slen
!= (size_t)-1) {
471 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
474 return retval
+ convert_string_internal(from
, to
, p
, slen
, q
, dlen
, allow_bad_conv
);
479 /* Even if we fast path we should note if we ran out of room. */
480 if (((slen
!= (size_t)-1) && slen
) ||
481 ((slen
== (size_t)-1) && lastp
)) {
488 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
491 return convert_string_internal(from
, to
, src
, srclen
, dest
, destlen
, allow_bad_conv
);
495 * Convert between character sets, allocating a new buffer for the result.
497 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
498 * @param srclen length of source buffer.
499 * @param dest always set at least to NULL
500 * @note -1 is not accepted for srclen.
502 * @returns Size in bytes of the converted string; or -1 in case of error.
504 * Ensure the srclen contains the terminating zero.
506 * I hate the goto's in this function. It's embarrassing.....
507 * There has to be a cleaner way to do this. JRA.
510 size_t convert_string_allocate(TALLOC_CTX
*ctx
, charset_t from
, charset_t to
,
511 void const *src
, size_t srclen
, void **dest
, BOOL allow_bad_conv
)
513 size_t i_len
, o_len
, destlen
= MAX(srclen
, 512);
515 const char *inbuf
= (const char *)src
;
516 char *outbuf
= NULL
, *ob
= NULL
;
517 smb_iconv_t descriptor
;
521 if (src
== NULL
|| srclen
== (size_t)-1)
526 lazy_initialize_conv();
528 descriptor
= conv_handles
[from
][to
];
530 if (descriptor
== (smb_iconv_t
)-1 || descriptor
== (smb_iconv_t
)0) {
532 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
538 if ((destlen
*2) < destlen
) {
539 /* wrapped ! abort. */
541 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
546 destlen
= destlen
* 2;
550 ob
= (char *)TALLOC_REALLOC(ctx
, ob
, destlen
);
552 ob
= (char *)SMB_REALLOC(ob
, destlen
);
555 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
567 retval
= smb_iconv(descriptor
,
570 if(retval
== (size_t)-1) {
571 const char *reason
="unknown error";
574 reason
="Incomplete multibyte sequence";
576 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason
,inbuf
));
583 reason
="Illegal multibyte sequence";
585 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason
,inbuf
));
591 DEBUG(0,("Conversion error: %s(%s)\n",reason
,inbuf
));
592 /* smb_panic(reason); */
598 destlen
= destlen
- o_len
;
600 *dest
= (char *)TALLOC_REALLOC(ctx
,ob
,destlen
);
602 *dest
= (char *)SMB_REALLOC(ob
,destlen
);
603 if (destlen
&& !*dest
) {
604 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
615 * Conversion not supported. This is actually an error, but there are so
616 * many misconfigured iconv systems and smb.conf's out there we can't just
617 * fail. Do a very bad conversion instead.... JRA.
621 if (o_len
== 0 || i_len
== 0)
624 if (from
== CH_UCS2
&& to
!= CH_UCS2
) {
625 /* Can't convert from ucs2 to multibyte. Just use the default fail char. */
630 *outbuf
= lp_failed_convert_char();
639 if (o_len
== 0 || i_len
== 0)
642 /* Keep trying with the next char... */
645 } else if (from
!= CH_UCS2
&& to
== CH_UCS2
) {
646 /* Can't convert to ucs2 - just widen by adding the default fail char then zero. */
650 outbuf
[0] = lp_failed_convert_char();
659 if (o_len
== 0 || i_len
== 0)
662 /* Keep trying with the next char... */
665 } else if (from
!= CH_UCS2
&& to
!= CH_UCS2
) {
666 /* Failed multibyte to multibyte. Just copy the default fail char and
668 outbuf
[0] = lp_failed_convert_char();
676 if (o_len
== 0 || i_len
== 0)
679 /* Keep trying with the next char... */
683 /* Keep compiler happy.... */
690 * Convert between character sets, allocating a new buffer using talloc for the result.
692 * @param srclen length of source buffer.
693 * @param dest always set at least to NULL
694 * @note -1 is not accepted for srclen.
696 * @returns Size in bytes of the converted string; or -1 in case of error.
698 static size_t convert_string_talloc(TALLOC_CTX
*ctx
, charset_t from
, charset_t to
,
699 void const *src
, size_t srclen
, void **dest
, BOOL allow_bad_conv
)
704 dest_len
=convert_string_allocate(ctx
, from
, to
, src
, srclen
, dest
, allow_bad_conv
);
705 if (dest_len
== (size_t)-1)
712 size_t unix_strupper(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
717 size
= push_ucs2_allocate(&buffer
, src
);
718 if (size
== (size_t)-1) {
719 smb_panic("failed to create UCS2 buffer");
721 if (!strupper_w(buffer
) && (dest
== src
)) {
726 size
= convert_string(CH_UCS2
, CH_UNIX
, buffer
, size
, dest
, destlen
, True
);
732 strdup() a unix string to upper case.
736 char *strdup_upper(const char *s
)
739 const unsigned char *p
= (const unsigned char *)s
;
740 unsigned char *q
= (unsigned char *)out_buffer
;
742 /* this is quite a common operation, so we want it to be
743 fast. We optimise for the ascii case, knowing that all our
744 supported multi-byte character sets are ascii-compatible
745 (ie. they match for the first 128 chars) */
754 if (p
- ( const unsigned char *)s
>= sizeof(pstring
))
762 size
= convert_string(CH_UNIX
, CH_UCS2
, s
, -1, buffer
, sizeof(buffer
), True
);
763 if (size
== (size_t)-1) {
769 size
= convert_string(CH_UCS2
, CH_UNIX
, buffer
, -1, out_buffer
, sizeof(out_buffer
), True
);
770 if (size
== (size_t)-1) {
775 return SMB_STRDUP(out_buffer
);
778 size_t unix_strlower(const char *src
, size_t srclen
, char *dest
, size_t destlen
)
781 smb_ucs2_t
*buffer
= NULL
;
783 size
= convert_string_allocate(NULL
, CH_UNIX
, CH_UCS2
, src
, srclen
,
784 (void **) &buffer
, True
);
785 if (size
== (size_t)-1 || !buffer
) {
786 smb_panic("failed to create UCS2 buffer");
788 if (!strlower_w(buffer
) && (dest
== src
)) {
792 size
= convert_string(CH_UCS2
, CH_UNIX
, buffer
, size
, dest
, destlen
, True
);
798 strdup() a unix string to lower case.
801 char *strdup_lower(const char *s
)
804 smb_ucs2_t
*buffer
= NULL
;
807 size
= push_ucs2_allocate(&buffer
, s
);
808 if (size
== -1 || !buffer
) {
814 size
= pull_ucs2_allocate(&out_buffer
, buffer
);
817 if (size
== (size_t)-1) {
824 static size_t ucs2_align(const void *base_ptr
, const void *p
, int flags
)
826 if (flags
& (STR_NOALIGN
|STR_ASCII
))
828 return PTR_DIFF(p
, base_ptr
) & 1;
833 * Copy a string from a char* unix src to a dos codepage string destination.
835 * @return the number of bytes occupied by the string in the destination.
837 * @param flags can include
839 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
840 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
843 * @param dest_len the maximum length in bytes allowed in the
844 * destination. If @p dest_len is -1 then no maximum is used.
846 size_t push_ascii(void *dest
, const char *src
, size_t dest_len
, int flags
)
848 size_t src_len
= strlen(src
);
851 /* treat a pstring as "unlimited" length */
852 if (dest_len
== (size_t)-1)
853 dest_len
= sizeof(pstring
);
855 if (flags
& STR_UPPER
) {
856 pstrcpy(tmpbuf
, src
);
861 if (flags
& (STR_TERMINATE
| STR_TERMINATE_ASCII
))
864 return convert_string(CH_UNIX
, CH_DOS
, src
, src_len
, dest
, dest_len
, True
);
867 size_t push_ascii_fstring(void *dest
, const char *src
)
869 return push_ascii(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
872 size_t push_ascii_pstring(void *dest
, const char *src
)
874 return push_ascii(dest
, src
, sizeof(pstring
), STR_TERMINATE
);
877 /********************************************************************
878 Push an nstring - ensure null terminated. Written by
879 moriyama@miraclelinux.com (MORIYAMA Masayuki).
880 ********************************************************************/
882 size_t push_ascii_nstring(void *dest
, const char *src
)
884 size_t i
, buffer_len
, dest_len
;
888 buffer_len
= push_ucs2_allocate(&buffer
, src
);
889 if (buffer_len
== (size_t)-1) {
890 smb_panic("failed to create UCS2 buffer");
893 /* We're using buffer_len below to count ucs2 characters, not bytes. */
894 buffer_len
/= sizeof(smb_ucs2_t
);
897 for (i
= 0; buffer
[i
] != 0 && (i
< buffer_len
); i
++) {
898 unsigned char mb
[10];
899 /* Convert one smb_ucs2_t character at a time. */
900 size_t mb_len
= convert_string(CH_UCS2
, CH_DOS
, buffer
+i
, sizeof(smb_ucs2_t
), mb
, sizeof(mb
), False
);
901 if ((mb_len
!= (size_t)-1) && (dest_len
+ mb_len
<= MAX_NETBIOSNAME_LEN
- 1)) {
902 memcpy((char *)dest
+ dest_len
, mb
, mb_len
);
909 ((char *)dest
)[dest_len
] = '\0';
917 * Copy a string from a dos codepage source to a unix char* destination.
919 * The resulting string in "dest" is always null terminated.
921 * @param flags can have:
923 * <dt>STR_TERMINATE</dt>
924 * <dd>STR_TERMINATE means the string in @p src
925 * is null terminated, and src_len is ignored.</dd>
928 * @param src_len is the length of the source area in bytes.
929 * @returns the number of bytes occupied by the string in @p src.
931 size_t pull_ascii(char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
935 if (dest_len
== (size_t)-1)
936 dest_len
= sizeof(pstring
);
938 if (flags
& STR_TERMINATE
) {
939 if (src_len
== (size_t)-1) {
940 src_len
= strlen(src
) + 1;
942 size_t len
= strnlen(src
, src_len
);
949 ret
= convert_string(CH_DOS
, CH_UNIX
, src
, src_len
, dest
, dest_len
, True
);
950 if (ret
== (size_t)-1) {
955 dest
[MIN(ret
, dest_len
-1)] = 0;
962 size_t pull_ascii_pstring(char *dest
, const void *src
)
964 return pull_ascii(dest
, src
, sizeof(pstring
), -1, STR_TERMINATE
);
967 size_t pull_ascii_fstring(char *dest
, const void *src
)
969 return pull_ascii(dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
972 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
974 size_t pull_ascii_nstring(char *dest
, size_t dest_len
, const void *src
)
976 return pull_ascii(dest
, src
, dest_len
, sizeof(nstring
)-1, STR_TERMINATE
);
980 * Copy a string from a char* src to a unicode destination.
982 * @returns the number of bytes occupied by the string in the destination.
984 * @param flags can have:
987 * <dt>STR_TERMINATE <dd>means include the null termination.
988 * <dt>STR_UPPER <dd>means uppercase in the destination.
989 * <dt>STR_NOALIGN <dd>means don't do alignment.
992 * @param dest_len is the maximum length allowed in the
993 * destination. If dest_len is -1 then no maximum is used.
996 size_t push_ucs2(const void *base_ptr
, void *dest
, const char *src
, size_t dest_len
, int flags
)
1002 /* treat a pstring as "unlimited" length */
1003 if (dest_len
== (size_t)-1)
1004 dest_len
= sizeof(pstring
);
1006 if (flags
& STR_TERMINATE
)
1007 src_len
= (size_t)-1;
1009 src_len
= strlen(src
);
1011 if (ucs2_align(base_ptr
, dest
, flags
)) {
1013 dest
= (void *)((char *)dest
+ 1);
1019 /* ucs2 is always a multiple of 2 bytes */
1022 ret
= convert_string(CH_UNIX
, CH_UCS2
, src
, src_len
, dest
, dest_len
, True
);
1023 if (ret
== (size_t)-1) {
1029 if (flags
& STR_UPPER
) {
1030 smb_ucs2_t
*dest_ucs2
= dest
;
1032 for (i
= 0; i
< (dest_len
/ 2) && dest_ucs2
[i
]; i
++) {
1033 smb_ucs2_t v
= toupper_w(dest_ucs2
[i
]);
1034 if (v
!= dest_ucs2
[i
]) {
1045 * Copy a string from a unix char* src to a UCS2 destination,
1046 * allocating a buffer using talloc().
1048 * @param dest always set at least to NULL
1050 * @returns The number of bytes occupied by the string in the destination
1051 * or -1 in case of error.
1053 size_t push_ucs2_talloc(TALLOC_CTX
*ctx
, smb_ucs2_t
**dest
, const char *src
)
1055 size_t src_len
= strlen(src
)+1;
1058 return convert_string_talloc(ctx
, CH_UNIX
, CH_UCS2
, src
, src_len
, (void **)dest
, True
);
1063 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1065 * @param dest always set at least to NULL
1067 * @returns The number of bytes occupied by the string in the destination
1068 * or -1 in case of error.
1071 size_t push_ucs2_allocate(smb_ucs2_t
**dest
, const char *src
)
1073 size_t src_len
= strlen(src
)+1;
1076 return convert_string_allocate(NULL
, CH_UNIX
, CH_UCS2
, src
, src_len
, (void **)dest
, True
);
1080 Copy a string from a char* src to a UTF-8 destination.
1081 Return the number of bytes occupied by the string in the destination
1083 STR_TERMINATE means include the null termination
1084 STR_UPPER means uppercase in the destination
1085 dest_len is the maximum length allowed in the destination. If dest_len
1086 is -1 then no maximum is used.
1089 static size_t push_utf8(void *dest
, const char *src
, size_t dest_len
, int flags
)
1091 size_t src_len
= strlen(src
);
1094 /* treat a pstring as "unlimited" length */
1095 if (dest_len
== (size_t)-1)
1096 dest_len
= sizeof(pstring
);
1098 if (flags
& STR_UPPER
) {
1099 pstrcpy(tmpbuf
, src
);
1104 if (flags
& STR_TERMINATE
)
1107 return convert_string(CH_UNIX
, CH_UTF8
, src
, src_len
, dest
, dest_len
, True
);
1110 size_t push_utf8_fstring(void *dest
, const char *src
)
1112 return push_utf8(dest
, src
, sizeof(fstring
), STR_TERMINATE
);
1116 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1118 * @param dest always set at least to NULL
1120 * @returns The number of bytes occupied by the string in the destination
1123 size_t push_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
)
1125 size_t src_len
= strlen(src
)+1;
1128 return convert_string_talloc(ctx
, CH_UNIX
, CH_UTF8
, src
, src_len
, (void**)dest
, True
);
1132 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1134 * @param dest always set at least to NULL
1136 * @returns The number of bytes occupied by the string in the destination
1139 size_t push_utf8_allocate(char **dest
, const char *src
)
1141 size_t src_len
= strlen(src
)+1;
1144 return convert_string_allocate(NULL
, CH_UNIX
, CH_UTF8
, src
, src_len
, (void **)dest
, True
);
1148 Copy a string from a ucs2 source to a unix char* destination.
1150 STR_TERMINATE means the string in src is null terminated.
1151 STR_NOALIGN means don't try to align.
1152 if STR_TERMINATE is set then src_len is ignored if it is -1.
1153 src_len is the length of the source area in bytes
1154 Return the number of bytes occupied by the string in src.
1155 The resulting string in "dest" is always null terminated.
1158 size_t pull_ucs2(const void *base_ptr
, char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
1162 if (dest_len
== (size_t)-1)
1163 dest_len
= sizeof(pstring
);
1165 if (ucs2_align(base_ptr
, src
, flags
)) {
1166 src
= (const void *)((const char *)src
+ 1);
1167 if (src_len
!= (size_t)-1)
1171 if (flags
& STR_TERMINATE
) {
1172 /* src_len -1 is the default for null terminated strings. */
1173 if (src_len
!= (size_t)-1) {
1174 size_t len
= strnlen_w(src
, src_len
/2);
1175 if (len
< src_len
/2)
1181 /* ucs2 is always a multiple of 2 bytes */
1182 if (src_len
!= (size_t)-1)
1185 ret
= convert_string(CH_UCS2
, CH_UNIX
, src
, src_len
, dest
, dest_len
, True
);
1186 if (ret
== (size_t)-1) {
1190 if (src_len
== (size_t)-1)
1194 dest
[MIN(ret
, dest_len
-1)] = 0;
1201 size_t pull_ucs2_pstring(char *dest
, const void *src
)
1203 return pull_ucs2(NULL
, dest
, src
, sizeof(pstring
), -1, STR_TERMINATE
);
1206 size_t pull_ucs2_fstring(char *dest
, const void *src
)
1208 return pull_ucs2(NULL
, dest
, src
, sizeof(fstring
), -1, STR_TERMINATE
);
1212 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1214 * @param dest always set at least to NULL
1216 * @returns The number of bytes occupied by the string in the destination
1219 size_t pull_ucs2_talloc(TALLOC_CTX
*ctx
, char **dest
, const smb_ucs2_t
*src
)
1221 size_t src_len
= (strlen_w(src
)+1) * sizeof(smb_ucs2_t
);
1223 return convert_string_talloc(ctx
, CH_UCS2
, CH_UNIX
, src
, src_len
, (void **)dest
, True
);
1227 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1229 * @param dest always set at least to NULL
1231 * @returns The number of bytes occupied by the string in the destination
1234 size_t pull_ucs2_allocate(char **dest
, const smb_ucs2_t
*src
)
1236 size_t src_len
= (strlen_w(src
)+1) * sizeof(smb_ucs2_t
);
1238 return convert_string_allocate(NULL
, CH_UCS2
, CH_UNIX
, src
, src_len
, (void **)dest
, True
);
1242 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1244 * @param dest always set at least to NULL
1246 * @returns The number of bytes occupied by the string in the destination
1249 size_t pull_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
)
1251 size_t src_len
= strlen(src
)+1;
1253 return convert_string_talloc(ctx
, CH_UTF8
, CH_UNIX
, src
, src_len
, (void **)dest
, True
);
1257 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1259 * @param dest always set at least to NULL
1261 * @returns The number of bytes occupied by the string in the destination
1264 size_t pull_utf8_allocate(char **dest
, const char *src
)
1266 size_t src_len
= strlen(src
)+1;
1268 return convert_string_allocate(NULL
, CH_UTF8
, CH_UNIX
, src
, src_len
, (void **)dest
, True
);
1272 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1274 * @param dest always set at least to NULL
1276 * @returns The number of bytes occupied by the string in the destination
1279 size_t pull_ascii_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
)
1281 size_t src_len
= strlen(src
)+1;
1283 return convert_string_talloc(ctx
, CH_DOS
, CH_UNIX
, src
, src_len
, (void **)dest
, True
);
1287 Copy a string from a char* src to a unicode or ascii
1288 dos codepage destination choosing unicode or ascii based on the
1289 flags in the SMB buffer starting at base_ptr.
1290 Return the number of bytes occupied by the string in the destination.
1292 STR_TERMINATE means include the null termination.
1293 STR_UPPER means uppercase in the destination.
1294 STR_ASCII use ascii even with unicode packet.
1295 STR_NOALIGN means don't do alignment.
1296 dest_len is the maximum length allowed in the destination. If dest_len
1297 is -1 then no maximum is used.
1300 size_t push_string_fn(const char *function
, unsigned int line
, const void *base_ptr
, void *dest
, const char *src
, size_t dest_len
, int flags
)
1303 /* We really need to zero fill here, not clobber
1304 * region, as we want to ensure that valgrind thinks
1305 * all of the outgoing buffer has been written to
1306 * so a send() or write() won't trap an error.
1310 if (dest_len
!= (size_t)-1)
1311 clobber_region(function
, line
, dest
, dest_len
);
1313 if (dest_len
!= (size_t)-1)
1314 memset(dest
, '\0', dest_len
);
1318 if (!(flags
& STR_ASCII
) && \
1319 ((flags
& STR_UNICODE
|| \
1320 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
1321 return push_ucs2(base_ptr
, dest
, src
, dest_len
, flags
);
1323 return push_ascii(dest
, src
, dest_len
, flags
);
1328 Copy a string from a unicode or ascii source (depending on
1329 the packet flags) to a char* destination.
1331 STR_TERMINATE means the string in src is null terminated.
1332 STR_UNICODE means to force as unicode.
1333 STR_ASCII use ascii even with unicode packet.
1334 STR_NOALIGN means don't do alignment.
1335 if STR_TERMINATE is set then src_len is ignored if it is -1
1336 src_len is the length of the source area in bytes.
1337 Return the number of bytes occupied by the string in src.
1338 The resulting string in "dest" is always null terminated.
1341 size_t pull_string_fn(const char *function
, unsigned int line
, const void *base_ptr
, char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
1344 if (dest_len
!= (size_t)-1)
1345 clobber_region(function
, line
, dest
, dest_len
);
1348 if (!(flags
& STR_ASCII
) && \
1349 ((flags
& STR_UNICODE
|| \
1350 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
1351 return pull_ucs2(base_ptr
, dest
, src
, dest_len
, src_len
, flags
);
1353 return pull_ascii(dest
, src
, dest_len
, src_len
, flags
);
1356 size_t align_string(const void *base_ptr
, const char *p
, int flags
)
1358 if (!(flags
& STR_ASCII
) && \
1359 ((flags
& STR_UNICODE
|| \
1360 (SVAL(base_ptr
, smb_flg2
) & FLAGS2_UNICODE_STRINGS
)))) {
1361 return ucs2_align(base_ptr
, p
, flags
);
1366 /****************************************************************
1367 Calculate the size (in bytes) of the next multibyte character in
1368 our internal character set. Note that s must be pointing to a
1369 valid mb char, not within one.
1370 ****************************************************************/
1372 size_t next_mb_char_size(const char *s
)
1377 return 1; /* ascii. */
1380 for ( i
= 1; i
<=4; i
++ ) {
1382 if (convert_string(CH_UNIX
, CH_UCS2
, s
, i
, &uc
, 2, False
) == 2) {
1384 DEBUG(10,("next_mb_char_size: size %u at string %s\n",
1385 (unsigned int)i
, s
));
1387 conv_silent
= False
;
1391 /* We're hosed - we don't know how big this is... */
1392 DEBUG(10,("next_mb_char_size: unknown size at string %s\n", s
));
1393 conv_silent
= False
;