Apply the changes that Derrell Lipman supplied ...
[Samba/gebeck_regimport.git] / source3 / lib / charcnv.c
blob9d15c6daa028b16fcd2c46488de83bd0d986b0d5
1 /*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "includes.h"
26 /**
27 * @file
29 * @brief Character-set conversion routines built on our iconv.
31 * @note Samba's internal character set (at least in the 3.0 series)
32 * is always the same as the one for the Unix filesystem. It is
33 * <b>not</b> necessarily UTF-8 and may be different on machines that
34 * need i18n filenames to be compatible with Unix software. It does
35 * have to be a superset of ASCII. All multibyte sequences must start
36 * with a byte with the high bit set.
38 * @sa lib/iconv.c
/* Table of iconv descriptors. The code in this file reads it as
   conv_handles[from][to]; an entry that is NULL or (smb_iconv_t)-1 is
   treated as "conversion not supported". */
42 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/* While this flag is set the conversion routines below skip their DEBUG()
   reports about unsupported or failed conversions. */
43 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
45 /**
46 * Return the name of a charset to give to iconv().
47 **/
48 static const char *charset_name(charset_t ch)
50 const char *ret = NULL;
52 if (ch == CH_UCS2) ret = "UCS-2LE";
53 else if (ch == CH_UNIX) ret = lp_unix_charset();
54 else if (ch == CH_DOS) ret = lp_dos_charset();
55 else if (ch == CH_DISPLAY) ret = lp_display_charset();
56 else if (ch == CH_UTF8) ret = "UTF8";
58 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
59 if (ret && !strcmp(ret, "LOCALE")) {
60 const char *ln = NULL;
62 #ifdef HAVE_SETLOCALE
63 setlocale(LC_ALL, "");
64 #endif
65 ln = nl_langinfo(CODESET);
66 if (ln) {
67 /* Check whether the charset name is supported
68 by iconv */
69 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
70 if (handle == (smb_iconv_t) -1) {
71 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
72 ln = NULL;
73 } else {
74 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
75 smb_iconv_close(handle);
78 ret = ln;
80 #endif
82 if (!ret || !*ret) ret = "ASCII";
83 return ret;
86 void lazy_initialize_conv(void)
88 static int initialized = False;
90 if (!initialized) {
91 initialized = True;
92 load_case_tables();
93 init_iconv();
97 /**
98 * Initialize iconv conversion descriptors.
100 * This is called the first time it is needed, and also called again
101 * every time the configuration is reloaded, because the charset or
102 * codepage might have changed.
104 void init_iconv(void)
106 int c1, c2;
107 BOOL did_reload = False;
109 /* so that charset_name() works we need to get the UNIX<->UCS2 going
110 first */
111 if (!conv_handles[CH_UNIX][CH_UCS2])
112 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
114 if (!conv_handles[CH_UCS2][CH_UNIX])
115 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
117 for (c1=0;c1<NUM_CHARSETS;c1++) {
118 for (c2=0;c2<NUM_CHARSETS;c2++) {
119 const char *n1 = charset_name((charset_t)c1);
120 const char *n2 = charset_name((charset_t)c2);
121 if (conv_handles[c1][c2] &&
122 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
123 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
124 continue;
126 did_reload = True;
128 if (conv_handles[c1][c2])
129 smb_iconv_close(conv_handles[c1][c2]);
131 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
132 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
133 DEBUG(0,("Conversion from %s to %s not supported\n",
134 charset_name((charset_t)c1), charset_name((charset_t)c2)));
135 conv_handles[c1][c2] = NULL;
140 if (did_reload) {
141 /* XXX: Does this really get called every time the dos
142 * codepage changes? */
143 /* XXX: Is the did_reload test too strict? */
144 conv_silent = True;
145 init_doschar_table();
146 init_valid_table();
147 conv_silent = False;
152 * Convert string from one encoding to another, making error checking etc
153 * Slow path version - uses (slow) iconv.
155 * @param src pointer to source string (multibyte or singlebyte)
156 * @param srclen length of the source string in bytes
157 * @param dest pointer to destination string (multibyte or singlebyte)
158 * @param destlen maximal length allowed for string
159 * @returns the number of bytes occupied in the destination
161 * Ensure the srclen contains the terminating zero.
165 static size_t convert_string_internal(charset_t from, charset_t to,
166 void const *src, size_t srclen,
167 void *dest, size_t destlen)
169 size_t i_len, o_len;
170 size_t retval;
171 const char* inbuf = (const char*)src;
172 char* outbuf = (char*)dest;
173 smb_iconv_t descriptor;
175 lazy_initialize_conv();
177 descriptor = conv_handles[from][to];
179 if (srclen == (size_t)-1) {
180 if (from == CH_UCS2) {
181 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
182 } else {
183 srclen = strlen((const char *)src)+1;
187 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
188 if (!conv_silent)
189 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
190 goto use_as_is;
193 i_len=srclen;
194 o_len=destlen;
195 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
196 if(retval==(size_t)-1) {
197 const char *reason="unknown error";
198 switch(errno) {
199 case EINVAL:
200 reason="Incomplete multibyte sequence";
201 if (!conv_silent)
202 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
203 goto use_as_is;
204 case E2BIG:
205 reason="No more room";
206 if (!conv_silent)
207 DEBUG(3, ("convert_string_internal: Required %lu, available %lu\n",
208 (unsigned long)srclen, (unsigned long)destlen));
209 /* we are not sure we need srclen bytes,
210 may be more, may be less.
211 We only know we need more than destlen
212 bytes ---simo */
213 break;
214 case EILSEQ:
215 reason="Illegal multibyte sequence";
216 if (!conv_silent)
217 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
218 goto use_as_is;
219 default:
220 if (!conv_silent)
221 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
222 break;
224 /* smb_panic(reason); */
226 return destlen-o_len;
228 use_as_is:
230 /* conversion not supported, use as is */
232 size_t len = MIN(srclen,destlen);
233 if (len)
234 memcpy(dest,src,len);
235 return len;
240 * Convert string from one encoding to another, making error checking etc
241 * Fast path version - handles ASCII first.
243 * @param src pointer to source string (multibyte or singlebyte)
244 * @param srclen length of the source string in bytes
245 * @param dest pointer to destination string (multibyte or singlebyte)
246 * @param destlen maximal length allowed for string
247 * @returns the number of bytes occupied in the destination
249 * Ensure the srclen contains the terminating zero.
251 * This function has been hand-tuned to provide a fast path.
252 * Don't change unless you really know what you are doing. JRA.
255 size_t convert_string(charset_t from, charset_t to,
256 void const *src, size_t srclen,
257 void *dest, size_t destlen)
260 * NB. We deliberately don't do a strlen here is srclen == -1.
261 * This is very expensive over millions of calls and is taken
262 * care of in the slow path in convert_string_internal. JRA.
265 if (srclen == 0)
266 return 0;
268 if (from != CH_UCS2 && to != CH_UCS2) {
269 const unsigned char *p = (const unsigned char *)src;
270 unsigned char *q = (unsigned char *)dest;
271 size_t slen = srclen;
272 size_t dlen = destlen;
273 unsigned char lastp;
274 size_t retval = 0;
276 /* If all characters are ascii, fast path here. */
277 while (slen && dlen) {
278 if ((lastp = *p) <= 0x7f) {
279 *q++ = *p++;
280 if (slen != (size_t)-1) {
281 slen--;
283 dlen--;
284 retval++;
285 if (!lastp)
286 break;
287 } else {
288 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
289 goto general_case;
290 #else
291 return retval + convert_string_internal(from, to, p, slen, q, dlen);
292 #endif
295 return retval;
296 } else if (from == CH_UCS2 && to != CH_UCS2) {
297 const unsigned char *p = (const unsigned char *)src;
298 unsigned char *q = (unsigned char *)dest;
299 size_t retval = 0;
300 size_t slen = srclen;
301 size_t dlen = destlen;
302 unsigned char lastp;
304 /* If all characters are ascii, fast path here. */
305 while ((slen >= 2) && dlen) {
306 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
307 *q++ = *p;
308 if (slen != (size_t)-1) {
309 slen -= 2;
311 p += 2;
312 dlen--;
313 retval++;
314 if (!lastp)
315 break;
316 } else {
317 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
318 goto general_case;
319 #else
320 return retval + convert_string_internal(from, to, p, slen, q, dlen);
321 #endif
324 return retval;
325 } else if (from != CH_UCS2 && to == CH_UCS2) {
326 const unsigned char *p = (const unsigned char *)src;
327 unsigned char *q = (unsigned char *)dest;
328 size_t retval = 0;
329 size_t slen = srclen;
330 size_t dlen = destlen;
331 unsigned char lastp;
333 /* If all characters are ascii, fast path here. */
334 while (slen && (dlen >= 2)) {
335 if ((lastp = *p) <= 0x7F) {
336 *q++ = *p++;
337 *q++ = '\0';
338 if (slen != (size_t)-1) {
339 slen--;
341 dlen -= 2;
342 retval += 2;
343 if (!lastp)
344 break;
345 } else {
346 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
347 goto general_case;
348 #else
349 return retval + convert_string_internal(from, to, p, slen, q, dlen);
350 #endif
353 return retval;
356 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
357 general_case:
358 #endif
359 return convert_string_internal(from, to, src, srclen, dest, destlen);
363 * Convert between character sets, allocating a new buffer for the result.
365 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
366 * @param srclen length of source buffer.
367 * @param dest always set at least to NULL
368 * @note -1 is not accepted for srclen.
370 * @returns Size in bytes of the converted string; or -1 in case of error.
372 * Ensure the srclen contains the terminating zero.
375 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
376 void const *src, size_t srclen, void **dest)
378 size_t i_len, o_len, destlen = MAX(srclen, 512);
379 size_t retval;
380 const char *inbuf = (const char *)src;
381 char *outbuf = NULL, *ob = NULL;
382 smb_iconv_t descriptor;
384 *dest = NULL;
386 if (src == NULL || srclen == (size_t)-1)
387 return (size_t)-1;
388 if (srclen == 0)
389 return 0;
391 lazy_initialize_conv();
393 descriptor = conv_handles[from][to];
395 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
396 if (!conv_silent)
397 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
398 goto use_as_is;
401 convert:
402 if ((destlen*2) < destlen) {
403 /* wrapped ! abort. */
404 if (!conv_silent)
405 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
406 if (!ctx)
407 SAFE_FREE(outbuf);
408 return (size_t)-1;
409 } else {
410 destlen = destlen * 2;
413 if (ctx)
414 ob = (char *)talloc_realloc(ctx, ob, destlen);
415 else
416 ob = (char *)Realloc(ob, destlen);
418 if (!ob) {
419 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
420 if (!ctx)
421 SAFE_FREE(outbuf);
422 return (size_t)-1;
423 } else {
424 outbuf = ob;
426 i_len = srclen;
427 o_len = destlen;
428 retval = smb_iconv(descriptor,
429 &inbuf, &i_len,
430 &outbuf, &o_len);
431 if(retval == (size_t)-1) {
432 const char *reason="unknown error";
433 switch(errno) {
434 case EINVAL:
435 reason="Incomplete multibyte sequence";
436 if (!conv_silent)
437 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
438 goto use_as_is;
439 case E2BIG:
440 goto convert;
441 case EILSEQ:
442 reason="Illegal multibyte sequence";
443 if (!conv_silent)
444 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
445 goto use_as_is;
447 if (!conv_silent)
448 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
449 /* smb_panic(reason); */
450 return (size_t)-1;
453 destlen = destlen - o_len;
454 if (ctx)
455 *dest = (char *)talloc_realloc(ctx,ob,destlen);
456 else
457 *dest = (char *)Realloc(ob,destlen);
458 if (destlen && !*dest) {
459 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
460 if (!ctx)
461 SAFE_FREE(ob);
462 return (size_t)-1;
465 return destlen;
467 use_as_is:
469 /* conversion not supported, use as is */
471 if (srclen && (destlen != srclen)) {
472 destlen = srclen;
473 if (ctx)
474 ob = (char *)talloc_realloc(ctx, ob, destlen);
475 else
476 ob = (char *)Realloc(ob, destlen);
477 if (!ob) {
478 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
479 if (!ctx)
480 SAFE_FREE(outbuf);
481 return (size_t)-1;
484 if (srclen && ob)
485 memcpy(ob,(const char *)src,srclen);
486 *dest = (char *)ob;
487 return srclen;
493 * Convert between character sets, allocating a new buffer using talloc for the result.
495 * @param srclen length of source buffer.
496 * @param dest always set at least to NULL
497 * @note -1 is not accepted for srclen.
499 * @returns Size in bytes of the converted string; or -1 in case of error.
501 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
502 void const *src, size_t srclen, void **dest)
504 size_t dest_len;
506 *dest = NULL;
507 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest);
508 if (dest_len == (size_t)-1)
509 return (size_t)-1;
510 if (*dest == NULL)
511 return (size_t)-1;
512 return dest_len;
515 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
517 size_t size;
518 smb_ucs2_t *buffer;
520 size = push_ucs2_allocate(&buffer, src);
521 if (size == -1) {
522 smb_panic("failed to create UCS2 buffer");
524 if (!strupper_w(buffer) && (dest == src)) {
525 free(buffer);
526 return srclen;
529 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
530 free(buffer);
531 return size;
535 strdup() a unix string to upper case.
536 Max size is pstring.
539 char *strdup_upper(const char *s)
541 pstring out_buffer;
542 const unsigned char *p = (const unsigned char *)s;
543 unsigned char *q = (unsigned char *)out_buffer;
545 /* this is quite a common operation, so we want it to be
546 fast. We optimise for the ascii case, knowing that all our
547 supported multi-byte character sets are ascii-compatible
548 (ie. they match for the first 128 chars) */
550 while (1) {
551 if (*p & 0x80)
552 break;
553 *q++ = toupper(*p);
554 if (!*p)
555 break;
556 p++;
557 if (p - ( const unsigned char *)s >= sizeof(pstring))
558 break;
561 if (*p) {
562 /* MB case. */
563 size_t size;
564 wpstring buffer;
565 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer));
566 if (size == -1) {
567 return NULL;
570 strupper_w(buffer);
572 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer));
573 if (size == -1) {
574 return NULL;
578 return strdup(out_buffer);
581 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
583 size_t size;
584 smb_ucs2_t *buffer;
586 size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
587 (void **) &buffer);
588 if (size == -1) {
589 smb_panic("failed to create UCS2 buffer");
591 if (!strlower_w(buffer) && (dest == src)) {
592 free(buffer);
593 return srclen;
595 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen);
596 free(buffer);
597 return size;
601 strdup() a unix string to lower case.
604 char *strdup_lower(const char *s)
606 size_t size;
607 smb_ucs2_t *buffer;
608 char *out_buffer;
610 size = push_ucs2_allocate(&buffer, s);
611 if (size == -1) {
612 return NULL;
615 strlower_w(buffer);
617 size = pull_ucs2_allocate(&out_buffer, buffer);
618 SAFE_FREE(buffer);
620 if (size == -1) {
621 return NULL;
624 return out_buffer;
627 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
629 if (flags & (STR_NOALIGN|STR_ASCII))
630 return 0;
631 return PTR_DIFF(p, base_ptr) & 1;
636 * Copy a string from a char* unix src to a dos codepage string destination.
638 * @return the number of bytes occupied by the string in the destination.
640 * @param flags can include
641 * <dl>
642 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
643 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
644 * </dl>
646 * @param dest_len the maximum length in bytes allowed in the
647 * destination. If @p dest_len is -1 then no maximum is used.
649 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
651 size_t src_len = strlen(src);
652 pstring tmpbuf;
654 /* treat a pstring as "unlimited" length */
655 if (dest_len == (size_t)-1)
656 dest_len = sizeof(pstring);
658 if (flags & STR_UPPER) {
659 pstrcpy(tmpbuf, src);
660 strupper_m(tmpbuf);
661 src = tmpbuf;
664 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
665 src_len++;
667 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len);
670 size_t push_ascii_fstring(void *dest, const char *src)
672 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
675 size_t push_ascii_pstring(void *dest, const char *src)
677 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
680 size_t push_ascii_nstring(void *dest, const char *src)
682 return push_ascii(dest, src, sizeof(nstring), STR_TERMINATE);
686 * Copy a string from a dos codepage source to a unix char* destination.
688 * The resulting string in "dest" is always null terminated.
690 * @param flags can have:
691 * <dl>
692 * <dt>STR_TERMINATE</dt>
693 * <dd>STR_TERMINATE means the string in @p src
694 * is null terminated, and src_len is ignored.</dd>
695 * </dl>
697 * @param src_len is the length of the source area in bytes.
698 * @returns the number of bytes occupied by the string in @p src.
700 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
702 size_t ret;
704 if (dest_len == (size_t)-1)
705 dest_len = sizeof(pstring);
707 if (flags & STR_TERMINATE) {
708 if (src_len == (size_t)-1) {
709 src_len = strlen(src) + 1;
710 } else {
711 size_t len = strnlen(src, src_len);
712 if (len < src_len)
713 len++;
714 src_len = len;
718 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len);
720 if (dest_len)
721 dest[MIN(ret, dest_len-1)] = 0;
722 else
723 dest[0] = 0;
725 return src_len;
728 size_t pull_ascii_pstring(char *dest, const void *src)
730 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
733 size_t pull_ascii_fstring(char *dest, const void *src)
735 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
738 size_t pull_ascii_nstring(char *dest, const void *src)
740 return pull_ascii(dest, src, sizeof(nstring), sizeof(nstring), STR_TERMINATE);
744 * Copy a string from a char* src to a unicode destination.
746 * @returns the number of bytes occupied by the string in the destination.
748 * @param flags can have:
750 * <dl>
751 * <dt>STR_TERMINATE <dd>means include the null termination.
752 * <dt>STR_UPPER <dd>means uppercase in the destination.
753 * <dt>STR_NOALIGN <dd>means don't do alignment.
754 * </dl>
756 * @param dest_len is the maximum length allowed in the
757 * destination. If dest_len is -1 then no maxiumum is used.
760 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
762 size_t len=0;
763 size_t src_len;
765 /* treat a pstring as "unlimited" length */
766 if (dest_len == (size_t)-1)
767 dest_len = sizeof(pstring);
769 if (flags & STR_TERMINATE)
770 src_len = (size_t)-1;
771 else
772 src_len = strlen(src);
774 if (ucs2_align(base_ptr, dest, flags)) {
775 *(char *)dest = 0;
776 dest = (void *)((char *)dest + 1);
777 if (dest_len)
778 dest_len--;
779 len++;
782 /* ucs2 is always a multiple of 2 bytes */
783 dest_len &= ~1;
785 len += convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len);
787 if (flags & STR_UPPER) {
788 smb_ucs2_t *dest_ucs2 = dest;
789 size_t i;
790 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
791 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
792 if (v != dest_ucs2[i]) {
793 dest_ucs2[i] = v;
798 return len;
803 * Copy a string from a unix char* src to a UCS2 destination,
804 * allocating a buffer using talloc().
806 * @param dest always set at least to NULL
808 * @returns The number of bytes occupied by the string in the destination
809 * or -1 in case of error.
811 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
813 size_t src_len = strlen(src)+1;
815 *dest = NULL;
816 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
821 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
823 * @param dest always set at least to NULL
825 * @returns The number of bytes occupied by the string in the destination
826 * or -1 in case of error.
829 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
831 size_t src_len = strlen(src)+1;
833 *dest = NULL;
834 return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest);
838 Copy a string from a char* src to a UTF-8 destination.
839 Return the number of bytes occupied by the string in the destination
840 Flags can have:
841 STR_TERMINATE means include the null termination
842 STR_UPPER means uppercase in the destination
843 dest_len is the maximum length allowed in the destination. If dest_len
844 is -1 then no maxiumum is used.
847 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
849 size_t src_len = strlen(src);
850 pstring tmpbuf;
852 /* treat a pstring as "unlimited" length */
853 if (dest_len == (size_t)-1)
854 dest_len = sizeof(pstring);
856 if (flags & STR_UPPER) {
857 pstrcpy(tmpbuf, src);
858 strupper_m(tmpbuf);
859 src = tmpbuf;
862 if (flags & STR_TERMINATE)
863 src_len++;
865 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len);
868 size_t push_utf8_fstring(void *dest, const char *src)
870 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
874 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
876 * @param dest always set at least to NULL
878 * @returns The number of bytes occupied by the string in the destination
881 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
883 size_t src_len = strlen(src)+1;
885 *dest = NULL;
886 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest);
890 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
892 * @param dest always set at least to NULL
894 * @returns The number of bytes occupied by the string in the destination
897 size_t push_utf8_allocate(char **dest, const char *src)
899 size_t src_len = strlen(src)+1;
901 *dest = NULL;
902 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest);
906 Copy a string from a ucs2 source to a unix char* destination.
907 Flags can have:
908 STR_TERMINATE means the string in src is null terminated.
909 STR_NOALIGN means don't try to align.
910 if STR_TERMINATE is set then src_len is ignored if it is -1.
911 src_len is the length of the source area in bytes
912 Return the number of bytes occupied by the string in src.
913 The resulting string in "dest" is always null terminated.
916 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
918 size_t ret;
920 if (dest_len == (size_t)-1)
921 dest_len = sizeof(pstring);
923 if (ucs2_align(base_ptr, src, flags)) {
924 src = (const void *)((const char *)src + 1);
925 if (src_len > 0)
926 src_len--;
929 if (flags & STR_TERMINATE) {
930 /* src_len -1 is the default for null terminated strings. */
931 if (src_len != (size_t)-1) {
932 size_t len = strnlen_w(src, src_len/2);
933 if (len < src_len/2)
934 len++;
935 src_len = len*2;
939 /* ucs2 is always a multiple of 2 bytes */
940 if (src_len != (size_t)-1)
941 src_len &= ~1;
943 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len);
944 if (dest_len)
945 dest[MIN(ret, dest_len-1)] = 0;
946 else
947 dest[0] = 0;
949 return src_len;
952 size_t pull_ucs2_pstring(char *dest, const void *src)
954 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
957 size_t pull_ucs2_fstring(char *dest, const void *src)
959 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
963 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
965 * @param dest always set at least to NULL
967 * @returns The number of bytes occupied by the string in the destination
970 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
972 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
973 *dest = NULL;
974 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest);
978 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
980 * @param dest always set at least to NULL
982 * @returns The number of bytes occupied by the string in the destination
985 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
987 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
988 *dest = NULL;
989 return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest);
993 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
995 * @param dest always set at least to NULL
997 * @returns The number of bytes occupied by the string in the destination
1000 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1002 size_t src_len = strlen(src)+1;
1003 *dest = NULL;
1004 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest);
1008 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1010 * @param dest always set at least to NULL
1012 * @returns The number of bytes occupied by the string in the destination
1015 size_t pull_utf8_allocate(void **dest, const char *src)
1017 size_t src_len = strlen(src)+1;
1018 *dest = NULL;
1019 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, dest);
1023 Copy a string from a char* src to a unicode or ascii
1024 dos codepage destination choosing unicode or ascii based on the
1025 flags in the SMB buffer starting at base_ptr.
1026 Return the number of bytes occupied by the string in the destination.
1027 flags can have:
1028 STR_TERMINATE means include the null termination.
1029 STR_UPPER means uppercase in the destination.
1030 STR_ASCII use ascii even with unicode packet.
1031 STR_NOALIGN means don't do alignment.
1032 dest_len is the maximum length allowed in the destination. If dest_len
1033 is -1 then no maxiumum is used.
1036 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1038 #ifdef DEVELOPER
1039 /* We really need to zero fill here, not clobber
1040 * region, as we want to ensure that valgrind thinks
1041 * all of the outgoing buffer has been written to
1042 * so a send() or write() won't trap an error.
1043 * JRA.
1045 #if 0
1046 if (dest_len != (size_t)-1)
1047 clobber_region(function, line, dest, dest_len);
1048 #else
1049 if (dest_len != (size_t)-1)
1050 memset(dest, '\0', dest_len);
1051 #endif
1052 #endif
1054 if (!(flags & STR_ASCII) && \
1055 ((flags & STR_UNICODE || \
1056 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1057 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1059 return push_ascii(dest, src, dest_len, flags);
1064 Copy a string from a unicode or ascii source (depending on
1065 the packet flags) to a char* destination.
1066 Flags can have:
1067 STR_TERMINATE means the string in src is null terminated.
1068 STR_UNICODE means to force as unicode.
1069 STR_ASCII use ascii even with unicode packet.
1070 STR_NOALIGN means don't do alignment.
1071 if STR_TERMINATE is set then src_len is ignored is it is -1
1072 src_len is the length of the source area in bytes.
1073 Return the number of bytes occupied by the string in src.
1074 The resulting string in "dest" is always null terminated.
1077 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1079 #ifdef DEVELOPER
1080 if (dest_len != (size_t)-1)
1081 clobber_region(function, line, dest, dest_len);
1082 #endif
1084 if (!(flags & STR_ASCII) && \
1085 ((flags & STR_UNICODE || \
1086 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1087 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1089 return pull_ascii(dest, src, dest_len, src_len, flags);
1092 size_t align_string(const void *base_ptr, const char *p, int flags)
1094 if (!(flags & STR_ASCII) && \
1095 ((flags & STR_UNICODE || \
1096 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1097 return ucs2_align(base_ptr, p, flags);
1099 return 0;