r1383: sync from 3.0 tree
[Samba.git] / source / lib / charcnv.c
blob3d7678c34cb168decf8ef42e9adbd72513fcc76e
/*
   Unix SMB/CIFS implementation.
   Character set conversion Extensions
   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   Copyright (C) Andrew Tridgell 2001
   Copyright (C) Simo Sorce 2001
   Copyright (C) Martin Pool 2003

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
24 #include "includes.h"
/**
 * @file
 *
 * @brief Character-set conversion routines built on our iconv.
 *
 * @note Samba's internal character set (at least in the 3.0 series)
 * is always the same as the one for the Unix filesystem.  It is
 * <b>not</b> necessarily UTF-8 and may be different on machines that
 * need i18n filenames to be compatible with Unix software.  It does
 * have to be a superset of ASCII.  All multibyte sequences must start
 * with a byte with the high bit set.
 *
 * @sa lib/iconv.c
 */
42 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
43 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
45 /**
46 * Return the name of a charset to give to iconv().
47 **/
48 static const char *charset_name(charset_t ch)
50 const char *ret = NULL;
52 if (ch == CH_UCS2) ret = "UCS-2LE";
53 else if (ch == CH_UNIX) ret = lp_unix_charset();
54 else if (ch == CH_DOS) ret = lp_dos_charset();
55 else if (ch == CH_DISPLAY) ret = lp_display_charset();
56 else if (ch == CH_UTF8) ret = "UTF8";
58 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
59 if (ret && !strcmp(ret, "LOCALE")) {
60 const char *ln = NULL;
62 #ifdef HAVE_SETLOCALE
63 setlocale(LC_ALL, "");
64 #endif
65 ln = nl_langinfo(CODESET);
66 if (ln) {
67 /* Check whether the charset name is supported
68 by iconv */
69 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
70 if (handle == (smb_iconv_t) -1) {
71 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
72 ln = NULL;
73 } else {
74 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
75 smb_iconv_close(handle);
78 ret = ln;
80 #endif
82 if (!ret || !*ret) ret = "ASCII";
83 return ret;
86 void lazy_initialize_conv(void)
88 static int initialized = False;
90 if (!initialized) {
91 initialized = True;
92 load_case_tables();
93 init_iconv();
97 /**
98 * Initialize iconv conversion descriptors.
100 * This is called the first time it is needed, and also called again
101 * every time the configuration is reloaded, because the charset or
102 * codepage might have changed.
104 void init_iconv(void)
106 int c1, c2;
107 BOOL did_reload = False;
109 /* so that charset_name() works we need to get the UNIX<->UCS2 going
110 first */
111 if (!conv_handles[CH_UNIX][CH_UCS2])
112 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
114 if (!conv_handles[CH_UCS2][CH_UNIX])
115 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
117 for (c1=0;c1<NUM_CHARSETS;c1++) {
118 for (c2=0;c2<NUM_CHARSETS;c2++) {
119 const char *n1 = charset_name((charset_t)c1);
120 const char *n2 = charset_name((charset_t)c2);
121 if (conv_handles[c1][c2] &&
122 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
123 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
124 continue;
126 did_reload = True;
128 if (conv_handles[c1][c2])
129 smb_iconv_close(conv_handles[c1][c2]);
131 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
132 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
133 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
134 charset_name((charset_t)c1), charset_name((charset_t)c2)));
135 if (c1 != CH_UCS2) {
136 n1 = "ASCII";
138 if (c2 != CH_UCS2) {
139 n2 = "ASCII";
141 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
142 n1, n2 ));
143 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
144 if (!conv_handles[c1][c2]) {
145 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
146 smb_panic("init_iconv: conv_handle initialization failed.");
152 if (did_reload) {
153 /* XXX: Does this really get called every time the dos
154 * codepage changes? */
155 /* XXX: Is the did_reload test too strict? */
156 conv_silent = True;
157 init_doschar_table();
158 init_valid_table();
159 conv_silent = False;
164 * Convert string from one encoding to another, making error checking etc
165 * Slow path version - uses (slow) iconv.
167 * @param src pointer to source string (multibyte or singlebyte)
168 * @param srclen length of the source string in bytes
169 * @param dest pointer to destination string (multibyte or singlebyte)
170 * @param destlen maximal length allowed for string
171 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
172 * @returns the number of bytes occupied in the destination
174 * Ensure the srclen contains the terminating zero.
178 static size_t convert_string_internal(charset_t from, charset_t to,
179 void const *src, size_t srclen,
180 void *dest, size_t destlen, BOOL allow_bad_conv)
182 size_t i_len, o_len;
183 size_t retval;
184 const char* inbuf = (const char*)src;
185 char* outbuf = (char*)dest;
186 smb_iconv_t descriptor;
188 lazy_initialize_conv();
190 descriptor = conv_handles[from][to];
192 if (srclen == (size_t)-1) {
193 if (from == CH_UCS2) {
194 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
195 } else {
196 srclen = strlen((const char *)src)+1;
201 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
202 if (!conv_silent)
203 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
204 return (size_t)-1;
207 i_len=srclen;
208 o_len=destlen;
210 again:
212 retval = smb_iconv(descriptor, (char **)&inbuf, &i_len, &outbuf, &o_len);
213 if(retval==(size_t)-1) {
214 const char *reason="unknown error";
215 switch(errno) {
216 case EINVAL:
217 reason="Incomplete multibyte sequence";
218 if (!conv_silent)
219 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
220 if (allow_bad_conv)
221 goto use_as_is;
222 break;
223 case E2BIG:
224 reason="No more room";
225 if (!conv_silent)
226 DEBUG(3, ("convert_string_internal: Required %lu, available %lu\n",
227 (unsigned long)srclen, (unsigned long)destlen));
228 /* we are not sure we need srclen bytes,
229 may be more, may be less.
230 We only know we need more than destlen
231 bytes ---simo */
232 break;
233 case EILSEQ:
234 reason="Illegal multibyte sequence";
235 if (!conv_silent)
236 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
237 if (allow_bad_conv)
238 goto use_as_is;
239 break;
240 default:
241 if (!conv_silent)
242 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
243 break;
245 /* smb_panic(reason); */
247 return destlen-o_len;
249 use_as_is:
252 * Conversion not supported. This is actually an error, but there are so
253 * many misconfigured iconv systems and smb.conf's out there we can't just
254 * fail. Do a very bad conversion instead.... JRA.
258 if (o_len == 0 || i_len == 0)
259 return destlen - o_len;
261 if (from == CH_UCS2 && to != CH_UCS2) {
262 /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
263 if (i_len < 2)
264 return destlen - o_len;
265 if (i_len >= 2) {
266 *outbuf = inbuf[0];
268 outbuf++;
269 o_len--;
271 inbuf += 2;
272 i_len -= 2;
275 if (o_len == 0 || i_len == 0)
276 return destlen - o_len;
278 /* Keep trying with the next char... */
279 goto again;
281 } else if (from != CH_UCS2 && to == CH_UCS2) {
282 /* Can't convert to ucs2 - just widen by adding zero. */
283 if (o_len < 2)
284 return destlen - o_len;
286 outbuf[0] = inbuf[0];
287 outbuf[1] = '\0';
289 inbuf++;
290 i_len--;
292 outbuf += 2;
293 o_len -= 2;
295 if (o_len == 0 || i_len == 0)
296 return destlen - o_len;
298 /* Keep trying with the next char... */
299 goto again;
301 } else if (from != CH_UCS2 && to != CH_UCS2) {
302 /* Failed multibyte to multibyte. Just copy 1 char and
303 try again. */
304 outbuf[0] = inbuf[0];
306 inbuf++;
307 i_len--;
309 outbuf++;
310 o_len--;
312 if (o_len == 0 || i_len == 0)
313 return destlen - o_len;
315 /* Keep trying with the next char... */
316 goto again;
318 } else {
319 /* Keep compiler happy.... */
320 return destlen - o_len;
326 * Convert string from one encoding to another, making error checking etc
327 * Fast path version - handles ASCII first.
329 * @param src pointer to source string (multibyte or singlebyte)
330 * @param srclen length of the source string in bytes, or -1 for nul terminated.
331 * @param dest pointer to destination string (multibyte or singlebyte)
332 * @param destlen maximal length allowed for string - *NEVER* -1.
333 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
334 * @returns the number of bytes occupied in the destination
336 * Ensure the srclen contains the terminating zero.
338 * This function has been hand-tuned to provide a fast path.
339 * Don't change unless you really know what you are doing. JRA.
342 size_t convert_string(charset_t from, charset_t to,
343 void const *src, size_t srclen,
344 void *dest, size_t destlen, BOOL allow_bad_conv)
347 * NB. We deliberately don't do a strlen here if srclen == -1.
348 * This is very expensive over millions of calls and is taken
349 * care of in the slow path in convert_string_internal. JRA.
352 #ifdef DEVELOPER
353 SMB_ASSERT(destlen != (size_t)-1);
354 #endif
356 if (srclen == 0)
357 return 0;
359 if (from != CH_UCS2 && to != CH_UCS2) {
360 const unsigned char *p = (const unsigned char *)src;
361 unsigned char *q = (unsigned char *)dest;
362 size_t slen = srclen;
363 size_t dlen = destlen;
364 unsigned char lastp;
365 size_t retval = 0;
367 /* If all characters are ascii, fast path here. */
368 while (slen && dlen) {
369 if ((lastp = *p) <= 0x7f) {
370 *q++ = *p++;
371 if (slen != (size_t)-1) {
372 slen--;
374 dlen--;
375 retval++;
376 if (!lastp)
377 break;
378 } else {
379 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
380 goto general_case;
381 #else
382 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
383 #endif
386 return retval;
387 } else if (from == CH_UCS2 && to != CH_UCS2) {
388 const unsigned char *p = (const unsigned char *)src;
389 unsigned char *q = (unsigned char *)dest;
390 size_t retval = 0;
391 size_t slen = srclen;
392 size_t dlen = destlen;
393 unsigned char lastp;
395 /* If all characters are ascii, fast path here. */
396 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
397 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
398 *q++ = *p;
399 if (slen != (size_t)-1) {
400 slen -= 2;
402 p += 2;
403 dlen--;
404 retval++;
405 if (!lastp)
406 break;
407 } else {
408 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
409 goto general_case;
410 #else
411 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
412 #endif
415 return retval;
416 } else if (from != CH_UCS2 && to == CH_UCS2) {
417 const unsigned char *p = (const unsigned char *)src;
418 unsigned char *q = (unsigned char *)dest;
419 size_t retval = 0;
420 size_t slen = srclen;
421 size_t dlen = destlen;
422 unsigned char lastp;
424 /* If all characters are ascii, fast path here. */
425 while (slen && (dlen >= 2)) {
426 if ((lastp = *p) <= 0x7F) {
427 *q++ = *p++;
428 *q++ = '\0';
429 if (slen != (size_t)-1) {
430 slen--;
432 dlen -= 2;
433 retval += 2;
434 if (!lastp)
435 break;
436 } else {
437 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
438 goto general_case;
439 #else
440 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
441 #endif
444 return retval;
447 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
448 general_case:
449 #endif
450 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
454 * Convert between character sets, allocating a new buffer for the result.
456 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
457 * @param srclen length of source buffer.
458 * @param dest always set at least to NULL
459 * @note -1 is not accepted for srclen.
461 * @returns Size in bytes of the converted string; or -1 in case of error.
463 * Ensure the srclen contains the terminating zero.
465 * I hate the goto's in this function. It's embarressing.....
466 * There has to be a cleaner way to do this. JRA.
469 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
470 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
472 size_t i_len, o_len, destlen = MAX(srclen, 512);
473 size_t retval;
474 const char *inbuf = (const char *)src;
475 char *outbuf = NULL, *ob = NULL;
476 smb_iconv_t descriptor;
478 *dest = NULL;
480 if (src == NULL || srclen == (size_t)-1)
481 return (size_t)-1;
482 if (srclen == 0)
483 return 0;
485 lazy_initialize_conv();
487 descriptor = conv_handles[from][to];
489 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
490 if (!conv_silent)
491 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
492 return (size_t)-1;
495 convert:
497 if ((destlen*2) < destlen) {
498 /* wrapped ! abort. */
499 if (!conv_silent)
500 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
501 if (!ctx)
502 SAFE_FREE(outbuf);
503 return (size_t)-1;
504 } else {
505 destlen = destlen * 2;
508 if (ctx)
509 ob = (char *)talloc_realloc(ctx, ob, destlen);
510 else
511 ob = (char *)Realloc(ob, destlen);
513 if (!ob) {
514 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
515 if (!ctx)
516 SAFE_FREE(outbuf);
517 return (size_t)-1;
518 } else {
519 outbuf = ob;
521 i_len = srclen;
522 o_len = destlen;
524 again:
526 retval = smb_iconv(descriptor,
527 (char **)&inbuf, &i_len,
528 &outbuf, &o_len);
529 if(retval == (size_t)-1) {
530 const char *reason="unknown error";
531 switch(errno) {
532 case EINVAL:
533 reason="Incomplete multibyte sequence";
534 if (!conv_silent)
535 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
536 if (allow_bad_conv)
537 goto use_as_is;
538 break;
539 case E2BIG:
540 goto convert;
541 case EILSEQ:
542 reason="Illegal multibyte sequence";
543 if (!conv_silent)
544 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
545 if (allow_bad_conv)
546 goto use_as_is;
547 break;
549 if (!conv_silent)
550 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
551 /* smb_panic(reason); */
552 return (size_t)-1;
555 out:
557 destlen = destlen - o_len;
558 if (ctx)
559 *dest = (char *)talloc_realloc(ctx,ob,destlen);
560 else
561 *dest = (char *)Realloc(ob,destlen);
562 if (destlen && !*dest) {
563 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
564 if (!ctx)
565 SAFE_FREE(ob);
566 return (size_t)-1;
569 return destlen;
571 use_as_is:
574 * Conversion not supported. This is actually an error, but there are so
575 * many misconfigured iconv systems and smb.conf's out there we can't just
576 * fail. Do a very bad conversion instead.... JRA.
580 if (o_len == 0 || i_len == 0)
581 goto out;
583 if (from == CH_UCS2 && to != CH_UCS2) {
584 /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
585 if (i_len < 2)
586 goto out;
588 if (i_len >= 2) {
589 *outbuf = inbuf[0];
591 outbuf++;
592 o_len--;
594 inbuf += 2;
595 i_len -= 2;
598 if (o_len == 0 || i_len == 0)
599 goto out;
601 /* Keep trying with the next char... */
602 goto again;
604 } else if (from != CH_UCS2 && to == CH_UCS2) {
605 /* Can't convert to ucs2 - just widen by adding zero. */
606 if (o_len < 2)
607 goto out;
609 outbuf[0] = inbuf[0];
610 outbuf[1] = '\0';
612 inbuf++;
613 i_len--;
615 outbuf += 2;
616 o_len -= 2;
618 if (o_len == 0 || i_len == 0)
619 goto out;
621 /* Keep trying with the next char... */
622 goto again;
624 } else if (from != CH_UCS2 && to != CH_UCS2) {
625 /* Failed multibyte to multibyte. Just copy 1 char and
626 try again. */
627 outbuf[0] = inbuf[0];
629 inbuf++;
630 i_len--;
632 outbuf++;
633 o_len--;
635 if (o_len == 0 || i_len == 0)
636 goto out;
638 /* Keep trying with the next char... */
639 goto again;
641 } else {
642 /* Keep compiler happy.... */
643 goto out;
649 * Convert between character sets, allocating a new buffer using talloc for the result.
651 * @param srclen length of source buffer.
652 * @param dest always set at least to NULL
653 * @note -1 is not accepted for srclen.
655 * @returns Size in bytes of the converted string; or -1 in case of error.
657 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
658 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
660 size_t dest_len;
662 *dest = NULL;
663 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
664 if (dest_len == (size_t)-1)
665 return (size_t)-1;
666 if (*dest == NULL)
667 return (size_t)-1;
668 return dest_len;
671 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
673 size_t size;
674 smb_ucs2_t *buffer;
676 size = push_ucs2_allocate(&buffer, src);
677 if (size == (size_t)-1) {
678 smb_panic("failed to create UCS2 buffer");
680 if (!strupper_w(buffer) && (dest == src)) {
681 free(buffer);
682 return srclen;
685 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
686 free(buffer);
687 return size;
691 strdup() a unix string to upper case.
692 Max size is pstring.
695 char *strdup_upper(const char *s)
697 pstring out_buffer;
698 const unsigned char *p = (const unsigned char *)s;
699 unsigned char *q = (unsigned char *)out_buffer;
701 /* this is quite a common operation, so we want it to be
702 fast. We optimise for the ascii case, knowing that all our
703 supported multi-byte character sets are ascii-compatible
704 (ie. they match for the first 128 chars) */
706 while (1) {
707 if (*p & 0x80)
708 break;
709 *q++ = toupper(*p);
710 if (!*p)
711 break;
712 p++;
713 if (p - ( const unsigned char *)s >= sizeof(pstring))
714 break;
717 if (*p) {
718 /* MB case. */
719 size_t size;
720 wpstring buffer;
721 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
722 if (size == (size_t)-1) {
723 return NULL;
726 strupper_w(buffer);
728 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
729 if (size == (size_t)-1) {
730 return NULL;
734 return strdup(out_buffer);
737 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
739 size_t size;
740 smb_ucs2_t *buffer = NULL;
742 size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
743 (void **) &buffer, True);
744 if (size == (size_t)-1 || !buffer) {
745 smb_panic("failed to create UCS2 buffer");
747 if (!strlower_w(buffer) && (dest == src)) {
748 SAFE_FREE(buffer);
749 return srclen;
751 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
752 SAFE_FREE(buffer);
753 return size;
757 strdup() a unix string to lower case.
760 char *strdup_lower(const char *s)
762 size_t size;
763 smb_ucs2_t *buffer = NULL;
764 char *out_buffer;
766 size = push_ucs2_allocate(&buffer, s);
767 if (size == -1 || !buffer) {
768 return NULL;
771 strlower_w(buffer);
773 size = pull_ucs2_allocate(&out_buffer, buffer);
774 SAFE_FREE(buffer);
776 if (size == (size_t)-1) {
777 return NULL;
780 return out_buffer;
783 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
785 if (flags & (STR_NOALIGN|STR_ASCII))
786 return 0;
787 return PTR_DIFF(p, base_ptr) & 1;
792 * Copy a string from a char* unix src to a dos codepage string destination.
794 * @return the number of bytes occupied by the string in the destination.
796 * @param flags can include
797 * <dl>
798 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
799 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
800 * </dl>
802 * @param dest_len the maximum length in bytes allowed in the
803 * destination. If @p dest_len is -1 then no maximum is used.
805 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
807 size_t src_len = strlen(src);
808 pstring tmpbuf;
810 /* treat a pstring as "unlimited" length */
811 if (dest_len == (size_t)-1)
812 dest_len = sizeof(pstring);
814 if (flags & STR_UPPER) {
815 pstrcpy(tmpbuf, src);
816 strupper_m(tmpbuf);
817 src = tmpbuf;
820 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
821 src_len++;
823 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
826 size_t push_ascii_fstring(void *dest, const char *src)
828 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
831 size_t push_ascii_pstring(void *dest, const char *src)
833 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
836 /********************************************************************
837 Push an nstring - ensure null terminated. Written by
838 moriyama@miraclelinux.com (MORIYAMA Masayuki).
839 ********************************************************************/
841 size_t push_ascii_nstring(void *dest, const char *src)
843 size_t i, buffer_len, dest_len;
844 smb_ucs2_t *buffer;
846 conv_silent = True;
847 buffer_len = push_ucs2_allocate(&buffer, src);
848 if (buffer_len == (size_t)-1) {
849 smb_panic("failed to create UCS2 buffer");
852 /* We're using buffer_len below to count ucs2 characters, not bytes. */
853 buffer_len /= sizeof(smb_ucs2_t);
855 dest_len = 0;
856 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
857 unsigned char mb[10];
858 /* Convert one smb_ucs2_t character at a time. */
859 size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
860 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
861 memcpy((char *)dest + dest_len, mb, mb_len);
862 dest_len += mb_len;
863 } else {
864 errno = E2BIG;
865 break;
868 ((char *)dest)[dest_len] = '\0';
870 SAFE_FREE(buffer);
871 conv_silent = False;
872 return dest_len;
876 * Copy a string from a dos codepage source to a unix char* destination.
878 * The resulting string in "dest" is always null terminated.
880 * @param flags can have:
881 * <dl>
882 * <dt>STR_TERMINATE</dt>
883 * <dd>STR_TERMINATE means the string in @p src
884 * is null terminated, and src_len is ignored.</dd>
885 * </dl>
887 * @param src_len is the length of the source area in bytes.
888 * @returns the number of bytes occupied by the string in @p src.
890 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
892 size_t ret;
894 if (dest_len == (size_t)-1)
895 dest_len = sizeof(pstring);
897 if (flags & STR_TERMINATE) {
898 if (src_len == (size_t)-1) {
899 src_len = strlen(src) + 1;
900 } else {
901 size_t len = strnlen(src, src_len);
902 if (len < src_len)
903 len++;
904 src_len = len;
908 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
909 if (ret == (size_t)-1) {
910 dest_len = 0;
913 if (dest_len)
914 dest[MIN(ret, dest_len-1)] = 0;
915 else
916 dest[0] = 0;
918 return src_len;
921 size_t pull_ascii_pstring(char *dest, const void *src)
923 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
926 size_t pull_ascii_fstring(char *dest, const void *src)
928 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
931 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
933 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
935 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
939 * Copy a string from a char* src to a unicode destination.
941 * @returns the number of bytes occupied by the string in the destination.
943 * @param flags can have:
945 * <dl>
946 * <dt>STR_TERMINATE <dd>means include the null termination.
947 * <dt>STR_UPPER <dd>means uppercase in the destination.
948 * <dt>STR_NOALIGN <dd>means don't do alignment.
949 * </dl>
951 * @param dest_len is the maximum length allowed in the
952 * destination. If dest_len is -1 then no maxiumum is used.
955 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
957 size_t len=0;
958 size_t src_len;
959 size_t ret;
961 /* treat a pstring as "unlimited" length */
962 if (dest_len == (size_t)-1)
963 dest_len = sizeof(pstring);
965 if (flags & STR_TERMINATE)
966 src_len = (size_t)-1;
967 else
968 src_len = strlen(src);
970 if (ucs2_align(base_ptr, dest, flags)) {
971 *(char *)dest = 0;
972 dest = (void *)((char *)dest + 1);
973 if (dest_len)
974 dest_len--;
975 len++;
978 /* ucs2 is always a multiple of 2 bytes */
979 dest_len &= ~1;
981 ret = convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
982 if (ret == (size_t)-1) {
983 return 0;
986 len += ret;
988 if (flags & STR_UPPER) {
989 smb_ucs2_t *dest_ucs2 = dest;
990 size_t i;
991 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
992 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
993 if (v != dest_ucs2[i]) {
994 dest_ucs2[i] = v;
999 return len;
1004 * Copy a string from a unix char* src to a UCS2 destination,
1005 * allocating a buffer using talloc().
1007 * @param dest always set at least to NULL
1009 * @returns The number of bytes occupied by the string in the destination
1010 * or -1 in case of error.
1012 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1014 size_t src_len = strlen(src)+1;
1016 *dest = NULL;
1017 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1022 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1024 * @param dest always set at least to NULL
1026 * @returns The number of bytes occupied by the string in the destination
1027 * or -1 in case of error.
1030 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1032 size_t src_len = strlen(src)+1;
1034 *dest = NULL;
1035 return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1039 Copy a string from a char* src to a UTF-8 destination.
1040 Return the number of bytes occupied by the string in the destination
1041 Flags can have:
1042 STR_TERMINATE means include the null termination
1043 STR_UPPER means uppercase in the destination
1044 dest_len is the maximum length allowed in the destination. If dest_len
1045 is -1 then no maxiumum is used.
1048 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1050 size_t src_len = strlen(src);
1051 pstring tmpbuf;
1053 /* treat a pstring as "unlimited" length */
1054 if (dest_len == (size_t)-1)
1055 dest_len = sizeof(pstring);
1057 if (flags & STR_UPPER) {
1058 pstrcpy(tmpbuf, src);
1059 strupper_m(tmpbuf);
1060 src = tmpbuf;
1063 if (flags & STR_TERMINATE)
1064 src_len++;
1066 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1069 size_t push_utf8_fstring(void *dest, const char *src)
1071 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1075 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1077 * @param dest always set at least to NULL
1079 * @returns The number of bytes occupied by the string in the destination
1082 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1084 size_t src_len = strlen(src)+1;
1086 *dest = NULL;
1087 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1091 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1093 * @param dest always set at least to NULL
1095 * @returns The number of bytes occupied by the string in the destination
1098 size_t push_utf8_allocate(char **dest, const char *src)
1100 size_t src_len = strlen(src)+1;
1102 *dest = NULL;
1103 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1107 Copy a string from a ucs2 source to a unix char* destination.
1108 Flags can have:
1109 STR_TERMINATE means the string in src is null terminated.
1110 STR_NOALIGN means don't try to align.
1111 if STR_TERMINATE is set then src_len is ignored if it is -1.
1112 src_len is the length of the source area in bytes
1113 Return the number of bytes occupied by the string in src.
1114 The resulting string in "dest" is always null terminated.
1117 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1119 size_t ret;
1121 if (dest_len == (size_t)-1)
1122 dest_len = sizeof(pstring);
1124 if (ucs2_align(base_ptr, src, flags)) {
1125 src = (const void *)((const char *)src + 1);
1126 if (src_len != (size_t)-1)
1127 src_len--;
1130 if (flags & STR_TERMINATE) {
1131 /* src_len -1 is the default for null terminated strings. */
1132 if (src_len != (size_t)-1) {
1133 size_t len = strnlen_w(src, src_len/2);
1134 if (len < src_len/2)
1135 len++;
1136 src_len = len*2;
1140 /* ucs2 is always a multiple of 2 bytes */
1141 if (src_len != (size_t)-1)
1142 src_len &= ~1;
1144 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1145 if (ret == (size_t)-1) {
1146 return 0;
1149 if (src_len == (size_t)-1)
1150 src_len = ret*2;
1152 if (dest_len)
1153 dest[MIN(ret, dest_len-1)] = 0;
1154 else
1155 dest[0] = 0;
1157 return src_len;
1160 size_t pull_ucs2_pstring(char *dest, const void *src)
1162 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1165 size_t pull_ucs2_fstring(char *dest, const void *src)
1167 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1171 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1173 * @param dest always set at least to NULL
1175 * @returns The number of bytes occupied by the string in the destination
1178 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1180 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1181 *dest = NULL;
1182 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1186 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1188 * @param dest always set at least to NULL
1190 * @returns The number of bytes occupied by the string in the destination
1193 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1195 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1196 *dest = NULL;
1197 return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1201 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1203 * @param dest always set at least to NULL
1205 * @returns The number of bytes occupied by the string in the destination
1208 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1210 size_t src_len = strlen(src)+1;
1211 *dest = NULL;
1212 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1216 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1218 * @param dest always set at least to NULL
1220 * @returns The number of bytes occupied by the string in the destination
1223 size_t pull_utf8_allocate(char **dest, const char *src)
1225 size_t src_len = strlen(src)+1;
1226 *dest = NULL;
1227 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1231 Copy a string from a char* src to a unicode or ascii
1232 dos codepage destination choosing unicode or ascii based on the
1233 flags in the SMB buffer starting at base_ptr.
1234 Return the number of bytes occupied by the string in the destination.
1235 flags can have:
1236 STR_TERMINATE means include the null termination.
1237 STR_UPPER means uppercase in the destination.
1238 STR_ASCII use ascii even with unicode packet.
1239 STR_NOALIGN means don't do alignment.
1240 dest_len is the maximum length allowed in the destination. If dest_len
1241 is -1 then no maxiumum is used.
1244 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1246 #ifdef DEVELOPER
1247 /* We really need to zero fill here, not clobber
1248 * region, as we want to ensure that valgrind thinks
1249 * all of the outgoing buffer has been written to
1250 * so a send() or write() won't trap an error.
1251 * JRA.
1253 #if 0
1254 if (dest_len != (size_t)-1)
1255 clobber_region(function, line, dest, dest_len);
1256 #else
1257 if (dest_len != (size_t)-1)
1258 memset(dest, '\0', dest_len);
1259 #endif
1260 #endif
1262 if (!(flags & STR_ASCII) && \
1263 ((flags & STR_UNICODE || \
1264 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1265 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1267 return push_ascii(dest, src, dest_len, flags);
1272 Copy a string from a unicode or ascii source (depending on
1273 the packet flags) to a char* destination.
1274 Flags can have:
1275 STR_TERMINATE means the string in src is null terminated.
1276 STR_UNICODE means to force as unicode.
1277 STR_ASCII use ascii even with unicode packet.
1278 STR_NOALIGN means don't do alignment.
1279 if STR_TERMINATE is set then src_len is ignored is it is -1
1280 src_len is the length of the source area in bytes.
1281 Return the number of bytes occupied by the string in src.
1282 The resulting string in "dest" is always null terminated.
1285 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1287 #ifdef DEVELOPER
1288 if (dest_len != (size_t)-1)
1289 clobber_region(function, line, dest, dest_len);
1290 #endif
1292 if (!(flags & STR_ASCII) && \
1293 ((flags & STR_UNICODE || \
1294 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1295 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1297 return pull_ascii(dest, src, dest_len, src_len, flags);
1300 size_t align_string(const void *base_ptr, const char *p, int flags)
1302 if (!(flags & STR_ASCII) && \
1303 ((flags & STR_UNICODE || \
1304 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1305 return ucs2_align(base_ptr, p, flags);
1307 return 0;
1310 /****************************************************************
1311 Calculate the size (in bytes) of the next multibyte character in
1312 our internal character set. Note that p must be pointing to a
1313 valid mb char, not within one.
1314 ****************************************************************/
1316 size_t next_mb_char_size(const char *s)
1318 size_t i;
1320 if (!(*s & 0x80))
1321 return 1; /* ascii. */
1323 conv_silent = True;
1324 for ( i = 1; i <=4; i++ ) {
1325 smb_ucs2_t uc;
1326 if (convert_string(CH_UNIX, CH_UCS2, s, i, &uc, 2, False) == 2) {
1327 #if 0 /* JRATEST */
1328 DEBUG(10,("next_mb_char_size: size %u at string %s\n",
1329 (unsigned int)i, s));
1330 #endif
1331 conv_silent = False;
1332 return i;
1335 /* We're hosed - we don't know how big this is... */
1336 DEBUG(10,("next_mb_char_size: unknown size at string %s\n", s));
1337 conv_silent = False;
1338 return 1;