WHATSNEW: Update changes.
[Samba.git] / source3 / lib / charcnv.c
blob9ac9930267ef6614738f0f5d75a933d3d8d958b9
/*
   Unix SMB/CIFS implementation.
   Character set conversion Extensions
   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
   Copyright (C) Andrew Tridgell 2001
   Copyright (C) Simo Sorce 2001
   Copyright (C) Martin Pool 2003

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
23 #include "includes.h"
/**
 * Return the character substituted for input that cannot be converted.
 *
 * We can parameterize this if someone complains.... JRA.
 **/
char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
/**
 * @file
 *
 * @brief Character-set conversion routines built on our iconv.
 *
 * @note Samba's internal character set (at least in the 3.0 series)
 * is always the same as the one for the Unix filesystem.  It is
 * <b>not</b> necessarily UTF-8 and may be different on machines that
 * need i18n filenames to be compatible with Unix software.  It does
 * have to be a superset of ASCII.  All multibyte sequences must start
 * with a byte with the high bit set.
 *
 * @sa lib/iconv.c
 */
48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50 static bool initialized;
52 /**
53 * Return the name of a charset to give to iconv().
54 **/
55 static const char *charset_name(charset_t ch)
57 const char *ret;
59 switch (ch) {
60 case CH_UTF16LE:
61 ret = "UTF-16LE";
62 break;
63 case CH_UTF16BE:
64 ret = "UTF-16BE";
65 break;
66 case CH_UNIX:
67 ret = lp_unix_charset();
68 break;
69 case CH_DOS:
70 ret = lp_dos_charset();
71 break;
72 case CH_DISPLAY:
73 ret = lp_display_charset();
74 break;
75 case CH_UTF8:
76 ret = "UTF8";
77 break;
78 default:
79 ret = NULL;
82 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
83 if (ret && !strcmp(ret, "LOCALE")) {
84 const char *ln = NULL;
86 #ifdef HAVE_SETLOCALE
87 setlocale(LC_ALL, "");
88 #endif
89 ln = nl_langinfo(CODESET);
90 if (ln) {
91 /* Check whether the charset name is supported
92 by iconv */
93 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
94 if (handle == (smb_iconv_t) -1) {
95 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
96 ln = NULL;
97 } else {
98 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
99 smb_iconv_close(handle);
102 ret = ln;
104 #endif
106 if (!ret || !*ret) ret = "ASCII";
107 return ret;
110 void lazy_initialize_conv(void)
112 if (!initialized) {
113 load_case_tables();
114 init_iconv();
115 initialized = true;
120 * Destroy global objects allocated by init_iconv()
122 void gfree_charcnv(void)
124 int c1, c2;
126 for (c1=0;c1<NUM_CHARSETS;c1++) {
127 for (c2=0;c2<NUM_CHARSETS;c2++) {
128 if ( conv_handles[c1][c2] ) {
129 smb_iconv_close( conv_handles[c1][c2] );
130 conv_handles[c1][c2] = 0;
134 initialized = false;
138 * Initialize iconv conversion descriptors.
140 * This is called the first time it is needed, and also called again
141 * every time the configuration is reloaded, because the charset or
142 * codepage might have changed.
144 void init_iconv(void)
146 int c1, c2;
147 bool did_reload = False;
149 /* so that charset_name() works we need to get the UNIX<->UCS2 going
150 first */
151 if (!conv_handles[CH_UNIX][CH_UTF16LE])
152 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
154 if (!conv_handles[CH_UTF16LE][CH_UNIX])
155 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
157 for (c1=0;c1<NUM_CHARSETS;c1++) {
158 for (c2=0;c2<NUM_CHARSETS;c2++) {
159 const char *n1 = charset_name((charset_t)c1);
160 const char *n2 = charset_name((charset_t)c2);
161 if (conv_handles[c1][c2] &&
162 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
163 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
164 continue;
166 did_reload = True;
168 if (conv_handles[c1][c2])
169 smb_iconv_close(conv_handles[c1][c2]);
171 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
172 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
173 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
174 charset_name((charset_t)c1), charset_name((charset_t)c2)));
175 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
176 n1 = "ASCII";
178 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
179 n2 = "ASCII";
181 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
182 n1, n2 ));
183 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
184 if (!conv_handles[c1][c2]) {
185 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
186 smb_panic("init_iconv: conv_handle initialization failed");
192 if (did_reload) {
193 /* XXX: Does this really get called every time the dos
194 * codepage changes? */
195 /* XXX: Is the did_reload test too strict? */
196 conv_silent = True;
197 init_valid_table();
198 conv_silent = False;
203 * Convert string from one encoding to another, making error checking etc
204 * Slow path version - uses (slow) iconv.
206 * @param src pointer to source string (multibyte or singlebyte)
207 * @param srclen length of the source string in bytes
208 * @param dest pointer to destination string (multibyte or singlebyte)
209 * @param destlen maximal length allowed for string
210 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
211 * @returns the number of bytes occupied in the destination
213 * Ensure the srclen contains the terminating zero.
217 static size_t convert_string_internal(charset_t from, charset_t to,
218 void const *src, size_t srclen,
219 void *dest, size_t destlen, bool allow_bad_conv)
221 size_t i_len, o_len;
222 size_t retval;
223 const char* inbuf = (const char*)src;
224 char* outbuf = (char*)dest;
225 smb_iconv_t descriptor;
227 lazy_initialize_conv();
229 descriptor = conv_handles[from][to];
231 if (srclen == (size_t)-1) {
232 if (from == CH_UTF16LE || from == CH_UTF16BE) {
233 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
234 } else {
235 srclen = strlen((const char *)src)+1;
240 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
241 if (!conv_silent)
242 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
243 return (size_t)-1;
246 i_len=srclen;
247 o_len=destlen;
249 again:
251 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
252 if(retval==(size_t)-1) {
253 const char *reason="unknown error";
254 switch(errno) {
255 case EINVAL:
256 reason="Incomplete multibyte sequence";
257 if (!conv_silent)
258 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
259 if (allow_bad_conv)
260 goto use_as_is;
261 return (size_t)-1;
262 case E2BIG:
263 reason="No more room";
264 if (!conv_silent) {
265 if (from == CH_UNIX) {
266 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
267 charset_name(from), charset_name(to),
268 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
269 } else {
270 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
271 charset_name(from), charset_name(to),
272 (unsigned int)srclen, (unsigned int)destlen));
275 break;
276 case EILSEQ:
277 reason="Illegal multibyte sequence";
278 if (!conv_silent)
279 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
280 if (allow_bad_conv)
281 goto use_as_is;
283 return (size_t)-1;
284 default:
285 if (!conv_silent)
286 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
287 return (size_t)-1;
289 /* smb_panic(reason); */
291 return destlen-o_len;
293 use_as_is:
296 * Conversion not supported. This is actually an error, but there are so
297 * many misconfigured iconv systems and smb.conf's out there we can't just
298 * fail. Do a very bad conversion instead.... JRA.
302 if (o_len == 0 || i_len == 0)
303 return destlen - o_len;
305 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
306 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
307 /* Can't convert from utf16 any endian to multibyte.
308 Replace with the default fail char.
310 if (i_len < 2)
311 return destlen - o_len;
312 if (i_len >= 2) {
313 *outbuf = lp_failed_convert_char();
315 outbuf++;
316 o_len--;
318 inbuf += 2;
319 i_len -= 2;
322 if (o_len == 0 || i_len == 0)
323 return destlen - o_len;
325 /* Keep trying with the next char... */
326 goto again;
328 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
329 /* Can't convert to UTF16LE - just widen by adding the
330 default fail char then zero.
332 if (o_len < 2)
333 return destlen - o_len;
335 outbuf[0] = lp_failed_convert_char();
336 outbuf[1] = '\0';
338 inbuf++;
339 i_len--;
341 outbuf += 2;
342 o_len -= 2;
344 if (o_len == 0 || i_len == 0)
345 return destlen - o_len;
347 /* Keep trying with the next char... */
348 goto again;
350 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
351 to != CH_UTF16LE && to != CH_UTF16BE) {
352 /* Failed multibyte to multibyte. Just copy the default fail char and
353 try again. */
354 outbuf[0] = lp_failed_convert_char();
356 inbuf++;
357 i_len--;
359 outbuf++;
360 o_len--;
362 if (o_len == 0 || i_len == 0)
363 return destlen - o_len;
365 /* Keep trying with the next char... */
366 goto again;
368 } else {
369 /* Keep compiler happy.... */
370 return destlen - o_len;
376 * Convert string from one encoding to another, making error checking etc
377 * Fast path version - handles ASCII first.
379 * @param src pointer to source string (multibyte or singlebyte)
380 * @param srclen length of the source string in bytes, or -1 for nul terminated.
381 * @param dest pointer to destination string (multibyte or singlebyte)
382 * @param destlen maximal length allowed for string - *NEVER* -1.
383 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
384 * @returns the number of bytes occupied in the destination
386 * Ensure the srclen contains the terminating zero.
388 * This function has been hand-tuned to provide a fast path.
389 * Don't change unless you really know what you are doing. JRA.
392 size_t convert_string(charset_t from, charset_t to,
393 void const *src, size_t srclen,
394 void *dest, size_t destlen, bool allow_bad_conv)
397 * NB. We deliberately don't do a strlen here if srclen == -1.
398 * This is very expensive over millions of calls and is taken
399 * care of in the slow path in convert_string_internal. JRA.
402 #ifdef DEVELOPER
403 SMB_ASSERT(destlen != (size_t)-1);
404 #endif
406 if (srclen == 0)
407 return 0;
409 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
410 const unsigned char *p = (const unsigned char *)src;
411 unsigned char *q = (unsigned char *)dest;
412 size_t slen = srclen;
413 size_t dlen = destlen;
414 unsigned char lastp = '\0';
415 size_t retval = 0;
417 /* If all characters are ascii, fast path here. */
418 while (slen && dlen) {
419 if ((lastp = *p) <= 0x7f) {
420 *q++ = *p++;
421 if (slen != (size_t)-1) {
422 slen--;
424 dlen--;
425 retval++;
426 if (!lastp)
427 break;
428 } else {
429 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
430 goto general_case;
431 #else
432 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
433 if (ret == (size_t)-1) {
434 return ret;
436 return retval + ret;
437 #endif
440 if (!dlen) {
441 /* Even if we fast path we should note if we ran out of room. */
442 if (((slen != (size_t)-1) && slen) ||
443 ((slen == (size_t)-1) && lastp)) {
444 errno = E2BIG;
447 return retval;
448 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
449 const unsigned char *p = (const unsigned char *)src;
450 unsigned char *q = (unsigned char *)dest;
451 size_t retval = 0;
452 size_t slen = srclen;
453 size_t dlen = destlen;
454 unsigned char lastp = '\0';
456 /* If all characters are ascii, fast path here. */
457 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
458 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
459 *q++ = *p;
460 if (slen != (size_t)-1) {
461 slen -= 2;
463 p += 2;
464 dlen--;
465 retval++;
466 if (!lastp)
467 break;
468 } else {
469 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
470 goto general_case;
471 #else
472 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
473 if (ret == (size_t)-1) {
474 return ret;
476 return retval + ret;
477 #endif
480 if (!dlen) {
481 /* Even if we fast path we should note if we ran out of room. */
482 if (((slen != (size_t)-1) && slen) ||
483 ((slen == (size_t)-1) && lastp)) {
484 errno = E2BIG;
487 return retval;
488 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
489 const unsigned char *p = (const unsigned char *)src;
490 unsigned char *q = (unsigned char *)dest;
491 size_t retval = 0;
492 size_t slen = srclen;
493 size_t dlen = destlen;
494 unsigned char lastp = '\0';
496 /* If all characters are ascii, fast path here. */
497 while (slen && (dlen >= 2)) {
498 if ((lastp = *p) <= 0x7F) {
499 *q++ = *p++;
500 *q++ = '\0';
501 if (slen != (size_t)-1) {
502 slen--;
504 dlen -= 2;
505 retval += 2;
506 if (!lastp)
507 break;
508 } else {
509 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
510 goto general_case;
511 #else
512 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
513 if (ret == (size_t)-1) {
514 return ret;
516 return retval + ret;
517 #endif
520 if (!dlen) {
521 /* Even if we fast path we should note if we ran out of room. */
522 if (((slen != (size_t)-1) && slen) ||
523 ((slen == (size_t)-1) && lastp)) {
524 errno = E2BIG;
527 return retval;
530 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
531 general_case:
532 #endif
533 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
537 * Convert between character sets, allocating a new buffer using talloc for the result.
539 * @param srclen length of source buffer.
540 * @param dest always set at least to NULL
541 * @parm converted_size set to the number of bytes occupied by the string in
542 * the destination on success.
543 * @note -1 is not accepted for srclen.
545 * @return true if new buffer was correctly allocated, and string was
546 * converted.
548 * Ensure the srclen contains the terminating zero.
550 * I hate the goto's in this function. It's embarressing.....
551 * There has to be a cleaner way to do this. JRA.
553 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
554 void const *src, size_t srclen, void *dst,
555 size_t *converted_size, bool allow_bad_conv)
558 size_t i_len, o_len, destlen = (srclen * 3) / 2;
559 size_t retval;
560 const char *inbuf = (const char *)src;
561 char *outbuf = NULL, *ob = NULL;
562 smb_iconv_t descriptor;
563 void **dest = (void **)dst;
565 *dest = NULL;
567 if (!converted_size) {
568 errno = EINVAL;
569 return false;
572 if (src == NULL || srclen == (size_t)-1) {
573 errno = EINVAL;
574 return false;
576 if (srclen == 0) {
577 ob = talloc_strdup(ctx, "");
578 if (ob == NULL) {
579 errno = ENOMEM;
580 return false;
582 *dest = ob;
583 *converted_size = 0;
584 return true;
587 lazy_initialize_conv();
589 descriptor = conv_handles[from][to];
591 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
592 if (!conv_silent)
593 DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
594 errno = EOPNOTSUPP;
595 return false;
598 convert:
600 /* +2 is for ucs2 null termination. */
601 if ((destlen*2)+2 < destlen) {
602 /* wrapped ! abort. */
603 if (!conv_silent)
604 DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
605 TALLOC_FREE(outbuf);
606 errno = EOPNOTSUPP;
607 return false;
608 } else {
609 destlen = destlen * 2;
612 /* +2 is for ucs2 null termination. */
613 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
615 if (!ob) {
616 DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
617 errno = ENOMEM;
618 return false;
620 outbuf = ob;
621 i_len = srclen;
622 o_len = destlen;
624 again:
626 retval = smb_iconv(descriptor,
627 &inbuf, &i_len,
628 &outbuf, &o_len);
629 if(retval == (size_t)-1) {
630 const char *reason="unknown error";
631 switch(errno) {
632 case EINVAL:
633 reason="Incomplete multibyte sequence";
634 if (!conv_silent)
635 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
636 if (allow_bad_conv)
637 goto use_as_is;
638 break;
639 case E2BIG:
640 goto convert;
641 case EILSEQ:
642 reason="Illegal multibyte sequence";
643 if (!conv_silent)
644 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
645 if (allow_bad_conv)
646 goto use_as_is;
647 break;
649 if (!conv_silent)
650 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
651 /* smb_panic(reason); */
652 TALLOC_FREE(ob);
653 return false;
656 out:
658 destlen = destlen - o_len;
659 /* Don't shrink unless we're reclaiming a lot of
660 * space. This is in the hot codepath and these
661 * reallocs *cost*. JRA.
663 if (o_len > 1024) {
664 /* We're shrinking here so we know the +2 is safe from wrap. */
665 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
668 if (destlen && !ob) {
669 DEBUG(0, ("convert_string_talloc: out of memory!\n"));
670 errno = ENOMEM;
671 return false;
674 *dest = ob;
676 /* Must ucs2 null terminate in the extra space we allocated. */
677 ob[destlen] = '\0';
678 ob[destlen+1] = '\0';
680 *converted_size = destlen;
681 return true;
683 use_as_is:
686 * Conversion not supported. This is actually an error, but there are so
687 * many misconfigured iconv systems and smb.conf's out there we can't just
688 * fail. Do a very bad conversion instead.... JRA.
692 if (o_len == 0 || i_len == 0)
693 goto out;
695 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
696 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
697 /* Can't convert from utf16 any endian to multibyte.
698 Replace with the default fail char.
701 if (i_len < 2)
702 goto out;
704 if (i_len >= 2) {
705 *outbuf = lp_failed_convert_char();
707 outbuf++;
708 o_len--;
710 inbuf += 2;
711 i_len -= 2;
714 if (o_len == 0 || i_len == 0)
715 goto out;
717 /* Keep trying with the next char... */
718 goto again;
720 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
721 /* Can't convert to UTF16LE - just widen by adding the
722 default fail char then zero.
724 if (o_len < 2)
725 goto out;
727 outbuf[0] = lp_failed_convert_char();
728 outbuf[1] = '\0';
730 inbuf++;
731 i_len--;
733 outbuf += 2;
734 o_len -= 2;
736 if (o_len == 0 || i_len == 0)
737 goto out;
739 /* Keep trying with the next char... */
740 goto again;
742 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
743 to != CH_UTF16LE && to != CH_UTF16BE) {
744 /* Failed multibyte to multibyte. Just copy the default fail char and
745 try again. */
746 outbuf[0] = lp_failed_convert_char();
748 inbuf++;
749 i_len--;
751 outbuf++;
752 o_len--;
754 if (o_len == 0 || i_len == 0)
755 goto out;
757 /* Keep trying with the next char... */
758 goto again;
760 } else {
761 /* Keep compiler happy.... */
762 goto out;
767 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
769 size_t size;
770 smb_ucs2_t *buffer;
772 if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
773 return (size_t)-1;
776 if (!strupper_w(buffer) && (dest == src)) {
777 TALLOC_FREE(buffer);
778 return srclen;
781 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
782 TALLOC_FREE(buffer);
783 return size;
787 talloc_strdup() a unix string to upper case.
790 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
792 char *out_buffer = talloc_strdup(ctx,s);
793 const unsigned char *p = (const unsigned char *)s;
794 unsigned char *q = (unsigned char *)out_buffer;
796 if (!q) {
797 return NULL;
800 /* this is quite a common operation, so we want it to be
801 fast. We optimise for the ascii case, knowing that all our
802 supported multi-byte character sets are ascii-compatible
803 (ie. they match for the first 128 chars) */
805 while (*p) {
806 if (*p & 0x80)
807 break;
808 *q++ = toupper_ascii_fast(*p);
809 p++;
812 if (*p) {
813 /* MB case. */
814 size_t converted_size, converted_size2;
815 smb_ucs2_t *ubuf = NULL;
817 /* We're not using the ascii buffer above. */
818 TALLOC_FREE(out_buffer);
820 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
821 strlen(s)+1, (void *)&ubuf,
822 &converted_size, True))
824 return NULL;
827 strupper_w(ubuf);
829 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
830 converted_size, (void *)&out_buffer,
831 &converted_size2, True))
833 TALLOC_FREE(ubuf);
834 return NULL;
837 /* Don't need the intermediate buffer
838 * anymore.
840 TALLOC_FREE(ubuf);
843 return out_buffer;
846 char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
847 return talloc_strdup_upper(ctx, s);
851 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
853 size_t size;
854 smb_ucs2_t *buffer = NULL;
856 if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
857 (void **)(void *)&buffer, &size,
858 True))
860 smb_panic("failed to create UCS2 buffer");
862 if (!strlower_w(buffer) && (dest == src)) {
863 TALLOC_FREE(buffer);
864 return srclen;
866 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
867 TALLOC_FREE(buffer);
868 return size;
872 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
874 size_t converted_size;
875 smb_ucs2_t *buffer = NULL;
876 char *out_buffer;
878 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
879 return NULL;
882 strlower_w(buffer);
884 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
885 TALLOC_FREE(buffer);
886 return NULL;
889 TALLOC_FREE(buffer);
891 return out_buffer;
894 char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
895 return talloc_strdup_lower(ctx, s);
898 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
900 if (flags & (STR_NOALIGN|STR_ASCII))
901 return 0;
902 return PTR_DIFF(p, base_ptr) & 1;
907 * Copy a string from a char* unix src to a dos codepage string destination.
909 * @return the number of bytes occupied by the string in the destination.
911 * @param flags can include
912 * <dl>
913 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
914 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
915 * </dl>
917 * @param dest_len the maximum length in bytes allowed in the
918 * destination.
920 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
922 size_t src_len = strlen(src);
923 char *tmpbuf = NULL;
924 size_t ret;
926 /* No longer allow a length of -1. */
927 if (dest_len == (size_t)-1) {
928 smb_panic("push_ascii - dest_len == -1");
931 if (flags & STR_UPPER) {
932 tmpbuf = SMB_STRDUP(src);
933 if (!tmpbuf) {
934 smb_panic("malloc fail");
936 strupper_m(tmpbuf);
937 src = tmpbuf;
940 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
941 src_len++;
944 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
945 if (ret == (size_t)-1 &&
946 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
947 && dest_len > 0) {
948 ((char *)dest)[0] = '\0';
950 SAFE_FREE(tmpbuf);
951 return ret;
954 size_t push_ascii_fstring(void *dest, const char *src)
956 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
959 /********************************************************************
960 Push an nstring - ensure null terminated. Written by
961 moriyama@miraclelinux.com (MORIYAMA Masayuki).
962 ********************************************************************/
964 size_t push_ascii_nstring(void *dest, const char *src)
966 size_t i, buffer_len, dest_len;
967 smb_ucs2_t *buffer;
969 conv_silent = True;
970 if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &buffer_len)) {
971 smb_panic("failed to create UCS2 buffer");
974 /* We're using buffer_len below to count ucs2 characters, not bytes. */
975 buffer_len /= sizeof(smb_ucs2_t);
977 dest_len = 0;
978 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
979 unsigned char mb[10];
980 /* Convert one smb_ucs2_t character at a time. */
981 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
982 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
983 memcpy((char *)dest + dest_len, mb, mb_len);
984 dest_len += mb_len;
985 } else {
986 errno = E2BIG;
987 break;
990 ((char *)dest)[dest_len] = '\0';
992 conv_silent = False;
993 TALLOC_FREE(buffer);
994 return dest_len;
997 /********************************************************************
998 Push and malloc an ascii string. src and dest null terminated.
999 ********************************************************************/
1001 bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
1003 size_t src_len = strlen(src)+1;
1005 *dest = NULL;
1006 return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
1007 (void **)dest, converted_size, True);
1011 * Copy a string from a dos codepage source to a unix char* destination.
1013 * The resulting string in "dest" is always null terminated.
1015 * @param flags can have:
1016 * <dl>
1017 * <dt>STR_TERMINATE</dt>
1018 * <dd>STR_TERMINATE means the string in @p src
1019 * is null terminated, and src_len is ignored.</dd>
1020 * </dl>
1022 * @param src_len is the length of the source area in bytes.
1023 * @returns the number of bytes occupied by the string in @p src.
1025 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1027 size_t ret;
1029 if (dest_len == (size_t)-1) {
1030 /* No longer allow dest_len of -1. */
1031 smb_panic("pull_ascii - invalid dest_len of -1");
1034 if (flags & STR_TERMINATE) {
1035 if (src_len == (size_t)-1) {
1036 src_len = strlen((const char *)src) + 1;
1037 } else {
1038 size_t len = strnlen((const char *)src, src_len);
1039 if (len < src_len)
1040 len++;
1041 src_len = len;
1045 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1046 if (ret == (size_t)-1) {
1047 ret = 0;
1048 dest_len = 0;
1051 if (dest_len && ret) {
1052 /* Did we already process the terminating zero ? */
1053 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1054 dest[MIN(ret, dest_len-1)] = 0;
1056 } else {
1057 dest[0] = 0;
1060 return src_len;
1064 * Copy a string from a dos codepage source to a unix char* destination.
1065 * Talloc version.
1067 * The resulting string in "dest" is always null terminated.
1069 * @param flags can have:
1070 * <dl>
1071 * <dt>STR_TERMINATE</dt>
1072 * <dd>STR_TERMINATE means the string in @p src
1073 * is null terminated, and src_len is ignored.</dd>
1074 * </dl>
1076 * @param src_len is the length of the source area in bytes.
1077 * @returns the number of bytes occupied by the string in @p src.
1080 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1081 char **ppdest,
1082 const void *src,
1083 size_t src_len,
1084 int flags)
1086 char *dest = NULL;
1087 size_t dest_len;
1089 *ppdest = NULL;
1091 if (!src_len) {
1092 return 0;
1095 if (flags & STR_TERMINATE) {
1096 if (src_len == (size_t)-1) {
1097 src_len = strlen((const char *)src) + 1;
1098 } else {
1099 size_t len = strnlen((const char *)src, src_len);
1100 if (len < src_len)
1101 len++;
1102 src_len = len;
1104 /* Ensure we don't use an insane length from the client. */
1105 if (src_len >= 1024*1024) {
1106 char *msg = talloc_asprintf(ctx,
1107 "Bad src length (%u) in "
1108 "pull_ascii_base_talloc",
1109 (unsigned int)src_len);
1110 smb_panic(msg);
1112 } else {
1113 /* Can't have an unlimited length
1114 * non STR_TERMINATE'd.
1116 if (src_len == (size_t)-1) {
1117 errno = EINVAL;
1118 return 0;
1122 /* src_len != -1 here. */
1124 if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1125 &dest_len, True)) {
1126 dest_len = 0;
1129 if (dest_len && dest) {
1130 /* Did we already process the terminating zero ? */
1131 if (dest[dest_len-1] != 0) {
1132 size_t size = talloc_get_size(dest);
1133 /* Have we got space to append the '\0' ? */
1134 if (size <= dest_len) {
1135 /* No, realloc. */
1136 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1137 dest_len+1);
1138 if (!dest) {
1139 /* talloc fail. */
1140 dest_len = (size_t)-1;
1141 return 0;
1144 /* Yay - space ! */
1145 dest[dest_len] = '\0';
1146 dest_len++;
1148 } else if (dest) {
1149 dest[0] = 0;
1152 *ppdest = dest;
1153 return src_len;
1156 size_t pull_ascii_fstring(char *dest, const void *src)
1158 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1161 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1163 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1165 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1169 * Copy a string from a char* src to a unicode destination.
1171 * @returns the number of bytes occupied by the string in the destination.
1173 * @param flags can have:
1175 * <dl>
1176 * <dt>STR_TERMINATE <dd>means include the null termination.
1177 * <dt>STR_UPPER <dd>means uppercase in the destination.
1178 * <dt>STR_NOALIGN <dd>means don't do alignment.
1179 * </dl>
1181 * @param dest_len is the maximum length allowed in the
1182 * destination.
1185 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1187 size_t len=0;
1188 size_t src_len;
1189 size_t ret;
1191 if (dest_len == (size_t)-1) {
1192 /* No longer allow dest_len of -1. */
1193 smb_panic("push_ucs2 - invalid dest_len of -1");
1196 if (flags & STR_TERMINATE)
1197 src_len = (size_t)-1;
1198 else
1199 src_len = strlen(src);
1201 if (ucs2_align(base_ptr, dest, flags)) {
1202 *(char *)dest = 0;
1203 dest = (void *)((char *)dest + 1);
1204 if (dest_len)
1205 dest_len--;
1206 len++;
1209 /* ucs2 is always a multiple of 2 bytes */
1210 dest_len &= ~1;
1212 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1213 if (ret == (size_t)-1) {
1214 if ((flags & STR_TERMINATE) &&
1215 dest &&
1216 dest_len) {
1217 *(char *)dest = 0;
1219 return len;
1222 len += ret;
1224 if (flags & STR_UPPER) {
1225 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1226 size_t i;
1228 /* We check for i < (ret / 2) below as the dest string isn't null
1229 terminated if STR_TERMINATE isn't set. */
1231 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1232 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1233 if (v != dest_ucs2[i]) {
1234 dest_ucs2[i] = v;
1239 return len;
1244 * Copy a string from a unix char* src to a UCS2 destination,
1245 * allocating a buffer using talloc().
1247 * @param dest always set at least to NULL
1248 * @parm converted_size set to the number of bytes occupied by the string in
1249 * the destination on success.
1251 * @return true if new buffer was correctly allocated, and string was
1252 * converted.
1254 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1255 size_t *converted_size)
1257 size_t src_len = strlen(src)+1;
1259 *dest = NULL;
1260 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1261 (void **)dest, converted_size, True);
1266 Copy a string from a char* src to a UTF-8 destination.
1267 Return the number of bytes occupied by the string in the destination
1268 Flags can have:
1269 STR_TERMINATE means include the null termination
1270 STR_UPPER means uppercase in the destination
1271 dest_len is the maximum length allowed in the destination. If dest_len
1272 is -1 then no maxiumum is used.
1275 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1277 size_t src_len = 0;
1278 size_t ret;
1279 char *tmpbuf = NULL;
1281 if (dest_len == (size_t)-1) {
1282 /* No longer allow dest_len of -1. */
1283 smb_panic("push_utf8 - invalid dest_len of -1");
1286 if (flags & STR_UPPER) {
1287 tmpbuf = strupper_talloc(talloc_tos(), src);
1288 if (!tmpbuf) {
1289 return (size_t)-1;
1291 src = tmpbuf;
1292 src_len = strlen(src);
1295 src_len = strlen(src);
1296 if (flags & STR_TERMINATE) {
1297 src_len++;
1300 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1301 TALLOC_FREE(tmpbuf);
1302 return ret;
1305 size_t push_utf8_fstring(void *dest, const char *src)
1307 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1311 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1313 * @param dest always set at least to NULL
1314 * @parm converted_size set to the number of bytes occupied by the string in
1315 * the destination on success.
1317 * @return true if new buffer was correctly allocated, and string was
1318 * converted.
1321 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1322 size_t *converted_size)
1324 size_t src_len = strlen(src)+1;
1326 *dest = NULL;
1327 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1328 (void**)dest, converted_size, True);
1332 Copy a string from a ucs2 source to a unix char* destination.
1333 Flags can have:
1334 STR_TERMINATE means the string in src is null terminated.
1335 STR_NOALIGN means don't try to align.
1336 if STR_TERMINATE is set then src_len is ignored if it is -1.
1337 src_len is the length of the source area in bytes
1338 Return the number of bytes occupied by the string in src.
1339 The resulting string in "dest" is always null terminated.
1342 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1344 size_t ret;
1346 if (dest_len == (size_t)-1) {
1347 /* No longer allow dest_len of -1. */
1348 smb_panic("pull_ucs2 - invalid dest_len of -1");
1351 if (!src_len) {
1352 if (dest && dest_len > 0) {
1353 dest[0] = '\0';
1355 return 0;
1358 if (ucs2_align(base_ptr, src, flags)) {
1359 src = (const void *)((const char *)src + 1);
1360 if (src_len != (size_t)-1)
1361 src_len--;
1364 if (flags & STR_TERMINATE) {
1365 /* src_len -1 is the default for null terminated strings. */
1366 if (src_len != (size_t)-1) {
1367 size_t len = strnlen_w((const smb_ucs2_t *)src,
1368 src_len/2);
1369 if (len < src_len/2)
1370 len++;
1371 src_len = len*2;
1375 /* ucs2 is always a multiple of 2 bytes */
1376 if (src_len != (size_t)-1)
1377 src_len &= ~1;
1379 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1380 if (ret == (size_t)-1) {
1381 ret = 0;
1382 dest_len = 0;
1385 if (src_len == (size_t)-1)
1386 src_len = ret*2;
1388 if (dest_len && ret) {
1389 /* Did we already process the terminating zero ? */
1390 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1391 dest[MIN(ret, dest_len-1)] = 0;
1393 } else {
1394 dest[0] = 0;
1397 return src_len;
1401 Copy a string from a ucs2 source to a unix char* destination.
1402 Talloc version with a base pointer.
1403 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1404 needs fixing. JRA).
1405 Flags can have:
1406 STR_TERMINATE means the string in src is null terminated.
1407 STR_NOALIGN means don't try to align.
1408 if STR_TERMINATE is set then src_len is ignored if it is -1.
1409 src_len is the length of the source area in bytes
1410 Return the number of bytes occupied by the string in src.
1411 The resulting string in "dest" is always null terminated.
1414 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1415 const void *base_ptr,
1416 char **ppdest,
1417 const void *src,
1418 size_t src_len,
1419 int flags)
1421 char *dest;
1422 size_t dest_len;
1424 *ppdest = NULL;
1426 #ifdef DEVELOPER
1427 /* Ensure we never use the braindead "malloc" varient. */
1428 if (ctx == NULL) {
1429 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1431 #endif
1433 if (!src_len) {
1434 return 0;
1437 if (ucs2_align(base_ptr, src, flags)) {
1438 src = (const void *)((const char *)src + 1);
1439 if (src_len != (size_t)-1)
1440 src_len--;
1443 if (flags & STR_TERMINATE) {
1444 /* src_len -1 is the default for null terminated strings. */
1445 if (src_len != (size_t)-1) {
1446 size_t len = strnlen_w((const smb_ucs2_t *)src,
1447 src_len/2);
1448 if (len < src_len/2)
1449 len++;
1450 src_len = len*2;
1451 } else {
1453 * src_len == -1 - alloc interface won't take this
1454 * so we must calculate.
1456 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1458 /* Ensure we don't use an insane length from the client. */
1459 if (src_len >= 1024*1024) {
1460 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1462 } else {
1463 /* Can't have an unlimited length
1464 * non STR_TERMINATE'd.
1466 if (src_len == (size_t)-1) {
1467 errno = EINVAL;
1468 return 0;
1472 /* src_len != -1 here. */
1474 /* ucs2 is always a multiple of 2 bytes */
1475 src_len &= ~1;
1477 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1478 (void *)&dest, &dest_len, True)) {
1479 dest_len = 0;
1482 if (dest_len) {
1483 /* Did we already process the terminating zero ? */
1484 if (dest[dest_len-1] != 0) {
1485 size_t size = talloc_get_size(dest);
1486 /* Have we got space to append the '\0' ? */
1487 if (size <= dest_len) {
1488 /* No, realloc. */
1489 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1490 dest_len+1);
1491 if (!dest) {
1492 /* talloc fail. */
1493 dest_len = (size_t)-1;
1494 return 0;
1497 /* Yay - space ! */
1498 dest[dest_len] = '\0';
1499 dest_len++;
1501 } else if (dest) {
1502 dest[0] = 0;
1505 *ppdest = dest;
1506 return src_len;
1509 size_t pull_ucs2_fstring(char *dest, const void *src)
1511 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1515 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1517 * @param dest always set at least to NULL
1518 * @parm converted_size set to the number of bytes occupied by the string in
1519 * the destination on success.
1521 * @return true if new buffer was correctly allocated, and string was
1522 * converted.
1525 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1526 size_t *converted_size)
1528 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1530 *dest = NULL;
1531 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1532 (void **)dest, converted_size, True);
1536 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1538 * @param dest always set at least to NULL
1539 * @parm converted_size set to the number of bytes occupied by the string in
1540 * the destination on success.
1542 * @return true if new buffer was correctly allocated, and string was
1543 * converted.
1546 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1547 size_t *converted_size)
1549 size_t src_len = strlen(src)+1;
1551 *dest = NULL;
1552 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1553 (void **)dest, converted_size, True);
1558 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1560 * @param dest always set at least to NULL
1561 * @parm converted_size set to the number of bytes occupied by the string in
1562 * the destination on success.
1564 * @return true if new buffer was correctly allocated, and string was
1565 * converted.
1568 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1569 size_t *converted_size)
1571 size_t src_len = strlen(src)+1;
1573 *dest = NULL;
1574 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1575 (void **)dest, converted_size, True);
1579 Copy a string from a char* src to a unicode or ascii
1580 dos codepage destination choosing unicode or ascii based on the
1581 flags supplied
1582 Return the number of bytes occupied by the string in the destination.
1583 flags can have:
1584 STR_TERMINATE means include the null termination.
1585 STR_UPPER means uppercase in the destination.
1586 STR_ASCII use ascii even with unicode packet.
1587 STR_NOALIGN means don't do alignment.
1588 dest_len is the maximum length allowed in the destination. If dest_len
1589 is -1 then no maxiumum is used.
1592 size_t push_string_check_fn(const char *function, unsigned int line,
1593 void *dest, const char *src,
1594 size_t dest_len, int flags)
1596 #ifdef DEVELOPER
1597 /* We really need to zero fill here, not clobber
1598 * region, as we want to ensure that valgrind thinks
1599 * all of the outgoing buffer has been written to
1600 * so a send() or write() won't trap an error.
1601 * JRA.
1603 #if 0
1604 clobber_region(function, line, dest, dest_len);
1605 #else
1606 memset(dest, '\0', dest_len);
1607 #endif
1608 #endif
1610 if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1611 return push_ucs2(NULL, dest, src, dest_len, flags);
1613 return push_ascii(dest, src, dest_len, flags);
1618 Copy a string from a char* src to a unicode or ascii
1619 dos codepage destination choosing unicode or ascii based on the
1620 flags in the SMB buffer starting at base_ptr.
1621 Return the number of bytes occupied by the string in the destination.
1622 flags can have:
1623 STR_TERMINATE means include the null termination.
1624 STR_UPPER means uppercase in the destination.
1625 STR_ASCII use ascii even with unicode packet.
1626 STR_NOALIGN means don't do alignment.
1627 dest_len is the maximum length allowed in the destination. If dest_len
1628 is -1 then no maxiumum is used.
1631 size_t push_string_base(const char *function, unsigned int line,
1632 const char *base, uint16 flags2,
1633 void *dest, const char *src,
1634 size_t dest_len, int flags)
1636 #ifdef DEVELOPER
1637 /* We really need to zero fill here, not clobber
1638 * region, as we want to ensure that valgrind thinks
1639 * all of the outgoing buffer has been written to
1640 * so a send() or write() won't trap an error.
1641 * JRA.
1643 #if 0
1644 clobber_region(function, line, dest, dest_len);
1645 #else
1646 memset(dest, '\0', dest_len);
1647 #endif
1648 #endif
1650 if (!(flags & STR_ASCII) && \
1651 ((flags & STR_UNICODE || \
1652 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1653 return push_ucs2(base, dest, src, dest_len, flags);
1655 return push_ascii(dest, src, dest_len, flags);
1659 Copy a string from a char* src to a unicode or ascii
1660 dos codepage destination choosing unicode or ascii based on the
1661 flags supplied
1662 Return the number of bytes occupied by the string in the destination.
1663 flags can have:
1664 STR_TERMINATE means include the null termination.
1665 STR_UPPER means uppercase in the destination.
1666 STR_ASCII use ascii even with unicode packet.
1667 STR_NOALIGN means don't do alignment.
1668 dest_len is the maximum length allowed in the destination. If dest_len
1669 is -1 then no maxiumum is used.
1672 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
1674 size_t ret;
1675 #ifdef DEVELOPER
1676 /* We really need to zero fill here, not clobber
1677 * region, as we want to ensure that valgrind thinks
1678 * all of the outgoing buffer has been written to
1679 * so a send() or write() won't trap an error.
1680 * JRA.
1682 memset(dest, '\0', dest_len);
1683 #endif
1685 if (!(flags & STR_ASCII) && \
1686 (flags & STR_UNICODE)) {
1687 ret = push_ucs2(NULL, dest, src, dest_len, flags);
1688 } else {
1689 ret = push_ascii(dest, src, dest_len, flags);
1691 if (ret == (size_t)-1) {
1692 return -1;
1694 return ret;
1698 Copy a string from a unicode or ascii source (depending on
1699 the packet flags) to a char* destination.
1700 Flags can have:
1701 STR_TERMINATE means the string in src is null terminated.
1702 STR_UNICODE means to force as unicode.
1703 STR_ASCII use ascii even with unicode packet.
1704 STR_NOALIGN means don't do alignment.
1705 if STR_TERMINATE is set then src_len is ignored is it is -1
1706 src_len is the length of the source area in bytes.
1707 Return the number of bytes occupied by the string in src.
1708 The resulting string in "dest" is always null terminated.
1711 size_t pull_string_fn(const char *function,
1712 unsigned int line,
1713 const void *base_ptr,
1714 uint16 smb_flags2,
1715 char *dest,
1716 const void *src,
1717 size_t dest_len,
1718 size_t src_len,
1719 int flags)
1721 #ifdef DEVELOPER
1722 clobber_region(function, line, dest, dest_len);
1723 #endif
1725 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1726 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1727 "UNICODE defined");
1730 if (!(flags & STR_ASCII) && \
1731 ((flags & STR_UNICODE || \
1732 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1733 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1735 return pull_ascii(dest, src, dest_len, src_len, flags);
1739 Copy a string from a unicode or ascii source (depending on
1740 the packet flags) to a char* destination.
1741 Variant that uses talloc.
1742 Flags can have:
1743 STR_TERMINATE means the string in src is null terminated.
1744 STR_UNICODE means to force as unicode.
1745 STR_ASCII use ascii even with unicode packet.
1746 STR_NOALIGN means don't do alignment.
1747 if STR_TERMINATE is set then src_len is ignored is it is -1
1748 src_len is the length of the source area in bytes.
1749 Return the number of bytes occupied by the string in src.
1750 The resulting string in "dest" is always null terminated.
1753 size_t pull_string_talloc_fn(const char *function,
1754 unsigned int line,
1755 TALLOC_CTX *ctx,
1756 const void *base_ptr,
1757 uint16 smb_flags2,
1758 char **ppdest,
1759 const void *src,
1760 size_t src_len,
1761 int flags)
1763 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1764 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1765 "UNICODE defined");
1768 if (!(flags & STR_ASCII) && \
1769 ((flags & STR_UNICODE || \
1770 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1771 return pull_ucs2_base_talloc(ctx,
1772 base_ptr,
1773 ppdest,
1774 src,
1775 src_len,
1776 flags);
1778 return pull_ascii_base_talloc(ctx,
1779 ppdest,
1780 src,
1781 src_len,
1782 flags);
1786 size_t align_string(const void *base_ptr, const char *p, int flags)
1788 if (!(flags & STR_ASCII) && \
1789 ((flags & STR_UNICODE || \
1790 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1791 return ucs2_align(base_ptr, p, flags);
1793 return 0;
1797 Return the unicode codepoint for the next multi-byte CH_UNIX character
1798 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1800 Also return the number of bytes consumed (which tells the caller
1801 how many bytes to skip to get to the next CH_UNIX character).
1803 Return INVALID_CODEPOINT if the next character cannot be converted.
1806 codepoint_t next_codepoint(const char *str, size_t *size)
1808 /* It cannot occupy more than 4 bytes in UTF16 format */
1809 uint8_t buf[4];
1810 smb_iconv_t descriptor;
1811 size_t ilen_orig;
1812 size_t ilen;
1813 size_t olen;
1814 char *outbuf;
1816 if ((str[0] & 0x80) == 0) {
1817 *size = 1;
1818 return (codepoint_t)str[0];
1821 /* We assume that no multi-byte character can take
1822 more than 5 bytes. This is OK as we only
1823 support codepoints up to 1M */
1825 ilen_orig = strnlen(str, 5);
1826 ilen = ilen_orig;
1828 lazy_initialize_conv();
1830 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1831 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1832 *size = 1;
1833 return INVALID_CODEPOINT;
1836 /* This looks a little strange, but it is needed to cope
1837 with codepoints above 64k which are encoded as per RFC2781. */
1838 olen = 2;
1839 outbuf = (char *)buf;
1840 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1841 if (olen == 2) {
1842 /* We failed to convert to a 2 byte character.
1843 See if we can convert to a 4 UTF16-LE byte char encoding.
1845 olen = 4;
1846 outbuf = (char *)buf;
1847 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1848 if (olen == 4) {
1849 /* We didn't convert any bytes */
1850 *size = 1;
1851 return INVALID_CODEPOINT;
1853 olen = 4 - olen;
1854 } else {
1855 olen = 2 - olen;
1858 *size = ilen_orig - ilen;
1860 if (olen == 2) {
1861 /* 2 byte, UTF16-LE encoded value. */
1862 return (codepoint_t)SVAL(buf, 0);
1864 if (olen == 4) {
1865 /* Decode a 4 byte UTF16-LE character manually.
1866 See RFC2871 for the encoding machanism.
1868 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1869 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1871 return (codepoint_t)0x10000 +
1872 (w1 << 10) + w2;
1875 /* no other length is valid */
1876 return INVALID_CODEPOINT;
1880 push a single codepoint into a CH_UNIX string the target string must
1881 be able to hold the full character, which is guaranteed if it is at
1882 least 5 bytes in size. The caller may pass less than 5 bytes if they
1883 are sure the character will fit (for example, you can assume that
1884 uppercase/lowercase of a character will not add more than 1 byte)
1886 return the number of bytes occupied by the CH_UNIX character, or
1887 -1 on failure
1889 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1891 smb_iconv_t descriptor;
1892 uint8_t buf[4];
1893 size_t ilen, olen;
1894 const char *inbuf;
1896 if (c < 128) {
1897 *str = c;
1898 return 1;
1901 lazy_initialize_conv();
1903 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1904 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1905 return -1;
1908 if (c < 0x10000) {
1909 ilen = 2;
1910 olen = 5;
1911 inbuf = (char *)buf;
1912 SSVAL(buf, 0, c);
1913 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1914 if (ilen != 0) {
1915 return -1;
1917 return 5 - olen;
1920 c -= 0x10000;
1922 buf[0] = (c>>10) & 0xFF;
1923 buf[1] = (c>>18) | 0xd8;
1924 buf[2] = c & 0xFF;
1925 buf[3] = ((c>>8) & 0x3) | 0xdc;
1927 ilen = 4;
1928 olen = 5;
1929 inbuf = (char *)buf;
1931 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1932 if (ilen != 0) {
1933 return -1;
1935 return 5 - olen;