s3: Fix bug #9085.
[Samba.git] / source3 / lib / charcnv.c
blob743f748669ac7d868ae469a16581222bbc56e989
1 /*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #include "includes.h"
/* We can parameterize this if someone complains.... JRA. */

/**
 * Return the character substituted for input that cannot be converted.
 *
 * @returns always '_'.
 **/
char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
32 /**
33 * @file
35 * @brief Character-set conversion routines built on our iconv.
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
44 * @sa lib/iconv.c
48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50 static bool initialized;
52 /**
53 * Return the name of a charset to give to iconv().
54 **/
55 static const char *charset_name(charset_t ch)
57 const char *ret;
59 switch (ch) {
60 case CH_UTF16LE:
61 ret = "UTF-16LE";
62 break;
63 case CH_UTF16BE:
64 ret = "UTF-16BE";
65 break;
66 case CH_UNIX:
67 ret = lp_unix_charset();
68 break;
69 case CH_DOS:
70 ret = lp_dos_charset();
71 break;
72 case CH_DISPLAY:
73 ret = lp_display_charset();
74 break;
75 case CH_UTF8:
76 ret = "UTF8";
77 break;
78 default:
79 ret = NULL;
82 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
83 if (ret && !strcmp(ret, "LOCALE")) {
84 const char *ln = NULL;
86 #ifdef HAVE_SETLOCALE
87 setlocale(LC_ALL, "");
88 #endif
89 ln = nl_langinfo(CODESET);
90 if (ln) {
91 /* Check whether the charset name is supported
92 by iconv */
93 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
94 if (handle == (smb_iconv_t) -1) {
95 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
96 ln = NULL;
97 } else {
98 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
99 smb_iconv_close(handle);
102 ret = ln;
104 #endif
106 if (!ret || !*ret) ret = "ASCII";
107 return ret;
110 void lazy_initialize_conv(void)
112 if (!initialized) {
113 load_case_tables();
114 init_iconv();
115 initialized = true;
120 * Destroy global objects allocated by init_iconv()
122 void gfree_charcnv(void)
124 int c1, c2;
126 for (c1=0;c1<NUM_CHARSETS;c1++) {
127 for (c2=0;c2<NUM_CHARSETS;c2++) {
128 if ( conv_handles[c1][c2] ) {
129 smb_iconv_close( conv_handles[c1][c2] );
130 conv_handles[c1][c2] = 0;
134 initialized = false;
138 * Initialize iconv conversion descriptors.
140 * This is called the first time it is needed, and also called again
141 * every time the configuration is reloaded, because the charset or
142 * codepage might have changed.
144 void init_iconv(void)
146 int c1, c2;
147 bool did_reload = False;
149 /* so that charset_name() works we need to get the UNIX<->UCS2 going
150 first */
151 if (!conv_handles[CH_UNIX][CH_UTF16LE])
152 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
154 if (!conv_handles[CH_UTF16LE][CH_UNIX])
155 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
157 for (c1=0;c1<NUM_CHARSETS;c1++) {
158 for (c2=0;c2<NUM_CHARSETS;c2++) {
159 const char *n1 = charset_name((charset_t)c1);
160 const char *n2 = charset_name((charset_t)c2);
161 if (conv_handles[c1][c2] &&
162 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
163 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
164 continue;
166 did_reload = True;
168 if (conv_handles[c1][c2])
169 smb_iconv_close(conv_handles[c1][c2]);
171 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
172 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
173 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
174 charset_name((charset_t)c1), charset_name((charset_t)c2)));
175 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
176 n1 = "ASCII";
178 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
179 n2 = "ASCII";
181 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
182 n1, n2 ));
183 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
184 if (!conv_handles[c1][c2]) {
185 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
186 smb_panic("init_iconv: conv_handle initialization failed");
192 if (did_reload) {
193 /* XXX: Does this really get called every time the dos
194 * codepage changes? */
195 /* XXX: Is the did_reload test too strict? */
196 conv_silent = True;
197 init_valid_table();
198 conv_silent = False;
203 * Convert string from one encoding to another, making error checking etc
204 * Slow path version - uses (slow) iconv.
206 * @param src pointer to source string (multibyte or singlebyte)
207 * @param srclen length of the source string in bytes
208 * @param dest pointer to destination string (multibyte or singlebyte)
209 * @param destlen maximal length allowed for string
210 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
211 * @returns the number of bytes occupied in the destination
213 * Ensure the srclen contains the terminating zero.
217 static size_t convert_string_internal(charset_t from, charset_t to,
218 void const *src, size_t srclen,
219 void *dest, size_t destlen, bool allow_bad_conv)
221 size_t i_len, o_len;
222 size_t retval;
223 const char* inbuf = (const char*)src;
224 char* outbuf = (char*)dest;
225 smb_iconv_t descriptor;
227 lazy_initialize_conv();
229 descriptor = conv_handles[from][to];
231 if (srclen == (size_t)-1) {
232 if (from == CH_UTF16LE || from == CH_UTF16BE) {
233 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
234 } else {
235 srclen = strlen((const char *)src)+1;
240 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
241 if (!conv_silent)
242 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
243 return (size_t)-1;
246 i_len=srclen;
247 o_len=destlen;
249 again:
251 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
252 if(retval==(size_t)-1) {
253 const char *reason="unknown error";
254 switch(errno) {
255 case EINVAL:
256 reason="Incomplete multibyte sequence";
257 if (!conv_silent)
258 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
259 if (allow_bad_conv)
260 goto use_as_is;
261 return (size_t)-1;
262 case E2BIG:
263 reason="No more room";
264 if (!conv_silent) {
265 if (from == CH_UNIX) {
266 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
267 charset_name(from), charset_name(to),
268 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
269 } else {
270 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
271 charset_name(from), charset_name(to),
272 (unsigned int)srclen, (unsigned int)destlen));
275 break;
276 case EILSEQ:
277 reason="Illegal multibyte sequence";
278 if (!conv_silent)
279 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
280 if (allow_bad_conv)
281 goto use_as_is;
283 return (size_t)-1;
284 default:
285 if (!conv_silent)
286 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
287 return (size_t)-1;
289 /* smb_panic(reason); */
291 return destlen-o_len;
293 use_as_is:
296 * Conversion not supported. This is actually an error, but there are so
297 * many misconfigured iconv systems and smb.conf's out there we can't just
298 * fail. Do a very bad conversion instead.... JRA.
302 if (o_len == 0 || i_len == 0)
303 return destlen - o_len;
305 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
306 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
307 /* Can't convert from utf16 any endian to multibyte.
308 Replace with the default fail char.
310 if (i_len < 2)
311 return destlen - o_len;
312 if (i_len >= 2) {
313 *outbuf = lp_failed_convert_char();
315 outbuf++;
316 o_len--;
318 inbuf += 2;
319 i_len -= 2;
322 if (o_len == 0 || i_len == 0)
323 return destlen - o_len;
325 /* Keep trying with the next char... */
326 goto again;
328 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
329 /* Can't convert to UTF16LE - just widen by adding the
330 default fail char then zero.
332 if (o_len < 2)
333 return destlen - o_len;
335 outbuf[0] = lp_failed_convert_char();
336 outbuf[1] = '\0';
338 inbuf++;
339 i_len--;
341 outbuf += 2;
342 o_len -= 2;
344 if (o_len == 0 || i_len == 0)
345 return destlen - o_len;
347 /* Keep trying with the next char... */
348 goto again;
350 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
351 to != CH_UTF16LE && to != CH_UTF16BE) {
352 /* Failed multibyte to multibyte. Just copy the default fail char and
353 try again. */
354 outbuf[0] = lp_failed_convert_char();
356 inbuf++;
357 i_len--;
359 outbuf++;
360 o_len--;
362 if (o_len == 0 || i_len == 0)
363 return destlen - o_len;
365 /* Keep trying with the next char... */
366 goto again;
368 } else {
369 /* Keep compiler happy.... */
370 return destlen - o_len;
376 * Convert string from one encoding to another, making error checking etc
377 * Fast path version - handles ASCII first.
379 * @param src pointer to source string (multibyte or singlebyte)
380 * @param srclen length of the source string in bytes, or -1 for nul terminated.
381 * @param dest pointer to destination string (multibyte or singlebyte)
382 * @param destlen maximal length allowed for string - *NEVER* -1.
383 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
384 * @returns the number of bytes occupied in the destination
386 * Ensure the srclen contains the terminating zero.
388 * This function has been hand-tuned to provide a fast path.
389 * Don't change unless you really know what you are doing. JRA.
392 size_t convert_string(charset_t from, charset_t to,
393 void const *src, size_t srclen,
394 void *dest, size_t destlen, bool allow_bad_conv)
397 * NB. We deliberately don't do a strlen here if srclen == -1.
398 * This is very expensive over millions of calls and is taken
399 * care of in the slow path in convert_string_internal. JRA.
402 #ifdef DEVELOPER
403 SMB_ASSERT(destlen != (size_t)-1);
404 #endif
406 if (srclen == 0)
407 return 0;
409 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
410 const unsigned char *p = (const unsigned char *)src;
411 unsigned char *q = (unsigned char *)dest;
412 size_t slen = srclen;
413 size_t dlen = destlen;
414 unsigned char lastp = '\0';
415 size_t retval = 0;
417 /* If all characters are ascii, fast path here. */
418 while (slen && dlen) {
419 if ((lastp = *p) <= 0x7f) {
420 *q++ = *p++;
421 if (slen != (size_t)-1) {
422 slen--;
424 dlen--;
425 retval++;
426 if (!lastp)
427 break;
428 } else {
429 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
430 goto general_case;
431 #else
432 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
433 if (ret == (size_t)-1) {
434 return ret;
436 return retval + ret;
437 #endif
440 if (!dlen) {
441 /* Even if we fast path we should note if we ran out of room. */
442 if (((slen != (size_t)-1) && slen) ||
443 ((slen == (size_t)-1) && lastp)) {
444 errno = E2BIG;
447 return retval;
448 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
449 const unsigned char *p = (const unsigned char *)src;
450 unsigned char *q = (unsigned char *)dest;
451 size_t retval = 0;
452 size_t slen = srclen;
453 size_t dlen = destlen;
454 unsigned char lastp = '\0';
456 /* If all characters are ascii, fast path here. */
457 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
458 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
459 *q++ = *p;
460 if (slen != (size_t)-1) {
461 slen -= 2;
463 p += 2;
464 dlen--;
465 retval++;
466 if (!lastp)
467 break;
468 } else {
469 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
470 goto general_case;
471 #else
472 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
473 if (ret == (size_t)-1) {
474 return ret;
476 return retval + ret;
477 #endif
480 if (!dlen) {
481 /* Even if we fast path we should note if we ran out of room. */
482 if (((slen != (size_t)-1) && slen) ||
483 ((slen == (size_t)-1) && lastp)) {
484 errno = E2BIG;
487 return retval;
488 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
489 const unsigned char *p = (const unsigned char *)src;
490 unsigned char *q = (unsigned char *)dest;
491 size_t retval = 0;
492 size_t slen = srclen;
493 size_t dlen = destlen;
494 unsigned char lastp = '\0';
496 /* If all characters are ascii, fast path here. */
497 while (slen && (dlen >= 2)) {
498 if ((lastp = *p) <= 0x7F) {
499 *q++ = *p++;
500 *q++ = '\0';
501 if (slen != (size_t)-1) {
502 slen--;
504 dlen -= 2;
505 retval += 2;
506 if (!lastp)
507 break;
508 } else {
509 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
510 goto general_case;
511 #else
512 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
513 if (ret == (size_t)-1) {
514 return ret;
516 return retval + ret;
517 #endif
520 if (!dlen) {
521 /* Even if we fast path we should note if we ran out of room. */
522 if (((slen != (size_t)-1) && slen) ||
523 ((slen == (size_t)-1) && lastp)) {
524 errno = E2BIG;
527 return retval;
530 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
531 general_case:
532 #endif
533 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
537 * Convert between character sets, allocating a new buffer using talloc for the result.
539 * @param srclen length of source buffer.
540 * @param dest always set at least to NULL
541 * @parm converted_size set to the number of bytes occupied by the string in
542 * the destination on success.
543 * @note -1 is not accepted for srclen.
545 * @return true if new buffer was correctly allocated, and string was
546 * converted.
548 * Ensure the srclen contains the terminating zero.
550 * I hate the goto's in this function. It's embarressing.....
551 * There has to be a cleaner way to do this. JRA.
553 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
554 void const *src, size_t srclen, void *dst,
555 size_t *converted_size, bool allow_bad_conv)
558 size_t i_len, o_len, destlen = (srclen * 3) / 2;
559 size_t retval;
560 const char *inbuf = (const char *)src;
561 char *outbuf = NULL, *ob = NULL;
562 smb_iconv_t descriptor;
563 void **dest = (void **)dst;
565 *dest = NULL;
567 if (!converted_size) {
568 errno = EINVAL;
569 return false;
572 if (src == NULL || srclen == (size_t)-1) {
573 errno = EINVAL;
574 return false;
577 if (srclen == 0) {
578 /* We really should treat this as an error, but
579 there are too many callers that need this to
580 return a NULL terminated string in the correct
581 character set. */
582 if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
583 destlen = 2;
584 } else {
585 destlen = 1;
587 ob = talloc_zero_array(ctx, char, destlen);
588 if (ob == NULL) {
589 errno = ENOMEM;
590 return false;
592 *converted_size = destlen;
593 *dest = ob;
594 return true;
597 lazy_initialize_conv();
599 descriptor = conv_handles[from][to];
601 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
602 if (!conv_silent)
603 DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
604 errno = EOPNOTSUPP;
605 return false;
608 convert:
610 /* +2 is for ucs2 null termination. */
611 if ((destlen*2)+2 < destlen) {
612 /* wrapped ! abort. */
613 if (!conv_silent)
614 DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
615 TALLOC_FREE(outbuf);
616 errno = EOPNOTSUPP;
617 return false;
618 } else {
619 destlen = destlen * 2;
622 /* +2 is for ucs2 null termination. */
623 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
625 if (!ob) {
626 DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
627 errno = ENOMEM;
628 return false;
630 outbuf = ob;
631 i_len = srclen;
632 o_len = destlen;
634 again:
636 retval = smb_iconv(descriptor,
637 &inbuf, &i_len,
638 &outbuf, &o_len);
639 if(retval == (size_t)-1) {
640 const char *reason="unknown error";
641 switch(errno) {
642 case EINVAL:
643 reason="Incomplete multibyte sequence";
644 if (!conv_silent)
645 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
646 if (allow_bad_conv)
647 goto use_as_is;
648 break;
649 case E2BIG:
650 goto convert;
651 case EILSEQ:
652 reason="Illegal multibyte sequence";
653 if (!conv_silent)
654 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
655 if (allow_bad_conv)
656 goto use_as_is;
657 break;
659 if (!conv_silent)
660 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
661 /* smb_panic(reason); */
662 TALLOC_FREE(ob);
663 return false;
666 out:
668 destlen = destlen - o_len;
669 /* Don't shrink unless we're reclaiming a lot of
670 * space. This is in the hot codepath and these
671 * reallocs *cost*. JRA.
673 if (o_len > 1024) {
674 /* We're shrinking here so we know the +2 is safe from wrap. */
675 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
678 if (destlen && !ob) {
679 DEBUG(0, ("convert_string_talloc: out of memory!\n"));
680 errno = ENOMEM;
681 return false;
684 *dest = ob;
686 /* Must ucs2 null terminate in the extra space we allocated. */
687 ob[destlen] = '\0';
688 ob[destlen+1] = '\0';
690 /* Ensure we can never return a *converted_size of zero. */
691 if (destlen == 0) {
692 /* This can happen from a bad iconv "use_as_is:" call. */
693 if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
694 destlen = 2;
695 } else {
696 destlen = 1;
700 *converted_size = destlen;
701 return true;
703 use_as_is:
706 * Conversion not supported. This is actually an error, but there are so
707 * many misconfigured iconv systems and smb.conf's out there we can't just
708 * fail. Do a very bad conversion instead.... JRA.
712 if (o_len == 0 || i_len == 0)
713 goto out;
715 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
716 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
717 /* Can't convert from utf16 any endian to multibyte.
718 Replace with the default fail char.
721 if (i_len < 2)
722 goto out;
724 if (i_len >= 2) {
725 *outbuf = lp_failed_convert_char();
727 outbuf++;
728 o_len--;
730 inbuf += 2;
731 i_len -= 2;
734 if (o_len == 0 || i_len == 0)
735 goto out;
737 /* Keep trying with the next char... */
738 goto again;
740 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
741 /* Can't convert to UTF16LE - just widen by adding the
742 default fail char then zero.
744 if (o_len < 2)
745 goto out;
747 outbuf[0] = lp_failed_convert_char();
748 outbuf[1] = '\0';
750 inbuf++;
751 i_len--;
753 outbuf += 2;
754 o_len -= 2;
756 if (o_len == 0 || i_len == 0)
757 goto out;
759 /* Keep trying with the next char... */
760 goto again;
762 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
763 to != CH_UTF16LE && to != CH_UTF16BE) {
764 /* Failed multibyte to multibyte. Just copy the default fail char and
765 try again. */
766 outbuf[0] = lp_failed_convert_char();
768 inbuf++;
769 i_len--;
771 outbuf++;
772 o_len--;
774 if (o_len == 0 || i_len == 0)
775 goto out;
777 /* Keep trying with the next char... */
778 goto again;
780 } else {
781 /* Keep compiler happy.... */
782 goto out;
787 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
789 size_t size;
790 smb_ucs2_t *buffer;
792 if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
793 return (size_t)-1;
796 if (!strupper_w(buffer) && (dest == src)) {
797 TALLOC_FREE(buffer);
798 return srclen;
801 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
802 TALLOC_FREE(buffer);
803 return size;
807 talloc_strdup() a unix string to upper case.
810 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
812 char *out_buffer = talloc_strdup(ctx,s);
813 const unsigned char *p = (const unsigned char *)s;
814 unsigned char *q = (unsigned char *)out_buffer;
816 if (!q) {
817 return NULL;
820 /* this is quite a common operation, so we want it to be
821 fast. We optimise for the ascii case, knowing that all our
822 supported multi-byte character sets are ascii-compatible
823 (ie. they match for the first 128 chars) */
825 while (*p) {
826 if (*p & 0x80)
827 break;
828 *q++ = toupper_ascii_fast(*p);
829 p++;
832 if (*p) {
833 /* MB case. */
834 size_t converted_size, converted_size2;
835 smb_ucs2_t *ubuf = NULL;
837 /* We're not using the ascii buffer above. */
838 TALLOC_FREE(out_buffer);
840 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
841 strlen(s)+1, (void *)&ubuf,
842 &converted_size, True))
844 return NULL;
847 strupper_w(ubuf);
849 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
850 converted_size, (void *)&out_buffer,
851 &converted_size2, True))
853 TALLOC_FREE(ubuf);
854 return NULL;
857 /* Don't need the intermediate buffer
858 * anymore.
860 TALLOC_FREE(ubuf);
863 return out_buffer;
866 char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
867 return talloc_strdup_upper(ctx, s);
871 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
873 size_t size;
874 smb_ucs2_t *buffer = NULL;
876 if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
877 (void **)(void *)&buffer, &size,
878 True))
880 smb_panic("failed to create UCS2 buffer");
882 if (!strlower_w(buffer) && (dest == src)) {
883 TALLOC_FREE(buffer);
884 return srclen;
886 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
887 TALLOC_FREE(buffer);
888 return size;
892 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
894 size_t converted_size;
895 smb_ucs2_t *buffer = NULL;
896 char *out_buffer;
898 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
899 return NULL;
902 strlower_w(buffer);
904 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
905 TALLOC_FREE(buffer);
906 return NULL;
909 TALLOC_FREE(buffer);
911 return out_buffer;
914 char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
915 return talloc_strdup_lower(ctx, s);
918 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
920 if (flags & (STR_NOALIGN|STR_ASCII))
921 return 0;
922 return PTR_DIFF(p, base_ptr) & 1;
927 * Copy a string from a char* unix src to a dos codepage string destination.
929 * @return the number of bytes occupied by the string in the destination.
931 * @param flags can include
932 * <dl>
933 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
934 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
935 * </dl>
937 * @param dest_len the maximum length in bytes allowed in the
938 * destination.
940 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
942 size_t src_len = strlen(src);
943 char *tmpbuf = NULL;
944 size_t ret;
946 /* No longer allow a length of -1. */
947 if (dest_len == (size_t)-1) {
948 smb_panic("push_ascii - dest_len == -1");
951 if (flags & STR_UPPER) {
952 tmpbuf = SMB_STRDUP(src);
953 if (!tmpbuf) {
954 smb_panic("malloc fail");
956 strupper_m(tmpbuf);
957 src = tmpbuf;
960 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
961 src_len++;
964 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
965 if (ret == (size_t)-1 &&
966 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
967 && dest_len > 0) {
968 ((char *)dest)[0] = '\0';
970 SAFE_FREE(tmpbuf);
971 return ret;
974 size_t push_ascii_fstring(void *dest, const char *src)
976 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
979 /********************************************************************
980 Push an nstring - ensure null terminated. Written by
981 moriyama@miraclelinux.com (MORIYAMA Masayuki).
982 ********************************************************************/
984 size_t push_ascii_nstring(void *dest, const char *src)
986 size_t i, buffer_len, dest_len;
987 smb_ucs2_t *buffer;
989 conv_silent = True;
990 if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &buffer_len)) {
991 smb_panic("failed to create UCS2 buffer");
994 /* We're using buffer_len below to count ucs2 characters, not bytes. */
995 buffer_len /= sizeof(smb_ucs2_t);
997 dest_len = 0;
998 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
999 unsigned char mb[10];
1000 /* Convert one smb_ucs2_t character at a time. */
1001 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1002 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1003 memcpy((char *)dest + dest_len, mb, mb_len);
1004 dest_len += mb_len;
1005 } else {
1006 errno = E2BIG;
1007 break;
1010 ((char *)dest)[dest_len] = '\0';
1012 conv_silent = False;
1013 TALLOC_FREE(buffer);
1014 return dest_len;
1017 /********************************************************************
1018 Push and malloc an ascii string. src and dest null terminated.
1019 ********************************************************************/
1021 bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
1023 size_t src_len = strlen(src)+1;
1025 *dest = NULL;
1026 return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
1027 (void **)dest, converted_size, True);
1031 * Copy a string from a dos codepage source to a unix char* destination.
1033 * The resulting string in "dest" is always null terminated.
1035 * @param flags can have:
1036 * <dl>
1037 * <dt>STR_TERMINATE</dt>
1038 * <dd>STR_TERMINATE means the string in @p src
1039 * is null terminated, and src_len is ignored.</dd>
1040 * </dl>
1042 * @param src_len is the length of the source area in bytes.
1043 * @returns the number of bytes occupied by the string in @p src.
1045 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1047 size_t ret;
1049 if (dest_len == (size_t)-1) {
1050 /* No longer allow dest_len of -1. */
1051 smb_panic("pull_ascii - invalid dest_len of -1");
1054 if (flags & STR_TERMINATE) {
1055 if (src_len == (size_t)-1) {
1056 src_len = strlen((const char *)src) + 1;
1057 } else {
1058 size_t len = strnlen((const char *)src, src_len);
1059 if (len < src_len)
1060 len++;
1061 src_len = len;
1065 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1066 if (ret == (size_t)-1) {
1067 ret = 0;
1068 dest_len = 0;
1071 if (dest_len && ret) {
1072 /* Did we already process the terminating zero ? */
1073 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1074 dest[MIN(ret, dest_len-1)] = 0;
1076 } else {
1077 dest[0] = 0;
1080 return src_len;
1084 * Copy a string from a dos codepage source to a unix char* destination.
1085 * Talloc version.
1087 * The resulting string in "dest" is always null terminated.
1089 * @param flags can have:
1090 * <dl>
1091 * <dt>STR_TERMINATE</dt>
1092 * <dd>STR_TERMINATE means the string in @p src
1093 * is null terminated, and src_len is ignored.</dd>
1094 * </dl>
1096 * @param src_len is the length of the source area in bytes.
1097 * @returns the number of bytes occupied by the string in @p src.
1100 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1101 char **ppdest,
1102 const void *src,
1103 size_t src_len,
1104 int flags)
1106 char *dest = NULL;
1107 size_t dest_len;
1109 *ppdest = NULL;
1111 if (!src_len) {
1112 return 0;
1115 if (flags & STR_TERMINATE) {
1116 if (src_len == (size_t)-1) {
1117 src_len = strlen((const char *)src) + 1;
1118 } else {
1119 size_t len = strnlen((const char *)src, src_len);
1120 if (len < src_len)
1121 len++;
1122 src_len = len;
1124 /* Ensure we don't use an insane length from the client. */
1125 if (src_len >= 1024*1024) {
1126 char *msg = talloc_asprintf(ctx,
1127 "Bad src length (%u) in "
1128 "pull_ascii_base_talloc",
1129 (unsigned int)src_len);
1130 smb_panic(msg);
1132 } else {
1133 /* Can't have an unlimited length
1134 * non STR_TERMINATE'd.
1136 if (src_len == (size_t)-1) {
1137 errno = EINVAL;
1138 return 0;
1142 /* src_len != -1 here. */
1144 if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1145 &dest_len, True)) {
1146 dest_len = 0;
1149 if (dest_len && dest) {
1150 /* Did we already process the terminating zero ? */
1151 if (dest[dest_len-1] != 0) {
1152 size_t size = talloc_get_size(dest);
1153 /* Have we got space to append the '\0' ? */
1154 if (size <= dest_len) {
1155 /* No, realloc. */
1156 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1157 dest_len+1);
1158 if (!dest) {
1159 /* talloc fail. */
1160 dest_len = (size_t)-1;
1161 return 0;
1164 /* Yay - space ! */
1165 dest[dest_len] = '\0';
1166 dest_len++;
1168 } else if (dest) {
1169 dest[0] = 0;
1172 *ppdest = dest;
1173 return src_len;
1176 size_t pull_ascii_fstring(char *dest, const void *src)
1178 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1181 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1183 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1185 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1189 * Copy a string from a char* src to a unicode destination.
1191 * @returns the number of bytes occupied by the string in the destination.
1193 * @param flags can have:
1195 * <dl>
1196 * <dt>STR_TERMINATE <dd>means include the null termination.
1197 * <dt>STR_UPPER <dd>means uppercase in the destination.
1198 * <dt>STR_NOALIGN <dd>means don't do alignment.
1199 * </dl>
1201 * @param dest_len is the maximum length allowed in the
1202 * destination.
1205 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1207 size_t len=0;
1208 size_t src_len;
1209 size_t ret;
1211 if (dest_len == (size_t)-1) {
1212 /* No longer allow dest_len of -1. */
1213 smb_panic("push_ucs2 - invalid dest_len of -1");
1216 if (flags & STR_TERMINATE)
1217 src_len = (size_t)-1;
1218 else
1219 src_len = strlen(src);
1221 if (ucs2_align(base_ptr, dest, flags)) {
1222 *(char *)dest = 0;
1223 dest = (void *)((char *)dest + 1);
1224 if (dest_len)
1225 dest_len--;
1226 len++;
1229 /* ucs2 is always a multiple of 2 bytes */
1230 dest_len &= ~1;
1232 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1233 if (ret == (size_t)-1) {
1234 if ((flags & STR_TERMINATE) &&
1235 dest &&
1236 dest_len) {
1237 *(char *)dest = 0;
1239 return len;
1242 len += ret;
1244 if (flags & STR_UPPER) {
1245 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1246 size_t i;
1248 /* We check for i < (ret / 2) below as the dest string isn't null
1249 terminated if STR_TERMINATE isn't set. */
1251 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1252 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1253 if (v != dest_ucs2[i]) {
1254 dest_ucs2[i] = v;
1259 return len;
1264 * Copy a string from a unix char* src to a UCS2 destination,
1265 * allocating a buffer using talloc().
1267 * @param dest always set at least to NULL
1268 * @parm converted_size set to the number of bytes occupied by the string in
1269 * the destination on success.
1271 * @return true if new buffer was correctly allocated, and string was
1272 * converted.
1274 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1275 size_t *converted_size)
1277 size_t src_len = strlen(src)+1;
1279 *dest = NULL;
1280 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1281 (void **)dest, converted_size, True);
1286 Copy a string from a char* src to a UTF-8 destination.
1287 Return the number of bytes occupied by the string in the destination
1288 Flags can have:
1289 STR_TERMINATE means include the null termination
1290 STR_UPPER means uppercase in the destination
1291 dest_len is the maximum length allowed in the destination. If dest_len
1292 is -1 then no maxiumum is used.
1295 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1297 size_t src_len = 0;
1298 size_t ret;
1299 char *tmpbuf = NULL;
1301 if (dest_len == (size_t)-1) {
1302 /* No longer allow dest_len of -1. */
1303 smb_panic("push_utf8 - invalid dest_len of -1");
1306 if (flags & STR_UPPER) {
1307 tmpbuf = strupper_talloc(talloc_tos(), src);
1308 if (!tmpbuf) {
1309 return (size_t)-1;
1311 src = tmpbuf;
1312 src_len = strlen(src);
1315 src_len = strlen(src);
1316 if (flags & STR_TERMINATE) {
1317 src_len++;
1320 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1321 TALLOC_FREE(tmpbuf);
1322 return ret;
1325 size_t push_utf8_fstring(void *dest, const char *src)
1327 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1331 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1333 * @param dest always set at least to NULL
1334 * @parm converted_size set to the number of bytes occupied by the string in
1335 * the destination on success.
1337 * @return true if new buffer was correctly allocated, and string was
1338 * converted.
1341 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1342 size_t *converted_size)
1344 size_t src_len = strlen(src)+1;
1346 *dest = NULL;
1347 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1348 (void**)dest, converted_size, True);
1352 Copy a string from a ucs2 source to a unix char* destination.
1353 Flags can have:
1354 STR_TERMINATE means the string in src is null terminated.
1355 STR_NOALIGN means don't try to align.
1356 if STR_TERMINATE is set then src_len is ignored if it is -1.
1357 src_len is the length of the source area in bytes
1358 Return the number of bytes occupied by the string in src.
1359 The resulting string in "dest" is always null terminated.
1362 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1364 size_t ret;
1365 size_t ucs2_align_len = 0;
1367 if (dest_len == (size_t)-1) {
1368 /* No longer allow dest_len of -1. */
1369 smb_panic("pull_ucs2 - invalid dest_len of -1");
1372 if (!src_len) {
1373 if (dest && dest_len > 0) {
1374 dest[0] = '\0';
1376 return 0;
1379 if (ucs2_align(base_ptr, src, flags)) {
1380 src = (const void *)((const char *)src + 1);
1381 if (src_len != (size_t)-1)
1382 src_len--;
1383 ucs2_align_len = 1;
1386 if (flags & STR_TERMINATE) {
1387 /* src_len -1 is the default for null terminated strings. */
1388 if (src_len != (size_t)-1) {
1389 size_t len = strnlen_w((const smb_ucs2_t *)src,
1390 src_len/2);
1391 if (len < src_len/2)
1392 len++;
1393 src_len = len*2;
1397 /* ucs2 is always a multiple of 2 bytes */
1398 if (src_len != (size_t)-1)
1399 src_len &= ~1;
1401 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1402 if (ret == (size_t)-1) {
1403 ret = 0;
1404 dest_len = 0;
1407 if (src_len == (size_t)-1)
1408 src_len = ret*2;
1410 if (dest_len && ret) {
1411 /* Did we already process the terminating zero ? */
1412 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1413 dest[MIN(ret, dest_len-1)] = 0;
1415 } else {
1416 dest[0] = 0;
1419 return src_len + ucs2_align_len;
1423 Copy a string from a ucs2 source to a unix char* destination.
1424 Talloc version with a base pointer.
1425 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1426 needs fixing. JRA).
1427 Flags can have:
1428 STR_TERMINATE means the string in src is null terminated.
1429 STR_NOALIGN means don't try to align.
1430 if STR_TERMINATE is set then src_len is ignored if it is -1.
1431 src_len is the length of the source area in bytes
1432 Return the number of bytes occupied by the string in src.
1433 The resulting string in "dest" is always null terminated.
1436 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1437 const void *base_ptr,
1438 char **ppdest,
1439 const void *src,
1440 size_t src_len,
1441 int flags)
1443 char *dest;
1444 size_t dest_len;
1445 size_t ucs2_align_len = 0;
1447 *ppdest = NULL;
1449 #ifdef DEVELOPER
1450 /* Ensure we never use the braindead "malloc" varient. */
1451 if (ctx == NULL) {
1452 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1454 #endif
1456 if (!src_len) {
1457 return 0;
1460 if (ucs2_align(base_ptr, src, flags)) {
1461 src = (const void *)((const char *)src + 1);
1462 if (src_len != (size_t)-1)
1463 src_len--;
1464 ucs2_align_len = 1;
1467 if (flags & STR_TERMINATE) {
1468 /* src_len -1 is the default for null terminated strings. */
1469 if (src_len != (size_t)-1) {
1470 size_t len = strnlen_w((const smb_ucs2_t *)src,
1471 src_len/2);
1472 if (len < src_len/2)
1473 len++;
1474 src_len = len*2;
1475 } else {
1477 * src_len == -1 - alloc interface won't take this
1478 * so we must calculate.
1480 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1482 /* Ensure we don't use an insane length from the client. */
1483 if (src_len >= 1024*1024) {
1484 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1486 } else {
1487 /* Can't have an unlimited length
1488 * non STR_TERMINATE'd.
1490 if (src_len == (size_t)-1) {
1491 errno = EINVAL;
1492 return 0;
1496 /* src_len != -1 here. */
1498 /* ucs2 is always a multiple of 2 bytes */
1499 src_len &= ~1;
1501 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1502 (void *)&dest, &dest_len, True)) {
1503 dest_len = 0;
1506 if (dest_len) {
1507 /* Did we already process the terminating zero ? */
1508 if (dest[dest_len-1] != 0) {
1509 size_t size = talloc_get_size(dest);
1510 /* Have we got space to append the '\0' ? */
1511 if (size <= dest_len) {
1512 /* No, realloc. */
1513 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1514 dest_len+1);
1515 if (!dest) {
1516 /* talloc fail. */
1517 dest_len = (size_t)-1;
1518 return 0;
1521 /* Yay - space ! */
1522 dest[dest_len] = '\0';
1523 dest_len++;
1525 } else if (dest) {
1526 dest[0] = 0;
1529 *ppdest = dest;
1530 return src_len + ucs2_align_len;
1533 size_t pull_ucs2_fstring(char *dest, const void *src)
1535 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1539 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1541 * @param dest always set at least to NULL
1542 * @parm converted_size set to the number of bytes occupied by the string in
1543 * the destination on success.
1545 * @return true if new buffer was correctly allocated, and string was
1546 * converted.
1549 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1550 size_t *converted_size)
1552 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1554 *dest = NULL;
1555 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1556 (void **)dest, converted_size, True);
1560 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1562 * @param dest always set at least to NULL
1563 * @parm converted_size set to the number of bytes occupied by the string in
1564 * the destination on success.
1566 * @return true if new buffer was correctly allocated, and string was
1567 * converted.
1570 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1571 size_t *converted_size)
1573 size_t src_len = strlen(src)+1;
1575 *dest = NULL;
1576 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1577 (void **)dest, converted_size, True);
1582 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1584 * @param dest always set at least to NULL
1585 * @parm converted_size set to the number of bytes occupied by the string in
1586 * the destination on success.
1588 * @return true if new buffer was correctly allocated, and string was
1589 * converted.
1592 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1593 size_t *converted_size)
1595 size_t src_len = strlen(src)+1;
1597 *dest = NULL;
1598 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1599 (void **)dest, converted_size, True);
1603 Copy a string from a char* src to a unicode or ascii
1604 dos codepage destination choosing unicode or ascii based on the
1605 flags supplied
1606 Return the number of bytes occupied by the string in the destination.
1607 flags can have:
1608 STR_TERMINATE means include the null termination.
1609 STR_UPPER means uppercase in the destination.
1610 STR_ASCII use ascii even with unicode packet.
1611 STR_NOALIGN means don't do alignment.
1612 dest_len is the maximum length allowed in the destination. If dest_len
1613 is -1 then no maxiumum is used.
1616 size_t push_string_check_fn(const char *function, unsigned int line,
1617 void *dest, const char *src,
1618 size_t dest_len, int flags)
1620 #ifdef DEVELOPER
1621 /* We really need to zero fill here, not clobber
1622 * region, as we want to ensure that valgrind thinks
1623 * all of the outgoing buffer has been written to
1624 * so a send() or write() won't trap an error.
1625 * JRA.
1627 #if 0
1628 clobber_region(function, line, dest, dest_len);
1629 #else
1630 memset(dest, '\0', dest_len);
1631 #endif
1632 #endif
1634 if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1635 return push_ucs2(NULL, dest, src, dest_len, flags);
1637 return push_ascii(dest, src, dest_len, flags);
1642 Copy a string from a char* src to a unicode or ascii
1643 dos codepage destination choosing unicode or ascii based on the
1644 flags in the SMB buffer starting at base_ptr.
1645 Return the number of bytes occupied by the string in the destination.
1646 flags can have:
1647 STR_TERMINATE means include the null termination.
1648 STR_UPPER means uppercase in the destination.
1649 STR_ASCII use ascii even with unicode packet.
1650 STR_NOALIGN means don't do alignment.
1651 dest_len is the maximum length allowed in the destination. If dest_len
1652 is -1 then no maxiumum is used.
1655 size_t push_string_base(const char *function, unsigned int line,
1656 const char *base, uint16 flags2,
1657 void *dest, const char *src,
1658 size_t dest_len, int flags)
1660 #ifdef DEVELOPER
1661 /* We really need to zero fill here, not clobber
1662 * region, as we want to ensure that valgrind thinks
1663 * all of the outgoing buffer has been written to
1664 * so a send() or write() won't trap an error.
1665 * JRA.
1667 #if 0
1668 clobber_region(function, line, dest, dest_len);
1669 #else
1670 memset(dest, '\0', dest_len);
1671 #endif
1672 #endif
1674 if (!(flags & STR_ASCII) && \
1675 ((flags & STR_UNICODE || \
1676 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1677 return push_ucs2(base, dest, src, dest_len, flags);
1679 return push_ascii(dest, src, dest_len, flags);
1683 Copy a string from a char* src to a unicode or ascii
1684 dos codepage destination choosing unicode or ascii based on the
1685 flags supplied
1686 Return the number of bytes occupied by the string in the destination.
1687 flags can have:
1688 STR_TERMINATE means include the null termination.
1689 STR_UPPER means uppercase in the destination.
1690 STR_ASCII use ascii even with unicode packet.
1691 STR_NOALIGN means don't do alignment.
1692 dest_len is the maximum length allowed in the destination. If dest_len
1693 is -1 then no maxiumum is used.
1696 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
1698 size_t ret;
1699 #ifdef DEVELOPER
1700 /* We really need to zero fill here, not clobber
1701 * region, as we want to ensure that valgrind thinks
1702 * all of the outgoing buffer has been written to
1703 * so a send() or write() won't trap an error.
1704 * JRA.
1706 memset(dest, '\0', dest_len);
1707 #endif
1709 if (!(flags & STR_ASCII) && \
1710 (flags & STR_UNICODE)) {
1711 ret = push_ucs2(NULL, dest, src, dest_len, flags);
1712 } else {
1713 ret = push_ascii(dest, src, dest_len, flags);
1715 if (ret == (size_t)-1) {
1716 return -1;
1718 return ret;
1722 Copy a string from a unicode or ascii source (depending on
1723 the packet flags) to a char* destination.
1724 Flags can have:
1725 STR_TERMINATE means the string in src is null terminated.
1726 STR_UNICODE means to force as unicode.
1727 STR_ASCII use ascii even with unicode packet.
1728 STR_NOALIGN means don't do alignment.
1729 if STR_TERMINATE is set then src_len is ignored is it is -1
1730 src_len is the length of the source area in bytes.
1731 Return the number of bytes occupied by the string in src.
1732 The resulting string in "dest" is always null terminated.
1735 size_t pull_string_fn(const char *function,
1736 unsigned int line,
1737 const void *base_ptr,
1738 uint16 smb_flags2,
1739 char *dest,
1740 const void *src,
1741 size_t dest_len,
1742 size_t src_len,
1743 int flags)
1745 #ifdef DEVELOPER
1746 clobber_region(function, line, dest, dest_len);
1747 #endif
1749 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1750 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1751 "UNICODE defined");
1754 if (!(flags & STR_ASCII) && \
1755 ((flags & STR_UNICODE || \
1756 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1757 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1759 return pull_ascii(dest, src, dest_len, src_len, flags);
1763 Copy a string from a unicode or ascii source (depending on
1764 the packet flags) to a char* destination.
1765 Variant that uses talloc.
1766 Flags can have:
1767 STR_TERMINATE means the string in src is null terminated.
1768 STR_UNICODE means to force as unicode.
1769 STR_ASCII use ascii even with unicode packet.
1770 STR_NOALIGN means don't do alignment.
1771 if STR_TERMINATE is set then src_len is ignored is it is -1
1772 src_len is the length of the source area in bytes.
1773 Return the number of bytes occupied by the string in src.
1774 The resulting string in "dest" is always null terminated.
1777 size_t pull_string_talloc_fn(const char *function,
1778 unsigned int line,
1779 TALLOC_CTX *ctx,
1780 const void *base_ptr,
1781 uint16 smb_flags2,
1782 char **ppdest,
1783 const void *src,
1784 size_t src_len,
1785 int flags)
1787 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1788 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1789 "UNICODE defined");
1792 if (!(flags & STR_ASCII) && \
1793 ((flags & STR_UNICODE || \
1794 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1795 return pull_ucs2_base_talloc(ctx,
1796 base_ptr,
1797 ppdest,
1798 src,
1799 src_len,
1800 flags);
1802 return pull_ascii_base_talloc(ctx,
1803 ppdest,
1804 src,
1805 src_len,
1806 flags);
1810 size_t align_string(const void *base_ptr, const char *p, int flags)
1812 if (!(flags & STR_ASCII) && \
1813 ((flags & STR_UNICODE || \
1814 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1815 return ucs2_align(base_ptr, p, flags);
1817 return 0;
1821 Return the unicode codepoint for the next multi-byte CH_UNIX character
1822 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1824 Also return the number of bytes consumed (which tells the caller
1825 how many bytes to skip to get to the next CH_UNIX character).
1827 Return INVALID_CODEPOINT if the next character cannot be converted.
1830 codepoint_t next_codepoint(const char *str, size_t *size)
1832 /* It cannot occupy more than 4 bytes in UTF16 format */
1833 uint8_t buf[4];
1834 smb_iconv_t descriptor;
1835 size_t ilen_orig;
1836 size_t ilen;
1837 size_t olen;
1838 char *outbuf;
1840 if ((str[0] & 0x80) == 0) {
1841 *size = 1;
1842 return (codepoint_t)str[0];
1845 /* We assume that no multi-byte character can take
1846 more than 5 bytes. This is OK as we only
1847 support codepoints up to 1M */
1849 ilen_orig = strnlen(str, 5);
1850 ilen = ilen_orig;
1852 lazy_initialize_conv();
1854 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1855 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1856 *size = 1;
1857 return INVALID_CODEPOINT;
1860 /* This looks a little strange, but it is needed to cope
1861 with codepoints above 64k which are encoded as per RFC2781. */
1862 olen = 2;
1863 outbuf = (char *)buf;
1864 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1865 if (olen == 2) {
1866 /* We failed to convert to a 2 byte character.
1867 See if we can convert to a 4 UTF16-LE byte char encoding.
1869 olen = 4;
1870 outbuf = (char *)buf;
1871 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1872 if (olen == 4) {
1873 /* We didn't convert any bytes */
1874 *size = 1;
1875 return INVALID_CODEPOINT;
1877 olen = 4 - olen;
1878 } else {
1879 olen = 2 - olen;
1882 *size = ilen_orig - ilen;
1884 if (olen == 2) {
1885 /* 2 byte, UTF16-LE encoded value. */
1886 return (codepoint_t)SVAL(buf, 0);
1888 if (olen == 4) {
1889 /* Decode a 4 byte UTF16-LE character manually.
1890 See RFC2871 for the encoding machanism.
1892 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1893 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1895 return (codepoint_t)0x10000 +
1896 (w1 << 10) + w2;
1899 /* no other length is valid */
1900 return INVALID_CODEPOINT;
1904 push a single codepoint into a CH_UNIX string the target string must
1905 be able to hold the full character, which is guaranteed if it is at
1906 least 5 bytes in size. The caller may pass less than 5 bytes if they
1907 are sure the character will fit (for example, you can assume that
1908 uppercase/lowercase of a character will not add more than 1 byte)
1910 return the number of bytes occupied by the CH_UNIX character, or
1911 -1 on failure
1913 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1915 smb_iconv_t descriptor;
1916 uint8_t buf[4];
1917 size_t ilen, olen;
1918 const char *inbuf;
1920 if (c < 128) {
1921 *str = c;
1922 return 1;
1925 lazy_initialize_conv();
1927 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1928 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1929 return -1;
1932 if (c < 0x10000) {
1933 ilen = 2;
1934 olen = 5;
1935 inbuf = (char *)buf;
1936 SSVAL(buf, 0, c);
1937 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1938 if (ilen != 0) {
1939 return -1;
1941 return 5 - olen;
1944 c -= 0x10000;
1946 buf[0] = (c>>10) & 0xFF;
1947 buf[1] = (c>>18) | 0xd8;
1948 buf[2] = c & 0xFF;
1949 buf[3] = ((c>>8) & 0x3) | 0xdc;
1951 ilen = 4;
1952 olen = 5;
1953 inbuf = (char *)buf;
1955 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1956 if (ilen != 0) {
1957 return -1;
1959 return 5 - olen;