Determine case sensitivity based on file system attributes.
[Samba.git] / source / lib / charcnv.c
blobe51c33d7cd679c684435205f1212b294b0c021a4
1 /*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #include "includes.h"
/**
 * Return the character substituted for input that cannot be converted.
 *
 * The value is hard-coded; it could become a configuration parameter
 * if anyone needs that.
 **/
char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
32 /**
33 * @file
35 * @brief Character-set conversion routines built on our iconv.
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
44 * @sa lib/iconv.c
48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50 static bool initialized;
52 /**
53 * Return the name of a charset to give to iconv().
54 **/
55 static const char *charset_name(charset_t ch)
57 const char *ret = NULL;
59 if (ch == CH_UTF16LE) ret = "UTF-16LE";
60 else if (ch == CH_UTF16BE) ret = "UTF-16BE";
61 else if (ch == CH_UNIX) ret = lp_unix_charset();
62 else if (ch == CH_DOS) ret = lp_dos_charset();
63 else if (ch == CH_DISPLAY) ret = lp_display_charset();
64 else if (ch == CH_UTF8) ret = "UTF8";
66 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
67 if (ret && !strcmp(ret, "LOCALE")) {
68 const char *ln = NULL;
70 #ifdef HAVE_SETLOCALE
71 setlocale(LC_ALL, "");
72 #endif
73 ln = nl_langinfo(CODESET);
74 if (ln) {
75 /* Check whether the charset name is supported
76 by iconv */
77 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
78 if (handle == (smb_iconv_t) -1) {
79 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
80 ln = NULL;
81 } else {
82 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
83 smb_iconv_close(handle);
86 ret = ln;
88 #endif
90 if (!ret || !*ret) ret = "ASCII";
91 return ret;
94 void lazy_initialize_conv(void)
96 if (!initialized) {
97 load_case_tables();
98 init_iconv();
99 initialized = true;
104 * Destroy global objects allocated by init_iconv()
106 void gfree_charcnv(void)
108 int c1, c2;
110 for (c1=0;c1<NUM_CHARSETS;c1++) {
111 for (c2=0;c2<NUM_CHARSETS;c2++) {
112 if ( conv_handles[c1][c2] ) {
113 smb_iconv_close( conv_handles[c1][c2] );
114 conv_handles[c1][c2] = 0;
118 initialized = false;
122 * Initialize iconv conversion descriptors.
124 * This is called the first time it is needed, and also called again
125 * every time the configuration is reloaded, because the charset or
126 * codepage might have changed.
128 void init_iconv(void)
130 int c1, c2;
131 bool did_reload = False;
133 /* so that charset_name() works we need to get the UNIX<->UCS2 going
134 first */
135 if (!conv_handles[CH_UNIX][CH_UTF16LE])
136 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
138 if (!conv_handles[CH_UTF16LE][CH_UNIX])
139 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
141 for (c1=0;c1<NUM_CHARSETS;c1++) {
142 for (c2=0;c2<NUM_CHARSETS;c2++) {
143 const char *n1 = charset_name((charset_t)c1);
144 const char *n2 = charset_name((charset_t)c2);
145 if (conv_handles[c1][c2] &&
146 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
147 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
148 continue;
150 did_reload = True;
152 if (conv_handles[c1][c2])
153 smb_iconv_close(conv_handles[c1][c2]);
155 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
156 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
157 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
158 charset_name((charset_t)c1), charset_name((charset_t)c2)));
159 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
160 n1 = "ASCII";
162 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
163 n2 = "ASCII";
165 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
166 n1, n2 ));
167 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
168 if (!conv_handles[c1][c2]) {
169 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
170 smb_panic("init_iconv: conv_handle initialization failed");
176 if (did_reload) {
177 /* XXX: Does this really get called every time the dos
178 * codepage changes? */
179 /* XXX: Is the did_reload test too strict? */
180 conv_silent = True;
181 init_valid_table();
182 conv_silent = False;
187 * Convert string from one encoding to another, making error checking etc
188 * Slow path version - uses (slow) iconv.
190 * @param src pointer to source string (multibyte or singlebyte)
191 * @param srclen length of the source string in bytes
192 * @param dest pointer to destination string (multibyte or singlebyte)
193 * @param destlen maximal length allowed for string
194 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
195 * @returns the number of bytes occupied in the destination
197 * Ensure the srclen contains the terminating zero.
201 static size_t convert_string_internal(charset_t from, charset_t to,
202 void const *src, size_t srclen,
203 void *dest, size_t destlen, bool allow_bad_conv)
205 size_t i_len, o_len;
206 size_t retval;
207 const char* inbuf = (const char*)src;
208 char* outbuf = (char*)dest;
209 smb_iconv_t descriptor;
211 lazy_initialize_conv();
213 descriptor = conv_handles[from][to];
215 if (srclen == (size_t)-1) {
216 if (from == CH_UTF16LE || from == CH_UTF16BE) {
217 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
218 } else {
219 srclen = strlen((const char *)src)+1;
224 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
225 if (!conv_silent)
226 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
227 return (size_t)-1;
230 i_len=srclen;
231 o_len=destlen;
233 again:
235 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
236 if(retval==(size_t)-1) {
237 const char *reason="unknown error";
238 switch(errno) {
239 case EINVAL:
240 reason="Incomplete multibyte sequence";
241 if (!conv_silent)
242 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
243 if (allow_bad_conv)
244 goto use_as_is;
245 break;
246 case E2BIG:
247 reason="No more room";
248 if (!conv_silent) {
249 if (from == CH_UNIX) {
250 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
251 charset_name(from), charset_name(to),
252 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
253 } else {
254 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
255 charset_name(from), charset_name(to),
256 (unsigned int)srclen, (unsigned int)destlen));
259 break;
260 case EILSEQ:
261 reason="Illegal multibyte sequence";
262 if (!conv_silent)
263 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
264 if (allow_bad_conv)
265 goto use_as_is;
266 break;
267 default:
268 if (!conv_silent)
269 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
270 break;
272 /* smb_panic(reason); */
274 return destlen-o_len;
276 use_as_is:
279 * Conversion not supported. This is actually an error, but there are so
280 * many misconfigured iconv systems and smb.conf's out there we can't just
281 * fail. Do a very bad conversion instead.... JRA.
285 if (o_len == 0 || i_len == 0)
286 return destlen - o_len;
288 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
289 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
290 /* Can't convert from utf16 any endian to multibyte.
291 Replace with the default fail char.
293 if (i_len < 2)
294 return destlen - o_len;
295 if (i_len >= 2) {
296 *outbuf = lp_failed_convert_char();
298 outbuf++;
299 o_len--;
301 inbuf += 2;
302 i_len -= 2;
305 if (o_len == 0 || i_len == 0)
306 return destlen - o_len;
308 /* Keep trying with the next char... */
309 goto again;
311 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
312 /* Can't convert to UTF16LE - just widen by adding the
313 default fail char then zero.
315 if (o_len < 2)
316 return destlen - o_len;
318 outbuf[0] = lp_failed_convert_char();
319 outbuf[1] = '\0';
321 inbuf++;
322 i_len--;
324 outbuf += 2;
325 o_len -= 2;
327 if (o_len == 0 || i_len == 0)
328 return destlen - o_len;
330 /* Keep trying with the next char... */
331 goto again;
333 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
334 to != CH_UTF16LE && to != CH_UTF16BE) {
335 /* Failed multibyte to multibyte. Just copy the default fail char and
336 try again. */
337 outbuf[0] = lp_failed_convert_char();
339 inbuf++;
340 i_len--;
342 outbuf++;
343 o_len--;
345 if (o_len == 0 || i_len == 0)
346 return destlen - o_len;
348 /* Keep trying with the next char... */
349 goto again;
351 } else {
352 /* Keep compiler happy.... */
353 return destlen - o_len;
359 * Convert string from one encoding to another, making error checking etc
360 * Fast path version - handles ASCII first.
362 * @param src pointer to source string (multibyte or singlebyte)
363 * @param srclen length of the source string in bytes, or -1 for nul terminated.
364 * @param dest pointer to destination string (multibyte or singlebyte)
365 * @param destlen maximal length allowed for string - *NEVER* -1.
366 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
367 * @returns the number of bytes occupied in the destination
369 * Ensure the srclen contains the terminating zero.
371 * This function has been hand-tuned to provide a fast path.
372 * Don't change unless you really know what you are doing. JRA.
375 size_t convert_string(charset_t from, charset_t to,
376 void const *src, size_t srclen,
377 void *dest, size_t destlen, bool allow_bad_conv)
380 * NB. We deliberately don't do a strlen here if srclen == -1.
381 * This is very expensive over millions of calls and is taken
382 * care of in the slow path in convert_string_internal. JRA.
385 #ifdef DEVELOPER
386 SMB_ASSERT(destlen != (size_t)-1);
387 #endif
389 if (srclen == 0)
390 return 0;
392 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
393 const unsigned char *p = (const unsigned char *)src;
394 unsigned char *q = (unsigned char *)dest;
395 size_t slen = srclen;
396 size_t dlen = destlen;
397 unsigned char lastp = '\0';
398 size_t retval = 0;
400 /* If all characters are ascii, fast path here. */
401 while (slen && dlen) {
402 if ((lastp = *p) <= 0x7f) {
403 *q++ = *p++;
404 if (slen != (size_t)-1) {
405 slen--;
407 dlen--;
408 retval++;
409 if (!lastp)
410 break;
411 } else {
412 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
413 goto general_case;
414 #else
415 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
416 #endif
419 if (!dlen) {
420 /* Even if we fast path we should note if we ran out of room. */
421 if (((slen != (size_t)-1) && slen) ||
422 ((slen == (size_t)-1) && lastp)) {
423 errno = E2BIG;
426 return retval;
427 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
428 const unsigned char *p = (const unsigned char *)src;
429 unsigned char *q = (unsigned char *)dest;
430 size_t retval = 0;
431 size_t slen = srclen;
432 size_t dlen = destlen;
433 unsigned char lastp = '\0';
435 /* If all characters are ascii, fast path here. */
436 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
437 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
438 *q++ = *p;
439 if (slen != (size_t)-1) {
440 slen -= 2;
442 p += 2;
443 dlen--;
444 retval++;
445 if (!lastp)
446 break;
447 } else {
448 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
449 goto general_case;
450 #else
451 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
452 #endif
455 if (!dlen) {
456 /* Even if we fast path we should note if we ran out of room. */
457 if (((slen != (size_t)-1) && slen) ||
458 ((slen == (size_t)-1) && lastp)) {
459 errno = E2BIG;
462 return retval;
463 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
464 const unsigned char *p = (const unsigned char *)src;
465 unsigned char *q = (unsigned char *)dest;
466 size_t retval = 0;
467 size_t slen = srclen;
468 size_t dlen = destlen;
469 unsigned char lastp = '\0';
471 /* If all characters are ascii, fast path here. */
472 while (slen && (dlen >= 2)) {
473 if ((lastp = *p) <= 0x7F) {
474 *q++ = *p++;
475 *q++ = '\0';
476 if (slen != (size_t)-1) {
477 slen--;
479 dlen -= 2;
480 retval += 2;
481 if (!lastp)
482 break;
483 } else {
484 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
485 goto general_case;
486 #else
487 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
488 #endif
491 if (!dlen) {
492 /* Even if we fast path we should note if we ran out of room. */
493 if (((slen != (size_t)-1) && slen) ||
494 ((slen == (size_t)-1) && lastp)) {
495 errno = E2BIG;
498 return retval;
501 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
502 general_case:
503 #endif
504 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
508 * Convert between character sets, allocating a new buffer for the result.
510 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
511 * (this is a bad interface and needs fixing. JRA).
512 * @param srclen length of source buffer.
513 * @param dest always set at least to NULL
514 * @param converted_size set to the size of the allocated buffer on return
515 * true
516 * @note -1 is not accepted for srclen.
518 * @return True if new buffer was correctly allocated, and string was
519 * converted.
521 * Ensure the srclen contains the terminating zero.
523 * I hate the goto's in this function. It's embarressing.....
524 * There has to be a cleaner way to do this. JRA.
527 bool convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
528 void const *src, size_t srclen, void *dst,
529 size_t *converted_size, bool allow_bad_conv)
531 size_t i_len, o_len, destlen = (srclen * 3) / 2;
532 size_t retval;
533 const char *inbuf = (const char *)src;
534 char *outbuf = NULL, *ob = NULL;
535 smb_iconv_t descriptor;
536 void **dest = (void **)dst;
538 *dest = NULL;
540 if (!converted_size) {
541 errno = EINVAL;
542 return false;
545 if (src == NULL || srclen == (size_t)-1) {
546 errno = EINVAL;
547 return false;
549 if (srclen == 0) {
550 ob = ((ctx != NULL) ? talloc_strdup(ctx, "") : SMB_STRDUP(""));
551 if (ob == NULL) {
552 errno = ENOMEM;
553 return false;
555 *dest = ob;
556 *converted_size = 0;
557 return true;
560 lazy_initialize_conv();
562 descriptor = conv_handles[from][to];
564 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
565 if (!conv_silent)
566 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
567 errno = EOPNOTSUPP;
568 return false;
571 convert:
573 /* +2 is for ucs2 null termination. */
574 if ((destlen*2)+2 < destlen) {
575 /* wrapped ! abort. */
576 if (!conv_silent)
577 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
578 if (!ctx)
579 SAFE_FREE(outbuf);
580 errno = EOPNOTSUPP;
581 return false;
582 } else {
583 destlen = destlen * 2;
586 /* +2 is for ucs2 null termination. */
587 if (ctx) {
588 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
589 } else {
590 ob = (char *)SMB_REALLOC(ob, destlen + 2);
593 if (!ob) {
594 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
595 errno = ENOMEM;
596 return false;
598 outbuf = ob;
599 i_len = srclen;
600 o_len = destlen;
602 again:
604 retval = smb_iconv(descriptor,
605 &inbuf, &i_len,
606 &outbuf, &o_len);
607 if(retval == (size_t)-1) {
608 const char *reason="unknown error";
609 switch(errno) {
610 case EINVAL:
611 reason="Incomplete multibyte sequence";
612 if (!conv_silent)
613 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
614 if (allow_bad_conv)
615 goto use_as_is;
616 break;
617 case E2BIG:
618 goto convert;
619 case EILSEQ:
620 reason="Illegal multibyte sequence";
621 if (!conv_silent)
622 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
623 if (allow_bad_conv)
624 goto use_as_is;
625 break;
627 if (!conv_silent)
628 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
629 /* smb_panic(reason); */
630 if (ctx) {
631 TALLOC_FREE(ob);
632 } else {
633 SAFE_FREE(ob);
635 return false;
638 out:
640 destlen = destlen - o_len;
641 /* Don't shrink unless we're reclaiming a lot of
642 * space. This is in the hot codepath and these
643 * reallocs *cost*. JRA.
645 if (o_len > 1024) {
646 /* We're shrinking here so we know the +2 is safe from wrap. */
647 if (ctx) {
648 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
649 } else {
650 ob = (char *)SMB_REALLOC(ob,destlen + 2);
654 if (destlen && !ob) {
655 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
656 errno = ENOMEM;
657 return false;
660 *dest = ob;
662 /* Must ucs2 null terminate in the extra space we allocated. */
663 ob[destlen] = '\0';
664 ob[destlen+1] = '\0';
666 *converted_size = destlen;
667 return true;
669 use_as_is:
672 * Conversion not supported. This is actually an error, but there are so
673 * many misconfigured iconv systems and smb.conf's out there we can't just
674 * fail. Do a very bad conversion instead.... JRA.
678 if (o_len == 0 || i_len == 0)
679 goto out;
681 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
682 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
683 /* Can't convert from utf16 any endian to multibyte.
684 Replace with the default fail char.
687 if (i_len < 2)
688 goto out;
690 if (i_len >= 2) {
691 *outbuf = lp_failed_convert_char();
693 outbuf++;
694 o_len--;
696 inbuf += 2;
697 i_len -= 2;
700 if (o_len == 0 || i_len == 0)
701 goto out;
703 /* Keep trying with the next char... */
704 goto again;
706 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
707 /* Can't convert to UTF16LE - just widen by adding the
708 default fail char then zero.
710 if (o_len < 2)
711 goto out;
713 outbuf[0] = lp_failed_convert_char();
714 outbuf[1] = '\0';
716 inbuf++;
717 i_len--;
719 outbuf += 2;
720 o_len -= 2;
722 if (o_len == 0 || i_len == 0)
723 goto out;
725 /* Keep trying with the next char... */
726 goto again;
728 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
729 to != CH_UTF16LE && to != CH_UTF16BE) {
730 /* Failed multibyte to multibyte. Just copy the default fail char and
731 try again. */
732 outbuf[0] = lp_failed_convert_char();
734 inbuf++;
735 i_len--;
737 outbuf++;
738 o_len--;
740 if (o_len == 0 || i_len == 0)
741 goto out;
743 /* Keep trying with the next char... */
744 goto again;
746 } else {
747 /* Keep compiler happy.... */
748 goto out;
754 * Convert between character sets, allocating a new buffer using talloc for the result.
756 * @param srclen length of source buffer.
757 * @param dest always set at least to NULL
758 * @note -1 is not accepted for srclen.
760 * @returns Size in bytes of the converted string; or -1 in case of error.
762 size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
763 void const *src, size_t srclen, void *dst,
764 bool allow_bad_conv)
766 void **dest = (void **)dst;
767 size_t dest_len;
769 *dest = NULL;
770 if (!convert_string_allocate(ctx, from, to, src, srclen, dest,
771 &dest_len, allow_bad_conv))
772 return (size_t)-1;
773 if (*dest == NULL)
774 return (size_t)-1;
775 return dest_len;
778 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
780 size_t size;
781 smb_ucs2_t *buffer;
783 size = push_ucs2_allocate(&buffer, src);
784 if (size == (size_t)-1) {
785 return (size_t)-1;
787 if (!strupper_w(buffer) && (dest == src)) {
788 free(buffer);
789 return srclen;
792 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
793 free(buffer);
794 return size;
798 strdup() a unix string to upper case.
801 char *strdup_upper(const char *s)
803 char *out_buffer = SMB_STRDUP(s);
804 const unsigned char *p = (const unsigned char *)s;
805 unsigned char *q = (unsigned char *)out_buffer;
807 if (!q) {
808 return NULL;
811 /* this is quite a common operation, so we want it to be
812 fast. We optimise for the ascii case, knowing that all our
813 supported multi-byte character sets are ascii-compatible
814 (ie. they match for the first 128 chars) */
816 while (*p) {
817 if (*p & 0x80)
818 break;
819 *q++ = toupper_ascii_fast(*p);
820 p++;
823 if (*p) {
824 /* MB case. */
825 size_t size, size2;
826 smb_ucs2_t *buffer = NULL;
828 SAFE_FREE(out_buffer);
829 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, s,
830 strlen(s) + 1, (void **)(void *)&buffer, &size,
831 True)) {
832 return NULL;
835 strupper_w(buffer);
837 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, buffer,
838 size, (void **)(void *)&out_buffer, &size2, True)) {
839 TALLOC_FREE(buffer);
840 return NULL;
843 /* Don't need the intermediate buffer
844 * anymore.
846 TALLOC_FREE(buffer);
849 return out_buffer;
853 talloc_strdup() a unix string to upper case.
856 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
858 char *out_buffer = talloc_strdup(ctx,s);
859 const unsigned char *p = (const unsigned char *)s;
860 unsigned char *q = (unsigned char *)out_buffer;
862 if (!q) {
863 return NULL;
866 /* this is quite a common operation, so we want it to be
867 fast. We optimise for the ascii case, knowing that all our
868 supported multi-byte character sets are ascii-compatible
869 (ie. they match for the first 128 chars) */
871 while (*p) {
872 if (*p & 0x80)
873 break;
874 *q++ = toupper_ascii_fast(*p);
875 p++;
878 if (*p) {
879 /* MB case. */
880 size_t size;
881 smb_ucs2_t *ubuf = NULL;
883 /* We're not using the ascii buffer above. */
884 TALLOC_FREE(out_buffer);
886 size = convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE,
887 s, strlen(s)+1,
888 (void *)&ubuf,
889 True);
890 if (size == (size_t)-1) {
891 return NULL;
894 strupper_w(ubuf);
896 size = convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX,
897 ubuf, size,
898 (void *)&out_buffer,
899 True);
901 /* Don't need the intermediate buffer
902 * anymore.
905 TALLOC_FREE(ubuf);
907 if (size == (size_t)-1) {
908 return NULL;
912 return out_buffer;
915 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
917 size_t size;
918 smb_ucs2_t *buffer = NULL;
920 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, srclen,
921 (void **)(void *)&buffer, &size, True)) {
922 smb_panic("failed to create UCS2 buffer");
924 if (!strlower_w(buffer) && (dest == src)) {
925 SAFE_FREE(buffer);
926 return srclen;
928 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
929 SAFE_FREE(buffer);
930 return size;
934 strdup() a unix string to lower case.
937 char *strdup_lower(const char *s)
939 size_t size;
940 smb_ucs2_t *buffer = NULL;
941 char *out_buffer;
943 size = push_ucs2_allocate(&buffer, s);
944 if (size == -1 || !buffer) {
945 return NULL;
948 strlower_w(buffer);
950 size = pull_ucs2_allocate(&out_buffer, buffer);
951 SAFE_FREE(buffer);
953 if (size == (size_t)-1) {
954 return NULL;
957 return out_buffer;
960 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
962 size_t size;
963 smb_ucs2_t *buffer = NULL;
964 char *out_buffer;
966 size = push_ucs2_talloc(ctx, &buffer, s);
967 if (size == -1 || !buffer) {
968 TALLOC_FREE(buffer);
969 return NULL;
972 strlower_w(buffer);
974 size = pull_ucs2_talloc(ctx, &out_buffer, buffer);
975 TALLOC_FREE(buffer);
977 if (size == (size_t)-1) {
978 TALLOC_FREE(out_buffer);
979 return NULL;
982 return out_buffer;
986 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
988 if (flags & (STR_NOALIGN|STR_ASCII))
989 return 0;
990 return PTR_DIFF(p, base_ptr) & 1;
995 * Copy a string from a char* unix src to a dos codepage string destination.
997 * @return the number of bytes occupied by the string in the destination.
999 * @param flags can include
1000 * <dl>
1001 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
1002 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
1003 * </dl>
1005 * @param dest_len the maximum length in bytes allowed in the
1006 * destination.
1008 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
1010 size_t src_len = strlen(src);
1011 char *tmpbuf = NULL;
1012 size_t ret;
1014 /* No longer allow a length of -1. */
1015 if (dest_len == (size_t)-1) {
1016 smb_panic("push_ascii - dest_len == -1");
1019 if (flags & STR_UPPER) {
1020 tmpbuf = SMB_STRDUP(src);
1021 if (!tmpbuf) {
1022 smb_panic("malloc fail");
1024 strupper_m(tmpbuf);
1025 src = tmpbuf;
1028 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
1029 src_len++;
1032 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
1033 if (ret == (size_t)-1 &&
1034 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
1035 && dest_len > 0) {
1036 ((char *)dest)[0] = '\0';
1038 SAFE_FREE(tmpbuf);
1039 return ret;
1042 size_t push_ascii_fstring(void *dest, const char *src)
1044 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
1047 /********************************************************************
1048 Push an nstring - ensure null terminated. Written by
1049 moriyama@miraclelinux.com (MORIYAMA Masayuki).
1050 ********************************************************************/
1052 size_t push_ascii_nstring(void *dest, const char *src)
1054 size_t i, buffer_len, dest_len;
1055 smb_ucs2_t *buffer;
1057 conv_silent = True;
1058 buffer_len = push_ucs2_allocate(&buffer, src);
1059 if (buffer_len == (size_t)-1) {
1060 smb_panic("failed to create UCS2 buffer");
1063 /* We're using buffer_len below to count ucs2 characters, not bytes. */
1064 buffer_len /= sizeof(smb_ucs2_t);
1066 dest_len = 0;
1067 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
1068 unsigned char mb[10];
1069 /* Convert one smb_ucs2_t character at a time. */
1070 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
1071 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
1072 memcpy((char *)dest + dest_len, mb, mb_len);
1073 dest_len += mb_len;
1074 } else {
1075 errno = E2BIG;
1076 break;
1079 ((char *)dest)[dest_len] = '\0';
1081 SAFE_FREE(buffer);
1082 conv_silent = False;
1083 return dest_len;
1086 /********************************************************************
1087 Push and malloc an ascii string. src and dest null terminated.
1088 ********************************************************************/
1090 size_t push_ascii_allocate(char **dest, const char *src)
1092 size_t dest_len, src_len = strlen(src)+1;
1094 *dest = NULL;
1095 if (!convert_string_allocate(NULL, CH_UNIX, CH_DOS, src, src_len,
1096 (void **)dest, &dest_len, True))
1097 return (size_t)-1;
1098 else
1099 return dest_len;
1103 * Copy a string from a dos codepage source to a unix char* destination.
1105 * The resulting string in "dest" is always null terminated.
1107 * @param flags can have:
1108 * <dl>
1109 * <dt>STR_TERMINATE</dt>
1110 * <dd>STR_TERMINATE means the string in @p src
1111 * is null terminated, and src_len is ignored.</dd>
1112 * </dl>
1114 * @param src_len is the length of the source area in bytes.
1115 * @returns the number of bytes occupied by the string in @p src.
1117 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1119 size_t ret;
1121 if (dest_len == (size_t)-1) {
1122 /* No longer allow dest_len of -1. */
1123 smb_panic("pull_ascii - invalid dest_len of -1");
1126 if (flags & STR_TERMINATE) {
1127 if (src_len == (size_t)-1) {
1128 src_len = strlen((const char *)src) + 1;
1129 } else {
1130 size_t len = strnlen((const char *)src, src_len);
1131 if (len < src_len)
1132 len++;
1133 src_len = len;
1137 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1138 if (ret == (size_t)-1) {
1139 ret = 0;
1140 dest_len = 0;
1143 if (dest_len && ret) {
1144 /* Did we already process the terminating zero ? */
1145 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1146 dest[MIN(ret, dest_len-1)] = 0;
1148 } else {
1149 dest[0] = 0;
1152 return src_len;
1156 * Copy a string from a dos codepage source to a unix char* destination.
1157 Talloc version.
1158 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1159 needs fixing. JRA).
1161 * The resulting string in "dest" is always null terminated.
1163 * @param flags can have:
1164 * <dl>
1165 * <dt>STR_TERMINATE</dt>
1166 * <dd>STR_TERMINATE means the string in @p src
1167 * is null terminated, and src_len is ignored.</dd>
1168 * </dl>
1170 * @param src_len is the length of the source area in bytes.
1171 * @returns the number of bytes occupied by the string in @p src.
1174 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1175 char **ppdest,
1176 const void *src,
1177 size_t src_len,
1178 int flags)
1180 char *dest = NULL;
1181 size_t dest_len = 0;
1183 #ifdef DEVELOPER
1184 /* Ensure we never use the braindead "malloc" varient. */
1185 if (ctx == NULL) {
1186 smb_panic("NULL talloc CTX in pull_ascii_base_talloc\n");
1188 #endif
1190 *ppdest = NULL;
1192 if (!src_len) {
1193 return 0;
1196 if (flags & STR_TERMINATE) {
1197 if (src_len == (size_t)-1) {
1198 src_len = strlen((const char *)src) + 1;
1199 } else {
1200 size_t len = strnlen((const char *)src, src_len);
1201 if (len < src_len)
1202 len++;
1203 src_len = len;
1205 /* Ensure we don't use an insane length from the client. */
1206 if (src_len >= 1024*1024) {
1207 char *msg = talloc_asprintf(ctx,
1208 "Bad src length (%u) in "
1209 "pull_ascii_base_talloc",
1210 (unsigned int)src_len);
1211 smb_panic(msg);
1213 } else {
1214 /* Can't have an unlimited length
1215 * non STR_TERMINATE'd.
1217 if (src_len == (size_t)-1) {
1218 errno = EINVAL;
1219 return 0;
1223 /* src_len != -1 here. */
1225 if (!convert_string_allocate(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1226 &dest_len, True)) {
1227 dest_len = 0;
1230 if (dest_len && dest) {
1231 /* Did we already process the terminating zero ? */
1232 if (dest[dest_len-1] != 0) {
1233 size_t size = talloc_get_size(dest);
1234 /* Have we got space to append the '\0' ? */
1235 if (size <= dest_len) {
1236 /* No, realloc. */
1237 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1238 dest_len+1);
1239 if (!dest) {
1240 /* talloc fail. */
1241 dest_len = (size_t)-1;
1242 return 0;
1245 /* Yay - space ! */
1246 dest[dest_len] = '\0';
1247 dest_len++;
1249 } else if (dest) {
1250 dest[0] = 0;
1253 *ppdest = dest;
1254 return src_len;
1257 size_t pull_ascii_fstring(char *dest, const void *src)
1259 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1262 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1264 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1266 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1270 * Copy a string from a char* src to a unicode destination.
1272 * @returns the number of bytes occupied by the string in the destination.
1274 * @param flags can have:
1276 * <dl>
1277 * <dt>STR_TERMINATE <dd>means include the null termination.
1278 * <dt>STR_UPPER <dd>means uppercase in the destination.
1279 * <dt>STR_NOALIGN <dd>means don't do alignment.
1280 * </dl>
1282 * @param dest_len is the maximum length allowed in the
1283 * destination.
1286 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1288 size_t len=0;
1289 size_t src_len;
1290 size_t ret;
1292 if (dest_len == (size_t)-1) {
1293 /* No longer allow dest_len of -1. */
1294 smb_panic("push_ucs2 - invalid dest_len of -1");
1297 if (flags & STR_TERMINATE)
1298 src_len = (size_t)-1;
1299 else
1300 src_len = strlen(src);
1302 if (ucs2_align(base_ptr, dest, flags)) {
1303 *(char *)dest = 0;
1304 dest = (void *)((char *)dest + 1);
1305 if (dest_len)
1306 dest_len--;
1307 len++;
1310 /* ucs2 is always a multiple of 2 bytes */
1311 dest_len &= ~1;
1313 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1314 if (ret == (size_t)-1) {
1315 if ((flags & STR_TERMINATE) &&
1316 dest &&
1317 dest_len) {
1318 *(char *)dest = 0;
1320 return len;
1323 len += ret;
1325 if (flags & STR_UPPER) {
1326 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1327 size_t i;
1329 /* We check for i < (ret / 2) below as the dest string isn't null
1330 terminated if STR_TERMINATE isn't set. */
1332 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1333 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1334 if (v != dest_ucs2[i]) {
1335 dest_ucs2[i] = v;
1340 return len;
1345 * Copy a string from a unix char* src to a UCS2 destination,
1346 * allocating a buffer using talloc().
1348 * @param dest always set at least to NULL
1350 * @returns The number of bytes occupied by the string in the destination
1351 * or -1 in case of error.
1353 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1355 size_t src_len = strlen(src)+1;
1357 *dest = NULL;
1358 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, (void **)dest, True);
1363 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1365 * @param dest always set at least to NULL
1367 * @returns The number of bytes occupied by the string in the destination
1368 * or -1 in case of error.
1371 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1373 size_t dest_len, src_len = strlen(src)+1;
1375 *dest = NULL;
1376 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF16LE, src, src_len,
1377 (void **)dest, &dest_len, True))
1378 return (size_t)-1;
1379 else
1380 return dest_len;
1384 Copy a string from a char* src to a UTF-8 destination.
1385 Return the number of bytes occupied by the string in the destination
1386 Flags can have:
1387 STR_TERMINATE means include the null termination
1388 STR_UPPER means uppercase in the destination
1389 dest_len is the maximum length allowed in the destination. If dest_len
1390 is -1 then no maxiumum is used.
1393 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1395 size_t src_len = 0;
1396 size_t ret;
1397 char *tmpbuf = NULL;
1399 if (dest_len == (size_t)-1) {
1400 /* No longer allow dest_len of -1. */
1401 smb_panic("push_utf8 - invalid dest_len of -1");
1404 if (flags & STR_UPPER) {
1405 tmpbuf = strdup_upper(src);
1406 if (!tmpbuf) {
1407 return (size_t)-1;
1409 src = tmpbuf;
1410 src_len = strlen(src);
1413 src_len = strlen(src);
1414 if (flags & STR_TERMINATE) {
1415 src_len++;
1418 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1419 SAFE_FREE(tmpbuf);
1420 return ret;
1423 size_t push_utf8_fstring(void *dest, const char *src)
1425 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1429 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1431 * @param dest always set at least to NULL
1433 * @returns The number of bytes occupied by the string in the destination
1436 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1438 size_t src_len = strlen(src)+1;
1440 *dest = NULL;
1441 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1445 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1447 * @param dest always set at least to NULL
1449 * @returns The number of bytes occupied by the string in the destination
1452 size_t push_utf8_allocate(char **dest, const char *src)
1454 size_t dest_len, src_len = strlen(src)+1;
1456 *dest = NULL;
1457 if (!convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len,
1458 (void **)dest, &dest_len, True))
1459 return (size_t)-1;
1460 else
1461 return dest_len;
1465 Copy a string from a ucs2 source to a unix char* destination.
1466 Flags can have:
1467 STR_TERMINATE means the string in src is null terminated.
1468 STR_NOALIGN means don't try to align.
1469 if STR_TERMINATE is set then src_len is ignored if it is -1.
1470 src_len is the length of the source area in bytes
1471 Return the number of bytes occupied by the string in src.
1472 The resulting string in "dest" is always null terminated.
1475 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1477 size_t ret;
1479 if (dest_len == (size_t)-1) {
1480 /* No longer allow dest_len of -1. */
1481 smb_panic("pull_ucs2 - invalid dest_len of -1");
1484 if (!src_len) {
1485 if (dest && dest_len > 0) {
1486 dest[0] = '\0';
1488 return 0;
1491 if (ucs2_align(base_ptr, src, flags)) {
1492 src = (const void *)((const char *)src + 1);
1493 if (src_len != (size_t)-1)
1494 src_len--;
1497 if (flags & STR_TERMINATE) {
1498 /* src_len -1 is the default for null terminated strings. */
1499 if (src_len != (size_t)-1) {
1500 size_t len = strnlen_w((const smb_ucs2_t *)src,
1501 src_len/2);
1502 if (len < src_len/2)
1503 len++;
1504 src_len = len*2;
1508 /* ucs2 is always a multiple of 2 bytes */
1509 if (src_len != (size_t)-1)
1510 src_len &= ~1;
1512 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1513 if (ret == (size_t)-1) {
1514 ret = 0;
1515 dest_len = 0;
1518 if (src_len == (size_t)-1)
1519 src_len = ret*2;
1521 if (dest_len && ret) {
1522 /* Did we already process the terminating zero ? */
1523 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1524 dest[MIN(ret, dest_len-1)] = 0;
1526 } else {
1527 dest[0] = 0;
1530 return src_len;
1534 Copy a string from a ucs2 source to a unix char* destination.
1535 Talloc version with a base pointer.
1536 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1537 needs fixing. JRA).
1538 Flags can have:
1539 STR_TERMINATE means the string in src is null terminated.
1540 STR_NOALIGN means don't try to align.
1541 if STR_TERMINATE is set then src_len is ignored if it is -1.
1542 src_len is the length of the source area in bytes
1543 Return the number of bytes occupied by the string in src.
1544 The resulting string in "dest" is always null terminated.
1547 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1548 const void *base_ptr,
1549 char **ppdest,
1550 const void *src,
1551 size_t src_len,
1552 int flags)
1554 char *dest;
1555 size_t dest_len;
1557 *ppdest = NULL;
1559 #ifdef DEVELOPER
1560 /* Ensure we never use the braindead "malloc" varient. */
1561 if (ctx == NULL) {
1562 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1564 #endif
1566 if (!src_len) {
1567 return 0;
1570 if (ucs2_align(base_ptr, src, flags)) {
1571 src = (const void *)((const char *)src + 1);
1572 if (src_len != (size_t)-1)
1573 src_len--;
1576 if (flags & STR_TERMINATE) {
1577 /* src_len -1 is the default for null terminated strings. */
1578 if (src_len != (size_t)-1) {
1579 size_t len = strnlen_w((const smb_ucs2_t *)src,
1580 src_len/2);
1581 if (len < src_len/2)
1582 len++;
1583 src_len = len*2;
1584 } else {
1586 * src_len == -1 - alloc interface won't take this
1587 * so we must calculate.
1589 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1591 /* Ensure we don't use an insane length from the client. */
1592 if (src_len >= 1024*1024) {
1593 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1595 } else {
1596 /* Can't have an unlimited length
1597 * non STR_TERMINATE'd.
1599 if (src_len == (size_t)-1) {
1600 errno = EINVAL;
1601 return 0;
1605 /* src_len != -1 here. */
1607 /* ucs2 is always a multiple of 2 bytes */
1608 src_len &= ~1;
1610 dest_len = convert_string_talloc(ctx,
1611 CH_UTF16LE,
1612 CH_UNIX,
1613 src,
1614 src_len,
1615 (void *)&dest,
1616 True);
1617 if (dest_len == (size_t)-1) {
1618 dest_len = 0;
1621 if (dest_len) {
1622 /* Did we already process the terminating zero ? */
1623 if (dest[dest_len-1] != 0) {
1624 size_t size = talloc_get_size(dest);
1625 /* Have we got space to append the '\0' ? */
1626 if (size <= dest_len) {
1627 /* No, realloc. */
1628 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1629 dest_len+1);
1630 if (!dest) {
1631 /* talloc fail. */
1632 dest_len = (size_t)-1;
1633 return 0;
1636 /* Yay - space ! */
1637 dest[dest_len] = '\0';
1638 dest_len++;
1640 } else if (dest) {
1641 dest[0] = 0;
1644 *ppdest = dest;
1645 return src_len;
1648 size_t pull_ucs2_fstring(char *dest, const void *src)
1650 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1654 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1656 * @param dest always set at least to NULL
1658 * @returns The number of bytes occupied by the string in the destination
1661 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1663 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1664 *dest = NULL;
1665 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, (void **)dest, True);
1669 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1671 * @param dest always set at least to NULL
1673 * @returns The number of bytes occupied by the string in the destination
1676 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1678 size_t dest_len, src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1679 *dest = NULL;
1680 if (!convert_string_allocate(NULL, CH_UTF16LE, CH_UNIX, src, src_len,
1681 (void **)dest, &dest_len, True))
1682 return (size_t)-1;
1683 else
1684 return dest_len;
1688 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1690 * @param dest always set at least to NULL
1692 * @returns The number of bytes occupied by the string in the destination
1695 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1697 size_t src_len = strlen(src)+1;
1698 *dest = NULL;
1699 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1703 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1705 * @param dest always set at least to NULL
1707 * @returns The number of bytes occupied by the string in the destination
1710 size_t pull_utf8_allocate(char **dest, const char *src)
1712 size_t dest_len, src_len = strlen(src)+1;
1713 *dest = NULL;
1714 if (!convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len,
1715 (void **)dest, &dest_len, True))
1716 return (size_t)-1;
1717 else
1718 return dest_len;
1722 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1724 * @param dest always set at least to NULL
1726 * @returns The number of bytes occupied by the string in the destination
1729 size_t pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1731 size_t src_len = strlen(src)+1;
1732 *dest = NULL;
1733 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, True);
1737 Copy a string from a char* src to a unicode or ascii
1738 dos codepage destination choosing unicode or ascii based on the
1739 flags in the SMB buffer starting at base_ptr.
1740 Return the number of bytes occupied by the string in the destination.
1741 flags can have:
1742 STR_TERMINATE means include the null termination.
1743 STR_UPPER means uppercase in the destination.
1744 STR_ASCII use ascii even with unicode packet.
1745 STR_NOALIGN means don't do alignment.
1746 dest_len is the maximum length allowed in the destination. If dest_len
1747 is -1 then no maxiumum is used.
1750 size_t push_string_fn(const char *function, unsigned int line,
1751 const void *base_ptr, uint16 flags2,
1752 void *dest, const char *src,
1753 size_t dest_len, int flags)
1755 #ifdef DEVELOPER
1756 /* We really need to zero fill here, not clobber
1757 * region, as we want to ensure that valgrind thinks
1758 * all of the outgoing buffer has been written to
1759 * so a send() or write() won't trap an error.
1760 * JRA.
1762 #if 0
1763 clobber_region(function, line, dest, dest_len);
1764 #else
1765 memset(dest, '\0', dest_len);
1766 #endif
1767 #endif
1769 if (!(flags & STR_ASCII) && \
1770 ((flags & STR_UNICODE || \
1771 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1772 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1774 return push_ascii(dest, src, dest_len, flags);
1779 Copy a string from a unicode or ascii source (depending on
1780 the packet flags) to a char* destination.
1781 Flags can have:
1782 STR_TERMINATE means the string in src is null terminated.
1783 STR_UNICODE means to force as unicode.
1784 STR_ASCII use ascii even with unicode packet.
1785 STR_NOALIGN means don't do alignment.
1786 if STR_TERMINATE is set then src_len is ignored is it is -1
1787 src_len is the length of the source area in bytes.
1788 Return the number of bytes occupied by the string in src.
1789 The resulting string in "dest" is always null terminated.
1792 size_t pull_string_fn(const char *function,
1793 unsigned int line,
1794 const void *base_ptr,
1795 uint16 smb_flags2,
1796 char *dest,
1797 const void *src,
1798 size_t dest_len,
1799 size_t src_len,
1800 int flags)
1802 #ifdef DEVELOPER
1803 clobber_region(function, line, dest, dest_len);
1804 #endif
1806 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1807 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1808 "UNICODE defined");
1811 if (!(flags & STR_ASCII) && \
1812 ((flags & STR_UNICODE || \
1813 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1814 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1816 return pull_ascii(dest, src, dest_len, src_len, flags);
1820 Copy a string from a unicode or ascii source (depending on
1821 the packet flags) to a char* destination.
1822 Variant that uses talloc.
1823 Flags can have:
1824 STR_TERMINATE means the string in src is null terminated.
1825 STR_UNICODE means to force as unicode.
1826 STR_ASCII use ascii even with unicode packet.
1827 STR_NOALIGN means don't do alignment.
1828 if STR_TERMINATE is set then src_len is ignored is it is -1
1829 src_len is the length of the source area in bytes.
1830 Return the number of bytes occupied by the string in src.
1831 The resulting string in "dest" is always null terminated.
1834 size_t pull_string_talloc_fn(const char *function,
1835 unsigned int line,
1836 TALLOC_CTX *ctx,
1837 const void *base_ptr,
1838 uint16 smb_flags2,
1839 char **ppdest,
1840 const void *src,
1841 size_t src_len,
1842 int flags)
1844 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1845 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1846 "UNICODE defined");
1849 if (!(flags & STR_ASCII) && \
1850 ((flags & STR_UNICODE || \
1851 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1852 return pull_ucs2_base_talloc(ctx,
1853 base_ptr,
1854 ppdest,
1855 src,
1856 src_len,
1857 flags);
1859 return pull_ascii_base_talloc(ctx,
1860 ppdest,
1861 src,
1862 src_len,
1863 flags);
1867 size_t align_string(const void *base_ptr, const char *p, int flags)
1869 if (!(flags & STR_ASCII) && \
1870 ((flags & STR_UNICODE || \
1871 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1872 return ucs2_align(base_ptr, p, flags);
1874 return 0;
1878 Return the unicode codepoint for the next multi-byte CH_UNIX character
1879 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1881 Also return the number of bytes consumed (which tells the caller
1882 how many bytes to skip to get to the next CH_UNIX character).
1884 Return INVALID_CODEPOINT if the next character cannot be converted.
1887 codepoint_t next_codepoint(const char *str, size_t *size)
1889 /* It cannot occupy more than 4 bytes in UTF16 format */
1890 uint8_t buf[4];
1891 smb_iconv_t descriptor;
1892 size_t ilen_orig;
1893 size_t ilen;
1894 size_t olen;
1895 char *outbuf;
1897 if ((str[0] & 0x80) == 0) {
1898 *size = 1;
1899 return (codepoint_t)str[0];
1902 /* We assume that no multi-byte character can take
1903 more than 5 bytes. This is OK as we only
1904 support codepoints up to 1M */
1906 ilen_orig = strnlen(str, 5);
1907 ilen = ilen_orig;
1909 lazy_initialize_conv();
1911 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1912 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1913 *size = 1;
1914 return INVALID_CODEPOINT;
1917 /* This looks a little strange, but it is needed to cope
1918 with codepoints above 64k which are encoded as per RFC2781. */
1919 olen = 2;
1920 outbuf = (char *)buf;
1921 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1922 if (olen == 2) {
1923 /* We failed to convert to a 2 byte character.
1924 See if we can convert to a 4 UTF16-LE byte char encoding.
1926 olen = 4;
1927 outbuf = (char *)buf;
1928 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1929 if (olen == 4) {
1930 /* We didn't convert any bytes */
1931 *size = 1;
1932 return INVALID_CODEPOINT;
1934 olen = 4 - olen;
1935 } else {
1936 olen = 2 - olen;
1939 *size = ilen_orig - ilen;
1941 if (olen == 2) {
1942 /* 2 byte, UTF16-LE encoded value. */
1943 return (codepoint_t)SVAL(buf, 0);
1945 if (olen == 4) {
1946 /* Decode a 4 byte UTF16-LE character manually.
1947 See RFC2871 for the encoding machanism.
1949 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1950 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1952 return (codepoint_t)0x10000 +
1953 (w1 << 10) + w2;
1956 /* no other length is valid */
1957 return INVALID_CODEPOINT;