s3-charcnv Don't generate valid_table on the fly, rely on valid.dat
[Samba.git] / source3 / lib / charcnv.c
blob4c98f8f33938b1838e01537f4fbcaa9a508f8071
1 /*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #include "includes.h"
/* The substitute character is hard-wired for now; it can be turned into
   a parameter if someone complains.... JRA. */

char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
32 /**
33 * @file
35 * @brief Character-set conversion routines built on our iconv.
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem. It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software. It does
41 * have to be a superset of ASCII. All multibyte sequences must start
42 * with a byte with the high bit set.
44 * @sa lib/iconv.c
/* Table of iconv descriptors. The converters in this file look a
 * descriptor up as conv_handles[from][to]; init_iconv() fills the table
 * and gfree_charcnv() closes and zeroes the entries. */
48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49 static bool conv_silent; /* Should we do a debug if the conversion fails ? */
/* Set by lazy_initialize_conv() once the tables are loaded; cleared again
 * by gfree_charcnv(). */
50 static bool initialized;
52 /**
53 * Return the name of a charset to give to iconv().
54 **/
55 static const char *charset_name(charset_t ch)
57 const char *ret;
59 switch (ch) {
60 case CH_UTF16LE:
61 ret = "UTF-16LE";
62 break;
63 case CH_UTF16BE:
64 ret = "UTF-16BE";
65 break;
66 case CH_UNIX:
67 ret = lp_unix_charset();
68 break;
69 case CH_DOS:
70 ret = lp_dos_charset();
71 break;
72 case CH_DISPLAY:
73 ret = lp_display_charset();
74 break;
75 case CH_UTF8:
76 ret = "UTF8";
77 break;
78 default:
79 ret = NULL;
82 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
83 if (ret && !strcmp(ret, "LOCALE")) {
84 const char *ln = NULL;
86 #ifdef HAVE_SETLOCALE
87 setlocale(LC_ALL, "");
88 #endif
89 ln = nl_langinfo(CODESET);
90 if (ln) {
91 /* Check whether the charset name is supported
92 by iconv */
93 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
94 if (handle == (smb_iconv_t) -1) {
95 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
96 ln = NULL;
97 } else {
98 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
99 smb_iconv_close(handle);
102 ret = ln;
104 #endif
106 if (!ret || !*ret) ret = "ASCII";
107 return ret;
110 void lazy_initialize_conv(void)
112 if (!initialized) {
113 load_case_tables();
114 init_iconv();
115 initialized = true;
120 * Destroy global objects allocated by init_iconv()
122 void gfree_charcnv(void)
124 int c1, c2;
126 for (c1=0;c1<NUM_CHARSETS;c1++) {
127 for (c2=0;c2<NUM_CHARSETS;c2++) {
128 if ( conv_handles[c1][c2] ) {
129 smb_iconv_close( conv_handles[c1][c2] );
130 conv_handles[c1][c2] = 0;
134 initialized = false;
138 * Initialize iconv conversion descriptors.
140 * This is called the first time it is needed, and also called again
141 * every time the configuration is reloaded, because the charset or
142 * codepage might have changed.
144 void init_iconv(void)
146 int c1, c2;
147 bool did_reload = False;
149 /* so that charset_name() works we need to get the UNIX<->UCS2 going
150 first */
151 if (!conv_handles[CH_UNIX][CH_UTF16LE])
152 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
154 if (!conv_handles[CH_UTF16LE][CH_UNIX])
155 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
157 for (c1=0;c1<NUM_CHARSETS;c1++) {
158 for (c2=0;c2<NUM_CHARSETS;c2++) {
159 const char *n1 = charset_name((charset_t)c1);
160 const char *n2 = charset_name((charset_t)c2);
161 if (conv_handles[c1][c2] &&
162 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
163 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
164 continue;
166 did_reload = True;
168 if (conv_handles[c1][c2])
169 smb_iconv_close(conv_handles[c1][c2]);
171 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
172 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
173 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
174 charset_name((charset_t)c1), charset_name((charset_t)c2)));
175 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
176 n1 = "ASCII";
178 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
179 n2 = "ASCII";
181 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
182 n1, n2 ));
183 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
184 if (!conv_handles[c1][c2]) {
185 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
186 smb_panic("init_iconv: conv_handle initialization failed");
194 * Convert string from one encoding to another, making error checking etc
195 * Slow path version - uses (slow) iconv.
197 * @param src pointer to source string (multibyte or singlebyte)
198 * @param srclen length of the source string in bytes
199 * @param dest pointer to destination string (multibyte or singlebyte)
200 * @param destlen maximal length allowed for string
201 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
202 * @returns the number of bytes occupied in the destination
204 * Ensure the srclen contains the terminating zero.
208 static size_t convert_string_internal(charset_t from, charset_t to,
209 void const *src, size_t srclen,
210 void *dest, size_t destlen, bool allow_bad_conv)
212 size_t i_len, o_len;
213 size_t retval;
214 const char* inbuf = (const char*)src;
215 char* outbuf = (char*)dest;
216 smb_iconv_t descriptor;
218 lazy_initialize_conv();
220 descriptor = conv_handles[from][to];
222 if (srclen == (size_t)-1) {
223 if (from == CH_UTF16LE || from == CH_UTF16BE) {
224 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
225 } else {
226 srclen = strlen((const char *)src)+1;
231 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
232 if (!conv_silent)
233 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
234 return (size_t)-1;
237 i_len=srclen;
238 o_len=destlen;
240 again:
242 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
243 if(retval==(size_t)-1) {
244 const char *reason="unknown error";
245 switch(errno) {
246 case EINVAL:
247 reason="Incomplete multibyte sequence";
248 if (!conv_silent)
249 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
250 if (allow_bad_conv)
251 goto use_as_is;
252 return (size_t)-1;
253 case E2BIG:
254 reason="No more room";
255 if (!conv_silent) {
256 if (from == CH_UNIX) {
257 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
258 charset_name(from), charset_name(to),
259 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
260 } else {
261 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
262 charset_name(from), charset_name(to),
263 (unsigned int)srclen, (unsigned int)destlen));
266 break;
267 case EILSEQ:
268 reason="Illegal multibyte sequence";
269 if (!conv_silent)
270 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
271 if (allow_bad_conv)
272 goto use_as_is;
274 return (size_t)-1;
275 default:
276 if (!conv_silent)
277 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
278 return (size_t)-1;
280 /* smb_panic(reason); */
282 return destlen-o_len;
284 use_as_is:
287 * Conversion not supported. This is actually an error, but there are so
288 * many misconfigured iconv systems and smb.conf's out there we can't just
289 * fail. Do a very bad conversion instead.... JRA.
293 if (o_len == 0 || i_len == 0)
294 return destlen - o_len;
296 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
297 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
298 /* Can't convert from utf16 any endian to multibyte.
299 Replace with the default fail char.
301 if (i_len < 2)
302 return destlen - o_len;
303 if (i_len >= 2) {
304 *outbuf = lp_failed_convert_char();
306 outbuf++;
307 o_len--;
309 inbuf += 2;
310 i_len -= 2;
313 if (o_len == 0 || i_len == 0)
314 return destlen - o_len;
316 /* Keep trying with the next char... */
317 goto again;
319 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
320 /* Can't convert to UTF16LE - just widen by adding the
321 default fail char then zero.
323 if (o_len < 2)
324 return destlen - o_len;
326 outbuf[0] = lp_failed_convert_char();
327 outbuf[1] = '\0';
329 inbuf++;
330 i_len--;
332 outbuf += 2;
333 o_len -= 2;
335 if (o_len == 0 || i_len == 0)
336 return destlen - o_len;
338 /* Keep trying with the next char... */
339 goto again;
341 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
342 to != CH_UTF16LE && to != CH_UTF16BE) {
343 /* Failed multibyte to multibyte. Just copy the default fail char and
344 try again. */
345 outbuf[0] = lp_failed_convert_char();
347 inbuf++;
348 i_len--;
350 outbuf++;
351 o_len--;
353 if (o_len == 0 || i_len == 0)
354 return destlen - o_len;
356 /* Keep trying with the next char... */
357 goto again;
359 } else {
360 /* Keep compiler happy.... */
361 return destlen - o_len;
367 * Convert string from one encoding to another, making error checking etc
368 * Fast path version - handles ASCII first.
370 * @param src pointer to source string (multibyte or singlebyte)
371 * @param srclen length of the source string in bytes, or -1 for nul terminated.
372 * @param dest pointer to destination string (multibyte or singlebyte)
373 * @param destlen maximal length allowed for string - *NEVER* -1.
374 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
375 * @returns the number of bytes occupied in the destination
377 * Ensure the srclen contains the terminating zero.
379 * This function has been hand-tuned to provide a fast path.
380 * Don't change unless you really know what you are doing. JRA.
383 size_t convert_string(charset_t from, charset_t to,
384 void const *src, size_t srclen,
385 void *dest, size_t destlen, bool allow_bad_conv)
388 * NB. We deliberately don't do a strlen here if srclen == -1.
389 * This is very expensive over millions of calls and is taken
390 * care of in the slow path in convert_string_internal. JRA.
393 #ifdef DEVELOPER
394 SMB_ASSERT(destlen != (size_t)-1);
395 #endif
397 if (srclen == 0)
398 return 0;
400 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
401 const unsigned char *p = (const unsigned char *)src;
402 unsigned char *q = (unsigned char *)dest;
403 size_t slen = srclen;
404 size_t dlen = destlen;
405 unsigned char lastp = '\0';
406 size_t retval = 0;
408 /* If all characters are ascii, fast path here. */
409 while (slen && dlen) {
410 if ((lastp = *p) <= 0x7f) {
411 *q++ = *p++;
412 if (slen != (size_t)-1) {
413 slen--;
415 dlen--;
416 retval++;
417 if (!lastp)
418 break;
419 } else {
420 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
421 goto general_case;
422 #else
423 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
424 if (ret == (size_t)-1) {
425 return ret;
427 return retval + ret;
428 #endif
431 if (!dlen) {
432 /* Even if we fast path we should note if we ran out of room. */
433 if (((slen != (size_t)-1) && slen) ||
434 ((slen == (size_t)-1) && lastp)) {
435 errno = E2BIG;
438 return retval;
439 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
440 const unsigned char *p = (const unsigned char *)src;
441 unsigned char *q = (unsigned char *)dest;
442 size_t retval = 0;
443 size_t slen = srclen;
444 size_t dlen = destlen;
445 unsigned char lastp = '\0';
447 /* If all characters are ascii, fast path here. */
448 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
449 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
450 *q++ = *p;
451 if (slen != (size_t)-1) {
452 slen -= 2;
454 p += 2;
455 dlen--;
456 retval++;
457 if (!lastp)
458 break;
459 } else {
460 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
461 goto general_case;
462 #else
463 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
464 if (ret == (size_t)-1) {
465 return ret;
467 return retval + ret;
468 #endif
471 if (!dlen) {
472 /* Even if we fast path we should note if we ran out of room. */
473 if (((slen != (size_t)-1) && slen) ||
474 ((slen == (size_t)-1) && lastp)) {
475 errno = E2BIG;
478 return retval;
479 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
480 const unsigned char *p = (const unsigned char *)src;
481 unsigned char *q = (unsigned char *)dest;
482 size_t retval = 0;
483 size_t slen = srclen;
484 size_t dlen = destlen;
485 unsigned char lastp = '\0';
487 /* If all characters are ascii, fast path here. */
488 while (slen && (dlen >= 2)) {
489 if ((lastp = *p) <= 0x7F) {
490 *q++ = *p++;
491 *q++ = '\0';
492 if (slen != (size_t)-1) {
493 slen--;
495 dlen -= 2;
496 retval += 2;
497 if (!lastp)
498 break;
499 } else {
500 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
501 goto general_case;
502 #else
503 size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
504 if (ret == (size_t)-1) {
505 return ret;
507 return retval + ret;
508 #endif
511 if (!dlen) {
512 /* Even if we fast path we should note if we ran out of room. */
513 if (((slen != (size_t)-1) && slen) ||
514 ((slen == (size_t)-1) && lastp)) {
515 errno = E2BIG;
518 return retval;
521 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
522 general_case:
523 #endif
524 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
528 * Convert between character sets, allocating a new buffer using talloc for the result.
530 * @param srclen length of source buffer.
531 * @param dest always set at least to NULL
532 * @parm converted_size set to the number of bytes occupied by the string in
533 * the destination on success.
534 * @note -1 is not accepted for srclen.
536 * @return true if new buffer was correctly allocated, and string was
537 * converted.
539 * Ensure the srclen contains the terminating zero.
541 * I hate the goto's in this function. It's embarressing.....
542 * There has to be a cleaner way to do this. JRA.
544 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
545 void const *src, size_t srclen, void *dst,
546 size_t *converted_size, bool allow_bad_conv)
549 size_t i_len, o_len, destlen = (srclen * 3) / 2;
550 size_t retval;
551 const char *inbuf = (const char *)src;
552 char *outbuf = NULL, *ob = NULL;
553 smb_iconv_t descriptor;
554 void **dest = (void **)dst;
556 *dest = NULL;
558 if (!converted_size) {
559 errno = EINVAL;
560 return false;
563 if (src == NULL || srclen == (size_t)-1) {
564 errno = EINVAL;
565 return false;
567 if (srclen == 0) {
568 ob = talloc_strdup(ctx, "");
569 if (ob == NULL) {
570 errno = ENOMEM;
571 return false;
573 *dest = ob;
574 *converted_size = 0;
575 return true;
578 lazy_initialize_conv();
580 descriptor = conv_handles[from][to];
582 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
583 if (!conv_silent)
584 DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
585 errno = EOPNOTSUPP;
586 return false;
589 convert:
591 /* +2 is for ucs2 null termination. */
592 if ((destlen*2)+2 < destlen) {
593 /* wrapped ! abort. */
594 if (!conv_silent)
595 DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
596 TALLOC_FREE(outbuf);
597 errno = EOPNOTSUPP;
598 return false;
599 } else {
600 destlen = destlen * 2;
603 /* +2 is for ucs2 null termination. */
604 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
606 if (!ob) {
607 DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
608 errno = ENOMEM;
609 return false;
611 outbuf = ob;
612 i_len = srclen;
613 o_len = destlen;
615 again:
617 retval = smb_iconv(descriptor,
618 &inbuf, &i_len,
619 &outbuf, &o_len);
620 if(retval == (size_t)-1) {
621 const char *reason="unknown error";
622 switch(errno) {
623 case EINVAL:
624 reason="Incomplete multibyte sequence";
625 if (!conv_silent)
626 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
627 if (allow_bad_conv)
628 goto use_as_is;
629 break;
630 case E2BIG:
631 goto convert;
632 case EILSEQ:
633 reason="Illegal multibyte sequence";
634 if (!conv_silent)
635 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
636 if (allow_bad_conv)
637 goto use_as_is;
638 break;
640 if (!conv_silent)
641 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
642 /* smb_panic(reason); */
643 TALLOC_FREE(ob);
644 return false;
647 out:
649 destlen = destlen - o_len;
650 /* Don't shrink unless we're reclaiming a lot of
651 * space. This is in the hot codepath and these
652 * reallocs *cost*. JRA.
654 if (o_len > 1024) {
655 /* We're shrinking here so we know the +2 is safe from wrap. */
656 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
659 if (destlen && !ob) {
660 DEBUG(0, ("convert_string_talloc: out of memory!\n"));
661 errno = ENOMEM;
662 return false;
665 *dest = ob;
667 /* Must ucs2 null terminate in the extra space we allocated. */
668 ob[destlen] = '\0';
669 ob[destlen+1] = '\0';
671 *converted_size = destlen;
672 return true;
674 use_as_is:
677 * Conversion not supported. This is actually an error, but there are so
678 * many misconfigured iconv systems and smb.conf's out there we can't just
679 * fail. Do a very bad conversion instead.... JRA.
683 if (o_len == 0 || i_len == 0)
684 goto out;
686 if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
687 ((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
688 /* Can't convert from utf16 any endian to multibyte.
689 Replace with the default fail char.
692 if (i_len < 2)
693 goto out;
695 if (i_len >= 2) {
696 *outbuf = lp_failed_convert_char();
698 outbuf++;
699 o_len--;
701 inbuf += 2;
702 i_len -= 2;
705 if (o_len == 0 || i_len == 0)
706 goto out;
708 /* Keep trying with the next char... */
709 goto again;
711 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
712 /* Can't convert to UTF16LE - just widen by adding the
713 default fail char then zero.
715 if (o_len < 2)
716 goto out;
718 outbuf[0] = lp_failed_convert_char();
719 outbuf[1] = '\0';
721 inbuf++;
722 i_len--;
724 outbuf += 2;
725 o_len -= 2;
727 if (o_len == 0 || i_len == 0)
728 goto out;
730 /* Keep trying with the next char... */
731 goto again;
733 } else if (from != CH_UTF16LE && from != CH_UTF16BE &&
734 to != CH_UTF16LE && to != CH_UTF16BE) {
735 /* Failed multibyte to multibyte. Just copy the default fail char and
736 try again. */
737 outbuf[0] = lp_failed_convert_char();
739 inbuf++;
740 i_len--;
742 outbuf++;
743 o_len--;
745 if (o_len == 0 || i_len == 0)
746 goto out;
748 /* Keep trying with the next char... */
749 goto again;
751 } else {
752 /* Keep compiler happy.... */
753 goto out;
758 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
760 size_t size;
761 smb_ucs2_t *buffer;
763 if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
764 return (size_t)-1;
767 if (!strupper_w(buffer) && (dest == src)) {
768 TALLOC_FREE(buffer);
769 return srclen;
772 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
773 TALLOC_FREE(buffer);
774 return size;
778 talloc_strdup() a unix string to upper case.
781 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
783 char *out_buffer = talloc_strdup(ctx,s);
784 const unsigned char *p = (const unsigned char *)s;
785 unsigned char *q = (unsigned char *)out_buffer;
787 if (!q) {
788 return NULL;
791 /* this is quite a common operation, so we want it to be
792 fast. We optimise for the ascii case, knowing that all our
793 supported multi-byte character sets are ascii-compatible
794 (ie. they match for the first 128 chars) */
796 while (*p) {
797 if (*p & 0x80)
798 break;
799 *q++ = toupper_ascii_fast(*p);
800 p++;
803 if (*p) {
804 /* MB case. */
805 size_t converted_size, converted_size2;
806 smb_ucs2_t *ubuf = NULL;
808 /* We're not using the ascii buffer above. */
809 TALLOC_FREE(out_buffer);
811 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
812 strlen(s)+1, (void *)&ubuf,
813 &converted_size, True))
815 return NULL;
818 strupper_w(ubuf);
820 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
821 converted_size, (void *)&out_buffer,
822 &converted_size2, True))
824 TALLOC_FREE(ubuf);
825 return NULL;
828 /* Don't need the intermediate buffer
829 * anymore.
831 TALLOC_FREE(ubuf);
834 return out_buffer;
837 char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
838 return talloc_strdup_upper(ctx, s);
842 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
844 size_t size;
845 smb_ucs2_t *buffer = NULL;
847 if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
848 (void **)(void *)&buffer, &size,
849 True))
851 smb_panic("failed to create UCS2 buffer");
853 if (!strlower_w(buffer) && (dest == src)) {
854 TALLOC_FREE(buffer);
855 return srclen;
857 size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
858 TALLOC_FREE(buffer);
859 return size;
863 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
865 size_t converted_size;
866 smb_ucs2_t *buffer = NULL;
867 char *out_buffer;
869 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
870 return NULL;
873 strlower_w(buffer);
875 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
876 TALLOC_FREE(buffer);
877 return NULL;
880 TALLOC_FREE(buffer);
882 return out_buffer;
885 char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
886 return talloc_strdup_lower(ctx, s);
889 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
891 if (flags & (STR_NOALIGN|STR_ASCII))
892 return 0;
893 return PTR_DIFF(p, base_ptr) & 1;
898 * Copy a string from a char* unix src to a dos codepage string destination.
900 * @return the number of bytes occupied by the string in the destination.
902 * @param flags can include
903 * <dl>
904 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
905 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
906 * </dl>
908 * @param dest_len the maximum length in bytes allowed in the
909 * destination.
911 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
913 size_t src_len = strlen(src);
914 char *tmpbuf = NULL;
915 size_t ret;
917 /* No longer allow a length of -1. */
918 if (dest_len == (size_t)-1) {
919 smb_panic("push_ascii - dest_len == -1");
922 if (flags & STR_UPPER) {
923 tmpbuf = SMB_STRDUP(src);
924 if (!tmpbuf) {
925 smb_panic("malloc fail");
927 strupper_m(tmpbuf);
928 src = tmpbuf;
931 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
932 src_len++;
935 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
936 if (ret == (size_t)-1 &&
937 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
938 && dest_len > 0) {
939 ((char *)dest)[0] = '\0';
941 SAFE_FREE(tmpbuf);
942 return ret;
945 size_t push_ascii_fstring(void *dest, const char *src)
947 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
950 /********************************************************************
951 Push an nstring - ensure null terminated. Written by
952 moriyama@miraclelinux.com (MORIYAMA Masayuki).
953 ********************************************************************/
955 size_t push_ascii_nstring(void *dest, const char *src)
957 size_t i, buffer_len, dest_len;
958 smb_ucs2_t *buffer;
960 conv_silent = True;
961 if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &buffer_len)) {
962 smb_panic("failed to create UCS2 buffer");
965 /* We're using buffer_len below to count ucs2 characters, not bytes. */
966 buffer_len /= sizeof(smb_ucs2_t);
968 dest_len = 0;
969 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
970 unsigned char mb[10];
971 /* Convert one smb_ucs2_t character at a time. */
972 size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
973 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
974 memcpy((char *)dest + dest_len, mb, mb_len);
975 dest_len += mb_len;
976 } else {
977 errno = E2BIG;
978 break;
981 ((char *)dest)[dest_len] = '\0';
983 conv_silent = False;
984 TALLOC_FREE(buffer);
985 return dest_len;
988 /********************************************************************
989 Push and malloc an ascii string. src and dest null terminated.
990 ********************************************************************/
992 bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
994 size_t src_len = strlen(src)+1;
996 *dest = NULL;
997 return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
998 (void **)dest, converted_size, True);
1002 * Copy a string from a dos codepage source to a unix char* destination.
1004 * The resulting string in "dest" is always null terminated.
1006 * @param flags can have:
1007 * <dl>
1008 * <dt>STR_TERMINATE</dt>
1009 * <dd>STR_TERMINATE means the string in @p src
1010 * is null terminated, and src_len is ignored.</dd>
1011 * </dl>
1013 * @param src_len is the length of the source area in bytes.
1014 * @returns the number of bytes occupied by the string in @p src.
1016 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1018 size_t ret;
1020 if (dest_len == (size_t)-1) {
1021 /* No longer allow dest_len of -1. */
1022 smb_panic("pull_ascii - invalid dest_len of -1");
1025 if (flags & STR_TERMINATE) {
1026 if (src_len == (size_t)-1) {
1027 src_len = strlen((const char *)src) + 1;
1028 } else {
1029 size_t len = strnlen((const char *)src, src_len);
1030 if (len < src_len)
1031 len++;
1032 src_len = len;
1036 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1037 if (ret == (size_t)-1) {
1038 ret = 0;
1039 dest_len = 0;
1042 if (dest_len && ret) {
1043 /* Did we already process the terminating zero ? */
1044 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1045 dest[MIN(ret, dest_len-1)] = 0;
1047 } else {
1048 dest[0] = 0;
1051 return src_len;
1055 * Copy a string from a dos codepage source to a unix char* destination.
1056 * Talloc version.
1058 * The resulting string in "dest" is always null terminated.
1060 * @param flags can have:
1061 * <dl>
1062 * <dt>STR_TERMINATE</dt>
1063 * <dd>STR_TERMINATE means the string in @p src
1064 * is null terminated, and src_len is ignored.</dd>
1065 * </dl>
1067 * @param src_len is the length of the source area in bytes.
1068 * @returns the number of bytes occupied by the string in @p src.
1071 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1072 char **ppdest,
1073 const void *src,
1074 size_t src_len,
1075 int flags)
1077 char *dest = NULL;
1078 size_t dest_len;
1080 *ppdest = NULL;
1082 if (!src_len) {
1083 return 0;
1086 if (flags & STR_TERMINATE) {
1087 if (src_len == (size_t)-1) {
1088 src_len = strlen((const char *)src) + 1;
1089 } else {
1090 size_t len = strnlen((const char *)src, src_len);
1091 if (len < src_len)
1092 len++;
1093 src_len = len;
1095 /* Ensure we don't use an insane length from the client. */
1096 if (src_len >= 1024*1024) {
1097 char *msg = talloc_asprintf(ctx,
1098 "Bad src length (%u) in "
1099 "pull_ascii_base_talloc",
1100 (unsigned int)src_len);
1101 smb_panic(msg);
1103 } else {
1104 /* Can't have an unlimited length
1105 * non STR_TERMINATE'd.
1107 if (src_len == (size_t)-1) {
1108 errno = EINVAL;
1109 return 0;
1113 /* src_len != -1 here. */
1115 if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1116 &dest_len, True)) {
1117 dest_len = 0;
1120 if (dest_len && dest) {
1121 /* Did we already process the terminating zero ? */
1122 if (dest[dest_len-1] != 0) {
1123 size_t size = talloc_get_size(dest);
1124 /* Have we got space to append the '\0' ? */
1125 if (size <= dest_len) {
1126 /* No, realloc. */
1127 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1128 dest_len+1);
1129 if (!dest) {
1130 /* talloc fail. */
1131 dest_len = (size_t)-1;
1132 return 0;
1135 /* Yay - space ! */
1136 dest[dest_len] = '\0';
1137 dest_len++;
1139 } else if (dest) {
1140 dest[0] = 0;
1143 *ppdest = dest;
1144 return src_len;
1147 size_t pull_ascii_fstring(char *dest, const void *src)
1149 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1152 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1154 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1156 return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1160 * Copy a string from a char* src to a unicode destination.
1162 * @returns the number of bytes occupied by the string in the destination.
1164 * @param flags can have:
1166 * <dl>
1167 * <dt>STR_TERMINATE <dd>means include the null termination.
1168 * <dt>STR_UPPER <dd>means uppercase in the destination.
1169 * <dt>STR_NOALIGN <dd>means don't do alignment.
1170 * </dl>
1172 * @param dest_len is the maximum length allowed in the
1173 * destination.
1176 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1178 size_t len=0;
1179 size_t src_len;
1180 size_t ret;
1182 if (dest_len == (size_t)-1) {
1183 /* No longer allow dest_len of -1. */
1184 smb_panic("push_ucs2 - invalid dest_len of -1");
1187 if (flags & STR_TERMINATE)
1188 src_len = (size_t)-1;
1189 else
1190 src_len = strlen(src);
1192 if (ucs2_align(base_ptr, dest, flags)) {
1193 *(char *)dest = 0;
1194 dest = (void *)((char *)dest + 1);
1195 if (dest_len)
1196 dest_len--;
1197 len++;
1200 /* ucs2 is always a multiple of 2 bytes */
1201 dest_len &= ~1;
1203 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1204 if (ret == (size_t)-1) {
1205 if ((flags & STR_TERMINATE) &&
1206 dest &&
1207 dest_len) {
1208 *(char *)dest = 0;
1210 return len;
1213 len += ret;
1215 if (flags & STR_UPPER) {
1216 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1217 size_t i;
1219 /* We check for i < (ret / 2) below as the dest string isn't null
1220 terminated if STR_TERMINATE isn't set. */
1222 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1223 smb_ucs2_t v = toupper_m(dest_ucs2[i]);
1224 if (v != dest_ucs2[i]) {
1225 dest_ucs2[i] = v;
1230 return len;
1235 * Copy a string from a unix char* src to a UCS2 destination,
1236 * allocating a buffer using talloc().
1238 * @param dest always set at least to NULL
1239 * @parm converted_size set to the number of bytes occupied by the string in
1240 * the destination on success.
1242 * @return true if new buffer was correctly allocated, and string was
1243 * converted.
1245 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1246 size_t *converted_size)
1248 size_t src_len = strlen(src)+1;
1250 *dest = NULL;
1251 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1252 (void **)dest, converted_size, True);
1257 Copy a string from a char* src to a UTF-8 destination.
1258 Return the number of bytes occupied by the string in the destination
1259 Flags can have:
1260 STR_TERMINATE means include the null termination
1261 STR_UPPER means uppercase in the destination
1262 dest_len is the maximum length allowed in the destination. If dest_len
1263 is -1 then no maxiumum is used.
1266 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1268 size_t src_len = 0;
1269 size_t ret;
1270 char *tmpbuf = NULL;
1272 if (dest_len == (size_t)-1) {
1273 /* No longer allow dest_len of -1. */
1274 smb_panic("push_utf8 - invalid dest_len of -1");
1277 if (flags & STR_UPPER) {
1278 tmpbuf = strupper_talloc(talloc_tos(), src);
1279 if (!tmpbuf) {
1280 return (size_t)-1;
1282 src = tmpbuf;
1283 src_len = strlen(src);
1286 src_len = strlen(src);
1287 if (flags & STR_TERMINATE) {
1288 src_len++;
1291 ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1292 TALLOC_FREE(tmpbuf);
1293 return ret;
1296 size_t push_utf8_fstring(void *dest, const char *src)
1298 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1302 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1304 * @param dest always set at least to NULL
1305 * @parm converted_size set to the number of bytes occupied by the string in
1306 * the destination on success.
1308 * @return true if new buffer was correctly allocated, and string was
1309 * converted.
1312 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1313 size_t *converted_size)
1315 size_t src_len = strlen(src)+1;
1317 *dest = NULL;
1318 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1319 (void**)dest, converted_size, True);
1323 Copy a string from a ucs2 source to a unix char* destination.
1324 Flags can have:
1325 STR_TERMINATE means the string in src is null terminated.
1326 STR_NOALIGN means don't try to align.
1327 if STR_TERMINATE is set then src_len is ignored if it is -1.
1328 src_len is the length of the source area in bytes
1329 Return the number of bytes occupied by the string in src.
1330 The resulting string in "dest" is always null terminated.
1333 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1335 size_t ret;
1337 if (dest_len == (size_t)-1) {
1338 /* No longer allow dest_len of -1. */
1339 smb_panic("pull_ucs2 - invalid dest_len of -1");
1342 if (!src_len) {
1343 if (dest && dest_len > 0) {
1344 dest[0] = '\0';
1346 return 0;
1349 if (ucs2_align(base_ptr, src, flags)) {
1350 src = (const void *)((const char *)src + 1);
1351 if (src_len != (size_t)-1)
1352 src_len--;
1355 if (flags & STR_TERMINATE) {
1356 /* src_len -1 is the default for null terminated strings. */
1357 if (src_len != (size_t)-1) {
1358 size_t len = strnlen_w((const smb_ucs2_t *)src,
1359 src_len/2);
1360 if (len < src_len/2)
1361 len++;
1362 src_len = len*2;
1366 /* ucs2 is always a multiple of 2 bytes */
1367 if (src_len != (size_t)-1)
1368 src_len &= ~1;
1370 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1371 if (ret == (size_t)-1) {
1372 ret = 0;
1373 dest_len = 0;
1376 if (src_len == (size_t)-1)
1377 src_len = ret*2;
1379 if (dest_len && ret) {
1380 /* Did we already process the terminating zero ? */
1381 if (dest[MIN(ret-1, dest_len-1)] != 0) {
1382 dest[MIN(ret, dest_len-1)] = 0;
1384 } else {
1385 dest[0] = 0;
1388 return src_len;
1392 Copy a string from a ucs2 source to a unix char* destination.
1393 Talloc version with a base pointer.
1394 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1395 needs fixing. JRA).
1396 Flags can have:
1397 STR_TERMINATE means the string in src is null terminated.
1398 STR_NOALIGN means don't try to align.
1399 if STR_TERMINATE is set then src_len is ignored if it is -1.
1400 src_len is the length of the source area in bytes
1401 Return the number of bytes occupied by the string in src.
1402 The resulting string in "dest" is always null terminated.
1405 size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1406 const void *base_ptr,
1407 char **ppdest,
1408 const void *src,
1409 size_t src_len,
1410 int flags)
1412 char *dest;
1413 size_t dest_len;
1415 *ppdest = NULL;
1417 #ifdef DEVELOPER
1418 /* Ensure we never use the braindead "malloc" varient. */
1419 if (ctx == NULL) {
1420 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1422 #endif
1424 if (!src_len) {
1425 return 0;
1428 if (ucs2_align(base_ptr, src, flags)) {
1429 src = (const void *)((const char *)src + 1);
1430 if (src_len != (size_t)-1)
1431 src_len--;
1434 if (flags & STR_TERMINATE) {
1435 /* src_len -1 is the default for null terminated strings. */
1436 if (src_len != (size_t)-1) {
1437 size_t len = strnlen_w((const smb_ucs2_t *)src,
1438 src_len/2);
1439 if (len < src_len/2)
1440 len++;
1441 src_len = len*2;
1442 } else {
1444 * src_len == -1 - alloc interface won't take this
1445 * so we must calculate.
1447 src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1449 /* Ensure we don't use an insane length from the client. */
1450 if (src_len >= 1024*1024) {
1451 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1453 } else {
1454 /* Can't have an unlimited length
1455 * non STR_TERMINATE'd.
1457 if (src_len == (size_t)-1) {
1458 errno = EINVAL;
1459 return 0;
1463 /* src_len != -1 here. */
1465 /* ucs2 is always a multiple of 2 bytes */
1466 src_len &= ~1;
1468 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1469 (void *)&dest, &dest_len, True)) {
1470 dest_len = 0;
1473 if (dest_len) {
1474 /* Did we already process the terminating zero ? */
1475 if (dest[dest_len-1] != 0) {
1476 size_t size = talloc_get_size(dest);
1477 /* Have we got space to append the '\0' ? */
1478 if (size <= dest_len) {
1479 /* No, realloc. */
1480 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1481 dest_len+1);
1482 if (!dest) {
1483 /* talloc fail. */
1484 dest_len = (size_t)-1;
1485 return 0;
1488 /* Yay - space ! */
1489 dest[dest_len] = '\0';
1490 dest_len++;
1492 } else if (dest) {
1493 dest[0] = 0;
1496 *ppdest = dest;
1497 return src_len;
1500 size_t pull_ucs2_fstring(char *dest, const void *src)
1502 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1506 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1508 * @param dest always set at least to NULL
1509 * @parm converted_size set to the number of bytes occupied by the string in
1510 * the destination on success.
1512 * @return true if new buffer was correctly allocated, and string was
1513 * converted.
1516 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1517 size_t *converted_size)
1519 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1521 *dest = NULL;
1522 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1523 (void **)dest, converted_size, True);
1527 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1529 * @param dest always set at least to NULL
1530 * @parm converted_size set to the number of bytes occupied by the string in
1531 * the destination on success.
1533 * @return true if new buffer was correctly allocated, and string was
1534 * converted.
1537 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1538 size_t *converted_size)
1540 size_t src_len = strlen(src)+1;
1542 *dest = NULL;
1543 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1544 (void **)dest, converted_size, True);
1549 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1551 * @param dest always set at least to NULL
1552 * @parm converted_size set to the number of bytes occupied by the string in
1553 * the destination on success.
1555 * @return true if new buffer was correctly allocated, and string was
1556 * converted.
1559 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1560 size_t *converted_size)
1562 size_t src_len = strlen(src)+1;
1564 *dest = NULL;
1565 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1566 (void **)dest, converted_size, True);
1570 Copy a string from a char* src to a unicode or ascii
1571 dos codepage destination choosing unicode or ascii based on the
1572 flags supplied
1573 Return the number of bytes occupied by the string in the destination.
1574 flags can have:
1575 STR_TERMINATE means include the null termination.
1576 STR_UPPER means uppercase in the destination.
1577 STR_ASCII use ascii even with unicode packet.
1578 STR_NOALIGN means don't do alignment.
1579 dest_len is the maximum length allowed in the destination. If dest_len
1580 is -1 then no maxiumum is used.
1583 size_t push_string_check_fn(const char *function, unsigned int line,
1584 void *dest, const char *src,
1585 size_t dest_len, int flags)
1587 #ifdef DEVELOPER
1588 /* We really need to zero fill here, not clobber
1589 * region, as we want to ensure that valgrind thinks
1590 * all of the outgoing buffer has been written to
1591 * so a send() or write() won't trap an error.
1592 * JRA.
1594 #if 0
1595 clobber_region(function, line, dest, dest_len);
1596 #else
1597 memset(dest, '\0', dest_len);
1598 #endif
1599 #endif
1601 if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1602 return push_ucs2(NULL, dest, src, dest_len, flags);
1604 return push_ascii(dest, src, dest_len, flags);
1609 Copy a string from a char* src to a unicode or ascii
1610 dos codepage destination choosing unicode or ascii based on the
1611 flags in the SMB buffer starting at base_ptr.
1612 Return the number of bytes occupied by the string in the destination.
1613 flags can have:
1614 STR_TERMINATE means include the null termination.
1615 STR_UPPER means uppercase in the destination.
1616 STR_ASCII use ascii even with unicode packet.
1617 STR_NOALIGN means don't do alignment.
1618 dest_len is the maximum length allowed in the destination. If dest_len
1619 is -1 then no maxiumum is used.
1622 size_t push_string_base(const char *function, unsigned int line,
1623 const char *base, uint16 flags2,
1624 void *dest, const char *src,
1625 size_t dest_len, int flags)
1627 #ifdef DEVELOPER
1628 /* We really need to zero fill here, not clobber
1629 * region, as we want to ensure that valgrind thinks
1630 * all of the outgoing buffer has been written to
1631 * so a send() or write() won't trap an error.
1632 * JRA.
1634 #if 0
1635 clobber_region(function, line, dest, dest_len);
1636 #else
1637 memset(dest, '\0', dest_len);
1638 #endif
1639 #endif
1641 if (!(flags & STR_ASCII) && \
1642 ((flags & STR_UNICODE || \
1643 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1644 return push_ucs2(base, dest, src, dest_len, flags);
1646 return push_ascii(dest, src, dest_len, flags);
1650 Copy a string from a char* src to a unicode or ascii
1651 dos codepage destination choosing unicode or ascii based on the
1652 flags supplied
1653 Return the number of bytes occupied by the string in the destination.
1654 flags can have:
1655 STR_TERMINATE means include the null termination.
1656 STR_UPPER means uppercase in the destination.
1657 STR_ASCII use ascii even with unicode packet.
1658 STR_NOALIGN means don't do alignment.
1659 dest_len is the maximum length allowed in the destination. If dest_len
1660 is -1 then no maxiumum is used.
1663 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
1665 size_t ret;
1666 #ifdef DEVELOPER
1667 /* We really need to zero fill here, not clobber
1668 * region, as we want to ensure that valgrind thinks
1669 * all of the outgoing buffer has been written to
1670 * so a send() or write() won't trap an error.
1671 * JRA.
1673 memset(dest, '\0', dest_len);
1674 #endif
1676 if (!(flags & STR_ASCII) && \
1677 (flags & STR_UNICODE)) {
1678 ret = push_ucs2(NULL, dest, src, dest_len, flags);
1679 } else {
1680 ret = push_ascii(dest, src, dest_len, flags);
1682 if (ret == (size_t)-1) {
1683 return -1;
1685 return ret;
1689 Copy a string from a unicode or ascii source (depending on
1690 the packet flags) to a char* destination.
1691 Flags can have:
1692 STR_TERMINATE means the string in src is null terminated.
1693 STR_UNICODE means to force as unicode.
1694 STR_ASCII use ascii even with unicode packet.
1695 STR_NOALIGN means don't do alignment.
1696 if STR_TERMINATE is set then src_len is ignored is it is -1
1697 src_len is the length of the source area in bytes.
1698 Return the number of bytes occupied by the string in src.
1699 The resulting string in "dest" is always null terminated.
1702 size_t pull_string_fn(const char *function,
1703 unsigned int line,
1704 const void *base_ptr,
1705 uint16 smb_flags2,
1706 char *dest,
1707 const void *src,
1708 size_t dest_len,
1709 size_t src_len,
1710 int flags)
1712 #ifdef DEVELOPER
1713 clobber_region(function, line, dest, dest_len);
1714 #endif
1716 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1717 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1718 "UNICODE defined");
1721 if (!(flags & STR_ASCII) && \
1722 ((flags & STR_UNICODE || \
1723 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1724 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1726 return pull_ascii(dest, src, dest_len, src_len, flags);
1730 Copy a string from a unicode or ascii source (depending on
1731 the packet flags) to a char* destination.
1732 Variant that uses talloc.
1733 Flags can have:
1734 STR_TERMINATE means the string in src is null terminated.
1735 STR_UNICODE means to force as unicode.
1736 STR_ASCII use ascii even with unicode packet.
1737 STR_NOALIGN means don't do alignment.
1738 if STR_TERMINATE is set then src_len is ignored is it is -1
1739 src_len is the length of the source area in bytes.
1740 Return the number of bytes occupied by the string in src.
1741 The resulting string in "dest" is always null terminated.
1744 size_t pull_string_talloc_fn(const char *function,
1745 unsigned int line,
1746 TALLOC_CTX *ctx,
1747 const void *base_ptr,
1748 uint16 smb_flags2,
1749 char **ppdest,
1750 const void *src,
1751 size_t src_len,
1752 int flags)
1754 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1755 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1756 "UNICODE defined");
1759 if (!(flags & STR_ASCII) && \
1760 ((flags & STR_UNICODE || \
1761 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1762 return pull_ucs2_base_talloc(ctx,
1763 base_ptr,
1764 ppdest,
1765 src,
1766 src_len,
1767 flags);
1769 return pull_ascii_base_talloc(ctx,
1770 ppdest,
1771 src,
1772 src_len,
1773 flags);
1777 size_t align_string(const void *base_ptr, const char *p, int flags)
1779 if (!(flags & STR_ASCII) && \
1780 ((flags & STR_UNICODE || \
1781 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1782 return ucs2_align(base_ptr, p, flags);
1784 return 0;
1788 * Return the unicode codepoint for the next character in the input
1789 * string in the given src_charset.
1790 * The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1792 * Also return the number of bytes consumed (which tells the caller
1793 * how many bytes to skip to get to the next src_charset-character).
1795 * This is implemented (in the non-ascii-case) by first converting the
1796 * next character in the input string to UTF16_LE and then calculating
1797 * the unicode codepoint from that.
1799 * Return INVALID_CODEPOINT if the next character cannot be converted.
1802 codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
1803 size_t *bytes_consumed)
1805 /* It cannot occupy more than 4 bytes in UTF16 format */
1806 uint8_t buf[4];
1807 smb_iconv_t descriptor;
1808 size_t ilen_orig;
1809 size_t ilen;
1810 size_t olen;
1811 char *outbuf;
1813 /* fastpath if the character is ASCII */
1814 if ((str[0] & 0x80) == 0) {
1815 *bytes_consumed = 1;
1816 return (codepoint_t)str[0];
1820 * We assume that no multi-byte character can take more than
1821 * 5 bytes. This is OK as we only support codepoints up to 1M (U+100000)
1824 ilen_orig = strnlen(str, 5);
1825 ilen = ilen_orig;
1827 lazy_initialize_conv();
1829 descriptor = conv_handles[src_charset][CH_UTF16LE];
1830 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1831 *bytes_consumed = 1;
1832 return INVALID_CODEPOINT;
1836 * This looks a little strange, but it is needed to cope
1837 * with codepoints above 64k (U+10000) which are encoded as per RFC2781.
1839 olen = 2;
1840 outbuf = (char *)buf;
1841 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1842 if (olen == 2) {
1844 * We failed to convert to a 2 byte character.
1845 * See if we can convert to a 4 UTF16-LE byte char encoding.
1847 olen = 4;
1848 outbuf = (char *)buf;
1849 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1850 if (olen == 4) {
1851 /* We didn't convert any bytes */
1852 *bytes_consumed = 1;
1853 return INVALID_CODEPOINT;
1855 olen = 4 - olen;
1856 } else {
1857 olen = 2 - olen;
1860 *bytes_consumed = ilen_orig - ilen;
1862 if (olen == 2) {
1863 /* 2 byte, UTF16-LE encoded value. */
1864 return (codepoint_t)SVAL(buf, 0);
1866 if (olen == 4) {
1868 * Decode a 4 byte UTF16-LE character manually.
1869 * See RFC2871 for the encoding machanism.
1871 codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1872 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1874 return (codepoint_t)0x10000 +
1875 (w1 << 10) + w2;
1878 /* no other length is valid */
1879 return INVALID_CODEPOINT;
1883 Return the unicode codepoint for the next multi-byte CH_UNIX character
1884 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1886 Also return the number of bytes consumed (which tells the caller
1887 how many bytes to skip to get to the next CH_UNIX character).
1889 Return INVALID_CODEPOINT if the next character cannot be converted.
1892 codepoint_t next_codepoint(const char *str, size_t *size)
1894 return next_codepoint_ext(str, CH_UNIX, size);
1898 push a single codepoint into a CH_UNIX string the target string must
1899 be able to hold the full character, which is guaranteed if it is at
1900 least 5 bytes in size. The caller may pass less than 5 bytes if they
1901 are sure the character will fit (for example, you can assume that
1902 uppercase/lowercase of a character will not add more than 1 byte)
1904 return the number of bytes occupied by the CH_UNIX character, or
1905 -1 on failure
1907 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1909 smb_iconv_t descriptor;
1910 uint8_t buf[4];
1911 size_t ilen, olen;
1912 const char *inbuf;
1914 if (c < 128) {
1915 *str = c;
1916 return 1;
1919 lazy_initialize_conv();
1921 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1922 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1923 return -1;
1926 if (c < 0x10000) {
1927 ilen = 2;
1928 olen = 5;
1929 inbuf = (char *)buf;
1930 SSVAL(buf, 0, c);
1931 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1932 if (ilen != 0) {
1933 return -1;
1935 return 5 - olen;
1938 c -= 0x10000;
1940 buf[0] = (c>>10) & 0xFF;
1941 buf[1] = (c>>18) | 0xd8;
1942 buf[2] = c & 0xFF;
1943 buf[3] = ((c>>8) & 0x3) | 0xdc;
1945 ilen = 4;
1946 olen = 5;
1947 inbuf = (char *)buf;
1949 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1950 if (ilen != 0) {
1951 return -1;
1953 return 5 - olen;