s3-pdb_ipa: Add ipasam_create_dom_group()
[Samba/vl.git] / source3 / lib / charcnv.c
blob3fd574b98539bc53ecfd1ab512d3e8df7bc85689
1 /*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #include "includes.h"
25 /**
26 * @file
28 * @brief Character-set conversion routines built on our iconv.
30 * @note Samba's internal character set (at least in the 3.0 series)
31 * is always the same as the one for the Unix filesystem. It is
32 * <b>not</b> necessarily UTF-8 and may be different on machines that
33 * need i18n filenames to be compatible with Unix software. It does
34 * have to be a superset of ASCII. All multibyte sequences must start
35 * with a byte with the high bit set.
37 * @sa lib/iconv.c
/* True once lazy_initialize_conv() has completed its one-time setup. */
static bool initialized;

/**
 * Load the case tables and set up the iconv handle the first time a
 * conversion is requested. Later calls return immediately until
 * gfree_charcnv() clears the flag again.
 **/
static void lazy_initialize_conv(void)
{
	if (initialized) {
		return;
	}

	load_case_tables_library();
	init_iconv();
	initialized = true;
}
52 /**
53 * Destroy global objects allocated by init_iconv()
54 **/
55 void gfree_charcnv(void)
57 TALLOC_FREE(global_iconv_handle);
58 initialized = false;
61 /**
62 * Initialize iconv conversion descriptors.
64 * This is called the first time it is needed, and also called again
65 * every time the configuration is reloaded, because the charset or
66 * codepage might have changed.
67 **/
68 void init_iconv(void)
70 global_iconv_handle = smb_iconv_handle_reinit(NULL, lp_dos_charset(),
71 lp_unix_charset(), lp_display_charset(),
72 true, global_iconv_handle);
75 /**
76 * Convert string from one encoding to another, making error checking etc
77 * Slow path version - uses (slow) iconv.
79 * @param src pointer to source string (multibyte or singlebyte)
80 * @param srclen length of the source string in bytes
81 * @param dest pointer to destination string (multibyte or singlebyte)
82 * @param destlen maximal length allowed for string
83 * @param converted size is the number of bytes occupied in the destination
85 * @returns false and sets errno on fail, true on success.
87 * Ensure the srclen contains the terminating zero.
89 **/
91 static bool convert_string_internal(charset_t from, charset_t to,
92 void const *src, size_t srclen,
93 void *dest, size_t destlen, size_t *converted_size)
95 size_t i_len, o_len;
96 size_t retval;
97 const char* inbuf = (const char*)src;
98 char* outbuf = (char*)dest;
99 smb_iconv_t descriptor;
100 struct smb_iconv_handle *ic;
102 lazy_initialize_conv();
103 ic = get_iconv_handle();
104 descriptor = get_conv_handle(ic, from, to);
106 if (srclen == (size_t)-1) {
107 if (from == CH_UTF16LE || from == CH_UTF16BE) {
108 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
109 } else {
110 srclen = strlen((const char *)src)+1;
115 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
116 errno = EINVAL;
117 return false;
120 i_len=srclen;
121 o_len=destlen;
123 retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
124 if (retval == (size_t)-1) {
125 return false;
127 *converted_size = destlen-o_len;
128 return true;
132 * Convert string from one encoding to another, making error checking etc
133 * Fast path version - handles ASCII first.
135 * @param src pointer to source string (multibyte or singlebyte)
136 * @param srclen length of the source string in bytes, or -1 for nul terminated.
137 * @param dest pointer to destination string (multibyte or singlebyte)
138 * @param destlen maximal length allowed for string - *NEVER* -1.
139 * @param converted size is the number of bytes occupied in the destination
141 * @returns false and sets errno on fail, true on success.
143 * Ensure the srclen contains the terminating zero.
145 * This function has been hand-tuned to provide a fast path.
146 * Don't change unless you really know what you are doing. JRA.
149 bool convert_string_error(charset_t from, charset_t to,
150 void const *src, size_t srclen,
151 void *dest, size_t destlen,
152 size_t *converted_size)
155 * NB. We deliberately don't do a strlen here if srclen == -1.
156 * This is very expensive over millions of calls and is taken
157 * care of in the slow path in convert_string_internal. JRA.
160 #ifdef DEVELOPER
161 SMB_ASSERT(destlen != (size_t)-1);
162 #endif
164 if (srclen == 0) {
165 *converted_size = 0;
166 return true;
169 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
170 const unsigned char *p = (const unsigned char *)src;
171 unsigned char *q = (unsigned char *)dest;
172 size_t slen = srclen;
173 size_t dlen = destlen;
174 unsigned char lastp = '\0';
175 size_t retval = 0;
177 /* If all characters are ascii, fast path here. */
178 while (slen && dlen) {
179 if ((lastp = *p) <= 0x7f) {
180 *q++ = *p++;
181 if (slen != (size_t)-1) {
182 slen--;
184 dlen--;
185 retval++;
186 if (!lastp)
187 break;
188 } else {
189 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
190 goto general_case;
191 #else
192 bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
193 *converted_size += retval;
194 return ret;
195 #endif
199 *converted_size = retval;
201 if (!dlen) {
202 /* Even if we fast path we should note if we ran out of room. */
203 if (((slen != (size_t)-1) && slen) ||
204 ((slen == (size_t)-1) && lastp)) {
205 errno = E2BIG;
206 return false;
209 return true;
210 } else if (from == CH_UTF16LE && to != CH_UTF16LE) {
211 const unsigned char *p = (const unsigned char *)src;
212 unsigned char *q = (unsigned char *)dest;
213 size_t retval = 0;
214 size_t slen = srclen;
215 size_t dlen = destlen;
216 unsigned char lastp = '\0';
218 /* If all characters are ascii, fast path here. */
219 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
220 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
221 *q++ = *p;
222 if (slen != (size_t)-1) {
223 slen -= 2;
225 p += 2;
226 dlen--;
227 retval++;
228 if (!lastp)
229 break;
230 } else {
231 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
232 goto general_case;
233 #else
234 bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
235 *converted_size += retval;
236 return ret;
237 #endif
241 *converted_size = retval;
243 if (!dlen) {
244 /* Even if we fast path we should note if we ran out of room. */
245 if (((slen != (size_t)-1) && slen) ||
246 ((slen == (size_t)-1) && lastp)) {
247 errno = E2BIG;
248 return false;
251 return true;
252 } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
253 const unsigned char *p = (const unsigned char *)src;
254 unsigned char *q = (unsigned char *)dest;
255 size_t retval = 0;
256 size_t slen = srclen;
257 size_t dlen = destlen;
258 unsigned char lastp = '\0';
260 /* If all characters are ascii, fast path here. */
261 while (slen && (dlen >= 2)) {
262 if ((lastp = *p) <= 0x7F) {
263 *q++ = *p++;
264 *q++ = '\0';
265 if (slen != (size_t)-1) {
266 slen--;
268 dlen -= 2;
269 retval += 2;
270 if (!lastp)
271 break;
272 } else {
273 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
274 goto general_case;
275 #else
276 bool ret = convert_string_internal(from, to, p, slen, q, dlen, converted_size);
277 *converted_size += retval;
278 return ret;
279 #endif
283 *converted_size = retval;
285 if (!dlen) {
286 /* Even if we fast path we should note if we ran out of room. */
287 if (((slen != (size_t)-1) && slen) ||
288 ((slen == (size_t)-1) && lastp)) {
289 errno = E2BIG;
290 return false;
293 return true;
296 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
297 general_case:
298 #endif
299 return convert_string_internal(from, to, src, srclen, dest, destlen, converted_size);
302 bool convert_string(charset_t from, charset_t to,
303 void const *src, size_t srclen,
304 void *dest, size_t destlen,
305 size_t *converted_size)
307 bool ret = convert_string_error(from, to, src, srclen, dest, destlen, converted_size);
309 if(ret==false) {
310 const char *reason="unknown error";
311 switch(errno) {
312 case EINVAL:
313 reason="Incomplete multibyte sequence";
314 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",
315 reason, (const char *)src));
316 break;
317 case E2BIG:
319 struct smb_iconv_handle *ic;
320 lazy_initialize_conv();
321 ic = get_iconv_handle();
323 reason="No more room";
324 if (from == CH_UNIX) {
325 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
326 charset_name(ic, from), charset_name(ic, to),
327 (unsigned int)srclen, (unsigned int)destlen, (const char *)src));
328 } else {
329 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
330 charset_name(ic, from), charset_name(ic, to),
331 (unsigned int)srclen, (unsigned int)destlen));
333 break;
335 case EILSEQ:
336 reason="Illegal multibyte sequence";
337 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",
338 reason, (const char *)src));
339 break;
340 default:
341 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",
342 reason, (const char *)src));
343 break;
345 /* smb_panic(reason); */
347 return ret;
352 * Convert between character sets, allocating a new buffer using talloc for the result.
354 * @param srclen length of source buffer.
355 * @param dest always set at least to NULL
356 * @parm converted_size set to the number of bytes occupied by the string in
357 * the destination on success.
358 * @note -1 is not accepted for srclen.
360 * @return true if new buffer was correctly allocated, and string was
361 * converted.
363 * Ensure the srclen contains the terminating zero.
365 * I hate the goto's in this function. It's embarressing.....
366 * There has to be a cleaner way to do this. JRA.
368 bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
369 void const *src, size_t srclen, void *dst,
370 size_t *converted_size)
373 size_t i_len, o_len, destlen = (srclen * 3) / 2;
374 size_t retval;
375 const char *inbuf = (const char *)src;
376 char *outbuf = NULL, *ob = NULL;
377 smb_iconv_t descriptor;
378 void **dest = (void **)dst;
379 struct smb_iconv_handle *ic;
381 *dest = NULL;
383 if (src == NULL || srclen == (size_t)-1) {
384 errno = EINVAL;
385 return false;
388 if (srclen == 0) {
389 /* We really should treat this as an error, but
390 there are too many callers that need this to
391 return a NULL terminated string in the correct
392 character set. */
393 if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
394 destlen = 2;
395 } else {
396 destlen = 1;
398 ob = talloc_zero_array(ctx, char, destlen);
399 if (ob == NULL) {
400 errno = ENOMEM;
401 return false;
403 *converted_size = destlen;
404 *dest = ob;
405 return true;
408 lazy_initialize_conv();
409 ic = get_iconv_handle();
410 descriptor = get_conv_handle(ic, from, to);
412 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
413 DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
414 errno = EOPNOTSUPP;
415 return false;
418 convert:
420 /* +2 is for ucs2 null termination. */
421 if ((destlen*2)+2 < destlen) {
422 /* wrapped ! abort. */
423 DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
424 TALLOC_FREE(outbuf);
425 errno = EOPNOTSUPP;
426 return false;
427 } else {
428 destlen = destlen * 2;
431 /* +2 is for ucs2 null termination. */
432 ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
434 if (!ob) {
435 DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
436 errno = ENOMEM;
437 return false;
439 outbuf = ob;
440 i_len = srclen;
441 o_len = destlen;
443 retval = smb_iconv(descriptor,
444 &inbuf, &i_len,
445 &outbuf, &o_len);
446 if(retval == (size_t)-1) {
447 const char *reason="unknown error";
448 switch(errno) {
449 case EINVAL:
450 reason="Incomplete multibyte sequence";
451 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
452 break;
453 case E2BIG:
454 goto convert;
455 case EILSEQ:
456 reason="Illegal multibyte sequence";
457 DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
458 break;
460 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
461 /* smb_panic(reason); */
462 TALLOC_FREE(ob);
463 return false;
466 destlen = destlen - o_len;
467 /* Don't shrink unless we're reclaiming a lot of
468 * space. This is in the hot codepath and these
469 * reallocs *cost*. JRA.
471 if (o_len > 1024) {
472 /* We're shrinking here so we know the +2 is safe from wrap. */
473 ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
476 if (destlen && !ob) {
477 DEBUG(0, ("convert_string_talloc: out of memory!\n"));
478 errno = ENOMEM;
479 return false;
482 *dest = ob;
484 /* Must ucs2 null terminate in the extra space we allocated. */
485 ob[destlen] = '\0';
486 ob[destlen+1] = '\0';
488 /* Ensure we can never return a *converted_size of zero. */
489 if (destlen == 0) {
490 /* As we're now returning false on a bad smb_iconv call,
491 this should never happen. But be safe anyway. */
492 if (to == CH_UTF16LE|| to == CH_UTF16BE || to == CH_UTF16MUNGED) {
493 destlen = 2;
494 } else {
495 destlen = 1;
499 *converted_size = destlen;
500 return true;
503 bool unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
505 size_t size;
506 smb_ucs2_t *buffer;
507 bool ret;
509 if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
510 return (size_t)-1;
513 if (!strupper_w(buffer) && (dest == src)) {
514 TALLOC_FREE(buffer);
515 return srclen;
518 ret = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, &size);
519 TALLOC_FREE(buffer);
520 return ret;
524 talloc_strdup() a unix string to upper case.
527 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
529 char *out_buffer = talloc_strdup(ctx,s);
530 const unsigned char *p = (const unsigned char *)s;
531 unsigned char *q = (unsigned char *)out_buffer;
533 if (!q) {
534 return NULL;
537 /* this is quite a common operation, so we want it to be
538 fast. We optimise for the ascii case, knowing that all our
539 supported multi-byte character sets are ascii-compatible
540 (ie. they match for the first 128 chars) */
542 while (*p) {
543 if (*p & 0x80)
544 break;
545 *q++ = toupper_ascii_fast(*p);
546 p++;
549 if (*p) {
550 /* MB case. */
551 size_t converted_size, converted_size2;
552 smb_ucs2_t *ubuf = NULL;
554 /* We're not using the ascii buffer above. */
555 TALLOC_FREE(out_buffer);
557 if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
558 strlen(s)+1, (void *)&ubuf,
559 &converted_size))
561 return NULL;
564 strupper_w(ubuf);
566 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
567 converted_size, (void *)&out_buffer,
568 &converted_size2))
570 TALLOC_FREE(ubuf);
571 return NULL;
574 /* Don't need the intermediate buffer
575 * anymore.
577 TALLOC_FREE(ubuf);
580 return out_buffer;
583 char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
584 return talloc_strdup_upper(ctx, s);
588 bool unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
590 size_t size;
591 smb_ucs2_t *buffer = NULL;
592 bool ret;
594 if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
595 (void **)(void *)&buffer, &size))
597 smb_panic("failed to create UCS2 buffer");
599 if (!strlower_w(buffer) && (dest == src)) {
600 TALLOC_FREE(buffer);
601 return srclen;
603 ret = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, &size);
604 TALLOC_FREE(buffer);
605 return ret;
609 char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
611 size_t converted_size;
612 smb_ucs2_t *buffer = NULL;
613 char *out_buffer;
615 if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
616 return NULL;
619 strlower_w(buffer);
621 if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
622 TALLOC_FREE(buffer);
623 return NULL;
626 TALLOC_FREE(buffer);
628 return out_buffer;
631 char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
632 return talloc_strdup_lower(ctx, s);
635 size_t ucs2_align(const void *base_ptr, const void *p, int flags)
637 if (flags & (STR_NOALIGN|STR_ASCII))
638 return 0;
639 return PTR_DIFF(p, base_ptr) & 1;
644 * Copy a string from a char* unix src to a dos codepage string destination.
646 * @return the number of bytes occupied by the string in the destination.
648 * @param flags can include
649 * <dl>
650 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
651 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
652 * </dl>
654 * @param dest_len the maximum length in bytes allowed in the
655 * destination.
657 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
659 size_t src_len = strlen(src);
660 char *tmpbuf = NULL;
661 size_t size;
662 bool ret;
664 /* No longer allow a length of -1. */
665 if (dest_len == (size_t)-1) {
666 smb_panic("push_ascii - dest_len == -1");
669 if (flags & STR_UPPER) {
670 tmpbuf = SMB_STRDUP(src);
671 if (!tmpbuf) {
672 smb_panic("malloc fail");
674 strupper_m(tmpbuf);
675 src = tmpbuf;
678 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
679 src_len++;
682 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, &size);
683 if (ret == false &&
684 (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
685 && dest_len > 0) {
686 ((char *)dest)[0] = '\0';
688 SAFE_FREE(tmpbuf);
689 return ret ? size : (size_t)-1;
692 /********************************************************************
693 Push and malloc an ascii string. src and dest null terminated.
694 ********************************************************************/
696 bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
698 size_t src_len = strlen(src)+1;
700 *dest = NULL;
701 return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
702 (void **)dest, converted_size);
706 * Copy a string from a dos codepage source to a unix char* destination.
708 * The resulting string in "dest" is always null terminated.
710 * @param flags can have:
711 * <dl>
712 * <dt>STR_TERMINATE</dt>
713 * <dd>STR_TERMINATE means the string in @p src
714 * is null terminated, and src_len is ignored.</dd>
715 * </dl>
717 * @param src_len is the length of the source area in bytes.
718 * @returns the number of bytes occupied by the string in @p src.
720 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
722 bool ret;
723 size_t size = 0;
725 if (dest_len == (size_t)-1) {
726 /* No longer allow dest_len of -1. */
727 smb_panic("pull_ascii - invalid dest_len of -1");
730 if (flags & STR_TERMINATE) {
731 if (src_len == (size_t)-1) {
732 src_len = strlen((const char *)src) + 1;
733 } else {
734 size_t len = strnlen((const char *)src, src_len);
735 if (len < src_len)
736 len++;
737 src_len = len;
741 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
742 if (ret == false) {
743 size = 0;
744 dest_len = 0;
747 if (dest_len && size) {
748 /* Did we already process the terminating zero ? */
749 if (dest[MIN(size-1, dest_len-1)] != 0) {
750 dest[MIN(size, dest_len-1)] = 0;
752 } else {
753 dest[0] = 0;
756 return src_len;
760 * Copy a string from a dos codepage source to a unix char* destination.
761 * Talloc version.
763 * The resulting string in "dest" is always null terminated.
765 * @param flags can have:
766 * <dl>
767 * <dt>STR_TERMINATE</dt>
768 * <dd>STR_TERMINATE means the string in @p src
769 * is null terminated, and src_len is ignored.</dd>
770 * </dl>
772 * @param src_len is the length of the source area in bytes.
773 * @returns the number of bytes occupied by the string in @p src.
776 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
777 char **ppdest,
778 const void *src,
779 size_t src_len,
780 int flags)
782 char *dest = NULL;
783 size_t dest_len;
785 *ppdest = NULL;
787 if (!src_len) {
788 return 0;
791 if (src_len == (size_t)-1) {
792 smb_panic("sec_len == -1 in pull_ascii_base_talloc");
795 if (flags & STR_TERMINATE) {
796 size_t len = strnlen((const char *)src, src_len);
797 if (len < src_len)
798 len++;
799 src_len = len;
800 /* Ensure we don't use an insane length from the client. */
801 if (src_len >= 1024*1024) {
802 char *msg = talloc_asprintf(ctx,
803 "Bad src length (%u) in "
804 "pull_ascii_base_talloc",
805 (unsigned int)src_len);
806 smb_panic(msg);
810 /* src_len != -1 here. */
812 if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
813 &dest_len)) {
814 dest_len = 0;
817 if (dest_len && dest) {
818 /* Did we already process the terminating zero ? */
819 if (dest[dest_len-1] != 0) {
820 size_t size = talloc_get_size(dest);
821 /* Have we got space to append the '\0' ? */
822 if (size <= dest_len) {
823 /* No, realloc. */
824 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
825 dest_len+1);
826 if (!dest) {
827 /* talloc fail. */
828 dest_len = (size_t)-1;
829 return 0;
832 /* Yay - space ! */
833 dest[dest_len] = '\0';
834 dest_len++;
836 } else if (dest) {
837 dest[0] = 0;
840 *ppdest = dest;
841 return src_len;
845 * Copy a string from a char* src to a unicode destination.
847 * @returns the number of bytes occupied by the string in the destination.
849 * @param flags can have:
851 * <dl>
852 * <dt>STR_TERMINATE <dd>means include the null termination.
853 * <dt>STR_UPPER <dd>means uppercase in the destination.
854 * <dt>STR_NOALIGN <dd>means don't do alignment.
855 * </dl>
857 * @param dest_len is the maximum length allowed in the
858 * destination.
861 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
863 size_t len=0;
864 size_t src_len;
865 size_t size = 0;
866 bool ret;
868 if (dest_len == (size_t)-1) {
869 /* No longer allow dest_len of -1. */
870 smb_panic("push_ucs2 - invalid dest_len of -1");
873 if (flags & STR_TERMINATE)
874 src_len = (size_t)-1;
875 else
876 src_len = strlen(src);
878 if (ucs2_align(base_ptr, dest, flags)) {
879 *(char *)dest = 0;
880 dest = (void *)((char *)dest + 1);
881 if (dest_len)
882 dest_len--;
883 len++;
886 /* ucs2 is always a multiple of 2 bytes */
887 dest_len &= ~1;
889 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, &size);
890 if (ret == false) {
891 if ((flags & STR_TERMINATE) &&
892 dest &&
893 dest_len) {
894 *(char *)dest = 0;
896 return len;
899 len += size;
901 if (flags & STR_UPPER) {
902 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
903 size_t i;
905 /* We check for i < (ret / 2) below as the dest string isn't null
906 terminated if STR_TERMINATE isn't set. */
908 for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
909 smb_ucs2_t v = toupper_m(dest_ucs2[i]);
910 if (v != dest_ucs2[i]) {
911 dest_ucs2[i] = v;
916 return len;
921 * Copy a string from a unix char* src to a UCS2 destination,
922 * allocating a buffer using talloc().
924 * @param dest always set at least to NULL
925 * @parm converted_size set to the number of bytes occupied by the string in
926 * the destination on success.
928 * @return true if new buffer was correctly allocated, and string was
929 * converted.
931 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
932 size_t *converted_size)
934 size_t src_len = strlen(src)+1;
936 *dest = NULL;
937 return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
938 (void **)dest, converted_size);
943 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
945 * @param dest always set at least to NULL
946 * @parm converted_size set to the number of bytes occupied by the string in
947 * the destination on success.
949 * @return true if new buffer was correctly allocated, and string was
950 * converted.
953 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
954 size_t *converted_size)
956 size_t src_len = strlen(src)+1;
958 *dest = NULL;
959 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
960 (void**)dest, converted_size);
964 Copy a string from a ucs2 source to a unix char* destination.
965 Flags can have:
966 STR_TERMINATE means the string in src is null terminated.
967 STR_NOALIGN means don't try to align.
968 if STR_TERMINATE is set then src_len is ignored if it is -1.
969 src_len is the length of the source area in bytes
970 Return the number of bytes occupied by the string in src.
971 The resulting string in "dest" is always null terminated.
974 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
976 size_t size = 0;
977 size_t ucs2_align_len = 0;
978 bool ret;
980 if (dest_len == (size_t)-1) {
981 /* No longer allow dest_len of -1. */
982 smb_panic("pull_ucs2 - invalid dest_len of -1");
985 if (!src_len) {
986 if (dest && dest_len > 0) {
987 dest[0] = '\0';
989 return 0;
992 if (ucs2_align(base_ptr, src, flags)) {
993 src = (const void *)((const char *)src + 1);
994 if (src_len != (size_t)-1)
995 src_len--;
996 ucs2_align_len = 1;
999 if (flags & STR_TERMINATE) {
1000 /* src_len -1 is the default for null terminated strings. */
1001 if (src_len != (size_t)-1) {
1002 size_t len = strnlen_w((const smb_ucs2_t *)src,
1003 src_len/2);
1004 if (len < src_len/2)
1005 len++;
1006 src_len = len*2;
1010 /* ucs2 is always a multiple of 2 bytes */
1011 if (src_len != (size_t)-1)
1012 src_len &= ~1;
1014 ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, &size);
1015 if (ret == false) {
1016 size = 0;
1017 dest_len = 0;
1020 if (src_len == (size_t)-1)
1021 src_len = size*2;
1023 if (dest_len && size) {
1024 /* Did we already process the terminating zero ? */
1025 if (dest[MIN(size-1, dest_len-1)] != 0) {
1026 dest[MIN(size, dest_len-1)] = 0;
1028 } else {
1029 dest[0] = 0;
1032 return src_len + ucs2_align_len;
1036 Copy a string from a ucs2 source to a unix char* destination.
1037 Talloc version with a base pointer.
1038 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1039 needs fixing. JRA).
1040 Flags can have:
1041 STR_TERMINATE means the string in src is null terminated.
1042 STR_NOALIGN means don't try to align.
1043 if STR_TERMINATE is set then src_len is ignored if it is -1.
1044 src_len is the length of the source area in bytes
1045 Return the number of bytes occupied by the string in src.
1046 The resulting string in "dest" is always null terminated.
1049 static size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1050 const void *base_ptr,
1051 char **ppdest,
1052 const void *src,
1053 size_t src_len,
1054 int flags)
1056 char *dest;
1057 size_t dest_len;
1058 size_t ucs2_align_len = 0;
1060 *ppdest = NULL;
1062 #ifdef DEVELOPER
1063 /* Ensure we never use the braindead "malloc" varient. */
1064 if (ctx == NULL) {
1065 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1067 #endif
1069 if (!src_len) {
1070 return 0;
1073 if (src_len == (size_t)-1) {
1074 /* no longer used anywhere, but worth checking */
1075 smb_panic("sec_len == -1 in pull_ucs2_base_talloc");
1078 if (ucs2_align(base_ptr, src, flags)) {
1079 src = (const void *)((const char *)src + 1);
1080 src_len--;
1081 ucs2_align_len = 1;
1084 if (flags & STR_TERMINATE) {
1085 /* src_len -1 is the default for null terminated strings. */
1086 size_t len = strnlen_w((const smb_ucs2_t *)src,
1087 src_len/2);
1088 if (len < src_len/2)
1089 len++;
1090 src_len = len*2;
1092 /* Ensure we don't use an insane length from the client. */
1093 if (src_len >= 1024*1024) {
1094 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1098 /* ucs2 is always a multiple of 2 bytes */
1099 src_len &= ~1;
1101 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1102 (void *)&dest, &dest_len)) {
1103 dest_len = 0;
1106 if (dest_len) {
1107 /* Did we already process the terminating zero ? */
1108 if (dest[dest_len-1] != 0) {
1109 size_t size = talloc_get_size(dest);
1110 /* Have we got space to append the '\0' ? */
1111 if (size <= dest_len) {
1112 /* No, realloc. */
1113 dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1114 dest_len+1);
1115 if (!dest) {
1116 /* talloc fail. */
1117 dest_len = (size_t)-1;
1118 return 0;
1121 /* Yay - space ! */
1122 dest[dest_len] = '\0';
1123 dest_len++;
1125 } else if (dest) {
1126 dest[0] = 0;
1129 *ppdest = dest;
1130 return src_len + ucs2_align_len;
1134 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1136 * @param dest always set at least to NULL
1137 * @parm converted_size set to the number of bytes occupied by the string in
1138 * the destination on success.
1140 * @return true if new buffer was correctly allocated, and string was
1141 * converted.
1144 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1145 size_t *converted_size)
1147 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1149 *dest = NULL;
1150 return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1151 (void **)dest, converted_size);
1155 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1157 * @param dest always set at least to NULL
1158 * @parm converted_size set to the number of bytes occupied by the string in
1159 * the destination on success.
1161 * @return true if new buffer was correctly allocated, and string was
1162 * converted.
1165 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1166 size_t *converted_size)
1168 size_t src_len = strlen(src)+1;
1170 *dest = NULL;
1171 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1172 (void **)dest, converted_size);
1177 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1179 * @param dest always set at least to NULL
1180 * @parm converted_size set to the number of bytes occupied by the string in
1181 * the destination on success.
1183 * @return true if new buffer was correctly allocated, and string was
1184 * converted.
1187 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1188 size_t *converted_size)
1190 size_t src_len = strlen(src)+1;
1192 *dest = NULL;
1193 return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1194 (void **)dest, converted_size);
1198 Copy a string from a char* src to a unicode or ascii
1199 dos codepage destination choosing unicode or ascii based on the
1200 flags supplied
1201 Return the number of bytes occupied by the string in the destination.
1202 flags can have:
1203 STR_TERMINATE means include the null termination.
1204 STR_UPPER means uppercase in the destination.
1205 STR_ASCII use ascii even with unicode packet.
1206 STR_NOALIGN means don't do alignment.
1207 dest_len is the maximum length allowed in the destination. If dest_len
1208 is -1 then no maxiumum is used.
1211 size_t push_string_check_fn(void *dest, const char *src,
1212 size_t dest_len, int flags)
1214 if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1215 return push_ucs2(NULL, dest, src, dest_len, flags);
1217 return push_ascii(dest, src, dest_len, flags);
1222 Copy a string from a char* src to a unicode or ascii
1223 dos codepage destination choosing unicode or ascii based on the
1224 flags in the SMB buffer starting at base_ptr.
1225 Return the number of bytes occupied by the string in the destination.
1226 flags can have:
1227 STR_TERMINATE means include the null termination.
1228 STR_UPPER means uppercase in the destination.
1229 STR_ASCII use ascii even with unicode packet.
1230 STR_NOALIGN means don't do alignment.
1231 dest_len is the maximum length allowed in the destination. If dest_len
1232 is -1 then no maxiumum is used.
1235 size_t push_string_base(const char *base, uint16 flags2,
1236 void *dest, const char *src,
1237 size_t dest_len, int flags)
1240 if (!(flags & STR_ASCII) && \
1241 ((flags & STR_UNICODE || \
1242 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1243 return push_ucs2(base, dest, src, dest_len, flags);
1245 return push_ascii(dest, src, dest_len, flags);
1249 Copy a string from a char* src to a unicode or ascii
1250 dos codepage destination choosing unicode or ascii based on the
1251 flags supplied
1252 Return the number of bytes occupied by the string in the destination.
1253 flags can have:
1254 STR_TERMINATE means include the null termination.
1255 STR_UPPER means uppercase in the destination.
1256 STR_ASCII use ascii even with unicode packet.
1257 STR_NOALIGN means don't do alignment.
1258 dest_len is the maximum length allowed in the destination. If dest_len
1259 is -1 then no maxiumum is used.
1262 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
1264 size_t ret;
1266 if (!(flags & STR_ASCII) && \
1267 (flags & STR_UNICODE)) {
1268 ret = push_ucs2(NULL, dest, src, dest_len, flags);
1269 } else {
1270 ret = push_ascii(dest, src, dest_len, flags);
1272 if (ret == (size_t)-1) {
1273 return -1;
1275 return ret;
1279 Copy a string from a unicode or ascii source (depending on
1280 the packet flags) to a char* destination.
1281 Flags can have:
1282 STR_TERMINATE means the string in src is null terminated.
1283 STR_UNICODE means to force as unicode.
1284 STR_ASCII use ascii even with unicode packet.
1285 STR_NOALIGN means don't do alignment.
1286 if STR_TERMINATE is set then src_len is ignored is it is -1
1287 src_len is the length of the source area in bytes.
1288 Return the number of bytes occupied by the string in src.
1289 The resulting string in "dest" is always null terminated.
1292 size_t pull_string_fn(const void *base_ptr,
1293 uint16 smb_flags2,
1294 char *dest,
1295 const void *src,
1296 size_t dest_len,
1297 size_t src_len,
1298 int flags)
1300 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1301 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1302 "UNICODE defined");
1305 if (!(flags & STR_ASCII) && \
1306 ((flags & STR_UNICODE || \
1307 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1308 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1310 return pull_ascii(dest, src, dest_len, src_len, flags);
1314 Copy a string from a unicode or ascii source (depending on
1315 the packet flags) to a char* destination.
1316 Variant that uses talloc.
1317 Flags can have:
1318 STR_TERMINATE means the string in src is null terminated.
1319 STR_UNICODE means to force as unicode.
1320 STR_ASCII use ascii even with unicode packet.
1321 STR_NOALIGN means don't do alignment.
1322 if STR_TERMINATE is set then src_len is ignored is it is -1
1323 src_len is the length of the source area in bytes.
1324 Return the number of bytes occupied by the string in src.
1325 The resulting string in "dest" is always null terminated.
1328 size_t pull_string_talloc(TALLOC_CTX *ctx,
1329 const void *base_ptr,
1330 uint16 smb_flags2,
1331 char **ppdest,
1332 const void *src,
1333 size_t src_len,
1334 int flags)
1336 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1337 smb_panic("No base ptr to get flg2 and neither ASCII nor "
1338 "UNICODE defined");
1341 if (!(flags & STR_ASCII) && \
1342 ((flags & STR_UNICODE || \
1343 (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1344 return pull_ucs2_base_talloc(ctx,
1345 base_ptr,
1346 ppdest,
1347 src,
1348 src_len,
1349 flags);
1351 return pull_ascii_base_talloc(ctx,
1352 ppdest,
1353 src,
1354 src_len,
1355 flags);
1359 size_t align_string(const void *base_ptr, const char *p, int flags)
1361 if (!(flags & STR_ASCII) && \
1362 ((flags & STR_UNICODE || \
1363 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1364 return ucs2_align(base_ptr, p, flags);
1366 return 0;