Use "unix netbios name" type unstring - 64 bytes long to manipulate netbios
[Samba/gebeck_regimport.git] / source3 / lib / charcnv.c
blob20af806d9004b967d370f953c418316483851e34
1 /*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "includes.h"
26 /**
27 * @file
29 * @brief Character-set conversion routines built on our iconv.
31 * @note Samba's internal character set (at least in the 3.0 series)
32 * is always the same as the one for the Unix filesystem. It is
33 * <b>not</b> necessarily UTF-8 and may be different on machines that
34 * need i18n filenames to be compatible with Unix software. It does
35 * have to be a superset of ASCII. All multibyte sequences must start
36 * with a byte with the high bit set.
38 * @sa lib/iconv.c
/* Cache of open conversion descriptors, indexed as [from][to] by charset_t.
 * An empty (NULL) slot means no usable conversion is available. */
42 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/* While True, the conversion routines in this file skip their DEBUG()
 * reports about failed or unsupported conversions. */
43 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
45 /**
46 * Return the name of a charset to give to iconv().
47 **/
48 static const char *charset_name(charset_t ch)
50 const char *ret = NULL;
52 if (ch == CH_UCS2) ret = "UCS-2LE";
53 else if (ch == CH_UNIX) ret = lp_unix_charset();
54 else if (ch == CH_DOS) ret = lp_dos_charset();
55 else if (ch == CH_DISPLAY) ret = lp_display_charset();
56 else if (ch == CH_UTF8) ret = "UTF8";
58 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
59 if (ret && !strcmp(ret, "LOCALE")) {
60 const char *ln = NULL;
62 #ifdef HAVE_SETLOCALE
63 setlocale(LC_ALL, "");
64 #endif
65 ln = nl_langinfo(CODESET);
66 if (ln) {
67 /* Check whether the charset name is supported
68 by iconv */
69 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
70 if (handle == (smb_iconv_t) -1) {
71 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
72 ln = NULL;
73 } else {
74 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
75 smb_iconv_close(handle);
78 ret = ln;
80 #endif
82 if (!ret || !*ret) ret = "ASCII";
83 return ret;
86 void lazy_initialize_conv(void)
88 static int initialized = False;
90 if (!initialized) {
91 initialized = True;
92 load_case_tables();
93 init_iconv();
97 /**
98 * Initialize iconv conversion descriptors.
100 * This is called the first time it is needed, and also called again
101 * every time the configuration is reloaded, because the charset or
102 * codepage might have changed.
104 void init_iconv(void)
106 int c1, c2;
107 BOOL did_reload = False;
109 /* so that charset_name() works we need to get the UNIX<->UCS2 going
110 first */
111 if (!conv_handles[CH_UNIX][CH_UCS2])
112 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
114 if (!conv_handles[CH_UCS2][CH_UNIX])
115 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
117 for (c1=0;c1<NUM_CHARSETS;c1++) {
118 for (c2=0;c2<NUM_CHARSETS;c2++) {
119 const char *n1 = charset_name((charset_t)c1);
120 const char *n2 = charset_name((charset_t)c2);
121 if (conv_handles[c1][c2] &&
122 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
123 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
124 continue;
126 did_reload = True;
128 if (conv_handles[c1][c2])
129 smb_iconv_close(conv_handles[c1][c2]);
131 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
132 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
133 DEBUG(0,("Conversion from %s to %s not supported\n",
134 charset_name((charset_t)c1), charset_name((charset_t)c2)));
135 conv_handles[c1][c2] = NULL;
140 if (did_reload) {
141 /* XXX: Does this really get called every time the dos
142 * codepage changes? */
143 /* XXX: Is the did_reload test too strict? */
144 conv_silent = True;
145 init_doschar_table();
146 init_valid_table();
147 conv_silent = False;
/*
 * convert_string_internal(): slow-path conversion through smb_iconv().
 *
 * Looks up the descriptor in conv_handles[from][to] (after making sure the
 * tables are initialised).  A srclen of (size_t)-1 is replaced by the length
 * of the NUL-terminated source including its terminator, measured with
 * strlen_w() for CH_UCS2 and strlen() otherwise.
 *
 * Returns (size_t)-1 when no usable descriptor exists.  In every other case
 * the return value is destlen - o_len, i.e. the number of bytes written to
 * dest so far, even when smb_iconv() reported an error.
 *
 * When smb_iconv() fails with EINVAL or EILSEQ and allow_bad_conv is set,
 * control passes to use_as_is, which pushes one character through by hand
 * (truncating, widening or copying it, depending on which side is UCS2)
 * and then retries smb_iconv() on the remainder.
 */
152 * Convert string from one encoding to another, making error checking etc
153 * Slow path version - uses (slow) iconv.
155 * @param src pointer to source string (multibyte or singlebyte)
156 * @param srclen length of the source string in bytes
157 * @param dest pointer to destination string (multibyte or singlebyte)
158 * @param destlen maximal length allowed for string
159 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
160 * @returns the number of bytes occupied in the destination
162 * Ensure the srclen contains the terminating zero.
166 static size_t convert_string_internal(charset_t from, charset_t to,
167 void const *src, size_t srclen,
168 void *dest, size_t destlen, BOOL allow_bad_conv)
170 size_t i_len, o_len;
171 size_t retval;
172 const char* inbuf = (const char*)src;
173 char* outbuf = (char*)dest;
174 smb_iconv_t descriptor;
176 lazy_initialize_conv();
178 descriptor = conv_handles[from][to];
/* (size_t)-1 means "NUL terminated": measure the source, terminator included. */
180 if (srclen == (size_t)-1) {
181 if (from == CH_UCS2) {
182 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
183 } else {
184 srclen = strlen((const char *)src)+1;
/* No usable descriptor: the only path that returns (size_t)-1. */
189 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
190 if (!conv_silent)
191 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
192 return (size_t)-1;
195 i_len=srclen;
196 o_len=destlen;
/* Re-entered from use_as_is after one character has been handled by hand. */
198 again:
200 retval = smb_iconv(descriptor, (char **)&inbuf, &i_len, &outbuf, &o_len);
201 if(retval==(size_t)-1) {
202 const char *reason="unknown error";
203 switch(errno) {
204 case EINVAL:
205 reason="Incomplete multibyte sequence";
206 if (!conv_silent)
207 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
208 if (allow_bad_conv)
209 goto use_as_is;
210 break;
211 case E2BIG:
212 reason="No more room";
213 if (!conv_silent)
214 DEBUG(3, ("convert_string_internal: Required %lu, available %lu\n",
215 (unsigned long)srclen, (unsigned long)destlen));
216 /* we are not sure we need srclen bytes,
217 may be more, may be less.
218 We only know we need more than destlen
219 bytes ---simo */
220 break;
221 case EILSEQ:
222 reason="Illegal multibyte sequence";
223 if (!conv_silent)
224 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
225 if (allow_bad_conv)
226 goto use_as_is;
227 break;
228 default:
229 if (!conv_silent)
230 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
231 break;
233 /* smb_panic(reason); */
/* Success and unrecovered errors alike report the bytes written so far. */
235 return destlen-o_len;
237 use_as_is:
240 * Conversion not supported. This is actually an error, but there are so
241 * many misconfigured iconv systems and smb.conf's out there we can't just
242 * fail. Do a very bad conversion instead.... JRA.
246 if (o_len == 0 || i_len == 0)
247 return destlen - o_len;
249 if (from == CH_UCS2 && to != CH_UCS2) {
250 /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
251 if (i_len < 2)
252 return destlen - o_len;
253 if (i_len >= 2) {
254 *outbuf = inbuf[0];
256 outbuf++;
257 o_len--;
259 inbuf += 2;
260 i_len -= 2;
263 if (o_len == 0 || i_len == 0)
264 return destlen - o_len;
266 /* Keep trying with the next char... */
267 goto again;
269 } else if (from != CH_UCS2 && to == CH_UCS2) {
270 /* Can't convert to ucs2 - just widen by adding zero. */
271 if (o_len < 2)
272 return destlen - o_len;
274 outbuf[0] = inbuf[0];
275 outbuf[1] = '\0';
277 inbuf++;
278 i_len--;
280 outbuf += 2;
281 o_len -= 2;
283 if (o_len == 0 || i_len == 0)
284 return destlen - o_len;
286 /* Keep trying with the next char... */
287 goto again;
289 } else if (from != CH_UCS2 && to != CH_UCS2) {
290 /* Failed multibyte to multibyte. Just copy 1 char and
291 try again. */
292 outbuf[0] = inbuf[0];
294 inbuf++;
295 i_len--;
297 outbuf++;
298 o_len--;
300 if (o_len == 0 || i_len == 0)
301 return destlen - o_len;
303 /* Keep trying with the next char... */
304 goto again;
306 } else {
307 /* Keep compiler happy.... */
308 return destlen - o_len;
314 * Convert string from one encoding to another, making error checking etc
315 * Fast path version - handles ASCII first.
317 * @param src pointer to source string (multibyte or singlebyte)
318 * @param srclen length of the source string in bytes, or -1 for nul terminated.
319 * @param dest pointer to destination string (multibyte or singlebyte)
320 * @param destlen maximal length allowed for string - *NEVER* -1.
321 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
322 * @returns the number of bytes occupied in the destination
324 * Ensure the srclen contains the terminating zero.
326 * This function has been hand-tuned to provide a fast path.
327 * Don't change unless you really know what you are doing. JRA.
330 size_t convert_string(charset_t from, charset_t to,
331 void const *src, size_t srclen,
332 void *dest, size_t destlen, BOOL allow_bad_conv)
335 * NB. We deliberately don't do a strlen here if srclen == -1.
336 * This is very expensive over millions of calls and is taken
337 * care of in the slow path in convert_string_internal. JRA.
340 #ifdef DEVELOPER
341 SMB_ASSERT(destlen != (size_t)-1);
342 #endif
344 if (srclen == 0)
345 return 0;
347 if (from != CH_UCS2 && to != CH_UCS2) {
348 const unsigned char *p = (const unsigned char *)src;
349 unsigned char *q = (unsigned char *)dest;
350 size_t slen = srclen;
351 size_t dlen = destlen;
352 unsigned char lastp;
353 size_t retval = 0;
355 /* If all characters are ascii, fast path here. */
356 while (slen && dlen) {
357 if ((lastp = *p) <= 0x7f) {
358 *q++ = *p++;
359 if (slen != (size_t)-1) {
360 slen--;
362 dlen--;
363 retval++;
364 if (!lastp)
365 break;
366 } else {
367 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
368 goto general_case;
369 #else
370 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
371 #endif
374 return retval;
375 } else if (from == CH_UCS2 && to != CH_UCS2) {
376 const unsigned char *p = (const unsigned char *)src;
377 unsigned char *q = (unsigned char *)dest;
378 size_t retval = 0;
379 size_t slen = srclen;
380 size_t dlen = destlen;
381 unsigned char lastp;
383 /* If all characters are ascii, fast path here. */
384 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
385 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
386 *q++ = *p;
387 if (slen != (size_t)-1) {
388 slen -= 2;
390 p += 2;
391 dlen--;
392 retval++;
393 if (!lastp)
394 break;
395 } else {
396 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
397 goto general_case;
398 #else
399 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
400 #endif
403 return retval;
404 } else if (from != CH_UCS2 && to == CH_UCS2) {
405 const unsigned char *p = (const unsigned char *)src;
406 unsigned char *q = (unsigned char *)dest;
407 size_t retval = 0;
408 size_t slen = srclen;
409 size_t dlen = destlen;
410 unsigned char lastp;
412 /* If all characters are ascii, fast path here. */
413 while (slen && (dlen >= 2)) {
414 if ((lastp = *p) <= 0x7F) {
415 *q++ = *p++;
416 *q++ = '\0';
417 if (slen != (size_t)-1) {
418 slen--;
420 dlen -= 2;
421 retval += 2;
422 if (!lastp)
423 break;
424 } else {
425 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
426 goto general_case;
427 #else
428 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
429 #endif
432 return retval;
435 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
436 general_case:
437 #endif
438 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
442 * Convert between character sets, allocating a new buffer for the result.
444 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
445 * @param srclen length of source buffer.
446 * @param dest always set at least to NULL
447 * @note -1 is not accepted for srclen.
449 * @returns Size in bytes of the converted string; or -1 in case of error.
451 * Ensure the srclen contains the terminating zero.
453 * I hate the goto's in this function. It's embarressing.....
454 * There has to be a cleaner way to do this. JRA.
457 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
458 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
460 size_t i_len, o_len, destlen = MAX(srclen, 512);
461 size_t retval;
462 const char *inbuf = (const char *)src;
463 char *outbuf = NULL, *ob = NULL;
464 smb_iconv_t descriptor;
466 *dest = NULL;
468 if (src == NULL || srclen == (size_t)-1)
469 return (size_t)-1;
470 if (srclen == 0)
471 return 0;
473 lazy_initialize_conv();
475 descriptor = conv_handles[from][to];
477 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
478 if (!conv_silent)
479 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
480 if (allow_bad_conv)
481 goto use_as_is;
482 return (size_t)-1;
485 convert:
487 if ((destlen*2) < destlen) {
488 /* wrapped ! abort. */
489 if (!conv_silent)
490 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
491 if (!ctx)
492 SAFE_FREE(outbuf);
493 return (size_t)-1;
494 } else {
495 destlen = destlen * 2;
498 if (ctx)
499 ob = (char *)talloc_realloc(ctx, ob, destlen);
500 else
501 ob = (char *)Realloc(ob, destlen);
503 if (!ob) {
504 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
505 if (!ctx)
506 SAFE_FREE(outbuf);
507 return (size_t)-1;
508 } else {
509 outbuf = ob;
511 i_len = srclen;
512 o_len = destlen;
514 again:
516 retval = smb_iconv(descriptor,
517 (char **)&inbuf, &i_len,
518 &outbuf, &o_len);
519 if(retval == (size_t)-1) {
520 const char *reason="unknown error";
521 switch(errno) {
522 case EINVAL:
523 reason="Incomplete multibyte sequence";
524 if (!conv_silent)
525 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
526 if (allow_bad_conv)
527 goto use_as_is;
528 break;
529 case E2BIG:
530 goto convert;
531 case EILSEQ:
532 reason="Illegal multibyte sequence";
533 if (!conv_silent)
534 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
535 if (allow_bad_conv)
536 goto use_as_is;
537 break;
539 if (!conv_silent)
540 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
541 /* smb_panic(reason); */
542 return (size_t)-1;
545 out:
547 destlen = destlen - o_len;
548 if (ctx)
549 *dest = (char *)talloc_realloc(ctx,ob,destlen);
550 else
551 *dest = (char *)Realloc(ob,destlen);
552 if (destlen && !*dest) {
553 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
554 if (!ctx)
555 SAFE_FREE(ob);
556 return (size_t)-1;
559 return destlen;
561 use_as_is:
564 * Conversion not supported. This is actually an error, but there are so
565 * many misconfigured iconv systems and smb.conf's out there we can't just
566 * fail. Do a very bad conversion instead.... JRA.
570 if (o_len == 0 || i_len == 0)
571 goto out;
573 if (from == CH_UCS2 && to != CH_UCS2) {
574 /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
575 if (i_len < 2)
576 goto out;
578 if (i_len >= 2) {
579 *outbuf = inbuf[0];
581 outbuf++;
582 o_len--;
584 inbuf += 2;
585 i_len -= 2;
588 if (o_len == 0 || i_len == 0)
589 goto out;
591 /* Keep trying with the next char... */
592 goto again;
594 } else if (from != CH_UCS2 && to == CH_UCS2) {
595 /* Can't convert to ucs2 - just widen by adding zero. */
596 if (o_len < 2)
597 goto out;
599 outbuf[0] = inbuf[0];
600 outbuf[1] = '\0';
602 inbuf++;
603 i_len--;
605 outbuf += 2;
606 o_len -= 2;
608 if (o_len == 0 || i_len == 0)
609 goto out;
611 /* Keep trying with the next char... */
612 goto again;
614 } else if (from != CH_UCS2 && to != CH_UCS2) {
615 /* Failed multibyte to multibyte. Just copy 1 char and
616 try again. */
617 outbuf[0] = inbuf[0];
619 inbuf++;
620 i_len--;
622 outbuf++;
623 o_len--;
625 if (o_len == 0 || i_len == 0)
626 goto out;
628 /* Keep trying with the next char... */
629 goto again;
631 } else {
632 /* Keep compiler happy.... */
633 goto out;
639 * Convert between character sets, allocating a new buffer using talloc for the result.
641 * @param srclen length of source buffer.
642 * @param dest always set at least to NULL
643 * @note -1 is not accepted for srclen.
645 * @returns Size in bytes of the converted string; or -1 in case of error.
647 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
648 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
650 size_t dest_len;
652 *dest = NULL;
653 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
654 if (dest_len == (size_t)-1)
655 return (size_t)-1;
656 if (*dest == NULL)
657 return (size_t)-1;
658 return dest_len;
661 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
663 size_t size;
664 smb_ucs2_t *buffer;
666 size = push_ucs2_allocate(&buffer, src);
667 if (size == (size_t)-1) {
668 smb_panic("failed to create UCS2 buffer");
670 if (!strupper_w(buffer) && (dest == src)) {
671 free(buffer);
672 return srclen;
675 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
676 free(buffer);
677 return size;
681 strdup() a unix string to upper case.
682 Max size is pstring.
685 char *strdup_upper(const char *s)
687 pstring out_buffer;
688 const unsigned char *p = (const unsigned char *)s;
689 unsigned char *q = (unsigned char *)out_buffer;
691 /* this is quite a common operation, so we want it to be
692 fast. We optimise for the ascii case, knowing that all our
693 supported multi-byte character sets are ascii-compatible
694 (ie. they match for the first 128 chars) */
696 while (1) {
697 if (*p & 0x80)
698 break;
699 *q++ = toupper(*p);
700 if (!*p)
701 break;
702 p++;
703 if (p - ( const unsigned char *)s >= sizeof(pstring))
704 break;
707 if (*p) {
708 /* MB case. */
709 size_t size;
710 wpstring buffer;
711 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
712 if (size == (size_t)-1) {
713 return NULL;
716 strupper_w(buffer);
718 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
719 if (size == (size_t)-1) {
720 return NULL;
724 return strdup(out_buffer);
727 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
729 size_t size;
730 smb_ucs2_t *buffer = NULL;
732 size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
733 (void **) &buffer, True);
734 if (size == (size_t)-1 || !buffer) {
735 smb_panic("failed to create UCS2 buffer");
737 if (!strlower_w(buffer) && (dest == src)) {
738 SAFE_FREE(buffer);
739 return srclen;
741 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
742 SAFE_FREE(buffer);
743 return size;
747 strdup() a unix string to lower case.
750 char *strdup_lower(const char *s)
752 size_t size;
753 smb_ucs2_t *buffer = NULL;
754 char *out_buffer;
756 size = push_ucs2_allocate(&buffer, s);
757 if (size == -1 || !buffer) {
758 return NULL;
761 strlower_w(buffer);
763 size = pull_ucs2_allocate(&out_buffer, buffer);
764 SAFE_FREE(buffer);
766 if (size == (size_t)-1) {
767 return NULL;
770 return out_buffer;
773 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
775 if (flags & (STR_NOALIGN|STR_ASCII))
776 return 0;
777 return PTR_DIFF(p, base_ptr) & 1;
782 * Copy a string from a char* unix src to a dos codepage string destination.
784 * @return the number of bytes occupied by the string in the destination.
786 * @param flags can include
787 * <dl>
788 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
789 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
790 * </dl>
792 * @param dest_len the maximum length in bytes allowed in the
793 * destination. If @p dest_len is -1 then no maximum is used.
795 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
797 size_t src_len = strlen(src);
798 pstring tmpbuf;
800 /* treat a pstring as "unlimited" length */
801 if (dest_len == (size_t)-1)
802 dest_len = sizeof(pstring);
804 if (flags & STR_UPPER) {
805 pstrcpy(tmpbuf, src);
806 strupper_m(tmpbuf);
807 src = tmpbuf;
810 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
811 src_len++;
813 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
816 size_t push_ascii_fstring(void *dest, const char *src)
818 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
821 size_t push_ascii_pstring(void *dest, const char *src)
823 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
826 /********************************************************************
827 Push an nstring - ensure null terminated. Written by
828 moriyama@miraclelinux.com (MORIYAMA Masayuki).
829 ********************************************************************/
831 size_t push_ascii_nstring(void *dest, const char *src)
833 size_t i, buffer_len, dest_len;
834 smb_ucs2_t *buffer;
836 conv_silent = True;
837 buffer_len = push_ucs2_allocate(&buffer, src);
838 if (buffer_len == (size_t)-1) {
839 smb_panic("failed to create UCS2 buffer");
842 dest_len = 0;
843 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
844 unsigned char mb[10];
845 /* Convert one smb_ucs2_t character at a time. */
846 size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
847 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
848 memcpy((char *)dest + dest_len, mb, mb_len);
849 dest_len += mb_len;
850 } else {
851 errno = E2BIG;
852 break;
855 ((char *)dest)[dest_len] = '\0';
857 SAFE_FREE(buffer);
858 conv_silent = False;
859 return dest_len;
863 * Copy a string from a dos codepage source to a unix char* destination.
865 * The resulting string in "dest" is always null terminated.
867 * @param flags can have:
868 * <dl>
869 * <dt>STR_TERMINATE</dt>
870 * <dd>STR_TERMINATE means the string in @p src
871 * is null terminated, and src_len is ignored.</dd>
872 * </dl>
874 * @param src_len is the length of the source area in bytes.
875 * @returns the number of bytes occupied by the string in @p src.
877 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
879 size_t ret;
881 if (dest_len == (size_t)-1)
882 dest_len = sizeof(pstring);
884 if (flags & STR_TERMINATE) {
885 if (src_len == (size_t)-1) {
886 src_len = strlen(src) + 1;
887 } else {
888 size_t len = strnlen(src, src_len);
889 if (len < src_len)
890 len++;
891 src_len = len;
895 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
896 if (ret == (size_t)-1) {
897 dest_len = 0;
900 if (dest_len)
901 dest[MIN(ret, dest_len-1)] = 0;
902 else
903 dest[0] = 0;
905 return src_len;
908 size_t pull_ascii_pstring(char *dest, const void *src)
910 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
913 size_t pull_ascii_fstring(char *dest, const void *src)
915 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
918 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
920 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
922 return pull_ascii(dest, src, dest_len, sizeof(nstring), STR_TERMINATE);
926 * Copy a string from a char* src to a unicode destination.
928 * @returns the number of bytes occupied by the string in the destination.
930 * @param flags can have:
932 * <dl>
933 * <dt>STR_TERMINATE <dd>means include the null termination.
934 * <dt>STR_UPPER <dd>means uppercase in the destination.
935 * <dt>STR_NOALIGN <dd>means don't do alignment.
936 * </dl>
938 * @param dest_len is the maximum length allowed in the
939 * destination. If dest_len is -1 then no maxiumum is used.
942 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
944 size_t len=0;
945 size_t src_len;
946 size_t ret;
948 /* treat a pstring as "unlimited" length */
949 if (dest_len == (size_t)-1)
950 dest_len = sizeof(pstring);
952 if (flags & STR_TERMINATE)
953 src_len = (size_t)-1;
954 else
955 src_len = strlen(src);
957 if (ucs2_align(base_ptr, dest, flags)) {
958 *(char *)dest = 0;
959 dest = (void *)((char *)dest + 1);
960 if (dest_len)
961 dest_len--;
962 len++;
965 /* ucs2 is always a multiple of 2 bytes */
966 dest_len &= ~1;
968 ret = convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
969 if (ret == (size_t)-1) {
970 return 0;
973 len += ret;
975 if (flags & STR_UPPER) {
976 smb_ucs2_t *dest_ucs2 = dest;
977 size_t i;
978 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
979 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
980 if (v != dest_ucs2[i]) {
981 dest_ucs2[i] = v;
986 return len;
991 * Copy a string from a unix char* src to a UCS2 destination,
992 * allocating a buffer using talloc().
994 * @param dest always set at least to NULL
996 * @returns The number of bytes occupied by the string in the destination
997 * or -1 in case of error.
999 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1001 size_t src_len = strlen(src)+1;
1003 *dest = NULL;
1004 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1009 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1011 * @param dest always set at least to NULL
1013 * @returns The number of bytes occupied by the string in the destination
1014 * or -1 in case of error.
1017 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1019 size_t src_len = strlen(src)+1;
1021 *dest = NULL;
1022 return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1026 Copy a string from a char* src to a UTF-8 destination.
1027 Return the number of bytes occupied by the string in the destination
1028 Flags can have:
1029 STR_TERMINATE means include the null termination
1030 STR_UPPER means uppercase in the destination
1031 dest_len is the maximum length allowed in the destination. If dest_len
1032 is -1 then no maxiumum is used.
1035 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1037 size_t src_len = strlen(src);
1038 pstring tmpbuf;
1040 /* treat a pstring as "unlimited" length */
1041 if (dest_len == (size_t)-1)
1042 dest_len = sizeof(pstring);
1044 if (flags & STR_UPPER) {
1045 pstrcpy(tmpbuf, src);
1046 strupper_m(tmpbuf);
1047 src = tmpbuf;
1050 if (flags & STR_TERMINATE)
1051 src_len++;
1053 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1056 size_t push_utf8_fstring(void *dest, const char *src)
1058 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1062 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1064 * @param dest always set at least to NULL
1066 * @returns The number of bytes occupied by the string in the destination
1069 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1071 size_t src_len = strlen(src)+1;
1073 *dest = NULL;
1074 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1078 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1080 * @param dest always set at least to NULL
1082 * @returns The number of bytes occupied by the string in the destination
1085 size_t push_utf8_allocate(char **dest, const char *src)
1087 size_t src_len = strlen(src)+1;
1089 *dest = NULL;
1090 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1094 Copy a string from a ucs2 source to a unix char* destination.
1095 Flags can have:
1096 STR_TERMINATE means the string in src is null terminated.
1097 STR_NOALIGN means don't try to align.
1098 if STR_TERMINATE is set then src_len is ignored if it is -1.
1099 src_len is the length of the source area in bytes
1100 Return the number of bytes occupied by the string in src.
1101 The resulting string in "dest" is always null terminated.
1104 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1106 size_t ret;
1108 if (dest_len == (size_t)-1)
1109 dest_len = sizeof(pstring);
1111 if (ucs2_align(base_ptr, src, flags)) {
1112 src = (const void *)((const char *)src + 1);
1113 if (src_len != (size_t)-1)
1114 src_len--;
1117 if (flags & STR_TERMINATE) {
1118 /* src_len -1 is the default for null terminated strings. */
1119 if (src_len != (size_t)-1) {
1120 size_t len = strnlen_w(src, src_len/2);
1121 if (len < src_len/2)
1122 len++;
1123 src_len = len*2;
1127 /* ucs2 is always a multiple of 2 bytes */
1128 if (src_len != (size_t)-1)
1129 src_len &= ~1;
1131 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1132 if (ret == (size_t)-1) {
1133 return 0;
1136 if (src_len == (size_t)-1)
1137 src_len = ret*2;
1139 if (dest_len)
1140 dest[MIN(ret, dest_len-1)] = 0;
1141 else
1142 dest[0] = 0;
1144 return src_len;
1147 size_t pull_ucs2_pstring(char *dest, const void *src)
1149 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1152 size_t pull_ucs2_fstring(char *dest, const void *src)
1154 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1158 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1160 * @param dest always set at least to NULL
1162 * @returns The number of bytes occupied by the string in the destination
1165 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1167 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1168 *dest = NULL;
1169 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1173 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1175 * @param dest always set at least to NULL
1177 * @returns The number of bytes occupied by the string in the destination
1180 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1182 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1183 *dest = NULL;
1184 return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1188 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1190 * @param dest always set at least to NULL
1192 * @returns The number of bytes occupied by the string in the destination
1195 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1197 size_t src_len = strlen(src)+1;
1198 *dest = NULL;
1199 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1203 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1205 * @param dest always set at least to NULL
1207 * @returns The number of bytes occupied by the string in the destination
1210 size_t pull_utf8_allocate(char **dest, const char *src)
1212 size_t src_len = strlen(src)+1;
1213 *dest = NULL;
1214 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1218 Copy a string from a char* src to a unicode or ascii
1219 dos codepage destination choosing unicode or ascii based on the
1220 flags in the SMB buffer starting at base_ptr.
1221 Return the number of bytes occupied by the string in the destination.
1222 flags can have:
1223 STR_TERMINATE means include the null termination.
1224 STR_UPPER means uppercase in the destination.
1225 STR_ASCII use ascii even with unicode packet.
1226 STR_NOALIGN means don't do alignment.
1227 dest_len is the maximum length allowed in the destination. If dest_len
1228 is -1 then no maxiumum is used.
1231 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1233 #ifdef DEVELOPER
1234 /* We really need to zero fill here, not clobber
1235 * region, as we want to ensure that valgrind thinks
1236 * all of the outgoing buffer has been written to
1237 * so a send() or write() won't trap an error.
1238 * JRA.
1240 #if 0
1241 if (dest_len != (size_t)-1)
1242 clobber_region(function, line, dest, dest_len);
1243 #else
1244 if (dest_len != (size_t)-1)
1245 memset(dest, '\0', dest_len);
1246 #endif
1247 #endif
1249 if (!(flags & STR_ASCII) && \
1250 ((flags & STR_UNICODE || \
1251 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1252 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1254 return push_ascii(dest, src, dest_len, flags);
1259 Copy a string from a unicode or ascii source (depending on
1260 the packet flags) to a char* destination.
1261 Flags can have:
1262 STR_TERMINATE means the string in src is null terminated.
1263 STR_UNICODE means to force as unicode.
1264 STR_ASCII use ascii even with unicode packet.
1265 STR_NOALIGN means don't do alignment.
1266 if STR_TERMINATE is set then src_len is ignored is it is -1
1267 src_len is the length of the source area in bytes.
1268 Return the number of bytes occupied by the string in src.
1269 The resulting string in "dest" is always null terminated.
1272 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1274 #ifdef DEVELOPER
1275 if (dest_len != (size_t)-1)
1276 clobber_region(function, line, dest, dest_len);
1277 #endif
1279 if (!(flags & STR_ASCII) && \
1280 ((flags & STR_UNICODE || \
1281 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1282 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1284 return pull_ascii(dest, src, dest_len, src_len, flags);
1287 size_t align_string(const void *base_ptr, const char *p, int flags)
1289 if (!(flags & STR_ASCII) && \
1290 ((flags & STR_UNICODE || \
1291 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1292 return ucs2_align(base_ptr, p, flags);
1294 return 0;