Fix check_path_syntax() for multibyte encodings which have no '\' as second byte.
[Samba/gebeck_regimport.git] / source3 / lib / charcnv.c
blobb06d869bcc3eccbd6a1cf5f8b311d57cd3ca8802
1 /*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "includes.h"
26 /**
27 * @file
29 * @brief Character-set conversion routines built on our iconv.
31 * @note Samba's internal character set (at least in the 3.0 series)
32 * is always the same as the one for the Unix filesystem. It is
33 * <b>not</b> necessarily UTF-8 and may be different on machines that
34 * need i18n filenames to be compatible with Unix software. It does
35 * have to be a superset of ASCII. All multibyte sequences must start
36 * with a byte with the high bit set.
38 * @sa lib/iconv.c
42 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
43 static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
44 /* Unsafe unix charsets which could contain '\\' as second byte of mb character */
45 static const char *conv_unsafe_charsets[] = {
46 "CP932",
47 "EUC-JP",
48 NULL};
49 /* Global variable which is set to True in init_iconv() if unix charset is unsafe
50 w.r.t. '\\' in second byte of mb character. Otherwise it is set to False.
52 BOOL is_unix_charset_unsafe;
54 /**
55 * Return the name of a charset to give to iconv().
56 **/
57 static const char *charset_name(charset_t ch)
59 const char *ret = NULL;
61 if (ch == CH_UCS2) ret = "UCS-2LE";
62 else if (ch == CH_UNIX) ret = lp_unix_charset();
63 else if (ch == CH_DOS) ret = lp_dos_charset();
64 else if (ch == CH_DISPLAY) ret = lp_display_charset();
65 else if (ch == CH_UTF8) ret = "UTF8";
67 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
68 if (ret && !strcmp(ret, "LOCALE")) {
69 const char *ln = NULL;
71 #ifdef HAVE_SETLOCALE
72 setlocale(LC_ALL, "");
73 #endif
74 ln = nl_langinfo(CODESET);
75 if (ln) {
76 /* Check whether the charset name is supported
77 by iconv */
78 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
79 if (handle == (smb_iconv_t) -1) {
80 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
81 ln = NULL;
82 } else {
83 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
84 smb_iconv_close(handle);
87 ret = ln;
89 #endif
91 if (!ret || !*ret) ret = "ASCII";
92 return ret;
95 void lazy_initialize_conv(void)
97 static int initialized = False;
99 if (!initialized) {
100 initialized = True;
101 load_case_tables();
102 init_iconv();
107 * Initialize iconv conversion descriptors.
109 * This is called the first time it is needed, and also called again
110 * every time the configuration is reloaded, because the charset or
111 * codepage might have changed.
113 void init_iconv(void)
115 int c1, c2;
116 BOOL did_reload = False;
117 const char **unsafe_charset = conv_unsafe_charsets;
119 /* so that charset_name() works we need to get the UNIX<->UCS2 going
120 first */
121 if (!conv_handles[CH_UNIX][CH_UCS2])
122 conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open("UCS-2LE", "ASCII");
124 if (!conv_handles[CH_UCS2][CH_UNIX])
125 conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", "UCS-2LE");
127 for (c1=0;c1<NUM_CHARSETS;c1++) {
128 for (c2=0;c2<NUM_CHARSETS;c2++) {
129 const char *n1 = charset_name((charset_t)c1);
130 const char *n2 = charset_name((charset_t)c2);
131 if (conv_handles[c1][c2] &&
132 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
133 strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
134 continue;
136 did_reload = True;
138 if (conv_handles[c1][c2])
139 smb_iconv_close(conv_handles[c1][c2]);
141 conv_handles[c1][c2] = smb_iconv_open(n2,n1);
142 if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
143 DEBUG(0,("Conversion from %s to %s not supported\n",
144 charset_name((charset_t)c1), charset_name((charset_t)c2)));
145 conv_handles[c1][c2] = NULL;
150 if (did_reload) {
151 /* XXX: Does this really get called every time the dos
152 * codepage changes? */
153 /* XXX: Is the did_reload test too strict? */
154 conv_silent = True;
155 init_doschar_table();
156 init_valid_table();
157 conv_silent = False;
160 while(*unsafe_charset && strcmp(*unsafe_charset, conv_handles[CH_UCS2][CH_UNIX]->to_name)) {
161 unsafe_charset++;
164 if (*unsafe_charset) {
165 is_unix_charset_unsafe = True;
166 } else {
167 is_unix_charset_unsafe = False;
172 * Convert string from one encoding to another, making error checking etc
173 * Slow path version - uses (slow) iconv.
175 * @param src pointer to source string (multibyte or singlebyte)
176 * @param srclen length of the source string in bytes
177 * @param dest pointer to destination string (multibyte or singlebyte)
178 * @param destlen maximal length allowed for string
179 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
180 * @returns the number of bytes occupied in the destination
182 * Ensure the srclen contains the terminating zero.
186 static size_t convert_string_internal(charset_t from, charset_t to,
187 void const *src, size_t srclen,
188 void *dest, size_t destlen, BOOL allow_bad_conv)
190 size_t i_len, o_len;
191 size_t retval;
192 const char* inbuf = (const char*)src;
193 char* outbuf = (char*)dest;
194 smb_iconv_t descriptor;
196 lazy_initialize_conv();
198 descriptor = conv_handles[from][to];
200 if (srclen == (size_t)-1) {
201 if (from == CH_UCS2) {
202 srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
203 } else {
204 srclen = strlen((const char *)src)+1;
209 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
210 if (!conv_silent)
211 DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
212 return (size_t)-1;
215 i_len=srclen;
216 o_len=destlen;
218 again:
220 retval = smb_iconv(descriptor, (char **)&inbuf, &i_len, &outbuf, &o_len);
221 if(retval==(size_t)-1) {
222 const char *reason="unknown error";
223 switch(errno) {
224 case EINVAL:
225 reason="Incomplete multibyte sequence";
226 if (!conv_silent)
227 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
228 if (allow_bad_conv)
229 goto use_as_is;
230 break;
231 case E2BIG:
232 reason="No more room";
233 if (!conv_silent)
234 DEBUG(3, ("convert_string_internal: Required %lu, available %lu\n",
235 (unsigned long)srclen, (unsigned long)destlen));
236 /* we are not sure we need srclen bytes,
237 may be more, may be less.
238 We only know we need more than destlen
239 bytes ---simo */
240 break;
241 case EILSEQ:
242 reason="Illegal multibyte sequence";
243 if (!conv_silent)
244 DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
245 if (allow_bad_conv)
246 goto use_as_is;
247 break;
248 default:
249 if (!conv_silent)
250 DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
251 break;
253 /* smb_panic(reason); */
255 return destlen-o_len;
257 use_as_is:
260 * Conversion not supported. This is actually an error, but there are so
261 * many misconfigured iconv systems and smb.conf's out there we can't just
262 * fail. Do a very bad conversion instead.... JRA.
266 if (o_len == 0 || i_len == 0)
267 return destlen - o_len;
269 if (from == CH_UCS2 && to != CH_UCS2) {
270 /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
271 if (i_len < 2)
272 return destlen - o_len;
273 if (i_len >= 2) {
274 *outbuf = inbuf[0];
276 outbuf++;
277 o_len--;
279 inbuf += 2;
280 i_len -= 2;
283 if (o_len == 0 || i_len == 0)
284 return destlen - o_len;
286 /* Keep trying with the next char... */
287 goto again;
289 } else if (from != CH_UCS2 && to == CH_UCS2) {
290 /* Can't convert to ucs2 - just widen by adding zero. */
291 if (o_len < 2)
292 return destlen - o_len;
294 outbuf[0] = inbuf[0];
295 outbuf[1] = '\0';
297 inbuf++;
298 i_len--;
300 outbuf += 2;
301 o_len -= 2;
303 if (o_len == 0 || i_len == 0)
304 return destlen - o_len;
306 /* Keep trying with the next char... */
307 goto again;
309 } else if (from != CH_UCS2 && to != CH_UCS2) {
310 /* Failed multibyte to multibyte. Just copy 1 char and
311 try again. */
312 outbuf[0] = inbuf[0];
314 inbuf++;
315 i_len--;
317 outbuf++;
318 o_len--;
320 if (o_len == 0 || i_len == 0)
321 return destlen - o_len;
323 /* Keep trying with the next char... */
324 goto again;
326 } else {
327 /* Keep compiler happy.... */
328 return destlen - o_len;
334 * Convert string from one encoding to another, making error checking etc
335 * Fast path version - handles ASCII first.
337 * @param src pointer to source string (multibyte or singlebyte)
338 * @param srclen length of the source string in bytes, or -1 for nul terminated.
339 * @param dest pointer to destination string (multibyte or singlebyte)
340 * @param destlen maximal length allowed for string - *NEVER* -1.
341 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
342 * @returns the number of bytes occupied in the destination
344 * Ensure the srclen contains the terminating zero.
346 * This function has been hand-tuned to provide a fast path.
347 * Don't change unless you really know what you are doing. JRA.
350 size_t convert_string(charset_t from, charset_t to,
351 void const *src, size_t srclen,
352 void *dest, size_t destlen, BOOL allow_bad_conv)
355 * NB. We deliberately don't do a strlen here if srclen == -1.
356 * This is very expensive over millions of calls and is taken
357 * care of in the slow path in convert_string_internal. JRA.
360 #ifdef DEVELOPER
361 SMB_ASSERT(destlen != (size_t)-1);
362 #endif
364 if (srclen == 0)
365 return 0;
367 if (from != CH_UCS2 && to != CH_UCS2) {
368 const unsigned char *p = (const unsigned char *)src;
369 unsigned char *q = (unsigned char *)dest;
370 size_t slen = srclen;
371 size_t dlen = destlen;
372 unsigned char lastp;
373 size_t retval = 0;
375 /* If all characters are ascii, fast path here. */
376 while (slen && dlen) {
377 if ((lastp = *p) <= 0x7f) {
378 *q++ = *p++;
379 if (slen != (size_t)-1) {
380 slen--;
382 dlen--;
383 retval++;
384 if (!lastp)
385 break;
386 } else {
387 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
388 goto general_case;
389 #else
390 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
391 #endif
394 return retval;
395 } else if (from == CH_UCS2 && to != CH_UCS2) {
396 const unsigned char *p = (const unsigned char *)src;
397 unsigned char *q = (unsigned char *)dest;
398 size_t retval = 0;
399 size_t slen = srclen;
400 size_t dlen = destlen;
401 unsigned char lastp;
403 /* If all characters are ascii, fast path here. */
404 while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
405 if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
406 *q++ = *p;
407 if (slen != (size_t)-1) {
408 slen -= 2;
410 p += 2;
411 dlen--;
412 retval++;
413 if (!lastp)
414 break;
415 } else {
416 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
417 goto general_case;
418 #else
419 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
420 #endif
423 return retval;
424 } else if (from != CH_UCS2 && to == CH_UCS2) {
425 const unsigned char *p = (const unsigned char *)src;
426 unsigned char *q = (unsigned char *)dest;
427 size_t retval = 0;
428 size_t slen = srclen;
429 size_t dlen = destlen;
430 unsigned char lastp;
432 /* If all characters are ascii, fast path here. */
433 while (slen && (dlen >= 2)) {
434 if ((lastp = *p) <= 0x7F) {
435 *q++ = *p++;
436 *q++ = '\0';
437 if (slen != (size_t)-1) {
438 slen--;
440 dlen -= 2;
441 retval += 2;
442 if (!lastp)
443 break;
444 } else {
445 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
446 goto general_case;
447 #else
448 return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
449 #endif
452 return retval;
455 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
456 general_case:
457 #endif
458 return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
462 * Convert between character sets, allocating a new buffer for the result.
464 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
465 * @param srclen length of source buffer.
466 * @param dest always set at least to NULL
467 * @note -1 is not accepted for srclen.
469 * @returns Size in bytes of the converted string; or -1 in case of error.
471 * Ensure the srclen contains the terminating zero.
473 * I hate the goto's in this function. It's embarressing.....
474 * There has to be a cleaner way to do this. JRA.
477 size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
478 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
480 size_t i_len, o_len, destlen = MAX(srclen, 512);
481 size_t retval;
482 const char *inbuf = (const char *)src;
483 char *outbuf = NULL, *ob = NULL;
484 smb_iconv_t descriptor;
486 *dest = NULL;
488 if (src == NULL || srclen == (size_t)-1)
489 return (size_t)-1;
490 if (srclen == 0)
491 return 0;
493 lazy_initialize_conv();
495 descriptor = conv_handles[from][to];
497 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
498 if (!conv_silent)
499 DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
500 if (allow_bad_conv)
501 goto use_as_is;
502 return (size_t)-1;
505 convert:
507 if ((destlen*2) < destlen) {
508 /* wrapped ! abort. */
509 if (!conv_silent)
510 DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
511 if (!ctx)
512 SAFE_FREE(outbuf);
513 return (size_t)-1;
514 } else {
515 destlen = destlen * 2;
518 if (ctx)
519 ob = (char *)talloc_realloc(ctx, ob, destlen);
520 else
521 ob = (char *)Realloc(ob, destlen);
523 if (!ob) {
524 DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
525 if (!ctx)
526 SAFE_FREE(outbuf);
527 return (size_t)-1;
528 } else {
529 outbuf = ob;
531 i_len = srclen;
532 o_len = destlen;
534 again:
536 retval = smb_iconv(descriptor,
537 (char **)&inbuf, &i_len,
538 &outbuf, &o_len);
539 if(retval == (size_t)-1) {
540 const char *reason="unknown error";
541 switch(errno) {
542 case EINVAL:
543 reason="Incomplete multibyte sequence";
544 if (!conv_silent)
545 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
546 if (allow_bad_conv)
547 goto use_as_is;
548 break;
549 case E2BIG:
550 goto convert;
551 case EILSEQ:
552 reason="Illegal multibyte sequence";
553 if (!conv_silent)
554 DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
555 if (allow_bad_conv)
556 goto use_as_is;
557 break;
559 if (!conv_silent)
560 DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
561 /* smb_panic(reason); */
562 return (size_t)-1;
565 out:
567 destlen = destlen - o_len;
568 if (ctx)
569 *dest = (char *)talloc_realloc(ctx,ob,destlen);
570 else
571 *dest = (char *)Realloc(ob,destlen);
572 if (destlen && !*dest) {
573 DEBUG(0, ("convert_string_allocate: out of memory!\n"));
574 if (!ctx)
575 SAFE_FREE(ob);
576 return (size_t)-1;
579 return destlen;
581 use_as_is:
584 * Conversion not supported. This is actually an error, but there are so
585 * many misconfigured iconv systems and smb.conf's out there we can't just
586 * fail. Do a very bad conversion instead.... JRA.
590 if (o_len == 0 || i_len == 0)
591 goto out;
593 if (from == CH_UCS2 && to != CH_UCS2) {
594 /* Can't convert from ucs2 to multibyte. Just truncate this char to ascii. */
595 if (i_len < 2)
596 goto out;
598 if (i_len >= 2) {
599 *outbuf = inbuf[0];
601 outbuf++;
602 o_len--;
604 inbuf += 2;
605 i_len -= 2;
608 if (o_len == 0 || i_len == 0)
609 goto out;
611 /* Keep trying with the next char... */
612 goto again;
614 } else if (from != CH_UCS2 && to == CH_UCS2) {
615 /* Can't convert to ucs2 - just widen by adding zero. */
616 if (o_len < 2)
617 goto out;
619 outbuf[0] = inbuf[0];
620 outbuf[1] = '\0';
622 inbuf++;
623 i_len--;
625 outbuf += 2;
626 o_len -= 2;
628 if (o_len == 0 || i_len == 0)
629 goto out;
631 /* Keep trying with the next char... */
632 goto again;
634 } else if (from != CH_UCS2 && to != CH_UCS2) {
635 /* Failed multibyte to multibyte. Just copy 1 char and
636 try again. */
637 outbuf[0] = inbuf[0];
639 inbuf++;
640 i_len--;
642 outbuf++;
643 o_len--;
645 if (o_len == 0 || i_len == 0)
646 goto out;
648 /* Keep trying with the next char... */
649 goto again;
651 } else {
652 /* Keep compiler happy.... */
653 goto out;
659 * Convert between character sets, allocating a new buffer using talloc for the result.
661 * @param srclen length of source buffer.
662 * @param dest always set at least to NULL
663 * @note -1 is not accepted for srclen.
665 * @returns Size in bytes of the converted string; or -1 in case of error.
667 static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
668 void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
670 size_t dest_len;
672 *dest = NULL;
673 dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
674 if (dest_len == (size_t)-1)
675 return (size_t)-1;
676 if (*dest == NULL)
677 return (size_t)-1;
678 return dest_len;
681 size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
683 size_t size;
684 smb_ucs2_t *buffer;
686 size = push_ucs2_allocate(&buffer, src);
687 if (size == (size_t)-1) {
688 smb_panic("failed to create UCS2 buffer");
690 if (!strupper_w(buffer) && (dest == src)) {
691 free(buffer);
692 return srclen;
695 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
696 free(buffer);
697 return size;
701 strdup() a unix string to upper case.
702 Max size is pstring.
705 char *strdup_upper(const char *s)
707 pstring out_buffer;
708 const unsigned char *p = (const unsigned char *)s;
709 unsigned char *q = (unsigned char *)out_buffer;
711 /* this is quite a common operation, so we want it to be
712 fast. We optimise for the ascii case, knowing that all our
713 supported multi-byte character sets are ascii-compatible
714 (ie. they match for the first 128 chars) */
716 while (1) {
717 if (*p & 0x80)
718 break;
719 *q++ = toupper(*p);
720 if (!*p)
721 break;
722 p++;
723 if (p - ( const unsigned char *)s >= sizeof(pstring))
724 break;
727 if (*p) {
728 /* MB case. */
729 size_t size;
730 wpstring buffer;
731 size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
732 if (size == (size_t)-1) {
733 return NULL;
736 strupper_w(buffer);
738 size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
739 if (size == (size_t)-1) {
740 return NULL;
744 return strdup(out_buffer);
747 size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
749 size_t size;
750 smb_ucs2_t *buffer = NULL;
752 size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
753 (void **) &buffer, True);
754 if (size == (size_t)-1 || !buffer) {
755 smb_panic("failed to create UCS2 buffer");
757 if (!strlower_w(buffer) && (dest == src)) {
758 SAFE_FREE(buffer);
759 return srclen;
761 size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
762 SAFE_FREE(buffer);
763 return size;
767 strdup() a unix string to lower case.
770 char *strdup_lower(const char *s)
772 size_t size;
773 smb_ucs2_t *buffer = NULL;
774 char *out_buffer;
776 size = push_ucs2_allocate(&buffer, s);
777 if (size == -1 || !buffer) {
778 return NULL;
781 strlower_w(buffer);
783 size = pull_ucs2_allocate(&out_buffer, buffer);
784 SAFE_FREE(buffer);
786 if (size == (size_t)-1) {
787 return NULL;
790 return out_buffer;
793 static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
795 if (flags & (STR_NOALIGN|STR_ASCII))
796 return 0;
797 return PTR_DIFF(p, base_ptr) & 1;
802 * Copy a string from a char* unix src to a dos codepage string destination.
804 * @return the number of bytes occupied by the string in the destination.
806 * @param flags can include
807 * <dl>
808 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
809 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
810 * </dl>
812 * @param dest_len the maximum length in bytes allowed in the
813 * destination. If @p dest_len is -1 then no maximum is used.
815 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
817 size_t src_len = strlen(src);
818 pstring tmpbuf;
820 /* treat a pstring as "unlimited" length */
821 if (dest_len == (size_t)-1)
822 dest_len = sizeof(pstring);
824 if (flags & STR_UPPER) {
825 pstrcpy(tmpbuf, src);
826 strupper_m(tmpbuf);
827 src = tmpbuf;
830 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
831 src_len++;
833 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
836 size_t push_ascii_fstring(void *dest, const char *src)
838 return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
841 size_t push_ascii_pstring(void *dest, const char *src)
843 return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
846 /********************************************************************
847 Push an nstring - ensure null terminated. Written by
848 moriyama@miraclelinux.com (MORIYAMA Masayuki).
849 ********************************************************************/
851 size_t push_ascii_nstring(void *dest, const char *src)
853 size_t i, buffer_len, dest_len;
854 smb_ucs2_t *buffer;
856 conv_silent = True;
857 buffer_len = push_ucs2_allocate(&buffer, src);
858 if (buffer_len == (size_t)-1) {
859 smb_panic("failed to create UCS2 buffer");
862 dest_len = 0;
863 for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
864 unsigned char mb[10];
865 /* Convert one smb_ucs2_t character at a time. */
866 size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
867 if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
868 memcpy((char *)dest + dest_len, mb, mb_len);
869 dest_len += mb_len;
870 } else {
871 errno = E2BIG;
872 break;
875 ((char *)dest)[dest_len] = '\0';
877 SAFE_FREE(buffer);
878 conv_silent = False;
879 return dest_len;
883 * Copy a string from a dos codepage source to a unix char* destination.
885 * The resulting string in "dest" is always null terminated.
887 * @param flags can have:
888 * <dl>
889 * <dt>STR_TERMINATE</dt>
890 * <dd>STR_TERMINATE means the string in @p src
891 * is null terminated, and src_len is ignored.</dd>
892 * </dl>
894 * @param src_len is the length of the source area in bytes.
895 * @returns the number of bytes occupied by the string in @p src.
897 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
899 size_t ret;
901 if (dest_len == (size_t)-1)
902 dest_len = sizeof(pstring);
904 if (flags & STR_TERMINATE) {
905 if (src_len == (size_t)-1) {
906 src_len = strlen(src) + 1;
907 } else {
908 size_t len = strnlen(src, src_len);
909 if (len < src_len)
910 len++;
911 src_len = len;
915 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
916 if (ret == (size_t)-1) {
917 dest_len = 0;
920 if (dest_len)
921 dest[MIN(ret, dest_len-1)] = 0;
922 else
923 dest[0] = 0;
925 return src_len;
928 size_t pull_ascii_pstring(char *dest, const void *src)
930 return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
933 size_t pull_ascii_fstring(char *dest, const void *src)
935 return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
938 /* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
940 size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
942 return pull_ascii(dest, src, dest_len, sizeof(nstring), STR_TERMINATE);
946 * Copy a string from a char* src to a unicode destination.
948 * @returns the number of bytes occupied by the string in the destination.
950 * @param flags can have:
952 * <dl>
953 * <dt>STR_TERMINATE <dd>means include the null termination.
954 * <dt>STR_UPPER <dd>means uppercase in the destination.
955 * <dt>STR_NOALIGN <dd>means don't do alignment.
956 * </dl>
958 * @param dest_len is the maximum length allowed in the
959 * destination. If dest_len is -1 then no maxiumum is used.
962 size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
964 size_t len=0;
965 size_t src_len;
966 size_t ret;
968 /* treat a pstring as "unlimited" length */
969 if (dest_len == (size_t)-1)
970 dest_len = sizeof(pstring);
972 if (flags & STR_TERMINATE)
973 src_len = (size_t)-1;
974 else
975 src_len = strlen(src);
977 if (ucs2_align(base_ptr, dest, flags)) {
978 *(char *)dest = 0;
979 dest = (void *)((char *)dest + 1);
980 if (dest_len)
981 dest_len--;
982 len++;
985 /* ucs2 is always a multiple of 2 bytes */
986 dest_len &= ~1;
988 ret = convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
989 if (ret == (size_t)-1) {
990 return 0;
993 len += ret;
995 if (flags & STR_UPPER) {
996 smb_ucs2_t *dest_ucs2 = dest;
997 size_t i;
998 for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
999 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1000 if (v != dest_ucs2[i]) {
1001 dest_ucs2[i] = v;
1006 return len;
1011 * Copy a string from a unix char* src to a UCS2 destination,
1012 * allocating a buffer using talloc().
1014 * @param dest always set at least to NULL
1016 * @returns The number of bytes occupied by the string in the destination
1017 * or -1 in case of error.
1019 size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1021 size_t src_len = strlen(src)+1;
1023 *dest = NULL;
1024 return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1029 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1031 * @param dest always set at least to NULL
1033 * @returns The number of bytes occupied by the string in the destination
1034 * or -1 in case of error.
1037 size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1039 size_t src_len = strlen(src)+1;
1041 *dest = NULL;
1042 return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1046 Copy a string from a char* src to a UTF-8 destination.
1047 Return the number of bytes occupied by the string in the destination
1048 Flags can have:
1049 STR_TERMINATE means include the null termination
1050 STR_UPPER means uppercase in the destination
1051 dest_len is the maximum length allowed in the destination. If dest_len
1052 is -1 then no maxiumum is used.
1055 static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1057 size_t src_len = strlen(src);
1058 pstring tmpbuf;
1060 /* treat a pstring as "unlimited" length */
1061 if (dest_len == (size_t)-1)
1062 dest_len = sizeof(pstring);
1064 if (flags & STR_UPPER) {
1065 pstrcpy(tmpbuf, src);
1066 strupper_m(tmpbuf);
1067 src = tmpbuf;
1070 if (flags & STR_TERMINATE)
1071 src_len++;
1073 return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1076 size_t push_utf8_fstring(void *dest, const char *src)
1078 return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1082 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1084 * @param dest always set at least to NULL
1086 * @returns The number of bytes occupied by the string in the destination
1089 size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1091 size_t src_len = strlen(src)+1;
1093 *dest = NULL;
1094 return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1098 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1100 * @param dest always set at least to NULL
1102 * @returns The number of bytes occupied by the string in the destination
1105 size_t push_utf8_allocate(char **dest, const char *src)
1107 size_t src_len = strlen(src)+1;
1109 *dest = NULL;
1110 return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1114 Copy a string from a ucs2 source to a unix char* destination.
1115 Flags can have:
1116 STR_TERMINATE means the string in src is null terminated.
1117 STR_NOALIGN means don't try to align.
1118 if STR_TERMINATE is set then src_len is ignored if it is -1.
1119 src_len is the length of the source area in bytes
1120 Return the number of bytes occupied by the string in src.
1121 The resulting string in "dest" is always null terminated.
1124 size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1126 size_t ret;
1128 if (dest_len == (size_t)-1)
1129 dest_len = sizeof(pstring);
1131 if (ucs2_align(base_ptr, src, flags)) {
1132 src = (const void *)((const char *)src + 1);
1133 if (src_len != (size_t)-1)
1134 src_len--;
1137 if (flags & STR_TERMINATE) {
1138 /* src_len -1 is the default for null terminated strings. */
1139 if (src_len != (size_t)-1) {
1140 size_t len = strnlen_w(src, src_len/2);
1141 if (len < src_len/2)
1142 len++;
1143 src_len = len*2;
1147 /* ucs2 is always a multiple of 2 bytes */
1148 if (src_len != (size_t)-1)
1149 src_len &= ~1;
1151 ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1152 if (ret == (size_t)-1) {
1153 return 0;
1156 if (src_len == (size_t)-1)
1157 src_len = ret*2;
1159 if (dest_len)
1160 dest[MIN(ret, dest_len-1)] = 0;
1161 else
1162 dest[0] = 0;
1164 return src_len;
1167 size_t pull_ucs2_pstring(char *dest, const void *src)
1169 return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1172 size_t pull_ucs2_fstring(char *dest, const void *src)
1174 return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1178 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1180 * @param dest always set at least to NULL
1182 * @returns The number of bytes occupied by the string in the destination
1185 size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1187 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1188 *dest = NULL;
1189 return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1193 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1195 * @param dest always set at least to NULL
1197 * @returns The number of bytes occupied by the string in the destination
1200 size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1202 size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1203 *dest = NULL;
1204 return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1208 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1210 * @param dest always set at least to NULL
1212 * @returns The number of bytes occupied by the string in the destination
1215 size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1217 size_t src_len = strlen(src)+1;
1218 *dest = NULL;
1219 return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1223 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1225 * @param dest always set at least to NULL
1227 * @returns The number of bytes occupied by the string in the destination
1230 size_t pull_utf8_allocate(char **dest, const char *src)
1232 size_t src_len = strlen(src)+1;
1233 *dest = NULL;
1234 return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1238 Copy a string from a char* src to a unicode or ascii
1239 dos codepage destination choosing unicode or ascii based on the
1240 flags in the SMB buffer starting at base_ptr.
1241 Return the number of bytes occupied by the string in the destination.
1242 flags can have:
1243 STR_TERMINATE means include the null termination.
1244 STR_UPPER means uppercase in the destination.
1245 STR_ASCII use ascii even with unicode packet.
1246 STR_NOALIGN means don't do alignment.
1247 dest_len is the maximum length allowed in the destination. If dest_len
1248 is -1 then no maxiumum is used.
1251 size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1253 #ifdef DEVELOPER
1254 /* We really need to zero fill here, not clobber
1255 * region, as we want to ensure that valgrind thinks
1256 * all of the outgoing buffer has been written to
1257 * so a send() or write() won't trap an error.
1258 * JRA.
1260 #if 0
1261 if (dest_len != (size_t)-1)
1262 clobber_region(function, line, dest, dest_len);
1263 #else
1264 if (dest_len != (size_t)-1)
1265 memset(dest, '\0', dest_len);
1266 #endif
1267 #endif
1269 if (!(flags & STR_ASCII) && \
1270 ((flags & STR_UNICODE || \
1271 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1272 return push_ucs2(base_ptr, dest, src, dest_len, flags);
1274 return push_ascii(dest, src, dest_len, flags);
1279 Copy a string from a unicode or ascii source (depending on
1280 the packet flags) to a char* destination.
1281 Flags can have:
1282 STR_TERMINATE means the string in src is null terminated.
1283 STR_UNICODE means to force as unicode.
1284 STR_ASCII use ascii even with unicode packet.
1285 STR_NOALIGN means don't do alignment.
1286 if STR_TERMINATE is set then src_len is ignored is it is -1
1287 src_len is the length of the source area in bytes.
1288 Return the number of bytes occupied by the string in src.
1289 The resulting string in "dest" is always null terminated.
1292 size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1294 #ifdef DEVELOPER
1295 if (dest_len != (size_t)-1)
1296 clobber_region(function, line, dest, dest_len);
1297 #endif
1299 if (!(flags & STR_ASCII) && \
1300 ((flags & STR_UNICODE || \
1301 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1302 return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1304 return pull_ascii(dest, src, dest_len, src_len, flags);
1307 size_t align_string(const void *base_ptr, const char *p, int flags)
1309 if (!(flags & STR_ASCII) && \
1310 ((flags & STR_UNICODE || \
1311 (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1312 return ucs2_align(base_ptr, p, flags);
1314 return 0;