merging for 2.2.6pre1
[Samba.git] / source / lib / util_unistr.c
blobefad8df1ea82b4edd38a6798a16794b3e5bc8e1e
1 /*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4 Samba utility functions
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include "includes.h"
24 smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
25 (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
26 (smb_ucs2_t)'\r', 0 };
28 * The following are the codepage to ucs2 and vica versa maps.
29 * These are dynamically loaded from a unicode translation file.
32 static smb_ucs2_t *doscp_to_ucs2;
33 static uint16 *ucs2_to_doscp;
35 static smb_ucs2_t *unixcp_to_ucs2;
36 static uint16 *ucs2_to_unixcp;
38 #ifndef MAXUNI
39 #define MAXUNI 1024
40 #endif
42 /*******************************************************************
43 Write a string in (little-endian) unicode format. src is in
44 the current UNIX character set. len is the length in bytes of the
45 string pointed to by dst.
47 if null_terminate is True then null terminate the packet (adds 2 bytes)
49 the return value is the length in bytes consumed by the string, including the
50 null termination if applied
51 ********************************************************************/
53 size_t unix_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
55 size_t ret = 0;
56 while (*src && (len >= 2)) {
57 size_t skip = get_character_len(*src);
58 smb_ucs2_t val = (*src & 0xff);
61 * If this is a multibyte character (and all DOS/Windows
62 * codepages have at maximum 2 byte multibyte characters)
63 * then work out the index value for the unicode conversion.
66 if (skip == 2)
67 val = ((val << 8) | (src[1] & 0xff));
69 SSVAL(dst,ret,unixcp_to_ucs2[val]);
70 ret += 2;
71 len -= 2;
72 if (skip)
73 src += skip;
74 else
75 src++;
77 if (null_terminate) {
78 SSVAL(dst,ret,0);
79 ret += 2;
81 return(ret);
84 /*******************************************************************
85 Write a string in (little-endian) unicode format. src is in
86 the current DOS codepage. len is the length in bytes of the
87 string pointed to by dst.
89 if null_terminate is True then null terminate the packet (adds 2 bytes)
91 the return value is the length in bytes consumed by the string, including the
92 null termination if applied
93 ********************************************************************/
95 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
97 size_t ret = 0;
98 while (*src && (len >= 2)) {
99 size_t skip = get_character_len(*src);
100 smb_ucs2_t val = (*src & 0xff);
103 * If this is a multibyte character (and all DOS/Windows
104 * codepages have at maximum 2 byte multibyte characters)
105 * then work out the index value for the unicode conversion.
108 if (skip == 2)
109 val = ((val << 8) | (src[1] & 0xff));
111 SSVAL(dst,ret,doscp_to_ucs2[val]);
112 ret += 2;
113 len -= 2;
114 if (skip)
115 src += skip;
116 else
117 src++;
119 if (null_terminate) {
120 SSVAL(dst,ret,0);
121 ret += 2;
123 return(ret);
126 /*******************************************************************
127 Pull a DOS codepage string out of a UNICODE array. len is in bytes.
128 ********************************************************************/
130 void unistr_to_dos(char *dest, const char *src, size_t len)
132 char *destend = dest + len;
134 while (dest < destend) {
135 uint16 ucs2_val = SVAL(src,0);
136 uint16 cp_val = ucs2_to_doscp[ucs2_val];
138 src += 2;
140 if (ucs2_val == 0)
141 break;
143 if (cp_val < 256)
144 *dest++ = (char)cp_val;
145 else {
146 *dest++ = (cp_val >> 8) & 0xff;
147 *dest++ = (cp_val & 0xff);
151 *dest = 0;
/*******************************************************************
 Step over a unicode string two bytes at a time, stopping at a zero
 value or after len bytes.  If the 16 bit value at the stopping
 position is zero it is stepped over as well.
 NOTE(review): that final check reads at src+len when no zero was
 found inside len.
********************************************************************/

char *skip_unibuf(char *src, size_t len)
{
	char *limit = src + len;

	for (; src < limit; src += 2) {
		if (SVAL(src,0) == 0)
			break;
	}

	if (SVAL(src,0) == 0)
		src += 2;

	return src;
}
172 /*******************************************************************
173 Return a DOS codepage version of a little-endian unicode string.
174 len is the filename length (ignoring any terminating zero) in uin16
175 units. Always null terminates.
176 Hack alert: uses fixed buffer(s).
177 ********************************************************************/
179 char *dos_unistrn2(uint16 *src, int len)
181 static char lbufs[8][MAXUNI];
182 static int nexti;
183 char *lbuf = lbufs[nexti];
184 char *p;
186 nexti = (nexti+1)%8;
188 for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
189 uint16 ucs2_val = SVAL(src,0);
190 uint16 cp_val = ucs2_to_doscp[ucs2_val];
192 if (cp_val < 256)
193 *p++ = (char)cp_val;
194 else {
195 *p++ = (cp_val >> 8) & 0xff;
196 *p++ = (cp_val & 0xff);
200 *p = 0;
201 return lbuf;
204 static char lbufs[8][MAXUNI];
205 static int nexti;
207 /*******************************************************************
208 Return a DOS codepage version of a little-endian unicode string.
209 Hack alert: uses fixed buffer(s).
210 ********************************************************************/
212 char *dos_unistr2(uint16 *src)
214 char *lbuf = lbufs[nexti];
215 char *p;
217 nexti = (nexti+1)%8;
219 for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
220 uint16 ucs2_val = SVAL(src,0);
221 uint16 cp_val = ucs2_to_doscp[ucs2_val];
223 if (cp_val < 256)
224 *p++ = (char)cp_val;
225 else {
226 *p++ = (cp_val >> 8) & 0xff;
227 *p++ = (cp_val & 0xff);
231 *p = 0;
232 return lbuf;
235 /*******************************************************************
236 Return a DOS codepage version of a little-endian unicode string
237 ********************************************************************/
239 char *dos_unistr2_to_str(UNISTR2 *str)
241 char *lbuf = lbufs[nexti];
242 char *p;
243 uint16 *src = str->buffer;
245 nexti = (nexti+1)%8;
247 for (p = lbuf; (p - lbuf < MAXUNI-3) && (src - str->buffer < str->uni_str_len) && *src; src++) {
248 uint16 ucs2_val = SVAL(src,0);
249 uint16 cp_val = ucs2_to_doscp[ucs2_val];
251 if (cp_val < 256)
252 *p++ = (char)cp_val;
253 else {
254 *p++ = (cp_val >> 8) & 0xff;
255 *p++ = (cp_val & 0xff);
259 *p = 0;
260 return lbuf;
263 /*******************************************************************
264 Put an ASCII string into a UNICODE array (uint16's).
265 use little-endian ucs2
266 ********************************************************************/
267 void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
269 uint16 *destend = dest + maxlen;
270 char c;
272 while (dest < destend) {
273 c = *(src++);
274 if (c == 0)
275 break;
277 SSVAL(dest, 0, c);
278 dest++;
281 *dest = 0;
284 /*******************************************************************
285 Pull an ASCII string out of a UNICODE array (uint16's).
286 ********************************************************************/
288 void unistr_to_ascii(char *dest, const uint16 *src, int len)
290 char *destend = dest + len;
291 uint16 c;
293 if (src == NULL) {
294 *dest = '\0';
295 return;
298 /* normal code path for a valid 'src' */
299 while (dest < destend) {
300 c = SVAL(src, 0);
301 src++;
302 if (c == 0)
303 break;
305 *(dest++) = (char)c;
308 *dest = 0;
309 return;
312 /*******************************************************************
313 Convert a (little-endian) UNISTR2 structure to an ASCII string, either
314 DOS or UNIX codepage.
315 ********************************************************************/
317 static void unistr2_to_mbcp(char *dest, const UNISTR2 *str, size_t maxlen, uint16 *ucs2_to_mbcp)
319 char *p;
320 uint16 *src;
321 size_t len;
323 if (str == NULL) {
324 *dest='\0';
325 return;
328 src = str->buffer;
330 len = MIN(str->uni_str_len, maxlen);
331 if (len == 0) {
332 *dest='\0';
333 return;
336 for (p = dest; (p-dest < maxlen-3) && (src - str->buffer < str->uni_str_len) && *src; src++) {
337 uint16 ucs2_val = SVAL(src,0);
338 uint16 cp_val = ucs2_to_mbcp[ucs2_val];
340 if (cp_val < 256)
341 *p++ = (char)cp_val;
342 else {
343 *p++ = (cp_val >> 8) & 0xff;
344 *p++ = (cp_val & 0xff);
348 *p = 0;
351 /*******************************************************************
352 Convert a (little-endian) UNISTR2 structure to an ASCII string
353 Warning: this version does DOS codepage.
354 ********************************************************************/
356 void unistr2_to_dos(char *dest, const UNISTR2 *str, size_t maxlen)
358 unistr2_to_mbcp(dest, str, maxlen, ucs2_to_doscp);
361 /*******************************************************************
362 Convert a (little-endian) UNISTR2 structure to an ASCII string
363 Warning: this version does UNIX codepage.
364 ********************************************************************/
366 void unistr2_to_unix(char *dest, const UNISTR2 *str, size_t maxlen)
368 unistr2_to_mbcp(dest, str, maxlen, ucs2_to_unixcp);
371 /*******************************************************************
372 Return a number stored in a buffer
373 ********************************************************************/
375 uint32 buffer2_to_uint32(BUFFER2 *str)
377 if (str->buf_len == 4)
378 return IVAL(str->buffer, 0);
379 else
380 return 0;
383 /*******************************************************************
384 Return a DOS codepage version of a NOTunicode string
385 ********************************************************************/
387 char *dos_buffer2_to_str(BUFFER2 *str)
389 char *lbuf = lbufs[nexti];
390 char *p;
391 uint16 *src = str->buffer;
393 nexti = (nexti+1)%8;
395 for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2) && *src; src++) {
396 uint16 ucs2_val = SVAL(src,0);
397 uint16 cp_val = ucs2_to_doscp[ucs2_val];
399 if (cp_val < 256)
400 *p++ = (char)cp_val;
401 else {
402 *p++ = (cp_val >> 8) & 0xff;
403 *p++ = (cp_val & 0xff);
407 *p = 0;
408 return lbuf;
411 /*******************************************************************
412 Return a dos codepage version of a NOTunicode string
413 ********************************************************************/
415 char *dos_buffer2_to_multistr(BUFFER2 *str)
417 char *lbuf = lbufs[nexti];
418 char *p;
419 uint16 *src = str->buffer;
421 nexti = (nexti+1)%8;
423 for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2); src++) {
424 if (*src == 0) {
425 *p++ = ' ';
426 } else {
427 uint16 ucs2_val = SVAL(src,0);
428 uint16 cp_val = ucs2_to_doscp[ucs2_val];
430 if (cp_val < 256)
431 *p++ = (char)cp_val;
432 else {
433 *p++ = (cp_val >> 8) & 0xff;
434 *p++ = (cp_val & 0xff);
439 *p = 0;
440 return lbuf;
443 /*******************************************************************
444 Create a null-terminated unicode string from a null-terminated DOS
445 codepage string.
446 Return number of unicode chars copied, excluding the null character.
447 Unicode strings created are in little-endian format.
448 ********************************************************************/
450 size_t dos_struni2(char *dst, const char *src, size_t max_len)
452 size_t len = 0;
454 if (dst == NULL)
455 return 0;
457 if (src != NULL) {
458 for (; (len < max_len-2) && *src; len++, dst +=2) {
459 size_t skip = get_character_len(*src);
460 smb_ucs2_t val = (*src & 0xff);
463 * If this is a multibyte character (and all DOS/Windows
464 * codepages have at maximum 2 byte multibyte characters)
465 * then work out the index value for the unicode conversion.
468 if (skip == 2)
469 val = ((val << 8) | (src[1] & 0xff));
471 SSVAL(dst,0,doscp_to_ucs2[val]);
472 if (skip)
473 src += skip;
474 else
475 src++;
479 SSVAL(dst,0,0);
481 return len;
484 /*******************************************************************
485 Return a DOS codepage version of a little-endian unicode string.
486 Hack alert: uses fixed buffer(s).
487 ********************************************************************/
489 char *dos_unistr(char *buf)
491 char *lbuf = lbufs[nexti];
492 uint16 *src = (uint16 *)buf;
493 char *p;
495 nexti = (nexti+1)%8;
497 for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
498 uint16 ucs2_val = SVAL(src,0);
499 uint16 cp_val = ucs2_to_doscp[ucs2_val];
501 if (cp_val < 256)
502 *p++ = (char)cp_val;
503 else {
504 *p++ = (cp_val >> 8) & 0xff;
505 *p++ = (cp_val & 0xff);
509 *p = 0;
510 return lbuf;
513 /*******************************************************************
514 returns the length in number of wide characters
515 ******************************************************************/
516 int unistrlen(uint16 *s)
518 int len;
520 if (!s)
521 return -1;
523 for (len=0; *s; s++,len++);
525 return len;
528 /*******************************************************************
529 Strcpy for unicode strings. returns length (in num of wide chars)
530 ********************************************************************/
532 int unistrcpy(uint16 *dst, uint16 *src)
534 int num_wchars = 0;
536 while (*src) {
537 *dst++ = *src++;
538 num_wchars++;
540 *dst = 0;
542 return num_wchars;
545 /*******************************************************************
546 Free any existing maps.
547 ********************************************************************/
549 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
551 /* this handles identity mappings where we share the pointer */
552 if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
553 *pp_ucs2_to_cp = NULL;
556 SAFE_FREE(*pp_cp_to_ucs2);
557 SAFE_FREE(*pp_ucs2_to_cp);
560 /*******************************************************************
561 Build a default (null) codepage to unicode map.
562 ********************************************************************/
564 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
566 int i;
568 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
570 if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
571 DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
572 abort();
575 *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
576 for (i = 0; i < 65536; i++)
577 (*pp_cp_to_ucs2)[i] = i;
580 /*******************************************************************
581 Load a codepage to unicode and vica-versa map.
582 ********************************************************************/
584 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
586 pstring unicode_map_file_name;
587 FILE *fp = NULL;
588 SMB_STRUCT_STAT st;
589 smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
590 uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
591 size_t cp_to_ucs2_size;
592 size_t ucs2_to_cp_size;
593 size_t i;
594 size_t size;
595 char buf[UNICODE_MAP_HEADER_SIZE];
597 DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
599 if (*codepage == '\0')
600 goto clean_and_exit;
602 if(strlen(lp_codepagedir()) + 13 + strlen(codepage) >
603 sizeof(unicode_map_file_name)) {
604 DEBUG(0,("load_unicode_map: filename too long to load\n"));
605 goto clean_and_exit;
608 pstrcpy(unicode_map_file_name, lp_codepagedir());
609 pstrcat(unicode_map_file_name, "/");
610 pstrcat(unicode_map_file_name, "unicode_map.");
611 pstrcat(unicode_map_file_name, codepage);
613 if(sys_stat(unicode_map_file_name,&st)!=0) {
614 DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
615 unicode_map_file_name));
616 goto clean_and_exit;
619 size = st.st_size;
621 if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
622 DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
623 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
624 goto clean_and_exit;
627 if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
628 DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
629 unicode_map_file_name, strerror(errno)));
630 goto clean_and_exit;
633 if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
634 DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
635 unicode_map_file_name, strerror(errno)));
636 goto clean_and_exit;
639 /* Check the version value */
640 if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
641 DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
642 Needed %hu, got %hu.\n",
643 unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
644 SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
645 goto clean_and_exit;
648 /* Check the codepage value */
649 if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
650 DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
651 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
652 goto clean_and_exit;
655 ucs2_to_cp_size = 2*65536;
656 if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
658 * This is a multibyte code page.
660 cp_to_ucs2_size = 2*65536;
661 } else {
663 * Single byte code page.
665 cp_to_ucs2_size = 2*256;
669 * Free any old translation tables.
672 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
674 if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
675 DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
676 goto clean_and_exit;
679 if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
680 DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
681 goto clean_and_exit;
684 if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
685 DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
686 unicode_map_file_name, strerror(errno)));
687 goto clean_and_exit;
690 if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
691 DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
692 unicode_map_file_name, strerror(errno)));
693 goto clean_and_exit;
697 * Now ensure the 16 bit values are in the correct endianness.
700 for (i = 0; i < cp_to_ucs2_size/2; i++)
701 cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
703 for (i = 0; i < ucs2_to_cp_size/2; i++)
704 ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
706 fclose(fp);
708 *pp_cp_to_ucs2 = cp_to_ucs2;
709 *pp_ucs2_to_cp = ucs2_to_cp;
711 return True;
713 clean_and_exit:
715 /* pseudo destructor :-) */
717 if(fp != NULL)
718 fclose(fp);
720 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
722 default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
724 return False;
727 /*******************************************************************
728 Load a dos codepage to unicode and vica-versa map.
729 ********************************************************************/
731 BOOL load_dos_unicode_map(int codepage)
733 fstring codepage_str;
735 slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
736 DEBUG(10,("load_dos_unicode_map: %s\n", codepage_str));
737 return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
740 /*******************************************************************
741 Load a UNIX codepage to unicode and vica-versa map.
742 ********************************************************************/
744 BOOL load_unix_unicode_map(const char *unix_char_set, BOOL override)
746 static BOOL init_done;
747 fstring upper_unix_char_set;
749 fstrcpy(upper_unix_char_set, unix_char_set);
750 strupper(upper_unix_char_set);
752 DEBUG(10,("load_unix_unicode_map: %s (init_done=%d, override=%d)\n",
753 upper_unix_char_set, (int)init_done, (int)override ));
755 if (!init_done)
756 init_done = True;
757 else if (!override)
758 return True;
760 return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
763 /*******************************************************************
764 The following functions reproduce many of the non-UNICODE standard
765 string functions in Samba.
766 ********************************************************************/
768 /*******************************************************************
769 Convert a UNICODE string to multibyte format. Note that the 'src' is in
770 native byte order, not little endian. Always zero terminates.
771 dst_len is in bytes.
772 ********************************************************************/
774 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
775 size_t dst_len, const uint16 *ucs2_to_cp)
777 size_t dst_pos;
779 for(dst_pos = 0; (dst_pos < dst_len - 1) && *src;) {
780 smb_ucs2_t val = ucs2_to_cp[*src++];
781 if(val < 256) {
782 dst[dst_pos++] = (char)val;
783 } else {
785 if(dst_pos >= dst_len - 2)
786 break;
789 * A 2 byte value is always written as
790 * high/low into the buffer stream.
793 dst[dst_pos++] = (char)((val >> 8) & 0xff);
794 dst[dst_pos++] = (char)(val & 0xff);
798 dst[dst_pos] = '\0';
800 return dst;
803 /*******************************************************************
804 Convert a multibyte string to UNICODE format. Note that the 'dst' is in
805 native byte order, not little endian. Always zero terminates.
806 dst_len is in bytes.
807 ********************************************************************/
809 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
810 size_t dst_len, smb_ucs2_t *cp_to_ucs2)
812 size_t i;
814 dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
816 for(i = 0; (i < (dst_len - 1)) && *src;) {
817 size_t skip = skip_multibyte_char(*src);
818 smb_ucs2_t val = (*src & 0xff);
821 * If this is a multibyte character
822 * then work out the index value for the unicode conversion.
825 if (skip == 2)
826 val = ((val << 8) | (src[1] & 0xff));
828 dst[i++] = cp_to_ucs2[val];
829 if (skip)
830 src += skip;
831 else
832 src++;
835 dst[i] = 0;
837 return dst;
840 /*******************************************************************
841 Convert a UNICODE string to multibyte format. Note that the 'src' is in
842 native byte order, not little endian. Always zero terminates.
843 This function may be replaced if the MB codepage format is an
844 encoded one (ie. utf8, hex). See the code in lib/kanji.c
845 for details. dst_len is in bytes.
846 ********************************************************************/
848 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
850 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
853 /*******************************************************************
854 Convert a UNIX string to UNICODE format. Note that the 'dst' is in
855 native byte order, not little endian. Always zero terminates.
856 This function may be replaced if the UNIX codepage format is a
857 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
858 for details. dst_len is in bytes, not ucs2 units.
859 ********************************************************************/
861 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
863 return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
866 /*******************************************************************
867 Convert a single UNICODE character to unix character. Returns the
868 number of bytes in the unix character.
869 ********************************************************************/
871 size_t unicode_to_unix_char(char *dst, const smb_ucs2_t src)
873 smb_ucs2_t val = ucs2_to_unixcp[src];
874 if(val < 256) {
875 *dst = (char)val;
876 return (size_t)1;
879 * A 2 byte value is always written as
880 * high/low into the buffer stream.
883 dst[0] = (char)((val >> 8) & 0xff);
884 dst[1] = (char)(val & 0xff);
885 return (size_t)2;
888 /*******************************************************************
889 Convert a UNICODE string to DOS format. Note that the 'src' is in
890 native byte order, not little endian. Always zero terminates.
891 dst_len is in bytes.
892 ********************************************************************/
894 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
896 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
899 /*******************************************************************
900 Convert a single UNICODE character to DOS codepage. Returns the
901 number of bytes in the DOS codepage character.
902 ********************************************************************/
904 size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src)
906 smb_ucs2_t val = ucs2_to_doscp[src];
907 if(val < 256) {
908 *dst = (char)val;
909 return (size_t)1;
912 * A 2 byte value is always written as
913 * high/low into the buffer stream.
916 dst[0] = (char)((val >> 8) & 0xff);
917 dst[1] = (char)(val & 0xff);
918 return (size_t)2;
921 /*******************************************************************
922 Convert a DOS string to UNICODE format. Note that the 'dst' is in
923 native byte order, not little endian. Always zero terminates.
924 This function may be replaced if the DOS codepage format is a
925 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
926 for details. dst_len is in bytes, not ucs2 units.
927 ********************************************************************/
929 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
931 return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
934 /*******************************************************************
935 Count the number of characters in a smb_ucs2_t string.
936 ********************************************************************/
938 size_t strlen_w(const smb_ucs2_t *src)
940 size_t len;
942 for(len = 0; *src++; len++)
945 return len;
948 /*******************************************************************
949 Safe wstring copy into a known length string. maxlength includes
950 the terminating zero. maxlength is in ucs2 units.
951 ********************************************************************/
953 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
955 size_t ucs2_len;
957 if (!dest) {
958 DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
959 return NULL;
962 if (!src) {
963 *dest = 0;
964 return dest;
967 maxlength /= sizeof(smb_ucs2_t);
969 ucs2_len = strlen_w(src);
971 if (ucs2_len >= maxlength) {
972 fstring out;
973 DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
974 (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
975 unicode_to_unix(out,src,sizeof(out))) );
976 ucs2_len = maxlength - 1;
979 memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
980 dest[ucs2_len] = 0;
981 return dest;
984 /*******************************************************************
985 Safe string cat into a string. maxlength includes the terminating zero.
986 maxlength is in ucs2 units.
987 ********************************************************************/
989 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
991 size_t ucs2_src_len, ucs2_dest_len;
993 if (!dest) {
994 DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
995 return NULL;
998 if (!src)
999 return dest;
1001 ucs2_src_len = strlen_w(src);
1002 ucs2_dest_len = strlen_w(dest);
1004 if (ucs2_src_len + ucs2_dest_len >= maxlength) {
1005 fstring out;
1006 int new_len = maxlength - ucs2_dest_len - 1;
1007 DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
1008 (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
1009 unicode_to_unix(out,src,sizeof(out))) );
1010 ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
1013 memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
1014 dest[ucs2_dest_len + ucs2_src_len] = 0;
1015 return dest;
1018 /*******************************************************************
1019 Compare the two strings s1 and s2.
1020 ********************************************************************/
1022 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1024 smb_ucs2_t c1, c2;
1026 for (;;) {
1027 c1 = *s1++;
1028 c2 = *s2++;
1030 if (c1 != c2)
1031 return c1 - c2;
1033 if (c1 == 0)
1034 break;
1036 return 0;
1039 /*******************************************************************
1040 Compare the first n characters of s1 to s2. len is in ucs2 units.
1041 ********************************************************************/
1043 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
1045 smb_ucs2_t c1, c2;
1047 for (; len != 0; --len) {
1048 c1 = *s1++;
1049 c2 = *s2++;
1051 if (c1 != c2)
1052 return c1 - c2;
1054 if (c1 == 0)
1055 break;
1058 return 0;
1061 /*******************************************************************
1062 Search string s2 from s1.
1063 ********************************************************************/
1065 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1067 size_t len = strlen_w(s2);
1069 if (!*s2)
1070 return (smb_ucs2_t *)s1;
1072 for(;*s1; s1++) {
1073 if (*s1 == *s2) {
1074 if (strncmp_w(s1, s2, len) == 0)
1075 return (smb_ucs2_t *)s1;
1078 return NULL;
1081 /*******************************************************************
1082 Search for ucs2 char c from the beginning of s.
1083 ********************************************************************/
1085 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1087 do {
1088 if (*s == c)
1089 return (smb_ucs2_t *)s;
1090 } while (*s++);
1092 return NULL;
1095 /*******************************************************************
1096 Search for ucs2 char c from the end of s.
1097 ********************************************************************/
1099 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1101 smb_ucs2_t *retval = 0;
1103 do {
1104 if (*s == c)
1105 retval = (smb_ucs2_t *)s;
1106 } while (*s++);
1108 return retval;
1111 /*******************************************************************
1112 Search token from s1 separated by any ucs2 char of s2.
1113 ********************************************************************/
1115 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
1117 static smb_ucs2_t *s = NULL;
1118 smb_ucs2_t *q;
1120 if (!s1) {
1121 if (!s)
1122 return NULL;
1123 s1 = s;
1126 for (q = s1; *s1; s1++) {
1127 smb_ucs2_t *p = strchr_w(s2, *s1);
1128 if (p) {
1129 if (s1 != q) {
1130 s = s1 + 1;
1131 *s1 = '\0';
1132 return q;
1134 q = s1 + 1;
1138 s = NULL;
1139 if (*q)
1140 return q;
1142 return NULL;
1145 /*******************************************************************
1146 Duplicate a ucs2 string.
1147 ********************************************************************/
1149 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1151 size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1152 smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1153 if (newstr == NULL)
1154 return NULL;
1155 safe_strcpy_w(newstr, s, newlen);
1156 return newstr;
1159 /*******************************************************************
1160 Mapping tables for UNICODE character. Allows toupper/tolower and
1161 isXXX functions to work.
1163 tridge: split into 2 pieces. This saves us 5/6 of the memory
1164 with a small speed penalty
1165 The magic constants are the lower/upper range of the tables two
1166 parts
1167 ********************************************************************/
1169 typedef struct {
1170 smb_ucs2_t lower;
1171 smb_ucs2_t upper;
1172 unsigned char flags;
1173 } smb_unicode_table_t;
1175 #define TABLE1_BOUNDARY 9450
1176 #define TABLE2_BOUNDARY 64256
1178 static smb_unicode_table_t map_table1[] = {
1179 #include "unicode_map_table1.h"
1182 static smb_unicode_table_t map_table2[] = {
1183 #include "unicode_map_table2.h"
1186 static unsigned char map_table_flags(smb_ucs2_t v)
1188 if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1189 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1190 return 0;
1193 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1195 if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1196 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1197 return v;
1200 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1202 if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1203 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1204 return v;
1207 /*******************************************************************
1208 Is an upper case wchar.
1209 ********************************************************************/
1211 int isupper_w( smb_ucs2_t val)
1213 return (map_table_flags(val) & UNI_UPPER);
1216 /*******************************************************************
1217 Is a lower case wchar.
1218 ********************************************************************/
1220 int islower_w( smb_ucs2_t val)
1222 return (map_table_flags(val) & UNI_LOWER);
1225 /*******************************************************************
1226 Is a digit wchar.
1227 ********************************************************************/
1229 int isdigit_w( smb_ucs2_t val)
1231 return (map_table_flags(val) & UNI_DIGIT);
1234 /*******************************************************************
1235 Is a hex digit wchar.
1236 ********************************************************************/
1238 int isxdigit_w( smb_ucs2_t val)
1240 return (map_table_flags(val) & UNI_XDIGIT);
1243 /*******************************************************************
1244 Is a space wchar.
1245 ********************************************************************/
1247 int isspace_w( smb_ucs2_t val)
1249 return (map_table_flags(val) & UNI_SPACE);
1252 /*******************************************************************
1253 Convert a wchar to upper case.
1254 ********************************************************************/
1256 smb_ucs2_t toupper_w( smb_ucs2_t val )
1258 return map_table_upper(val);
1261 /*******************************************************************
1262 Convert a wchar to lower case.
1263 ********************************************************************/
1265 smb_ucs2_t tolower_w( smb_ucs2_t val )
1267 return map_table_lower(val);
1270 static smb_ucs2_t *last_ptr = NULL;
1272 void set_first_token_w(smb_ucs2_t *ptr)
1274 last_ptr = ptr;
1277 /****************************************************************************
1278 Get the next token from a string, return False if none found
1279 handles double-quotes.
1280 Based on a routine by GJC@VILLAGE.COM.
1281 Extensively modified by Andrew.Tridgell@anu.edu.au
1282 bufsize is in bytes.
1283 ****************************************************************************/
1285 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)'\n', (smb_ucs2_t)'\r', 0};
1286 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1288 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1290 smb_ucs2_t *s;
1291 BOOL quoted;
1292 size_t len=1;
1295 * Convert bufsize to smb_ucs2_t units.
1298 bufsize /= sizeof(smb_ucs2_t);
1300 if (!ptr)
1301 ptr = &last_ptr;
1302 if (!ptr)
1303 return(False);
1305 s = *ptr;
1308 * Default to simple separators.
1311 if (!sep)
1312 sep = sep_list;
1315 * Find the first non sep char.
1318 while(*s && strchr_w(sep,*s))
1319 s++;
1322 * Nothing left ?
1325 if (!*s)
1326 return(False);
1329 * Copy over the token.
1332 for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1333 if (*s == quotechar) {
1334 quoted = !quoted;
1335 } else {
1336 len++;
1337 *buff++ = *s;
1341 *ptr = (*s) ? s+1 : s;
1342 *buff = 0;
1343 last_ptr = *ptr;
1345 return(True);
1348 /****************************************************************************
1349 Convert list of tokens to array; dependent on above routine.
1350 Uses last_ptr from above - bit of a hack.
1351 ****************************************************************************/
1353 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1355 smb_ucs2_t *s=last_ptr;
1356 int ictok=0;
1357 smb_ucs2_t **ret, **iret;
1359 if (!sep)
1360 sep = sep_list;
1362 while(*s && strchr_w(sep,*s))
1363 s++;
1366 * Nothing left ?
1369 if (!*s)
1370 return(NULL);
1372 do {
1373 ictok++;
1374 while(*s && (!strchr_w(sep,*s)))
1375 s++;
1376 while(*s && strchr_w(sep,*s))
1377 *s++=0;
1378 } while(*s);
1380 *ctok = ictok;
1381 s = last_ptr;
1383 if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
1384 return NULL;
1386 while(ictok--) {
1387 *iret++=s;
1388 while(*s++)
1390 while(!*s)
1391 s++;
1394 return ret;
1397 /*******************************************************************
1398 Case insensitive string compararison.
1399 ********************************************************************/
1401 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1404 * Compare until we run out of string, either t or s, or find a difference.
1407 while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1408 s++;
1409 t++;
1412 return(toupper_w(*s) - toupper_w(*t));
1415 /*******************************************************************
1416 Case insensitive string compararison, length limited.
1417 n is in ucs2 units.
1418 ********************************************************************/
1420 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1423 * Compare until we run out of string, either t or s, or chars.
1426 while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1427 s++;
1428 t++;
1429 n--;
1433 * Not run out of chars - strings are different lengths.
1436 if (n)
1437 return(toupper_w(*s) - toupper_w(*t));
1440 * Identical up to where we run out of chars,
1441 * and strings are same length.
1444 return(0);
1447 /*******************************************************************
1448 Compare 2 strings.
1449 ********************************************************************/
1451 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1453 if (s1 == s2)
1454 return(True);
1455 if (!s1 || !s2)
1456 return(False);
1458 return(StrCaseCmp_w(s1,s2)==0);
1461 /*******************************************************************
1462 Compare 2 strings up to and including the nth char. n is in ucs2
1463 units.
1464 ******************************************************************/
1466 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1468 if (s1 == s2)
1469 return(True);
1470 if (!s1 || !s2 || !n)
1471 return(False);
1473 return(StrnCaseCmp_w(s1,s2,n)==0);
1476 /*******************************************************************
1477 Compare 2 strings (case sensitive).
1478 ********************************************************************/
1480 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1482 if (s1 == s2)
1483 return(True);
1484 if (!s1 || !s2)
1485 return(False);
1487 return(strcmp_w(s1,s2)==0);
1490 /*******************************************************************
1491 Convert a string to lower case.
1492 ********************************************************************/
1494 void strlower_w(smb_ucs2_t *s)
1496 while (*s) {
1497 if (isupper_w(*s))
1498 *s = tolower_w(*s);
1499 s++;
1503 /*******************************************************************
1504 Convert a string to upper case.
1505 ********************************************************************/
1507 void strupper_w(smb_ucs2_t *s)
1509 while (*s) {
1510 if (islower_w(*s))
1511 *s = toupper_w(*s);
1512 s++;
1516 /*******************************************************************
1517 Convert a string to "normal" form.
1518 ********************************************************************/
1520 void strnorm_w(smb_ucs2_t *s)
1522 extern int case_default;
1523 if (case_default == CASE_UPPER)
1524 strupper_w(s);
1525 else
1526 strlower_w(s);
1529 /*******************************************************************
1530 Check if a string is in "normal" case.
1531 ********************************************************************/
1533 BOOL strisnormal_w(smb_ucs2_t *s)
1535 extern int case_default;
1536 if (case_default == CASE_UPPER)
1537 return(!strhaslower_w(s));
1539 return(!strhasupper_w(s));
1542 /****************************************************************************
1543 String replace.
1544 ****************************************************************************/
1546 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1548 while (*s) {
1549 if (oldc == *s)
1550 *s = newc;
1551 s++;
1555 /*******************************************************************
1556 Skip past some strings in a buffer. n is in bytes.
1557 ********************************************************************/
1559 smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
1561 while (n--)
1562 buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1;
1563 return(buf);
1566 /*******************************************************************
1567 Count the number of characters in a string. Same as strlen_w in
1568 smb_ucs2_t string units.
1569 ********************************************************************/
1571 size_t str_charnum_w(const smb_ucs2_t *s)
1573 return strlen_w(s);
1576 /*******************************************************************
1577 Trim the specified elements off the front and back of a string.
1578 ********************************************************************/
1580 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1582 BOOL ret = False;
1583 size_t front_len = (front && *front) ? strlen_w(front) : 0;
1584 size_t back_len = (back && *back) ? strlen_w(back) : 0;
1585 size_t s_len;
1587 while (front_len && strncmp_w(s, front, front_len) == 0) {
1588 smb_ucs2_t *p = s;
1589 ret = True;
1591 while (1) {
1592 if (!(*p = p[front_len]))
1593 break;
1594 p++;
1598 if(back_len) {
1599 s_len = strlen_w(s);
1600 while ((s_len >= back_len) &&
1601 (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1602 ret = True;
1603 s[s_len - back_len] = 0;
1604 s_len = strlen_w(s);
1608 return(ret);
1611 /****************************************************************************
1612 Does a string have any uppercase chars in it ?
1613 ****************************************************************************/
1615 BOOL strhasupper_w(const smb_ucs2_t *s)
1617 while (*s) {
1618 if (isupper_w(*s))
1619 return(True);
1620 s++;
1622 return(False);
1625 /****************************************************************************
1626 Does a string have any lowercase chars in it ?
1627 ****************************************************************************/
1629 BOOL strhaslower_w(const smb_ucs2_t *s)
1631 while (*s) {
1632 if (islower(*s))
1633 return(True);
1634 s++;
1636 return(False);
1639 /****************************************************************************
1640 Find the number of 'c' chars in a string.
1641 ****************************************************************************/
1643 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1645 size_t count=0;
1647 while (*s) {
1648 if (*s == c)
1649 count++;
1650 s++;
1652 return(count);
1655 /*******************************************************************
1656 Return True if a string consists only of one particular character.
1657 ********************************************************************/
1659 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1661 if(s == NULL)
1662 return False;
1663 if(!*s)
1664 return False;
1666 while (*s) {
1667 if (*s != c)
1668 return False;
1669 s++;
1671 return True;
1674 /*******************************************************************
1675 Paranoid strcpy into a buffer of given length (includes terminating
1676 zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1677 does *NOT* check for multibyte characters. Don't change it !
1678 maxlength is in ucs2 units.
1679 ********************************************************************/
1681 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const smb_ucs2_t *other_safe_chars, size_t maxlength)
1683 size_t len, i;
1684 smb_ucs2_t nullstr_w = (smb_ucs2_t)0;
1686 if (!dest) {
1687 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1688 return NULL;
1691 if (!src) {
1692 *dest = 0;
1693 return dest;
1696 len = strlen_w(src);
1697 if (len >= maxlength)
1698 len = maxlength - 1;
1700 if (!other_safe_chars)
1701 other_safe_chars = &nullstr_w;
1703 for(i = 0; i < len; i++) {
1704 smb_ucs2_t val = src[i];
1705 if(isupper_w(val) ||islower_w(val) || isdigit_w(val) || strchr_w(other_safe_chars, val))
1706 dest[i] = src[i];
1707 else
1708 dest[i] = (smb_ucs2_t)'_';
1711 dest[i] = 0;
1713 return dest;
1716 /****************************************************************************
1717 Like strncpy but always null terminates. Make sure there is room !
1718 The variable n should always be one less than the available size and is in
1719 ucs2 units.
1720 ****************************************************************************/
1722 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1724 smb_ucs2_t *d = dest;
1725 if (!dest)
1726 return(NULL);
1727 if (!src) {
1728 *dest = 0;
1729 return(dest);
1732 while (n-- && (*d++ = *src++))
1734 *d = 0;
1735 return(dest);
1738 /****************************************************************************
1739 Like strncpy but copies up to the character marker. Always null terminates.
1740 returns a pointer to the character marker in the source string (src).
1741 n is in ucs2 units.
1742 ****************************************************************************/
1744 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1746 smb_ucs2_t *p;
1747 size_t str_len;
1749 p = strchr_w(src, c);
1750 if (p == NULL) {
1751 fstring cval;
1752 smb_ucs2_t mbcval[2];
1753 mbcval[0] = c;
1754 mbcval[1] = 0;
1755 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1756 unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1757 return NULL;
1760 str_len = PTR_DIFF(p, src) + 1;
1761 safe_strcpy_w(dest, src, MIN(n, str_len));
1763 return p;
1766 /*************************************************************
1767 Routine to get hex characters and turn them into a 16 byte array.
1768 The array can be variable length, and any non-hex-numeric
1769 characters are skipped. "0xnn" or "0Xnn" is specially catered
1770 for. len is in bytes.
1771 Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1772 **************************************************************/
1774 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1775 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1776 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1777 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1778 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1780 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1782 size_t i;
1783 size_t num_chars = 0;
1784 unsigned char lonybble, hinybble;
1785 smb_ucs2_t *p1 = NULL, *p2 = NULL;
1788 * Convert to smb_ucs2_t units.
1791 len /= sizeof(smb_ucs2_t);
1793 for (i = 0; i < len && strhex[i] != 0; i++) {
1794 if (strnequal_w(hexchars, hexprefix, 2)) {
1795 i++; /* skip two chars */
1796 continue;
1799 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1800 break;
1802 i++; /* next hex digit */
1804 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1805 break;
1807 /* get the two nybbles */
1808 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1809 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1811 p[num_chars] = (hinybble << 4) | lonybble;
1812 num_chars++;
1814 p1 = NULL;
1815 p2 = NULL;
1817 return num_chars;
1820 /****************************************************************************
1821 Check if a string is part of a list.
1822 ****************************************************************************/
1824 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1826 wpstring tok;
1827 smb_ucs2_t *p=list;
1829 if (!list)
1830 return(False);
1832 while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1833 if (casesensitive) {
1834 if (strcmp_w(tok,s) == 0)
1835 return(True);
1836 } else {
1837 if (StrCaseCmp_w(tok,s) == 0)
1838 return(True);
1841 return(False);
1844 /* This is used to prevent lots of mallocs of size 2 */
1845 static smb_ucs2_t *null_string = NULL;
1847 /****************************************************************************
1848 Set a string value, allocing the space for the string.
1849 ****************************************************************************/
1851 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1853 size_t l;
1855 if (!null_string) {
1856 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1857 DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1858 return False;
1860 *null_string = 0;
1863 if (!src)
1864 src = null_string;
1866 l = strlen_w(src);
1868 if (l == 0)
1869 *dest = null_string;
1870 else {
1871 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1872 if ((*dest) == NULL) {
1873 DEBUG(0,("Out of memory in string_init_w\n"));
1874 return False;
1877 wpstrcpy(*dest,src);
1879 return(True);
1882 /****************************************************************************
1883 Free a string value.
1884 ****************************************************************************/
1886 void string_free_w(smb_ucs2_t **s)
1888 if (!s || !(*s))
1889 return;
1890 if (*s == null_string)
1891 *s = NULL;
1892 SAFE_FREE(*s);
1895 /****************************************************************************
1896 Set a string value, allocing the space for the string, and deallocating any
1897 existing space.
1898 ****************************************************************************/
1900 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1902 string_free_w(dest);
1904 return(string_init_w(dest,src));
1907 /****************************************************************************
1908 Substitute a string for a pattern in another string. Make sure there is
1909 enough room !
1911 This routine looks for pattern in s and replaces it with
1912 insert. It may do multiple replacements.
1914 Any of " ; ' $ or ` in the insert string are replaced with _
1915 if len==0 then no length check is performed
1916 len is in ucs2 units.
1917 ****************************************************************************/
1919 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1921 smb_ucs2_t *p;
1922 ssize_t ls,lp,li, i;
1924 if (!insert || !pattern || !s)
1925 return;
1927 ls = (ssize_t)strlen_w(s);
1928 lp = (ssize_t)strlen_w(pattern);
1929 li = (ssize_t)strlen_w(insert);
1931 if (!*pattern)
1932 return;
1934 while (lp <= ls && (p = strstr_w(s,pattern))) {
1935 if (len && (ls + (li-lp) >= len)) {
1936 fstring out;
1937 DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1938 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1939 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1940 break;
1942 if (li != lp)
1943 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1945 for (i=0;i<li;i++) {
1946 switch (insert[i]) {
1947 case (smb_ucs2_t)'`':
1948 case (smb_ucs2_t)'"':
1949 case (smb_ucs2_t)'\'':
1950 case (smb_ucs2_t)';':
1951 case (smb_ucs2_t)'$':
1952 case (smb_ucs2_t)'%':
1953 case (smb_ucs2_t)'\r':
1954 case (smb_ucs2_t)'\n':
1955 p[i] = (smb_ucs2_t)'_';
1956 break;
1957 default:
1958 p[i] = insert[i];
1961 s = p + li;
1962 ls += (li-lp);
1966 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1968 string_sub_w(s, pattern, insert, sizeof(wfstring));
1971 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1973 string_sub_w(s, pattern, insert, sizeof(wpstring));
1976 /****************************************************************************
1977 Similar to string_sub() but allows for any character to be substituted.
1978 Use with caution !
1979 if len==0 then no length check is performed.
1980 ****************************************************************************/
1982 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1984 smb_ucs2_t *p;
1985 ssize_t ls,lp,li;
1987 if (!insert || !pattern || !s)
1988 return;
1990 ls = (ssize_t)strlen_w(s);
1991 lp = (ssize_t)strlen_w(pattern);
1992 li = (ssize_t)strlen_w(insert);
1994 if (!*pattern)
1995 return;
1997 while (lp <= ls && (p = strstr_w(s,pattern))) {
1998 if (len && (ls + (li-lp) >= len)) {
1999 fstring out;
2000 DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
2001 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
2002 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
2003 break;
2005 if (li != lp)
2006 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
2008 memcpy(p, insert, li*sizeof(smb_ucs2_t));
2009 s = p + li;
2010 ls += (li-lp);
2014 /****************************************************************************
2015 Splits out the front and back at a separator.
2016 ****************************************************************************/
2018 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
2020 smb_ucs2_t *p = strrchr_w(path, sep);
2022 if (p != NULL)
2023 *p = 0;
2025 if (front != NULL)
2026 wpstrcpy(front, path);
2028 if (p != NULL) {
2029 if (back != NULL)
2030 wpstrcpy(back, p+1);
2031 *p = (smb_ucs2_t)'\\';
2032 } else {
2033 if (back != NULL)
2034 back[0] = 0;
2039 /****************************************************************************
2040 Write an octal as a string.
2041 ****************************************************************************/
2043 smb_ucs2_t *octal_string_w(int i)
2045 static smb_ucs2_t wret[64];
2046 char ret[64];
2048 if (i == -1)
2049 slprintf(ret, sizeof(ret)-1, "-1");
2050 else
2051 slprintf(ret, sizeof(ret)-1, "0%o", i);
2052 return unix_to_unicode(wret, ret, sizeof(wret));
2056 /****************************************************************************
2057 Truncate a string at a specified length.
2058 length is in ucs2 units.
2059 ****************************************************************************/
2061 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
2063 if (s && strlen_w(s) > length)
2064 s[length] = 0;
2066 return s;
2069 /******************************************************************
2070 functions for UTF8 support (using in kanji.c)
2071 ******************************************************************/
2072 smb_ucs2_t doscp2ucs2(int w)
2074 return ((smb_ucs2_t)doscp_to_ucs2[w]);
2077 int ucs2doscp(smb_ucs2_t w)
2079 return ((int)ucs2_to_doscp[w]);
2082 /* Temporary fix until 3.0... JRA */
2084 int rpcstr_pull(char* dest, void *src, int dest_len, int src_len, int flags)
2086 if(dest_len==-1)
2087 dest_len=MAXUNI-3;
2089 if (flags & STR_TERMINATE)
2090 src_len = strlen_w(src)*2+2;
2092 dest_len = MIN((src_len/2), (dest_len-1));
2093 unistr_to_ascii(dest, src, dest_len);
2094 return src_len;