few edits
[Samba.git] / source / lib / util_unistr.c
bloba7b800a205673c73ab2c84623b621deb5c9a8db0
1 /*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4 Samba utility functions
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include "includes.h"
24 smb_ucs2_t wchar_list_sep[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)',',
25 (smb_ucs2_t)';', (smb_ucs2_t)':', (smb_ucs2_t)'\n',
26 (smb_ucs2_t)'\r', 0 };
28 * The following are the codepage to ucs2 and vica versa maps.
29 * These are dynamically loaded from a unicode translation file.
32 static smb_ucs2_t *doscp_to_ucs2;
33 static uint16 *ucs2_to_doscp;
35 static smb_ucs2_t *unixcp_to_ucs2;
36 static uint16 *ucs2_to_unixcp;
38 #ifndef MAXUNI
39 #define MAXUNI 1024
40 #endif
42 /*******************************************************************
43 Write a string in (little-endian) unicode format. src is in
44 the current UNIX character set. len is the length in bytes of the
45 string pointed to by dst.
47 if null_terminate is True then null terminate the packet (adds 2 bytes)
49 the return value is the length in bytes consumed by the string, including the
50 null termination if applied
51 ********************************************************************/
53 size_t unix_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
55 size_t ret = 0;
56 while (*src && (len >= 2)) {
57 size_t skip = get_character_len(*src);
58 smb_ucs2_t val = (*src & 0xff);
61 * If this is a multibyte character (and all DOS/Windows
62 * codepages have at maximum 2 byte multibyte characters)
63 * then work out the index value for the unicode conversion.
66 if (skip == 2)
67 val = ((val << 8) | (src[1] & 0xff));
69 SSVAL(dst,ret,unixcp_to_ucs2[val]);
70 ret += 2;
71 len -= 2;
72 if (skip)
73 src += skip;
74 else
75 src++;
77 if (null_terminate) {
78 SSVAL(dst,ret,0);
79 ret += 2;
81 return(ret);
84 /*******************************************************************
85 Write a string in (little-endian) unicode format. src is in
86 the current DOS codepage. len is the length in bytes of the
87 string pointed to by dst.
89 if null_terminate is True then null terminate the packet (adds 2 bytes)
91 the return value is the length in bytes consumed by the string, including the
92 null termination if applied
93 ********************************************************************/
95 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
97 size_t ret = 0;
98 while (*src && (len >= 2)) {
99 size_t skip = get_character_len(*src);
100 smb_ucs2_t val = (*src & 0xff);
103 * If this is a multibyte character (and all DOS/Windows
104 * codepages have at maximum 2 byte multibyte characters)
105 * then work out the index value for the unicode conversion.
108 if (skip == 2)
109 val = ((val << 8) | (src[1] & 0xff));
111 SSVAL(dst,ret,doscp_to_ucs2[val]);
112 ret += 2;
113 len -= 2;
114 if (skip)
115 src += skip;
116 else
117 src++;
119 if (null_terminate) {
120 SSVAL(dst,ret,0);
121 ret += 2;
123 return(ret);
126 /*******************************************************************
127 Pull a DOS codepage string out of a UNICODE array. len is in bytes.
128 ********************************************************************/
130 void unistr_to_dos(char *dest, const char *src, size_t len)
132 char *destend = dest + len;
134 while (dest < destend) {
135 uint16 ucs2_val = SVAL(src,0);
136 uint16 cp_val = ucs2_to_doscp[ucs2_val];
138 src += 2;
140 if (ucs2_val == 0)
141 break;
143 if (cp_val < 256)
144 *dest++ = (char)cp_val;
145 else {
146 *dest++ = (cp_val >> 8) & 0xff;
147 *dest++ = (cp_val & 0xff);
151 *dest = 0;
/*******************************************************************
 Skip past a unicode string, but not more than len bytes. Moves
 past a terminating zero when one is found inside those len bytes.
 Never reads beyond src+len.
********************************************************************/

char *skip_unibuf(char *src, size_t len)
{
	char *srcend = src + len;

	/* Only look at a character when both of its bytes are in range. */
	while ((srcend - src) >= 2 && SVAL(src,0))
		src += 2;

	if ((srcend - src) >= 2 && !SVAL(src,0))
		src += 2;

	return src;
}
172 /*******************************************************************
173 Return a DOS codepage version of a little-endian unicode string.
174 len is the filename length (ignoring any terminating zero) in uin16
175 units. Always null terminates.
176 Hack alert: uses fixed buffer(s).
177 len is in 2 byte (unicode) units.
178 ********************************************************************/
180 char *dos_unistrn2(uint16 *src, int len)
182 static char lbufs[8][MAXUNI];
183 static int nexti;
184 char *lbuf = lbufs[nexti];
185 char *p;
187 nexti = (nexti+1)%8;
189 for (p = lbuf; (len > 0) && (p-lbuf < MAXUNI-3) && *src; len--, src++) {
190 uint16 ucs2_val = SVAL(src,0);
191 uint16 cp_val = ucs2_to_doscp[ucs2_val];
193 if (cp_val < 256)
194 *p++ = (char)cp_val;
195 else {
196 *p++ = (cp_val >> 8) & 0xff;
197 *p++ = (cp_val & 0xff);
201 *p = 0;
202 return lbuf;
205 static char lbufs[8][MAXUNI];
206 static int nexti;
208 /*******************************************************************
209 Return a DOS codepage version of a little-endian unicode string.
210 Hack alert: uses fixed buffer(s).
211 ********************************************************************/
213 char *dos_unistr2(uint16 *src)
215 char *lbuf = lbufs[nexti];
216 char *p;
218 nexti = (nexti+1)%8;
220 for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
221 uint16 ucs2_val = SVAL(src,0);
222 uint16 cp_val = ucs2_to_doscp[ucs2_val];
224 if (cp_val < 256)
225 *p++ = (char)cp_val;
226 else {
227 *p++ = (cp_val >> 8) & 0xff;
228 *p++ = (cp_val & 0xff);
232 *p = 0;
233 return lbuf;
236 /*******************************************************************
237 Return a DOS codepage version of a little-endian unicode string
238 ********************************************************************/
240 char *dos_unistr2_to_str(UNISTR2 *str)
242 char *lbuf = lbufs[nexti];
243 char *p;
244 uint16 *src = str->buffer;
246 nexti = (nexti+1)%8;
248 for (p = lbuf; (p - lbuf < MAXUNI-3) && (src - str->buffer < str->uni_str_len) && *src; src++) {
249 uint16 ucs2_val = SVAL(src,0);
250 uint16 cp_val = ucs2_to_doscp[ucs2_val];
252 if (cp_val < 256)
253 *p++ = (char)cp_val;
254 else {
255 *p++ = (cp_val >> 8) & 0xff;
256 *p++ = (cp_val & 0xff);
260 *p = 0;
261 return lbuf;
264 /*******************************************************************
265 Put an ASCII string into a UNICODE array (uint16's).
266 use little-endian ucs2
267 ********************************************************************/
268 void ascii_to_unistr(uint16 *dest, const char *src, int maxlen)
270 uint16 *destend = dest + maxlen;
271 char c;
273 while (dest < destend) {
274 c = *(src++);
275 if (c == 0)
276 break;
278 SSVAL(dest, 0, c);
279 dest++;
282 *dest = 0;
285 /*******************************************************************
286 Pull an ASCII string out of a UNICODE array (uint16's).
287 ********************************************************************/
289 void unistr_to_ascii(char *dest, const uint16 *src, int len)
291 char *destend = dest + len;
292 uint16 c;
294 if (src == NULL) {
295 *dest = '\0';
296 return;
299 /* normal code path for a valid 'src' */
300 while (dest < destend) {
301 c = SVAL(src, 0);
302 src++;
303 if (c == 0)
304 break;
306 *(dest++) = (char)c;
309 *dest = 0;
310 return;
313 /*******************************************************************
314 Convert a (little-endian) UNISTR2 structure to an ASCII string, either
315 DOS or UNIX codepage.
316 ********************************************************************/
318 static void unistr2_to_mbcp(char *dest, const UNISTR2 *str, size_t maxlen, uint16 *ucs2_to_mbcp)
320 char *p;
321 uint16 *src;
322 size_t len;
324 if (str == NULL) {
325 *dest='\0';
326 return;
329 src = str->buffer;
331 len = MIN(str->uni_str_len, maxlen);
332 if (len == 0) {
333 *dest='\0';
334 return;
337 for (p = dest; (p-dest < maxlen-3) && (src - str->buffer < str->uni_str_len) && *src; src++) {
338 uint16 ucs2_val = SVAL(src,0);
339 uint16 cp_val = ucs2_to_mbcp[ucs2_val];
341 if (cp_val < 256)
342 *p++ = (char)cp_val;
343 else {
344 *p++ = (cp_val >> 8) & 0xff;
345 *p++ = (cp_val & 0xff);
349 *p = 0;
352 /*******************************************************************
353 Convert a (little-endian) UNISTR2 structure to an ASCII string
354 Warning: this version does DOS codepage.
355 ********************************************************************/
357 void unistr2_to_dos(char *dest, const UNISTR2 *str, size_t maxlen)
359 unistr2_to_mbcp(dest, str, maxlen, ucs2_to_doscp);
362 /*******************************************************************
363 Convert a (little-endian) UNISTR2 structure to an ASCII string
364 Warning: this version does UNIX codepage.
365 ********************************************************************/
367 void unistr2_to_unix(char *dest, const UNISTR2 *str, size_t maxlen)
369 unistr2_to_mbcp(dest, str, maxlen, ucs2_to_unixcp);
372 /*******************************************************************
373 Return a number stored in a buffer
374 ********************************************************************/
376 uint32 buffer2_to_uint32(BUFFER2 *str)
378 if (str->buf_len == 4)
379 return IVAL(str->buffer, 0);
380 else
381 return 0;
384 /*******************************************************************
385 Return a DOS codepage version of a NOTunicode string
386 ********************************************************************/
388 char *dos_buffer2_to_str(BUFFER2 *str)
390 char *lbuf = lbufs[nexti];
391 char *p;
392 uint16 *src = str->buffer;
394 nexti = (nexti+1)%8;
396 for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2) && *src; src++) {
397 uint16 ucs2_val = SVAL(src,0);
398 uint16 cp_val = ucs2_to_doscp[ucs2_val];
400 if (cp_val < 256)
401 *p++ = (char)cp_val;
402 else {
403 *p++ = (cp_val >> 8) & 0xff;
404 *p++ = (cp_val & 0xff);
408 *p = 0;
409 return lbuf;
412 /*******************************************************************
413 Return a dos codepage version of a NOTunicode string
414 ********************************************************************/
416 char *dos_buffer2_to_multistr(BUFFER2 *str)
418 char *lbuf = lbufs[nexti];
419 char *p;
420 uint16 *src = str->buffer;
422 nexti = (nexti+1)%8;
424 for (p = lbuf; (p - lbuf < sizeof(str->buffer)-3) && (src - str->buffer < str->buf_len/2); src++) {
425 if (*src == 0) {
426 *p++ = ' ';
427 } else {
428 uint16 ucs2_val = SVAL(src,0);
429 uint16 cp_val = ucs2_to_doscp[ucs2_val];
431 if (cp_val < 256)
432 *p++ = (char)cp_val;
433 else {
434 *p++ = (cp_val >> 8) & 0xff;
435 *p++ = (cp_val & 0xff);
440 *p = 0;
441 return lbuf;
444 /*******************************************************************
445 Create a null-terminated unicode string from a null-terminated DOS
446 codepage string.
447 Return number of unicode chars copied, excluding the null character.
448 Unicode strings created are in little-endian format.
449 max_len is in bytes.
450 ********************************************************************/
452 size_t dos_struni2(char *dst, const char *src, size_t max_len)
454 size_t len = 0;
456 if (dst == NULL)
457 return 0;
459 if (src != NULL) {
460 for (; ((len*2) < max_len-2) && *src; len++, dst +=2) {
461 size_t skip = get_character_len(*src);
462 smb_ucs2_t val = (*src & 0xff);
465 * If this is a multibyte character (and all DOS/Windows
466 * codepages have at maximum 2 byte multibyte characters)
467 * then work out the index value for the unicode conversion.
470 if (skip == 2)
471 val = ((val << 8) | (src[1] & 0xff));
473 SSVAL(dst,0,doscp_to_ucs2[val]);
474 if (skip)
475 src += skip;
476 else
477 src++;
481 SSVAL(dst,0,0);
483 return len;
486 /*******************************************************************
487 Return a DOS codepage version of a little-endian unicode string.
488 Hack alert: uses fixed buffer(s).
489 ********************************************************************/
491 char *dos_unistr(char *buf)
493 char *lbuf = lbufs[nexti];
494 uint16 *src = (uint16 *)buf;
495 char *p;
497 nexti = (nexti+1)%8;
499 for (p = lbuf; (p-lbuf < MAXUNI-3) && *src; src++) {
500 uint16 ucs2_val = SVAL(src,0);
501 uint16 cp_val = ucs2_to_doscp[ucs2_val];
503 if (cp_val < 256)
504 *p++ = (char)cp_val;
505 else {
506 *p++ = (cp_val >> 8) & 0xff;
507 *p++ = (cp_val & 0xff);
511 *p = 0;
512 return lbuf;
515 /*******************************************************************
516 returns the length in number of wide characters
517 ******************************************************************/
518 int unistrlen(uint16 *s)
520 int len;
522 if (!s)
523 return -1;
525 for (len=0; *s; s++,len++);
527 return len;
530 /*******************************************************************
531 Strcpy for unicode strings. returns length (in num of wide chars)
532 ********************************************************************/
534 int unistrcpy(uint16 *dst, uint16 *src)
536 int num_wchars = 0;
538 while (*src) {
539 *dst++ = *src++;
540 num_wchars++;
542 *dst = 0;
544 return num_wchars;
547 /*******************************************************************
548 Free any existing maps.
549 ********************************************************************/
551 static void free_maps(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
553 /* this handles identity mappings where we share the pointer */
554 if (*pp_ucs2_to_cp == *pp_cp_to_ucs2) {
555 *pp_ucs2_to_cp = NULL;
558 SAFE_FREE(*pp_cp_to_ucs2);
559 SAFE_FREE(*pp_ucs2_to_cp);
562 /*******************************************************************
563 Build a default (null) codepage to unicode map.
564 ********************************************************************/
566 void default_unicode_map(smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
568 int i;
570 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
572 if ((*pp_ucs2_to_cp = (uint16 *)malloc(2*65536)) == NULL) {
573 DEBUG(0,("default_unicode_map: malloc fail for ucs2_to_cp size %u.\n", 2*65536));
574 abort();
577 *pp_cp_to_ucs2 = *pp_ucs2_to_cp; /* Default map is an identity. */
578 for (i = 0; i < 65536; i++)
579 (*pp_cp_to_ucs2)[i] = i;
582 /*******************************************************************
583 Load a codepage to unicode and vica-versa map.
584 ********************************************************************/
586 BOOL load_unicode_map(const char *codepage, smb_ucs2_t **pp_cp_to_ucs2, uint16 **pp_ucs2_to_cp)
588 pstring unicode_map_file_name;
589 FILE *fp = NULL;
590 SMB_STRUCT_STAT st;
591 smb_ucs2_t *cp_to_ucs2 = *pp_cp_to_ucs2;
592 uint16 *ucs2_to_cp = *pp_ucs2_to_cp;
593 size_t cp_to_ucs2_size;
594 size_t ucs2_to_cp_size;
595 size_t i;
596 size_t size;
597 char buf[UNICODE_MAP_HEADER_SIZE];
599 DEBUG(5, ("load_unicode_map: loading unicode map for codepage %s.\n", codepage));
601 if (*codepage == '\0')
602 goto clean_and_exit;
604 if(strlen(lp_codepagedir()) + 13 + strlen(codepage) >
605 sizeof(unicode_map_file_name)) {
606 DEBUG(0,("load_unicode_map: filename too long to load\n"));
607 goto clean_and_exit;
610 pstrcpy(unicode_map_file_name, lp_codepagedir());
611 pstrcat(unicode_map_file_name, "/");
612 pstrcat(unicode_map_file_name, "unicode_map.");
613 pstrcat(unicode_map_file_name, codepage);
615 if(sys_stat(unicode_map_file_name,&st)!=0) {
616 DEBUG(0,("load_unicode_map: filename %s does not exist.\n",
617 unicode_map_file_name));
618 goto clean_and_exit;
621 size = st.st_size;
623 if ((size != UNICODE_MAP_HEADER_SIZE + 4*65536) && (size != UNICODE_MAP_HEADER_SIZE +(2*256 + 2*65536))) {
624 DEBUG(0,("load_unicode_map: file %s is an incorrect size for a \
625 unicode map file (size=%d).\n", unicode_map_file_name, (int)size));
626 goto clean_and_exit;
629 if((fp = sys_fopen( unicode_map_file_name, "r")) == NULL) {
630 DEBUG(0,("load_unicode_map: cannot open file %s. Error was %s\n",
631 unicode_map_file_name, strerror(errno)));
632 goto clean_and_exit;
635 if(fread( buf, 1, UNICODE_MAP_HEADER_SIZE, fp)!=UNICODE_MAP_HEADER_SIZE) {
636 DEBUG(0,("load_unicode_map: cannot read header from file %s. Error was %s\n",
637 unicode_map_file_name, strerror(errno)));
638 goto clean_and_exit;
641 /* Check the version value */
642 if(SVAL(buf,UNICODE_MAP_VERSION_OFFSET) != UNICODE_MAP_FILE_VERSION_ID) {
643 DEBUG(0,("load_unicode_map: filename %s has incorrect version id. \
644 Needed %hu, got %hu.\n",
645 unicode_map_file_name, (uint16)UNICODE_MAP_FILE_VERSION_ID,
646 SVAL(buf,UNICODE_MAP_VERSION_OFFSET)));
647 goto clean_and_exit;
650 /* Check the codepage value */
651 if(!strequal(&buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage)) {
652 DEBUG(0,("load_unicode_map: codepage %s in file %s is not the same as that \
653 requested (%s).\n", &buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], unicode_map_file_name, codepage ));
654 goto clean_and_exit;
657 ucs2_to_cp_size = 2*65536;
658 if (size == UNICODE_MAP_HEADER_SIZE + 4*65536) {
660 * This is a multibyte code page.
662 cp_to_ucs2_size = 2*65536;
663 } else {
665 * Single byte code page.
667 cp_to_ucs2_size = 2*256;
671 * Free any old translation tables.
674 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
676 if ((cp_to_ucs2 = (smb_ucs2_t *)malloc(cp_to_ucs2_size)) == NULL) {
677 DEBUG(0,("load_unicode_map: malloc fail for cp_to_ucs2 size %u.\n", cp_to_ucs2_size ));
678 goto clean_and_exit;
681 if ((ucs2_to_cp = (uint16 *)malloc(ucs2_to_cp_size)) == NULL) {
682 DEBUG(0,("load_unicode_map: malloc fail for ucs2_to_cp size %u.\n", ucs2_to_cp_size ));
683 goto clean_and_exit;
686 if(fread( (char *)cp_to_ucs2, 1, cp_to_ucs2_size, fp)!=cp_to_ucs2_size) {
687 DEBUG(0,("load_unicode_map: cannot read cp_to_ucs2 from file %s. Error was %s\n",
688 unicode_map_file_name, strerror(errno)));
689 goto clean_and_exit;
692 if(fread( (char *)ucs2_to_cp, 1, ucs2_to_cp_size, fp)!=ucs2_to_cp_size) {
693 DEBUG(0,("load_unicode_map: cannot read ucs2_to_cp from file %s. Error was %s\n",
694 unicode_map_file_name, strerror(errno)));
695 goto clean_and_exit;
699 * Now ensure the 16 bit values are in the correct endianness.
702 for (i = 0; i < cp_to_ucs2_size/2; i++)
703 cp_to_ucs2[i] = SVAL(cp_to_ucs2,i*2);
705 for (i = 0; i < ucs2_to_cp_size/2; i++)
706 ucs2_to_cp[i] = SVAL(ucs2_to_cp,i*2);
708 fclose(fp);
710 *pp_cp_to_ucs2 = cp_to_ucs2;
711 *pp_ucs2_to_cp = ucs2_to_cp;
713 return True;
715 clean_and_exit:
717 /* pseudo destructor :-) */
719 if(fp != NULL)
720 fclose(fp);
722 free_maps(pp_cp_to_ucs2, pp_ucs2_to_cp);
724 default_unicode_map(pp_cp_to_ucs2, pp_ucs2_to_cp);
726 return False;
729 /*******************************************************************
730 Load a dos codepage to unicode and vica-versa map.
731 ********************************************************************/
733 BOOL load_dos_unicode_map(int codepage)
735 fstring codepage_str;
737 slprintf(codepage_str, sizeof(fstring)-1, "%03d", codepage);
738 DEBUG(10,("load_dos_unicode_map: %s\n", codepage_str));
739 return load_unicode_map(codepage_str, &doscp_to_ucs2, &ucs2_to_doscp);
742 /*******************************************************************
743 Load a UNIX codepage to unicode and vica-versa map.
744 ********************************************************************/
746 BOOL load_unix_unicode_map(const char *unix_char_set, BOOL override)
748 static BOOL init_done;
749 fstring upper_unix_char_set;
751 fstrcpy(upper_unix_char_set, unix_char_set);
752 strupper(upper_unix_char_set);
754 DEBUG(10,("load_unix_unicode_map: %s (init_done=%d, override=%d)\n",
755 upper_unix_char_set, (int)init_done, (int)override ));
757 if (!init_done)
758 init_done = True;
759 else if (!override)
760 return True;
762 return load_unicode_map(upper_unix_char_set, &unixcp_to_ucs2, &ucs2_to_unixcp);
765 /*******************************************************************
766 The following functions reproduce many of the non-UNICODE standard
767 string functions in Samba.
768 ********************************************************************/
770 /*******************************************************************
771 Convert a UNICODE string to multibyte format. Note that the 'src' is in
772 native byte order, not little endian. Always zero terminates.
773 dst_len is in bytes.
774 ********************************************************************/
776 static char *unicode_to_multibyte(char *dst, const smb_ucs2_t *src,
777 size_t dst_len, const uint16 *ucs2_to_cp)
779 size_t dst_pos;
781 for(dst_pos = 0; (dst_pos < dst_len - 1) && *src;) {
782 smb_ucs2_t val = ucs2_to_cp[*src++];
783 if(val < 256) {
784 dst[dst_pos++] = (char)val;
785 } else {
787 if(dst_pos >= dst_len - 2)
788 break;
791 * A 2 byte value is always written as
792 * high/low into the buffer stream.
795 dst[dst_pos++] = (char)((val >> 8) & 0xff);
796 dst[dst_pos++] = (char)(val & 0xff);
800 dst[dst_pos] = '\0';
802 return dst;
805 /*******************************************************************
806 Convert a multibyte string to UNICODE format. Note that the 'dst' is in
807 native byte order, not little endian. Always zero terminates.
808 dst_len is in bytes.
809 ********************************************************************/
811 smb_ucs2_t *multibyte_to_unicode(smb_ucs2_t *dst, const char *src,
812 size_t dst_len, smb_ucs2_t *cp_to_ucs2)
814 size_t i;
816 dst_len /= sizeof(smb_ucs2_t); /* Convert to smb_ucs2_t units. */
818 for(i = 0; (i < (dst_len - 1)) && *src;) {
819 size_t skip = skip_multibyte_char(*src);
820 smb_ucs2_t val = (*src & 0xff);
823 * If this is a multibyte character
824 * then work out the index value for the unicode conversion.
827 if (skip == 2)
828 val = ((val << 8) | (src[1] & 0xff));
830 dst[i++] = cp_to_ucs2[val];
831 if (skip)
832 src += skip;
833 else
834 src++;
837 dst[i] = 0;
839 return dst;
842 /*******************************************************************
843 Convert a UNICODE string to multibyte format. Note that the 'src' is in
844 native byte order, not little endian. Always zero terminates.
845 This function may be replaced if the MB codepage format is an
846 encoded one (ie. utf8, hex). See the code in lib/kanji.c
847 for details. dst_len is in bytes.
848 ********************************************************************/
850 char *unicode_to_unix(char *dst, const smb_ucs2_t *src, size_t dst_len)
852 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_unixcp);
855 /*******************************************************************
856 Convert a UNIX string to UNICODE format. Note that the 'dst' is in
857 native byte order, not little endian. Always zero terminates.
858 This function may be replaced if the UNIX codepage format is a
859 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
860 for details. dst_len is in bytes, not ucs2 units.
861 ********************************************************************/
863 smb_ucs2_t *unix_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
865 return multibyte_to_unicode(dst, src, dst_len, unixcp_to_ucs2);
868 /*******************************************************************
869 Convert a single UNICODE character to unix character. Returns the
870 number of bytes in the unix character.
871 ********************************************************************/
873 size_t unicode_to_unix_char(char *dst, const smb_ucs2_t src)
875 smb_ucs2_t val = ucs2_to_unixcp[src];
876 if(val < 256) {
877 *dst = (char)val;
878 return (size_t)1;
881 * A 2 byte value is always written as
882 * high/low into the buffer stream.
885 dst[0] = (char)((val >> 8) & 0xff);
886 dst[1] = (char)(val & 0xff);
887 return (size_t)2;
890 /*******************************************************************
891 Convert a UNICODE string to DOS format. Note that the 'src' is in
892 native byte order, not little endian. Always zero terminates.
893 dst_len is in bytes.
894 ********************************************************************/
896 char *unicode_to_dos(char *dst, const smb_ucs2_t *src, size_t dst_len)
898 return unicode_to_multibyte(dst, src, dst_len, ucs2_to_doscp);
901 /*******************************************************************
902 Convert a single UNICODE character to DOS codepage. Returns the
903 number of bytes in the DOS codepage character.
904 ********************************************************************/
906 size_t unicode_to_dos_char(char *dst, const smb_ucs2_t src)
908 smb_ucs2_t val = ucs2_to_doscp[src];
909 if(val < 256) {
910 *dst = (char)val;
911 return (size_t)1;
914 * A 2 byte value is always written as
915 * high/low into the buffer stream.
918 dst[0] = (char)((val >> 8) & 0xff);
919 dst[1] = (char)(val & 0xff);
920 return (size_t)2;
923 /*******************************************************************
924 Convert a DOS string to UNICODE format. Note that the 'dst' is in
925 native byte order, not little endian. Always zero terminates.
926 This function may be replaced if the DOS codepage format is a
927 multi-byte one (ie. JIS, SJIS or utf8). See the code in lib/kanji.c
928 for details. dst_len is in bytes, not ucs2 units.
929 ********************************************************************/
931 smb_ucs2_t *dos_to_unicode(smb_ucs2_t *dst, const char *src, size_t dst_len)
933 return multibyte_to_unicode(dst, src, dst_len, doscp_to_ucs2);
936 /*******************************************************************
937 Count the number of characters in a smb_ucs2_t string.
938 ********************************************************************/
940 size_t strlen_w(const smb_ucs2_t *src)
942 size_t len;
944 for(len = 0; *src++; len++)
947 return len;
950 /*******************************************************************
951 Safe wstring copy into a known length string. maxlength includes
952 the terminating zero. maxlength is in ucs2 units.
953 ********************************************************************/
955 smb_ucs2_t *safe_strcpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src, size_t maxlength)
957 size_t ucs2_len;
959 if (!dest) {
960 DEBUG(0,("ERROR: NULL dest in safe_strcpy_w\n"));
961 return NULL;
964 if (!src) {
965 *dest = 0;
966 return dest;
969 maxlength /= sizeof(smb_ucs2_t);
971 ucs2_len = strlen_w(src);
973 if (ucs2_len >= maxlength) {
974 fstring out;
975 DEBUG(0,("ERROR: string overflow by %u bytes in safe_strcpy_w [%.50s]\n",
976 (unsigned int)((ucs2_len-maxlength)*sizeof(smb_ucs2_t)),
977 unicode_to_unix(out,src,sizeof(out))) );
978 ucs2_len = maxlength - 1;
981 memcpy(dest, src, ucs2_len*sizeof(smb_ucs2_t));
982 dest[ucs2_len] = 0;
983 return dest;
986 /*******************************************************************
987 Safe string cat into a string. maxlength includes the terminating zero.
988 maxlength is in ucs2 units.
989 ********************************************************************/
991 smb_ucs2_t *safe_strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, size_t maxlength)
993 size_t ucs2_src_len, ucs2_dest_len;
995 if (!dest) {
996 DEBUG(0,("ERROR: NULL dest in safe_strcat_w\n"));
997 return NULL;
1000 if (!src)
1001 return dest;
1003 ucs2_src_len = strlen_w(src);
1004 ucs2_dest_len = strlen_w(dest);
1006 if (ucs2_src_len + ucs2_dest_len >= maxlength) {
1007 fstring out;
1008 int new_len = maxlength - ucs2_dest_len - 1;
1009 DEBUG(0,("ERROR: string overflow by %u characters in safe_strcat_w [%.50s]\n",
1010 (unsigned int)(sizeof(smb_ucs2_t)*(ucs2_src_len + ucs2_dest_len - maxlength)),
1011 unicode_to_unix(out,src,sizeof(out))) );
1012 ucs2_src_len = (size_t)(new_len > 0 ? new_len : 0);
1015 memcpy(&dest[ucs2_dest_len], src, ucs2_src_len*sizeof(smb_ucs2_t));
1016 dest[ucs2_dest_len + ucs2_src_len] = 0;
1017 return dest;
1020 /*******************************************************************
1021 Compare the two strings s1 and s2.
1022 ********************************************************************/
1024 int strcmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1026 smb_ucs2_t c1, c2;
1028 for (;;) {
1029 c1 = *s1++;
1030 c2 = *s2++;
1032 if (c1 != c2)
1033 return c1 - c2;
1035 if (c1 == 0)
1036 break;
1038 return 0;
1041 /*******************************************************************
1042 Compare the first n characters of s1 to s2. len is in ucs2 units.
1043 ********************************************************************/
1045 int strncmp_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2, size_t len)
1047 smb_ucs2_t c1, c2;
1049 for (; len != 0; --len) {
1050 c1 = *s1++;
1051 c2 = *s2++;
1053 if (c1 != c2)
1054 return c1 - c2;
1056 if (c1 == 0)
1057 break;
1060 return 0;
1063 /*******************************************************************
1064 Search string s2 from s1.
1065 ********************************************************************/
1067 smb_ucs2_t *strstr_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1069 size_t len = strlen_w(s2);
1071 if (!*s2)
1072 return (smb_ucs2_t *)s1;
1074 for(;*s1; s1++) {
1075 if (*s1 == *s2) {
1076 if (strncmp_w(s1, s2, len) == 0)
1077 return (smb_ucs2_t *)s1;
1080 return NULL;
1083 /*******************************************************************
1084 Search for ucs2 char c from the beginning of s.
1085 ********************************************************************/
1087 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1089 do {
1090 if (*s == c)
1091 return (smb_ucs2_t *)s;
1092 } while (*s++);
1094 return NULL;
1097 /*******************************************************************
1098 Search for ucs2 char c from the end of s.
1099 ********************************************************************/
1101 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
1103 smb_ucs2_t *retval = 0;
1105 do {
1106 if (*s == c)
1107 retval = (smb_ucs2_t *)s;
1108 } while (*s++);
1110 return retval;
1113 /*******************************************************************
1114 Search token from s1 separated by any ucs2 char of s2.
1115 ********************************************************************/
1117 smb_ucs2_t *strtok_w(smb_ucs2_t *s1, const smb_ucs2_t *s2)
1119 static smb_ucs2_t *s = NULL;
1120 smb_ucs2_t *q;
1122 if (!s1) {
1123 if (!s)
1124 return NULL;
1125 s1 = s;
1128 for (q = s1; *s1; s1++) {
1129 smb_ucs2_t *p = strchr_w(s2, *s1);
1130 if (p) {
1131 if (s1 != q) {
1132 s = s1 + 1;
1133 *s1 = '\0';
1134 return q;
1136 q = s1 + 1;
1140 s = NULL;
1141 if (*q)
1142 return q;
1144 return NULL;
1147 /*******************************************************************
1148 Duplicate a ucs2 string.
1149 ********************************************************************/
1151 smb_ucs2_t *strdup_w(const smb_ucs2_t *s)
1153 size_t newlen = (strlen_w(s)+1)*sizeof(smb_ucs2_t);
1154 smb_ucs2_t *newstr = (smb_ucs2_t *)malloc(newlen);
1155 if (newstr == NULL)
1156 return NULL;
1157 safe_strcpy_w(newstr, s, newlen);
1158 return newstr;
1161 /*******************************************************************
1162 Mapping tables for UNICODE character. Allows toupper/tolower and
1163 isXXX functions to work.
1165 tridge: split into 2 pieces. This saves us 5/6 of the memory
1166 with a small speed penalty
1167 The magic constants are the lower/upper range of the tables two
1168 parts
1169 ********************************************************************/
1171 typedef struct {
1172 smb_ucs2_t lower;
1173 smb_ucs2_t upper;
1174 unsigned char flags;
1175 } smb_unicode_table_t;
1177 #define TABLE1_BOUNDARY 9450
1178 #define TABLE2_BOUNDARY 64256
1180 static smb_unicode_table_t map_table1[] = {
1181 #include "unicode_map_table1.h"
1184 static smb_unicode_table_t map_table2[] = {
1185 #include "unicode_map_table2.h"
1188 static unsigned char map_table_flags(smb_ucs2_t v)
1190 if (v < TABLE1_BOUNDARY) return map_table1[v].flags;
1191 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].flags;
1192 return 0;
1195 static smb_ucs2_t map_table_lower(smb_ucs2_t v)
1197 if (v < TABLE1_BOUNDARY) return map_table1[v].lower;
1198 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].lower;
1199 return v;
1202 static smb_ucs2_t map_table_upper(smb_ucs2_t v)
1204 if (v < TABLE1_BOUNDARY) return map_table1[v].upper;
1205 if (v >= TABLE2_BOUNDARY) return map_table2[v - TABLE2_BOUNDARY].upper;
1206 return v;
1209 /*******************************************************************
1210 Is an upper case wchar.
1211 ********************************************************************/
1213 int isupper_w( smb_ucs2_t val)
1215 return (map_table_flags(val) & UNI_UPPER);
1218 /*******************************************************************
1219 Is a lower case wchar.
1220 ********************************************************************/
1222 int islower_w( smb_ucs2_t val)
1224 return (map_table_flags(val) & UNI_LOWER);
1227 /*******************************************************************
1228 Is a digit wchar.
1229 ********************************************************************/
1231 int isdigit_w( smb_ucs2_t val)
1233 return (map_table_flags(val) & UNI_DIGIT);
1236 /*******************************************************************
1237 Is a hex digit wchar.
1238 ********************************************************************/
1240 int isxdigit_w( smb_ucs2_t val)
1242 return (map_table_flags(val) & UNI_XDIGIT);
1245 /*******************************************************************
1246 Is a space wchar.
1247 ********************************************************************/
1249 int isspace_w( smb_ucs2_t val)
1251 return (map_table_flags(val) & UNI_SPACE);
1254 /*******************************************************************
1255 Convert a wchar to upper case.
1256 ********************************************************************/
1258 smb_ucs2_t toupper_w( smb_ucs2_t val )
1260 return map_table_upper(val);
1263 /*******************************************************************
1264 Convert a wchar to lower case.
1265 ********************************************************************/
1267 smb_ucs2_t tolower_w( smb_ucs2_t val )
1269 return map_table_lower(val);
1272 static smb_ucs2_t *last_ptr = NULL;
1274 void set_first_token_w(smb_ucs2_t *ptr)
1276 last_ptr = ptr;
1279 /****************************************************************************
1280 Get the next token from a string, return False if none found
1281 handles double-quotes.
1282 Based on a routine by GJC@VILLAGE.COM.
1283 Extensively modified by Andrew.Tridgell@anu.edu.au
1284 bufsize is in bytes.
1285 ****************************************************************************/
1287 static smb_ucs2_t sep_list[] = { (smb_ucs2_t)' ', (smb_ucs2_t)'\t', (smb_ucs2_t)'\n', (smb_ucs2_t)'\r', 0};
1288 static smb_ucs2_t quotechar = (smb_ucs2_t)'\"';
1290 BOOL next_token_w(smb_ucs2_t **ptr, smb_ucs2_t *buff, smb_ucs2_t *sep, size_t bufsize)
1292 smb_ucs2_t *s;
1293 BOOL quoted;
1294 size_t len=1;
1297 * Convert bufsize to smb_ucs2_t units.
1300 bufsize /= sizeof(smb_ucs2_t);
1302 if (!ptr)
1303 ptr = &last_ptr;
1304 if (!ptr)
1305 return(False);
1307 s = *ptr;
1310 * Default to simple separators.
1313 if (!sep)
1314 sep = sep_list;
1317 * Find the first non sep char.
1320 while(*s && strchr_w(sep,*s))
1321 s++;
1324 * Nothing left ?
1327 if (!*s)
1328 return(False);
1331 * Copy over the token.
1334 for (quoted = False; len < bufsize && *s && (quoted || !strchr_w(sep,*s)); s++) {
1335 if (*s == quotechar) {
1336 quoted = !quoted;
1337 } else {
1338 len++;
1339 *buff++ = *s;
1343 *ptr = (*s) ? s+1 : s;
1344 *buff = 0;
1345 last_ptr = *ptr;
1347 return(True);
1350 /****************************************************************************
1351 Convert list of tokens to array; dependent on above routine.
1352 Uses last_ptr from above - bit of a hack.
1353 ****************************************************************************/
1355 smb_ucs2_t **toktocliplist_w(int *ctok, smb_ucs2_t *sep)
1357 smb_ucs2_t *s=last_ptr;
1358 int ictok=0;
1359 smb_ucs2_t **ret, **iret;
1361 if (!sep)
1362 sep = sep_list;
1364 while(*s && strchr_w(sep,*s))
1365 s++;
1368 * Nothing left ?
1371 if (!*s)
1372 return(NULL);
1374 do {
1375 ictok++;
1376 while(*s && (!strchr_w(sep,*s)))
1377 s++;
1378 while(*s && strchr_w(sep,*s))
1379 *s++=0;
1380 } while(*s);
1382 *ctok = ictok;
1383 s = last_ptr;
1385 if (!(ret=iret=malloc(ictok*sizeof(smb_ucs2_t *))))
1386 return NULL;
1388 while(ictok--) {
1389 *iret++=s;
1390 while(*s++)
1392 while(!*s)
1393 s++;
1396 return ret;
1399 /*******************************************************************
1400 Case insensitive string compararison.
1401 ********************************************************************/
1403 int StrCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t)
1406 * Compare until we run out of string, either t or s, or find a difference.
1409 while (*s && *t && toupper_w(*s) == toupper_w(*t)) {
1410 s++;
1411 t++;
1414 return(toupper_w(*s) - toupper_w(*t));
1417 /*******************************************************************
1418 Case insensitive string compararison, length limited.
1419 n is in ucs2 units.
1420 ********************************************************************/
1422 int StrnCaseCmp_w(const smb_ucs2_t *s, const smb_ucs2_t *t, size_t n)
1425 * Compare until we run out of string, either t or s, or chars.
1428 while (n && *s && *t && toupper_w(*s) == toupper_w(*t)) {
1429 s++;
1430 t++;
1431 n--;
1435 * Not run out of chars - strings are different lengths.
1438 if (n)
1439 return(toupper_w(*s) - toupper_w(*t));
1442 * Identical up to where we run out of chars,
1443 * and strings are same length.
1446 return(0);
1449 /*******************************************************************
1450 Compare 2 strings.
1451 ********************************************************************/
1453 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
1455 if (s1 == s2)
1456 return(True);
1457 if (!s1 || !s2)
1458 return(False);
1460 return(StrCaseCmp_w(s1,s2)==0);
1463 /*******************************************************************
1464 Compare 2 strings up to and including the nth char. n is in ucs2
1465 units.
1466 ******************************************************************/
1468 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
1470 if (s1 == s2)
1471 return(True);
1472 if (!s1 || !s2 || !n)
1473 return(False);
1475 return(StrnCaseCmp_w(s1,s2,n)==0);
1478 /*******************************************************************
1479 Compare 2 strings (case sensitive).
1480 ********************************************************************/
1482 BOOL strcsequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2)
1484 if (s1 == s2)
1485 return(True);
1486 if (!s1 || !s2)
1487 return(False);
1489 return(strcmp_w(s1,s2)==0);
1492 /*******************************************************************
1493 Convert a string to lower case.
1494 ********************************************************************/
1496 void strlower_w(smb_ucs2_t *s)
1498 while (*s) {
1499 if (isupper_w(*s))
1500 *s = tolower_w(*s);
1501 s++;
1505 /*******************************************************************
1506 Convert a string to upper case.
1507 ********************************************************************/
1509 void strupper_w(smb_ucs2_t *s)
1511 while (*s) {
1512 if (islower_w(*s))
1513 *s = toupper_w(*s);
1514 s++;
1518 /*******************************************************************
1519 Convert a string to "normal" form.
1520 ********************************************************************/
1522 void strnorm_w(smb_ucs2_t *s)
1524 extern int case_default;
1525 if (case_default == CASE_UPPER)
1526 strupper_w(s);
1527 else
1528 strlower_w(s);
1531 /*******************************************************************
1532 Check if a string is in "normal" case.
1533 ********************************************************************/
1535 BOOL strisnormal_w(smb_ucs2_t *s)
1537 extern int case_default;
1538 if (case_default == CASE_UPPER)
1539 return(!strhaslower_w(s));
1541 return(!strhasupper_w(s));
1544 /****************************************************************************
1545 String replace.
1546 ****************************************************************************/
1548 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
1550 while (*s) {
1551 if (oldc == *s)
1552 *s = newc;
1553 s++;
1557 /*******************************************************************
1558 Skip past some strings in a buffer. n is in bytes.
1559 ********************************************************************/
1561 smb_ucs2_t *skip_string_w(smb_ucs2_t *buf,size_t n)
1563 while (n--)
1564 buf += (strlen_w(buf)*sizeof(smb_ucs2_t)) + 1;
1565 return(buf);
1568 /*******************************************************************
1569 Count the number of characters in a string. Same as strlen_w in
1570 smb_ucs2_t string units.
1571 ********************************************************************/
1573 size_t str_charnum_w(const smb_ucs2_t *s)
1575 return strlen_w(s);
1578 /*******************************************************************
1579 Trim the specified elements off the front and back of a string.
1580 ********************************************************************/
1582 BOOL trim_string_w(smb_ucs2_t *s,const smb_ucs2_t *front,const smb_ucs2_t *back)
1584 BOOL ret = False;
1585 size_t front_len = (front && *front) ? strlen_w(front) : 0;
1586 size_t back_len = (back && *back) ? strlen_w(back) : 0;
1587 size_t s_len;
1589 while (front_len && strncmp_w(s, front, front_len) == 0) {
1590 smb_ucs2_t *p = s;
1591 ret = True;
1593 while (1) {
1594 if (!(*p = p[front_len]))
1595 break;
1596 p++;
1600 if(back_len) {
1601 s_len = strlen_w(s);
1602 while ((s_len >= back_len) &&
1603 (strncmp_w(s + s_len - back_len, back, back_len)==0)) {
1604 ret = True;
1605 s[s_len - back_len] = 0;
1606 s_len = strlen_w(s);
1610 return(ret);
1613 /****************************************************************************
1614 Does a string have any uppercase chars in it ?
1615 ****************************************************************************/
1617 BOOL strhasupper_w(const smb_ucs2_t *s)
1619 while (*s) {
1620 if (isupper_w(*s))
1621 return(True);
1622 s++;
1624 return(False);
1627 /****************************************************************************
1628 Does a string have any lowercase chars in it ?
1629 ****************************************************************************/
1631 BOOL strhaslower_w(const smb_ucs2_t *s)
1633 while (*s) {
1634 if (islower(*s))
1635 return(True);
1636 s++;
1638 return(False);
1641 /****************************************************************************
1642 Find the number of 'c' chars in a string.
1643 ****************************************************************************/
1645 size_t count_chars_w(const smb_ucs2_t *s,smb_ucs2_t c)
1647 size_t count=0;
1649 while (*s) {
1650 if (*s == c)
1651 count++;
1652 s++;
1654 return(count);
1657 /*******************************************************************
1658 Return True if a string consists only of one particular character.
1659 ********************************************************************/
1661 BOOL str_is_all_w(const smb_ucs2_t *s,smb_ucs2_t c)
1663 if(s == NULL)
1664 return False;
1665 if(!*s)
1666 return False;
1668 while (*s) {
1669 if (*s != c)
1670 return False;
1671 s++;
1673 return True;
1676 /*******************************************************************
1677 Paranoid strcpy into a buffer of given length (includes terminating
1678 zero. Strips out all but 'a-Z0-9' and replaces with '_'. Deliberately
1679 does *NOT* check for multibyte characters. Don't change it !
1680 maxlength is in ucs2 units.
1681 ********************************************************************/
1683 smb_ucs2_t *alpha_strcpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const smb_ucs2_t *other_safe_chars, size_t maxlength)
1685 size_t len, i;
1686 smb_ucs2_t nullstr_w = (smb_ucs2_t)0;
1688 if (!dest) {
1689 DEBUG(0,("ERROR: NULL dest in alpha_strcpy_w\n"));
1690 return NULL;
1693 if (!src) {
1694 *dest = 0;
1695 return dest;
1698 len = strlen_w(src);
1699 if (len >= maxlength)
1700 len = maxlength - 1;
1702 if (!other_safe_chars)
1703 other_safe_chars = &nullstr_w;
1705 for(i = 0; i < len; i++) {
1706 smb_ucs2_t val = src[i];
1707 if(isupper_w(val) ||islower_w(val) || isdigit_w(val) || strchr_w(other_safe_chars, val))
1708 dest[i] = src[i];
1709 else
1710 dest[i] = (smb_ucs2_t)'_';
1713 dest[i] = 0;
1715 return dest;
1718 /****************************************************************************
1719 Like strncpy but always null terminates. Make sure there is room !
1720 The variable n should always be one less than the available size and is in
1721 ucs2 units.
1722 ****************************************************************************/
1724 smb_ucs2_t *StrnCpy_w(smb_ucs2_t *dest,const smb_ucs2_t *src,size_t n)
1726 smb_ucs2_t *d = dest;
1727 if (!dest)
1728 return(NULL);
1729 if (!src) {
1730 *dest = 0;
1731 return(dest);
1734 while (n-- && (*d++ = *src++))
1736 *d = 0;
1737 return(dest);
1740 /****************************************************************************
1741 Like strncpy but copies up to the character marker. Always null terminates.
1742 returns a pointer to the character marker in the source string (src).
1743 n is in ucs2 units.
1744 ****************************************************************************/
1746 smb_ucs2_t *strncpyn_w(smb_ucs2_t *dest, const smb_ucs2_t *src,size_t n, smb_ucs2_t c)
1748 smb_ucs2_t *p;
1749 size_t str_len;
1751 p = strchr_w(src, c);
1752 if (p == NULL) {
1753 fstring cval;
1754 smb_ucs2_t mbcval[2];
1755 mbcval[0] = c;
1756 mbcval[1] = 0;
1757 DEBUG(5, ("strncpyn_w: separator character (%s) not found\n",
1758 unicode_to_unix(cval,mbcval,sizeof(cval)) ));
1759 return NULL;
1762 str_len = PTR_DIFF(p, src) + 1;
1763 safe_strcpy_w(dest, src, MIN(n, str_len));
1765 return p;
1768 /*************************************************************
1769 Routine to get hex characters and turn them into a 16 byte array.
1770 The array can be variable length, and any non-hex-numeric
1771 characters are skipped. "0xnn" or "0Xnn" is specially catered
1772 for. len is in bytes.
1773 Valid examples: "0A5D15"; "0x15, 0x49, 0xa2"; "59\ta9\te3\n"
1774 **************************************************************/
1776 static smb_ucs2_t hexprefix[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'x', 0 };
1777 static smb_ucs2_t hexchars[] = { (smb_ucs2_t)'0', (smb_ucs2_t)'1', (smb_ucs2_t)'2', (smb_ucs2_t)'3',
1778 (smb_ucs2_t)'4', (smb_ucs2_t)'5', (smb_ucs2_t)'6', (smb_ucs2_t)'7',
1779 (smb_ucs2_t)'8', (smb_ucs2_t)'9', (smb_ucs2_t)'A', (smb_ucs2_t)'B',
1780 (smb_ucs2_t)'C', (smb_ucs2_t)'D', (smb_ucs2_t)'E', (smb_ucs2_t)'F', 0 };
1782 size_t strhex_to_str_w(char *p, size_t len, const smb_ucs2_t *strhex)
1784 size_t i;
1785 size_t num_chars = 0;
1786 unsigned char lonybble, hinybble;
1787 smb_ucs2_t *p1 = NULL, *p2 = NULL;
1790 * Convert to smb_ucs2_t units.
1793 len /= sizeof(smb_ucs2_t);
1795 for (i = 0; i < len && strhex[i] != 0; i++) {
1796 if (strnequal_w(hexchars, hexprefix, 2)) {
1797 i++; /* skip two chars */
1798 continue;
1801 if (!(p1 = strchr_w(hexchars, toupper_w(strhex[i]))))
1802 break;
1804 i++; /* next hex digit */
1806 if (!(p2 = strchr_w(hexchars, toupper_w(strhex[i]))))
1807 break;
1809 /* get the two nybbles */
1810 hinybble = (PTR_DIFF(p1, hexchars)/sizeof(smb_ucs2_t));
1811 lonybble = (PTR_DIFF(p2, hexchars)/sizeof(smb_ucs2_t));
1813 p[num_chars] = (hinybble << 4) | lonybble;
1814 num_chars++;
1816 p1 = NULL;
1817 p2 = NULL;
1819 return num_chars;
1822 /****************************************************************************
1823 Check if a string is part of a list.
1824 ****************************************************************************/
1826 BOOL in_list_w(smb_ucs2_t *s,smb_ucs2_t *list,BOOL casesensitive)
1828 wpstring tok;
1829 smb_ucs2_t *p=list;
1831 if (!list)
1832 return(False);
1834 while (next_token_w(&p,tok,LIST_SEP_W,sizeof(tok))) {
1835 if (casesensitive) {
1836 if (strcmp_w(tok,s) == 0)
1837 return(True);
1838 } else {
1839 if (StrCaseCmp_w(tok,s) == 0)
1840 return(True);
1843 return(False);
1846 /* This is used to prevent lots of mallocs of size 2 */
1847 static smb_ucs2_t *null_string = NULL;
1849 /****************************************************************************
1850 Set a string value, allocing the space for the string.
1851 ****************************************************************************/
1853 BOOL string_init_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1855 size_t l;
1857 if (!null_string) {
1858 if((null_string = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t))) == NULL) {
1859 DEBUG(0,("string_init_w: malloc fail for null_string.\n"));
1860 return False;
1862 *null_string = 0;
1865 if (!src)
1866 src = null_string;
1868 l = strlen_w(src);
1870 if (l == 0)
1871 *dest = null_string;
1872 else {
1873 (*dest) = (smb_ucs2_t *)malloc(sizeof(smb_ucs2_t)*(l+1));
1874 if ((*dest) == NULL) {
1875 DEBUG(0,("Out of memory in string_init_w\n"));
1876 return False;
1879 wpstrcpy(*dest,src);
1881 return(True);
1884 /****************************************************************************
1885 Free a string value.
1886 ****************************************************************************/
1888 void string_free_w(smb_ucs2_t **s)
1890 if (!s || !(*s))
1891 return;
1892 if (*s == null_string)
1893 *s = NULL;
1894 SAFE_FREE(*s);
1897 /****************************************************************************
1898 Set a string value, allocing the space for the string, and deallocating any
1899 existing space.
1900 ****************************************************************************/
1902 BOOL string_set_w(smb_ucs2_t **dest,const smb_ucs2_t *src)
1904 string_free_w(dest);
1906 return(string_init_w(dest,src));
1909 /****************************************************************************
1910 Substitute a string for a pattern in another string. Make sure there is
1911 enough room !
1913 This routine looks for pattern in s and replaces it with
1914 insert. It may do multiple replacements.
1916 Any of " ; ' $ or ` in the insert string are replaced with _
1917 if len==0 then no length check is performed
1918 len is in ucs2 units.
1919 ****************************************************************************/
1921 void string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1923 smb_ucs2_t *p;
1924 ssize_t ls,lp,li, i;
1926 if (!insert || !pattern || !s)
1927 return;
1929 ls = (ssize_t)strlen_w(s);
1930 lp = (ssize_t)strlen_w(pattern);
1931 li = (ssize_t)strlen_w(insert);
1933 if (!*pattern)
1934 return;
1936 while (lp <= ls && (p = strstr_w(s,pattern))) {
1937 if (len && (ls + (li-lp) >= len)) {
1938 fstring out;
1939 DEBUG(0,("ERROR: string overflow by %d in string_sub_w(%.50s, %d)\n",
1940 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
1941 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
1942 break;
1944 if (li != lp)
1945 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
1947 for (i=0;i<li;i++) {
1948 switch (insert[i]) {
1949 case (smb_ucs2_t)'`':
1950 case (smb_ucs2_t)'"':
1951 case (smb_ucs2_t)'\'':
1952 case (smb_ucs2_t)';':
1953 case (smb_ucs2_t)'$':
1954 case (smb_ucs2_t)'%':
1955 case (smb_ucs2_t)'\r':
1956 case (smb_ucs2_t)'\n':
1957 p[i] = (smb_ucs2_t)'_';
1958 break;
1959 default:
1960 p[i] = insert[i];
1963 s = p + li;
1964 ls += (li-lp);
1968 void fstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert)
1970 string_sub_w(s, pattern, insert, sizeof(wfstring));
1973 void pstring_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,smb_ucs2_t *insert)
1975 string_sub_w(s, pattern, insert, sizeof(wpstring));
1978 /****************************************************************************
1979 Similar to string_sub() but allows for any character to be substituted.
1980 Use with caution !
1981 if len==0 then no length check is performed.
1982 ****************************************************************************/
1984 void all_string_sub_w(smb_ucs2_t *s,const smb_ucs2_t *pattern,const smb_ucs2_t *insert, size_t len)
1986 smb_ucs2_t *p;
1987 ssize_t ls,lp,li;
1989 if (!insert || !pattern || !s)
1990 return;
1992 ls = (ssize_t)strlen_w(s);
1993 lp = (ssize_t)strlen_w(pattern);
1994 li = (ssize_t)strlen_w(insert);
1996 if (!*pattern)
1997 return;
1999 while (lp <= ls && (p = strstr_w(s,pattern))) {
2000 if (len && (ls + (li-lp) >= len)) {
2001 fstring out;
2002 DEBUG(0,("ERROR: string overflow by %d in all_string_sub_w(%.50s, %d)\n",
2003 (int)(sizeof(smb_ucs2_t)*(ls + (li-lp) - len)),
2004 unicode_to_unix(out,pattern,sizeof(out)), (int)len*sizeof(smb_ucs2_t)));
2005 break;
2007 if (li != lp)
2008 memmove(p+li,p+lp,sizeof(smb_ucs2_t)*(strlen_w(p+lp)+1));
2010 memcpy(p, insert, li*sizeof(smb_ucs2_t));
2011 s = p + li;
2012 ls += (li-lp);
2016 /****************************************************************************
2017 Splits out the front and back at a separator.
2018 ****************************************************************************/
2020 void split_at_last_component_w(smb_ucs2_t *path, smb_ucs2_t *front, smb_ucs2_t sep, smb_ucs2_t *back)
2022 smb_ucs2_t *p = strrchr_w(path, sep);
2024 if (p != NULL)
2025 *p = 0;
2027 if (front != NULL)
2028 wpstrcpy(front, path);
2030 if (p != NULL) {
2031 if (back != NULL)
2032 wpstrcpy(back, p+1);
2033 *p = (smb_ucs2_t)'\\';
2034 } else {
2035 if (back != NULL)
2036 back[0] = 0;
2041 /****************************************************************************
2042 Write an octal as a string.
2043 ****************************************************************************/
2045 smb_ucs2_t *octal_string_w(int i)
2047 static smb_ucs2_t wret[64];
2048 char ret[64];
2050 if (i == -1)
2051 slprintf(ret, sizeof(ret)-1, "-1");
2052 else
2053 slprintf(ret, sizeof(ret)-1, "0%o", i);
2054 return unix_to_unicode(wret, ret, sizeof(wret));
2058 /****************************************************************************
2059 Truncate a string at a specified length.
2060 length is in ucs2 units.
2061 ****************************************************************************/
2063 smb_ucs2_t *string_truncate_w(smb_ucs2_t *s, size_t length)
2065 if (s && strlen_w(s) > length)
2066 s[length] = 0;
2068 return s;
2071 /******************************************************************
2072 functions for UTF8 support (using in kanji.c)
2073 ******************************************************************/
2074 smb_ucs2_t doscp2ucs2(int w)
2076 return ((smb_ucs2_t)doscp_to_ucs2[w]);
2079 int ucs2doscp(smb_ucs2_t w)
2081 return ((int)ucs2_to_doscp[w]);
2084 /* Temporary fix until 3.0... JRA */
2086 int rpcstr_pull(char* dest, void *src, int dest_len, int src_len, int flags)
2088 if(dest_len==-1)
2089 dest_len=MAXUNI-3;
2091 if (flags & STR_TERMINATE)
2092 src_len = strlen_w(src)*2+2;
2094 dest_len = MIN((src_len/2), (dest_len-1));
2095 unistr_to_ascii(dest, src, dest_len);
2096 return src_len;