sync'ing up for 3.0alpha20 release
[Samba/gbeck.git] / source3 / lib / util_unistr.c
blobeb47252413133928590182abdbaaceb22daa25e9
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include "includes.h"
24 #ifndef MAXUNI
25 #define MAXUNI 1024
26 #endif
28 /* These 3 tables define the unicode case handling and the 8.3 validity
29 check. Each one is indexed by a 16 bit character value. upcase_table and
   lowcase_table are set up by load_case_tables(), valid_table by
   init_valid_table(); both first try map_file() on a data file from the
   lib directory and otherwise build a fallback table in memory. */
30 static smb_ucs2_t *upcase_table;
31 static smb_ucs2_t *lowcase_table;
32 static uint8 *valid_table;
35 /*******************************************************************
36 load the case handling tables
37 ********************************************************************/
38 void load_case_tables(void)
40 static int initialised;
41 int i;
43 if (initialised) return;
44 initialised = 1;
46 upcase_table = map_file(lib_path("upcase.dat"), 0x20000);
47 lowcase_table = map_file(lib_path("lowcase.dat"), 0x20000);
49 /* we would like Samba to limp along even if these tables are
50 not available */
51 if (!upcase_table) {
52 DEBUG(1,("creating lame upcase table\n"));
53 upcase_table = malloc(0x20000);
54 for (i=0;i<0x10000;i++) {
55 smb_ucs2_t v;
56 SSVAL(&v, 0, i);
57 upcase_table[v] = i;
59 for (i=0;i<256;i++) {
60 smb_ucs2_t v;
61 SSVAL(&v, 0, UCS2_CHAR(i));
62 upcase_table[v] = UCS2_CHAR(islower(i)?toupper(i):i);
66 if (!lowcase_table) {
67 DEBUG(1,("creating lame lowcase table\n"));
68 lowcase_table = malloc(0x20000);
69 for (i=0;i<0x10000;i++) {
70 smb_ucs2_t v;
71 SSVAL(&v, 0, i);
72 lowcase_table[v] = i;
74 for (i=0;i<256;i++) {
75 smb_ucs2_t v;
76 SSVAL(&v, 0, UCS2_CHAR(i));
77 lowcase_table[v] = UCS2_CHAR(isupper(i)?tolower(i):i);
83 see if a ucs2 character can be mapped correctly to a dos character
84 and mapped back to the same character in ucs2
86 static int check_dos_char(smb_ucs2_t c)
88 char buf[10];
89 smb_ucs2_t c2 = 0;
90 int len1, len2;
91 len1 = convert_string(CH_UCS2, CH_DOS, &c, 2, buf, sizeof(buf));
92 if (len1 == 0) return 0;
93 len2 = convert_string(CH_DOS, CH_UCS2, buf, len1, &c2, 2);
94 if (len2 != 2) return 0;
95 return (c == c2);
98 /*******************************************************************
99 load the valid character map table
100 ********************************************************************/
101 void init_valid_table(void)
103 static int initialised;
104 static int mapped_file;
105 int i;
106 const char *allowed = ".!#$%&'()_-@^`~";
108 if (initialised && mapped_file) return;
109 initialised = 1;
111 valid_table = map_file(lib_path("valid.dat"), 0x10000);
112 if (valid_table) {
113 mapped_file = 1;
114 return;
117 if (valid_table) free(valid_table);
119 DEBUG(2,("creating default valid table\n"));
120 valid_table = malloc(0x10000);
121 for (i=0;i<128;i++) valid_table[i] = isalnum(i) ||
122 strchr(allowed,i);
123 for (;i<0x10000;i++) {
124 smb_ucs2_t c;
125 SSVAL(&c, 0, i);
126 valid_table[i] = check_dos_char(c);
131 /*******************************************************************
132 Write a string in (little-endian) unicode format. src is in
133 the current DOS codepage. len is the length in bytes of the
134 string pointed to by dst.
136 if null_terminate is True then null terminate the packet (adds 2 bytes)
138 the return value is the length in bytes consumed by the string, including the
139 null termination if applied
140 ********************************************************************/
142 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
144 return push_ucs2(NULL, dst, src, len,
145 STR_UNICODE|STR_NOALIGN | (null_terminate?STR_TERMINATE:0));
/*******************************************************************
 Step over a unicode string in 2 byte units, stopping at the first
 zero unit or after len bytes, whichever comes first. If a zero unit
 is found at the stopping position it is skipped as well.

 NOTE(review): the final zero test reads 2 bytes at the stopping
 position even when the scan stopped because len was exhausted.
********************************************************************/

char *skip_unibuf(char *src, size_t len)
{
	char *limit = src + len;
	char *pos;

	for (pos = src; pos < limit && SVAL(pos,0); pos += 2) {
		/* nothing - just advance */
	}

	if (SVAL(pos,0) == 0) {
		pos += 2;
	}

	return pos;
}
167 /* Copy a string from little-endian or big-endian unicode source (depending
168 * on flags) to internal samba format destination
170 int rpcstr_pull(char* dest, void *src, int dest_len, int src_len, int flags)
172 if(dest_len==-1) dest_len=MAXUNI-3;
173 return pull_ucs2(NULL, dest, src, dest_len, src_len, flags|STR_UNICODE|STR_NOALIGN);
176 /* Copy a string from a unistr2 source to internal samba format
177 destination. Use this instead of direct calls to rpcstr_pull() to avoid
178 having to determine whether the source string is null terminated. */
180 int rpcstr_pull_unistr2_fstring(char *dest, UNISTR2 *src)
182 return pull_ucs2(NULL, dest, src->buffer, sizeof(fstring),
183 src->uni_str_len * 2, 0);
186 /* Converts a string from internal samba format to unicode
188 int rpcstr_push(void* dest, const char *src, int dest_len, int flags)
190 return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
193 /*******************************************************************
194 Return a DOS codepage version of a little-endian unicode string.
195 len is the filename length (ignoring any terminating zero) in uin16
196 units. Always null terminates.
197 Hack alert: uses fixed buffer(s).
198 ********************************************************************/
199 char *dos_unistrn2(const uint16 *src, int len)
201 static char lbufs[8][MAXUNI];
202 static int nexti;
203 char *lbuf = lbufs[nexti];
204 nexti = (nexti+1)%8;
205 pull_ucs2(NULL, lbuf, src, MAXUNI-3, len*2, STR_NOALIGN);
206 return lbuf;
209 /*******************************************************************
210 Convert a (little-endian) UNISTR2 structure to an ASCII string
211 ********************************************************************/
212 void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
214 if (str == NULL) {
215 *dest='\0';
216 return;
218 pull_ucs2(NULL, dest, str->buffer, maxlen, str->uni_str_len*2, STR_NOALIGN);
221 /*******************************************************************
222 give a static string for displaying a UNISTR2
223 ********************************************************************/
224 const char *unistr2_static(const UNISTR2 *str)
226 static pstring ret;
227 unistr2_to_ascii(ret, str, sizeof(ret));
228 return ret;
232 /*******************************************************************
233 duplicate a UNISTR2 string into a null terminated char*
234 using a talloc context
235 ********************************************************************/
236 char *unistr2_tdup(TALLOC_CTX *ctx, const UNISTR2 *str)
238 char *s;
239 int maxlen = (str->uni_str_len+1)*4;
240 if (!str->buffer) return NULL;
241 s = (char *)talloc(ctx, maxlen); /* convervative */
242 if (!s) return NULL;
243 pull_ucs2(NULL, s, str->buffer, maxlen, str->uni_str_len*2,
244 STR_NOALIGN);
245 return s;
249 /*******************************************************************
250 Return a number stored in a buffer
251 ********************************************************************/
253 uint32 buffer2_to_uint32(BUFFER2 *str)
255 if (str->buf_len == 4)
256 return IVAL(str->buffer, 0);
257 else
258 return 0;
261 /*******************************************************************
262 Convert a wchar to upper case.
263 ********************************************************************/
265 smb_ucs2_t toupper_w(smb_ucs2_t val)
267 return upcase_table[SVAL(&val,0)];
270 /*******************************************************************
271 Convert a wchar to lower case.
272 ********************************************************************/
274 smb_ucs2_t tolower_w( smb_ucs2_t val )
276 return lowcase_table[SVAL(&val,0)];
280 /*******************************************************************
281 determine if a character is lowercase
282 ********************************************************************/
283 BOOL islower_w(smb_ucs2_t c)
285 return upcase_table[SVAL(&c,0)] != c;
288 /*******************************************************************
289 determine if a character is uppercase
290 ********************************************************************/
291 BOOL isupper_w(smb_ucs2_t c)
293 return lowcase_table[SVAL(&c,0)] != c;
297 /*******************************************************************
298 determine if a character is valid in a 8.3 name
299 ********************************************************************/
300 BOOL isvalid83_w(smb_ucs2_t c)
302 return valid_table[SVAL(&c,0)] != 0;
305 /*******************************************************************
306 Count the number of characters in a smb_ucs2_t string.
307 ********************************************************************/
308 size_t strlen_w(const smb_ucs2_t *src)
310 size_t len;
312 for(len = 0; *src++; len++) ;
314 return len;
317 /*******************************************************************
318 Count up to max number of characters in a smb_ucs2_t string.
319 ********************************************************************/
320 size_t strnlen_w(const smb_ucs2_t *src, size_t max)
322 size_t len;
324 for(len = 0; *src++ && (len < max); len++) ;
326 return len;
329 /*******************************************************************
330 wide strchr()
331 ********************************************************************/
332 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
334 while (*s != 0) {
335 if (c == *s) return (smb_ucs2_t *)s;
336 s++;
338 return NULL;
341 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
343 return strchr_w(s, UCS2_CHAR(c));
346 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
348 const smb_ucs2_t *p = s;
349 int len = strlen_w(s);
350 if (len == 0) return NULL;
351 p += (len - 1);
352 do {
353 if (c == *p) return (smb_ucs2_t *)p;
354 } while (p-- != s);
355 return NULL;
358 /*******************************************************************
359 wide strstr()
360 ********************************************************************/
361 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
363 smb_ucs2_t *r;
364 size_t slen, inslen;
366 if (!s || !*s || !ins || !*ins) return NULL;
367 slen = strlen_w(s);
368 inslen = strlen_w(ins);
369 r = (smb_ucs2_t *)s;
370 while ((r = strchr_w(r, *ins))) {
371 if (strncmp_w(r, ins, inslen) == 0) return r;
372 r++;
374 return NULL;
377 /*******************************************************************
378 Convert a string to lower case.
379 return True if any char is converted
380 ********************************************************************/
381 BOOL strlower_w(smb_ucs2_t *s)
383 BOOL ret = False;
384 while (*s) {
385 smb_ucs2_t v = tolower_w(*s);
386 if (v != *s) {
387 *s = v;
388 ret = True;
390 s++;
392 return ret;
395 /*******************************************************************
396 Convert a string to upper case.
397 return True if any char is converted
398 ********************************************************************/
399 BOOL strupper_w(smb_ucs2_t *s)
401 BOOL ret = False;
402 while (*s) {
403 smb_ucs2_t v = toupper_w(*s);
404 if (v != *s) {
405 *s = v;
406 ret = True;
408 s++;
410 return ret;
413 /*******************************************************************
414 convert a string to "normal" form
415 ********************************************************************/
416 void strnorm_w(smb_ucs2_t *s)
418 extern int case_default;
419 if (case_default == CASE_UPPER)
420 strupper_w(s);
421 else
422 strlower_w(s);
425 int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
427 while (*b && *a == *b) { a++; b++; }
428 return (*a - *b);
429 /* warning: if *a != *b and both are not 0 we retrun a random
430 greater or lesser than 0 number not realted to which
431 string is longer */
434 int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
436 size_t n = 0;
437 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
438 return (len - n)?(*a - *b):0;
441 /*******************************************************************
442 case insensitive string comparison
443 ********************************************************************/
444 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
446 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
447 return (tolower_w(*a) - tolower_w(*b));
450 /*******************************************************************
451 case insensitive string comparison, lenght limited
452 ********************************************************************/
453 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
455 size_t n = 0;
456 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
457 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
460 /*******************************************************************
461 compare 2 strings
462 ********************************************************************/
463 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
465 if (s1 == s2) return(True);
466 if (!s1 || !s2) return(False);
468 return(strcasecmp_w(s1,s2)==0);
471 /*******************************************************************
472 compare 2 strings up to and including the nth char.
473 ******************************************************************/
474 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
476 if (s1 == s2) return(True);
477 if (!s1 || !s2 || !n) return(False);
479 return(strncasecmp_w(s1,s2,n)==0);
482 /*******************************************************************
483 duplicate string
484 ********************************************************************/
485 smb_ucs2_t *strdup_w(const smb_ucs2_t *src)
487 return strndup_w(src, 0);
490 /* if len == 0 then duplicate the whole string */
491 smb_ucs2_t *strndup_w(const smb_ucs2_t *src, size_t len)
493 smb_ucs2_t *dest;
495 if (!len) len = strlen_w(src);
496 dest = (smb_ucs2_t *)malloc((len + 1) * sizeof(smb_ucs2_t));
497 if (!dest) {
498 DEBUG(0,("strdup_w: out of memory!\n"));
499 return NULL;
502 memcpy(dest, src, len * sizeof(smb_ucs2_t));
503 dest[len] = 0;
505 return dest;
508 /*******************************************************************
509 copy a string with max len
510 ********************************************************************/
512 smb_ucs2_t *strncpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
514 size_t len;
516 if (!dest || !src) return NULL;
518 for (len = 0; (src[len] != 0) && (len < max); len++)
519 dest[len] = src[len];
520 while (len < max)
521 dest[len++] = 0;
523 return dest;
527 /*******************************************************************
528 append a string of len bytes and add a terminator
529 ********************************************************************/
531 smb_ucs2_t *strncat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
533 size_t start;
534 size_t len;
536 if (!dest || !src) return NULL;
538 start = strlen_w(dest);
539 len = strnlen_w(src, max);
541 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
542 dest[start+len] = 0;
544 return dest;
547 smb_ucs2_t *strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src)
549 size_t start;
550 size_t len;
552 if (!dest || !src) return NULL;
554 start = strlen_w(dest);
555 len = strlen_w(src);
557 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
558 dest[start+len] = 0;
560 return dest;
564 /*******************************************************************
565 replace any occurence of oldc with newc in unicode string
566 ********************************************************************/
568 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
570 for(;*s;s++) {
571 if(*s==oldc) *s=newc;
575 /*******************************************************************
576 trim unicode string
577 ********************************************************************/
579 BOOL trim_string_w(smb_ucs2_t *s, const smb_ucs2_t *front,
580 const smb_ucs2_t *back)
582 BOOL ret = False;
583 size_t len, front_len, back_len;
585 if (!s || !*s) return False;
587 len = strlen_w(s);
589 if (front && *front) {
590 front_len = strlen_w(front);
591 while (len && strncmp_w(s, front, front_len) == 0) {
592 memmove(s, (s + front_len), (len - front_len + 1) * sizeof(smb_ucs2_t));
593 len -= front_len;
594 ret = True;
598 if (back && *back) {
599 back_len = strlen_w(back);
600 while (len && strncmp_w((s + (len - back_len)), back, back_len) == 0) {
601 s[len - back_len] = 0;
602 len -= back_len;
603 ret = True;
607 return ret;
/*
  The *_wa() functions take a combination of 7 bit ascii
  and wide characters. They are used so that you can use string
  functions combining C string constants with ucs2 strings.

  The char* arguments must NOT be multibyte - to be completely sure
  of this only pass string constants */
619 void pstrcpy_wa(smb_ucs2_t *dest, const char *src)
621 int i;
622 for (i=0;i<PSTRING_LEN;i++) {
623 dest[i] = UCS2_CHAR(src[i]);
624 if (src[i] == 0) return;
628 int strcmp_wa(const smb_ucs2_t *a, const char *b)
630 while (*b && *a == UCS2_CHAR(*b)) { a++; b++; }
631 return (*a - UCS2_CHAR(*b));
634 int strncmp_wa(const smb_ucs2_t *a, const char *b, size_t len)
636 size_t n = 0;
637 while ((n < len) && *b && *a == UCS2_CHAR(*b)) { a++; b++; n++;}
638 return (len - n)?(*a - UCS2_CHAR(*b)):0;
641 smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
643 while (*s != 0) {
644 int i;
645 for (i=0; p[i] && *s != UCS2_CHAR(p[i]); i++)
647 if (p[i]) return (smb_ucs2_t *)s;
648 s++;
650 return NULL;
653 smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins)
655 smb_ucs2_t *r;
656 size_t slen, inslen;
658 if (!s || !*s || !ins || !*ins) return NULL;
659 slen = strlen_w(s);
660 inslen = strlen(ins);
661 r = (smb_ucs2_t *)s;
662 while ((r = strchr_w(r, UCS2_CHAR(*ins)))) {
663 if (strncmp_wa(r, ins, inslen) == 0) return r;
664 r++;
666 return NULL;
669 /*******************************************************************
670 copy a string with max len
671 ********************************************************************/
673 smb_ucs2_t *strncpy_wa(smb_ucs2_t *dest, const char *src, const size_t max)
675 smb_ucs2_t *ucs2_src;
677 if (!dest || !src) return NULL;
678 if (!(ucs2_src = acnv_uxu2(src)))
679 return NULL;
681 strncpy_w(dest, ucs2_src, max);
682 SAFE_FREE(ucs2_src);
683 return dest;
686 /*******************************************************************
687 convert and duplicate an ascii string
688 ********************************************************************/
689 smb_ucs2_t *strdup_wa(const char *src)
691 return strndup_wa(src, 0);
694 /* if len == 0 then duplicate the whole string */
695 smb_ucs2_t *strndup_wa(const char *src, size_t len)
697 smb_ucs2_t *dest, *s;
699 s = acnv_dosu2(src);
700 if (!len) len = strlen_w(s);
701 dest = (smb_ucs2_t *)malloc((len + 1) * sizeof(smb_ucs2_t));
702 if (!dest) {
703 DEBUG(0,("strdup_w: out of memory!\n"));
704 SAFE_FREE(s);
705 return NULL;
708 memcpy(dest, src, len * sizeof(smb_ucs2_t));
709 dest[len] = 0;
711 SAFE_FREE(s);
712 return dest;
715 /*******************************************************************
716 append a string of len bytes and add a terminator
717 ********************************************************************/
719 smb_ucs2_t *strncat_wa(smb_ucs2_t *dest, const char *src, const size_t max)
721 smb_ucs2_t *ucs2_src;
723 if (!dest || !src) return NULL;
724 if (!(ucs2_src = acnv_uxu2(src)))
725 return NULL;
727 strncat_w(dest, ucs2_src, max);
728 SAFE_FREE(ucs2_src);
729 return dest;
732 smb_ucs2_t *strcat_wa(smb_ucs2_t *dest, const char *src)
734 smb_ucs2_t *ucs2_src;
736 if (!dest || !src) return NULL;
737 if (!(ucs2_src = acnv_uxu2(src)))
738 return NULL;
740 strcat_w(dest, ucs2_src);
741 SAFE_FREE(ucs2_src);
742 return dest;
745 BOOL trim_string_wa(smb_ucs2_t *s, const char *front,
746 const char *back)
748 wpstring f, b;
750 if (front) push_ucs2(NULL, f, front, sizeof(wpstring) - 1, STR_TERMINATE);
751 else *f = 0;
752 if (back) push_ucs2(NULL, b, back, sizeof(wpstring) - 1, STR_TERMINATE);
753 else *b = 0;
754 return trim_string_w(s, f, b);
757 /*******************************************************************
758 returns the length in number of wide characters
759 ******************************************************************/
760 int unistrlen(uint16 *s)
762 int len;
764 if (!s)
765 return -1;
767 for (len=0; *s; s++,len++);
769 return len;
772 /*******************************************************************
773 Strcpy for unicode strings. returns length (in num of wide chars)
774 ********************************************************************/
776 int unistrcpy(uint16 *dst, uint16 *src)
778 int num_wchars = 0;
780 while (*src) {
781 *dst++ = *src++;
782 num_wchars++;
784 *dst = 0;
786 return num_wchars;
790 * Samba ucs2 type to UNISTR2 conversion
792 * @param ctx Talloc context to create the dst strcture (if null) and the
793 * contents of the unicode string.
794 * @param dst UNISTR2 destination. If equals null, then it's allocated.
795 * @param src smb_ucs2_t source.
796 * @param max_len maximum number of unicode characters to copy. If equals
797 * null, then null-termination of src is taken
799 * @return copied UNISTR2 destination
801 UNISTR2* ucs2_to_unistr2(TALLOC_CTX *ctx, UNISTR2* dst, smb_ucs2_t* src)
803 size_t len;
805 if (!src) return NULL;
806 len = strlen_w(src);
808 /* allocate UNISTR2 destination if not given */
809 if (!dst) {
810 dst = (UNISTR2*) talloc(ctx, sizeof(UNISTR2));
811 if (!dst) return NULL;
813 if (!dst->buffer) {
814 dst->buffer = (uint16*) talloc(ctx, sizeof(uint16) * (len + 1));
815 if (!dst->buffer) return NULL;
818 /* set UNISTR2 parameters */
819 dst->uni_max_len = len + 1;
820 dst->undoc = 0;
821 dst->uni_str_len = len;
823 /* copy the actual unicode string */
824 strncpy_w(dst->buffer, src, dst->uni_max_len);
826 return dst;