some bugfix and new functions,
[Samba.git] / source / lib / util_unistr.c
blob58ecc197232f532486bd62d6e824633424d84daf
1 /*
2 Unix SMB/Netbios implementation.
3 Version 3.0
4 Samba utility functions
5 Copyright (C) Andrew Tridgell 1992-2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include "includes.h"
24 #ifndef MAXUNI
25 #define MAXUNI 1024
26 #endif
28 /* these 3 tables define the unicode case handling. They are loaded
29 at startup either via mmap() or read() from the lib directory */
30 static smb_ucs2_t *upcase_table;
31 static smb_ucs2_t *lowcase_table;
32 static uint8 *valid_table;
34 /*******************************************************************
35 load the case handling tables
36 ********************************************************************/
37 void load_case_tables(void)
39 static int initialised;
40 int i;
42 if (initialised) return;
43 initialised = 1;
45 upcase_table = map_file(lib_path("upcase.dat"), 0x20000);
46 lowcase_table = map_file(lib_path("lowcase.dat"), 0x20000);
47 valid_table = map_file(lib_path("valid.dat"), 0x10000);
49 /* we would like Samba to limp along even if these tables are
50 not available */
51 if (!upcase_table) {
52 DEBUG(1,("creating lame upcase table\n"));
53 upcase_table = malloc(0x20000);
54 for (i=0;i<256;i++) upcase_table[i] = islower(i)?toupper(i):i;
55 for (;i<0x10000;i++) upcase_table[i] = i;
58 if (!lowcase_table) {
59 DEBUG(1,("creating lame lowcase table\n"));
60 lowcase_table = malloc(0x20000);
61 for (i=0;i<256;i++) lowcase_table[i] = isupper(i)?tolower(i):i;
62 for (;i<0x10000;i++) lowcase_table[i] = i;
65 if (!valid_table) {
66 const char *allowed = "!#$%&'()_-@^`~";
67 DEBUG(1,("creating lame valid table\n"));
68 valid_table = malloc(0x10000);
69 for (i=0;i<256;i++) valid_table[i] = isalnum(i) || strchr(allowed,i);
70 for (;i<0x10000;i++) valid_table[i] = 0;
75 /*******************************************************************
76 Write a string in (little-endian) unicode format. src is in
77 the current DOS codepage. len is the length in bytes of the
78 string pointed to by dst.
80 if null_terminate is True then null terminate the packet (adds 2 bytes)
82 the return value is the length in bytes consumed by the string, including the
83 null termination if applied
84 ********************************************************************/
86 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
88 return push_ucs2(NULL, dst, src, len,
89 STR_UNICODE|STR_NOALIGN | (null_terminate?STR_TERMINATE:0));
/*******************************************************************
 Skip past a unicode string, advancing two bytes at a time until a
 zero 16-bit value is seen or len bytes have been covered.  A zero
 value found at the stopping position is stepped over as well.

 Returns the new position.
********************************************************************/

char *skip_unibuf(char *src, size_t len)
{
	char *limit = src + len;

	for (; src < limit; src += 2) {
		if (SVAL(src,0) == 0) {
			break;
		}
	}

	/* NOTE(review): this probe also runs when the scan stopped because
	   src reached the limit, i.e. it then looks at the two bytes just
	   beyond src + len. */
	if (SVAL(src,0) == 0) {
		src += 2;
	}

	return src;
}
111 /* Copy a string from little-endian or big-endian unicode source (depending
112 * on flags) to internal samba format destination
114 int rpcstr_pull(char* dest, void *src, int dest_len, int src_len, int flags)
116 if(dest_len==-1) dest_len=MAXUNI-3;
117 return pull_ucs2(NULL, dest, src, dest_len, src_len, flags|STR_UNICODE|STR_NOALIGN);
120 /* Copy a string from a unistr2 source to internal samba format
121 destination. Use this instead of direct calls to rpcstr_pull() to avoid
122 having to determine whether the source string is null terminated. */
124 int rpcstr_pull_unistr2_fstring(char *dest, UNISTR2 *src)
126 return pull_ucs2(NULL, dest, src->buffer, sizeof(fstring),
127 src->uni_str_len * 2, 0);
130 /* Converts a string from internal samba format to unicode
132 int rpcstr_push(void* dest, const char *src, int dest_len, int flags)
134 return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
137 /*******************************************************************
138 Return a DOS codepage version of a little-endian unicode string.
139 len is the filename length (ignoring any terminating zero) in uin16
140 units. Always null terminates.
141 Hack alert: uses fixed buffer(s).
142 ********************************************************************/
143 char *dos_unistrn2(const uint16 *src, int len)
145 static char lbufs[8][MAXUNI];
146 static int nexti;
147 char *lbuf = lbufs[nexti];
148 nexti = (nexti+1)%8;
149 pull_ucs2(NULL, lbuf, src, MAXUNI-3, len*2, STR_NOALIGN);
150 return lbuf;
153 /*******************************************************************
154 Convert a (little-endian) UNISTR2 structure to an ASCII string
155 ********************************************************************/
156 void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
158 if (str == NULL) {
159 *dest='\0';
160 return;
162 pull_ucs2(NULL, dest, str->buffer, maxlen, str->uni_str_len*2, STR_NOALIGN);
166 /*******************************************************************
167 Return a number stored in a buffer
168 ********************************************************************/
170 uint32 buffer2_to_uint32(BUFFER2 *str)
172 if (str->buf_len == 4)
173 return IVAL(str->buffer, 0);
174 else
175 return 0;
178 /*******************************************************************
179 Convert a wchar to upper case.
180 ********************************************************************/
182 smb_ucs2_t toupper_w(smb_ucs2_t val)
184 return upcase_table[SVAL(&val,0)];
187 /*******************************************************************
188 Convert a wchar to lower case.
189 ********************************************************************/
191 smb_ucs2_t tolower_w( smb_ucs2_t val )
193 return lowcase_table[SVAL(&val,0)];
196 /*******************************************************************
197 determine if a character is lowercase
198 ********************************************************************/
199 BOOL islower_w(smb_ucs2_t c)
201 return upcase_table[SVAL(&c,0)] != c;
204 /*******************************************************************
205 determine if a character is uppercase
206 ********************************************************************/
207 BOOL isupper_w(smb_ucs2_t c)
209 return lowcase_table[SVAL(&c,0)] != c;
213 /*******************************************************************
214 determine if a character is valid in a 8.3 name
215 ********************************************************************/
216 BOOL isvalid83_w(smb_ucs2_t c)
218 return valid_table[SVAL(&c,0)] != 0;
221 /*******************************************************************
222 Count the number of characters in a smb_ucs2_t string.
223 ********************************************************************/
224 size_t strlen_w(const smb_ucs2_t *src)
226 size_t len;
228 for(len = 0; *src++; len++) ;
230 return len;
233 /*******************************************************************
234 Count up to max number of characters in a smb_ucs2_t string.
235 ********************************************************************/
236 size_t strnlen_w(const smb_ucs2_t *src, size_t max)
238 size_t len;
240 for(len = 0; *src++ && (len < max); len++) ;
242 return len;
245 /*******************************************************************
246 wide strchr()
247 ********************************************************************/
248 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
250 while (*s != 0) {
251 if (c == *s) return (smb_ucs2_t *)s;
252 s++;
254 return NULL;
257 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
259 return strchr_w(s, UCS2_CHAR(c));
262 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
264 const smb_ucs2_t *p = s;
265 int len = strlen_w(s);
266 if (len == 0) return NULL;
267 p += (len - 1);
268 do {
269 if (c == *p) return (smb_ucs2_t *)p;
270 } while (p-- != s);
271 return NULL;
274 /*******************************************************************
275 wide strstr()
276 ********************************************************************/
277 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
279 smb_ucs2_t *r;
280 size_t slen, inslen;
282 if (!s || !*s || !ins || !*ins) return NULL;
283 slen = strlen_w(s);
284 inslen = strlen_w(ins);
285 r = (smb_ucs2_t *)s;
286 while ((r = strchr_w(r, *ins))) {
287 if (strncmp_w(r, ins, inslen) == 0) return r;
288 r++;
290 return NULL;
293 /*******************************************************************
294 Convert a string to lower case.
295 return True if any char is converted
296 ********************************************************************/
297 BOOL strlower_w(smb_ucs2_t *s)
299 BOOL ret = False;
300 while (*s) {
301 smb_ucs2_t v = tolower_w(*s);
302 if (v != *s) {
303 *s = v;
304 ret = True;
306 s++;
308 return ret;
311 /*******************************************************************
312 Convert a string to upper case.
313 return True if any char is converted
314 ********************************************************************/
315 BOOL strupper_w(smb_ucs2_t *s)
317 BOOL ret = False;
318 while (*s) {
319 smb_ucs2_t v = toupper_w(*s);
320 if (v != *s) {
321 *s = v;
322 ret = True;
324 s++;
326 return ret;
329 /*******************************************************************
330 convert a string to "normal" form
331 ********************************************************************/
332 void strnorm_w(smb_ucs2_t *s)
334 extern int case_default;
335 if (case_default == CASE_UPPER)
336 strupper_w(s);
337 else
338 strlower_w(s);
341 int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
343 while (*b && *a == *b) { a++; b++; }
344 return (*a - *b);
345 /* warning: if *a != *b and both are not 0 we retrun a random
346 greater or lesser than 0 number not realted to which
347 string is longer */
350 int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
352 size_t n = 0;
353 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
354 return (len - n)?(*a - *b):0;
357 /*******************************************************************
358 case insensitive string comparison
359 ********************************************************************/
360 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
362 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
363 return (tolower_w(*a) - tolower_w(*b));
366 /*******************************************************************
367 case insensitive string comparison, lenght limited
368 ********************************************************************/
369 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
371 size_t n = 0;
372 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
373 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
376 /*******************************************************************
377 compare 2 strings
378 ********************************************************************/
379 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
381 if (s1 == s2) return(True);
382 if (!s1 || !s2) return(False);
384 return(strcasecmp_w(s1,s2)==0);
387 /*******************************************************************
388 compare 2 strings up to and including the nth char.
389 ******************************************************************/
390 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
392 if (s1 == s2) return(True);
393 if (!s1 || !s2 || !n) return(False);
395 return(strncasecmp_w(s1,s2,n)==0);
398 /*******************************************************************
399 duplicate string
400 ********************************************************************/
401 smb_ucs2_t *strdup_w(const smb_ucs2_t *src)
403 return strndup_w(src, 0);
406 /* if len == 0 then duplicate the whole string */
407 smb_ucs2_t *strndup_w(const smb_ucs2_t *src, size_t len)
409 smb_ucs2_t *dest;
411 if (!len) len = strlen_w(src);
412 dest = (smb_ucs2_t *)malloc((len + 1) * sizeof(smb_ucs2_t));
413 if (!dest) {
414 DEBUG(0,("strdup_w: out of memory!\n"));
415 return NULL;
418 memcpy(dest, src, len * sizeof(smb_ucs2_t));
419 dest[len] = 0;
421 return dest;
424 /*******************************************************************
425 copy a string with max len
426 ********************************************************************/
428 smb_ucs2_t *strncpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
430 size_t len;
432 if (!dest || !src) return NULL;
434 for (len = 0; (src[len] != 0) && (len < max); len++)
435 dest[len] = src[len];
436 while (len < max)
437 dest[len++] = 0;
439 return dest;
443 /*******************************************************************
444 append a string of len bytes and add a terminator
445 ********************************************************************/
447 smb_ucs2_t *strncat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
449 size_t start;
450 size_t len;
452 if (!dest || !src) return NULL;
454 start = strlen_w(dest);
455 len = strnlen_w(src, max);
457 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
458 dest[start+len] = 0;
460 return dest;
463 smb_ucs2_t *strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src)
465 size_t start;
466 size_t len;
468 if (!dest || !src) return NULL;
470 start = strlen_w(dest);
471 len = strlen_w(src);
473 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
474 dest[start+len] = 0;
476 return dest;
479 /*******************************************************************
480 replace any occurence of oldc with newc in unicode string
481 ********************************************************************/
483 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
485 for(;*s;s++) {
486 if(*s==oldc) *s=newc;
490 /*******************************************************************
491 trim unicode string
492 ********************************************************************/
494 BOOL trim_string_w(smb_ucs2_t *s, const smb_ucs2_t *front,
495 const smb_ucs2_t *back)
497 BOOL ret = False;
498 size_t len, lw, front_len, flw, back_len, blw;
500 if (!s || !*s) return False;
502 len = strlen_w(s);
504 if (front && *front) {
505 front_len = strlen_w(front);
506 flw = front_len * sizeof(smb_ucs2_t);
507 lw = (len + 1) * sizeof(smb_ucs2_t);
508 while (len && strncmp_w(s, front, front_len) == 0) {
509 memcpy(s, s + flw, lw - flw);
510 len -= front_len;
511 lw -= flw;
512 ret = True;
516 if (back && *back) {
517 back_len = strlen_w(back);
518 blw = back_len * sizeof(smb_ucs2_t);
519 lw = len * sizeof(smb_ucs2_t);
520 while (len && strncmp_w(s + lw - blw, back, back_len) == 0) {
521 s[len - back_len] = 0;
522 len -= back_len;
523 lw -= blw;
524 ret = True;
528 return ret;
532 The *_wa() functions take a combination of 7 bit ascii
533 and wide characters. They are used so that you can use string
534 functions combining C string constants with ucs2 strings
536 The char* arguments must NOT be multibyte - to be completely sure
537 of this only pass string constants */
540 void pstrcpy_wa(smb_ucs2_t *dest, const char *src)
542 int i;
543 for (i=0;i<PSTRING_LEN;i++) {
544 dest[i] = UCS2_CHAR(src[i]);
545 if (src[i] == 0) return;
549 int strcmp_wa(const smb_ucs2_t *a, const char *b)
551 while (*b && *a == UCS2_CHAR(*b)) { a++; b++; }
552 return (*a - UCS2_CHAR(*b));
555 int strncmp_wa(const smb_ucs2_t *a, const char *b, size_t len)
557 size_t n = 0;
558 while ((n < len) && *b && *a == UCS2_CHAR(*b)) { a++; b++; n++;}
559 return (len - n)?(*a - UCS2_CHAR(*b)):0;
562 smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
564 while (*s != 0) {
565 int i;
566 for (i=0; p[i] && *s != UCS2_CHAR(p[i]); i++)
568 if (p[i]) return (smb_ucs2_t *)s;
569 s++;
571 return NULL;
574 smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins)
576 smb_ucs2_t *r;
577 size_t slen, inslen;
579 if (!s || !*s || !ins || !*ins) return NULL;
580 slen = strlen_w(s);
581 inslen = strlen(ins);
582 r = (smb_ucs2_t *)s;
583 while ((r = strchr_w(r, UCS2_CHAR(*ins)))) {
584 if (strncmp_wa(r, ins, inslen) == 0) return r;
585 r++;
587 return NULL;
590 /*******************************************************************
591 copy a string with max len
592 ********************************************************************/
594 smb_ucs2_t *strncpy_wa(smb_ucs2_t *dest, const char *src, const size_t max)
596 smb_ucs2_t *ucs2_src;
598 if (!dest || !src) return NULL;
599 if (!(ucs2_src = acnv_uxu2(src)))
600 return NULL;
602 strncpy_w(dest, ucs2_src, max);
603 SAFE_FREE(ucs2_src);
604 return dest;
607 /*******************************************************************
608 convert and duplicate an ascii string
609 ********************************************************************/
610 smb_ucs2_t *strdup_wa(const char *src)
612 return strndup_wa(src, 0);
615 /* if len == 0 then duplicate the whole string */
616 smb_ucs2_t *strndup_wa(const char *src, size_t len)
618 smb_ucs2_t *dest, *s;
620 s = acnv_dosu2(src);
621 if (!len) len = strlen_w(s);
622 dest = (smb_ucs2_t *)malloc((len + 1) * sizeof(smb_ucs2_t));
623 if (!dest) {
624 DEBUG(0,("strdup_w: out of memory!\n"));
625 SAFE_FREE(s);
626 return NULL;
629 memcpy(dest, src, len * sizeof(smb_ucs2_t));
630 dest[len] = 0;
632 SAFE_FREE(s);
633 return dest;
636 /*******************************************************************
637 append a string of len bytes and add a terminator
638 ********************************************************************/
640 smb_ucs2_t *strncat_wa(smb_ucs2_t *dest, const char *src, const size_t max)
642 smb_ucs2_t *ucs2_src;
644 if (!dest || !src) return NULL;
645 if (!(ucs2_src = acnv_uxu2(src)))
646 return NULL;
648 strncat_w(dest, ucs2_src, max);
649 SAFE_FREE(ucs2_src);
650 return dest;
653 smb_ucs2_t *strcat_wa(smb_ucs2_t *dest, const char *src)
655 smb_ucs2_t *ucs2_src;
657 if (!dest || !src) return NULL;
658 if (!(ucs2_src = acnv_uxu2(src)))
659 return NULL;
661 strcat_w(dest, ucs2_src);
662 SAFE_FREE(ucs2_src);
663 return dest;
666 BOOL trim_string_wa(smb_ucs2_t *s, const char *front,
667 const char *back)
669 wpstring f, b;
671 if (front) push_ucs2(NULL, f, front, sizeof(wpstring) - 1, STR_TERMINATE);
672 else *f = 0;
673 if (back) push_ucs2(NULL, b, back, sizeof(wpstring) - 1, STR_TERMINATE);
674 else *b = 0;
675 return trim_string_w(s, f, b);