Fix scripts to generate correct tables for compilers which have character constants...
[Samba/gebeck_regimport.git] / source3 / lib / util_unistr.c
blobfd51f3c57d998ac0060eaa0ed96f78782f66d231
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include "includes.h"
24 #ifndef MAXUNI
25 #define MAXUNI 1024
26 #endif
28 /* these 3 tables define the unicode case handling. They are loaded
29 at startup either via mmap() or read() from the lib directory */
30 static smb_ucs2_t *upcase_table;
31 static smb_ucs2_t *lowcase_table;
32 static uint8 *valid_table;
34 /**
35 * This table says which Unicode characters are valid dos
36 * characters.
38 * Each value is just a single bit.
39 **/
40 static uint8 doschar_table[8192]; /* 65536 characters / 8 bits/byte */
43 /**
44 * Load or generate the case handling tables.
46 * The case tables are defined in UCS2 and don't depend on any
47 * configured parameters, so they never need to be reloaded.
48 **/
49 void load_case_tables(void)
51 static int initialised;
52 int i;
54 if (initialised) return;
55 initialised = 1;
57 upcase_table = map_file(lib_path("upcase.dat"), 0x20000);
58 lowcase_table = map_file(lib_path("lowcase.dat"), 0x20000);
60 /* we would like Samba to limp along even if these tables are
61 not available */
62 if (!upcase_table) {
63 DEBUG(1,("creating lame upcase table\n"));
64 upcase_table = malloc(0x20000);
65 for (i=0;i<0x10000;i++) {
66 smb_ucs2_t v;
67 SSVAL(&v, 0, i);
68 upcase_table[v] = i;
70 for (i=0;i<256;i++) {
71 smb_ucs2_t v;
72 SSVAL(&v, 0, UCS2_CHAR(i));
73 upcase_table[v] = UCS2_CHAR(islower(i)?toupper(i):i);
77 if (!lowcase_table) {
78 DEBUG(1,("creating lame lowcase table\n"));
79 lowcase_table = malloc(0x20000);
80 for (i=0;i<0x10000;i++) {
81 smb_ucs2_t v;
82 SSVAL(&v, 0, i);
83 lowcase_table[v] = i;
85 for (i=0;i<256;i++) {
86 smb_ucs2_t v;
87 SSVAL(&v, 0, UCS2_CHAR(i));
88 lowcase_table[v] = UCS2_CHAR(isupper(i)?tolower(i):i);
94 see if a ucs2 character can be mapped correctly to a dos character
95 and mapped back to the same character in ucs2
97 int check_dos_char(smb_ucs2_t c)
99 lazy_initialize_conv();
101 /* Find the right byte, and right bit within the byte; return
102 * 1 or 0 */
103 return (doschar_table[(c & 0xffff) / 8] & (1 << (c & 7))) != 0;
107 static int check_dos_char_slowly(smb_ucs2_t c)
109 char buf[10];
110 smb_ucs2_t c2 = 0;
111 int len1, len2;
112 len1 = convert_string(CH_UCS2, CH_DOS, &c, 2, buf, sizeof(buf));
113 if (len1 == 0) return 0;
114 len2 = convert_string(CH_DOS, CH_UCS2, buf, len1, &c2, 2);
115 if (len2 != 2) return 0;
116 return (c == c2);
121 * Fill out doschar table the hard way, by examining each character
123 void init_doschar_table(void)
125 int i, j, byteval;
127 /* For each byte of packed table */
129 for (i = 0; i <= 0xffff; i += 8) {
130 byteval = 0;
131 for (j = 0; j <= 7; j++) {
132 smb_ucs2_t c;
134 c = i + j;
136 if (check_dos_char_slowly(c))
137 byteval |= 1 << j;
139 doschar_table[i/8] = byteval;
145 * Load the valid character map table from <tt>valid.dat</tt> or
146 * create from the configured codepage.
148 * This function is called whenever the configuration is reloaded.
149 * However, the valid character table is not changed if it's loaded
150 * from a file, because we can't unmap files.
152 void init_valid_table(void)
154 static int mapped_file;
155 int i;
156 const char *allowed = ".!#$%&'()_-@^`~";
157 uint8 *valid_file;
159 if (mapped_file) {
160 /* Can't unmap files, so stick with what we have */
161 return;
164 valid_file = map_file(lib_path("valid.dat"), 0x10000);
165 if (valid_file) {
166 valid_table = valid_file;
167 mapped_file = 1;
168 return;
171 /* Otherwise, we're using a dynamically created valid_table.
172 * It might need to be regenerated if the code page changed.
173 * We know that we're not using a mapped file, so we can
174 * free() the old one. */
175 if (valid_table) free(valid_table);
177 DEBUG(2,("creating default valid table\n"));
178 valid_table = malloc(0x10000);
179 for (i=0;i<128;i++)
180 valid_table[i] = isalnum(i) || strchr(allowed,i);
182 for (;i<0x10000;i++) {
183 smb_ucs2_t c;
184 SSVAL(&c, 0, i);
185 valid_table[i] = check_dos_char(c);
191 /*******************************************************************
192 Write a string in (little-endian) unicode format. src is in
193 the current DOS codepage. len is the length in bytes of the
194 string pointed to by dst.
196 if null_terminate is True then null terminate the packet (adds 2 bytes)
198 the return value is the length in bytes consumed by the string, including the
199 null termination if applied
200 ********************************************************************/
202 size_t dos_PutUniCode(char *dst,const char *src, ssize_t len, BOOL null_terminate)
204 return push_ucs2(NULL, dst, src, len,
205 STR_UNICODE|STR_NOALIGN | (null_terminate?STR_TERMINATE:0));
/*******************************************************************
 Skip past a unicode string, but not more than len. Always move
 past a terminating zero if found.

 src is walked two bytes at a time until a zero 16-bit value (read
 with SVAL) or src+len is reached.  A terminator is only looked for,
 and skipped, while still inside the len bytes; previously the
 check also read the two bytes at src+len, outside the stated bound.

 NOTE(review): len is presumably even; an odd len still lets the
 final 16-bit read straddle the end of the buffer.
********************************************************************/

char *skip_unibuf(char *src, size_t len)
{
	char *srcend = src + len;

	while (src < srcend && SVAL(src,0))
		src += 2;

	/* step over the terminator only if we stopped on one inside
	   the buffer */
	if (src < srcend && !SVAL(src,0))
		src += 2;

	return src;
}
227 /* Copy a string from little-endian or big-endian unicode source (depending
228 * on flags) to internal samba format destination
230 int rpcstr_pull(char* dest, void *src, int dest_len, int src_len, int flags)
232 if (!src) {
233 dest[0] = 0;
234 return 0;
236 if(dest_len==-1) dest_len=MAXUNI-3;
237 return pull_ucs2(NULL, dest, src, dest_len, src_len, flags|STR_UNICODE|STR_NOALIGN);
240 /* Copy a string from a unistr2 source to internal samba format
241 destination. Use this instead of direct calls to rpcstr_pull() to avoid
242 having to determine whether the source string is null terminated. */
244 int rpcstr_pull_unistr2_fstring(char *dest, UNISTR2 *src)
246 return pull_ucs2(NULL, dest, src->buffer, sizeof(fstring),
247 src->uni_str_len * 2, 0);
250 /* Converts a string from internal samba format to unicode
252 int rpcstr_push(void* dest, const char *src, int dest_len, int flags)
254 return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
257 /*******************************************************************
258 Return a DOS codepage version of a little-endian unicode string.
259 len is the filename length (ignoring any terminating zero) in uin16
260 units. Always null terminates.
261 Hack alert: uses fixed buffer(s).
262 ********************************************************************/
263 char *dos_unistrn2(const uint16 *src, int len)
265 static char lbufs[8][MAXUNI];
266 static int nexti;
267 char *lbuf = lbufs[nexti];
268 nexti = (nexti+1)%8;
269 pull_ucs2(NULL, lbuf, src, MAXUNI-3, len*2, STR_NOALIGN);
270 return lbuf;
273 /*******************************************************************
274 Convert a (little-endian) UNISTR2 structure to an ASCII string
275 ********************************************************************/
276 void unistr2_to_ascii(char *dest, const UNISTR2 *str, size_t maxlen)
278 if (str == NULL) {
279 *dest='\0';
280 return;
282 pull_ucs2(NULL, dest, str->buffer, maxlen, str->uni_str_len*2, STR_NOALIGN);
285 /*******************************************************************
286 give a static string for displaying a UNISTR2
287 ********************************************************************/
288 const char *unistr2_static(const UNISTR2 *str)
290 static pstring ret;
291 unistr2_to_ascii(ret, str, sizeof(ret));
292 return ret;
296 /*******************************************************************
297 duplicate a UNISTR2 string into a null terminated char*
298 using a talloc context
299 ********************************************************************/
300 char *unistr2_tdup(TALLOC_CTX *ctx, const UNISTR2 *str)
302 char *s;
303 int maxlen = (str->uni_str_len+1)*4;
304 if (!str->buffer) return NULL;
305 s = (char *)talloc(ctx, maxlen); /* convervative */
306 if (!s) return NULL;
307 pull_ucs2(NULL, s, str->buffer, maxlen, str->uni_str_len*2,
308 STR_NOALIGN);
309 return s;
313 /*******************************************************************
314 Return a number stored in a buffer
315 ********************************************************************/
317 uint32 buffer2_to_uint32(BUFFER2 *str)
319 if (str->buf_len == 4)
320 return IVAL(str->buffer, 0);
321 else
322 return 0;
325 /*******************************************************************
326 Convert a wchar to upper case.
327 ********************************************************************/
329 smb_ucs2_t toupper_w(smb_ucs2_t val)
331 return upcase_table[SVAL(&val,0)];
334 /*******************************************************************
335 Convert a wchar to lower case.
336 ********************************************************************/
338 smb_ucs2_t tolower_w( smb_ucs2_t val )
340 return lowcase_table[SVAL(&val,0)];
344 /*******************************************************************
345 determine if a character is lowercase
346 ********************************************************************/
347 BOOL islower_w(smb_ucs2_t c)
349 return upcase_table[SVAL(&c,0)] != c;
352 /*******************************************************************
353 determine if a character is uppercase
354 ********************************************************************/
355 BOOL isupper_w(smb_ucs2_t c)
357 return lowcase_table[SVAL(&c,0)] != c;
361 /*******************************************************************
362 determine if a character is valid in a 8.3 name
363 ********************************************************************/
364 BOOL isvalid83_w(smb_ucs2_t c)
366 return valid_table[SVAL(&c,0)] != 0;
369 /*******************************************************************
370 Count the number of characters in a smb_ucs2_t string.
371 ********************************************************************/
372 size_t strlen_w(const smb_ucs2_t *src)
374 size_t len;
376 for(len = 0; *src++; len++) ;
378 return len;
381 /*******************************************************************
382 Count up to max number of characters in a smb_ucs2_t string.
383 ********************************************************************/
384 size_t strnlen_w(const smb_ucs2_t *src, size_t max)
386 size_t len;
388 for(len = 0; *src++ && (len < max); len++) ;
390 return len;
393 /*******************************************************************
394 Wide strchr().
395 ********************************************************************/
397 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
399 while (*s != 0) {
400 if (c == *s) return (smb_ucs2_t *)s;
401 s++;
403 if (c == *s) return (smb_ucs2_t *)s;
405 return NULL;
408 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
410 return strchr_w(s, UCS2_CHAR(c));
413 /*******************************************************************
414 Wide strrchr().
415 ********************************************************************/
417 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
419 const smb_ucs2_t *p = s;
420 int len = strlen_w(s);
421 if (len == 0) return NULL;
422 p += (len - 1);
423 do {
424 if (c == *p) return (smb_ucs2_t *)p;
425 } while (p-- != s);
426 return NULL;
429 /*******************************************************************
430 Wide version of strrchr that returns after doing strrchr 'n' times.
431 ********************************************************************/
433 smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n)
435 const smb_ucs2_t *p = s;
436 int len = strlen_w(s);
437 if (len == 0 || !n)
438 return NULL;
439 p += (len - 1);
440 do {
441 if (c == *p)
442 n--;
444 if (!n)
445 return (smb_ucs2_t *)p;
446 } while (p-- != s);
447 return NULL;
450 /*******************************************************************
451 Wide strstr().
452 ********************************************************************/
454 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
456 smb_ucs2_t *r;
457 size_t slen, inslen;
459 if (!s || !*s || !ins || !*ins) return NULL;
460 slen = strlen_w(s);
461 inslen = strlen_w(ins);
462 r = (smb_ucs2_t *)s;
463 while ((r = strchr_w(r, *ins))) {
464 if (strncmp_w(r, ins, inslen) == 0) return r;
465 r++;
467 return NULL;
470 /*******************************************************************
471 Convert a string to lower case.
472 return True if any char is converted
473 ********************************************************************/
474 BOOL strlower_w(smb_ucs2_t *s)
476 BOOL ret = False;
477 while (*s) {
478 smb_ucs2_t v = tolower_w(*s);
479 if (v != *s) {
480 *s = v;
481 ret = True;
483 s++;
485 return ret;
488 /*******************************************************************
489 Convert a string to upper case.
490 return True if any char is converted
491 ********************************************************************/
492 BOOL strupper_w(smb_ucs2_t *s)
494 BOOL ret = False;
495 while (*s) {
496 smb_ucs2_t v = toupper_w(*s);
497 if (v != *s) {
498 *s = v;
499 ret = True;
501 s++;
503 return ret;
506 /*******************************************************************
507 convert a string to "normal" form
508 ********************************************************************/
509 void strnorm_w(smb_ucs2_t *s)
511 extern int case_default;
512 if (case_default == CASE_UPPER)
513 strupper_w(s);
514 else
515 strlower_w(s);
518 int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
520 while (*b && *a == *b) { a++; b++; }
521 return (*a - *b);
522 /* warning: if *a != *b and both are not 0 we retrun a random
523 greater or lesser than 0 number not realted to which
524 string is longer */
527 int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
529 size_t n = 0;
530 while ((n < len) && *b && *a == *b) { a++; b++; n++;}
531 return (len - n)?(*a - *b):0;
534 /*******************************************************************
535 case insensitive string comparison
536 ********************************************************************/
537 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
539 while (*b && toupper_w(*a) == toupper_w(*b)) { a++; b++; }
540 return (tolower_w(*a) - tolower_w(*b));
543 /*******************************************************************
544 case insensitive string comparison, lenght limited
545 ********************************************************************/
546 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
548 size_t n = 0;
549 while ((n < len) && *b && (toupper_w(*a) == toupper_w(*b))) { a++; b++; n++; }
550 return (len - n)?(tolower_w(*a) - tolower_w(*b)):0;
553 /*******************************************************************
554 compare 2 strings
555 ********************************************************************/
556 BOOL strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
558 if (s1 == s2) return(True);
559 if (!s1 || !s2) return(False);
561 return(strcasecmp_w(s1,s2)==0);
564 /*******************************************************************
565 compare 2 strings up to and including the nth char.
566 ******************************************************************/
567 BOOL strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
569 if (s1 == s2) return(True);
570 if (!s1 || !s2 || !n) return(False);
572 return(strncasecmp_w(s1,s2,n)==0);
575 /*******************************************************************
576 duplicate string
577 ********************************************************************/
578 smb_ucs2_t *strdup_w(const smb_ucs2_t *src)
580 return strndup_w(src, 0);
583 /* if len == 0 then duplicate the whole string */
584 smb_ucs2_t *strndup_w(const smb_ucs2_t *src, size_t len)
586 smb_ucs2_t *dest;
588 if (!len) len = strlen_w(src);
589 dest = (smb_ucs2_t *)malloc((len + 1) * sizeof(smb_ucs2_t));
590 if (!dest) {
591 DEBUG(0,("strdup_w: out of memory!\n"));
592 return NULL;
595 memcpy(dest, src, len * sizeof(smb_ucs2_t));
596 dest[len] = 0;
598 return dest;
601 /*******************************************************************
602 copy a string with max len
603 ********************************************************************/
605 smb_ucs2_t *strncpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
607 size_t len;
609 if (!dest || !src) return NULL;
611 for (len = 0; (src[len] != 0) && (len < max); len++)
612 dest[len] = src[len];
613 while (len < max)
614 dest[len++] = 0;
616 return dest;
620 /*******************************************************************
621 append a string of len bytes and add a terminator
622 ********************************************************************/
624 smb_ucs2_t *strncat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
626 size_t start;
627 size_t len;
629 if (!dest || !src) return NULL;
631 start = strlen_w(dest);
632 len = strnlen_w(src, max);
634 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
635 dest[start+len] = 0;
637 return dest;
640 smb_ucs2_t *strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src)
642 size_t start;
643 size_t len;
645 if (!dest || !src) return NULL;
647 start = strlen_w(dest);
648 len = strlen_w(src);
650 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
651 dest[start+len] = 0;
653 return dest;
657 /*******************************************************************
658 replace any occurence of oldc with newc in unicode string
659 ********************************************************************/
661 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
663 for(;*s;s++) {
664 if(*s==oldc) *s=newc;
668 /*******************************************************************
669 trim unicode string
670 ********************************************************************/
672 BOOL trim_string_w(smb_ucs2_t *s, const smb_ucs2_t *front,
673 const smb_ucs2_t *back)
675 BOOL ret = False;
676 size_t len, front_len, back_len;
678 if (!s || !*s) return False;
680 len = strlen_w(s);
682 if (front && *front) {
683 front_len = strlen_w(front);
684 while (len && strncmp_w(s, front, front_len) == 0) {
685 memmove(s, (s + front_len), (len - front_len + 1) * sizeof(smb_ucs2_t));
686 len -= front_len;
687 ret = True;
691 if (back && *back) {
692 back_len = strlen_w(back);
693 while (len && strncmp_w((s + (len - back_len)), back, back_len) == 0) {
694 s[len - back_len] = 0;
695 len -= back_len;
696 ret = True;
700 return ret;
/*
  The *_wa() functions take a combination of 7 bit ascii
  and wide characters. They are used so that you can use string
  functions combining C string constants with ucs2 strings.

  The char* arguments must NOT be multibyte - to be completely sure
  of this only pass string constants */
712 void pstrcpy_wa(smb_ucs2_t *dest, const char *src)
714 int i;
715 for (i=0;i<PSTRING_LEN;i++) {
716 dest[i] = UCS2_CHAR(src[i]);
717 if (src[i] == 0) return;
721 int strcmp_wa(const smb_ucs2_t *a, const char *b)
723 while (*b && *a == UCS2_CHAR(*b)) { a++; b++; }
724 return (*a - UCS2_CHAR(*b));
727 int strncmp_wa(const smb_ucs2_t *a, const char *b, size_t len)
729 size_t n = 0;
730 while ((n < len) && *b && *a == UCS2_CHAR(*b)) { a++; b++; n++;}
731 return (len - n)?(*a - UCS2_CHAR(*b)):0;
734 smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
736 while (*s != 0) {
737 int i;
738 for (i=0; p[i] && *s != UCS2_CHAR(p[i]); i++)
740 if (p[i]) return (smb_ucs2_t *)s;
741 s++;
743 return NULL;
746 smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins)
748 smb_ucs2_t *r;
749 size_t slen, inslen;
751 if (!s || !*s || !ins || !*ins) return NULL;
752 slen = strlen_w(s);
753 inslen = strlen(ins);
754 r = (smb_ucs2_t *)s;
755 while ((r = strchr_w(r, UCS2_CHAR(*ins)))) {
756 if (strncmp_wa(r, ins, inslen) == 0) return r;
757 r++;
759 return NULL;
762 BOOL trim_string_wa(smb_ucs2_t *s, const char *front,
763 const char *back)
765 wpstring f, b;
767 if (front) push_ucs2(NULL, f, front, sizeof(wpstring) - 1, STR_TERMINATE);
768 else *f = 0;
769 if (back) push_ucs2(NULL, b, back, sizeof(wpstring) - 1, STR_TERMINATE);
770 else *b = 0;
771 return trim_string_w(s, f, b);
774 /*******************************************************************
775 returns the length in number of wide characters
776 ******************************************************************/
777 int unistrlen(uint16 *s)
779 int len;
781 if (!s)
782 return -1;
784 for (len=0; *s; s++,len++);
786 return len;
789 /*******************************************************************
790 Strcpy for unicode strings. returns length (in num of wide chars)
791 ********************************************************************/
793 int unistrcpy(uint16 *dst, uint16 *src)
795 int num_wchars = 0;
797 while (*src) {
798 *dst++ = *src++;
799 num_wchars++;
801 *dst = 0;
803 return num_wchars;
807 * Samba ucs2 type to UNISTR2 conversion
809 * @param ctx Talloc context to create the dst strcture (if null) and the
810 * contents of the unicode string.
811 * @param dst UNISTR2 destination. If equals null, then it's allocated.
812 * @param src smb_ucs2_t source.
813 * @param max_len maximum number of unicode characters to copy. If equals
814 * null, then null-termination of src is taken
816 * @return copied UNISTR2 destination
818 UNISTR2* ucs2_to_unistr2(TALLOC_CTX *ctx, UNISTR2* dst, smb_ucs2_t* src)
820 size_t len;
822 if (!src) return NULL;
823 len = strlen_w(src);
825 /* allocate UNISTR2 destination if not given */
826 if (!dst) {
827 dst = (UNISTR2*) talloc(ctx, sizeof(UNISTR2));
828 if (!dst) return NULL;
830 if (!dst->buffer) {
831 dst->buffer = (uint16*) talloc(ctx, sizeof(uint16) * (len + 1));
832 if (!dst->buffer) return NULL;
835 /* set UNISTR2 parameters */
836 dst->uni_max_len = len + 1;
837 dst->undoc = 0;
838 dst->uni_str_len = len;
840 /* copy the actual unicode string */
841 strncpy_w(dst->buffer, src, dst->uni_max_len);
843 return dst;