nsstest: no need for two copies of the same tool.
[Samba/vl.git] / source3 / lib / util_unistr.c
blob4cda38dc191bdfc8247a95613e216b63c8384ee8
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
6 Copyright (C) Jeremy Allison 2005
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "includes.h"
24 #ifndef MAXUNI
25 #define MAXUNI 1024
26 #endif
28 /* these 3 tables define the unicode case handling. They are loaded
29 at startup either via mmap() or read() from the lib directory */
30 static smb_ucs2_t *upcase_table;
31 static smb_ucs2_t *lowcase_table;
32 static uint8 *valid_table;
33 static bool upcase_table_use_unmap;
34 static bool lowcase_table_use_unmap;
35 static bool valid_table_use_unmap;
36 static bool initialized;
38 /**
39 * Destroy global objects allocated by load_case_tables()
40 **/
41 void gfree_case_tables(void)
43 if ( upcase_table ) {
44 if ( upcase_table_use_unmap )
45 unmap_file(upcase_table, 0x20000);
46 else
47 SAFE_FREE(upcase_table);
50 if ( lowcase_table ) {
51 if ( lowcase_table_use_unmap )
52 unmap_file(lowcase_table, 0x20000);
53 else
54 SAFE_FREE(lowcase_table);
57 if ( valid_table ) {
58 if ( valid_table_use_unmap )
59 unmap_file(valid_table, 0x10000);
60 else
61 SAFE_FREE(valid_table);
63 initialized = false;
66 /**
67 * Load or generate the case handling tables.
69 * The case tables are defined in UCS2 and don't depend on any
70 * configured parameters, so they never need to be reloaded.
71 **/
73 void load_case_tables(void)
75 char *old_locale = NULL, *saved_locale = NULL;
76 int i;
77 TALLOC_CTX *frame = NULL;
79 if (initialized) {
80 return;
82 initialized = true;
84 frame = talloc_stackframe();
86 upcase_table = (smb_ucs2_t *)map_file(data_path("upcase.dat"),
87 0x20000);
88 upcase_table_use_unmap = ( upcase_table != NULL );
90 lowcase_table = (smb_ucs2_t *)map_file(data_path("lowcase.dat"),
91 0x20000);
92 lowcase_table_use_unmap = ( lowcase_table != NULL );
94 #ifdef HAVE_SETLOCALE
95 /* Get the name of the current locale. */
96 old_locale = setlocale(LC_ALL, NULL);
98 if (old_locale) {
99 /* Save it as it is in static storage. */
100 saved_locale = SMB_STRDUP(old_locale);
103 /* We set back the locale to C to get ASCII-compatible toupper/lower functions. */
104 setlocale(LC_ALL, "C");
105 #endif
107 /* we would like Samba to limp along even if these tables are
108 not available */
109 if (!upcase_table) {
110 DEBUG(1,("creating lame upcase table\n"));
111 upcase_table = (smb_ucs2_t *)SMB_MALLOC(0x20000);
112 if (!upcase_table) {
113 smb_panic("lame upcase table malloc fail");
114 /* notreached. */
115 return;
117 for (i=0;i<0x10000;i++) {
118 smb_ucs2_t v;
119 SSVAL(&v, 0, i);
120 upcase_table[v] = i;
122 for (i=0;i<256;i++) {
123 smb_ucs2_t v;
124 SSVAL(&v, 0, UCS2_CHAR(i));
125 upcase_table[v] = UCS2_CHAR(islower(i)?toupper(i):i);
129 if (!lowcase_table) {
130 DEBUG(1,("creating lame lowcase table\n"));
131 lowcase_table = (smb_ucs2_t *)SMB_MALLOC(0x20000);
132 if (!lowcase_table) {
133 smb_panic("lame lowcase table malloc fail");
134 /* notreached. */
135 return;
137 for (i=0;i<0x10000;i++) {
138 smb_ucs2_t v;
139 SSVAL(&v, 0, i);
140 lowcase_table[v] = i;
142 for (i=0;i<256;i++) {
143 smb_ucs2_t v;
144 SSVAL(&v, 0, UCS2_CHAR(i));
145 lowcase_table[v] = UCS2_CHAR(isupper(i)?tolower(i):i);
149 #ifdef HAVE_SETLOCALE
150 /* Restore the old locale. */
151 if (saved_locale) {
152 setlocale (LC_ALL, saved_locale);
153 SAFE_FREE(saved_locale);
155 #endif
156 TALLOC_FREE(frame);
159 static int check_dos_char_slowly(smb_ucs2_t c)
161 char buf[10];
162 smb_ucs2_t c2 = 0;
163 int len1, len2;
165 len1 = convert_string(CH_UTF16LE, CH_DOS, &c, 2, buf, sizeof(buf),False);
166 if (len1 == 0) {
167 return 0;
169 len2 = convert_string(CH_DOS, CH_UTF16LE, buf, len1, &c2, 2,False);
170 if (len2 != 2) {
171 return 0;
173 return (c == c2);
177 * Load the valid character map table from <tt>valid.dat</tt> or
178 * create from the configured codepage.
180 * This function is called whenever the configuration is reloaded.
181 * However, the valid character table is not changed if it's loaded
182 * from a file, because we can't unmap files.
185 void init_valid_table(void)
187 static int mapped_file;
188 int i;
189 const char *allowed = ".!#$%&'()_-@^`~";
190 uint8 *valid_file;
192 if (mapped_file) {
193 /* Can't unmap files, so stick with what we have */
194 return;
197 valid_file = (uint8 *)map_file(data_path("valid.dat"), 0x10000);
198 if (valid_file) {
199 valid_table = valid_file;
200 mapped_file = 1;
201 valid_table_use_unmap = True;
202 return;
205 /* Otherwise, we're using a dynamically created valid_table.
206 * It might need to be regenerated if the code page changed.
207 * We know that we're not using a mapped file, so we can
208 * free() the old one. */
209 SAFE_FREE(valid_table);
211 /* use free rather than unmap */
212 valid_table_use_unmap = False;
214 DEBUG(2,("creating default valid table\n"));
215 valid_table = (uint8 *)SMB_MALLOC(0x10000);
216 SMB_ASSERT(valid_table != NULL);
217 for (i=0;i<128;i++) {
218 valid_table[i] = isalnum(i) || strchr(allowed,i);
221 lazy_initialize_conv();
223 for (;i<0x10000;i++) {
224 smb_ucs2_t c;
225 SSVAL(&c, 0, i);
226 valid_table[i] = check_dos_char_slowly(c);
230 /*******************************************************************
231 Write a string in (little-endian) unicode format. src is in
232 the current DOS codepage. len is the length in bytes of the
233 string pointed to by dst.
235 if null_terminate is True then null terminate the packet (adds 2 bytes)
237 the return value is the length in bytes consumed by the string, including the
238 null termination if applied
239 ********************************************************************/
241 size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate)
243 int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE
244 : STR_UNICODE|STR_NOALIGN;
245 return push_ucs2(NULL, dst, src, len, flags);
/*******************************************************************
 Skip past a unicode string, but not more than len bytes. Moves
 past a terminating zero if one is found inside the buffer.

 src - start of the buffer holding 16-bit characters
 len - size of the buffer in bytes

 Returns a pointer just past the string (and its terminator when
 present). No 16-bit unit is ever read unless both of its bytes lie
 inside [src, src+len).
********************************************************************/

char *skip_unibuf(char *src, size_t len)
{
	char *srcend = src + len;

	/* Only look at a character when two full bytes remain. */
	while ((srcend - src) >= 2 && SVAL(src,0)) {
		src += 2;
	}

	/*
	 * The loop stops either at a terminator or at the end of the
	 * buffer; step over the terminator only in the first case, without
	 * reading beyond the buffer in the second.
	 */
	if ((srcend - src) >= 2 && !SVAL(src,0)) {
		src += 2;
	}

	return src;
}
269 /* Converts a string from internal samba format to unicode
272 int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags)
274 return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
277 /* Converts a string from internal samba format to unicode. Always terminates.
278 * Actually just a wrapper round push_ucs2_talloc().
281 int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
283 size_t size;
284 if (push_ucs2_talloc(ctx, dest, src, &size))
285 return size;
286 else
287 return -1;
290 /*******************************************************************
291 Convert a wchar to upper case.
292 ********************************************************************/
294 smb_ucs2_t toupper_w(smb_ucs2_t val)
296 return upcase_table[SVAL(&val,0)];
299 /*******************************************************************
300 Convert a wchar to lower case.
301 ********************************************************************/
303 smb_ucs2_t tolower_w( smb_ucs2_t val )
305 return lowcase_table[SVAL(&val,0)];
308 /*******************************************************************
309 Determine if a character is lowercase.
310 ********************************************************************/
312 bool islower_w(smb_ucs2_t c)
314 return upcase_table[SVAL(&c,0)] != c;
317 /*******************************************************************
318 Determine if a character is uppercase.
319 ********************************************************************/
321 bool isupper_w(smb_ucs2_t c)
323 return lowcase_table[SVAL(&c,0)] != c;
326 /*******************************************************************
327 Determine if a character is valid in a 8.3 name.
328 ********************************************************************/
330 bool isvalid83_w(smb_ucs2_t c)
332 return valid_table[SVAL(&c,0)] != 0;
335 /*******************************************************************
336 Count the number of characters in a smb_ucs2_t string.
337 ********************************************************************/
339 size_t strlen_w(const smb_ucs2_t *src)
341 size_t len;
342 smb_ucs2_t c;
344 for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
348 return len;
351 /*******************************************************************
352 Count up to max number of characters in a smb_ucs2_t string.
353 ********************************************************************/
355 size_t strnlen_w(const smb_ucs2_t *src, size_t max)
357 size_t len;
358 smb_ucs2_t c;
360 for(len = 0; (len < max) && *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
364 return len;
367 /*******************************************************************
368 Wide strchr().
369 ********************************************************************/
371 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
373 smb_ucs2_t cp;
374 while (*(COPY_UCS2_CHAR(&cp,s))) {
375 if (c == cp) {
376 return (smb_ucs2_t *)s;
378 s++;
380 if (c == cp) {
381 return (smb_ucs2_t *)s;
384 return NULL;
387 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
389 return strchr_w(s, UCS2_CHAR(c));
392 /*******************************************************************
393 Wide strrchr().
394 ********************************************************************/
396 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
398 smb_ucs2_t cp;
399 const smb_ucs2_t *p = s;
400 int len = strlen_w(s);
402 if (len == 0) {
403 return NULL;
405 p += (len - 1);
406 do {
407 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
408 return (smb_ucs2_t *)p;
410 } while (p-- != s);
411 return NULL;
414 /*******************************************************************
415 Wide version of strrchr that returns after doing strrchr 'n' times.
416 ********************************************************************/
418 smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n)
420 smb_ucs2_t cp;
421 const smb_ucs2_t *p = s;
422 int len = strlen_w(s);
424 if (len == 0 || !n) {
425 return NULL;
427 p += (len - 1);
428 do {
429 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
430 n--;
433 if (!n) {
434 return (smb_ucs2_t *)p;
436 } while (p-- != s);
437 return NULL;
440 /*******************************************************************
441 Wide strstr().
442 ********************************************************************/
444 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
446 smb_ucs2_t *r;
447 size_t inslen;
449 if (!s || !*s || !ins || !*ins) {
450 return NULL;
453 inslen = strlen_w(ins);
454 r = (smb_ucs2_t *)s;
456 while ((r = strchr_w(r, *ins))) {
457 if (strncmp_w(r, ins, inslen) == 0) {
458 return r;
460 r++;
463 return NULL;
466 /*******************************************************************
467 Convert a string to lower case.
468 return True if any char is converted
469 ********************************************************************/
471 bool strlower_w(smb_ucs2_t *s)
473 smb_ucs2_t cp;
474 bool ret = False;
476 while (*(COPY_UCS2_CHAR(&cp,s))) {
477 smb_ucs2_t v = tolower_w(cp);
478 if (v != cp) {
479 COPY_UCS2_CHAR(s,&v);
480 ret = True;
482 s++;
484 return ret;
487 /*******************************************************************
488 Convert a string to upper case.
489 return True if any char is converted
490 ********************************************************************/
492 bool strupper_w(smb_ucs2_t *s)
494 smb_ucs2_t cp;
495 bool ret = False;
496 while (*(COPY_UCS2_CHAR(&cp,s))) {
497 smb_ucs2_t v = toupper_w(cp);
498 if (v != cp) {
499 COPY_UCS2_CHAR(s,&v);
500 ret = True;
502 s++;
504 return ret;
507 /*******************************************************************
508 Convert a string to "normal" form.
509 ********************************************************************/
511 void strnorm_w(smb_ucs2_t *s, int case_default)
513 if (case_default == CASE_UPPER) {
514 strupper_w(s);
515 } else {
516 strlower_w(s);
520 int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
522 smb_ucs2_t cpa, cpb;
524 while ((*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
525 a++;
526 b++;
528 return (*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b)));
529 /* warning: if *a != *b and both are not 0 we return a random
530 greater or lesser than 0 number not realted to which
531 string is longer */
534 int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
536 smb_ucs2_t cpa, cpb;
537 size_t n = 0;
539 while ((n < len) && (*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
540 a++;
541 b++;
542 n++;
544 return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0;
547 /*******************************************************************
548 Case insensitive string comparison.
549 ********************************************************************/
551 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
553 smb_ucs2_t cpa, cpb;
555 while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_w(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_w(cpb)) {
556 a++;
557 b++;
559 return (tolower_w(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_w(*(COPY_UCS2_CHAR(&cpb,b))));
562 /*******************************************************************
563 Case insensitive string comparison, length limited.
564 ********************************************************************/
566 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
568 smb_ucs2_t cpa, cpb;
569 size_t n = 0;
571 while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_w(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_w(cpb))) {
572 a++;
573 b++;
574 n++;
576 return (len - n)?(tolower_w(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_w(*(COPY_UCS2_CHAR(&cpb,b)))):0;
579 /*******************************************************************
580 Compare 2 strings.
581 ********************************************************************/
583 bool strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
585 if (s1 == s2) {
586 return(True);
588 if (!s1 || !s2) {
589 return(False);
592 return(strcasecmp_w(s1,s2)==0);
595 /*******************************************************************
596 Compare 2 strings up to and including the nth char.
597 ******************************************************************/
599 bool strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
601 if (s1 == s2) {
602 return(True);
604 if (!s1 || !s2 || !n) {
605 return(False);
608 return(strncasecmp_w(s1,s2,n)==0);
611 /*******************************************************************
612 Duplicate string.
613 ********************************************************************/
615 smb_ucs2_t *strdup_w(const smb_ucs2_t *src)
617 return strndup_w(src, 0);
620 /* if len == 0 then duplicate the whole string */
622 smb_ucs2_t *strndup_w(const smb_ucs2_t *src, size_t len)
624 smb_ucs2_t *dest;
626 if (!len) {
627 len = strlen_w(src);
629 dest = SMB_MALLOC_ARRAY(smb_ucs2_t, len + 1);
630 if (!dest) {
631 DEBUG(0,("strdup_w: out of memory!\n"));
632 return NULL;
635 memcpy(dest, src, len * sizeof(smb_ucs2_t));
636 dest[len] = 0;
637 return dest;
640 /*******************************************************************
641 Copy a string with max len.
642 ********************************************************************/
644 smb_ucs2_t *strncpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
646 smb_ucs2_t cp;
647 size_t len;
649 if (!dest || !src) {
650 return NULL;
653 for (len = 0; (*COPY_UCS2_CHAR(&cp,(src+len))) && (len < max); len++) {
654 cp = *COPY_UCS2_CHAR(dest+len,src+len);
656 cp = 0;
657 for ( /*nothing*/ ; len < max; len++ ) {
658 cp = *COPY_UCS2_CHAR(dest+len,&cp);
661 return dest;
664 /*******************************************************************
665 Append a string of len bytes and add a terminator.
666 ********************************************************************/
668 smb_ucs2_t *strncat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
670 size_t start;
671 size_t len;
672 smb_ucs2_t z = 0;
674 if (!dest || !src) {
675 return NULL;
678 start = strlen_w(dest);
679 len = strnlen_w(src, max);
681 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
682 z = *COPY_UCS2_CHAR(dest+start+len,&z);
684 return dest;
687 smb_ucs2_t *strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src)
689 size_t start;
690 size_t len;
691 smb_ucs2_t z = 0;
693 if (!dest || !src) {
694 return NULL;
697 start = strlen_w(dest);
698 len = strlen_w(src);
700 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
701 z = *COPY_UCS2_CHAR(dest+start+len,&z);
703 return dest;
707 /*******************************************************************
708 Replace any occurence of oldc with newc in unicode string.
709 ********************************************************************/
711 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
713 smb_ucs2_t cp;
715 for(;*(COPY_UCS2_CHAR(&cp,s));s++) {
716 if(cp==oldc) {
717 COPY_UCS2_CHAR(s,&newc);
722 /*******************************************************************
723 Trim unicode string.
724 ********************************************************************/
726 bool trim_string_w(smb_ucs2_t *s, const smb_ucs2_t *front,
727 const smb_ucs2_t *back)
729 bool ret = False;
730 size_t len, front_len, back_len;
732 if (!s) {
733 return False;
736 len = strlen_w(s);
738 if (front && *front) {
739 front_len = strlen_w(front);
740 while (len && strncmp_w(s, front, front_len) == 0) {
741 memmove(s, (s + front_len), (len - front_len + 1) * sizeof(smb_ucs2_t));
742 len -= front_len;
743 ret = True;
747 if (back && *back) {
748 back_len = strlen_w(back);
749 while (len && strncmp_w((s + (len - back_len)), back, back_len) == 0) {
750 s[len - back_len] = 0;
751 len -= back_len;
752 ret = True;
756 return ret;
760 The *_wa() functions take a combination of 7 bit ascii
761 and wide characters They are used so that you can use string
762 functions combining C string constants with ucs2 strings
764 The char* arguments must NOT be multibyte - to be completely sure
765 of this only pass string constants */
767 int strcmp_wa(const smb_ucs2_t *a, const char *b)
769 smb_ucs2_t cp = 0;
771 while (*b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
772 a++;
773 b++;
775 return (*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b));
778 int strncmp_wa(const smb_ucs2_t *a, const char *b, size_t len)
780 smb_ucs2_t cp = 0;
781 size_t n = 0;
783 while ((n < len) && *b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
784 a++;
785 b++;
786 n++;
788 return (len - n)?(*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b)):0;
791 smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
793 smb_ucs2_t cp;
795 while (*(COPY_UCS2_CHAR(&cp,s))) {
796 int i;
797 for (i=0; p[i] && cp != UCS2_CHAR(p[i]); i++)
799 if (p[i]) {
800 return (smb_ucs2_t *)s;
802 s++;
804 return NULL;
807 smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins)
809 smb_ucs2_t *r;
810 size_t inslen;
812 if (!s || !ins) {
813 return NULL;
816 inslen = strlen(ins);
817 r = (smb_ucs2_t *)s;
819 while ((r = strchr_w(r, UCS2_CHAR(*ins)))) {
820 if (strncmp_wa(r, ins, inslen) == 0)
821 return r;
822 r++;
825 return NULL;
828 /*************************************************************
829 ascii only toupper - saves the need for smbd to be in C locale.
830 *************************************************************/
832 int toupper_ascii(int c)
834 smb_ucs2_t uc = toupper_w(UCS2_CHAR(c));
835 return UCS2_TO_CHAR(uc);
838 /*************************************************************
839 ascii only tolower - saves the need for smbd to be in C locale.
840 *************************************************************/
842 int tolower_ascii(int c)
844 smb_ucs2_t uc = tolower_w(UCS2_CHAR(c));
845 return UCS2_TO_CHAR(uc);
848 /*************************************************************
849 ascii only isupper - saves the need for smbd to be in C locale.
850 *************************************************************/
852 int isupper_ascii(int c)
854 return isupper_w(UCS2_CHAR(c));
857 /*************************************************************
858 ascii only islower - saves the need for smbd to be in C locale.
859 *************************************************************/
861 int islower_ascii(int c)
863 return islower_w(UCS2_CHAR(c));