sr-epmap: Minor cleanups and fixes
[Samba/vl.git] / source3 / lib / util_unistr.c
blob14b5a8706b8d0a42ffd6bb5b8fa8daffa583eea7
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
6 Copyright (C) Jeremy Allison 2005
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "includes.h"
24 #ifndef MAXUNI
25 #define MAXUNI 1024
26 #endif
28 /* these 3 tables define the unicode case handling. They are loaded
29 at startup either via mmap() or read() from the lib directory */
30 static uint8 *valid_table;
31 static bool initialized;
33 /**
34 * Destroy global objects allocated by load_case_tables()
35 **/
36 void gfree_case_tables(void)
38 if ( valid_table ) {
39 unmap_file(valid_table, 0x10000);
40 valid_table = NULL;
42 initialized = false;
45 /**
46 * Load the valid character map table from <tt>valid.dat</tt> or
47 * create from the configured codepage.
49 * This function is called whenever the configuration is reloaded.
50 * However, the valid character table is not changed if it's loaded
51 * from a file, because we can't unmap files.
52 **/
54 static void init_valid_table(void)
56 if (valid_table) {
57 return;
60 valid_table = (uint8 *)map_file(data_path("valid.dat"), 0x10000);
61 if (!valid_table) {
62 smb_panic("Could not load valid.dat file required for mangle method=hash");
63 return;
67 /*******************************************************************
68 Write a string in (little-endian) unicode format. src is in
69 the current DOS codepage. len is the length in bytes of the
70 string pointed to by dst.
72 if null_terminate is True then null terminate the packet (adds 2 bytes)
74 the return value is the length in bytes consumed by the string, including the
75 null termination if applied
76 ********************************************************************/
78 size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate)
80 int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE
81 : STR_UNICODE|STR_NOALIGN;
82 return push_ucs2(NULL, dst, src, len, flags);
/*******************************************************************
 Skip past a unicode string, but not more than len bytes. Always move
 past a terminating zero if one is found inside the buffer.

 src points at the first 16-bit unit; len is the number of bytes
 available at src.

 Returns a pointer just after the terminating zero when one lies
 within the first len bytes. Otherwise returns the position where
 scanning stopped: src + len for an even len, or one byte short of
 that when len is odd (a trailing half unit is never inspected).

 A 16-bit unit is only read when both of its bytes are inside
 [src, src + len), so the function never reads beyond the buffer.
********************************************************************/

char *skip_unibuf(char *src, size_t len)
{
	char *srcend = src + len;

	/* Need two whole bytes left before SVAL() may look at them. */
	while ((srcend - src) >= 2) {
		if (SVAL(src,0) == 0) {
			/* Found the terminator: step over it and stop. */
			return src + 2;
		}
		src += 2;
	}

	return src;
}
106 /* Converts a string from internal samba format to unicode
109 int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags)
111 return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
114 /* Converts a string from internal samba format to unicode. Always terminates.
115 * Actually just a wrapper round push_ucs2_talloc().
118 int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
120 size_t size;
121 if (push_ucs2_talloc(ctx, dest, src, &size))
122 return size;
123 else
124 return -1;
127 /*******************************************************************
128 Determine if a character is valid in a 8.3 name.
129 ********************************************************************/
131 bool isvalid83_w(smb_ucs2_t c)
133 init_valid_table();
134 return valid_table[SVAL(&c,0)] != 0;
137 /*******************************************************************
138 Count the number of characters in a smb_ucs2_t string.
139 ********************************************************************/
141 size_t strlen_w(const smb_ucs2_t *src)
143 size_t len;
144 smb_ucs2_t c;
146 for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
150 return len;
153 /*******************************************************************
154 Count up to max number of characters in a smb_ucs2_t string.
155 ********************************************************************/
157 size_t strnlen_w(const smb_ucs2_t *src, size_t max)
159 size_t len;
160 smb_ucs2_t c;
162 for(len = 0; (len < max) && *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
166 return len;
169 /*******************************************************************
170 Wide strchr().
171 ********************************************************************/
173 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
175 smb_ucs2_t cp;
176 while (*(COPY_UCS2_CHAR(&cp,s))) {
177 if (c == cp) {
178 return (smb_ucs2_t *)s;
180 s++;
182 if (c == cp) {
183 return (smb_ucs2_t *)s;
186 return NULL;
189 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
191 return strchr_w(s, UCS2_CHAR(c));
194 /*******************************************************************
195 Wide strrchr().
196 ********************************************************************/
198 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
200 smb_ucs2_t cp;
201 const smb_ucs2_t *p = s;
202 int len = strlen_w(s);
204 if (len == 0) {
205 return NULL;
207 p += (len - 1);
208 do {
209 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
210 return (smb_ucs2_t *)p;
212 } while (p-- != s);
213 return NULL;
216 /*******************************************************************
217 Wide version of strrchr that returns after doing strrchr 'n' times.
218 ********************************************************************/
220 smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n)
222 smb_ucs2_t cp;
223 const smb_ucs2_t *p = s;
224 int len = strlen_w(s);
226 if (len == 0 || !n) {
227 return NULL;
229 p += (len - 1);
230 do {
231 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
232 n--;
235 if (!n) {
236 return (smb_ucs2_t *)p;
238 } while (p-- != s);
239 return NULL;
242 /*******************************************************************
243 Wide strstr().
244 ********************************************************************/
246 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
248 smb_ucs2_t *r;
249 size_t inslen;
251 if (!s || !*s || !ins || !*ins) {
252 return NULL;
255 inslen = strlen_w(ins);
256 r = (smb_ucs2_t *)s;
258 while ((r = strchr_w(r, *ins))) {
259 if (strncmp_w(r, ins, inslen) == 0) {
260 return r;
262 r++;
265 return NULL;
268 /*******************************************************************
269 Convert a string to lower case.
270 return True if any char is converted
272 This is unsafe for any string involving a UTF16 character
273 ********************************************************************/
275 bool strlower_w(smb_ucs2_t *s)
277 smb_ucs2_t cp;
278 bool ret = False;
280 while (*(COPY_UCS2_CHAR(&cp,s))) {
281 smb_ucs2_t v = tolower_m(cp);
282 if (v != cp) {
283 COPY_UCS2_CHAR(s,&v);
284 ret = True;
286 s++;
288 return ret;
291 /*******************************************************************
292 Convert a string to upper case.
293 return True if any char is converted
295 This is unsafe for any string involving a UTF16 character
296 ********************************************************************/
298 bool strupper_w(smb_ucs2_t *s)
300 smb_ucs2_t cp;
301 bool ret = False;
302 while (*(COPY_UCS2_CHAR(&cp,s))) {
303 smb_ucs2_t v = toupper_m(cp);
304 if (v != cp) {
305 COPY_UCS2_CHAR(s,&v);
306 ret = True;
308 s++;
310 return ret;
313 /*******************************************************************
314 Convert a string to "normal" form.
315 ********************************************************************/
317 void strnorm_w(smb_ucs2_t *s, int case_default)
319 if (case_default == CASE_UPPER) {
320 strupper_w(s);
321 } else {
322 strlower_w(s);
326 int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
328 smb_ucs2_t cpa, cpb;
330 while ((*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
331 a++;
332 b++;
334 return (*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b)));
335 /* warning: if *a != *b and both are not 0 we return a random
336 greater or lesser than 0 number not realted to which
337 string is longer */
340 int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
342 smb_ucs2_t cpa, cpb;
343 size_t n = 0;
345 while ((n < len) && (*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
346 a++;
347 b++;
348 n++;
350 return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0;
353 /*******************************************************************
354 Case insensitive string comparison.
355 ********************************************************************/
357 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
359 smb_ucs2_t cpa, cpb;
361 while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) {
362 a++;
363 b++;
365 return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b))));
368 /*******************************************************************
369 Case insensitive string comparison, length limited.
370 ********************************************************************/
372 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
374 smb_ucs2_t cpa, cpb;
375 size_t n = 0;
377 while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) {
378 a++;
379 b++;
380 n++;
382 return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0;
385 /*******************************************************************
386 Compare 2 strings.
387 ********************************************************************/
389 bool strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
391 if (s1 == s2) {
392 return(True);
394 if (!s1 || !s2) {
395 return(False);
398 return(strcasecmp_w(s1,s2)==0);
401 /*******************************************************************
402 Compare 2 strings up to and including the nth char.
403 ******************************************************************/
405 bool strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
407 if (s1 == s2) {
408 return(True);
410 if (!s1 || !s2 || !n) {
411 return(False);
414 return(strncasecmp_w(s1,s2,n)==0);
417 /*******************************************************************
418 Duplicate string.
419 ********************************************************************/
421 smb_ucs2_t *strdup_w(const smb_ucs2_t *src)
423 return strndup_w(src, 0);
426 /* if len == 0 then duplicate the whole string */
428 smb_ucs2_t *strndup_w(const smb_ucs2_t *src, size_t len)
430 smb_ucs2_t *dest;
432 if (!len) {
433 len = strlen_w(src);
435 dest = SMB_MALLOC_ARRAY(smb_ucs2_t, len + 1);
436 if (!dest) {
437 DEBUG(0,("strdup_w: out of memory!\n"));
438 return NULL;
441 memcpy(dest, src, len * sizeof(smb_ucs2_t));
442 dest[len] = 0;
443 return dest;
446 /*******************************************************************
447 Copy a string with max len.
448 ********************************************************************/
450 smb_ucs2_t *strncpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
452 smb_ucs2_t cp;
453 size_t len;
455 if (!dest || !src) {
456 return NULL;
459 for (len = 0; (*COPY_UCS2_CHAR(&cp,(src+len))) && (len < max); len++) {
460 cp = *COPY_UCS2_CHAR(dest+len,src+len);
462 cp = 0;
463 for ( /*nothing*/ ; len < max; len++ ) {
464 cp = *COPY_UCS2_CHAR(dest+len,&cp);
467 return dest;
470 /*******************************************************************
471 Append a string of len bytes and add a terminator.
472 ********************************************************************/
474 smb_ucs2_t *strncat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
476 size_t start;
477 size_t len;
478 smb_ucs2_t z = 0;
480 if (!dest || !src) {
481 return NULL;
484 start = strlen_w(dest);
485 len = strnlen_w(src, max);
487 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
488 z = *COPY_UCS2_CHAR(dest+start+len,&z);
490 return dest;
493 smb_ucs2_t *strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src)
495 size_t start;
496 size_t len;
497 smb_ucs2_t z = 0;
499 if (!dest || !src) {
500 return NULL;
503 start = strlen_w(dest);
504 len = strlen_w(src);
506 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
507 z = *COPY_UCS2_CHAR(dest+start+len,&z);
509 return dest;
513 /*******************************************************************
514 Replace any occurence of oldc with newc in unicode string.
515 ********************************************************************/
517 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
519 smb_ucs2_t cp;
521 for(;*(COPY_UCS2_CHAR(&cp,s));s++) {
522 if(cp==oldc) {
523 COPY_UCS2_CHAR(s,&newc);
528 /*******************************************************************
529 Trim unicode string.
530 ********************************************************************/
532 bool trim_string_w(smb_ucs2_t *s, const smb_ucs2_t *front,
533 const smb_ucs2_t *back)
535 bool ret = False;
536 size_t len, front_len, back_len;
538 if (!s) {
539 return False;
542 len = strlen_w(s);
544 if (front && *front) {
545 front_len = strlen_w(front);
546 while (len && strncmp_w(s, front, front_len) == 0) {
547 memmove(s, (s + front_len), (len - front_len + 1) * sizeof(smb_ucs2_t));
548 len -= front_len;
549 ret = True;
553 if (back && *back) {
554 back_len = strlen_w(back);
555 while (len && strncmp_w((s + (len - back_len)), back, back_len) == 0) {
556 s[len - back_len] = 0;
557 len -= back_len;
558 ret = True;
562 return ret;
/*
  The *_wa() functions take a combination of 7 bit ascii
  and wide characters. They are used so that you can use string
  functions combining C string constants with ucs2 strings.

  The char* arguments must NOT be multibyte - to be completely sure
  of this only pass string constants */
573 int strcmp_wa(const smb_ucs2_t *a, const char *b)
575 smb_ucs2_t cp = 0;
577 while (*b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
578 a++;
579 b++;
581 return (*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b));
584 int strncmp_wa(const smb_ucs2_t *a, const char *b, size_t len)
586 smb_ucs2_t cp = 0;
587 size_t n = 0;
589 while ((n < len) && *b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
590 a++;
591 b++;
592 n++;
594 return (len - n)?(*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b)):0;
597 smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
599 smb_ucs2_t cp;
601 while (*(COPY_UCS2_CHAR(&cp,s))) {
602 int i;
603 for (i=0; p[i] && cp != UCS2_CHAR(p[i]); i++)
605 if (p[i]) {
606 return (smb_ucs2_t *)s;
608 s++;
610 return NULL;
613 smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins)
615 smb_ucs2_t *r;
616 size_t inslen;
618 if (!s || !ins) {
619 return NULL;
622 inslen = strlen(ins);
623 r = (smb_ucs2_t *)s;
625 while ((r = strchr_w(r, UCS2_CHAR(*ins)))) {
626 if (strncmp_wa(r, ins, inslen) == 0)
627 return r;
628 r++;
631 return NULL;
634 /*************************************************************
635 ascii only toupper - saves the need for smbd to be in C locale.
636 *************************************************************/
638 int toupper_ascii(int c)
640 smb_ucs2_t uc = toupper_m(UCS2_CHAR(c));
641 return UCS2_TO_CHAR(uc);
644 /*************************************************************
645 ascii only tolower - saves the need for smbd to be in C locale.
646 *************************************************************/
648 int tolower_ascii(int c)
650 smb_ucs2_t uc = tolower_m(UCS2_CHAR(c));
651 return UCS2_TO_CHAR(uc);
/*************************************************************
 ascii only isupper - saves the need for smbd to be in C locale.

 Returns the result of isupper_m() on c widened with UCS2_CHAR().
*************************************************************/

int isupper_ascii(int c)
{
	int is_upper = isupper_m(UCS2_CHAR(c));

	return is_upper;
}
/*************************************************************
 ascii only islower - saves the need for smbd to be in C locale.

 Returns the result of islower_m() on c widened with UCS2_CHAR().
*************************************************************/

int islower_ascii(int c)
{
	int is_lower = islower_m(UCS2_CHAR(c));

	return is_lower;
}