First part of fix for bug 8310 - toupper_ascii() is broken on big-endian systems
[Samba.git] / source3 / lib / util_unistr.c
blobd8a360dfcc3df58f1153d1b491c99cb497fe3240
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
6 Copyright (C) Jeremy Allison 2005
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "includes.h"
/* Lookup table consulted by isvalid83_w() to decide whether a character
   is valid in an 8.3 name. It is mapped on first use from valid.dat by
   init_valid_table() and released by gfree_case_tables(). */
26 static uint8 *valid_table;
27 static bool initialized;
29 /**
30 * Destroy global objects allocated by load_case_tables()
31 **/
32 void gfree_case_tables(void)
34 if ( valid_table ) {
35 unmap_file(valid_table, 0x10000);
36 valid_table = NULL;
38 initialized = false;
41 /**
42 * Load the valid character map table from <tt>valid.dat</tt> or
43 * create from the configured codepage.
45 * This function is called whenever the configuration is reloaded.
46 * However, the valid character table is not changed if it's loaded
47 * from a file, because we can't unmap files.
48 **/
50 static void init_valid_table(void)
52 if (valid_table) {
53 return;
56 valid_table = (uint8 *)map_file(data_path("valid.dat"), 0x10000);
57 if (!valid_table) {
58 smb_panic("Could not load valid.dat file required for mangle method=hash");
59 return;
63 /*******************************************************************
64 Write a string in (little-endian) unicode format. src is in
65 the current DOS codepage. len is the length in bytes of the
66 string pointed to by dst.
68 if null_terminate is True then null terminate the packet (adds 2 bytes)
70 the return value is the length in bytes consumed by the string, including the
71 null termination if applied
72 ********************************************************************/
74 size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate)
76 int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE
77 : STR_UNICODE|STR_NOALIGN;
78 return push_ucs2(NULL, dst, src, len, flags);
82 /* Converts a string from internal samba format to unicode
85 int rpcstr_push(void *dest, const char *src, size_t dest_len, int flags)
87 return push_ucs2(NULL, dest, src, dest_len, flags|STR_UNICODE|STR_NOALIGN);
90 /* Converts a string from internal samba format to unicode. Always terminates.
91 * Actually just a wrapper round push_ucs2_talloc().
94 int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
96 size_t size;
97 if (push_ucs2_talloc(ctx, dest, src, &size))
98 return size;
99 else
100 return -1;
103 /*******************************************************************
104 Determine if a character is valid in a 8.3 name.
105 ********************************************************************/
107 bool isvalid83_w(smb_ucs2_t c)
109 init_valid_table();
110 return valid_table[SVAL(&c,0)] != 0;
113 /*******************************************************************
114 Count the number of characters in a smb_ucs2_t string.
115 ********************************************************************/
117 size_t strlen_w(const smb_ucs2_t *src)
119 size_t len;
120 smb_ucs2_t c;
122 for(len = 0; *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
126 return len;
129 /*******************************************************************
130 Count up to max number of characters in a smb_ucs2_t string.
131 ********************************************************************/
133 size_t strnlen_w(const smb_ucs2_t *src, size_t max)
135 size_t len;
136 smb_ucs2_t c;
138 for(len = 0; (len < max) && *(COPY_UCS2_CHAR(&c,src)); src++, len++) {
142 return len;
145 /*******************************************************************
146 Wide strchr().
147 ********************************************************************/
149 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
151 smb_ucs2_t cp;
152 while (*(COPY_UCS2_CHAR(&cp,s))) {
153 if (c == cp) {
154 return (smb_ucs2_t *)s;
156 s++;
158 if (c == cp) {
159 return (smb_ucs2_t *)s;
162 return NULL;
165 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c)
167 return strchr_w(s, UCS2_CHAR(c));
170 /*******************************************************************
171 Wide strrchr().
172 ********************************************************************/
174 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c)
176 smb_ucs2_t cp;
177 const smb_ucs2_t *p = s;
178 int len = strlen_w(s);
180 if (len == 0) {
181 return NULL;
183 p += (len - 1);
184 do {
185 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
186 return (smb_ucs2_t *)p;
188 } while (p-- != s);
189 return NULL;
192 /*******************************************************************
193 Wide version of strrchr that returns after doing strrchr 'n' times.
194 ********************************************************************/
196 smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n)
198 smb_ucs2_t cp;
199 const smb_ucs2_t *p = s;
200 int len = strlen_w(s);
202 if (len == 0 || !n) {
203 return NULL;
205 p += (len - 1);
206 do {
207 if (c == *(COPY_UCS2_CHAR(&cp,p))) {
208 n--;
211 if (!n) {
212 return (smb_ucs2_t *)p;
214 } while (p-- != s);
215 return NULL;
218 /*******************************************************************
219 Wide strstr().
220 ********************************************************************/
222 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins)
224 smb_ucs2_t *r;
225 size_t inslen;
227 if (!s || !*s || !ins || !*ins) {
228 return NULL;
231 inslen = strlen_w(ins);
232 r = (smb_ucs2_t *)s;
234 while ((r = strchr_w(r, *ins))) {
235 if (strncmp_w(r, ins, inslen) == 0) {
236 return r;
238 r++;
241 return NULL;
244 /*******************************************************************
245 Convert a string to lower case.
246 return True if any char is converted
248 This is unsafe for any string involving a UTF16 character
249 ********************************************************************/
251 bool strlower_w(smb_ucs2_t *s)
253 smb_ucs2_t cp;
254 bool ret = False;
256 while (*(COPY_UCS2_CHAR(&cp,s))) {
257 smb_ucs2_t v = tolower_m(cp);
258 if (v != cp) {
259 COPY_UCS2_CHAR(s,&v);
260 ret = True;
262 s++;
264 return ret;
267 /*******************************************************************
268 Convert a string to upper case.
269 return True if any char is converted
271 This is unsafe for any string involving a UTF16 character
272 ********************************************************************/
274 bool strupper_w(smb_ucs2_t *s)
276 smb_ucs2_t cp;
277 bool ret = False;
278 while (*(COPY_UCS2_CHAR(&cp,s))) {
279 smb_ucs2_t v = toupper_m(cp);
280 if (v != cp) {
281 COPY_UCS2_CHAR(s,&v);
282 ret = True;
284 s++;
286 return ret;
289 /*******************************************************************
290 Convert a string to "normal" form.
291 ********************************************************************/
293 void strnorm_w(smb_ucs2_t *s, int case_default)
295 if (case_default == CASE_UPPER) {
296 strupper_w(s);
297 } else {
298 strlower_w(s);
302 int strcmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
304 smb_ucs2_t cpa, cpb;
306 while ((*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
307 a++;
308 b++;
310 return (*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b)));
311 /* warning: if *a != *b and both are not 0 we return a random
312 greater or lesser than 0 number not realted to which
313 string is longer */
316 int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
318 smb_ucs2_t cpa, cpb;
319 size_t n = 0;
321 while ((n < len) && (*(COPY_UCS2_CHAR(&cpb,b))) && (*(COPY_UCS2_CHAR(&cpa,a)) == cpb)) {
322 a++;
323 b++;
324 n++;
326 return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0;
329 /*******************************************************************
330 Case insensitive string comparison.
331 ********************************************************************/
333 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b)
335 smb_ucs2_t cpa, cpb;
337 while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) {
338 a++;
339 b++;
341 return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b))));
344 /*******************************************************************
345 Case insensitive string comparison, length limited.
346 ********************************************************************/
348 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len)
350 smb_ucs2_t cpa, cpb;
351 size_t n = 0;
353 while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) {
354 a++;
355 b++;
356 n++;
358 return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0;
361 /*******************************************************************
362 Compare 2 strings.
363 ********************************************************************/
365 bool strequal_w(const smb_ucs2_t *s1, const smb_ucs2_t *s2)
367 if (s1 == s2) {
368 return(True);
370 if (!s1 || !s2) {
371 return(False);
374 return(strcasecmp_w(s1,s2)==0);
377 /*******************************************************************
378 Compare 2 strings up to and including the nth char.
379 ******************************************************************/
381 bool strnequal_w(const smb_ucs2_t *s1,const smb_ucs2_t *s2,size_t n)
383 if (s1 == s2) {
384 return(True);
386 if (!s1 || !s2 || !n) {
387 return(False);
390 return(strncasecmp_w(s1,s2,n)==0);
393 /*******************************************************************
394 Duplicate string.
395 ********************************************************************/
397 smb_ucs2_t *strdup_w(const smb_ucs2_t *src)
399 return strndup_w(src, 0);
402 /* if len == 0 then duplicate the whole string */
404 smb_ucs2_t *strndup_w(const smb_ucs2_t *src, size_t len)
406 smb_ucs2_t *dest;
408 if (!len) {
409 len = strlen_w(src);
411 dest = SMB_MALLOC_ARRAY(smb_ucs2_t, len + 1);
412 if (!dest) {
413 DEBUG(0,("strdup_w: out of memory!\n"));
414 return NULL;
417 memcpy(dest, src, len * sizeof(smb_ucs2_t));
418 dest[len] = 0;
419 return dest;
422 /*******************************************************************
423 Copy a string with max len.
424 ********************************************************************/
426 smb_ucs2_t *strncpy_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
428 smb_ucs2_t cp;
429 size_t len;
431 if (!dest || !src) {
432 return NULL;
435 for (len = 0; (*COPY_UCS2_CHAR(&cp,(src+len))) && (len < max); len++) {
436 cp = *COPY_UCS2_CHAR(dest+len,src+len);
438 cp = 0;
439 for ( /*nothing*/ ; len < max; len++ ) {
440 cp = *COPY_UCS2_CHAR(dest+len,&cp);
443 return dest;
446 /*******************************************************************
447 Append a string of len bytes and add a terminator.
448 ********************************************************************/
450 smb_ucs2_t *strncat_w(smb_ucs2_t *dest, const smb_ucs2_t *src, const size_t max)
452 size_t start;
453 size_t len;
454 smb_ucs2_t z = 0;
456 if (!dest || !src) {
457 return NULL;
460 start = strlen_w(dest);
461 len = strnlen_w(src, max);
463 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
464 z = *COPY_UCS2_CHAR(dest+start+len,&z);
466 return dest;
469 smb_ucs2_t *strcat_w(smb_ucs2_t *dest, const smb_ucs2_t *src)
471 size_t start;
472 size_t len;
473 smb_ucs2_t z = 0;
475 if (!dest || !src) {
476 return NULL;
479 start = strlen_w(dest);
480 len = strlen_w(src);
482 memcpy(&dest[start], src, len*sizeof(smb_ucs2_t));
483 z = *COPY_UCS2_CHAR(dest+start+len,&z);
485 return dest;
489 /*******************************************************************
490 Replace any occurence of oldc with newc in unicode string.
491 ********************************************************************/
493 void string_replace_w(smb_ucs2_t *s, smb_ucs2_t oldc, smb_ucs2_t newc)
495 smb_ucs2_t cp;
497 for(;*(COPY_UCS2_CHAR(&cp,s));s++) {
498 if(cp==oldc) {
499 COPY_UCS2_CHAR(s,&newc);
504 /*******************************************************************
505 Trim unicode string.
506 ********************************************************************/
508 bool trim_string_w(smb_ucs2_t *s, const smb_ucs2_t *front,
509 const smb_ucs2_t *back)
511 bool ret = False;
512 size_t len, front_len, back_len;
514 if (!s) {
515 return False;
518 len = strlen_w(s);
520 if (front && *front) {
521 front_len = strlen_w(front);
522 while (len && strncmp_w(s, front, front_len) == 0) {
523 memmove(s, (s + front_len), (len - front_len + 1) * sizeof(smb_ucs2_t));
524 len -= front_len;
525 ret = True;
529 if (back && *back) {
530 back_len = strlen_w(back);
531 while (len && strncmp_w((s + (len - back_len)), back, back_len) == 0) {
532 s[len - back_len] = 0;
533 len -= back_len;
534 ret = True;
538 return ret;
542 The *_wa() functions take a combination of 7 bit ascii
543 and wide characters They are used so that you can use string
544 functions combining C string constants with ucs2 strings
546 The char* arguments must NOT be multibyte - to be completely sure
547 of this only pass string constants */
549 int strcmp_wa(const smb_ucs2_t *a, const char *b)
551 smb_ucs2_t cp = 0;
553 while (*b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
554 a++;
555 b++;
557 return (*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b));
560 int strncmp_wa(const smb_ucs2_t *a, const char *b, size_t len)
562 smb_ucs2_t cp = 0;
563 size_t n = 0;
565 while ((n < len) && *b && *(COPY_UCS2_CHAR(&cp,a)) == UCS2_CHAR(*b)) {
566 a++;
567 b++;
568 n++;
570 return (len - n)?(*(COPY_UCS2_CHAR(&cp,a)) - UCS2_CHAR(*b)):0;
573 smb_ucs2_t *strpbrk_wa(const smb_ucs2_t *s, const char *p)
575 smb_ucs2_t cp;
577 while (*(COPY_UCS2_CHAR(&cp,s))) {
578 int i;
579 for (i=0; p[i] && cp != UCS2_CHAR(p[i]); i++)
581 if (p[i]) {
582 return (smb_ucs2_t *)s;
584 s++;
586 return NULL;
589 smb_ucs2_t *strstr_wa(const smb_ucs2_t *s, const char *ins)
591 smb_ucs2_t *r;
592 size_t inslen;
594 if (!s || !ins) {
595 return NULL;
598 inslen = strlen(ins);
599 r = (smb_ucs2_t *)s;
601 while ((r = strchr_w(r, UCS2_CHAR(*ins)))) {
602 if (strncmp_wa(r, ins, inslen) == 0)
603 return r;
604 r++;
607 return NULL;