2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include "system/locale.h"
24 #include "dynconfig.h"
28 * @brief Unicode string manipulation
31 /* these 2 tables define the unicode case handling. They are loaded
32 at startup either via mmap() or read() from the lib directory */
33 static void *upcase_table
;
34 static void *lowcase_table
;
37 /*******************************************************************
38 load the case handling tables
39 ********************************************************************/
40 static void load_case_tables(void)
44 mem_ctx
= talloc_init("load_case_tables");
46 smb_panic("No memory for case_tables");
48 upcase_table
= map_file(talloc_asprintf(mem_ctx
, "%s/upcase.dat", dyn_DATADIR
), 0x20000);
49 lowcase_table
= map_file(talloc_asprintf(mem_ctx
, "%s/lowcase.dat", dyn_DATADIR
), 0x20000);
51 if (upcase_table
== NULL
) {
52 /* try also under codepages for testing purposes */
53 upcase_table
= map_file("codepages/upcase.dat", 0x20000);
54 if (upcase_table
== NULL
) {
55 upcase_table
= (void *)-1;
58 if (lowcase_table
== NULL
) {
59 /* try also under codepages for testing purposes */
60 lowcase_table
= map_file("codepages/lowcase.dat", 0x20000);
61 if (lowcase_table
== NULL
) {
62 lowcase_table
= (void *)-1;
68 Convert a codepoint_t to upper case.
70 codepoint_t
toupper_w(codepoint_t val
)
75 if (upcase_table
== NULL
) {
78 if (upcase_table
== (void *)-1) {
81 if (val
& 0xFFFF0000) {
84 return SVAL(upcase_table
, val
*2);
88 Convert a codepoint_t to lower case.
90 codepoint_t
tolower_w(codepoint_t val
)
95 if (lowcase_table
== NULL
) {
98 if (lowcase_table
== (void *)-1) {
101 if (val
& 0xFFFF0000) {
104 return SVAL(lowcase_table
, val
*2);
108 compare two codepoints case insensitively
110 int codepoint_cmpi(codepoint_t c1
, codepoint_t c2
)
113 toupper_w(c1
) == toupper_w(c2
)) {
120 Case insensitive string comparison
122 _PUBLIC_
int strcasecmp_m(const char *s1
, const char *s2
)
124 codepoint_t c1
=0, c2
=0;
127 /* handle null ptr comparisons to simplify the use in qsort */
128 if (s1
== s2
) return 0;
129 if (s1
== NULL
) return -1;
130 if (s2
== NULL
) return 1;
133 c1
= next_codepoint(s1
, &size1
);
134 c2
= next_codepoint(s2
, &size2
);
143 if (c1
== INVALID_CODEPOINT
||
144 c2
== INVALID_CODEPOINT
) {
145 /* what else can we do?? */
146 return strcasecmp(s1
, s2
);
149 if (toupper_w(c1
) != toupper_w(c2
)) {
158 * Get the next token from a string, return False if none found.
159 * Handles double-quotes.
161 * Based on a routine by GJC@VILLAGE.COM.
162 * Extensively modified by Andrew.Tridgell@anu.edu.au
164 _PUBLIC_ BOOL
next_token(const char **ptr
,char *buff
, const char *sep
, size_t bufsize
)
175 /* default to simple separators */
179 /* find the first non sep char */
180 while (*s
&& strchr_m(sep
,*s
))
187 /* copy over the token */
188 for (quoted
= False
; len
< bufsize
&& *s
&& (quoted
|| !strchr_m(sep
,*s
)); s
++) {
197 *ptr
= (*s
) ? s
+1 : s
;
204 Case insensitive string comparison, length limited
206 _PUBLIC_
int strncasecmp_m(const char *s1
, const char *s2
, size_t n
)
208 codepoint_t c1
=0, c2
=0;
211 /* handle null ptr comparisons to simplify the use in qsort */
212 if (s1
== s2
) return 0;
213 if (s1
== NULL
) return -1;
214 if (s2
== NULL
) return 1;
216 while (*s1
&& *s2
&& n
) {
219 c1
= next_codepoint(s1
, &size1
);
220 c2
= next_codepoint(s2
, &size2
);
229 if (c1
== INVALID_CODEPOINT
||
230 c2
== INVALID_CODEPOINT
) {
231 /* what else can we do?? */
232 return strcasecmp(s1
, s2
);
235 if (toupper_w(c1
) != toupper_w(c2
)) {
250 * @note The comparison is case-insensitive.
252 _PUBLIC_ BOOL
strequal_w(const char *s1
, const char *s2
)
254 return strcasecmp_m(s1
,s2
) == 0;
258 Compare 2 strings (case sensitive).
260 _PUBLIC_ BOOL
strcsequal_w(const char *s1
,const char *s2
)
267 return strcmp(s1
,s2
) == 0;
273 NOTE: oldc and newc must be 7 bit characters
275 _PUBLIC_
void string_replace_w(char *s
, char oldc
, char newc
)
279 codepoint_t c
= next_codepoint(s
, &size
);
288 Paranoid strcpy into a buffer of given length (includes terminating
289 zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
290 and replaces them with '_'. Deliberately does *NOT* check for multibyte
291 characters. Don't change it!
294 _PUBLIC_
char *alpha_strcpy(char *dest
, const char *src
, const char *other_safe_chars
, size_t maxlength
)
298 if (maxlength
== 0) {
299 /* can't fit any bytes at all! */
304 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
314 if (len
>= maxlength
)
317 if (!other_safe_chars
)
318 other_safe_chars
= "";
320 for(i
= 0; i
< len
; i
++) {
321 int val
= (src
[i
] & 0xff);
322 if (isupper(val
) || islower(val
) || isdigit(val
) || strchr_m(other_safe_chars
, val
))
334 Count the number of UCS2 characters in a string. Normally this will
335 be the same as the number of bytes in a string for single byte strings,
336 but will be different for multibyte.
338 _PUBLIC_
size_t strlen_m(const char *s
)
346 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
357 codepoint_t c
= next_codepoint(s
, &c_size
);
370 Work out the number of multibyte chars in a string, including the NULL
373 _PUBLIC_
size_t strlen_m_term(const char *s
)
379 return strlen_m(s
) + 1;
383 Strchr and strrchr_m are a bit complex on general multi-byte strings.
385 _PUBLIC_
char *strchr_m(const char *s
, char c
)
387 /* characters up to and including 0x3F are guaranteed to not appear in
388 non-initial position in multi-byte charsets */
389 if ((c
& 0xC0) == 0) {
395 codepoint_t c2
= next_codepoint(s
, &size
);
397 return discard_const(s
);
406 * Multibyte-character version of strrchr
408 _PUBLIC_
char *strrchr_m(const char *s
, char c
)
412 /* characters up to and including 0x3F are guaranteed to not appear in
413 non-initial position in multi-byte charsets */
414 if ((c
& 0xC0) == 0) {
415 return strrchr(s
, c
);
420 codepoint_t c2
= next_codepoint(s
, &size
);
422 ret
= discard_const(s
);
431 return True if any (multi-byte) character is lower case
433 _PUBLIC_ BOOL
strhaslower(const char *string
)
440 s
= next_codepoint(string
, &c_size
);
446 return True
; /* that means it has lower case chars */
454 return True if any (multi-byte) character is upper case
456 _PUBLIC_ BOOL
strhasupper(const char *string
)
463 s
= next_codepoint(string
, &c_size
);
469 return True
; /* that means it has upper case chars */
477 Convert a string to lower case, allocated with talloc
479 _PUBLIC_
char *strlower_talloc(TALLOC_CTX
*ctx
, const char *src
)
484 /* this takes advantage of the fact that upper/lower can't
485 change the length of a character by more than 1 byte */
486 dest
= talloc_size(ctx
, 2*(strlen(src
))+1);
493 codepoint_t c
= next_codepoint(src
, &c_size
);
498 c_size
= push_codepoint(dest
+size
, c
);
508 /* trim it so talloc_append_string() works */
509 dest
= talloc_realloc_size(ctx
, dest
, size
+1);
515 Convert a string to UPPER case, allocated with talloc
517 _PUBLIC_
char *strupper_talloc(TALLOC_CTX
*ctx
, const char *src
)
526 /* this takes advantage of the fact that upper/lower can't
527 change the length of a character by more than 1 byte */
528 dest
= talloc_size(ctx
, 2*(strlen(src
))+1);
535 codepoint_t c
= next_codepoint(src
, &c_size
);
540 c_size
= push_codepoint(dest
+size
, c
);
550 /* trim it so talloc_append_string() works */
551 dest
= talloc_realloc_size(ctx
, dest
, size
+1);
557 Convert a string to lower case.
559 _PUBLIC_
void strlower_m(char *s
)
563 /* this is quite a common operation, so we want it to be
564 fast. We optimise for the ascii case, knowing that all our
565 supported multi-byte character sets are ascii-compatible
566 (ie. they match for the first 128 chars) */
567 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
568 *s
= tolower((uint8_t)*s
);
578 size_t c_size
, c_size2
;
579 codepoint_t c
= next_codepoint(s
, &c_size
);
580 c_size2
= push_codepoint(d
, tolower_w(c
));
581 if (c_size2
> c_size
) {
582 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
583 c
, tolower_w(c
), (int)c_size
, (int)c_size2
));
584 smb_panic("codepoint expansion in strlower_m\n");
593 Convert a string to UPPER case.
595 _PUBLIC_
void strupper_m(char *s
)
599 /* this is quite a common operation, so we want it to be
600 fast. We optimise for the ascii case, knowing that all our
601 supported multi-byte character sets are ascii-compatible
602 (ie. they match for the first 128 chars) */
603 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
604 *s
= toupper((uint8_t)*s
);
614 size_t c_size
, c_size2
;
615 codepoint_t c
= next_codepoint(s
, &c_size
);
616 c_size2
= push_codepoint(d
, toupper_w(c
));
617 if (c_size2
> c_size
) {
618 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
619 c
, toupper_w(c
), (int)c_size
, (int)c_size2
));
620 smb_panic("codepoint expansion in strupper_m\n");
630 Find the number of 'c' chars in a string
632 _PUBLIC_
size_t count_chars_w(const char *s
, char c
)
638 codepoint_t c2
= next_codepoint(s
, &size
);
639 if (c2
== c
) count
++;