2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include "system/locale.h"
24 #include "dynconfig.h"
28 * @brief Unicode string manipulation
31 /* These two tables define the Unicode case handling. They are loaded
32 at startup either via mmap() or read() from dyn_DATADIR, with a codepages/ fallback for testing */
33 static void *upcase_table
;
34 static void *lowcase_table
;
37 /*******************************************************************
38 load the case handling tables
39 ********************************************************************/
40 static void load_case_tables(void)
44 mem_ctx
= talloc_init("load_case_tables");
46 smb_panic("No memory for case_tables");
48 upcase_table
= map_file(talloc_asprintf(mem_ctx
, "%s/upcase.dat", dyn_DATADIR
), 0x20000);
49 lowcase_table
= map_file(talloc_asprintf(mem_ctx
, "%s/lowcase.dat", dyn_DATADIR
), 0x20000);
51 if (upcase_table
== NULL
) {
52 /* try also under codepages for testing purposes */
53 upcase_table
= map_file("codepages/upcase.dat", 0x20000);
54 if (upcase_table
== NULL
) {
55 upcase_table
= (void *)-1;
58 if (lowcase_table
== NULL
) {
59 /* try also under codepages for testing purposes */
60 lowcase_table
= map_file("codepages/lowcase.dat", 0x20000);
61 if (lowcase_table
== NULL
) {
62 lowcase_table
= (void *)-1;
68 Convert a codepoint_t to upper case.
70 codepoint_t
toupper_w(codepoint_t val
)
75 if (upcase_table
== NULL
) {
78 if (upcase_table
== (void *)-1) {
81 if (val
& 0xFFFF0000) {
84 return SVAL(upcase_table
, val
*2);
88 Convert a codepoint_t to lower case.
90 codepoint_t
tolower_w(codepoint_t val
)
95 if (lowcase_table
== NULL
) {
98 if (lowcase_table
== (void *)-1) {
101 if (val
& 0xFFFF0000) {
104 return SVAL(lowcase_table
, val
*2);
108 compare two codepoints case insensitively
110 int codepoint_cmpi(codepoint_t c1
, codepoint_t c2
)
113 toupper_w(c1
) == toupper_w(c2
)) {
120 Case insensitive string comparison
122 _PUBLIC_
int strcasecmp_m(const char *s1
, const char *s2
)
124 codepoint_t c1
=0, c2
=0;
127 /* handle null ptr comparisons to simplify the use in qsort */
128 if (s1
== s2
) return 0;
129 if (s1
== NULL
) return -1;
130 if (s2
== NULL
) return 1;
133 c1
= next_codepoint(s1
, &size1
);
134 c2
= next_codepoint(s2
, &size2
);
143 if (c1
== INVALID_CODEPOINT
||
144 c2
== INVALID_CODEPOINT
) {
145 /* what else can we do?? */
146 return strcasecmp(s1
, s2
);
149 if (toupper_w(c1
) != toupper_w(c2
)) {
158 * Get the next token from a string, return False if none found.
159 * Handles double-quotes.
161 * Based on a routine by GJC@VILLAGE.COM.
162 * Extensively modified by Andrew.Tridgell@anu.edu.au
164 _PUBLIC_ BOOL
next_token(const char **ptr
,char *buff
, const char *sep
, size_t bufsize
)
175 /* default to simple separators */
179 /* find the first non sep char */
180 while (*s
&& strchr_m(sep
,*s
))
187 /* copy over the token */
188 for (quoted
= False
; len
< bufsize
&& *s
&& (quoted
|| !strchr_m(sep
,*s
)); s
++) {
197 *ptr
= (*s
) ? s
+1 : s
;
204 Case insensitive string comparison, length limited
206 _PUBLIC_
int strncasecmp_m(const char *s1
, const char *s2
, size_t n
)
208 codepoint_t c1
=0, c2
=0;
211 /* handle null ptr comparisons to simplify the use in qsort */
212 if (s1
== s2
) return 0;
213 if (s1
== NULL
) return -1;
214 if (s2
== NULL
) return 1;
216 while (*s1
&& *s2
&& n
) {
219 c1
= next_codepoint(s1
, &size1
);
220 c2
= next_codepoint(s2
, &size2
);
229 if (c1
== INVALID_CODEPOINT
||
230 c2
== INVALID_CODEPOINT
) {
231 /* what else can we do?? */
232 return strcasecmp(s1
, s2
);
235 if (toupper_w(c1
) != toupper_w(c2
)) {
250 * @note The comparison is case-insensitive.
252 _PUBLIC_ BOOL
strequal_w(const char *s1
, const char *s2
)
259 return strcasecmp_m(s1
,s2
) == 0;
263 Compare 2 strings (case sensitive).
265 _PUBLIC_ BOOL
strcsequal_w(const char *s1
,const char *s2
)
272 return strcmp(s1
,s2
) == 0;
278 NOTE: oldc and newc must be 7 bit characters
280 _PUBLIC_
void string_replace_w(char *s
, char oldc
, char newc
)
282 for (; s
&& *s
; s
++) {
284 codepoint_t c
= next_codepoint(s
, &size
);
293 Paranoid strcpy into a buffer of given length (includes terminating
294 zero). Strips out all but 'a-z', 'A-Z', '0-9' and the characters in other_safe_chars
295 and replaces them with '_'. Deliberately does *NOT* check for multibyte
296 characters. Don't change it!
299 _PUBLIC_
char *alpha_strcpy(char *dest
, const char *src
, const char *other_safe_chars
, size_t maxlength
)
303 if (maxlength
== 0) {
304 /* can't fit any bytes at all! */
309 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
319 if (len
>= maxlength
)
322 if (!other_safe_chars
)
323 other_safe_chars
= "";
325 for(i
= 0; i
< len
; i
++) {
326 int val
= (src
[i
] & 0xff);
327 if (isupper(val
) || islower(val
) || isdigit(val
) || strchr_m(other_safe_chars
, val
))
339 Count the number of UCS2 characters in a string. Normally this will
340 be the same as the number of bytes in a string for single byte strings,
341 but will be different for multibyte.
343 _PUBLIC_
size_t strlen_m(const char *s
)
351 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
362 codepoint_t c
= next_codepoint(s
, &c_size
);
375 Work out the number of multibyte chars in a string, including the NUL terminator.
378 _PUBLIC_
size_t strlen_m_term(const char *s
)
384 return strlen_m(s
) + 1;
388 Strchr and strrchr_m are a bit complex on general multi-byte strings.
390 _PUBLIC_
char *strchr_m(const char *s
, char c
)
392 /* characters at or below 0x3F are guaranteed to not appear in
393 non-initial position in multi-byte charsets */
394 if ((c
& 0xC0) == 0) {
400 codepoint_t c2
= next_codepoint(s
, &size
);
402 return discard_const(s
);
411 * Multibyte-character version of strrchr
413 _PUBLIC_
char *strrchr_m(const char *s
, char c
)
417 /* characters at or below 0x3F are guaranteed to not appear in
418 non-initial position in multi-byte charsets */
419 if ((c
& 0xC0) == 0) {
420 return strrchr(s
, c
);
425 codepoint_t c2
= next_codepoint(s
, &size
);
427 ret
= discard_const(s
);
436 return True if any (multi-byte) character is lower case
438 _PUBLIC_ BOOL
strhaslower(const char *string
)
445 s
= next_codepoint(string
, &c_size
);
451 return True
; /* that means it has lower case chars */
459 return True if any (multi-byte) character is upper case
461 _PUBLIC_ BOOL
strhasupper(const char *string
)
468 s
= next_codepoint(string
, &c_size
);
474 return True
; /* that means it has upper case chars */
482 Convert a string to lower case, allocated with talloc
484 _PUBLIC_
char *strlower_talloc(TALLOC_CTX
*ctx
, const char *src
)
489 /* this takes advantage of the fact that upper/lower can't
490 change the length of a character by more than 1 byte */
491 dest
= talloc_size(ctx
, 2*(strlen(src
))+1);
498 codepoint_t c
= next_codepoint(src
, &c_size
);
503 c_size
= push_codepoint(dest
+size
, c
);
517 Convert a string to UPPER case, allocated with talloc
519 _PUBLIC_
char *strupper_talloc(TALLOC_CTX
*ctx
, const char *src
)
528 /* this takes advantage of the fact that upper/lower can't
529 change the length of a character by more than 1 byte */
530 dest
= talloc_size(ctx
, 2*(strlen(src
))+1);
537 codepoint_t c
= next_codepoint(src
, &c_size
);
542 c_size
= push_codepoint(dest
+size
, c
);
556 Convert a string to lower case.
558 _PUBLIC_
void strlower_m(char *s
)
562 /* this is quite a common operation, so we want it to be
563 fast. We optimise for the ascii case, knowing that all our
564 supported multi-byte character sets are ascii-compatible
565 (ie. they match for the first 128 chars) */
566 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
567 *s
= tolower((uint8_t)*s
);
577 size_t c_size
, c_size2
;
578 codepoint_t c
= next_codepoint(s
, &c_size
);
579 c_size2
= push_codepoint(d
, tolower_w(c
));
580 if (c_size2
> c_size
) {
581 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
582 c
, tolower_w(c
), (int)c_size
, (int)c_size2
));
583 smb_panic("codepoint expansion in strlower_m\n");
592 Convert a string to UPPER case.
594 _PUBLIC_
void strupper_m(char *s
)
598 /* this is quite a common operation, so we want it to be
599 fast. We optimise for the ascii case, knowing that all our
600 supported multi-byte character sets are ascii-compatible
601 (ie. they match for the first 128 chars) */
602 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
603 *s
= toupper((uint8_t)*s
);
613 size_t c_size
, c_size2
;
614 codepoint_t c
= next_codepoint(s
, &c_size
);
615 c_size2
= push_codepoint(d
, toupper_w(c
));
616 if (c_size2
> c_size
) {
617 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
618 c
, toupper_w(c
), (int)c_size
, (int)c_size2
));
619 smb_panic("codepoint expansion in strupper_m\n");
629 Find the number of 'c' chars in a string
631 _PUBLIC_
size_t count_chars_w(const char *s
, char c
)
637 codepoint_t c2
= next_codepoint(s
, &size
);
638 if (c2
== c
) count
++;