lib: strings: Simplify strcasecmp
[Samba.git] / lib / util / charset / util_str.c
blobd2e6cbbc620e09102e203b293f538a1e3b8bf474
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
6 Copyright (C) Andrew Bartlett 2011
7 Copyright (C) Jeremy Allison 1992-2007
8 Copyright (C) Martin Pool 2003
9 Copyright (C) James Peach 2006
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program. If not, see <http://www.gnu.org/licenses/>.
25 #include "includes.h"
26 #include "system/locale.h"
28 #ifdef strcasecmp
29 #undef strcasecmp
30 #endif
32 /**
33 Case insensitive string compararison, handle specified for testing
34 **/
35 _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
36 const char *s1, const char *s2)
38 codepoint_t c1=0, c2=0;
39 size_t size1, size2;
41 /* handle null ptr comparisons to simplify the use in qsort */
42 if (s1 == s2) return 0;
43 if (s1 == NULL) return -1;
44 if (s2 == NULL) return 1;
46 while (*s1 && *s2) {
47 c1 = next_codepoint_handle(iconv_handle, s1, &size1);
48 c2 = next_codepoint_handle(iconv_handle, s2, &size2);
50 if (c1 == INVALID_CODEPOINT ||
51 c2 == INVALID_CODEPOINT) {
52 return strcasecmp(s1, s2);
55 s1 += size1;
56 s2 += size2;
58 if (c1 == c2) {
59 continue;
62 if (toupper_m(c1) != toupper_m(c2)) {
63 return c1 - c2;
67 return *s1 - *s2;
70 /**
71 Case insensitive string compararison
72 **/
73 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
75 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
76 return strcasecmp_m_handle(iconv_handle, s1, s2);
79 /**
80 Case insensitive string compararison, length limited, handle specified for testing
81 **/
82 _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
83 const char *s1, const char *s2, size_t n)
85 codepoint_t c1=0, c2=0;
86 size_t size1, size2;
88 /* handle null ptr comparisons to simplify the use in qsort */
89 if (s1 == s2) return 0;
90 if (s1 == NULL) return -1;
91 if (s2 == NULL) return 1;
93 while (*s1 && *s2 && n) {
94 n--;
96 c1 = next_codepoint_handle(iconv_handle, s1, &size1);
97 c2 = next_codepoint_handle(iconv_handle, s2, &size2);
99 if (c1 == INVALID_CODEPOINT ||
100 c2 == INVALID_CODEPOINT) {
102 * n was specified in characters,
103 * now we must convert it to bytes.
104 * As bytes are the smallest
105 * character unit, the following
106 * increment and strncasecmp is always
107 * safe.
109 * The source string was already known
110 * to be n characters long, so we are
111 * guaranteed to be able to look at the
112 * (n remaining + size1) bytes from the
113 * s1 position).
115 n += size1;
116 return strncasecmp(s1, s2, n);
119 s1 += size1;
120 s2 += size2;
122 if (c1 == c2) {
123 continue;
126 if (toupper_m(c1) != toupper_m(c2)) {
127 return c1 - c2;
131 if (n == 0) {
132 return 0;
135 return *s1 - *s2;
139 Case insensitive string compararison, length limited
141 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
143 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
144 return strncasecmp_m_handle(iconv_handle, s1, s2, n);
148 * Compare 2 strings.
150 * @note The comparison is case-insensitive.
152 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
154 return strcasecmp_m(s1,s2) == 0;
/**
 * Compare 2 strings (case sensitive).
 *
 * Identical pointers (including two NULLs) are equal; a single NULL is
 * never equal to a non-NULL string. Otherwise the result of strcmp()
 * decides.
 **/
_PUBLIC_ bool strcsequal(const char *s1,const char *s2)
{
	if (s1 == s2) {
		return true;
	}

	if (s1 == NULL || s2 == NULL) {
		return false;
	}

	return strcmp(s1, s2) == 0;
}
171 * Calculate the number of units (8 or 16-bit, depending on the
172 * destination charset), that would be needed to convert the input
173 * string which is expected to be in in src_charset encoding to the
174 * destination charset (which should be a unicode charset).
176 _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic,
177 const char *s, charset_t src_charset, charset_t dst_charset)
179 size_t count = 0;
181 #ifdef DEVELOPER
182 switch (dst_charset) {
183 case CH_DOS:
184 case CH_UNIX:
185 smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)");
186 default:
187 break;
190 switch (src_charset) {
191 case CH_UTF16LE:
192 case CH_UTF16BE:
193 smb_panic("cannot call strlen_m_ext() with a UTF16 src charset (must be DOS, UNIX, DISPLAY or UTF8)");
194 default:
195 break;
197 #endif
198 if (!s) {
199 return 0;
202 while (*s && !(((uint8_t)*s) & 0x80)) {
203 s++;
204 count++;
207 if (!*s) {
208 return count;
211 while (*s) {
212 size_t c_size;
213 codepoint_t c = next_codepoint_handle_ext(ic, s, src_charset, &c_size);
214 s += c_size;
216 switch (dst_charset) {
217 case CH_UTF16LE:
218 case CH_UTF16BE:
219 case CH_UTF16MUNGED:
220 if (c < 0x10000) {
221 /* Unicode char fits into 16 bits. */
222 count += 1;
223 } else {
224 /* Double-width unicode char - 32 bits. */
225 count += 2;
227 break;
228 case CH_UTF8:
230 * this only checks ranges, and does not
231 * check for invalid codepoints
233 if (c < 0x80) {
234 count += 1;
235 } else if (c < 0x800) {
236 count += 2;
237 } else if (c < 0x10000) {
238 count += 3;
239 } else {
240 count += 4;
242 break;
243 default:
245 * non-unicode encoding:
246 * assume that each codepoint fits into
247 * one unit in the destination encoding.
249 count += 1;
253 return count;
257 * Calculate the number of units (8 or 16-bit, depending on the
258 * destination charset), that would be needed to convert the input
259 * string which is expected to be in in src_charset encoding to the
260 * destination charset (which should be a unicode charset).
262 _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
264 struct smb_iconv_handle *ic = get_iconv_handle();
265 return strlen_m_ext_handle(ic, s, src_charset, dst_charset);
268 _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
269 const charset_t dst_charset)
271 if (!s) {
272 return 0;
274 return strlen_m_ext(s, src_charset, dst_charset) + 1;
278 * Calculate the number of 16-bit units that would be needed to convert
279 * the input string which is expected to be in CH_UNIX encoding to UTF16.
281 * This will be the same as the number of bytes in a string for single
282 * byte strings, but will be different for multibyte.
284 _PUBLIC_ size_t strlen_m(const char *s)
286 return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
290 Work out the number of multibyte chars in a string, including the NULL
291 terminator.
293 _PUBLIC_ size_t strlen_m_term(const char *s)
295 if (!s) {
296 return 0;
299 return strlen_m(s) + 1;
303 * Weird helper routine for the winreg pipe: If nothing is around, return 0,
304 * if a string is there, include the terminator.
307 _PUBLIC_ size_t strlen_m_term_null(const char *s)
309 size_t len;
310 if (!s) {
311 return 0;
313 len = strlen_m(s);
314 if (len == 0) {
315 return 0;
318 return len+1;
322 Strchr and strrchr_m are a bit complex on general multi-byte strings.
324 _PUBLIC_ char *strchr_m(const char *src, char c)
326 const char *s;
327 struct smb_iconv_handle *ic = get_iconv_handle();
328 if (src == NULL) {
329 return NULL;
331 /* characters below 0x3F are guaranteed to not appear in
332 non-initial position in multi-byte charsets */
333 if ((c & 0xC0) == 0) {
334 return strchr(src, c);
337 /* this is quite a common operation, so we want it to be
338 fast. We optimise for the ascii case, knowing that all our
339 supported multi-byte character sets are ascii-compatible
340 (ie. they match for the first 128 chars) */
342 for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
343 if (*s == c)
344 return discard_const_p(char, s);
347 if (!*s)
348 return NULL;
350 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
351 /* With compose characters we must restart from the beginning. JRA. */
352 s = src;
353 #endif
355 while (*s) {
356 size_t size;
357 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
358 if (c2 == c) {
359 return discard_const_p(char, s);
361 s += size;
364 return NULL;
368 * Multibyte-character version of strrchr
370 _PUBLIC_ char *strrchr_m(const char *s, char c)
372 struct smb_iconv_handle *ic = get_iconv_handle();
373 char *ret = NULL;
375 if (s == NULL) {
376 return NULL;
379 /* characters below 0x3F are guaranteed to not appear in
380 non-initial position in multi-byte charsets */
381 if ((c & 0xC0) == 0) {
382 return strrchr(s, c);
385 /* this is quite a common operation, so we want it to be
386 fast. We optimise for the ascii case, knowing that all our
387 supported multi-byte character sets are ascii-compatible
388 (ie. they match for the first 128 chars). Also, in Samba
389 we only search for ascii characters in 'c' and that
390 in all mb character sets with a compound character
391 containing c, if 'c' is not a match at position
392 p, then p[-1] > 0x7f. JRA. */
395 size_t len = strlen(s);
396 const char *cp = s;
397 bool got_mb = false;
399 if (len == 0)
400 return NULL;
401 cp += (len - 1);
402 do {
403 if (c == *cp) {
404 /* Could be a match. Part of a multibyte ? */
405 if ((cp > s) &&
406 (((unsigned char)cp[-1]) & 0x80)) {
407 /* Yep - go slow :-( */
408 got_mb = true;
409 break;
411 /* No - we have a match ! */
412 return discard_const_p(char , cp);
414 } while (cp-- != s);
415 if (!got_mb)
416 return NULL;
419 while (*s) {
420 size_t size;
421 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
422 if (c2 == c) {
423 ret = discard_const_p(char, s);
425 s += size;
428 return ret;
432 return True if any (multi-byte) character is lower case
434 _PUBLIC_ bool strhaslower_handle(struct smb_iconv_handle *ic,
435 const char *string)
437 while (*string) {
438 size_t c_size;
439 codepoint_t s;
440 codepoint_t t;
442 s = next_codepoint_handle(ic, string, &c_size);
443 string += c_size;
445 t = toupper_m(s);
447 if (s != t) {
448 return true; /* that means it has lower case chars */
452 return false;
455 _PUBLIC_ bool strhaslower(const char *string)
457 struct smb_iconv_handle *ic = get_iconv_handle();
458 return strhaslower_handle(ic, string);
462 return True if any (multi-byte) character is upper case
464 _PUBLIC_ bool strhasupper_handle(struct smb_iconv_handle *ic,
465 const char *string)
467 while (*string) {
468 size_t c_size;
469 codepoint_t s;
470 codepoint_t t;
472 s = next_codepoint_handle(ic, string, &c_size);
473 string += c_size;
475 t = tolower_m(s);
477 if (s != t) {
478 return true; /* that means it has upper case chars */
482 return false;
485 _PUBLIC_ bool strhasupper(const char *string)
487 struct smb_iconv_handle *ic = get_iconv_handle();
488 return strhasupper_handle(ic, string);
491 /***********************************************************************
492 strstr_m - We convert via ucs2 for now.
493 ***********************************************************************/
495 char *strstr_m(const char *src, const char *findstr)
497 smb_ucs2_t *p;
498 smb_ucs2_t *src_w, *find_w;
499 const char *s;
500 char *s2;
501 char *retp;
502 size_t converted_size, findstr_len = 0;
504 TALLOC_CTX *frame; /* Only set up in the iconv case */
506 /* for correctness */
507 if (!findstr[0]) {
508 return discard_const_p(char, src);
511 /* Samba does single character findstr calls a *lot*. */
512 if (findstr[1] == '\0')
513 return strchr_m(src, *findstr);
515 /* We optimise for the ascii case, knowing that all our
516 supported multi-byte character sets are ascii-compatible
517 (ie. they match for the first 128 chars) */
519 for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
520 if (*s == *findstr) {
521 if (!findstr_len)
522 findstr_len = strlen(findstr);
524 if (strncmp(s, findstr, findstr_len) == 0) {
525 return discard_const_p(char, s);
530 if (!*s)
531 return NULL;
533 #if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */
534 /* 'make check' fails unless we do this */
536 /* With compose characters we must restart from the beginning. JRA. */
537 s = src;
538 #endif
540 frame = talloc_stackframe();
542 if (!push_ucs2_talloc(frame, &src_w, src, &converted_size)) {
543 DEBUG(0,("strstr_m: src malloc fail\n"));
544 TALLOC_FREE(frame);
545 return NULL;
548 if (!push_ucs2_talloc(frame, &find_w, findstr, &converted_size)) {
549 DEBUG(0,("strstr_m: find malloc fail\n"));
550 TALLOC_FREE(frame);
551 return NULL;
554 p = strstr_w(src_w, find_w);
556 if (!p) {
557 TALLOC_FREE(frame);
558 return NULL;
561 *p = 0;
562 if (!pull_ucs2_talloc(frame, &s2, src_w, &converted_size)) {
563 TALLOC_FREE(frame);
564 DEBUG(0,("strstr_m: dest malloc fail\n"));
565 return NULL;
567 retp = discard_const_p(char, (s+strlen(s2)));
568 TALLOC_FREE(frame);
569 return retp;