s3: Fix some nonempty line endings
[Samba/gebeck_regimport.git] / lib / util / charset / util_str.c
blob688ab5a0a1c11b8b1a02a421060eb0d8767c50e9
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
6 Copyright (C) Andrew Bartlett 2011
7 Copyright (C) Jeremy Allison 1992-2007
8 Copyright (C) Martin Pool 2003
9 Copyright (C) James Peach 2006
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "includes.h"
#include "system/locale.h"

#ifdef strcasecmp
#undef strcasecmp
#endif
#ifdef strncasecmp
#undef strncasecmp
#endif
32 /**
33 Case insensitive string compararison, handle specified for testing
34 **/
35 _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
36 const char *s1, const char *s2)
38 codepoint_t c1=0, c2=0;
39 size_t size1, size2;
41 /* handle null ptr comparisons to simplify the use in qsort */
42 if (s1 == s2) return 0;
43 if (s1 == NULL) return -1;
44 if (s2 == NULL) return 1;
46 while (*s1 && *s2) {
47 c1 = next_codepoint_handle(iconv_handle, s1, &size1);
48 c2 = next_codepoint_handle(iconv_handle, s2, &size2);
50 s1 += size1;
51 s2 += size2;
53 if (c1 == c2) {
54 continue;
57 if (c1 == INVALID_CODEPOINT ||
58 c2 == INVALID_CODEPOINT) {
59 /* what else can we do?? */
60 return strcasecmp(s1, s2);
63 if (toupper_m(c1) != toupper_m(c2)) {
64 return c1 - c2;
68 return *s1 - *s2;
71 /**
72 Case insensitive string compararison
73 **/
74 _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
76 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
77 return strcasecmp_m_handle(iconv_handle, s1, s2);
80 /**
81 Case insensitive string compararison, length limited, handle specified for testing
82 **/
83 _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
84 const char *s1, const char *s2, size_t n)
86 codepoint_t c1=0, c2=0;
87 size_t size1, size2;
89 /* handle null ptr comparisons to simplify the use in qsort */
90 if (s1 == s2) return 0;
91 if (s1 == NULL) return -1;
92 if (s2 == NULL) return 1;
94 while (*s1 && *s2 && n) {
95 n--;
97 c1 = next_codepoint_handle(iconv_handle, s1, &size1);
98 c2 = next_codepoint_handle(iconv_handle, s2, &size2);
100 s1 += size1;
101 s2 += size2;
103 if (c1 == c2) {
104 continue;
107 if (c1 == INVALID_CODEPOINT ||
108 c2 == INVALID_CODEPOINT) {
109 /* what else can we do?? */
110 return strcasecmp(s1, s2);
113 if (toupper_m(c1) != toupper_m(c2)) {
114 return c1 - c2;
118 if (n == 0) {
119 return 0;
122 return *s1 - *s2;
126 Case insensitive string compararison, length limited
128 _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
130 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
131 return strncasecmp_m_handle(iconv_handle, s1, s2, n);
135 * Compare 2 strings.
137 * @note The comparison is case-insensitive.
139 _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
141 return strcasecmp_m(s1,s2) == 0;
/**
 * Compare 2 strings for equality (case sensitive).
 *
 * Two identical pointers (including two NULLs) are equal; a NULL
 * pointer never equals a non-NULL string. Otherwise the result is that
 * of a byte-wise strcmp().
 **/
_PUBLIC_ bool strcsequal(const char *s1,const char *s2)
{
	if (s1 != s2) {
		if (s1 == NULL || s2 == NULL) {
			return false;
		}
		return strcmp(s1, s2) == 0;
	}

	return true;
}
158 * Calculate the number of units (8 or 16-bit, depending on the
159 * destination charset), that would be needed to convert the input
160 * string which is expected to be in in src_charset encoding to the
161 * destination charset (which should be a unicode charset).
163 _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic,
164 const char *s, charset_t src_charset, charset_t dst_charset)
166 size_t count = 0;
168 #ifdef DEVELOPER
169 switch (dst_charset) {
170 case CH_DOS:
171 case CH_UNIX:
172 smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)");
173 default:
174 break;
177 switch (src_charset) {
178 case CH_UTF16LE:
179 case CH_UTF16BE:
180 smb_panic("cannot call strlen_m_ext() with a UTF16 src charset (must be DOS, UNIX, DISPLAY or UTF8)");
181 default:
182 break;
184 #endif
185 if (!s) {
186 return 0;
189 while (*s && !(((uint8_t)*s) & 0x80)) {
190 s++;
191 count++;
194 if (!*s) {
195 return count;
198 while (*s) {
199 size_t c_size;
200 codepoint_t c = next_codepoint_handle_ext(ic, s, src_charset, &c_size);
201 s += c_size;
203 switch (dst_charset) {
204 case CH_UTF16LE:
205 case CH_UTF16BE:
206 case CH_UTF16MUNGED:
207 if (c < 0x10000) {
208 /* Unicode char fits into 16 bits. */
209 count += 1;
210 } else {
211 /* Double-width unicode char - 32 bits. */
212 count += 2;
214 break;
215 case CH_UTF8:
217 * this only checks ranges, and does not
218 * check for invalid codepoints
220 if (c < 0x80) {
221 count += 1;
222 } else if (c < 0x800) {
223 count += 2;
224 } else if (c < 0x10000) {
225 count += 3;
226 } else {
227 count += 4;
229 break;
230 default:
232 * non-unicode encoding:
233 * assume that each codepoint fits into
234 * one unit in the destination encoding.
236 count += 1;
240 return count;
244 * Calculate the number of units (8 or 16-bit, depending on the
245 * destination charset), that would be needed to convert the input
246 * string which is expected to be in in src_charset encoding to the
247 * destination charset (which should be a unicode charset).
249 _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
251 struct smb_iconv_handle *ic = get_iconv_handle();
252 return strlen_m_ext_handle(ic, s, src_charset, dst_charset);
255 _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
256 const charset_t dst_charset)
258 if (!s) {
259 return 0;
261 return strlen_m_ext(s, src_charset, dst_charset) + 1;
265 * Calculate the number of 16-bit units that would be needed to convert
266 * the input string which is expected to be in CH_UNIX encoding to UTF16.
268 * This will be the same as the number of bytes in a string for single
269 * byte strings, but will be different for multibyte.
271 _PUBLIC_ size_t strlen_m(const char *s)
273 return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
277 Work out the number of multibyte chars in a string, including the NULL
278 terminator.
280 _PUBLIC_ size_t strlen_m_term(const char *s)
282 if (!s) {
283 return 0;
286 return strlen_m(s) + 1;
290 * Weird helper routine for the winreg pipe: If nothing is around, return 0,
291 * if a string is there, include the terminator.
294 _PUBLIC_ size_t strlen_m_term_null(const char *s)
296 size_t len;
297 if (!s) {
298 return 0;
300 len = strlen_m(s);
301 if (len == 0) {
302 return 0;
305 return len+1;
309 Strchr and strrchr_m are a bit complex on general multi-byte strings.
311 _PUBLIC_ char *strchr_m(const char *src, char c)
313 const char *s;
314 struct smb_iconv_handle *ic = get_iconv_handle();
315 if (src == NULL) {
316 return NULL;
318 /* characters below 0x3F are guaranteed to not appear in
319 non-initial position in multi-byte charsets */
320 if ((c & 0xC0) == 0) {
321 return strchr(src, c);
324 /* this is quite a common operation, so we want it to be
325 fast. We optimise for the ascii case, knowing that all our
326 supported multi-byte character sets are ascii-compatible
327 (ie. they match for the first 128 chars) */
329 for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
330 if (*s == c)
331 return discard_const_p(char, s);
334 if (!*s)
335 return NULL;
337 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
338 /* With compose characters we must restart from the beginning. JRA. */
339 s = src;
340 #endif
342 while (*s) {
343 size_t size;
344 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
345 if (c2 == c) {
346 return discard_const_p(char, s);
348 s += size;
351 return NULL;
355 * Multibyte-character version of strrchr
357 _PUBLIC_ char *strrchr_m(const char *s, char c)
359 struct smb_iconv_handle *ic = get_iconv_handle();
360 char *ret = NULL;
362 if (s == NULL) {
363 return NULL;
366 /* characters below 0x3F are guaranteed to not appear in
367 non-initial position in multi-byte charsets */
368 if ((c & 0xC0) == 0) {
369 return strrchr(s, c);
372 /* this is quite a common operation, so we want it to be
373 fast. We optimise for the ascii case, knowing that all our
374 supported multi-byte character sets are ascii-compatible
375 (ie. they match for the first 128 chars). Also, in Samba
376 we only search for ascii characters in 'c' and that
377 in all mb character sets with a compound character
378 containing c, if 'c' is not a match at position
379 p, then p[-1] > 0x7f. JRA. */
382 size_t len = strlen(s);
383 const char *cp = s;
384 bool got_mb = false;
386 if (len == 0)
387 return NULL;
388 cp += (len - 1);
389 do {
390 if (c == *cp) {
391 /* Could be a match. Part of a multibyte ? */
392 if ((cp > s) &&
393 (((unsigned char)cp[-1]) & 0x80)) {
394 /* Yep - go slow :-( */
395 got_mb = true;
396 break;
398 /* No - we have a match ! */
399 return discard_const_p(char , cp);
401 } while (cp-- != s);
402 if (!got_mb)
403 return NULL;
406 while (*s) {
407 size_t size;
408 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
409 if (c2 == c) {
410 ret = discard_const_p(char, s);
412 s += size;
415 return ret;
419 return True if any (multi-byte) character is lower case
421 _PUBLIC_ bool strhaslower_handle(struct smb_iconv_handle *ic,
422 const char *string)
424 while (*string) {
425 size_t c_size;
426 codepoint_t s;
427 codepoint_t t;
429 s = next_codepoint_handle(ic, string, &c_size);
430 string += c_size;
432 t = toupper_m(s);
434 if (s != t) {
435 return true; /* that means it has lower case chars */
439 return false;
442 _PUBLIC_ bool strhaslower(const char *string)
444 struct smb_iconv_handle *ic = get_iconv_handle();
445 return strhaslower_handle(ic, string);
449 return True if any (multi-byte) character is upper case
451 _PUBLIC_ bool strhasupper_handle(struct smb_iconv_handle *ic,
452 const char *string)
454 while (*string) {
455 size_t c_size;
456 codepoint_t s;
457 codepoint_t t;
459 s = next_codepoint_handle(ic, string, &c_size);
460 string += c_size;
462 t = tolower_m(s);
464 if (s != t) {
465 return true; /* that means it has upper case chars */
469 return false;
472 _PUBLIC_ bool strhasupper(const char *string)
474 struct smb_iconv_handle *ic = get_iconv_handle();
475 return strhasupper_handle(ic, string);
478 /***********************************************************************
479 strstr_m - We convert via ucs2 for now.
480 ***********************************************************************/
482 char *strstr_m(const char *src, const char *findstr)
484 smb_ucs2_t *p;
485 smb_ucs2_t *src_w, *find_w;
486 const char *s;
487 char *s2;
488 char *retp;
489 size_t converted_size, findstr_len = 0;
491 TALLOC_CTX *frame; /* Only set up in the iconv case */
493 /* for correctness */
494 if (!findstr[0]) {
495 return discard_const_p(char, src);
498 /* Samba does single character findstr calls a *lot*. */
499 if (findstr[1] == '\0')
500 return strchr_m(src, *findstr);
502 /* We optimise for the ascii case, knowing that all our
503 supported multi-byte character sets are ascii-compatible
504 (ie. they match for the first 128 chars) */
506 for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
507 if (*s == *findstr) {
508 if (!findstr_len)
509 findstr_len = strlen(findstr);
511 if (strncmp(s, findstr, findstr_len) == 0) {
512 return discard_const_p(char, s);
517 if (!*s)
518 return NULL;
520 #if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */
521 /* 'make check' fails unless we do this */
523 /* With compose characters we must restart from the beginning. JRA. */
524 s = src;
525 #endif
527 frame = talloc_stackframe();
529 if (!push_ucs2_talloc(frame, &src_w, src, &converted_size)) {
530 DEBUG(0,("strstr_m: src malloc fail\n"));
531 TALLOC_FREE(frame);
532 return NULL;
535 if (!push_ucs2_talloc(frame, &find_w, findstr, &converted_size)) {
536 DEBUG(0,("strstr_m: find malloc fail\n"));
537 TALLOC_FREE(frame);
538 return NULL;
541 p = strstr_w(src_w, find_w);
543 if (!p) {
544 TALLOC_FREE(frame);
545 return NULL;
548 *p = 0;
549 if (!pull_ucs2_talloc(frame, &s2, src_w, &converted_size)) {
550 TALLOC_FREE(frame);
551 DEBUG(0,("strstr_m: dest malloc fail\n"));
552 return NULL;
554 retp = discard_const_p(char, (s+strlen(s2)));
555 TALLOC_FREE(frame);
556 return retp;