Search for location of waf script
[Samba.git] / lib / util / charset / util_unistr.c
blob2cc87186daecbb3c421f1cef37b43d2a66e41cca
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "system/locale.h"
24 /**
25 String replace.
26 NOTE: oldc and newc must be 7 bit characters
27 **/
28 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
30 struct smb_iconv_handle *ic = get_iconv_handle();
31 while (s && *s) {
32 size_t size;
33 codepoint_t c = next_codepoint_handle(ic, s, &size);
34 if (c == oldc) {
35 *s = newc;
37 s += size;
41 /**
42 Convert a string to lower case, allocated with talloc
43 **/
44 _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
45 TALLOC_CTX *ctx, const char *src)
47 size_t size=0;
48 char *dest;
50 if(src == NULL) {
51 return NULL;
54 /* this takes advantage of the fact that upper/lower can't
55 change the length of a character by more than 1 byte */
56 dest = talloc_array(ctx, char, 2*(strlen(src))+1);
57 if (dest == NULL) {
58 return NULL;
61 while (*src) {
62 size_t c_size;
63 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
64 src += c_size;
66 c = tolower_m(c);
68 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
69 if (c_size == -1) {
70 talloc_free(dest);
71 return NULL;
73 size += c_size;
76 dest[size] = 0;
78 /* trim it so talloc_append_string() works */
79 dest = talloc_realloc(ctx, dest, char, size+1);
81 talloc_set_name_const(dest, dest);
83 return dest;
86 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
88 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
89 return strlower_talloc_handle(iconv_handle, ctx, src);
92 /**
93 Convert a string to UPPER case, allocated with talloc
94 source length limited to n bytes, iconv handle supplied
95 **/
96 _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
97 TALLOC_CTX *ctx, const char *src, size_t n)
99 size_t size=0;
100 char *dest;
102 if (!src) {
103 return NULL;
106 /* this takes advantage of the fact that upper/lower can't
107 change the length of a character by more than 1 byte */
108 dest = talloc_array(ctx, char, 2*(n+1));
109 if (dest == NULL) {
110 return NULL;
113 while (n && *src) {
114 size_t c_size;
115 codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
116 CH_UNIX, &c_size);
117 src += c_size;
118 n -= c_size;
120 c = toupper_m(c);
122 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
123 if (c_size == -1) {
124 talloc_free(dest);
125 return NULL;
127 size += c_size;
130 dest[size] = 0;
132 /* trim it so talloc_append_string() works */
133 dest = talloc_realloc(ctx, dest, char, size+1);
135 talloc_set_name_const(dest, dest);
137 return dest;
141 Convert a string to UPPER case, allocated with talloc
142 source length limited to n bytes
144 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
146 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
147 return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
150 Convert a string to UPPER case, allocated with talloc
152 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
154 return strupper_talloc_n(ctx, src, src?strlen(src):0);
158 talloc_strdup() a unix string to upper case.
160 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
162 return strupper_talloc(ctx, src);
166 Find the number of 'c' chars in a string
168 _PUBLIC_ size_t count_chars_m(const char *s, char c)
170 struct smb_iconv_handle *ic = get_iconv_handle();
171 size_t count = 0;
173 while (*s) {
174 size_t size;
175 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
176 if (c2 == c) count++;
177 s += size;
180 return count;
185 * Copy a string from a char* unix src to a dos codepage string destination.
187 * @converted_size the number of bytes occupied by the string in the destination.
188 * @return bool true if success.
190 * @param flags can include
191 * <dl>
192 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
193 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
194 * </dl>
196 * @param dest_len the maximum length in bytes allowed in the
197 * destination. If @p dest_len is -1 then no maximum is used.
199 static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
201 size_t src_len;
202 bool ret;
204 if (flags & STR_UPPER) {
205 char *tmpbuf = strupper_talloc(NULL, src);
206 if (tmpbuf == NULL) {
207 return false;
209 ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
210 talloc_free(tmpbuf);
211 return ret;
214 src_len = strlen(src);
216 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
217 src_len++;
219 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
223 * Copy a string from a dos codepage source to a unix char* destination.
225 * The resulting string in "dest" is always null terminated.
227 * @param flags can have:
228 * <dl>
229 * <dt>STR_TERMINATE</dt>
230 * <dd>STR_TERMINATE means the string in @p src
231 * is null terminated, and src_len is ignored.</dd>
232 * </dl>
234 * @param src_len is the length of the source area in bytes.
235 * @returns the number of bytes occupied by the string in @p src.
237 static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
239 size_t size = 0;
241 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
242 if (src_len == (size_t)-1) {
243 src_len = strlen((const char *)src) + 1;
244 } else {
245 size_t len = strnlen((const char *)src, src_len);
246 if (len < src_len)
247 len++;
248 src_len = len;
252 /* We're ignoring the return here.. */
253 (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
255 if (dest_len)
256 dest[MIN(size, dest_len-1)] = 0;
258 return src_len;
262 * Copy a string from a char* src to a unicode destination.
264 * @returns the number of bytes occupied by the string in the destination.
266 * @param flags can have:
268 * <dl>
269 * <dt>STR_TERMINATE <dd>means include the null termination.
270 * <dt>STR_UPPER <dd>means uppercase in the destination.
271 * <dt>STR_NOALIGN <dd>means don't do alignment.
272 * </dl>
274 * @param dest_len is the maximum length allowed in the
275 * destination. If dest_len is -1 then no maxiumum is used.
277 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
279 size_t len=0;
280 size_t src_len = strlen(src);
281 size_t size = 0;
282 bool ret;
284 if (flags & STR_UPPER) {
285 char *tmpbuf = strupper_talloc(NULL, src);
286 ssize_t retval;
287 if (tmpbuf == NULL) {
288 return -1;
290 retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
291 talloc_free(tmpbuf);
292 return retval;
295 if (flags & STR_TERMINATE)
296 src_len++;
298 if (ucs2_align(NULL, dest, flags)) {
299 *(char *)dest = 0;
300 dest = (void *)((char *)dest + 1);
301 if (dest_len) dest_len--;
302 len++;
305 /* ucs2 is always a multiple of 2 bytes */
306 dest_len &= ~1;
308 ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
309 if (ret == false) {
310 return 0;
313 len += size;
315 return (ssize_t)len;
320 Copy a string from a ucs2 source to a unix char* destination.
321 Flags can have:
322 STR_TERMINATE means the string in src is null terminated.
323 STR_NOALIGN means don't try to align.
324 if STR_TERMINATE is set then src_len is ignored if it is -1.
325 src_len is the length of the source area in bytes
326 Return the number of bytes occupied by the string in src.
327 The resulting string in "dest" is always null terminated.
330 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
332 size_t size = 0;
334 if (ucs2_align(NULL, src, flags)) {
335 src = (const void *)((const char *)src + 1);
336 if (src_len > 0)
337 src_len--;
340 if (flags & STR_TERMINATE) {
341 if (src_len == (size_t)-1) {
342 src_len = utf16_len(src);
343 } else {
344 src_len = utf16_len_n(src, src_len);
348 /* ucs2 is always a multiple of 2 bytes */
349 if (src_len != (size_t)-1)
350 src_len &= ~1;
352 /* We're ignoring the return here.. */
353 (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
354 if (dest_len)
355 dest[MIN(size, dest_len-1)] = 0;
357 return src_len;
361 Copy a string from a char* src to a unicode or ascii
362 dos codepage destination choosing unicode or ascii based on the
363 flags in the SMB buffer starting at base_ptr.
364 Return the number of bytes occupied by the string in the destination.
365 flags can have:
366 STR_TERMINATE means include the null termination.
367 STR_UPPER means uppercase in the destination.
368 STR_ASCII use ascii even with unicode packet.
369 STR_NOALIGN means don't do alignment.
370 dest_len is the maximum length allowed in the destination. If dest_len
371 is -1 then no maxiumum is used.
374 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
376 if (flags & STR_ASCII) {
377 size_t size = 0;
378 if (push_ascii_string(dest, src, dest_len, flags, &size)) {
379 return (ssize_t)size;
380 } else {
381 return (ssize_t)-1;
383 } else if (flags & STR_UNICODE) {
384 return push_ucs2(dest, src, dest_len, flags);
385 } else {
386 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
387 return -1;
393 Copy a string from a unicode or ascii source (depending on
394 the packet flags) to a char* destination.
395 Flags can have:
396 STR_TERMINATE means the string in src is null terminated.
397 STR_UNICODE means to force as unicode.
398 STR_ASCII use ascii even with unicode packet.
399 STR_NOALIGN means don't do alignment.
400 if STR_TERMINATE is set then src_len is ignored is it is -1
401 src_len is the length of the source area in bytes.
402 Return the number of bytes occupied by the string in src.
403 The resulting string in "dest" is always null terminated.
406 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
408 if (flags & STR_ASCII) {
409 return pull_ascii_string(dest, src, dest_len, src_len, flags);
410 } else if (flags & STR_UNICODE) {
411 return pull_ucs2(dest, src, dest_len, src_len, flags);
412 } else {
413 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
414 return -1;