s3:dbwrap_tool: add --persistent switch and mode for non-persistent DBs
[Samba/gebeck_regimport.git] / lib / util / charset / util_unistr.c
blobe4ae65053c733245ad3d0ae792cc0c7679aa04ff
1 /*
2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "system/locale.h"
24 /**
25 String replace.
26 NOTE: oldc and newc must be 7 bit characters
27 **/
28 _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
30 struct smb_iconv_handle *ic = get_iconv_handle();
31 while (s && *s) {
32 size_t size;
33 codepoint_t c = next_codepoint_handle(ic, s, &size);
34 if (c == oldc) {
35 *s = newc;
37 s += size;
41 /**
42 Convert a string to lower case, allocated with talloc
43 **/
44 _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
45 TALLOC_CTX *ctx, const char *src)
47 size_t size=0;
48 char *dest;
50 if(src == NULL) {
51 return NULL;
54 /* this takes advantage of the fact that upper/lower can't
55 change the length of a character by more than 1 byte */
56 dest = talloc_array(ctx, char, 2*(strlen(src))+1);
57 if (dest == NULL) {
58 return NULL;
61 while (*src) {
62 size_t c_size;
63 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
64 src += c_size;
66 c = tolower_m(c);
68 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
69 if (c_size == -1) {
70 talloc_free(dest);
71 return NULL;
73 size += c_size;
76 dest[size] = 0;
78 /* trim it so talloc_append_string() works */
79 dest = talloc_realloc(ctx, dest, char, size+1);
81 talloc_set_name_const(dest, dest);
83 return dest;
86 _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
88 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
89 return strlower_talloc_handle(iconv_handle, ctx, src);
92 /**
93 Convert a string to UPPER case, allocated with talloc
94 source length limited to n bytes, iconv handle supplied
95 **/
96 _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
97 TALLOC_CTX *ctx, const char *src, size_t n)
99 size_t size=0;
100 char *dest;
102 if (!src) {
103 return NULL;
106 /* this takes advantage of the fact that upper/lower can't
107 change the length of a character by more than 1 byte */
108 dest = talloc_array(ctx, char, 2*(n+1));
109 if (dest == NULL) {
110 return NULL;
113 while (n-- && *src) {
114 size_t c_size;
115 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
116 src += c_size;
118 c = toupper_m(c);
120 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
121 if (c_size == -1) {
122 talloc_free(dest);
123 return NULL;
125 size += c_size;
128 dest[size] = 0;
130 /* trim it so talloc_append_string() works */
131 dest = talloc_realloc(ctx, dest, char, size+1);
133 talloc_set_name_const(dest, dest);
135 return dest;
139 Convert a string to UPPER case, allocated with talloc
140 source length limited to n bytes
142 _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
144 struct smb_iconv_handle *iconv_handle = get_iconv_handle();
145 return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
148 Convert a string to UPPER case, allocated with talloc
150 _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
152 return strupper_talloc_n(ctx, src, src?strlen(src):0);
156 talloc_strdup() a unix string to upper case.
158 _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
160 return strupper_talloc(ctx, src);
164 Find the number of 'c' chars in a string
166 _PUBLIC_ size_t count_chars_m(const char *s, char c)
168 struct smb_iconv_handle *ic = get_iconv_handle();
169 size_t count = 0;
171 while (*s) {
172 size_t size;
173 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
174 if (c2 == c) count++;
175 s += size;
178 return count;
183 * Copy a string from a char* unix src to a dos codepage string destination.
185 * @converted_size the number of bytes occupied by the string in the destination.
186 * @return bool true if success.
188 * @param flags can include
189 * <dl>
190 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
191 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
192 * </dl>
194 * @param dest_len the maximum length in bytes allowed in the
195 * destination. If @p dest_len is -1 then no maximum is used.
197 static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
199 size_t src_len;
200 bool ret;
202 if (flags & STR_UPPER) {
203 char *tmpbuf = strupper_talloc(NULL, src);
204 if (tmpbuf == NULL) {
205 return false;
207 ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
208 talloc_free(tmpbuf);
209 return ret;
212 src_len = strlen(src);
214 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
215 src_len++;
217 return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
221 * Copy a string from a dos codepage source to a unix char* destination.
223 * The resulting string in "dest" is always null terminated.
225 * @param flags can have:
226 * <dl>
227 * <dt>STR_TERMINATE</dt>
228 * <dd>STR_TERMINATE means the string in @p src
229 * is null terminated, and src_len is ignored.</dd>
230 * </dl>
232 * @param src_len is the length of the source area in bytes.
233 * @returns the number of bytes occupied by the string in @p src.
235 static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
237 size_t size = 0;
239 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
240 if (src_len == (size_t)-1) {
241 src_len = strlen((const char *)src) + 1;
242 } else {
243 size_t len = strnlen((const char *)src, src_len);
244 if (len < src_len)
245 len++;
246 src_len = len;
250 /* We're ignoring the return here.. */
251 (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
253 if (dest_len)
254 dest[MIN(size, dest_len-1)] = 0;
256 return src_len;
260 * Copy a string from a char* src to a unicode destination.
262 * @returns the number of bytes occupied by the string in the destination.
264 * @param flags can have:
266 * <dl>
267 * <dt>STR_TERMINATE <dd>means include the null termination.
268 * <dt>STR_UPPER <dd>means uppercase in the destination.
269 * <dt>STR_NOALIGN <dd>means don't do alignment.
270 * </dl>
272 * @param dest_len is the maximum length allowed in the
273 * destination. If dest_len is -1 then no maxiumum is used.
275 static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
277 size_t len=0;
278 size_t src_len = strlen(src);
279 size_t size = 0;
280 bool ret;
282 if (flags & STR_UPPER) {
283 char *tmpbuf = strupper_talloc(NULL, src);
284 ssize_t retval;
285 if (tmpbuf == NULL) {
286 return -1;
288 retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
289 talloc_free(tmpbuf);
290 return retval;
293 if (flags & STR_TERMINATE)
294 src_len++;
296 if (ucs2_align(NULL, dest, flags)) {
297 *(char *)dest = 0;
298 dest = (void *)((char *)dest + 1);
299 if (dest_len) dest_len--;
300 len++;
303 /* ucs2 is always a multiple of 2 bytes */
304 dest_len &= ~1;
306 ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
307 if (ret == false) {
308 return 0;
311 len += size;
313 return (ssize_t)len;
318 Copy a string from a ucs2 source to a unix char* destination.
319 Flags can have:
320 STR_TERMINATE means the string in src is null terminated.
321 STR_NOALIGN means don't try to align.
322 if STR_TERMINATE is set then src_len is ignored if it is -1.
323 src_len is the length of the source area in bytes
324 Return the number of bytes occupied by the string in src.
325 The resulting string in "dest" is always null terminated.
328 static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
330 size_t size = 0;
332 if (ucs2_align(NULL, src, flags)) {
333 src = (const void *)((const char *)src + 1);
334 if (src_len > 0)
335 src_len--;
338 if (flags & STR_TERMINATE) {
339 if (src_len == (size_t)-1) {
340 src_len = utf16_len(src);
341 } else {
342 src_len = utf16_len_n(src, src_len);
346 /* ucs2 is always a multiple of 2 bytes */
347 if (src_len != (size_t)-1)
348 src_len &= ~1;
350 /* We're ignoring the return here.. */
351 (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
352 if (dest_len)
353 dest[MIN(size, dest_len-1)] = 0;
355 return src_len;
359 Copy a string from a char* src to a unicode or ascii
360 dos codepage destination choosing unicode or ascii based on the
361 flags in the SMB buffer starting at base_ptr.
362 Return the number of bytes occupied by the string in the destination.
363 flags can have:
364 STR_TERMINATE means include the null termination.
365 STR_UPPER means uppercase in the destination.
366 STR_ASCII use ascii even with unicode packet.
367 STR_NOALIGN means don't do alignment.
368 dest_len is the maximum length allowed in the destination. If dest_len
369 is -1 then no maxiumum is used.
372 _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
374 if (flags & STR_ASCII) {
375 size_t size = 0;
376 if (push_ascii_string(dest, src, dest_len, flags, &size)) {
377 return (ssize_t)size;
378 } else {
379 return (ssize_t)-1;
381 } else if (flags & STR_UNICODE) {
382 return push_ucs2(dest, src, dest_len, flags);
383 } else {
384 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
385 return -1;
391 Copy a string from a unicode or ascii source (depending on
392 the packet flags) to a char* destination.
393 Flags can have:
394 STR_TERMINATE means the string in src is null terminated.
395 STR_UNICODE means to force as unicode.
396 STR_ASCII use ascii even with unicode packet.
397 STR_NOALIGN means don't do alignment.
398 if STR_TERMINATE is set then src_len is ignored is it is -1
399 src_len is the length of the source area in bytes.
400 Return the number of bytes occupied by the string in src.
401 The resulting string in "dest" is always null terminated.
404 _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
406 if (flags & STR_ASCII) {
407 return pull_ascii_string(dest, src, dest_len, src_len, flags);
408 } else if (flags & STR_UNICODE) {
409 return pull_ucs2(dest, src, dest_len, src_len, flags);
410 } else {
411 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
412 return -1;