Describe implication of upstream ICU-22610
[Samba.git] / source3 / lib / charcnv.c
blob6364329b791ca63c5175d75567b500fb09db1941
1 /*
2 Unix SMB/CIFS implementation.
3 Character set conversion Extensions
4 Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5 Copyright (C) Andrew Tridgell 2001
6 Copyright (C) Simo Sorce 2001
7 Copyright (C) Martin Pool 2003
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #include "includes.h"
/**
 * Destroy global objects allocated by init_iconv().
 *
 * Simply forwards to free_iconv_handle(); takes no arguments and
 * returns nothing.
 **/
void gfree_charcnv(void)
{
	free_iconv_handle();
}
33 /**
34 * Copy a string from a char* unix src to a dos codepage string destination.
36 * @return the number of bytes occupied by the string in the destination.
38 * @param flags can include
39 * <dl>
40 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
41 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
42 * </dl>
44 * @param dest_len the maximum length in bytes allowed in the
45 * destination.
46 **/
47 size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
49 size_t src_len = 0;
50 char *tmpbuf = NULL;
51 size_t size = 0;
52 bool ret;
54 /* No longer allow a length of -1. */
55 if (dest_len == (size_t)-1) {
56 smb_panic("push_ascii - dest_len == -1");
59 if (flags & STR_UPPER) {
60 tmpbuf = SMB_STRDUP(src);
61 if (!tmpbuf) {
62 smb_panic("malloc fail");
64 if (!strupper_m(tmpbuf)) {
65 if ((flags & (STR_TERMINATE|STR_TERMINATE_ASCII)) &&
66 dest &&
67 dest_len > 0) {
68 *(char *)dest = 0;
70 SAFE_FREE(tmpbuf);
71 return 0;
73 src = tmpbuf;
76 src_len = strlen(src);
77 if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
78 src_len++;
81 ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, &size);
82 SAFE_FREE(tmpbuf);
83 if (ret == false) {
84 if ((flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) &&
85 dest_len > 0) {
86 ((char *)dest)[0] = '\0';
88 return 0;
90 return size;
93 /********************************************************************
94 Push and malloc an ascii string. src and dest null terminated.
95 ********************************************************************/
97 /**
98 * Copy a string from a dos codepage source to a unix char* destination.
100 * The resulting string in "dest" is always null terminated.
102 * @param flags can have:
103 * <dl>
104 * <dt>STR_TERMINATE</dt>
105 * <dd>STR_TERMINATE means the string in @p src
106 * is null terminated, and src_len is ignored.</dd>
107 * </dl>
109 * @param src_len is the length of the source area in bytes.
110 * @returns the number of bytes occupied by the string in @p src.
112 size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
114 bool ret;
115 size_t size = 0;
117 if (dest_len == (size_t)-1) {
118 /* No longer allow dest_len of -1. */
119 smb_panic("pull_ascii - invalid dest_len of -1");
122 if (flags & STR_TERMINATE) {
123 if (src_len == (size_t)-1) {
124 src_len = strlen((const char *)src) + 1;
125 } else {
126 size_t len = strnlen((const char *)src, src_len);
127 if (len < src_len)
128 len++;
129 src_len = len;
133 ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
134 if (ret == false) {
135 size = 0;
136 dest_len = 0;
139 if (dest_len && size) {
140 /* Did we already process the terminating zero ? */
141 if (dest[MIN(size-1, dest_len-1)] != 0) {
142 dest[MIN(size, dest_len-1)] = 0;
144 } else {
145 dest[0] = 0;
148 return src_len;
152 * Copy a string from a dos codepage source to a unix char* destination.
153 * Talloc version.
155 * The resulting string in "dest" is always null terminated.
157 * @param flags can have:
158 * <dl>
159 * <dt>STR_TERMINATE</dt>
160 * <dd>STR_TERMINATE means the string in @p src
161 * is null terminated, and src_len is ignored.</dd>
162 * </dl>
164 * @param src_len is the length of the source area in bytes.
165 * @returns the number of bytes occupied by the string in @p src.
168 static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
169 char **ppdest,
170 const void *src,
171 size_t src_len,
172 int flags)
174 char *dest = NULL;
175 size_t dest_len;
177 *ppdest = NULL;
179 if (!src_len) {
180 return 0;
183 if (src_len == (size_t)-1) {
184 smb_panic("src_len == -1 in pull_ascii_base_talloc");
187 if (flags & STR_TERMINATE) {
188 size_t len = strnlen((const char *)src, src_len);
189 if (len < src_len)
190 len++;
191 src_len = len;
192 /* Ensure we don't use an insane length from the client. */
193 if (src_len >= 1024*1024) {
194 char *msg = talloc_asprintf(ctx,
195 "Bad src length (%u) in "
196 "pull_ascii_base_talloc",
197 (unsigned int)src_len);
198 smb_panic(msg);
202 /* src_len != -1 here. */
204 if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
205 &dest_len)) {
206 dest_len = 0;
209 if (dest_len && dest) {
210 /* Did we already process the terminating zero ? */
211 if (dest[dest_len-1] != 0) {
212 size_t size = talloc_get_size(dest);
213 /* Have we got space to append the '\0' ? */
214 if (size <= dest_len) {
215 /* No, realloc. */
216 dest = talloc_realloc(ctx, dest, char,
217 dest_len+1);
218 if (!dest) {
219 /* talloc fail. */
220 dest_len = (size_t)-1;
221 return 0;
224 /* Yay - space ! */
225 dest[dest_len] = '\0';
226 dest_len++;
228 } else if (dest) {
229 dest[0] = 0;
232 *ppdest = dest;
233 return src_len;
237 * Copy a string from a char* src to a unicode destination.
239 * @returns the number of bytes occupied by the string in the destination.
241 * @param flags can have:
243 * <dl>
244 * <dt>STR_TERMINATE <dd>means include the null termination.
245 * <dt>STR_UPPER <dd>means uppercase in the destination.
246 * <dt>STR_NOALIGN <dd>means don't do alignment.
247 * </dl>
249 * @param dest_len is the maximum length allowed in the
250 * destination.
253 static size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
255 size_t len=0;
256 size_t src_len;
257 size_t size = 0;
258 bool ret;
260 if (dest_len == (size_t)-1) {
261 /* No longer allow dest_len of -1. */
262 smb_panic("push_ucs2 - invalid dest_len of -1");
265 if (flags & STR_TERMINATE)
266 src_len = (size_t)-1;
267 else
268 src_len = strlen(src);
270 if (ucs2_align(base_ptr, dest, flags)) {
271 *(char *)dest = 0;
272 dest = (void *)((char *)dest + 1);
273 if (dest_len)
274 dest_len--;
275 len++;
278 /* ucs2 is always a multiple of 2 bytes */
279 dest_len &= ~1;
281 ret = convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, &size);
282 if (ret == false) {
283 if ((flags & STR_TERMINATE) &&
284 dest &&
285 dest_len) {
286 *(char *)dest = 0;
288 return len;
291 len += size;
293 if (flags & STR_UPPER) {
294 smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
295 size_t i;
297 /* We check for i < (size / 2) below as the dest string isn't null
298 terminated if STR_TERMINATE isn't set. */
300 for (i = 0; i < (size / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
301 smb_ucs2_t v = toupper_w(dest_ucs2[i]);
302 if (v != dest_ucs2[i]) {
303 dest_ucs2[i] = v;
308 return len;
312 Copy a string from a ucs2 source to a unix char* destination.
313 Talloc version with a base pointer.
314 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
315 needs fixing. JRA).
316 Flags can have:
317 STR_TERMINATE means the string in src is null terminated.
318 STR_NOALIGN means don't try to align.
319 if STR_TERMINATE is set then src_len is ignored if it is -1.
320 src_len is the length of the source area in bytes
321 Return the number of bytes occupied by the string in src.
322 The resulting string in "dest" is always null terminated.
325 static size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
326 const void *base_ptr,
327 char **ppdest,
328 const void *src,
329 size_t src_len,
330 int flags)
332 char *dest;
333 size_t dest_len;
334 size_t ucs2_align_len = 0;
336 *ppdest = NULL;
338 #ifdef DEVELOPER
339 /* Ensure we never use the braindead "malloc" variant. */
340 if (ctx == NULL) {
341 smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
343 #endif
345 if (!src_len) {
346 return 0;
349 if (src_len == (size_t)-1) {
350 /* no longer used anywhere, but worth checking */
351 smb_panic("sec_len == -1 in pull_ucs2_base_talloc");
354 if (ucs2_align(base_ptr, src, flags)) {
355 src = (const void *)((const char *)src + 1);
356 src_len--;
357 ucs2_align_len = 1;
360 if (flags & STR_TERMINATE) {
361 /* src_len -1 is the default for null terminated strings. */
362 size_t len = strnlen_w((const smb_ucs2_t *)src,
363 src_len/2);
364 if (len < src_len/2)
365 len++;
366 src_len = len*2;
368 /* Ensure we don't use an insane length from the client. */
369 if (src_len >= 1024*1024) {
370 smb_panic("Bad src length in pull_ucs2_base_talloc\n");
374 /* ucs2 is always a multiple of 2 bytes */
375 src_len &= ~1;
377 if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
378 (void *)&dest, &dest_len)) {
379 dest_len = 0;
382 if (dest_len) {
383 /* Did we already process the terminating zero ? */
384 if (dest[dest_len-1] != 0) {
385 size_t size = talloc_get_size(dest);
386 /* Have we got space to append the '\0' ? */
387 if (size <= dest_len) {
388 /* No, realloc. */
389 dest = talloc_realloc(ctx, dest, char,
390 dest_len+1);
391 if (!dest) {
392 /* talloc fail. */
393 dest_len = (size_t)-1;
394 return 0;
397 /* Yay - space ! */
398 dest[dest_len] = '\0';
399 dest_len++;
401 } else if (dest) {
402 dest[0] = 0;
405 *ppdest = dest;
406 return src_len + ucs2_align_len;
410 Copy a string from a char* src to a unicode or ascii
411 dos codepage destination choosing unicode or ascii based on the
412 flags supplied
413 Return the number of bytes occupied by the string in the destination.
414 flags can have:
415 STR_TERMINATE means include the null termination.
416 STR_UPPER means uppercase in the destination.
417 STR_ASCII use ascii even with unicode packet.
418 STR_NOALIGN means don't do alignment.
419 dest_len is the maximum length allowed in the destination. If dest_len
420 is -1 then no maximum is used.
423 size_t push_string_check_fn(void *dest, const char *src,
424 size_t dest_len, int flags)
426 if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
427 return push_ucs2(NULL, dest, src, dest_len, flags);
429 return push_ascii(dest, src, dest_len, flags);
434 Copy a string from a char* src to a unicode or ascii
435 dos codepage destination choosing unicode or ascii based on the
436 flags in the SMB buffer starting at base_ptr.
437 Return the number of bytes occupied by the string in the destination.
438 flags can have:
439 STR_TERMINATE means include the null termination.
440 STR_UPPER means uppercase in the destination.
441 STR_ASCII use ascii even with unicode packet.
442 STR_NOALIGN means don't do alignment.
443 dest_len is the maximum length allowed in the destination. If dest_len
444 is -1 then no maximum is used.
447 size_t push_string_base(const char *base, uint16_t flags2,
448 void *dest, const char *src,
449 size_t dest_len, int flags)
452 if (!(flags & STR_ASCII) && \
453 ((flags & STR_UNICODE || \
454 (flags2 & FLAGS2_UNICODE_STRINGS)))) {
455 return push_ucs2(base, dest, src, dest_len, flags);
457 return push_ascii(dest, src, dest_len, flags);
461 Copy a string from a unicode or ascii source (depending on
462 the packet flags) to a char* destination.
463 Variant that uses talloc.
464 Flags can have:
465 STR_TERMINATE means the string in src is null terminated.
466 STR_UNICODE means to force as unicode.
467 STR_ASCII use ascii even with unicode packet.
468 STR_NOALIGN means don't do alignment.
469 if STR_TERMINATE is set then src_len is ignored is it is -1
470 src_len is the length of the source area in bytes.
471 Return the number of bytes occupied by the string in src.
472 The resulting string in "dest" is always null terminated.
475 size_t pull_string_talloc(TALLOC_CTX *ctx,
476 const void *base_ptr,
477 uint16_t smb_flags2,
478 char **ppdest,
479 const void *src,
480 size_t src_len,
481 int flags)
483 if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
484 smb_panic("No base ptr to get flg2 and neither ASCII nor "
485 "UNICODE defined");
488 if (!(flags & STR_ASCII) &&
489 ((flags & STR_UNICODE || (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
490 return pull_ucs2_base_talloc(ctx,
491 base_ptr,
492 ppdest,
493 src,
494 src_len,
495 flags);
497 return pull_ascii_base_talloc(ctx,
498 ppdest,
499 src,
500 src_len,
501 flags);
504 /*******************************************************************
505 Write a string in (little-endian) unicode format. src is in
506 the current DOS codepage. len is the length in bytes of the
507 string pointed to by dst.
509 if null_terminate is True then null terminate the packet (adds 2 bytes)
511 the return value is the length in bytes consumed by the string, including the
512 null termination if applied
513 ********************************************************************/
515 size_t dos_PutUniCode(char *dst,const char *src, size_t len, bool null_terminate)
517 int flags = null_terminate ? STR_UNICODE|STR_NOALIGN|STR_TERMINATE
518 : STR_UNICODE|STR_NOALIGN;
519 return push_ucs2(NULL, dst, src, len, flags);
523 /* Converts a string from internal samba format to unicode. Always terminates.
524 * Actually just a wrapper round push_ucs2_talloc().
527 int rpcstr_push_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
529 size_t size;
530 if (push_ucs2_talloc(ctx, dest, src, &size))
531 return size;
532 else
533 return -1;