/*
   Unix SMB/CIFS implementation.
   minimal iconv implementation
   Copyright (C) Andrew Tridgell 2001

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
 * Forward declarations for the built-in character set converters.
 *
 * Every converter has the same signature so that it can be stored in the
 * charsets table and installed as the direct/pull/push handler of a
 * conversion descriptor:
 *
 *   cd            opaque per-conversion handle
 *   inbuf         in/out: pointer to the current read position
 *   inbytesleft   in/out: number of input bytes still available
 *   outbuf        in/out: pointer to the current write position
 *   outbytesleft  in/out: number of bytes still free in the output
 *
 * A *_pull function converts from its character set to UCS-2 (two output
 * bytes per character); a *_push function converts from UCS-2 back to its
 * character set.  iconv_copy is the identity conversion, used for UCS-2LE
 * and when source and destination character sets have the same name.
 * smb_iconv() treats a return value of -1 as failure.
 */
static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft);
static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
			char **outbuf, size_t *outbytesleft);
static size_t weird_pull(void *cd, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t weird_push(void *cd, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft);
static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft,
			   char **outbuf, size_t *outbytesleft);
static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
/*
 for each charset we have a function that pulls from that charset to
 a ucs2 buffer, and a function that pushes from a ucs2 buffer to that charset
*/
39 size_t (*pull
)(void *, char **inbuf
, size_t *inbytesleft
,
40 char **outbuf
, size_t *outbytesleft
);
41 size_t (*push
)(void *, char **inbuf
, size_t *inbytesleft
,
42 char **outbuf
, size_t *outbytesleft
);
44 {"UCS-2LE", iconv_copy
, iconv_copy
},
45 {"UTF8", utf8_pull
, utf8_push
},
46 {"ASCII", ascii_pull
, ascii_push
},
47 {"WEIRD", weird_pull
, weird_push
},
48 {"UCS2-HEX", ucs2hex_pull
, ucs2hex_push
},
/* if there was an error then reset the internal state;
   this ensures that we don't have a shift state remaining for
   character sets like SJIS */
56 static size_t sys_iconv(void *cd
,
57 char **inbuf
, size_t *inbytesleft
,
58 char **outbuf
, size_t *outbytesleft
)
60 #ifdef HAVE_NATIVE_ICONV
61 size_t ret
= iconv((iconv_t
)cd
,
63 outbuf
, outbytesleft
);
64 if (ret
== (size_t)-1) iconv(cd
, NULL
, NULL
, NULL
, NULL
);
/*
 * This is a simple portable iconv() implementation.
 *
 * It only knows about a very small number of character sets - just
 * enough that Samba works on systems that don't have iconv.
 */
78 size_t smb_iconv(smb_iconv_t cd
,
79 const char **inbuf
, size_t *inbytesleft
,
80 char **outbuf
, size_t *outbytesleft
)
86 /* in many cases we can go direct */
88 return cd
->direct(cd
->cd_direct
,
89 (char **)inbuf
, inbytesleft
, outbuf
, outbytesleft
);
93 /* otherwise we have to do it chunks at a time */
94 while (*inbytesleft
> 0) {
96 bufsize
= sizeof(cvtbuf
);
98 if (cd
->pull(cd
->cd_pull
,
99 (char **)inbuf
, inbytesleft
, &bufp
, &bufsize
) == -1
100 && errno
!= E2BIG
) return -1;
103 bufsize
= sizeof(cvtbuf
) - bufsize
;
105 if (cd
->push(cd
->cd_push
,
107 outbuf
, outbytesleft
) == -1) return -1;
/*
  simple iconv_open() wrapper
 */
116 smb_iconv_t
smb_iconv_open(const char *tocode
, const char *fromcode
)
121 ret
= (smb_iconv_t
)malloc(sizeof(*ret
));
124 return (smb_iconv_t
)-1;
126 memset(ret
, 0, sizeof(*ret
));
128 ret
->from_name
= strdup(fromcode
);
129 ret
->to_name
= strdup(tocode
);
131 /* check for the simplest null conversion */
132 if (strcmp(fromcode
, tocode
) == 0) {
133 ret
->direct
= iconv_copy
;
137 for (from
=0; charsets
[from
].name
; from
++) {
138 if (strcasecmp(charsets
[from
].name
, fromcode
) == 0) break;
140 for (to
=0; charsets
[to
].name
; to
++) {
141 if (strcasecmp(charsets
[to
].name
, tocode
) == 0) break;
144 #ifdef HAVE_NATIVE_ICONV
145 if (!charsets
[from
].name
) {
146 ret
->pull
= sys_iconv
;
147 ret
->cd_pull
= iconv_open("UCS-2LE", fromcode
);
148 if (ret
->cd_pull
== (iconv_t
)-1) goto failed
;
150 if (!charsets
[to
].name
) {
151 ret
->push
= sys_iconv
;
152 ret
->cd_push
= iconv_open(tocode
, "UCS-2LE");
153 if (ret
->cd_push
== (iconv_t
)-1) goto failed
;
156 if (!charsets
[from
].name
|| !charsets
[to
].name
) {
161 /* check for conversion to/from ucs2 */
162 if (from
== 0 && charsets
[to
].name
) {
163 ret
->direct
= charsets
[to
].push
;
166 if (to
== 0 && charsets
[from
].name
) {
167 ret
->direct
= charsets
[from
].pull
;
171 #ifdef HAVE_NATIVE_ICONV
173 ret
->direct
= sys_iconv
;
174 ret
->cd_direct
= ret
->cd_push
;
179 ret
->direct
= sys_iconv
;
180 ret
->cd_direct
= ret
->cd_pull
;
186 /* the general case has to go via a buffer */
187 if (!ret
->pull
) ret
->pull
= charsets
[from
].pull
;
188 if (!ret
->push
) ret
->push
= charsets
[to
].push
;
194 return (smb_iconv_t
)-1;
/*
  simple iconv_close() wrapper
*/
200 int smb_iconv_close (smb_iconv_t cd
)
202 #ifdef HAVE_NATIVE_ICONV
203 if (cd
->cd_direct
) iconv_close((iconv_t
)cd
->cd_direct
);
204 if (cd
->cd_pull
) iconv_close((iconv_t
)cd
->cd_pull
);
205 if (cd
->cd_push
) iconv_close((iconv_t
)cd
->cd_push
);
208 SAFE_FREE(cd
->from_name
);
209 SAFE_FREE(cd
->to_name
);
211 memset(cd
, 0, sizeof(*cd
));
/**********************************************************************
 the following functions implement the builtin character sets in Samba
 and also the "test" character sets that are designed to test
 multi-byte character set support for English users
***********************************************************************/
223 static size_t ascii_pull(void *cd
, char **inbuf
, size_t *inbytesleft
,
224 char **outbuf
, size_t *outbytesleft
)
226 while (*inbytesleft
>= 1 && *outbytesleft
>= 2) {
227 (*outbuf
)[0] = (*inbuf
)[0];
230 (*outbytesleft
) -= 2;
235 if (*inbytesleft
> 0) {
243 static size_t ascii_push(void *cd
, char **inbuf
, size_t *inbytesleft
,
244 char **outbuf
, size_t *outbytesleft
)
248 while (*inbytesleft
>= 2 && *outbytesleft
>= 1) {
249 (*outbuf
)[0] = (*inbuf
)[0] & 0x7F;
250 if ((*inbuf
)[1]) ir_count
++;
252 (*outbytesleft
) -= 1;
257 if (*inbytesleft
== 1) {
262 if (*inbytesleft
> 1) {
271 static size_t ucs2hex_pull(void *cd
, char **inbuf
, size_t *inbytesleft
,
272 char **outbuf
, size_t *outbytesleft
)
274 while (*inbytesleft
>= 1 && *outbytesleft
>= 2) {
277 if ((*inbuf
)[0] != '@') {
278 /* seven bit ascii case */
279 (*outbuf
)[0] = (*inbuf
)[0];
282 (*outbytesleft
) -= 2;
287 /* it's a hex character */
288 if (*inbytesleft
< 5) {
293 if (sscanf(&(*inbuf
)[1], "%04x", &v
) != 1) {
298 (*outbuf
)[0] = v
&0xff;
301 (*outbytesleft
) -= 2;
306 if (*inbytesleft
> 0) {
314 static size_t ucs2hex_push(void *cd
, char **inbuf
, size_t *inbytesleft
,
315 char **outbuf
, size_t *outbytesleft
)
317 while (*inbytesleft
>= 2 && *outbytesleft
>= 1) {
320 if ((*inbuf
)[1] == 0 &&
321 ((*inbuf
)[0] & 0x80) == 0 &&
322 (*inbuf
)[0] != '@') {
323 (*outbuf
)[0] = (*inbuf
)[0];
325 (*outbytesleft
) -= 1;
330 if (*outbytesleft
< 5) {
334 snprintf(buf
, 6, "@%04x", SVAL(*inbuf
, 0));
335 memcpy(*outbuf
, buf
, 5);
337 (*outbytesleft
) -= 5;
342 if (*inbytesleft
== 1) {
347 if (*inbytesleft
> 1) {
/* the "weird" character set is very useful for testing multi-byte
   support and finding bugs. Don't use on a production system!
*/
369 static size_t weird_pull(void *cd
, char **inbuf
, size_t *inbytesleft
,
370 char **outbuf
, size_t *outbytesleft
)
372 while (*inbytesleft
>= 1 && *outbytesleft
>= 2) {
375 for (i
=0;weird_table
[i
].from
;i
++) {
376 if (strncmp((*inbuf
),
378 weird_table
[i
].len
) == 0) {
379 if (*inbytesleft
< weird_table
[i
].len
) {
380 DEBUG(0,("ERROR: truncated weird string\n"));
381 /* smb_panic("weird_pull"); */
384 (*outbuf
)[0] = weird_table
[i
].from
;
386 (*inbytesleft
) -= weird_table
[i
].len
;
387 (*outbytesleft
) -= 2;
388 (*inbuf
) += weird_table
[i
].len
;
396 (*outbuf
)[0] = (*inbuf
)[0];
399 (*outbytesleft
) -= 2;
404 if (*inbytesleft
> 0) {
412 static size_t weird_push(void *cd
, char **inbuf
, size_t *inbytesleft
,
413 char **outbuf
, size_t *outbytesleft
)
417 while (*inbytesleft
>= 2 && *outbytesleft
>= 1) {
420 for (i
=0;weird_table
[i
].from
;i
++) {
421 if ((*inbuf
)[0] == weird_table
[i
].from
&&
423 if (*outbytesleft
< weird_table
[i
].len
) {
424 DEBUG(0,("No room for weird character\n"));
425 /* smb_panic("weird_push"); */
427 memcpy(*outbuf
, weird_table
[i
].to
,
430 (*outbytesleft
) -= weird_table
[i
].len
;
432 (*outbuf
) += weird_table
[i
].len
;
440 (*outbuf
)[0] = (*inbuf
)[0];
441 if ((*inbuf
)[1]) ir_count
++;
443 (*outbytesleft
) -= 1;
448 if (*inbytesleft
== 1) {
453 if (*inbytesleft
> 1) {
461 static size_t iconv_copy(void *cd
, char **inbuf
, size_t *inbytesleft
,
462 char **outbuf
, size_t *outbytesleft
)
466 n
= MIN(*inbytesleft
, *outbytesleft
);
468 memmove(*outbuf
, *inbuf
, n
);
471 (*outbytesleft
) -= n
;
475 if (*inbytesleft
> 0) {
483 static size_t utf8_pull(void *cd
, char **inbuf
, size_t *inbytesleft
,
484 char **outbuf
, size_t *outbytesleft
)
486 while (*inbytesleft
>= 1 && *outbytesleft
>= 2) {
487 unsigned char *c
= (unsigned char *)*inbuf
;
488 unsigned char *uc
= (unsigned char *)*outbuf
;
491 if ((c
[0] & 0x80) == 0) {
494 } else if ((c
[0] & 0xf0) == 0xe0) {
495 if (*inbytesleft
< 3) {
496 DEBUG(0,("short utf8 char\n"));
499 uc
[1] = ((c
[0]&0xF)<<4) | ((c
[1]>>2)&0xF);
500 uc
[0] = (c
[1]<<6) | (c
[2]&0x3f);
502 } else if ((c
[0] & 0xe0) == 0xc0) {
503 if (*inbytesleft
< 2) {
504 DEBUG(0,("short utf8 char\n"));
507 uc
[1] = (c
[0]>>2) & 0x7;
508 uc
[0] = (c
[0]<<6) | (c
[1]&0x3f);
513 (*inbytesleft
) -= len
;
514 (*outbytesleft
) -= 2;
518 if (*inbytesleft
> 0) {
530 static size_t utf8_push(void *cd
, char **inbuf
, size_t *inbytesleft
,
531 char **outbuf
, size_t *outbytesleft
)
533 while (*inbytesleft
>= 2 && *outbytesleft
>= 1) {
534 unsigned char *c
= (unsigned char *)*outbuf
;
535 unsigned char *uc
= (unsigned char *)*inbuf
;
539 if (*outbytesleft
< 3) {
540 DEBUG(0,("short utf8 write\n"));
543 c
[0] = 0xe0 | (uc
[1]>>4);
544 c
[1] = 0x80 | ((uc
[1]&0xF)<<2) | (uc
[0]>>6);
545 c
[2] = 0x80 | (uc
[0]&0x3f);
547 } else if (uc
[1] | (uc
[0] & 0x80)) {
548 if (*outbytesleft
< 2) {
549 DEBUG(0,("short utf8 write\n"));
552 c
[0] = 0xc0 | (uc
[1]<<2) | (uc
[0]>>6);
553 c
[1] = 0x80 | (uc
[0]&0x3f);
561 (*outbytesleft
) -= len
;
566 if (*inbytesleft
== 1) {
571 if (*inbytesleft
> 1) {