4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Unicode conversions (yet more)
38 #include <sys/u8_textprep.h>
40 #include <netsmb/smb_lib.h>
45 * Number of 16-bit Unicode code units in the string,
46 * not including the 2-byte null terminator.
47 * (Multiply by two for the storage size in bytes.)
50 unicode_strlen(const uint16_t *us
)
58 static char *convert_ucs2xx_to_utf8(iconv_t
, const uint16_t *);
61 * Convert (native) Unicode string to UTF-8.
62 * Returns allocated memory.
65 convert_unicode_to_utf8(uint16_t *us
)
67 static iconv_t cd1
= (iconv_t
)-1;
69 /* Get conversion descriptor (to, from) */
70 if (cd1
== (iconv_t
)-1)
71 cd1
= iconv_open("UTF-8", "UCS-2");
73 return (convert_ucs2xx_to_utf8(cd1
, us
));
77 * Convert little-endian Unicode string to UTF-8.
78 * Returns allocated memory.
81 convert_leunicode_to_utf8(unsigned short *us
)
83 static iconv_t cd2
= (iconv_t
)-1;
85 /* Get conversion descriptor (to, from) */
86 if (cd2
== (iconv_t
)-1)
87 cd2
= iconv_open("UTF-8", "UCS-2LE");
89 return (convert_ucs2xx_to_utf8(cd2
, us
));
93 convert_ucs2xx_to_utf8(iconv_t cd
, const uint16_t *us
)
97 size_t ileft
, obsize
, oleft
, ret
;
99 if (cd
== (iconv_t
)-1) {
100 smb_error(dgettext(TEXT_DOMAIN
,
101 "iconv_open(UTF-8/UCS-2)"), -1);
105 iptr
= (const char *)us
;
106 ileft
= unicode_strlen(us
);
107 ileft
*= 2; /* now bytes */
109 /* Worst-case output size is 2x input size. */
111 obsize
= oleft
+ 2; /* room for null */
112 obuf
= malloc(obsize
);
117 ret
= iconv(cd
, &iptr
, &ileft
, &optr
, &oleft
);
119 if (ret
== (size_t)-1) {
120 smb_error(dgettext(TEXT_DOMAIN
,
121 "iconv(%s) failed"), errno
, obuf
);
124 smb_error(dgettext(TEXT_DOMAIN
,
125 "iconv(%s) failed"), -1, obuf
);
127 * XXX: What's better? return NULL?
128 * The truncated string? << for now
135 static uint16_t *convert_utf8_to_ucs2xx(iconv_t
, const char *);
138 * Convert UTF-8 string to Unicode.
139 * Returns allocated memory.
142 convert_utf8_to_unicode(const char *utf8_string
)
144 static iconv_t cd3
= (iconv_t
)-1;
146 /* Get conversion descriptor (to, from) */
147 if (cd3
== (iconv_t
)-1)
148 cd3
= iconv_open("UCS-2", "UTF-8");
149 return (convert_utf8_to_ucs2xx(cd3
, utf8_string
));
153 * Convert UTF-8 string to little-endian Unicode.
154 * Returns allocated memory.
157 convert_utf8_to_leunicode(const char *utf8_string
)
159 static iconv_t cd4
= (iconv_t
)-1;
161 /* Get conversion descriptor (to, from) */
162 if (cd4
== (iconv_t
)-1)
163 cd4
= iconv_open("UCS-2LE", "UTF-8");
164 return (convert_utf8_to_ucs2xx(cd4
, utf8_string
));
168 convert_utf8_to_ucs2xx(iconv_t cd
, const char *utf8_string
)
170 uint16_t *obuf
, *optr
;
172 size_t ileft
, obsize
, oleft
, ret
;
174 if (cd
== (iconv_t
)-1) {
175 smb_error(dgettext(TEXT_DOMAIN
,
176 "iconv_open(UCS-2/UTF-8)"), -1);
181 ileft
= strlen(iptr
);
183 /* Worst-case output size is 2x input size. */
185 obsize
= oleft
+ 2; /* room for null */
186 obuf
= malloc(obsize
);
191 ret
= iconv(cd
, &iptr
, &ileft
, (char **)&optr
, &oleft
);
193 if (ret
== (size_t)-1) {
194 smb_error(dgettext(TEXT_DOMAIN
,
195 "iconv(%s) failed"), errno
, utf8_string
);
198 smb_error(dgettext(TEXT_DOMAIN
,
199 "iconv(%s) failed"), -1, utf8_string
);
201 * XXX: What's better? return NULL?
202 * The truncated string? << for now
211 * A simple wrapper around u8_textprep_str() that returns the Unicode
212 * upper-case or lower-case version of some string, as selected by
213 * upper_lower. Returns memory from malloc. Borrowed from idmapd.
216 utf8_str_to_upper_or_lower(const char *s
, int upper_lower
)
220 size_t inlen
, outlen
, inbleft
, outbleft
;
224 * u8_textprep_str() does not allocate memory. The input and
225 * output buffers may differ in size (though that would be more
226 * likely when normalization is done). We have to loop over it...
228 * To improve the chances that we can avoid looping we add 10
229 * bytes of output buffer room the first go around.
231 inlen
= inbleft
= strlen(s
);
232 outlen
= outbleft
= inlen
+ 10;
233 if ((res
= malloc(outlen
)) == NULL
)
237 while ((rc
= u8_textprep_str((char *)s
, &inbleft
, outs
,
238 &outbleft
, upper_lower
, U8_UNICODE_LATEST
, &err
)) < 0 &&
240 if ((res
= realloc(res
, outlen
+ inbleft
)) == NULL
)
242 /* adjust input/output buffer pointers */
243 s
+= (inlen
- inbleft
);
244 outs
= res
+ outlen
- outbleft
;
245 /* adjust outbleft and outlen */
256 res
[outlen
- outbleft
] = '\0';
262 utf8_str_toupper(const char *s
)
264 return (utf8_str_to_upper_or_lower(s
, U8_TEXTPREP_TOUPPER
));
268 utf8_str_tolower(const char *s
)
270 return (utf8_str_to_upper_or_lower(s
, U8_TEXTPREP_TOLOWER
));