3967 iconv() blows up when passed (iconv_t)-1
[illumos-gate.git] / usr / src / lib / libc / port / gen / iconv.c
blob622e26da315a62b1c60131a2d9fab09de9e6392f
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include "lint.h"
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/mman.h>
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <dlfcn.h>
34 #include <fcntl.h>
35 #include <unistd.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <sys/param.h>
39 #include <alloca.h>
40 #include "iconv.h"
41 #include "iconvP.h"
42 #include "../i18n/_loc_path.h"
44 static iconv_p iconv_open_all(const char *, const char *, char *);
45 static iconv_p iconv_open_private(const char *, const char *);
46 static iconv_p iconv_search_alias(const char *, const char *, char *);
47 static size_t passthru_icv_iconv(iconv_t, const char **, size_t *, char **,
48 size_t *);
49 static void passthru_icv_close(iconv_t);
51 #define PASSTHRU_MAGIC_NUMBER (0x53756e)
/*
 * These functions are mainly implemented by using a shared object and
 * the dlopen() functions. The actual conversion algorithm for a particular
 * conversion is implemented via a shared object as a loadable conversion
 * module which is linked dynamically at run time.
 *
 * The loadable conversion module resides as either:
 *
 *	/usr/lib/iconv/geniconvtbl.so
 *
 * if the conversion is supported through a geniconvtbl code conversion
 * binary table or as a module that directly specifies the conversion at:
 *
 *	/usr/lib/iconv/fromcode%tocode.so
 *
 * where fromcode is the source encoding and tocode is the target encoding.
 * The modules have 3 entries: _icv_open(), _icv_iconv(), and _icv_close().
 *
 * If no code conversion is supported for the requested pair and the
 * fromcode and the tocode specify the same codeset, then the byte-by-byte
 * pass-through code conversion that is embedded in libc is used instead.
 *
 * The following are the related PSARC cases:
 *
 *	PSARC/1993/153 iconv/iconv_open/iconv_close
 *	PSARC/1999/292 Addition of geniconvtbl(1)
 *	PSARC/2001/072 GNU gettext support
 *	PSARC/2009/561 Pass-through iconv code conversion
 *
 * PSARC/2001/072 includes the /usr/lib/iconv/alias interface.
 */
86 iconv_t
87 iconv_open(const char *tocode, const char *fromcode)
89 iconv_t cd;
90 char *ipath;
92 if ((cd = malloc(sizeof (struct _iconv_info))) == NULL)
93 return ((iconv_t)-1);
96 * Memory for ipath is allocated/released in this function.
98 ipath = malloc(MAXPATHLEN);
99 if (ipath == NULL) {
100 free(cd);
101 return ((iconv_t)-1);
104 cd->_conv = iconv_open_all(tocode, fromcode, ipath);
105 if (cd->_conv != (iconv_p)-1) {
106 /* found a valid module for this conversion */
107 free(ipath);
108 return (cd);
112 * Now, try using the encoding name aliasing table
114 cd->_conv = iconv_search_alias(tocode, fromcode, ipath);
115 free(ipath);
116 if (cd->_conv == (iconv_p)-1) {
118 * As the last resort, check if the tocode and the fromcode
119 * are referring to the same codeset name or not. If so,
120 * assign the embedded pass-through code conversion.
122 if (strcasecmp(tocode, fromcode) != 0) {
124 * No valid conversion available. Do failure retrun
125 * with the errno set by iconv_search_alias().
127 free(cd);
128 return ((iconv_t)-1);
132 * For a pass-through byte-by-byte code conversion, allocate
133 * an internal conversion descriptor and initialize the data
134 * fields appropriately and we are done.
136 cd->_conv = malloc(sizeof (struct _iconv_fields));
137 if (cd->_conv == NULL) {
138 free(cd);
139 return ((iconv_t)-1);
142 cd->_conv->_icv_handle = NULL;
143 cd->_conv->_icv_iconv = passthru_icv_iconv;
144 cd->_conv->_icv_close = passthru_icv_close;
145 cd->_conv->_icv_state = (void *)PASSTHRU_MAGIC_NUMBER;
148 /* found a valid module for this conversion */
149 return (cd);
/* Return the first position in [p, end) that is neither a space nor a tab. */
static char *
alias_skip_blanks(char *p, char *end)
{
	while (p < end && (*p == ' ' || *p == '\t'))
		p++;
	return (p);
}

/* Return the first position in [p, end) holding a space, tab or newline. */
static char *
alias_skip_token(char *p, char *end)
{
	while (p < end && *p != ' ' && *p != '\t' && *p != '\n')
		p++;
	return (p);
}

/* Return the position just past the next newline, or end if there is none. */
static char *
alias_next_line(char *p, char *end)
{
	while (p < end) {
		if (*p++ == '\n')
			break;
	}
	return (p);
}

/*
 * Scan an in-memory alias table for an entry whose first field equals
 * variant (compared ignoring case) and report its second field.
 *
 * The table is the size bytes starting at *paddr.  Each line has the form
 *
 *	variant <blanks> canonical [anything]
 *
 * and a line whose very first character is '#' is a comment.  Lines with
 * only one field are skipped.
 *
 * On a match, *paddr is set to the start of the canonical name inside the
 * buffer (it is NOT NUL terminated) and its length is returned.  If no
 * entry matches, 0 is returned and *paddr is left untouched.
 */
static size_t
search_alias(char **paddr, size_t size, const char *variant)
{
	char	*cur = *paddr;
	char	*end = cur + size;
	size_t	variant_len = strlen(variant);

	while (cur < end) {
		char	*name;
		char	*canon;
		size_t	canon_len;

		if (*cur == '#') {
			/* comment line */
			cur = alias_next_line(cur + 1, end);
			continue;
		}

		/* first field: the alias name */
		name = alias_skip_blanks(cur, end);
		if (name >= end)
			break;
		cur = alias_skip_token(name, end);
		if (cur >= end) {
			/* buffer ended inside the first field */
			break;
		}
		if (*cur == '\n') {
			/* single-field line carries no canonical name */
			cur++;
			continue;
		}

		if ((size_t)(cur - name) != variant_len ||
		    (strncmp(name, variant, variant_len) != 0 &&
		    strncasecmp(name, variant, variant_len) != 0)) {
			/* different alias; cur sits on a blank, move on */
			cur = alias_next_line(cur + 1, end);
			continue;
		}

		/* second field: the canonical name */
		canon = alias_skip_blanks(cur, end);
		if (canon >= end)
			break;
		cur = alias_skip_token(canon, end);
		canon_len = (size_t)(cur - canon);
		if (canon_len == 0) {
			/* alias followed only by blanks */
			cur = alias_next_line(cur, end);
			continue;
		}

		*paddr = canon;
		return (canon_len);
	}

	return (0);
}
231 static iconv_p
232 iconv_open_all(const char *to, const char *from, char *ipath)
234 iconv_p cv;
235 int len;
238 * First, try using the geniconvtbl conversion, which is
239 * performed by /usr/lib/iconv/geniconvtbl.so with
240 * the conversion table file:
241 * /usr/lib/iconv/geniconvtbl/binarytables/fromcode%tocode.bt
243 * If the geniconvtbl conversion cannot be done,
244 * try the conversion by the individual shared object.
247 len = snprintf(ipath, MAXPATHLEN, _GENICONVTBL_PATH, from, to);
248 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
250 * from%to.bt exists in the table dir
252 cv = iconv_open_private(_GENICONVTBL_INT_PATH, ipath);
253 if (cv != (iconv_p)-1) {
254 /* found a valid module for this conversion */
255 return (cv);
259 /* Next, try /usr/lib/iconv/from%to.so */
260 len = snprintf(ipath, MAXPATHLEN, _ICONV_PATH, from, to);
261 if ((len <= MAXPATHLEN) && (access(ipath, R_OK) == 0)) {
263 * /usr/lib/iconv/from%to.so exists
264 * errno will be set by iconv_open_private on error
266 return (iconv_open_private(ipath, NULL));
268 /* no valid module for this conversion found */
269 errno = EINVAL;
270 return ((iconv_p)-1);
273 static iconv_p
274 iconv_search_alias(const char *tocode, const char *fromcode, char *ipath)
276 char *p;
277 char *to_canonical, *from_canonical;
278 size_t tolen, fromlen;
279 iconv_p cv;
280 int fd;
281 struct stat64 statbuf;
282 caddr_t addr;
283 size_t buflen;
285 fd = open(_ENCODING_ALIAS_PATH, O_RDONLY);
286 if (fd == -1) {
288 * if no alias file found,
289 * errno will be set to EINVAL.
291 errno = EINVAL;
292 return ((iconv_p)-1);
294 if (fstat64(fd, &statbuf) == -1) {
295 (void) close(fd);
296 /* use errno set by fstat64 */
297 return ((iconv_p)-1);
299 buflen = (size_t)statbuf.st_size;
300 addr = mmap(NULL, buflen, PROT_READ, MAP_SHARED, fd, 0);
301 (void) close(fd);
302 if (addr == MAP_FAILED) {
303 /* use errno set by mmap */
304 return ((iconv_p)-1);
306 p = (char *)addr;
307 tolen = search_alias(&p, buflen, tocode);
308 if (tolen) {
309 to_canonical = alloca(tolen + 1);
310 (void) memcpy(to_canonical, p, tolen);
311 to_canonical[tolen] = '\0';
312 } else {
313 to_canonical = (char *)tocode;
315 p = (char *)addr;
316 fromlen = search_alias(&p, buflen, fromcode);
317 if (fromlen) {
318 from_canonical = alloca(fromlen + 1);
319 (void) memcpy(from_canonical, p, fromlen);
320 from_canonical[fromlen] = '\0';
321 } else {
322 from_canonical = (char *)fromcode;
324 (void) munmap(addr, buflen);
325 if (tolen == 0 && fromlen == 0) {
326 errno = EINVAL;
327 return ((iconv_p)-1);
330 cv = iconv_open_all(to_canonical, from_canonical, ipath);
332 /* errno set by iconv_open_all on error */
333 return (cv);
336 static iconv_p
337 iconv_open_private(const char *lib, const char *tbl)
339 iconv_t (*fptr)(const char *);
340 iconv_p cdpath;
342 if ((cdpath = malloc(sizeof (struct _iconv_fields))) == NULL)
343 return ((iconv_p)-1);
345 if ((cdpath->_icv_handle = dlopen(lib, RTLD_LAZY)) == 0) {
346 free(cdpath);
347 /* dlopen does not define error no */
348 errno = EINVAL;
349 return ((iconv_p)-1);
352 /* gets address of _icv_open */
353 if ((fptr = (iconv_t(*)(const char *))dlsym(cdpath->_icv_handle,
354 "_icv_open")) == NULL) {
355 (void) dlclose(cdpath->_icv_handle);
356 free(cdpath);
357 /* dlsym does not define errno */
358 errno = EINVAL;
359 return ((iconv_p)-1);
363 * gets address of _icv_iconv in the loadable conversion module
364 * and stores it in cdpath->_icv_iconv
367 if ((cdpath->_icv_iconv = (size_t(*)(iconv_t, const char **,
368 size_t *, char **, size_t *))dlsym(cdpath->_icv_handle,
369 "_icv_iconv")) == NULL) {
370 (void) dlclose(cdpath->_icv_handle);
371 free(cdpath);
372 /* dlsym does not define errno */
373 errno = EINVAL;
374 return ((iconv_p)-1);
378 * gets address of _icv_close in the loadable conversion module
379 * and stores it in cd->_icv_close
381 if ((cdpath->_icv_close = (void(*)(iconv_t))dlsym(cdpath->_icv_handle,
382 "_icv_close")) == NULL) {
383 (void) dlclose(cdpath->_icv_handle);
384 free(cdpath);
385 /* dlsym does not define errno */
386 errno = EINVAL;
387 return ((iconv_p)-1);
391 * initialize the state of the actual _icv_iconv conversion routine
392 * For the normal iconv module, NULL will be passed as an argument
393 * although the iconv_open() of the module won't use that.
395 cdpath->_icv_state = (void *)(*fptr)(tbl);
397 if (cdpath->_icv_state == (struct _icv_state *)-1) {
398 (void) dlclose(cdpath->_icv_handle);
399 free(cdpath);
400 /* this module does not satisfy this conversion */
401 errno = EINVAL;
402 return ((iconv_p)-1);
405 return (cdpath);
409 iconv_close(iconv_t cd)
411 if (cd == NULL) {
412 errno = EBADF;
413 return (-1);
415 (*(cd->_conv)->_icv_close)(cd->_conv->_icv_state);
416 if (cd->_conv->_icv_handle != NULL)
417 (void) dlclose(cd->_conv->_icv_handle);
418 free(cd->_conv);
419 free(cd);
420 return (0);
424 * To have minimal performance impact to the existing run-time behavior,
425 * we supply a dummy passthru_icv_close() that will just return.
427 static void
428 /*LINTED E_FUNC_ARG_UNUSED*/
429 passthru_icv_close(iconv_t cd)
433 size_t
434 iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft,
435 char **outbuf, size_t *outbytesleft)
437 /* check if cd is valid */
438 if (cd == NULL || cd == (iconv_t)-1) {
439 errno = EBADF;
440 return ((size_t)-1);
443 /* direct conversion */
444 return ((*(cd->_conv)->_icv_iconv)(cd->_conv->_icv_state,
445 inbuf, inbytesleft, outbuf, outbytesleft));
448 static size_t
449 passthru_icv_iconv(iconv_t cd, const char **inbuf, size_t *inbufleft,
450 char **outbuf, size_t *outbufleft)
452 size_t ibl;
453 size_t obl;
454 size_t len;
455 size_t ret_val;
457 /* Check if the conversion descriptor is a valid one. */
458 if (cd != (iconv_t)PASSTHRU_MAGIC_NUMBER) {
459 errno = EBADF;
460 return ((size_t)-1);
463 /* For any state reset request, return success. */
464 if (inbuf == NULL || *inbuf == NULL)
465 return (0);
468 * Initialize internally used variables for a better performance
469 * and prepare for a couple of the return values before the actual
470 * copying of the bytes.
472 ibl = *inbufleft;
473 obl = *outbufleft;
475 if (ibl > obl) {
476 len = obl;
477 errno = E2BIG;
478 ret_val = (size_t)-1;
479 } else {
480 len = ibl;
481 ret_val = 0;
485 * Do the copy using memmove(). There are no EILSEQ or EINVAL
486 * checkings since this is a simple copying.
488 (void) memmove((void *)*outbuf, (const void *)*inbuf, len);
490 /* Update the return values related to the buffers then do return. */
491 *inbuf = *inbuf + len;
492 *outbuf = *outbuf + len;
493 *inbufleft = ibl - len;
494 *outbufleft = obl - len;
496 return (ret_val);