Sync Citrus iconv support with NetBSD.
[dragonfly.git] / lib / libc / citrus / modules / citrus_iconv_std.c
blob43e6c0492f43b0dd8f25254d51f7ee5f3867645f
1 /* $NetBSD: citrus_iconv_std.c,v 1.15 2006/11/13 19:08:19 tnozaki Exp $ */
2 /* $DragonFly: src/lib/libc/citrus/modules/citrus_iconv_std.c,v 1.2 2008/04/10 10:21:01 hasso Exp $ */
4 /*-
5 * Copyright (c)2003 Citrus Project,
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
30 #include <sys/types.h>
31 #include <sys/endian.h>
32 #include <sys/queue.h>
33 #include <assert.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
40 #include "citrus_namespace.h"
41 #include "citrus_types.h"
42 #include "citrus_module.h"
43 #include "citrus_region.h"
44 #include "citrus_mmap.h"
45 #include "citrus_hash.h"
46 #include "citrus_iconv.h"
47 #include "citrus_stdenc.h"
48 #include "citrus_mapper.h"
49 #include "citrus_csmapper.h"
50 #include "citrus_memstream.h"
51 #include "citrus_iconv_std.h"
52 #include "citrus_esdb.h"
54 /* ---------------------------------------------------------------------- */
/*
 * Module boilerplate for the "iconv_std" converter.
 * NOTE(review): these macros are defined outside this file; presumably the
 * first one prototypes the _citrus_iconv_std_iconv_* entry points defined
 * below and the second one defines the _citrus_iconv_std_iconv_ops table
 * that _citrus_iconv_std_iconv_getops() copies out -- confirm against
 * citrus_iconv.h.
 */
56 _CITRUS_ICONV_DECLS(iconv_std);
57 _CITRUS_ICONV_DEF_OPS(iconv_std);
60 /* ---------------------------------------------------------------------- */
62 int
63 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
64 u_int32_t expected_version)
66 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
67 return (EINVAL);
69 memcpy(ops, &_citrus_iconv_std_iconv_ops,
70 sizeof(_citrus_iconv_std_iconv_ops));
72 return (0);
75 /* ---------------------------------------------------------------------- */
78 * convenience routines for stdenc.
80 static __inline void
81 save_encoding_state(struct _citrus_iconv_std_encoding *se)
83 if (se->se_ps)
84 memcpy(se->se_pssaved, se->se_ps,
85 _stdenc_get_state_size(se->se_handle));
88 static __inline void
89 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
91 if (se->se_ps)
92 memcpy(se->se_ps, se->se_pssaved,
93 _stdenc_get_state_size(se->se_handle));
96 static __inline void
97 init_encoding_state(struct _citrus_iconv_std_encoding *se)
99 if (se->se_ps)
100 _stdenc_init_state(se->se_handle, se->se_ps);
103 static __inline int
104 mbtocsx(struct _citrus_iconv_std_encoding *se,
105 _csid_t *csid, _index_t *idx, const char **s, size_t n,
106 size_t *nresult)
108 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
109 nresult);
112 static __inline int
113 cstombx(struct _citrus_iconv_std_encoding *se,
114 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
116 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
117 nresult);
120 static __inline int
121 wctombx(struct _citrus_iconv_std_encoding *se,
122 char *s, size_t n, _wc_t wc, size_t *nresult)
124 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
127 static __inline int
128 put_state_resetx(struct _citrus_iconv_std_encoding *se,
129 char *s, size_t n, size_t *nresult)
131 return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
134 static __inline int
135 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
137 int ret;
138 struct _stdenc_state_desc ssd;
140 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
141 _STDENC_SDID_GENERIC, &ssd);
142 if (!ret)
143 *rstate = ssd.u.generic.state;
145 return ret;
149 * init encoding context
151 static int
152 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
153 void *ps1, void *ps2)
155 int ret = -1;
157 se->se_handle = cs;
158 se->se_ps = ps1;
159 se->se_pssaved = ps2;
161 if (se->se_ps)
162 ret = _stdenc_init_state(cs, se->se_ps);
163 if (!ret && se->se_pssaved)
164 ret = _stdenc_init_state(cs, se->se_pssaved);
166 return ret;
/*
 * Open the charset mapper that maps src to dst.
 *
 * Only mappers whose source maximum and destination maximum are both 1
 * and whose state size is 0 are accepted; any other mapper is closed
 * again and EINVAL is returned.
 *
 * rcm   - receives the opened mapper on success
 * rnorm - passed through to _csmapper_open()
 *
 * Returns 0 on success, the error from _csmapper_open(), or EINVAL.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	/* unsupported mapper shape. */
	_csmapper_close(mapper);
	return EINVAL;
}
190 static void
191 close_dsts(struct _citrus_iconv_std_dst_list *dl)
193 struct _citrus_iconv_std_dst *sd;
195 while ((sd=TAILQ_FIRST(dl)) != NULL) {
196 TAILQ_REMOVE(dl, sd, sd_entry);
197 _csmapper_close(sd->sd_mapper);
198 free(sd);
202 static int
203 open_dsts(struct _citrus_iconv_std_dst_list *dl,
204 const struct _esdb_charset *ec, const struct _esdb *dbdst)
206 int i, ret;
207 struct _citrus_iconv_std_dst *sd, *sdtmp;
208 unsigned long norm;
210 sd = malloc(sizeof(*sd));
211 if (sd == NULL)
212 return errno;
214 for (i=0; i<dbdst->db_num_charsets; i++) {
215 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
216 dbdst->db_charsets[i].ec_csname, &norm);
217 if (ret == 0) {
218 sd->sd_csid = dbdst->db_charsets[i].ec_csid;
219 sd->sd_norm = norm;
220 /* insert this mapper by sorted order. */
221 TAILQ_FOREACH(sdtmp, dl, sd_entry) {
222 if (sdtmp->sd_norm > norm) {
223 TAILQ_INSERT_BEFORE(sdtmp, sd,
224 sd_entry);
225 sd = NULL;
226 break;
229 if (sd)
230 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
231 sd = malloc(sizeof(*sd));
232 if (sd == NULL) {
233 ret = errno;
234 close_dsts(dl);
235 return ret;
237 } else if (ret != ENOENT) {
238 close_dsts(dl);
239 free(sd);
240 return ret;
243 free(sd);
244 return 0;
247 static void
248 close_srcs(struct _citrus_iconv_std_src_list *sl)
250 struct _citrus_iconv_std_src *ss;
252 while ((ss=TAILQ_FIRST(sl)) != NULL) {
253 TAILQ_REMOVE(sl, ss, ss_entry);
254 close_dsts(&ss->ss_dsts);
255 free(ss);
259 static int
260 open_srcs(struct _citrus_iconv_std_src_list *sl,
261 const struct _esdb *dbsrc, const struct _esdb *dbdst)
263 int i, ret, count = 0;
264 struct _citrus_iconv_std_src *ss;
266 ss = malloc(sizeof(*ss));
267 if (ss == NULL)
268 return errno;
270 TAILQ_INIT(&ss->ss_dsts);
272 for (i=0; i<dbsrc->db_num_charsets; i++) {
273 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
274 if (ret)
275 goto err;
276 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
277 ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
278 TAILQ_INSERT_TAIL(sl, ss, ss_entry);
279 ss = malloc(sizeof(*ss));
280 if (ss == NULL) {
281 ret = errno;
282 goto err;
284 count++;
285 TAILQ_INIT(&ss->ss_dsts);
288 free(ss);
290 return count ? 0 : ENOENT;
292 err:
293 free(ss);
294 close_srcs(sl);
295 return ret;
298 /* do convert a character */
299 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
300 static int
301 /*ARGSUSED*/
302 do_conv(const struct _citrus_iconv_std_shared *is,
303 struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
305 _index_t tmpidx;
306 int ret;
307 struct _citrus_iconv_std_src *ss;
308 struct _citrus_iconv_std_dst *sd;
310 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
311 if (ss->ss_csid == *csid) {
312 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
313 ret = _csmapper_convert(sd->sd_mapper,
314 &tmpidx, *idx, NULL);
315 switch (ret) {
316 case _MAPPER_CONVERT_SUCCESS:
317 *csid = sd->sd_csid;
318 *idx = tmpidx;
319 return 0;
320 case _MAPPER_CONVERT_NONIDENTICAL:
321 break;
322 case _MAPPER_CONVERT_SRC_MORE:
323 /*FALLTHROUGH*/
324 case _MAPPER_CONVERT_DST_MORE:
325 /*FALLTHROUGH*/
326 case _MAPPER_CONVERT_FATAL:
327 return EINVAL;
328 case _MAPPER_CONVERT_ILSEQ:
329 return EILSEQ;
332 break;
336 return E_NO_CORRESPONDING_CHAR;
338 /* ---------------------------------------------------------------------- */
340 static int
341 /*ARGSUSED*/
342 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
343 const char * __restrict curdir,
344 const char * __restrict src,
345 const char * __restrict dst,
346 const void * __restrict var, size_t lenvar)
348 int ret;
349 struct _citrus_iconv_std_shared *is;
350 struct _citrus_esdb esdbsrc, esdbdst;
352 is = malloc(sizeof(*is));
353 if (is==NULL) {
354 ret = errno;
355 goto err0;
357 ret = _citrus_esdb_open(&esdbsrc, src);
358 if (ret)
359 goto err1;
360 ret = _citrus_esdb_open(&esdbdst, dst);
361 if (ret)
362 goto err2;
363 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
364 esdbsrc.db_variable, esdbsrc.db_len_variable);
365 if (ret)
366 goto err3;
367 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
368 esdbdst.db_variable, esdbdst.db_len_variable);
369 if (ret)
370 goto err4;
371 is->is_use_invalid = esdbdst.db_use_invalid;
372 is->is_invalid = esdbdst.db_invalid;
374 TAILQ_INIT(&is->is_srcs);
375 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
376 if (ret)
377 goto err5;
379 _esdb_close(&esdbsrc);
380 _esdb_close(&esdbdst);
381 ci->ci_closure = is;
383 return 0;
385 err5:
386 _stdenc_close(is->is_dst_encoding);
387 err4:
388 _stdenc_close(is->is_src_encoding);
389 err3:
390 _esdb_close(&esdbdst);
391 err2:
392 _esdb_close(&esdbsrc);
393 err1:
394 free(is);
395 err0:
396 return ret;
399 static void
400 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
402 struct _citrus_iconv_std_shared *is = ci->ci_closure;
404 if (is == NULL)
405 return;
407 _stdenc_close(is->is_src_encoding);
408 _stdenc_close(is->is_dst_encoding);
409 close_srcs(&is->is_srcs);
410 free(is);
413 static int
414 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
416 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
417 struct _citrus_iconv_std_context *sc;
418 int ret;
419 size_t szpssrc, szpsdst, sz;
420 char *ptr;
422 szpssrc = _stdenc_get_state_size(is->is_src_encoding);
423 szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
425 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
426 sc = malloc(sz);
427 if (sc == NULL)
428 return errno;
430 ptr = (char *)&sc[1];
431 if (szpssrc)
432 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
433 ptr, ptr+szpssrc);
434 else
435 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
436 NULL, NULL);
437 ptr += szpssrc*2;
438 if (szpsdst)
439 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
440 ptr, ptr+szpsdst);
441 else
442 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
443 NULL, NULL);
445 cv->cv_closure = (void *)sc;
447 return 0;
450 static void
451 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
453 free(cv->cv_closure);
456 static int
457 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
458 const char * __restrict * __restrict in,
459 size_t * __restrict inbytes,
460 char * __restrict * __restrict out,
461 size_t * __restrict outbytes, u_int32_t flags,
462 size_t * __restrict invalids)
464 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
465 struct _citrus_iconv_std_context *sc = cv->cv_closure;
466 _index_t idx;
467 _csid_t csid;
468 int ret, state;
469 size_t szrin, szrout;
470 size_t inval;
471 const char *tmpin;
473 inval = 0;
474 if (in==NULL || *in==NULL) {
475 /* special cases */
476 if (out!=NULL && *out!=NULL) {
477 /* init output state and store the shift sequence */
478 save_encoding_state(&sc->sc_src_encoding);
479 save_encoding_state(&sc->sc_dst_encoding);
480 szrout = 0;
482 ret = put_state_resetx(&sc->sc_dst_encoding,
483 *out, *outbytes,
484 &szrout);
485 if (ret)
486 goto err;
488 if (szrout == (size_t)-2) {
489 /* too small to store the character */
490 ret = EINVAL;
491 goto err;
493 *out += szrout;
494 *outbytes -= szrout;
495 } else
496 /* otherwise, discard the shift sequence */
497 init_encoding_state(&sc->sc_dst_encoding);
498 init_encoding_state(&sc->sc_src_encoding);
499 *invalids = 0;
500 return 0;
503 /* normal case */
504 for (;;) {
505 if (*inbytes==0) {
506 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
507 if (state == _STDENC_SDGEN_INITIAL ||
508 state == _STDENC_SDGEN_STABLE)
509 break;
512 /* save the encoding states for the error recovery */
513 save_encoding_state(&sc->sc_src_encoding);
514 save_encoding_state(&sc->sc_dst_encoding);
516 /* mb -> csid/index */
517 tmpin = *in;
518 szrin = szrout = 0;
519 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
520 &tmpin, *inbytes, &szrin);
521 if (ret)
522 goto err;
524 if (szrin == (size_t)-2) {
525 /* incompleted character */
526 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
527 if (ret) {
528 ret = EINVAL;
529 goto err;
531 switch (state) {
532 case _STDENC_SDGEN_INITIAL:
533 case _STDENC_SDGEN_STABLE:
534 /* fetch shift sequences only. */
535 goto next;
537 ret = EINVAL;
538 goto err;
540 /* convert the character */
541 ret = do_conv(is, sc, &csid, &idx);
542 if (ret) {
543 if (ret == E_NO_CORRESPONDING_CHAR) {
544 inval++;
545 szrout = 0;
546 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
547 is->is_use_invalid) {
548 ret = wctombx(&sc->sc_dst_encoding,
549 *out, *outbytes,
550 is->is_invalid,
551 &szrout);
552 if (ret)
553 goto err;
555 goto next;
556 } else {
557 goto err;
560 /* csid/index -> mb */
561 ret = cstombx(&sc->sc_dst_encoding,
562 *out, *outbytes, csid, idx, &szrout);
563 if (ret)
564 goto err;
565 next:
566 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
567 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
568 *in = tmpin;
569 *outbytes -= szrout;
570 *out += szrout;
572 *invalids = inval;
574 return 0;
576 err:
577 restore_encoding_state(&sc->sc_src_encoding);
578 restore_encoding_state(&sc->sc_dst_encoding);
579 err_norestore:
580 *invalids = inval;
582 return ret;