Bring in an errno.9 manual page (based on NetBSD's).
[dragonfly.git] / contrib / nvi2 / common / conv.c
blobbdf882dcc3a47cc785f06dc988ab21563734673f
1 /*-
2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
6 * Copyright (c) 2011, 2012
7 * Zhihao Yuan. All rights reserved.
9 * See the LICENSE file for redistribution information.
12 #include "config.h"
14 #ifndef lint
15 static const char sccsid[] = "$Id: conv.c,v 2.40 2014/02/27 16:25:29 zy Exp $";
16 #endif /* not lint */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/time.h>
22 #include <bitstring.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <langinfo.h>
26 #include <locale.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <strings.h>
31 #include <unistd.h>
33 #include "common.h"
/*
 * codeset --
 *	Get the locale encoding.
 *
 *	The name is looked up once, on the first call, and a private copy is
 *	cached.  POSIX allows the string returned by nl_langinfo() to be
 *	invalidated or overwritten by later nl_langinfo()/setlocale() calls,
 *	so the library's pointer itself must not be kept across calls.
 *
 *	Returns a NUL-terminated codeset name that callers must not modify
 *	or free.
 *
 * PUBLIC: char * codeset(void);
 */
char *
codeset(void)
{
	static char *cs;
	char *name;
	size_t size;

	if (cs != NULL)
		return cs;

	name = nl_langinfo(CODESET);
	size = strlen(name) + 1;

	/*
	 * If the copy cannot be allocated, hand back the library's string
	 * without caching it; the next call simply tries again.
	 */
	if ((cs = malloc(size)) == NULL)
		return name;
	memcpy(cs, name, size);

	return cs;
}
52 #ifdef USE_WIDECHAR
53 static int
54 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
55 CHAR_T **dst)
57 int i;
58 CHAR_T **tostr = &cw->bp1.wc;
59 size_t *blen = &cw->blen1;
61 BINC_RETW(NULL, *tostr, *blen, len);
63 *tolen = len;
64 for (i = 0; i < len; ++i)
65 (*tostr)[i] = (u_char) str[i];
67 *dst = cw->bp1.wc;
69 return 0;
#define CONV_BUFFER_SIZE	512
/*
 * CONVERT --
 *	Fill the local `buffer' with the codeset encoding of the string
 *	pointed to by str.
 *
 *	left has the number of bytes left in str and is adjusted;
 *	len receives the number of bytes put in the buffer;
 *	src is pointed at the buffer on success.
 *
 *	The macro relies on names from the expanding function: `buffer'
 *	(CONV_BUFFER_SIZE bytes), the descriptor `id', the int `error' and
 *	the label `err'.  It jumps to err when iconv() fails for any reason
 *	other than E2BIG, or when a pass produced no output at all (in which
 *	case error is set to -left first).
 *
 * IC_RESET --
 *	Put the descriptor `id', when there is one, back into its initial
 *	conversion state.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
	do {								\
		char *conv_outp = buffer;				\
		size_t conv_room = CONV_BUFFER_SIZE;			\
									\
		errno = 0;						\
		if (iconv(id, (iconv_src_t)&str, &left, &conv_outp,	\
		    &conv_room) == (size_t)-1 && errno != E2BIG)	\
			goto err;					\
		len = CONV_BUFFER_SIZE - conv_room;			\
		if (len == 0) {						\
			error = -left;					\
			goto err;					\
		}							\
		src = buffer;						\
	} while (0)

#define IC_RESET()							\
	do {								\
		if (id != (iconv_t)-1)					\
			iconv(id, NULL, NULL, NULL, NULL);		\
	} while (0)
#else
#define CONVERT(str, left, src, len)
#define IC_RESET()
#endif
104 static int
105 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
106 size_t *tolen, CHAR_T **dst, iconv_t id)
108 size_t i = 0, j;
109 CHAR_T **tostr = &cw->bp1.wc;
110 size_t *blen = &cw->blen1;
111 mbstate_t mbs;
112 size_t n;
113 ssize_t nlen = len;
114 char *src = (char *)str;
115 #ifdef USE_ICONV
116 char buffer[CONV_BUFFER_SIZE];
117 #endif
118 size_t left = len;
119 int error = 1;
121 BZERO(&mbs, 1);
122 BINC_RETW(NULL, *tostr, *blen, nlen);
124 #ifdef USE_ICONV
125 if (id != (iconv_t)-1)
126 CONVERT(str, left, src, len);
127 #endif
129 for (i = 0, j = 0; j < len; ) {
130 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
131 /* NULL character converted */
132 if (n == -2)
133 error = -(len-j);
134 if (n == -1 || n == -2)
135 goto err;
136 if (n == 0)
137 n = 1;
138 j += n;
139 if (++i >= *blen) {
140 nlen += 256;
141 BINC_RETW(NULL, *tostr, *blen, nlen);
143 if (id != (iconv_t)-1 && j == len && left) {
144 CONVERT(str, left, src, len);
145 j = 0;
149 error = 0;
150 err:
151 *tolen = i;
152 *dst = cw->bp1.wc;
153 IC_RESET();
155 return error;
158 static int
159 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
160 CHAR_T **dst)
162 return default_char2int(sp, str, len, cw, tolen, dst,
163 sp->conv.id[IC_FE_CHAR2INT]);
166 static int
167 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
168 CHAR_T **dst)
170 return default_char2int(sp, str, len, cw, tolen, dst,
171 sp->conv.id[IC_IE_CHAR2INT]);
174 static int
175 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
176 CHAR_T **dst)
178 return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1);
181 static int
182 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
183 char **dst)
185 int i;
186 char **tostr = &cw->bp1.c;
187 size_t *blen = &cw->blen1;
189 BINC_RETC(NULL, *tostr, *blen, len);
191 *tolen = len;
192 for (i = 0; i < len; ++i)
193 (*tostr)[i] = str[i];
195 *dst = cw->bp1.c;
197 return 0;
200 static int
201 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
202 size_t *tolen, char **pdst, iconv_t id)
204 size_t i, j, offset = 0;
205 char **tostr = &cw->bp1.c;
206 size_t *blen = &cw->blen1;
207 mbstate_t mbs;
208 size_t n;
209 ssize_t nlen = len + MB_CUR_MAX;
210 char *dst;
211 size_t buflen;
212 #ifdef USE_ICONV
213 char buffer[CONV_BUFFER_SIZE];
214 #endif
215 int error = 1;
217 /* convert first len bytes of buffer and append it to cw->bp
218 * len is adjusted => 0
219 * offset contains the offset in cw->bp and is adjusted
220 * cw->bp is grown as required
222 #ifdef USE_ICONV
223 #define CONVERT2(_buffer, lenp, cw, offset) \
224 do { \
225 char *bp = _buffer; \
226 int ret; \
227 do { \
228 size_t outleft = cw->blen1 - offset; \
229 char *obp = cw->bp1.c + offset; \
230 if (cw->blen1 < offset + MB_CUR_MAX) { \
231 nlen += 256; \
232 BINC_RETC(NULL, cw->bp1.c, cw->blen1, \
233 nlen); \
235 errno = 0; \
236 ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, \
237 &outleft); \
238 if (ret == -1 && errno != E2BIG) \
239 goto err; \
240 offset = cw->blen1 - outleft; \
241 } while (ret != 0); \
242 } while (0)
243 #else
244 #define CONVERT2(_buffer, lenp, cw, offset)
245 #endif
248 BZERO(&mbs, 1);
249 BINC_RETC(NULL, *tostr, *blen, nlen);
250 dst = *tostr; buflen = *blen;
252 #ifdef USE_ICONV
253 if (id != (iconv_t)-1) {
254 dst = buffer; buflen = CONV_BUFFER_SIZE;
256 #endif
258 for (i = 0, j = 0; i < len; ++i) {
259 n = wcrtomb(dst+j, str[i], &mbs);
260 if (n == -1)
261 goto err;
262 j += n;
263 if (buflen < j + MB_CUR_MAX) {
264 if (id != (iconv_t)-1) {
265 CONVERT2(buffer, &j, cw, offset);
266 } else {
267 nlen += 256;
268 BINC_RETC(NULL, *tostr, *blen, nlen);
269 dst = *tostr; buflen = *blen;
274 n = wcrtomb(dst+j, L'\0', &mbs);
275 j += n - 1; /* don't count NUL at the end */
276 *tolen = j;
278 if (id != (iconv_t)-1) {
279 CONVERT2(buffer, &j, cw, offset);
280 /* back to the initial state */
281 CONVERT2(NULL, NULL, cw, offset);
282 *tolen = offset;
285 error = 0;
286 err:
287 if (error)
288 *tolen = j;
289 *pdst = cw->bp1.c;
290 IC_RESET();
292 return error;
295 static int
296 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
297 size_t *tolen, char **dst)
299 return default_int2char(sp, str, len, cw, tolen, dst,
300 sp->conv.id[IC_FE_INT2CHAR]);
303 static int
304 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
305 size_t *tolen, char **dst)
307 return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1);
310 #endif
313 * conv_init --
314 * Initialize the iconv environment.
316 * PUBLIC: void conv_init(SCR *, SCR *);
318 void
319 conv_init(SCR *orig, SCR *sp)
321 int i;
323 if (orig == NULL)
324 setlocale(LC_ALL, "");
325 if (orig != NULL)
326 BCOPY(&orig->conv, &sp->conv, 1);
327 #ifdef USE_WIDECHAR
328 else {
329 char *ctype = setlocale(LC_CTYPE, NULL);
332 * XXX
333 * This hack fixes the libncursesw issue on FreeBSD.
335 if (!strcmp(ctype, "ko_KR.CP949"))
336 setlocale(LC_CTYPE, "ko_KR.eucKR");
337 else if (!strcmp(ctype, "zh_CN.GB2312"))
338 setlocale(LC_CTYPE, "zh_CN.eucCN");
339 else if (!strcmp(ctype, "zh_CN.GBK"))
340 setlocale(LC_CTYPE, "zh_CN.GB18030");
341 else if (!strcmp(ctype, "zh_Hans_CN.GB2312"))
342 setlocale(LC_CTYPE, "zh_Hans_CN.eucCN");
343 else if (!strcmp(ctype, "zh_Hans_CN.GBK"))
344 setlocale(LC_CTYPE, "zh_Hans_CN.GB18030");
347 * Switch to 8bit mode if locale is C;
348 * LC_CTYPE should be reseted to C if unmatched.
350 if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) {
351 sp->conv.sys2int = sp->conv.file2int = raw2int;
352 sp->conv.int2sys = sp->conv.int2file = int2raw;
353 sp->conv.input2int = raw2int;
354 } else {
355 sp->conv.sys2int = cs_char2int;
356 sp->conv.int2sys = cs_int2char;
357 sp->conv.file2int = fe_char2int;
358 sp->conv.int2file = fe_int2char;
359 sp->conv.input2int = ie_char2int;
361 #ifdef USE_ICONV
362 o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0);
363 #endif
365 #endif
367 /* iconv descriptors must be distinct to screens. */
368 for (i = 0; i <= IC_IE_TO_UTF16; ++i)
369 sp->conv.id[i] = (iconv_t)-1;
370 #ifdef USE_ICONV
371 conv_enc(sp, O_INPUTENCODING, 0);
372 #endif
376 * conv_enc --
377 * Convert file/input encoding.
379 * PUBLIC: int conv_enc(SCR *, int, char *);
382 conv_enc(SCR *sp, int option, char *enc)
384 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
385 iconv_t *c2w, *w2c;
386 iconv_t id_c2w, id_w2c;
388 switch (option) {
389 case O_FILEENCODING:
390 c2w = sp->conv.id + IC_FE_CHAR2INT;
391 w2c = sp->conv.id + IC_FE_INT2CHAR;
392 if (!enc)
393 enc = O_STR(sp, O_FILEENCODING);
395 if (strcasecmp(codeset(), enc)) {
396 if ((id_c2w = iconv_open(codeset(), enc)) ==
397 (iconv_t)-1)
398 goto err;
399 if ((id_w2c = iconv_open(enc, codeset())) ==
400 (iconv_t)-1)
401 goto err;
402 } else {
403 id_c2w = (iconv_t)-1;
404 id_w2c = (iconv_t)-1;
407 break;
409 case O_INPUTENCODING:
410 c2w = sp->conv.id + IC_IE_CHAR2INT;
411 w2c = sp->conv.id + IC_IE_TO_UTF16;
412 if (!enc)
413 enc = O_STR(sp, O_INPUTENCODING);
415 if (strcasecmp(codeset(), enc)) {
416 if ((id_c2w = iconv_open(codeset(), enc)) ==
417 (iconv_t)-1)
418 goto err;
419 } else
420 id_c2w = (iconv_t)-1;
422 /* UTF-16 can not be locale and can not be inputed. */
423 if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1)
424 goto err;
426 break;
428 default:
429 abort();
432 if (*c2w != (iconv_t)-1)
433 iconv_close(*c2w);
434 if (*w2c != (iconv_t)-1)
435 iconv_close(*w2c);
437 *c2w = id_c2w;
438 *w2c = id_w2c;
440 F_CLR(sp, SC_CONV_ERROR);
441 F_SET(sp, SC_SCR_REFORMAT);
443 return 0;
444 err:
445 #endif
446 switch (option) {
447 case O_FILEENCODING:
448 msgq(sp, M_ERR, "321|File encoding conversion not supported");
449 break;
450 case O_INPUTENCODING:
451 msgq(sp, M_ERR, "322|Input encoding conversion not supported");
452 break;
454 return 1;
458 * conv_end --
459 * Close the iconv descriptors, release the buffer.
461 * PUBLIC: void conv_end(SCR *);
463 void
464 conv_end(SCR *sp)
466 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
467 int i;
468 for (i = 0; i <= IC_IE_TO_UTF16; ++i)
469 if (sp->conv.id[i] != (iconv_t)-1)
470 iconv_close(sp->conv.id[i]);
471 if (sp->cw.bp1.c != NULL)
472 free(sp->cw.bp1.c);
473 #endif