2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
6 * Copyright (c) 2011, 2012
7 * Zhihao Yuan. All rights reserved.
9 * See the LICENSE file for redistribution information.
15 static const char sccsid
[] = "$Id: conv.c,v 2.40 2014/02/27 16:25:29 zy Exp $";
18 #include <sys/types.h>
19 #include <sys/queue.h>
22 #include <bitstring.h>
37 * Get the locale encoding.
39 * PUBLIC: char * codeset(void);
47 cs
= nl_langinfo(CODESET
);
54 raw2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
58 CHAR_T
**tostr
= &cw
->bp1
.wc
;
59 size_t *blen
= &cw
->blen1
;
61 BINC_RETW(NULL
, *tostr
, *blen
, len
);
64 for (i
= 0; i
< len
; ++i
)
65 (*tostr
)[i
] = (u_char
) str
[i
];
72 #define CONV_BUFFER_SIZE 512
73 /* fill the buffer with codeset encoding of string pointed to by str
74 * left has the number of bytes left in str and is adjusted
75 * len contains the number of bytes put in the buffer
78 #define CONVERT(str, left, src, len) \
82 outleft = CONV_BUFFER_SIZE; \
84 if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft) \
85 == -1 && errno != E2BIG) \
87 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \
96 if (id != (iconv_t)-1) \
97 iconv(id, NULL, NULL, NULL, NULL); \
100 #define CONVERT(str, left, src, len)
105 default_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
106 size_t *tolen
, CHAR_T
**dst
, iconv_t id
)
109 CHAR_T
**tostr
= &cw
->bp1
.wc
;
110 size_t *blen
= &cw
->blen1
;
114 char *src
= (char *)str
;
116 char buffer
[CONV_BUFFER_SIZE
];
122 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
125 if (id
!= (iconv_t
)-1)
126 CONVERT(str
, left
, src
, len
);
129 for (i
= 0, j
= 0; j
< len
; ) {
130 n
= mbrtowc((*tostr
)+i
, src
+j
, len
-j
, &mbs
);
131 /* NULL character converted */
134 if (n
== -1 || n
== -2)
141 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
143 if (id
!= (iconv_t
)-1 && j
== len
&& left
) {
144 CONVERT(str
, left
, src
, len
);
159 fe_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
162 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
,
163 sp
->conv
.id
[IC_FE_CHAR2INT
]);
167 ie_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
170 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
,
171 sp
->conv
.id
[IC_IE_CHAR2INT
]);
175 cs_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
178 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, (iconv_t
)-1);
182 int2raw(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
186 char **tostr
= &cw
->bp1
.c
;
187 size_t *blen
= &cw
->blen1
;
189 BINC_RETC(NULL
, *tostr
, *blen
, len
);
192 for (i
= 0; i
< len
; ++i
)
193 (*tostr
)[i
] = str
[i
];
201 default_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
202 size_t *tolen
, char **pdst
, iconv_t id
)
204 size_t i
, j
, offset
= 0;
205 char **tostr
= &cw
->bp1
.c
;
206 size_t *blen
= &cw
->blen1
;
209 ssize_t nlen
= len
+ MB_CUR_MAX
;
213 char buffer
[CONV_BUFFER_SIZE
];
217 /* convert first len bytes of buffer and append it to cw->bp
218 * len is adjusted => 0
219 * offset contains the offset in cw->bp and is adjusted
220 * cw->bp is grown as required
223 #define CONVERT2(_buffer, lenp, cw, offset) \
225 char *bp = _buffer; \
228 size_t outleft = cw->blen1 - offset; \
229 char *obp = cw->bp1.c + offset; \
230 if (cw->blen1 < offset + MB_CUR_MAX) { \
232 BINC_RETC(NULL, cw->bp1.c, cw->blen1, \
236 ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, \
238 if (ret == -1 && errno != E2BIG) \
240 offset = cw->blen1 - outleft; \
241 } while (ret != 0); \
244 #define CONVERT2(_buffer, lenp, cw, offset)
249 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
250 dst
= *tostr
; buflen
= *blen
;
253 if (id
!= (iconv_t
)-1) {
254 dst
= buffer
; buflen
= CONV_BUFFER_SIZE
;
258 for (i
= 0, j
= 0; i
< len
; ++i
) {
259 n
= wcrtomb(dst
+j
, str
[i
], &mbs
);
263 if (buflen
< j
+ MB_CUR_MAX
) {
264 if (id
!= (iconv_t
)-1) {
265 CONVERT2(buffer
, &j
, cw
, offset
);
268 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
269 dst
= *tostr
; buflen
= *blen
;
274 n
= wcrtomb(dst
+j
, L
'\0', &mbs
);
275 j
+= n
- 1; /* don't count NUL at the end */
278 if (id
!= (iconv_t
)-1) {
279 CONVERT2(buffer
, &j
, cw
, offset
);
280 /* back to the initial state */
281 CONVERT2(NULL
, NULL
, cw
, offset
);
296 fe_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
297 size_t *tolen
, char **dst
)
299 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
,
300 sp
->conv
.id
[IC_FE_INT2CHAR
]);
304 cs_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
305 size_t *tolen
, char **dst
)
307 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, (iconv_t
)-1);
314 * Initialize the iconv environment.
316 * PUBLIC: void conv_init(SCR *, SCR *);
319 conv_init(SCR
*orig
, SCR
*sp
)
324 setlocale(LC_ALL
, "");
326 BCOPY(&orig
->conv
, &sp
->conv
, 1);
329 char *ctype
= setlocale(LC_CTYPE
, NULL
);
333 * This hack fixes the libncursesw issue on FreeBSD.
335 if (!strcmp(ctype
, "ko_KR.CP949"))
336 setlocale(LC_CTYPE
, "ko_KR.eucKR");
337 else if (!strcmp(ctype
, "zh_CN.GB2312"))
338 setlocale(LC_CTYPE
, "zh_CN.eucCN");
339 else if (!strcmp(ctype
, "zh_CN.GBK"))
340 setlocale(LC_CTYPE
, "zh_CN.GB18030");
341 else if (!strcmp(ctype
, "zh_Hans_CN.GB2312"))
342 setlocale(LC_CTYPE
, "zh_Hans_CN.eucCN");
343 else if (!strcmp(ctype
, "zh_Hans_CN.GBK"))
344 setlocale(LC_CTYPE
, "zh_Hans_CN.GB18030");
347 * Switch to 8bit mode if locale is C;
348 * LC_CTYPE should be reset to C if unmatched.
350 if (!strcmp(ctype
, "C") || !strcmp(ctype
, "POSIX")) {
351 sp
->conv
.sys2int
= sp
->conv
.file2int
= raw2int
;
352 sp
->conv
.int2sys
= sp
->conv
.int2file
= int2raw
;
353 sp
->conv
.input2int
= raw2int
;
355 sp
->conv
.sys2int
= cs_char2int
;
356 sp
->conv
.int2sys
= cs_int2char
;
357 sp
->conv
.file2int
= fe_char2int
;
358 sp
->conv
.int2file
= fe_int2char
;
359 sp
->conv
.input2int
= ie_char2int
;
362 o_set(sp
, O_INPUTENCODING
, OS_STRDUP
, codeset(), 0);
367 /* Each screen must have its own iconv descriptors. */
368 for (i
= 0; i
<= IC_IE_TO_UTF16
; ++i
)
369 sp
->conv
.id
[i
] = (iconv_t
)-1;
371 conv_enc(sp
, O_INPUTENCODING
, 0);
377 * Convert file/input encoding.
379 * PUBLIC: int conv_enc(SCR *, int, char *);
382 conv_enc(SCR
*sp
, int option
, char *enc
)
384 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
386 iconv_t id_c2w
, id_w2c
;
390 c2w
= sp
->conv
.id
+ IC_FE_CHAR2INT
;
391 w2c
= sp
->conv
.id
+ IC_FE_INT2CHAR
;
393 enc
= O_STR(sp
, O_FILEENCODING
);
395 if (strcasecmp(codeset(), enc
)) {
396 if ((id_c2w
= iconv_open(codeset(), enc
)) ==
399 if ((id_w2c
= iconv_open(enc
, codeset())) ==
403 id_c2w
= (iconv_t
)-1;
404 id_w2c
= (iconv_t
)-1;
409 case O_INPUTENCODING
:
410 c2w
= sp
->conv
.id
+ IC_IE_CHAR2INT
;
411 w2c
= sp
->conv
.id
+ IC_IE_TO_UTF16
;
413 enc
= O_STR(sp
, O_INPUTENCODING
);
415 if (strcasecmp(codeset(), enc
)) {
416 if ((id_c2w
= iconv_open(codeset(), enc
)) ==
420 id_c2w
= (iconv_t
)-1;
422 /* UTF-16 cannot be a locale encoding and cannot be entered as input. */
423 if ((id_w2c
= iconv_open("utf-16be", enc
)) == (iconv_t
)-1)
432 if (*c2w
!= (iconv_t
)-1)
434 if (*w2c
!= (iconv_t
)-1)
440 F_CLR(sp
, SC_CONV_ERROR
);
441 F_SET(sp
, SC_SCR_REFORMAT
);
448 msgq(sp
, M_ERR
, "321|File encoding conversion not supported");
450 case O_INPUTENCODING
:
451 msgq(sp
, M_ERR
, "322|Input encoding conversion not supported");
459 * Close the iconv descriptors, release the buffer.
461 * PUBLIC: void conv_end(SCR *);
466 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
468 for (i
= 0; i
<= IC_IE_TO_UTF16
; ++i
)
469 if (sp
->conv
.id
[i
] != (iconv_t
)-1)
470 iconv_close(sp
->conv
.id
[i
]);
471 if (sp
->cw
.bp1
.c
!= NULL
)