2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
13 static const char sccsid
[] = "$Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp $ (Berkeley) $Date: 2001/08/18 21:41:41 $";
16 #include <sys/types.h>
17 #include <sys/queue.h>
20 #include <bitstring.h>
34 #define LANGCODESET nl_langinfo(CODESET)
38 #define LANGCODESET ""
45 raw2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
49 CHAR_T
**tostr
= (CHAR_T
**)&cw
->bp1
;
50 size_t *blen
= &cw
->blen1
;
52 BINC_RETW(NULL
, *tostr
, *blen
, len
);
55 for (i
= 0; i
< len
; ++i
)
56 (*tostr
)[i
] = (u_char
) str
[i
];
63 #define CONV_BUFFER_SIZE 512
64 /* fill the buffer with codeset encoding of string pointed to by str
65 * left has the number of bytes left in str and is adjusted
66 * len contains the number of bytes put in the buffer
69 #define CONVERT(str, left, src, len) \
73 outleft = CONV_BUFFER_SIZE; \
75 if (iconv(id, (char **)&str, &left, &bp, &outleft) == -1 /*&& \
78 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \
85 #define CONVERT(str, left, src, len)
89 default_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
90 size_t *tolen
, CHAR_T
**dst
, char *enc
)
93 CHAR_T
**tostr
= (CHAR_T
**)&cw
->bp1
;
94 size_t *blen
= &cw
->blen1
;
98 char *src
= (char *)str
;
99 iconv_t id
= (iconv_t
)-1;
100 char buffer
[CONV_BUFFER_SIZE
];
105 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
108 if (strcmp(nl_langinfo(CODESET
), enc
)) {
109 id
= iconv_open(nl_langinfo(CODESET
), enc
);
110 if (id
== (iconv_t
)-1)
112 CONVERT(str
, left
, src
, len
);
116 for (i
= 0, j
= 0; j
< len
; ) {
117 n
= mbrtowc((*tostr
)+i
, src
+j
, len
-j
, &mbs
);
118 /* NULL character converted */
119 if (n
== -2) error
= -(len
-j
);
120 if (n
== -1 || n
== -2) goto err
;
125 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
127 if (id
!= (iconv_t
)-1 && j
== len
&& left
) {
128 CONVERT(str
, left
, src
, len
);
134 if (id
!= (iconv_t
)-1)
142 if (id
!= (iconv_t
)-1)
150 fe_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
151 size_t *tolen
, CHAR_T
**dst
)
153 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
157 ie_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
158 size_t *tolen
, CHAR_T
**dst
)
160 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_INPUTENCODING
));
164 cs_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
165 size_t *tolen
, CHAR_T
**dst
)
167 return default_char2int(sp
, str
, len
, cw
, tolen
, dst
, LANGCODESET
);
171 CHAR_T_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
172 size_t *tolen
, char **dst
)
174 *tolen
= len
* sizeof(CHAR_T
);
181 CHAR_T_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
182 size_t *tolen
, CHAR_T
**dst
)
184 *tolen
= len
/ sizeof(CHAR_T
);
185 *dst
= (CHAR_T
*) str
;
191 int2raw(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
195 char **tostr
= (char **)&cw
->bp1
;
196 size_t *blen
= &cw
->blen1
;
198 BINC_RETC(NULL
, *tostr
, *blen
, len
);
201 for (i
= 0; i
< len
; ++i
)
202 (*tostr
)[i
] = str
[i
];
210 default_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
211 size_t *tolen
, char **pdst
, char *enc
)
213 size_t i
, j
, offset
= 0;
214 char **tostr
= (char **)&cw
->bp1
;
215 size_t *blen
= &cw
->blen1
;
218 ssize_t nlen
= len
+ MB_CUR_MAX
;
221 char buffer
[CONV_BUFFER_SIZE
];
222 iconv_t id
= (iconv_t
)-1;
224 /* convert first len bytes of buffer and append it to cw->bp
225 * len is adjusted => 0
226 * offset contains the offset in cw->bp and is adjusted
227 * cw->bp is grown as required
230 #define CONVERT2(len, cw, offset) \
234 size_t outleft = cw->blen1 - offset; \
235 char *obp = (char *)cw->bp1 + offset; \
236 if (cw->blen1 < offset + MB_CUR_MAX) { \
238 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
241 if (iconv(id, &bp, &len, &obp, &outleft) == -1 && \
244 offset = cw->blen1 - outleft; \
248 #define CONVERT2(len, cw, offset)
253 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
254 dst
= *tostr
; buflen
= *blen
;
257 if (strcmp(nl_langinfo(CODESET
), enc
)) {
258 id
= iconv_open(enc
, nl_langinfo(CODESET
));
259 if (id
== (iconv_t
)-1)
261 dst
= buffer
; buflen
= CONV_BUFFER_SIZE
;
265 for (i
= 0, j
= 0; i
< len
; ++i
) {
266 n
= wcrtomb(dst
+j
, str
[i
], &mbs
);
267 if (n
== -1) goto err
;
269 if (buflen
< j
+ MB_CUR_MAX
) {
270 if (id
!= (iconv_t
)-1) {
271 CONVERT2(j
, cw
, offset
);
274 BINC_RETC(NULL
, *tostr
, *blen
, nlen
);
275 dst
= *tostr
; buflen
= *blen
;
280 n
= wcrtomb(dst
+j
, L
'\0', &mbs
);
281 j
+= n
- 1; /* don't count NUL at the end */
284 if (id
!= (iconv_t
)-1) {
285 CONVERT2(j
, cw
, offset
);
301 fe_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
302 size_t *tolen
, char **dst
)
304 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
308 cs_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
309 size_t *tolen
, char **dst
)
311 return default_int2char(sp
, str
, len
, cw
, tolen
, dst
, LANGCODESET
);
318 conv_init (SCR
*orig
, SCR
*sp
)
321 MEMCPY(&sp
->conv
, &orig
->conv
, 1);
323 setlocale(LC_ALL
, "");
325 sp
->conv
.sys2int
= cs_char2int
;
326 sp
->conv
.int2sys
= cs_int2char
;
327 sp
->conv
.file2int
= fe_char2int
;
328 sp
->conv
.int2file
= fe_int2char
;
329 sp
->conv
.input2int
= ie_char2int
;
332 o_set(sp
, O_FILEENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
333 o_set(sp
, O_INPUTENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
339 conv_enc (SCR
*sp
, int option
, char *enc
)
341 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
348 c2w
= &sp
->conv
.file2int
;
349 w2c
= &sp
->conv
.int2file
;
351 case O_INPUTENCODING
:
352 c2w
= &sp
->conv
.input2int
;
358 if (c2w
) *c2w
= raw2int
;
359 if (w2c
) *w2c
= int2raw
;
363 if (!strcmp(enc
, "WCHAR_T")) {
364 if (c2w
) *c2w
= CHAR_T_char2int
;
365 if (w2c
) *w2c
= CHAR_T_int2char
;
369 id
= iconv_open(enc
, nl_langinfo(CODESET
));
370 if (id
== (iconv_t
)-1)
373 id
= iconv_open(nl_langinfo(CODESET
), enc
);
374 if (id
== (iconv_t
)-1)
383 case O_INPUTENCODING
:
388 F_CLR(sp
, SC_CONV_ERROR
);
389 F_SET(sp
, SC_SCR_REFORMAT
);
396 "321|File encoding conversion not supported");
398 case O_INPUTENCODING
:
400 "322|Input encoding conversion not supported");