2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
13 static const char sccsid
[] = "$Id: conv.c,v 1.24 2001/06/30 17:48:23 skimo Exp $ (Berkeley) $Date: 2001/06/30 17:48:23 $";
16 #include <sys/types.h>
17 #include <sys/queue.h>
20 #include <bitstring.h>
/*
 * raw2int: widen a byte string into the CHAR_T buffer kept in cw->bp1,
 * one input byte per output element (no multibyte decoding is done).
 *
 * sp	 screen handle; not referenced by the lines visible here
 * str	 input bytes
 * len	 number of input bytes
 * cw	 conversion window whose bp1/blen1 hold the output buffer and size
 * tolen output length (the assignment is not visible in this excerpt)
 *
 * BINC_RETW is a project macro; presumably it grows the buffer so it can
 * hold at least len wide characters and returns on failure -- TODO confirm
 * against its definition.
 */
36 raw2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
40 CHAR_T
**tostr
= (CHAR_T
**)&cw
->bp1
;
41 size_t *blen
= &cw
->blen1
;
43 BINC_RETW(NULL
, *tostr
, *blen
, len
);
/*
 * Go through u_char so that bytes with the high bit set are not
 * sign-extended on platforms where plain char is signed.
 */
46 for (i
= 0; i
< len
; ++i
)
47 (*tostr
)[i
] = (u_char
) str
[i
];
/*
 * CONV_BUFFER_SIZE is the value CONVERT loads into `outleft` before each
 * iconv() call, i.e. the capacity of the intermediate conversion buffer.
 *
 * CONVERT, as far as this excerpt shows: resets outleft to
 * CONV_BUFFER_SIZE, calls iconv(id, &str, &left, &bp, &outleft), then sets
 * len to CONV_BUFFER_SIZE - outleft and treats a zero len specially.
 * iconv() returns size_t, so the `== -1` test relies on -1 being converted
 * to (size_t)-1.
 * NOTE(review): the macro uses `id`, `bp` and `outleft` from the expansion
 * site; the lines that initialise `bp` and `src` are not visible here.
 */
54 #define CONV_BUFFER_SIZE 512
55 /* fill the buffer with codeset encoding of string pointed to by str
56 * left has the number of bytes left in str and is adjusted
57 * len contains the number of bytes put in the buffer
59 #define CONVERT(str, left, src, len) \
63 outleft = CONV_BUFFER_SIZE; \
65 if (iconv(id, (char **)&str, &left, &bp, &outleft) == -1 /*&& \
68 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \
/*
 * default_char2int: convert a byte string in encoding `enc` to wide
 * characters stored in cw->bp1.
 *
 * sp	 screen handle
 * str	 input bytes
 * len	 number of input bytes
 * cw	 conversion window; bp1/blen1 are the output buffer and its size
 * tolen output length (assignment not visible in this excerpt)
 * dst	 output pointer (assignment not visible in this excerpt)
 * enc	 name of the encoding `str` is in
 *
 * When enc differs from the locale codeset, the input is first run through
 * iconv() in CONV_BUFFER_SIZE pieces (see CONVERT) and each piece is then
 * decoded with mbrtowc(); otherwise mbrtowc() is applied to the input
 * directly (src starts out pointing at str).
 */
76 default_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
77 size_t *tolen
, CHAR_T
**dst
, char *enc
)
80 CHAR_T
**tostr
= (CHAR_T
**)&cw
->bp1
;
81 size_t *blen
= &cw
->blen1
;
85 char *src
= (char *)str
;
/* (iconv_t)-1 doubles as the "no iconv conversion needed" marker. */
86 iconv_t id
= (iconv_t
)-1;
87 char buffer
[CONV_BUFFER_SIZE
];
92 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
/*
 * strcmp() != 0: enc is not the locale codeset, so open a descriptor
 * that converts from enc to the locale codeset.
 */
94 if (strcmp(nl_langinfo(CODESET
), enc
)) {
95 id
= iconv_open(nl_langinfo(CODESET
), enc
);
96 if (id
== (iconv_t
)-1)
/* Convert the first piece of the input. */
98 CONVERT(str
, left
, src
, len
);
/* i indexes wide characters written, j indexes bytes consumed from src. */
101 for (i
= 0, j
= 0; j
< len
; ) {
102 n
= mbrtowc((*tostr
)+i
, src
+j
, len
-j
, &mbs
);
103 /* NULL character converted */
/*
 * NOTE(review): per POSIX, mbrtowc() returns 0 for a converted NUL and
 * (size_t)-2 for an incomplete multibyte sequence, so the comment above
 * probably belongs to a check that is not visible in this excerpt.  For
 * -2 the number of unconverted bytes is recorded, negated, in `error`.
 */
104 if (n
== -2) error
= -(len
-j
);
105 if (n
== -1 || n
== -2) goto err
;
110 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
/*
 * The current piece is used up (j == len) and iconv input remains:
 * convert the next piece.
 */
112 if (id
!= (iconv_t
)-1 && j
== len
&& left
) {
113 CONVERT(str
, left
, src
, len
);
/*
 * NOTE(review): the statements guarded by the two tests below are not
 * visible here; presumably they release the descriptor on the success
 * and error paths -- confirm.
 */
119 if (id
!= (iconv_t
)-1)
127 if (id
!= (iconv_t
)-1)
/*
 * fe_char2int: bytes -> CHAR_T for file contents.  Delegates to
 * default_char2int(), passing the O_FILEENCODING option string as the
 * source encoding; all other arguments are forwarded unchanged.
 */
135 fe_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
136 size_t *tolen
, CHAR_T
**dst
)
138 default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
/*
 * ie_char2int: bytes -> CHAR_T for user input.  Delegates to
 * default_char2int(), passing the O_INPUTENCODING option string as the
 * source encoding; all other arguments are forwarded unchanged.
 */
142 ie_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
143 size_t *tolen
, CHAR_T
**dst
)
145 default_char2int(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_INPUTENCODING
));
/*
 * cs_char2int: bytes -> CHAR_T for text already in the locale codeset.
 * Delegates to default_char2int() with nl_langinfo(CODESET) as the
 * encoding, so the strcmp() test there compares the codeset name with
 * itself and no iconv descriptor is opened.
 */
149 cs_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
150 size_t *tolen
, CHAR_T
**dst
)
152 default_char2int(sp
, str
, len
, cw
, tolen
, dst
, nl_langinfo(CODESET
));
/*
 * CHAR_T_int2char: CHAR_T -> bytes for the "WCHAR_T" encoding (see
 * conv_enc).  The output length is reported in bytes: len wide characters
 * times sizeof(CHAR_T).
 * NOTE(review): the assignment to *dst is not visible in this excerpt;
 * presumably it aliases str without copying, mirroring CHAR_T_char2int
 * -- confirm.
 */
156 CHAR_T_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
157 size_t *tolen
, char **dst
)
159 *tolen
= len
* sizeof(CHAR_T
);
/*
 * CHAR_T_char2int: bytes -> CHAR_T for the "WCHAR_T" encoding (see
 * conv_enc).  The input is reinterpreted in place as an array of CHAR_T:
 * nothing is copied and cw is not used by the visible lines.
 *
 * tolen receives len / sizeof(CHAR_T); integer division drops any
 * trailing partial element.
 * dst	 receives str itself, with the const qualifier cast away.
 * NOTE(review): this assumes str is suitably aligned for CHAR_T access
 * -- confirm against callers.
 */
166 CHAR_T_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
167 size_t *tolen
, CHAR_T
**dst
)
169 *tolen
= len
/ sizeof(CHAR_T
);
170 *dst
= (CHAR_T
*) str
;
/*
 * int2raw: narrow a CHAR_T string into the byte buffer kept in cw->bp1,
 * one output byte per input element (the inverse of raw2int).
 *
 * BINC_RET is a project macro; presumably it grows the buffer to at least
 * len bytes and returns on failure -- TODO confirm against its definition.
 */
176 int2raw(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
180 char **tostr
= (char **)&cw
->bp1
;
181 size_t *blen
= &cw
->blen1
;
183 BINC_RET(NULL
, *tostr
, *blen
, len
);
/*
 * Plain assignment from CHAR_T to char: values that do not fit in a char
 * are narrowed by the implicit conversion.
 */
186 for (i
= 0; i
< len
; ++i
)
187 (*tostr
)[i
] = str
[i
];
/*
 * default_int2char: convert a CHAR_T string to bytes in encoding `enc`,
 * accumulating the result in cw->bp1.
 *
 * sp	 screen handle
 * str	 input wide characters
 * len	 number of input wide characters
 * cw	 conversion window; bp1/blen1 are the output buffer and its size
 * tolen output length (assignment not visible in this excerpt)
 * pdst	 output pointer (assignment not visible in this excerpt)
 * enc	 name of the target encoding
 *
 * Outline of the visible logic:
 *  - Each wide character is encoded with wcrtomb() at dst + j; a result of
 *    -1 jumps to the err label.
 *  - If enc differs from the locale codeset, an iconv descriptor converting
 *    from the locale codeset to enc is opened and dst is pointed at the
 *    CONV_BUFFER_SIZE stack buffer; CONVERT2 then pushes the staged bytes
 *    through iconv() into cw->bp1 at `offset`.
 *  - Otherwise dst is cw->bp1 itself and the buffer is grown with BINC_RET
 *    when fewer than MB_CUR_MAX bytes of room remain.
 *  - After the loop a wide NUL is encoded so that any pending shift
 *    sequence is emitted; the NUL byte itself is not counted (n - 1).
 *
 * NOTE(review): i, j and offset are declared int.  CONVERT2(j, ...) expands
 * to iconv(id, &bp, &j, ...), handing iconv() the address of an int where
 * it expects a size_t pointer -- confirm this is safe on LP64 targets.
 */
195 default_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
196 size_t *tolen
, char **pdst
, char *enc
)
198 int i
, j
, offset
= 0;
199 char **tostr
= (char **)&cw
->bp1
;
200 size_t *blen
= &cw
->blen1
;
203 ssize_t nlen
= len
+ MB_CUR_MAX
;
206 char buffer
[CONV_BUFFER_SIZE
];
207 iconv_t id
= (iconv_t
)-1;
209 /* convert first len bytes of buffer and append it to cw->bp
210 * len is adjusted => 0
211 * offset contains the offset in cw->bp and is adjusted
212 * cw->bp is grown as required
214 #define CONVERT2(len, cw, offset) \
218 size_t outleft = cw->blen1 - offset; \
219 char *obp = (char *)cw->bp1 + offset; \
220 if (cw->blen1 < offset + MB_CUR_MAX) { \
222 BINC_RET(NULL, cw->bp1, cw->blen1, nlen); \
225 if (iconv(id, &bp, &len, &obp, &outleft) == -1 && \
228 offset = cw->blen1 - outleft; \
234 BINC_RET(NULL
, *tostr
, *blen
, nlen
);
235 dst
= *tostr
; buflen
= *blen
;
237 if (strcmp(nl_langinfo(CODESET
), enc
)) {
238 id
= iconv_open(enc
, nl_langinfo(CODESET
));
239 if (id
== (iconv_t
)-1)
241 dst
= buffer
; buflen
= CONV_BUFFER_SIZE
;
244 for (i
= 0, j
= 0; i
< len
; ++i
) {
245 n
= wcrtomb(dst
+j
, str
[i
], &mbs
);
246 if (n
== -1) goto err
;
248 if (buflen
< j
+ MB_CUR_MAX
) {
249 if (id
!= (iconv_t
)-1) {
250 CONVERT2(j
, cw
, offset
);
253 BINC_RET(NULL
, *tostr
, *blen
, nlen
);
254 dst
= *tostr
; buflen
= *blen
;
259 n
= wcrtomb(dst
+j
, L
'\0', &mbs
);
260 j
+= n
- 1; /* don't count NUL at the end */
263 if (id
!= (iconv_t
)-1) {
264 CONVERT2(j
, cw
, offset
);
/*
 * fe_int2char: CHAR_T -> bytes for file contents.  Delegates to
 * default_int2char(), passing the O_FILEENCODING option string as the
 * target encoding; all other arguments are forwarded unchanged.
 */
280 fe_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
281 size_t *tolen
, char **dst
)
283 default_int2char(sp
, str
, len
, cw
, tolen
, dst
, O_STR(sp
, O_FILEENCODING
));
/*
 * cs_int2char: CHAR_T -> bytes in the locale codeset.  Delegates to
 * default_int2char() with nl_langinfo(CODESET) as the target encoding, so
 * the strcmp() test there compares the codeset name with itself and no
 * iconv descriptor is opened.
 */
287 cs_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
288 size_t *tolen
, char **dst
)
290 default_int2char(sp
, str
, len
, cw
, tolen
, dst
, nl_langinfo(CODESET
));
/*
 * conv_init: set up the conversion table (sp->conv) of screen sp.
 *
 * orig	 screen whose conv table is copied into sp
 * sp	 screen being initialised
 *
 * NOTE(review): the control flow between the statements below is not
 * visible in this excerpt; presumably the copy is used when orig is
 * non-NULL and the defaults are installed otherwise -- confirm.
 */
297 conv_init (SCR
*orig
, SCR
*sp
)
300 MEMCPY(&sp
->conv
, &orig
->conv
, 1);
/* Adopt the locale named by the environment for every category. */
302 setlocale(LC_ALL
, "");
/*
 * Default converters: locale codeset for system strings, the file
 * encoding for file contents, the input encoding for user input.
 */
304 sp
->conv
.sys2int
= cs_char2int
;
305 sp
->conv
.int2sys
= cs_int2char
;
306 sp
->conv
.file2int
= fe_char2int
;
307 sp
->conv
.int2file
= fe_int2char
;
308 sp
->conv
.input2int
= ie_char2int
;
/* Both encoding options start out as the locale codeset name. */
310 o_set(sp
, O_FILEENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
311 o_set(sp
, O_INPUTENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
/*
 * conv_enc: install the converters that match a new value of an encoding
 * option.
 *
 * sp	  screen whose conv table is updated
 * option which option changed; O_INPUTENCODING is the only case label
 *	  visible here, the first (file encoding) label is not visible
 * enc	  the new encoding name
 *
 * Outline of the visible logic:
 *  - c2w / w2c are pointed at the table slots to update: file2int and
 *    int2file for the file encoding, input2int alone for the input
 *    encoding (the `if (w2c)` guards suggest w2c may be NULL then).
 *  - One path installs the byte-for-byte converters raw2int / int2raw; the
 *    condition selecting it is not visible in this excerpt.
 *  - enc equal to "WCHAR_T" installs the CHAR_T pass-through converters.
 *  - Otherwise iconv_open() is tried in both directions (locale codeset to
 *    enc, then enc to locale codeset) to check the encoding is supported.
 *  - SC_CONV_ERROR is cleared and SC_SCR_REFORMAT is set on sp.
 *  - The two message strings at the end report an unsupported file or
 *    input encoding.
 *
 * NOTE(review): confirm that every descriptor successfully opened by the
 * two probes is released with iconv_close(); those lines are not visible.
 */
316 conv_enc (SCR
*sp
, int option
, char *enc
)
325 c2w
= &sp
->conv
.file2int
;
326 w2c
= &sp
->conv
.int2file
;
328 case O_INPUTENCODING
:
329 c2w
= &sp
->conv
.input2int
;
335 if (c2w
) *c2w
= raw2int
;
336 if (w2c
) *w2c
= int2raw
;
340 if (!strcmp(enc
, "WCHAR_T")) {
341 if (c2w
) *c2w
= CHAR_T_char2int
;
342 if (w2c
) *w2c
= CHAR_T_int2char
;
346 id
= iconv_open(enc
, nl_langinfo(CODESET
));
347 if (id
== (iconv_t
)-1)
350 id
= iconv_open(nl_langinfo(CODESET
), enc
);
351 if (id
== (iconv_t
)-1)
360 case O_INPUTENCODING
:
365 F_CLR(sp
, SC_CONV_ERROR
);
366 F_SET(sp
, SC_SCR_REFORMAT
);
373 "321|File encoding conversion not supported");
374 case O_INPUTENCODING
:
376 "322|Input encoding conversion not supported");