2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
13 static const char sccsid
[] = "$Id: conv.c,v 1.15 2001/05/13 09:05:06 skimo Exp $ (Berkeley) $Date: 2001/05/13 09:05:06 $";
16 #include <sys/types.h>
17 #include <sys/queue.h>
20 #include <bitstring.h>
38 raw2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
42 CHAR_T
**tostr
= (CHAR_T
**)&cw
->bp1
;
43 size_t *blen
= &cw
->blen1
;
45 BINC_RETW(NULL
, *tostr
, *blen
, len
);
48 for (i
= 0; i
< len
; ++i
)
49 (*tostr
)[i
] = (u_char
) str
[i
];
56 #define CONV_BUFFER_SIZE 512
57 /* fill the buffer with codeset encoding of string pointed to by str
58 * left has the number of bytes left in str and is adjusted
59 * len contains the number of bytes put in the buffer
61 #define CONVERT(str, left, src, len) \
65 outleft = CONV_BUFFER_SIZE; \
67 if (iconv(id, (char **)&str, &left, &bp, &outleft) == -1 && \
70 len = CONV_BUFFER_SIZE - outleft; \
75 default_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
76 size_t *tolen
, CHAR_T
**dst
)
79 CHAR_T
**tostr
= (CHAR_T
**)&cw
->bp1
;
80 size_t *blen
= &cw
->blen1
;
84 char *src
= (char *)str
;
85 iconv_t id
= (iconv_t
)-1;
86 char *enc
= O_STR(sp
, O_FILEENCODING
);
87 char buffer
[CONV_BUFFER_SIZE
];
91 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
93 if (strcmp(nl_langinfo(CODESET
), enc
)) {
94 id
= iconv_open(nl_langinfo(CODESET
), enc
);
95 if (id
== (iconv_t
)-1)
97 CONVERT(str
, left
, src
, len
);
100 for (i
= 0, j
= 0; j
< len
; ) {
101 n
= mbrtowc((*tostr
)+i
, src
+j
, len
-j
, &mbs
);
102 /* NULL character converted */
103 if (n
== -1 || n
== -2) goto err
;
108 BINC_RETW(NULL
, *tostr
, *blen
, nlen
);
110 if (id
!= (iconv_t
)-1 && j
== len
&& left
) {
111 CONVERT(str
, left
, src
, len
);
117 if (id
!= (iconv_t
)-1)
125 if (id
!= (iconv_t
)-1)
133 CHAR_T_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
134 size_t *tolen
, char **dst
)
136 *tolen
= len
* sizeof(CHAR_T
);
143 CHAR_T_char2int(SCR
*sp
, const char * str
, ssize_t len
, CONVWIN
*cw
,
144 size_t *tolen
, CHAR_T
**dst
)
146 *tolen
= len
/ sizeof(CHAR_T
);
147 *dst
= (CHAR_T
*) str
;
153 int2raw(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
, size_t *tolen
,
157 char **tostr
= (char **)&cw
->bp1
;
158 size_t *blen
= &cw
->blen1
;
160 BINC_RET(NULL
, *tostr
, *blen
, len
);
163 for (i
= 0; i
< len
; ++i
)
164 (*tostr
)[i
] = str
[i
];
172 default_int2char(SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
173 size_t *tolen
, char **pdst
)
175 int i
, j
, offset
= 0;
176 char **tostr
= (char **)&cw
->bp1
;
177 size_t *blen
= &cw
->blen1
;
180 ssize_t nlen
= len
+ MB_CUR_MAX
;
183 char buffer
[CONV_BUFFER_SIZE
];
184 iconv_t id
= (iconv_t
)-1;
185 char *enc
= O_STR(sp
, O_FILEENCODING
);
187 /* convert first len bytes of buffer and append it to cw->bp
188 * len is adjusted => 0
189 * offset contains the offset in cw->bp and is adjusted
190 * cw->bp is grown as required
192 #define CONVERT2(len, cw, offset) \
196 size_t outleft = cw->blen1 - offset; \
197 char *obp = (char *)cw->bp1 + offset; \
198 if (cw->blen1 < offset + MB_CUR_MAX) { \
200 BINC_RET(NULL, cw->bp1, cw->blen1, nlen); \
203 if (iconv(id, &bp, &len, &obp, &outleft) == -1 && \
206 offset = cw->blen1 - outleft; \
212 BINC_RET(NULL
, *tostr
, *blen
, nlen
);
213 dst
= *tostr
; buflen
= *blen
;
215 if (strcmp(nl_langinfo(CODESET
), enc
)) {
216 id
= iconv_open(enc
, nl_langinfo(CODESET
));
217 if (id
== (iconv_t
)-1)
219 dst
= buffer
; buflen
= CONV_BUFFER_SIZE
;
222 for (i
= 0, j
= 0; i
< len
; ++i
) {
223 n
= wcrtomb(dst
+j
, str
[i
], &mbs
);
224 if (n
== -1) goto err
;
226 if (buflen
< j
+ MB_CUR_MAX
) {
227 if (id
!= (iconv_t
)-1) {
228 CONVERT2(j
, cw
, offset
);
231 BINC_RET(NULL
, *tostr
, *blen
, nlen
);
232 dst
= *tostr
; buflen
= *blen
;
237 n
= wcrtomb(dst
+j
, L
'\0', &mbs
);
238 j
+= n
- 1; /* don't count NUL at the end */
241 if (id
!= (iconv_t
)-1) {
242 CONVERT2(j
, cw
, offset
);
259 default_int2disp (SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
260 size_t *tolen
, char **dst
)
264 char **tostr
= (char **)&cw
->bp1
;
265 size_t *blen
= &cw
->blen1
;
267 BINC_RET(NULL
, *tostr
, *blen
, len
* sizeof(chtype
));
271 for (i
= 0, j
= 0; i
< len
; ++i
)
272 if (str
[i
] > 0xffff) {
286 default_int2disp (SCR
*sp
, const CHAR_T
* str
, ssize_t len
, CONVWIN
*cw
,
287 size_t *tolen
, char **dst
)
290 char **tostr
= (char **)&cw
->bp1
;
291 size_t *blen
= &cw
->blen1
;
293 BINC_RET(NULL
, *tostr
, *blen
, len
* 2);
295 for (i
= 0, j
= 0; i
< len
; ++i
)
296 if (CHAR_WIDTH(NULL
, str
[i
]) > 1) {
300 (*tostr
)[j
++] = str
[i
];
311 conv_init (SCR
*orig
, SCR
*sp
)
314 MEMCPY(&sp
->conv
, &orig
->conv
, 1);
316 setlocale(LC_ALL
, "");
317 sp
->conv
.char2int
= raw2int
;
318 sp
->conv
.int2char
= int2raw
;
319 sp
->conv
.file2int
= default_char2int
;
320 sp
->conv
.int2file
= default_int2char
;
321 sp
->conv
.int2disp
= default_int2disp
;
322 o_set(sp
, O_FILEENCODING
, OS_STRDUP
, nl_langinfo(CODESET
), 0);
327 conv_enc (SCR
*sp
, char *enc
)
332 sp
->conv
.file2int
= raw2int
;
333 sp
->conv
.int2file
= int2raw
;
337 if (!strcmp(enc
, "WCHAR_T")) {
338 sp
->conv
.file2int
= CHAR_T_char2int
;
339 sp
->conv
.int2file
= CHAR_T_int2char
;
343 id
= iconv_open(enc
, nl_langinfo(CODESET
));
344 if (id
== (iconv_t
)-1)
347 id
= iconv_open(nl_langinfo(CODESET
), enc
);
348 if (id
== (iconv_t
)-1)
352 sp
->conv
.file2int
= default_char2int
;
353 sp
->conv
.int2file
= default_int2char
;
355 F_CLR(sp
, SC_CONV_ERROR
);
356 F_SET(sp
, SC_SCR_REFORMAT
);
361 "321|File encoding conversion not supported");