remove unneeded #include
[nvi.git] / common / conv.c
blobc3e6ae8faf3e4d53c92cf57b37efd9d2ca3f5839
1 /*-
2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
8 */
10 #include "config.h"
12 #ifndef lint
13 static const char sccsid[] = "$Id: conv.c,v 1.24 2001/06/30 17:48:23 skimo Exp $ (Berkeley) $Date: 2001/06/30 17:48:23 $";
14 #endif /* not lint */
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
20 #include <bitstring.h>
21 #include <errno.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
28 #include "common.h"
30 #include <langinfo.h>
31 #include <iconv.h>
32 #include <locale.h>
34 #ifdef USE_WIDECHAR
35 int
36 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
37 CHAR_T **dst)
39 int i;
40 CHAR_T **tostr = (CHAR_T **)&cw->bp1;
41 size_t *blen = &cw->blen1;
43 BINC_RETW(NULL, *tostr, *blen, len);
45 *tolen = len;
46 for (i = 0; i < len; ++i)
47 (*tostr)[i] = (u_char) str[i];
49 *dst = cw->bp1;
51 return 0;
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT --
 *	Run iconv over the bytes at str and store the result in the
 *	caller's local `buffer' (CONV_BUFFER_SIZE bytes).
 *
 *	str and left are handed to iconv by address, so they are advanced
 *	and decremented as input is consumed.  len receives the number of
 *	bytes placed in buffer and src is set to the start of buffer.
 *
 *	The macro depends on the enclosing function providing `buffer',
 *	`id', `error' and an `err' label.  Every iconv failure, E2BIG
 *	included, jumps to err.  If iconv produced no output at all,
 *	error is set to -left before jumping.
 */
#define CONVERT(str, left, src, len)                                    \
    do {                                                                \
        char *cvt_out = buffer;                                         \
        size_t cvt_room = CONV_BUFFER_SIZE;                             \
        errno = 0;                                                      \
        if (iconv(id, (char **)&str, &left, &cvt_out, &cvt_room) ==     \
            (size_t)-1)                                                 \
            goto err;                                                   \
        len = CONV_BUFFER_SIZE - cvt_room;                              \
        if (len == 0) {                                                 \
            error = -left;                                              \
            goto err;                                                   \
        }                                                               \
        src = buffer;                                                   \
    } while (0)
75 int
76 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
77 size_t *tolen, CHAR_T **dst, char *enc)
79 int i = 0, j;
80 CHAR_T **tostr = (CHAR_T **)&cw->bp1;
81 size_t *blen = &cw->blen1;
82 mbstate_t mbs;
83 size_t n;
84 ssize_t nlen = len;
85 char *src = (char *)str;
86 iconv_t id = (iconv_t)-1;
87 char buffer[CONV_BUFFER_SIZE];
88 size_t left = len;
89 int error = 1;
91 MEMSET(&mbs, 0, 1);
92 BINC_RETW(NULL, *tostr, *blen, nlen);
94 if (strcmp(nl_langinfo(CODESET), enc)) {
95 id = iconv_open(nl_langinfo(CODESET), enc);
96 if (id == (iconv_t)-1)
97 goto err;
98 CONVERT(str, left, src, len);
101 for (i = 0, j = 0; j < len; ) {
102 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
103 /* NULL character converted */
104 if (n == -2) error = -(len-j);
105 if (n == -1 || n == -2) goto err;
106 if (n == 0) n = 1;
107 j += n;
108 if (++i >= *blen) {
109 nlen += 256;
110 BINC_RETW(NULL, *tostr, *blen, nlen);
112 if (id != (iconv_t)-1 && j == len && left) {
113 CONVERT(str, left, src, len);
114 j = 0;
117 *tolen = i;
119 if (id != (iconv_t)-1)
120 iconv_close(id);
122 *dst = cw->bp1;
124 return 0;
125 err:
126 *tolen = i;
127 if (id != (iconv_t)-1)
128 iconv_close(id);
129 *dst = cw->bp1;
131 return error;
134 int
135 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
136 size_t *tolen, CHAR_T **dst)
138 default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
141 int
142 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
143 size_t *tolen, CHAR_T **dst)
145 default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
148 int
149 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
150 size_t *tolen, CHAR_T **dst)
152 default_char2int(sp, str, len, cw, tolen, dst, nl_langinfo(CODESET));
155 int
156 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
157 size_t *tolen, char **dst)
159 *tolen = len * sizeof(CHAR_T);
160 *dst = (char*) str;
162 return 0;
165 int
166 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
167 size_t *tolen, CHAR_T **dst)
169 *tolen = len / sizeof(CHAR_T);
170 *dst = (CHAR_T*) str;
172 return 0;
175 int
176 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
177 char **dst)
179 int i;
180 char **tostr = (char **)&cw->bp1;
181 size_t *blen = &cw->blen1;
183 BINC_RET(NULL, *tostr, *blen, len);
185 *tolen = len;
186 for (i = 0; i < len; ++i)
187 (*tostr)[i] = str[i];
189 *dst = cw->bp1;
191 return 0;
194 int
195 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
196 size_t *tolen, char **pdst, char *enc)
198 int i, j, offset = 0;
199 char **tostr = (char **)&cw->bp1;
200 size_t *blen = &cw->blen1;
201 mbstate_t mbs;
202 size_t n;
203 ssize_t nlen = len + MB_CUR_MAX;
204 char *dst;
205 size_t buflen;
206 char buffer[CONV_BUFFER_SIZE];
207 iconv_t id = (iconv_t)-1;
209 /* convert first len bytes of buffer and append it to cw->bp
210 * len is adjusted => 0
211 * offset contains the offset in cw->bp and is adjusted
212 * cw->bp is grown as required
214 #define CONVERT2(len, cw, offset) \
215 do { \
216 char *bp = buffer; \
217 while (len != 0) { \
218 size_t outleft = cw->blen1 - offset; \
219 char *obp = (char *)cw->bp1 + offset; \
220 if (cw->blen1 < offset + MB_CUR_MAX) { \
221 nlen += 256; \
222 BINC_RET(NULL, cw->bp1, cw->blen1, nlen); \
224 errno = 0; \
225 if (iconv(id, &bp, &len, &obp, &outleft) == -1 && \
226 errno != E2BIG) \
227 goto err; \
228 offset = cw->blen1 - outleft; \
230 } while (0)
233 MEMSET(&mbs, 0, 1);
234 BINC_RET(NULL, *tostr, *blen, nlen);
235 dst = *tostr; buflen = *blen;
237 if (strcmp(nl_langinfo(CODESET), enc)) {
238 id = iconv_open(enc, nl_langinfo(CODESET));
239 if (id == (iconv_t)-1)
240 goto err;
241 dst = buffer; buflen = CONV_BUFFER_SIZE;
244 for (i = 0, j = 0; i < len; ++i) {
245 n = wcrtomb(dst+j, str[i], &mbs);
246 if (n == -1) goto err;
247 j += n;
248 if (buflen < j + MB_CUR_MAX) {
249 if (id != (iconv_t)-1) {
250 CONVERT2(j, cw, offset);
251 } else {
252 nlen += 256;
253 BINC_RET(NULL, *tostr, *blen, nlen);
254 dst = *tostr; buflen = *blen;
259 n = wcrtomb(dst+j, L'\0', &mbs);
260 j += n - 1; /* don't count NUL at the end */
261 *tolen = j;
263 if (id != (iconv_t)-1) {
264 CONVERT2(j, cw, offset);
265 *tolen = offset;
268 *pdst = cw->bp1;
270 return 0;
271 err:
272 *tolen = j;
274 *pdst = cw->bp1;
276 return 1;
279 int
280 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
281 size_t *tolen, char **dst)
283 default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
286 int
287 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
288 size_t *tolen, char **dst)
290 default_int2char(sp, str, len, cw, tolen, dst, nl_langinfo(CODESET));
293 #endif
296 void
297 conv_init (SCR *orig, SCR *sp)
299 if (orig != NULL)
300 MEMCPY(&sp->conv, &orig->conv, 1);
301 else {
302 setlocale(LC_ALL, "");
303 #ifdef USE_WIDECHAR
304 sp->conv.sys2int = cs_char2int;
305 sp->conv.int2sys = cs_int2char;
306 sp->conv.file2int = fe_char2int;
307 sp->conv.int2file = fe_int2char;
308 sp->conv.input2int = ie_char2int;
309 #endif
310 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
311 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
316 conv_enc (SCR *sp, int option, char *enc)
318 #ifdef USE_WIDECHAR
319 iconv_t id;
320 char2wchar_t *c2w;
321 wchar2char_t *w2c;
323 switch (option) {
324 case O_FILEENCODING:
325 c2w = &sp->conv.file2int;
326 w2c = &sp->conv.int2file;
327 break;
328 case O_INPUTENCODING:
329 c2w = &sp->conv.input2int;
330 w2c = NULL;
331 break;
334 if (!*enc) {
335 if (c2w) *c2w = raw2int;
336 if (w2c) *w2c = int2raw;
337 return 0;
340 if (!strcmp(enc, "WCHAR_T")) {
341 if (c2w) *c2w = CHAR_T_char2int;
342 if (w2c) *w2c = CHAR_T_int2char;
343 return 0;
346 id = iconv_open(enc, nl_langinfo(CODESET));
347 if (id == (iconv_t)-1)
348 goto err;
349 iconv_close(id);
350 id = iconv_open(nl_langinfo(CODESET), enc);
351 if (id == (iconv_t)-1)
352 goto err;
353 iconv_close(id);
355 switch (option) {
356 case O_FILEENCODING:
357 *c2w = fe_char2int;
358 *w2c = fe_int2char;
359 break;
360 case O_INPUTENCODING:
361 *c2w = ie_char2int;
362 break;
365 F_CLR(sp, SC_CONV_ERROR);
366 F_SET(sp, SC_SCR_REFORMAT);
368 return 0;
369 err:
370 switch (option) {
371 case O_FILEENCODING:
372 msgq(sp, M_ERR,
373 "321|File encoding conversion not supported");
374 case O_INPUTENCODING:
375 msgq(sp, M_ERR,
376 "322|Input encoding conversion not supported");
378 #endif
379 return 1;