02dfd32b7ee4212ef868a0aa898ba7538d68db15
[nvi.git] / common / conv.c
blob02dfd32b7ee4212ef868a0aa898ba7538d68db15
1 /*-
2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
8 */
10 #include "config.h"
12 #ifndef lint
13 static const char sccsid[] = "$Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp $ (Berkeley) $Date: 2001/08/18 21:41:41 $";
14 #endif /* not lint */
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
20 #include <bitstring.h>
21 #include <errno.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
28 #include "common.h"
30 #ifdef USE_ICONV
31 #include <langinfo.h>
32 #include <iconv.h>
34 #define LANGCODESET nl_langinfo(CODESET)
35 #else
36 typedef int iconv_t;
38 #define LANGCODESET ""
39 #endif
41 #include <locale.h>
43 #ifdef USE_WIDECHAR
44 int
45 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
46 CHAR_T **dst)
48 int i;
49 CHAR_T **tostr = (CHAR_T **)&cw->bp1;
50 size_t *blen = &cw->blen1;
52 BINC_RETW(NULL, *tostr, *blen, len);
54 *tolen = len;
55 for (i = 0; i < len; ++i)
56 (*tostr)[i] = (u_char) str[i];
58 *dst = cw->bp1;
60 return 0;
#define CONV_BUFFER_SIZE 512
/*
 * CONVERT --
 *	Fill the caller's local `buffer' with the codeset encoding of the
 *	string pointed to by str.
 *
 *	str	input pointer; advanced by iconv as input is consumed
 *	left	number of bytes left in str; adjusted by iconv
 *	src	set to point at `buffer'
 *	len	set to the number of bytes put in `buffer'
 *
 *	The expansion relies on these names in the calling function: `id'
 *	(open iconv descriptor), `buffer' (char[CONV_BUFFER_SIZE]), `error'
 *	(int result code) and an `err' label.
 *
 *	E2BIG only means the output buffer filled up before all input was
 *	consumed; it is not a failure.  The caller converts the remaining
 *	input by invoking CONVERT again while `left' is non-zero.  Any other
 *	iconv failure jumps to `err'.  If nothing at all could be produced,
 *	`error' is set to -left before jumping to `err'.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)                                    \
    do {                                                                \
        size_t outleft;                                                 \
        char *bp = buffer;                                              \
        outleft = CONV_BUFFER_SIZE;                                     \
        errno = 0;                                                      \
        if (iconv(id, (char **)&str, &left, &bp, &outleft) ==          \
                (size_t)-1 && errno != E2BIG)                           \
            goto err;                                                   \
        if ((len = CONV_BUFFER_SIZE - outleft) == 0) {                  \
            error = -left;                                              \
            goto err;                                                   \
        }                                                               \
        src = buffer;                                                   \
    } while (0)
#else
#define CONVERT(str, left, src, len)
#endif
88 int
89 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
90 size_t *tolen, CHAR_T **dst, char *enc)
92 int i = 0, j;
93 CHAR_T **tostr = (CHAR_T **)&cw->bp1;
94 size_t *blen = &cw->blen1;
95 mbstate_t mbs;
96 size_t n;
97 ssize_t nlen = len;
98 char *src = (char *)str;
99 iconv_t id = (iconv_t)-1;
100 char buffer[CONV_BUFFER_SIZE];
101 size_t left = len;
102 int error = 1;
104 MEMSET(&mbs, 0, 1);
105 BINC_RETW(NULL, *tostr, *blen, nlen);
107 #ifdef USE_ICONV
108 if (strcmp(nl_langinfo(CODESET), enc)) {
109 id = iconv_open(nl_langinfo(CODESET), enc);
110 if (id == (iconv_t)-1)
111 goto err;
112 CONVERT(str, left, src, len);
114 #endif
116 for (i = 0, j = 0; j < len; ) {
117 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
118 /* NULL character converted */
119 if (n == -2) error = -(len-j);
120 if (n == -1 || n == -2) goto err;
121 if (n == 0) n = 1;
122 j += n;
123 if (++i >= *blen) {
124 nlen += 256;
125 BINC_RETW(NULL, *tostr, *blen, nlen);
127 if (id != (iconv_t)-1 && j == len && left) {
128 CONVERT(str, left, src, len);
129 j = 0;
132 *tolen = i;
134 if (id != (iconv_t)-1)
135 iconv_close(id);
137 *dst = cw->bp1;
139 return 0;
140 err:
141 *tolen = i;
142 if (id != (iconv_t)-1)
143 iconv_close(id);
144 *dst = cw->bp1;
146 return error;
149 int
150 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
151 size_t *tolen, CHAR_T **dst)
153 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
156 int
157 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
158 size_t *tolen, CHAR_T **dst)
160 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
163 int
164 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
165 size_t *tolen, CHAR_T **dst)
167 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET);
170 int
171 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
172 size_t *tolen, char **dst)
174 *tolen = len * sizeof(CHAR_T);
175 *dst = (char*) str;
177 return 0;
180 int
181 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
182 size_t *tolen, CHAR_T **dst)
184 *tolen = len / sizeof(CHAR_T);
185 *dst = (CHAR_T*) str;
187 return 0;
190 int
191 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
192 char **dst)
194 int i;
195 char **tostr = (char **)&cw->bp1;
196 size_t *blen = &cw->blen1;
198 BINC_RETC(NULL, *tostr, *blen, len);
200 *tolen = len;
201 for (i = 0; i < len; ++i)
202 (*tostr)[i] = str[i];
204 *dst = cw->bp1;
206 return 0;
209 int
210 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
211 size_t *tolen, char **pdst, char *enc)
213 int i, j, offset = 0;
214 char **tostr = (char **)&cw->bp1;
215 size_t *blen = &cw->blen1;
216 mbstate_t mbs;
217 size_t n;
218 ssize_t nlen = len + MB_CUR_MAX;
219 char *dst;
220 size_t buflen;
221 char buffer[CONV_BUFFER_SIZE];
222 iconv_t id = (iconv_t)-1;
224 /* convert first len bytes of buffer and append it to cw->bp
225 * len is adjusted => 0
226 * offset contains the offset in cw->bp and is adjusted
227 * cw->bp is grown as required
229 #ifdef USE_ICONV
230 #define CONVERT2(len, cw, offset) \
231 do { \
232 char *bp = buffer; \
233 while (len != 0) { \
234 size_t outleft = cw->blen1 - offset; \
235 char *obp = (char *)cw->bp1 + offset; \
236 if (cw->blen1 < offset + MB_CUR_MAX) { \
237 nlen += 256; \
238 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
240 errno = 0; \
241 if (iconv(id, &bp, &len, &obp, &outleft) == -1 && \
242 errno != E2BIG) \
243 goto err; \
244 offset = cw->blen1 - outleft; \
246 } while (0)
247 #else
248 #define CONVERT2(len, cw, offset)
249 #endif
252 MEMSET(&mbs, 0, 1);
253 BINC_RETC(NULL, *tostr, *blen, nlen);
254 dst = *tostr; buflen = *blen;
256 #ifdef USE_ICONV
257 if (strcmp(nl_langinfo(CODESET), enc)) {
258 id = iconv_open(enc, nl_langinfo(CODESET));
259 if (id == (iconv_t)-1)
260 goto err;
261 dst = buffer; buflen = CONV_BUFFER_SIZE;
263 #endif
265 for (i = 0, j = 0; i < len; ++i) {
266 n = wcrtomb(dst+j, str[i], &mbs);
267 if (n == -1) goto err;
268 j += n;
269 if (buflen < j + MB_CUR_MAX) {
270 if (id != (iconv_t)-1) {
271 CONVERT2(j, cw, offset);
272 } else {
273 nlen += 256;
274 BINC_RETC(NULL, *tostr, *blen, nlen);
275 dst = *tostr; buflen = *blen;
280 n = wcrtomb(dst+j, L'\0', &mbs);
281 j += n - 1; /* don't count NUL at the end */
282 *tolen = j;
284 if (id != (iconv_t)-1) {
285 CONVERT2(j, cw, offset);
286 *tolen = offset;
289 *pdst = cw->bp1;
291 return 0;
292 err:
293 *tolen = j;
295 *pdst = cw->bp1;
297 return 1;
300 int
301 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
302 size_t *tolen, char **dst)
304 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
307 int
308 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
309 size_t *tolen, char **dst)
311 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET);
314 #endif
317 void
318 conv_init (SCR *orig, SCR *sp)
320 if (orig != NULL)
321 MEMCPY(&sp->conv, &orig->conv, 1);
322 else {
323 setlocale(LC_ALL, "");
324 #ifdef USE_WIDECHAR
325 sp->conv.sys2int = cs_char2int;
326 sp->conv.int2sys = cs_int2char;
327 sp->conv.file2int = fe_char2int;
328 sp->conv.int2file = fe_int2char;
329 sp->conv.input2int = ie_char2int;
330 #endif
331 #ifdef USE_ICONV
332 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
333 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
334 #endif
339 conv_enc (SCR *sp, int option, char *enc)
341 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
342 iconv_t id;
343 char2wchar_t *c2w;
344 wchar2char_t *w2c;
346 switch (option) {
347 case O_FILEENCODING:
348 c2w = &sp->conv.file2int;
349 w2c = &sp->conv.int2file;
350 break;
351 case O_INPUTENCODING:
352 c2w = &sp->conv.input2int;
353 w2c = NULL;
354 break;
357 if (!*enc) {
358 if (c2w) *c2w = raw2int;
359 if (w2c) *w2c = int2raw;
360 return 0;
363 if (!strcmp(enc, "WCHAR_T")) {
364 if (c2w) *c2w = CHAR_T_char2int;
365 if (w2c) *w2c = CHAR_T_int2char;
366 return 0;
369 id = iconv_open(enc, nl_langinfo(CODESET));
370 if (id == (iconv_t)-1)
371 goto err;
372 iconv_close(id);
373 id = iconv_open(nl_langinfo(CODESET), enc);
374 if (id == (iconv_t)-1)
375 goto err;
376 iconv_close(id);
378 switch (option) {
379 case O_FILEENCODING:
380 *c2w = fe_char2int;
381 *w2c = fe_int2char;
382 break;
383 case O_INPUTENCODING:
384 *c2w = ie_char2int;
385 break;
388 F_CLR(sp, SC_CONV_ERROR);
389 F_SET(sp, SC_SCR_REFORMAT);
391 return 0;
392 err:
393 switch (option) {
394 case O_FILEENCODING:
395 msgq(sp, M_ERR,
396 "321|File encoding conversion not supported");
397 break;
398 case O_INPUTENCODING:
399 msgq(sp, M_ERR,
400 "322|Input encoding conversion not supported");
401 break;
403 #endif
404 return 1;