use iconv for non-codeset fileencodings
[nvi.git] / common / conv.c
blob 54f1d81d52e8483865c1fb4b3b83b3bdfa5f0565
1 /*-
2 * Copyright (c) 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
8 */
10 #include "config.h"
12 #ifndef lint
13 static const char sccsid[] = "$Id: conv.c,v 1.11 2001/05/06 21:10:27 skimo Exp $ (Berkeley) $Date: 2001/05/06 21:10:27 $";
14 #endif /* not lint */
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
20 #include <bitstring.h>
21 #include <errno.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
28 #include "common.h"
29 #ifdef HAVE_NCURSESW
30 #include <ncurses.h>
31 #endif
33 #include <langinfo.h>
34 #include <iconv.h>
35 #include <locale.h>
37 int
38 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen)
40 int i;
41 CHAR_T **tostr = (CHAR_T **)&cw->bp1;
42 size_t *blen = &cw->blen1;
44 BINC_RETW(NULL, *tostr, *blen, len);
46 *tolen = len;
47 for (i = 0; i < len; ++i)
48 (*tostr)[i] = (u_char) str[i];
50 return 0;
#define CONV_BUFFER_SIZE 512

/*
 * CONVERT --
 *	Run iconv() over the bytes at str, writing the result into the
 *	enclosing function's `buffer' array (CONV_BUFFER_SIZE bytes).
 *
 *	str, left	input pointer and number of input bytes remaining;
 *			both are advanced/adjusted by iconv()
 *	src		set to point at buffer
 *	len		set to the number of bytes placed in buffer
 *
 * The enclosing function must provide `buffer', the conversion
 * descriptor `id' and an `err' label.  Running out of output space
 * (E2BIG) is not treated as a failure; any other iconv() error jumps
 * to err.
 */
#define CONVERT(str, left, src, len)					\
	do {								\
		char *conv_out = buffer;				\
		size_t conv_room = CONV_BUFFER_SIZE;			\
		errno = 0;						\
		if (iconv(id, (char **)&str, &left, &conv_out,		\
		    &conv_room) == (size_t)-1 && errno != E2BIG)	\
			goto err;					\
		src = buffer;						\
		len = CONV_BUFFER_SIZE - conv_room;			\
	} while (0)
71 int
72 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen)
74 int i = 0, j;
75 CHAR_T **tostr = (CHAR_T **)&cw->bp1;
76 size_t *blen = &cw->blen1;
77 mbstate_t mbs;
78 size_t n;
79 ssize_t nlen = len;
80 char *src = (char *)str;
81 iconv_t id = (iconv_t)-1;
82 char *enc = O_STR(sp, O_FILEENCODING);
83 char buffer[CONV_BUFFER_SIZE];
84 size_t left = len;
86 MEMSET(&mbs, 0, 1);
87 BINC_RETW(NULL, *tostr, *blen, nlen);
89 if (strcmp(nl_langinfo(CODESET), enc)) {
90 id = iconv_open(nl_langinfo(CODESET), enc);
91 if (id == (iconv_t)-1)
92 goto err;
93 CONVERT(str, left, src, len);
96 for (i = 0, j = 0; j < len; ) {
97 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs);
98 /* NULL character converted */
99 if (n == -1 || n == -2) goto err;
100 if (n == 0) n = 1;
101 j += n;
102 if (++i >= *blen) {
103 nlen += 256;
104 BINC_RETW(NULL, *tostr, *blen, nlen);
106 if (id != (iconv_t)-1 && j == len && left) {
107 CONVERT(str, left, src, len);
108 j = 0;
111 *tolen = i;
113 if (id != (iconv_t)-1)
114 iconv_close(id);
116 return 0;
117 err:
118 *tolen = i;
119 if (id != (iconv_t)-1)
120 iconv_close(id);
121 return 1;
124 int
125 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen)
127 int i;
128 char **tostr = (char **)&cw->bp1;
129 size_t *blen = &cw->blen1;
131 BINC_RET(NULL, *tostr, *blen, len);
133 *tolen = len;
134 for (i = 0; i < len; ++i)
135 (*tostr)[i] = str[i];
137 return 0;
140 int
141 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen)
143 int i, j, offset = 0;
144 char **tostr = (char **)&cw->bp1;
145 size_t *blen = &cw->blen1;
146 mbstate_t mbs;
147 size_t n;
148 ssize_t nlen = len + MB_CUR_MAX;
149 char *dst;
150 size_t buflen;
151 char buffer[CONV_BUFFER_SIZE];
152 iconv_t id = (iconv_t)-1;
153 char *enc = O_STR(sp, O_FILEENCODING);
155 /* convert first len bytes of buffer and append it to cw->bp
156 * len is adjusted => 0
157 * offset contains the offset in cw->bp and is adjusted
158 * cw->bp is grown as required
160 #define CONVERT2(len, cw, offset) \
161 do { \
162 char *bp = buffer; \
163 while (len != 0) { \
164 size_t outleft = cw->blen1 - offset; \
165 char *obp = cw->bp1 + offset; \
166 if (cw->blen1 < offset + MB_CUR_MAX) { \
167 nlen += 256; \
168 BINC_RET(NULL, cw->bp1, cw->blen1, nlen); \
170 errno = 0; \
171 if (iconv(id, &bp, &len, &obp, &outleft) == -1 && \
172 errno != E2BIG) \
173 goto err; \
174 offset = cw->blen1 - outleft; \
176 } while (0)
179 MEMSET(&mbs, 0, 1);
180 BINC_RET(NULL, *tostr, *blen, nlen);
181 dst = *tostr; buflen = *blen;
183 if (strcmp(nl_langinfo(CODESET), enc)) {
184 id = iconv_open(enc, nl_langinfo(CODESET));
185 if (id == (iconv_t)-1)
186 goto err;
187 dst = buffer; buflen = CONV_BUFFER_SIZE;
190 for (i = 0, j = 0; i < len; ++i) {
191 n = wcrtomb(dst+j, str[i], &mbs);
192 if (n == -1) goto err;
193 j += n;
194 if (buflen < j + MB_CUR_MAX) {
195 if (id != (iconv_t)-1) {
196 CONVERT2(j, cw, offset);
197 } else {
198 nlen += 256;
199 BINC_RET(NULL, *tostr, *blen, nlen);
200 dst = *tostr; buflen = *blen;
205 n = wcrtomb(dst+j, L'\0', &mbs);
206 j += n - 1; /* don't count NUL at the end */
207 *tolen = j;
209 if (id != (iconv_t)-1) {
210 CONVERT2(j, cw, offset);
211 *tolen = offset;
214 return 0;
215 err:
216 *tolen = j;
217 return 1;
220 //#ifdef HAVE_NCURSESW
221 #ifdef HAVE_ADDNWSTR
222 int
223 default_int2disp (SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen)
225 int i, j;
226 chtype *dest;
227 char **tostr = (char **)&cw->bp1;
228 size_t *blen = &cw->blen1;
230 BINC_RET(NULL, *tostr, *blen, len * sizeof(chtype));
232 dest = *tostr;
234 for (i = 0, j = 0; i < len; ++i)
235 if (str[i] > 0xffff) {
236 dest[j++] = 0xfffd;
237 } else
238 dest[j++] = str[i];
239 *tolen = j;
241 return 0;
244 #else
246 int
247 default_int2disp (SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen)
249 int i, j;
250 char **tostr = (char **)&cw->bp1;
251 size_t *blen = &cw->blen1;
253 BINC_RET(NULL, *tostr, *blen, len * 2);
255 for (i = 0, j = 0; i < len; ++i)
256 if (CHAR_WIDTH(NULL, str[i]) > 1) {
257 (*tostr)[j++] = '[';
258 (*tostr)[j++] = ']';
259 } else
260 (*tostr)[j++] = str[i];
261 *tolen = j;
263 return 0;
265 #endif
267 int
268 gb2int (SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen)
270 int i, j;
271 CHAR_T **tostr = (CHAR_T **)&cw->bp1;
272 size_t *blen = &cw->blen1;
274 BINC_RETW(NULL, *tostr, *blen, len);
276 for (i = 0, j = 0; i < len; ++i) {
277 if (str[i] & 0x80) {
278 if (i+1 < len && str[i+1] & 0x80) {
279 (*tostr)[j++] = INT9494(F_GB,str[i]&0x7F,str[i+1]&0x7F);
280 ++i;
281 } else {
282 (*tostr)[j++] = INTILL(str[i]);
284 } else
285 (*tostr)[j++] = str[i];
287 *tolen = j;
289 return 0;
292 CONV raw_conv = { raw2int, int2raw,
293 raw2int, int2raw, default_int2disp };
294 CONV default_conv = { raw2int, int2raw,
295 default_char2int, default_int2char, default_int2disp };
297 void
298 conv_init (SCR *orig, SCR *sp)
300 if (orig != NULL)
301 sp->conv = orig->conv;
302 else {
303 setlocale(LC_ALL, "");
304 sp->conv = &default_conv;
305 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
310 conv_enc (SCR *sp, char *enc)
312 iconv_t id;
314 if (!*enc) {
315 sp->conv = &raw_conv;
316 return 0;
318 id = iconv_open(enc, nl_langinfo(CODESET));
319 if (id == (iconv_t)-1)
320 goto err;
321 iconv_close(id);
322 id = iconv_open(nl_langinfo(CODESET), enc);
323 if (id == (iconv_t)-1)
324 goto err;
325 iconv_close(id);
327 return 0;
328 err:
329 msgq(sp, M_ERR,
330 "321|File encoding conversion not supported");
331 return 1;