From c22f3ba80b9f462e48d4927ab6ddcdacd4ea8481 Mon Sep 17 00:00:00 2001 From: skimo Date: Sun, 6 May 2001 21:10:27 +0000 Subject: [PATCH] use iconv for non-codeset fileencodings --- catalog/english.base | 1 + common/conv.c | 214 ++++++++++++++++++++++++++++----------------------- 2 files changed, 120 insertions(+), 95 deletions(-) diff --git a/catalog/english.base b/catalog/english.base index 1ae8d990..8f6d7e73 100644 --- a/catalog/english.base +++ b/catalog/english.base @@ -309,3 +309,4 @@ 318 "Unexpected command or input" 319 "%d screens backgrounded; use :display to list them" 320 "Unknown cursor position." +321 "File encoding conversion not supported" diff --git a/common/conv.c b/common/conv.c index 32d85fd9..54f1d81d 100644 --- a/common/conv.c +++ b/common/conv.c @@ -10,7 +10,7 @@ #include "config.h" #ifndef lint -static const char sccsid[] = "$Id: conv.c,v 1.10 2001/04/25 23:42:44 skimo Exp $ (Berkeley) $Date: 2001/04/25 23:42:44 $"; +static const char sccsid[] = "$Id: conv.c,v 1.11 2001/05/06 21:10:27 skimo Exp $ (Berkeley) $Date: 2001/05/06 21:10:27 $"; #endif /* not lint */ #include @@ -31,9 +31,11 @@ static const char sccsid[] = "$Id: conv.c,v 1.10 2001/04/25 23:42:44 skimo Exp $ #endif #include +#include +#include int -ascii2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen) +raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen) { int i; CHAR_T **tostr = (CHAR_T **)&cw->bp1; @@ -48,21 +50,51 @@ ascii2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen) return 0; } +#define CONV_BUFFER_SIZE 512 +/* fill the buffer with codeset encoding of string pointed to by str + * left has the number of bytes left in str and is adjusted + * len contains the number of bytes put in the buffer + */ +#define CONVERT(str, left, src, len) \ + do { \ + size_t outleft; \ + char *bp = buffer; \ + outleft = CONV_BUFFER_SIZE; \ + errno = 0; \ + if (iconv(id, (char **)&str, &left, &bp, &outleft) == -1 && \ + errno != E2BIG) \ + goto err; \ + len = CONV_BUFFER_SIZE - outleft; \ + src = buffer; \ + } while (0) + int default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen) { - int i, j; + int i = 0, j; CHAR_T **tostr = (CHAR_T **)&cw->bp1; size_t *blen = &cw->blen1; mbstate_t mbs; size_t n; ssize_t nlen = len; + char *src = (char *)str; + iconv_t id = (iconv_t)-1; + char *enc = O_STR(sp, O_FILEENCODING); + char buffer[CONV_BUFFER_SIZE]; + size_t left = len; MEMSET(&mbs, 0, 1); BINC_RETW(NULL, *tostr, *blen, nlen); + if (strcmp(nl_langinfo(CODESET), enc)) { + id = iconv_open(nl_langinfo(CODESET), enc); + if (id == (iconv_t)-1) + goto err; + CONVERT(str, left, src, len); + } + for (i = 0, j = 0; j < len; ) { - n = mbrtowc((*tostr)+i, str+j, len-j, &mbs); + n = mbrtowc((*tostr)+i, src+j, len-j, &mbs); /* NULL character converted */ if (n == -1 || n == -2) goto err; if (n == 0) n = 1; @@ -71,17 +103,26 @@ default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *to nlen += 256; BINC_RETW(NULL, *tostr, *blen, nlen); } + if (id != (iconv_t)-1 && j == len && left) { + CONVERT(str, left, src, len); + j = 0; + } } *tolen = i; + if (id != (iconv_t)-1) + iconv_close(id); + return 0; err: *tolen = i; + if (id != (iconv_t)-1) + iconv_close(id); return 1; } int -int2ascii(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen) +int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen) { int i; char **tostr = (char **)&cw->bp1; @@ -99,29 +140,77 @@ int2ascii(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen) int default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen) { - int i, j; + int i, j, offset = 0; char **tostr = (char **)&cw->bp1; size_t *blen = &cw->blen1; mbstate_t mbs; size_t n; ssize_t nlen = len + MB_CUR_MAX; + char *dst; + size_t buflen; + char buffer[CONV_BUFFER_SIZE]; + iconv_t id = (iconv_t)-1; + char *enc = O_STR(sp, O_FILEENCODING); + +/* convert first len bytes of buffer and append it to cw->bp + * len is adjusted => 0 + * offset contains the offset in cw->bp and is adjusted + * cw->bp is grown as required + */ +#define CONVERT2(len, cw, offset) \ + do { \ + char *bp = buffer; \ + while (len != 0) { \ + size_t outleft = cw->blen1 - offset; \ + char *obp = cw->bp1 + offset; \ + if (cw->blen1 < offset + MB_CUR_MAX) { \ + nlen += 256; \ + BINC_RET(NULL, cw->bp1, cw->blen1, nlen); \ + } \ + errno = 0; \ + if (iconv(id, &bp, &len, &obp, &outleft) == -1 && \ + errno != E2BIG) \ + goto err; \ + offset = cw->blen1 - outleft; \ + } \ + } while (0) + MEMSET(&mbs, 0, 1); BINC_RET(NULL, *tostr, *blen, nlen); + dst = *tostr; buflen = *blen; + + if (strcmp(nl_langinfo(CODESET), enc)) { + id = iconv_open(enc, nl_langinfo(CODESET)); + if (id == (iconv_t)-1) + goto err; + dst = buffer; buflen = CONV_BUFFER_SIZE; + } for (i = 0, j = 0; i < len; ++i) { - n = wcrtomb((*tostr)+j, str[i], &mbs); + n = wcrtomb(dst+j, str[i], &mbs); if (n == -1) goto err; j += n; - if (*blen < j + MB_CUR_MAX) { - nlen += 256; - BINC_RET(NULL, *tostr, *blen, nlen); + if (buflen < j + MB_CUR_MAX) { + if (id != (iconv_t)-1) { + CONVERT2(j, cw, offset); + } else { + nlen += 256; + BINC_RET(NULL, *tostr, *blen, nlen); + dst = *tostr; buflen = *blen; + } } } - n = wcrtomb((*tostr)+j, L'\0', &mbs); + + n = wcrtomb(dst+j, L'\0', &mbs); j += n - 1; /* don't count NUL at the end */ *tolen = j; + if (id != (iconv_t)-1) { + CONVERT2(j, cw, offset); + *tolen = offset; + } + return 0; err: *tolen = j; @@ -200,82 +289,10 @@ gb2int (SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen) return 0; } -int -int2gb(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen) -{ - int i, j; - char **tostr = (char **)&cw->bp1; - size_t *blen = &cw->blen1; - - BINC_RET(NULL, *tostr, *blen, len * 2); - - for (i = 0, j = 0; i < len; ++i) { - if (INTIS9494(str[i])) { - (*tostr)[j++] = INT9494R(str[i]) | 0x80; - (*tostr)[j++] = INT9494C(str[i]) | 0x80; - } else { - (*tostr)[j++] = str[i] & 0xFF; - } - } - *tolen = j; - - return 0; -} - -int -utf82int (SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen) -{ - int i, j; - CHAR_T c; - CHAR_T **tostr = (CHAR_T **)&cw->bp1; - size_t *blen = &cw->blen1; - - BINC_RETW(NULL, *tostr, *blen, len); - - for (i = 0, j = 0; i < len; ++i) { - if (str[i] & 0x80) { - if ((str[i] & 0xe0) == 0xc0 && i+1 < len && str[i+1] & 0x80) { - c = (str[i] & 0x1f) << 6; - c |= (str[i+1] & 0x3f); - (*tostr)[j++] = c; - ++i; - } else if ((str[i] & 0xf0) == 0xe0 && i+2 < len && - str[i+1] & 0x80 && str[i+2] & 0x80) { - c = (str[i] & 0xf) << 12; - c |= (str[i+1] & 0x3f) << 6; - c |= (str[i+2] & 0x3f); - (*tostr)[j++] = c; - i += 2; - } else { - (*tostr)[j++] = INTILL(str[i]); - } - } else - (*tostr)[j++] = str[i]; - } - *tolen = j; - - return 0; -} - -int -int2utf8(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen) -{ - char **tostr = (char **)&cw->bp1; - size_t *blen = &cw->blen1; - BINC_RET(NULL, *tostr, *blen, len * 3); - - *tolen = ucs2utf8(str, len, *tostr); - - return 0; -} - - -CONV default_conv = { ascii2int, int2ascii, +CONV raw_conv = { raw2int, int2raw, + raw2int, int2raw, default_int2disp }; +CONV default_conv = { raw2int, int2raw, default_char2int, default_int2char, default_int2disp }; -CONV gb_conv = { default_char2int, default_int2char, - gb2int, int2gb, default_int2disp }; -CONV utf8_conv = { default_char2int, default_int2char, - utf82int, int2utf8, default_int2disp }; void conv_init (SCR *orig, SCR *sp) @@ -292,18 +309,25 @@ conv_init (SCR *orig, SCR *sp) int conv_enc (SCR *sp, char *enc) { + iconv_t id; + if (!*enc) { - sp->conv = &default_conv; - return 0; - } - if (!strcmp(enc,"GB")) { - sp->conv = &gb_conv; - return 0; - } - if (!strcmp(enc,"UTF-8")) { - sp->conv = &utf8_conv; + sp->conv = &raw_conv; return 0; } + id = iconv_open(enc, nl_langinfo(CODESET)); + if (id == (iconv_t)-1) + goto err; + iconv_close(id); + id = iconv_open(nl_langinfo(CODESET), enc); + if (id == (iconv_t)-1) + goto err; + iconv_close(id); + + return 0; +err: + msgq(sp, M_ERR, + "321|File encoding conversion not supported"); return 1; } -- 2.11.4.GIT