fix regression from a745c4bfc8a9b5db4e48387170da0dc1d39e3abe
[uclibc-ng.git] / utils / iconv.c
blob48a10155ece0bf4c59f253c15131d71fa9a076ed
2 /* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, see
16 * <http://www.gnu.org/licenses/>.
19 /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
21 * Besides uClibc, I'm using this code in my libc for elks, which is
22 * a 16-bit environment with a fairly limited compiler. It would make
23 * things much easier for me if this file isn't modified unnecessarily.
24 * In particular, please put any new or replacement functions somewhere
25 * else, and modify the makefile to use your version instead.
26 * Thanks. Manuel
28 * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
31 /* May 23, 2002 Initial Notes:
33 * I'm still tweaking this stuff, but it passes the tests I've thrown
34 * at it, and Erik needs it for the gcc port. The glibc extension
35 * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
36 * in the glibc source. I also need to fix the behavior of
37 * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
39 * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
40 * file on my platform (x86) show about 5-10% faster conversion speed than
41 * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
42 * individual mbrtowc()/wcrtomb() calls.
44 * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
45 * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
46 * needs to deal gracefully with whatever is sent to it. In that mode,
47 * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
48 * an arg to force that behavior, so the interface will be changing.
50 * I need to fix the error checking for 16-bit wide chars. This isn't
51 * an issue for uClibc, but may be for ELKS. I'm currently not sure
52 * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
54 * July 1, 2002
56 * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
57 * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
58 * locales.
59 * Enabled building of a C/POSIX-locale-only version, so full locale support
60 * no longer needs to be enabled.
62 * Nov 4, 2002
64 * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
65 * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
66 * order to support %ls in printf. See comments below for details.
67 * Change behaviour of wc<->mb functions when in the C locale. Now they do
68 * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
69 * and consistency with the standard's requirement that a printf format string be
70 * a valid multibyte string beginning and ending in its initial shift state.
72 * Nov 5, 2002
74 * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
76 * Nov 7, 2002
78 * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
79 * Added some size/speed optimizations and integrated it into my locale
80 * framework. Minimally tested at the moment, but the stub C-locale
81 * version (which most people would probably be using) should be fine.
83 * Nov 21, 2002
85 * Revert the wc<->mb changes from earlier this month involving the C-locale.
86 * Add a couple of ugly hacks to support *wprintf.
87 * Add a mini iconv() and iconv implementation (requires locale support).
89 * Aug 1, 2003
90 * Bug fix for mbrtowc.
92 * Aug 18, 2003
93 * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
95 * Feb 11, 2004
96 * Bug fix: Fix size check for remaining output space in iconv().
98 * Manuel
101 /* keep libgen before string.h - and porting.h to use the
102 * XPG version of basename */
103 #include <libgen.h>
104 #include "porting.h"
105 #include <string.h>
106 #include <iconv.h>
107 #include <stdarg.h>
108 #include <wchar.h>
109 #include "wchar.c" /* for _UC_iconv_t and __iconv_codesets */
/* Declaration of the codeset table walked by the -l listing in main().
 * It gets internal linkage when L_iconv_main is defined and is an
 * external symbol otherwise. */
111 #ifdef L_iconv_main
112 static
113 #else
114 extern
115 #endif
116 const unsigned char __iconv_codesets[];
/* Sizes, in bytes, of the input and output staging buffers used by main(). */
118 #define IBUF BUFSIZ
119 #define OBUF BUFSIZ
/* Set from argv[0] in main(); printed as the prefix of every error message. */
121 static char *progname;
/* When non-zero, error_msg() prints nothing (it still exits). Set by -s. */
122 static int hide_errors;
124 static void error_msg(const char *fmt, ...)
125 __attribute__ ((noreturn, format (printf, 1, 2)));
127 static void error_msg(const char *fmt, ...)
129 va_list arg;
131 if (!hide_errors) {
132 fprintf(stderr, "%s: ", progname);
133 va_start(arg, fmt);
134 vfprintf(stderr, fmt, arg);
135 va_end(arg);
138 exit(EXIT_FAILURE);
141 int main(int argc, char **argv)
143 FILE *ifile;
144 FILE *ofile = stdout;
145 const char *p;
146 const char *s;
147 static const char opt_chars[] = "tfocsl";
148 /* 012345 */
149 const char *opts[sizeof(opt_chars)]; /* last is infile name */
150 iconv_t ic;
151 char ibuf[IBUF];
152 char obuf[OBUF];
153 char *pi;
154 char *po;
155 size_t ni, no, r, pos;
157 hide_errors = 0;
159 for (s = opt_chars ; *s ; s++) {
160 opts[ s - opt_chars ] = NULL;
163 progname = *argv;
164 while (--argc) {
165 p = *++argv;
166 if ((*p != '-') || (*++p == 0)) {
167 break;
169 do {
170 if ((s = strchr(opt_chars,*p)) == NULL) {
171 USAGE:
172 s = basename(progname);
173 fprintf(stderr,
174 "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
175 " or\n%s -l\n", s, s);
176 return EXIT_FAILURE;
178 if ((s - opt_chars) < 3) {
179 if ((--argc == 0) || opts[s - opt_chars]) {
180 goto USAGE;
182 opts[s - opt_chars] = *++argv;
183 } else {
184 opts[s - opt_chars] = p;
186 } while (*++p);
189 if (opts[5]) { /* -l */
190 fprintf(stderr, "Recognized codesets:\n");
191 for (s = (char *)__iconv_codesets ; *s ; s += *s) {
192 fprintf(stderr," %s\n", s+2);
194 s = __LOCALE_DATA_CODESET_LIST;
195 do {
196 fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
197 } while (*++s);
199 return EXIT_SUCCESS;
202 if (opts[4]) {
203 hide_errors = 1;
206 if (!opts[0] || !opts[1]) {
207 goto USAGE;
209 if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
210 error_msg( "unsupported codeset in %s -> %s conversion\n", opts[1], opts[0]);
212 if (opts[3]) { /* -c */
213 ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
216 if ((s = opts[2]) != NULL) {
217 if (!(ofile = fopen(s, "w"))) {
218 error_msg( "couldn't open %s for writing\n", s);
222 pos = ni = 0;
223 do {
224 if (!argc || ((**argv == '-') && !((*argv)[1]))) {
225 ifile = stdin; /* we don't check for duplicates */
226 } else if (!(ifile = fopen(*argv, "r"))) {
227 error_msg( "couldn't open %s for reading\n", *argv);
230 while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
231 pos += r;
232 ni += r;
233 no = OBUF;
234 pi = ibuf;
235 po = obuf;
236 if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
237 if ((errno != EINVAL) && (errno != E2BIG)) {
238 error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
241 if ((r = OBUF - no) > 0) {
242 if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
243 error_msg( "write error\n");
246 if (ni) { /* still bytes in buffer! */
247 memmove(ibuf, pi, ni);
251 if (ferror(ifile)) {
252 error_msg( "read error\n");
255 ++argv;
257 if (ifile != stdin) {
258 fclose(ifile);
261 } while (--argc > 0);
263 iconv_close(ic);
265 if (ni) {
266 error_msg( "incomplete sequence\n");
269 return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)
270 ? EXIT_SUCCESS : EXIT_FAILURE;