2 /* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the Free
16 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
21 * Besides uClibc, I'm using this code in my libc for elks, which is
22 * a 16-bit environment with a fairly limited compiler. It would make
23 * things much easier for me if this file isn't modified unnecessarily.
24 * In particular, please put any new or replacement functions somewhere
25 * else, and modify the makefile to use your version instead.
28 * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
31 /* May 23, 2002 Initial Notes:
33 * I'm still tweaking this stuff, but it passes the tests I've thrown
34 * at it, and Erik needs it for the gcc port. The glibc extension
35 * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
36 * in the glibc source. I also need to fix the behavior of
37 * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
39 * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
40 * file on my platform (x86) show about 5-10% faster conversion speed than
41 * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
42 * individual mbrtowc()/wcrtomb() calls.
44 * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
45 * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
46 * needs to deal gracefully with whatever is sent to it. In that mode,
47 * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
48 * an arg to force that behavior, so the interface will be changing.
50 * I need to fix the error checking for 16-bit wide chars. This isn't
51 * an issue for uClibc, but may be for ELKS. I'm currently not sure
52 * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
56 * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
57 * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
59 * Enabled building of a C/POSIX-locale-only version, so full locale support
60 * no longer needs to be enabled.
64 * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
65 * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
66 * order to support %ls in printf. See comments below for details.
67 * Change behaviour of wc<->mb functions when in the C locale. Now they do
68 * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
69 * and consistency with the standard's requirement that a printf format string be
70 * a valid multibyte string beginning and ending in its initial shift state.
74 * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
78 * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
79 * Added some size/speed optimizations and integrated it into my locale
80 * framework. Minimally tested at the moment, but the stub C-locale
81 * version (which most people would probably be using) should be fine.
85 * Revert the wc<->mb changes from earlier this month involving the C-locale.
86 * Add a couple of ugly hacks to support *wprintf.
87 * Add a mini iconv() and iconv implementation (requires locale support).
90 * Bug fix for mbrtowc.
93 * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
96 * Bug fix: Fix size check for remaining output space in iconv().
107 #include "wchar.c" /* for _UC_iconv_t and __iconv_codesets */
109 extern const unsigned char __iconv_codesets
[];
114 static char *progname
;
115 static int hide_errors
;
117 static void error_msg(const char *fmt
, ...)
118 __attribute__ ((noreturn
, format (printf
, 1, 2)));
120 static void error_msg(const char *fmt
, ...)
125 fprintf(stderr
, "%s: ", progname
);
127 vfprintf(stderr
, fmt
, arg
);
134 int main(int argc
, char **argv
)
137 FILE *ofile
= stdout
;
140 static const char opt_chars
[] = "tfocsl";
142 const char *opts
[sizeof(opt_chars
)]; /* last is infile name */
148 size_t ni
, no
, r
, pos
;
152 for (s
= opt_chars
; *s
; s
++) {
153 opts
[ s
- opt_chars
] = NULL
;
159 if ((*p
!= '-') || (*++p
== 0)) {
163 if ((s
= strchr(opt_chars
,*p
)) == NULL
) {
165 s
= basename(progname
);
167 "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
168 " or\n%s -l\n", s
, s
);
171 if ((s
- opt_chars
) < 3) {
172 if ((--argc
== 0) || opts
[s
- opt_chars
]) {
175 opts
[s
- opt_chars
] = *++argv
;
177 opts
[s
- opt_chars
] = p
;
182 if (opts
[5]) { /* -l */
183 fprintf(stderr
, "Recognized codesets:\n");
184 for (s
= (char *)__iconv_codesets
; *s
; s
+= *s
) {
185 fprintf(stderr
," %s\n", s
+2);
187 s
= __LOCALE_DATA_CODESET_LIST
;
189 fprintf(stderr
," %s\n", __LOCALE_DATA_CODESET_LIST
+ (unsigned char)(*s
));
199 if (!opts
[0] || !opts
[1]) {
202 if ((ic
= iconv_open(opts
[0],opts
[1])) == ((iconv_t
)(-1))) {
203 error_msg( "unsupported codeset in %s -> %s conversion\n", opts
[1], opts
[0]);
205 if (opts
[3]) { /* -c */
206 ((_UC_iconv_t
*) ic
)->skip_invalid_input
= 1;
209 if ((s
= opts
[2]) != NULL
) {
210 if (!(ofile
= fopen(s
, "w"))) {
211 error_msg( "couldn't open %s for writing\n", s
);
217 if (!argc
|| ((**argv
== '-') && !((*argv
)[1]))) {
218 ifile
= stdin
; /* we don't check for duplicates */
219 } else if (!(ifile
= fopen(*argv
, "r"))) {
220 error_msg( "couldn't open %s for reading\n", *argv
);
223 while ((r
= fread(ibuf
+ ni
, 1, IBUF
- ni
, ifile
)) > 0) {
229 if ((r
= iconv(ic
, &pi
, &ni
, &po
, &no
)) == ((size_t)(-1))) {
230 if ((errno
!= EINVAL
) && (errno
!= E2BIG
)) {
231 error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos
- ni
));
234 if ((r
= OBUF
- no
) > 0) {
235 if (fwrite(obuf
, 1, OBUF
- no
, ofile
) < r
) {
236 error_msg( "write error\n");
239 if (ni
) { /* still bytes in buffer! */
240 memmove(ibuf
, pi
, ni
);
245 error_msg( "read error\n");
250 if (ifile
!= stdin
) {
254 } while (--argc
> 0);
259 error_msg( "incomplete sequence\n");
262 return (((_UC_iconv_t
*) ic
)->skip_invalid_input
< 2)
263 ? EXIT_SUCCESS
: EXIT_FAILURE
;