Add some more ulps.
[glibc/pb-stable.git] / iconv / iconv_prog.c
blob1470721893feef74ac86ed66c2e0ddf86220568b
1 /* Convert text in given files from the specified from-set to the to-set.
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <argp.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <iconv.h>
27 #include <locale.h>
28 #include <search.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 #include <libintl.h>
34 #ifdef _POSIX_MAPPED_FILES
35 # include <sys/mman.h>
36 #endif
37 #include <gconv_int.h>
39 /* Get libc version number. */
40 #include "../version.h"
/* Package name printed by print_version.  */
#define PACKAGE _libc_intl_domainname

/* Name and version of program.  The version hook variable declared by
   <argp.h> is pointed at print_version, which is defined further down.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;

/* Keys handed to parse_opt.  OPT_VERBOSE lies outside the character range
   because the option table gives --verbose no short form; OPT_LIST is the
   short option letter for --list.  */
#define OPT_VERBOSE 1000
#define OPT_LIST 'l'
52 /* Definitions of arguments for argp functions. */
53 static const struct argp_option options[] =
55 { NULL, 0, NULL, 0, N_("Input/Output format specification:") },
56 { "from-code", 'f', "NAME", 0, N_("encoding of original text") },
57 { "to-code", 't', "NAME", 0, N_("encoding for output") },
58 { NULL, 0, NULL, 0, N_("Information:") },
59 { "list", 'l', NULL, 0, N_("list all known coded character sets") },
60 { NULL, 0, NULL, 0, N_("Output control:") },
61 { NULL, 'c', NULL, 0, N_("omit invalid characters from output") },
62 { "output", 'o', "FILE", 0, N_("output file") },
63 { "silent", 's', NULL, 0, N_("suppress warnings") },
64 { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
65 { NULL, 0, NULL, 0, NULL }
68 /* Short description of program. */
69 static const char doc[] = N_("\
70 Convert encoding of given files from one encoding to another.");
72 /* Strings for arguments in help texts. */
73 static const char args_doc[] = N_("[FILE...]");
75 /* Prototype for option handler. */
76 static error_t parse_opt (int key, char *arg, struct argp_state *state);
78 /* Function to print some extra text in the help message. */
79 static char *more_help (int key, const char *text, void *input);
81 /* Data structure to communicate with argp functions. */
82 static struct argp argp =
84 options, parse_opt, args_doc, doc, NULL, more_help
87 /* Code sets to convert from and to respectively. */
88 static const char *from_code;
89 static const char *to_code;
91 /* File to write output to. If NULL write to stdout. */
92 static const char *output_file;
94 /* Nonzero if verbose ouput is wanted. */
95 static int verbose;
97 /* Nonzero if list of all coded character sets is wanted. */
98 static int list;
100 /* If nonzero omit invalid character from output. */
101 static int omit_invalid;
103 /* Prototypes for the functions doing the actual work. */
104 static int process_block (iconv_t cd, char *addr, size_t len, FILE *output);
105 static int process_fd (iconv_t cd, int fd, FILE *output);
106 static int process_file (iconv_t cd, FILE *input, FILE *output);
107 static void print_known_names (void) internal_function;
111 main (int argc, char *argv[])
113 int status = EXIT_SUCCESS;
114 int remaining;
115 FILE *output;
116 iconv_t cd;
117 const char *orig_to_code;
119 /* Set locale via LC_ALL. */
120 setlocale (LC_ALL, "");
122 /* Set the text message domain. */
123 textdomain (_libc_intl_domainname);
125 /* Parse and process arguments. */
126 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
128 /* List all coded character sets if wanted. */
129 if (list)
131 print_known_names ();
132 exit (EXIT_SUCCESS);
135 /* If either the from- or to-code is not specified this is an error
136 since we do not know what to do. */
137 if (from_code == NULL && to_code == NULL)
138 error (EXIT_FAILURE, 0,
139 _("neither original nor target encoding specified"));
140 if (from_code == NULL)
141 error (EXIT_FAILURE, 0, _("original encoding not specified using `-f'"));
142 if (to_code == NULL)
143 error (EXIT_FAILURE, 0, _("target encoding not specified using `-t'"));
145 /* If we have to ignore errors make sure we use the appropriate name for
146 the to-character-set. */
147 orig_to_code = to_code;
148 if (omit_invalid)
150 const char *errhand = strchrnul (to_code, '/');
151 int nslash = 2;
152 char *newp;
153 char *cp;
155 if (*errhand == '/')
157 --nslash;
158 errhand = strchrnul (errhand, '/');
160 if (*errhand == '/')
162 --nslash;
163 ++errhand;
167 newp = (char *) alloca (errhand - to_code + nslash + 6 + 1);
168 cp = mempcpy (newp, to_code, errhand - to_code);
169 while (nslash-- > 0)
170 *cp++ = '/';
171 memcpy (cp, "NEEDED", sizeof ("NEEDED"));
173 to_code = newp;
176 /* Let's see whether we have these coded character sets. */
177 cd = iconv_open (to_code, from_code);
178 if (cd == (iconv_t) -1)
180 if (errno == EINVAL)
181 error (EXIT_FAILURE, 0,
182 _("conversion from `%s' to `%s' not supported"),
183 from_code, orig_to_code);
184 else
185 error (EXIT_FAILURE, errno,
186 _("failed to start conversion processing"));
189 /* Determine output file. */
190 if (output_file != NULL)
192 output = fopen (output_file, "w");
193 if (output == NULL)
194 error (EXIT_FAILURE, errno, _("cannot open output file"));
196 else
197 output = stdout;
199 /* Now process the remaining files. Write them to stdout or the file
200 specified with the `-o' parameter. If we have no file given as
201 the parameter process all from stdin. */
202 if (remaining == argc)
204 if (process_file (cd, stdin, output) != 0)
205 status = EXIT_FAILURE;
207 else
210 struct stat st;
211 char *addr;
212 int fd;
215 if (verbose)
216 printf ("%s:\n", argv[remaining]);
217 if (strcmp (argv[remaining], "-") == 0)
218 fd = 0;
219 else
221 fd = open (argv[remaining], O_RDONLY);
223 if (fd == -1)
225 error (0, errno, _("cannot open input file `%s'"),
226 argv[remaining]);
227 status = EXIT_FAILURE;
228 continue;
232 #ifdef _POSIX_MAPPED_FILES
233 /* We have possibilities for reading the input file. First try
234 to mmap() it since this will provide the fastest solution. */
235 if (fstat (fd, &st) == 0
236 && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0))
237 != MAP_FAILED))
239 /* Yes, we can use mmap(). The descriptor is not needed
240 anymore. */
241 if (close (fd) != 0)
242 error (EXIT_FAILURE, errno, _("error while closing input `%s'"),
243 argv[remaining]);
245 if (process_block (cd, addr, st.st_size, output) < 0)
247 /* Something went wrong. */
248 status = EXIT_FAILURE;
250 /* We don't need the input data anymore. */
251 munmap ((void *) addr, st.st_size);
253 /* We cannot go on with producing output since it might
254 lead to problem because the last output might leave
255 the output stream in an undefined state. */
256 break;
259 /* We don't need the input data anymore. */
260 munmap ((void *) addr, st.st_size);
262 else
263 #endif /* _POSIX_MAPPED_FILES */
265 /* Read the file in pieces. */
266 if (process_fd (cd, fd, output) != 0)
268 /* Something went wrong. */
269 status = EXIT_FAILURE;
271 /* We don't need the input file anymore. */
272 close (fd);
274 /* We cannot go on with producing output since it might
275 lead to problem because the last output might leave
276 the output stream in an undefined state. */
277 break;
280 /* Now close the file. */
281 close (fd);
284 while (++remaining < argc);
286 /* Close the output file now. */
287 if (fclose (output))
288 error (EXIT_FAILURE, errno, _("error while closing output file"));
290 return status;
294 /* Handle program arguments. */
295 static error_t
296 parse_opt (int key, char *arg, struct argp_state *state)
298 switch (key)
300 case 'f':
301 from_code = arg;
302 break;
303 case 't':
304 to_code = arg;
305 break;
306 case 'o':
307 output_file = arg;
308 break;
309 case 's':
310 /* Nothing, for now at least. We are not giving out any information
311 about missing character or so. */
312 break;
313 case 'c':
314 /* Omit invalid characters from output. */
315 omit_invalid = 1;
316 break;
317 case OPT_VERBOSE:
318 verbose = 1;
319 break;
320 case OPT_LIST:
321 list = 1;
322 break;
323 default:
324 return ARGP_ERR_UNKNOWN;
326 return 0;
/* Help filter for argp.  For KEY == ARGP_KEY_HELP_EXTRA a freshly
   allocated copy of the (translated) bug reporting note is returned; the
   result of strdup is passed on unchecked.  For every other key TEXT is
   returned unchanged.  INPUT is not used.  */
static char *
more_help (int key, const char *text, void *input)
{
  if (key == ARGP_KEY_HELP_EXTRA)
    /* We print some extra information.  */
    return strdup (gettext ("\
Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));

  return (char *) text;
}
346 /* Print the version information. */
347 static void
348 print_version (FILE *stream, struct argp_state *state)
350 fprintf (stream, "iconv (GNU %s) %s\n", PACKAGE, VERSION);
351 fprintf (stream, gettext ("\
352 Copyright (C) %s Free Software Foundation, Inc.\n\
353 This is free software; see the source for copying conditions. There is NO\n\
354 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
355 "), "2000");
356 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
/* Write the LEN converted bytes starting at BUF to OUTPUT.  Returns 0 on
   success and -1 after reporting a write failure.  On success errno is
   left exactly as it was on entry so that the caller can still inspect
   the error code of a preceding iconv() call.  */
static int
write_converted (const char *buf, size_t len, FILE *output)
{
  int errno_save = errno;

  if (fwrite (buf, 1, len, output) < len || ferror (output))
    {
      /* Error occurred while printing the result.  */
      error (0, 0, _("\
conversion stopped due to problem in writing the output"));
      return -1;
    }

  errno = errno_save;
  return 0;
}

/* Convert the LEN bytes starting at ADDR with the conversion descriptor
   CD and write the result to OUTPUT.  The input is converted through a
   fixed-size output buffer; whenever iconv() reports E2BIG the buffer is
   written out and the conversion continues.  Once all input is consumed
   the conversion state is flushed.  Returns 0 on success and -1 after
   reporting a conversion or write error.  */
static int
process_block (iconv_t cd, char *addr, size_t len, FILE *output)
{
#define OUTBUF_SIZE 32768
  const char *start = addr;
  char outbuf[OUTBUF_SIZE];
  char *outptr;
  size_t outlen;
  size_t n;

  while (len > 0)
    {
      outptr = outbuf;
      outlen = OUTBUF_SIZE;
      n = iconv (cd, &addr, &len, &outptr, &outlen);

      /* Write whatever was produced, even if the call failed.  */
      if (outptr != outbuf
          && write_converted (outbuf, outptr - outbuf, output) != 0)
        return -1;

      if (n != (size_t) -1)
        {
          /* All the input text is processed.  For state-dependent
             character sets we have to flush the state now.  */
          outptr = outbuf;
          outlen = OUTBUF_SIZE;
          (void) iconv (cd, NULL, NULL, &outptr, &outlen);

          if (outptr != outbuf
              && write_converted (outbuf, outptr - outbuf, output) != 0)
            return -1;

          break;
        }

      /* E2BIG only means the output buffer was full; go around again.  */
      if (errno != E2BIG)
        {
          /* iconv() ran into a problem.  */
          switch (errno)
            {
            case EILSEQ:
              error (0, 0, _("illegal input sequence at position %ld"),
                     (long) (addr - start));
              break;
            case EINVAL:
              error (0, 0, _("\
incomplete character or shift sequence at end of buffer"));
              break;
            case EBADF:
              error (0, 0, _("internal error (illegal descriptor)"));
              break;
            default:
              error (0, 0, _("unknown iconv() error %d"), errno);
              break;
            }

          return -1;
        }
    }

  return 0;
}
/* Read everything available from the descriptor FD and convert it with
   CD, writing the result to OUTPUT.  Returns the result of process_block,
   or -1 after reporting a read or allocation error.

   We have a problem with reading from a descriptor since we must not
   provide the iconv() function an incomplete character or shift
   sequence at the end of the buffer.  Since we have to deal with
   arbitrary encodings we must read the whole text in a buffer and
   process it in one step.  The buffer is static and is reused, at its
   grown size, by later calls.  */
static int
process_fd (iconv_t cd, int fd, FILE *output)
{
  static char *inbuf = NULL;
  static size_t maxlen = 0;
  size_t actlen = 0;

  for (;;)
    {
      /* read() returns a signed count; an unsigned variable would make
         the comparison with -1 depend on integer conversions.  */
      ssize_t n;

      if (actlen == maxlen)
        {
          /* Increase the buffer.  The result goes to a temporary first so
             the old buffer is not lost if the allocation fails.  */
          char *newbuf = realloc (inbuf, maxlen + 32768);

          if (newbuf == NULL)
            {
              error (0, errno, _("unable to allocate buffer for input"));
              return -1;
            }

          inbuf = newbuf;
          maxlen += 32768;
        }

      /* Always read behind the data collected so far.  This also holds
         when the buffer still has room left from an earlier call.  */
      n = read (fd, inbuf + actlen, maxlen - actlen);

      if (n == 0)
        /* No more text to read.  */
        break;

      if (n == -1)
        {
          /* Error while reading.  */
          error (0, errno, _("error while reading the input"));
          return -1;
        }

      actlen += n;
    }

  /* Now we have all the input in the buffer.  Process it in one run.  */
  return process_block (cd, inbuf, actlen, output);
}
/* Convert the contents of the stream INPUT with CD and write the result
   to OUTPUT.  The stream's buffering is bypassed: the data is read
   directly from the underlying descriptor.  Returns what process_fd
   returns.  */
static int
process_file (iconv_t cd, FILE *input, FILE *output)
{
  /* This should be safe since we use this function only for `stdin' and
     we haven't read anything so far.  */
  int fd = fileno (input);

  return process_fd (cd, fd, output);
}
/* Print all known character sets/encodings.  */

/* Root of the search tree which collects the names to print.  */
static void *printlist;
/* Output column tracked while the names are printed.  */
static size_t column;
/* Nonzero once the first name has been printed, i.e. when the next name
   has to be preceded by a separator.  */
static int not_first;
538 static void
539 insert_print_list (const void *nodep, VISIT value, int level)
541 if (value == leaf || value == postorder)
543 const struct gconv_alias *s = *(const struct gconv_alias **) nodep;
544 tsearch (s->fromname, &printlist, (__compar_fn_t) strverscmp);
548 static void
549 do_print (const void *nodep, VISIT value, int level)
551 if (value == leaf || value == postorder)
553 const char *s = *(const char **) nodep;
554 size_t len = strlen (s);
555 size_t cnt;
557 while (len > 0 && s[len - 1] == '/')
558 --len;
560 for (cnt = 0; cnt < len; ++cnt)
561 if (isalnum (s[cnt]))
562 break;
563 if (cnt == len)
564 return;
566 if (not_first)
568 putchar (',');
569 ++column;
571 if (column > 2 && column + len > 77)
573 fputs ("\n ", stdout);
574 column = 2;
576 else
578 putchar (' ');
579 ++column;
582 else
583 not_first = 1;
585 fwrite (s, len, 1, stdout);
586 column += len;
590 static void
591 internal_function
592 add_known_names (struct gconv_module *node)
594 if (node->left != NULL)
595 add_known_names (node->left);
596 if (node->right != NULL)
597 add_known_names (node->right);
600 if (strcmp (node->from_string, "INTERNAL"))
601 tsearch (node->from_string, &printlist,
602 (__compar_fn_t) strverscmp);
603 if (strcmp (node->to_string, "INTERNAL"))
604 tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp);
606 node = node->same;
608 while (node != NULL);
611 static void
612 internal_function
613 print_known_names (void)
615 iconv_t h;
617 /* We must initialize the internal databases first. */
618 h = iconv_open ("L1", "L1");
619 iconv_close (h);
621 /* First add the aliases. */
622 twalk (__gconv_alias_db, insert_print_list);
624 /* Add the from- and to-names from the known modules. */
625 add_known_names (__gconv_modules_db);
627 fputs (_("\
628 The following list contain all the coded character sets known. This does\n\
629 not necessarily mean that all combinations of these names can be used for\n\
630 the FROM and TO command line parameters. One coded character set can be\n\
631 listed with several different names (aliases).\n\n "), stdout);
633 /* Now print the collected names. */
634 column = 2;
635 twalk (printlist, do_print);
637 if (column != 0)
638 puts ("");