Update.
[glibc.git] / iconv / iconv_prog.c
blobc5910a55851a3e8f2e25e7490df653b5150fca33
1 /* Convert text in given files from the specified from-set to the to-set.
2 Copyright (C) 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <argp.h>
22 #include <errno.h>
23 #include <error.h>
24 #include <fcntl.h>
25 #include <iconv.h>
26 #include <locale.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <sys/mman.h>
33 /* Get libc version number. */
34 #include "../version.h"
/* Package name shown in the version output.  */
#define PACKAGE _libc_intl_domainname

/* Version printing is delegated to print_version (defined further down)
   through the hook variable that argp consults.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *stream, struct argp_state *state)
  = print_version;
43 #define OPT_VERBOSE 1000
45 /* Definitions of arguments for argp functions. */
46 static const struct argp_option options[] =
48 { NULL, 0, NULL, 0, N_("Input/Output format specification:") },
49 { "from-code", 'f', "NAME", 0, N_("encoding of original text") },
50 { "to-code", 't', "NAME", 0, N_("encoding for output") },
51 { NULL, 0, NULL, 0, N_("Output control:") },
52 { "output", 'o', "FILE", 0, N_("output file") },
53 { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
54 { NULL, 0, NULL, 0, NULL }
57 /* Short description of program. */
58 static const char doc[] = N_("\
59 Convert encoding of given files from one encoding to another.");
61 /* Strings for arguments in help texts. */
62 static const char args_doc[] = N_("[FILE...]");
64 /* Prototype for option handler. */
65 static error_t parse_opt __P ((int key, char *arg, struct argp_state *state));
67 /* Function to print some extra text in the help message. */
68 static char *more_help __P ((int key, const char *text, void *input));
70 /* Data structure to communicate with argp functions. */
71 static struct argp argp =
73 options, parse_opt, args_doc, doc, NULL, more_help
/* Name of the encoding of the input (set by `-f').  */
static const char *from_code;

/* Name of the encoding to produce (set by `-t').  */
static const char *to_code;

/* Name of the output file (set by `-o'); NULL means standard output.  */
static const char *output_file;

/* Nonzero if verbose output is wanted.  */
static int verbose;

/* The workers which perform the conversion: on an in-memory block, on
   the contents of a file descriptor, and on a stdio stream.  */
static int process_block (iconv_t cd, const char *addr, size_t len,
                          FILE *output);
static int process_fd (iconv_t cd, int fd, FILE *output);
static int process_file (iconv_t cd, FILE *input, FILE *output);
93 int
94 main (int argc, char *argv[])
96 int status = EXIT_SUCCESS;
97 int remaining;
98 FILE *output;
99 iconv_t cd;
101 /* Set locale via LC_ALL. */
102 setlocale (LC_ALL, "");
104 /* Set the text message domain. */
105 textdomain (_libc_intl_domainname);
107 /* Parse and process arguments. */
108 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
110 /* If either the from- or to-code is not specified this is an error
111 since we do not know what to do. */
112 if (from_code == NULL && to_code == NULL)
113 error (EXIT_FAILURE, 0,
114 _("neither original nor target encoding specified"));
115 if (from_code == NULL)
116 error (EXIT_FAILURE, 0, _("original encoding not specified using `-f'"));
117 if (to_code == NULL)
118 error (EXIT_FAILURE, 0, _("target encoding not specified using `-t'"));
120 /* Let's see whether we have these coded character sets. */
121 cd = iconv_open (to_code, from_code);
122 if (cd == (iconv_t) -1)
123 if (errno == EINVAL)
124 error (EXIT_FAILURE, 0, _("conversion from `%s' to `%s' not supported"),
125 from_code, to_code);
126 else
127 error (EXIT_FAILURE, errno, _("failed to start conversion processing"));
129 /* Determine output file. */
130 if (output_file != NULL)
132 output = fopen (output_file, "w");
133 if (output == NULL)
134 error (EXIT_FAILURE, errno, _("cannot open output file"));
136 else
137 output = stdout;
139 /* Now process the remaining files. Write them to stdout or the file
140 specified with the `-o' parameter. If we have no file given as
141 the parameter process all from stdin. */
142 if (remaining == argc)
143 process_file (cd, stdin, output);
144 else
147 struct stat st;
148 const char *addr;
149 int fd = open (argv[remaining], O_RDONLY);
151 if (verbose)
152 printf ("%s:\n", argv[remaining]);
154 if (fd == -1)
156 error (0, errno, _("cannot open input file `%s'"),
157 argv[remaining]);
158 status = EXIT_FAILURE;
159 continue;
162 /* We have possibilities for reading the input file. First try
163 to mmap() it since this will provide the fastest solution. */
164 if (fstat (fd, &st) == 0
165 && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0))
166 != MAP_FAILED))
168 /* Yes, we can use mmap(). The descriptor is not needed
169 anymore. */
170 if (close (fd) != 0)
171 error (EXIT_FAILURE, errno, _("error while closing input `%s'"),
172 argv[remaining]);
174 if (process_block (cd, addr, st.st_size, stdout) < 0)
176 /* Something went wrong. */
177 status = EXIT_FAILURE;
179 /* We don't need the input data anymore. */
180 munmap ((void *) addr, st.st_size);
182 /* We cannot go on with producing output since it might
183 lead to problem because the last output might leave
184 the output stream in an undefined state. */
185 break;
188 /* We don't need the input data anymore. */
189 munmap ((void *) addr, st.st_size);
191 else
193 /* Read the file in pieces. */
194 if (process_fd (cd, fd, output) != 0)
196 /* Something went wrong. */
197 status = EXIT_FAILURE;
199 /* We don't need the input file anymore. */
200 close (fd);
202 /* We cannot go on with producing output since it might
203 lead to problem because the last output might leave
204 the output stream in an undefined state. */
205 break;
208 /* Now close the file. */
209 close (fd);
212 while (++remaining < argc);
214 /* Close the output file now. */
215 if (fclose (output))
216 error (EXIT_FAILURE, errno, _("error while closing output file"));
218 return status;
222 /* Handle program arguments. */
223 static error_t
224 parse_opt (int key, char *arg, struct argp_state *state)
226 switch (key)
228 case 'f':
229 from_code = arg;
230 break;
231 case 't':
232 to_code = arg;
233 break;
234 case 'o':
235 output_file = arg;
236 break;
237 case OPT_VERBOSE:
238 verbose = 1;
239 break;
240 default:
241 return ARGP_ERR_UNKNOWN;
243 return 0;
/* Help filter for argp.  For ARGP_KEY_HELP_EXTRA return a newly
   allocated copy of the translated bug-report notice; for every other
   KEY hand back TEXT unchanged.  INPUT is not used.  */
static char *
more_help (int key, const char *text, void *input)
{
  if (key != ARGP_KEY_HELP_EXTRA)
    return (char *) text;

  /* We print some extra information.  */
  return strdup (gettext ("Report bugs using the `glibcbug' script "
                          "to <bugs@gnu.org>.\n"));
}
263 /* Print the version information. */
264 static void
265 print_version (FILE *stream, struct argp_state *state)
267 fprintf (stream, "iconv (GNU %s) %s\n", PACKAGE, VERSION);
268 fprintf (stream, gettext ("\
269 Copyright (C) %s Free Software Foundation, Inc.\n\
270 This is free software; see the source for copying conditions. There is NO\n\
271 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
272 "), "1998");
273 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
/* Convert the LEN bytes starting at ADDR with the conversion descriptor
   CD and write the converted text to OUTPUT.  The input is converted in
   as many steps as are needed to pass it through the fixed-size output
   buffer.  Returns 0 on success and -1 if writing failed or iconv()
   reported an error other than a full output buffer; a diagnostic has
   been printed in that case.  */
static int
process_block (iconv_t cd, const char *addr, size_t len, FILE *output)
{
#define OUTBUF_SIZE 32768
  char outbuf[OUTBUF_SIZE];

  while (len > 0)
    {
      /* Start every round with an empty output buffer.  Otherwise a
         round which ended with E2BIG would be followed by a call with
         no room left, and already written bytes would be written
         again.  */
      char *outptr = outbuf;
      size_t outlen = OUTBUF_SIZE;
      size_t n = iconv (cd, &addr, &len, &outptr, &outlen);
      /* Remember the reason for a failure now; the stdio calls below
         are allowed to change errno.  */
      int saved_errno = errno;
      size_t produced = outptr - outbuf;

      if (produced > 0)
        {
          /* We have something to write out.  fwrite() returns the
             number of items written, so a short count signals the
             error.  */
          if (fwrite (outbuf, 1, produced, output) < produced
              || ferror (output))
            {
              /* Error occurred while printing the result.  */
              error (0, 0, _("\
conversion stopped due to problem in writing the output"));
              return -1;
            }
        }

      if (n != (size_t) -1)
        /* Everything is processed.  */
        break;

      if (saved_errno != E2BIG)
        {
          /* iconv() ran into a problem.  */
          switch (saved_errno)
            {
            case EILSEQ:
              error (0, 0, _("illegal input sequence"));
              break;
            case EINVAL:
              error (0, 0, _("\
incomplete character or shift sequence at end of buffer"));
              break;
            case EBADF:
              error (0, 0, _("internal error (illegal descriptor)"));
              break;
            default:
              error (0, 0, _("unknown iconv() error %d"), saved_errno);
              break;
            }

          return -1;
        }

      /* E2BIG: the output buffer was full.  It has been flushed, so go
         on with the remaining input.  */
    }

  return 0;
}
/* Read everything available from the descriptor FD and convert it with
   the conversion descriptor CD, writing the result to OUTPUT.  Returns
   the result of process_block for the collected input, or -1 if reading
   or enlarging the buffer failed; a diagnostic has been printed in that
   case.  */
static int
process_fd (iconv_t cd, int fd, FILE *output)
{
  /* We have a problem with reading from a descriptor since we must not
     provide the iconv() function an incomplete character or shift
     sequence at the end of the buffer.  Since we have to deal with
     arbitrary encodings we must read the whole text in a buffer and
     process it in one step.

     The buffer is static, so it is kept, together with its size, from
     one call to the next.  Every call therefore has to start filling it
     at INBUF itself.  */
  static char *inbuf = NULL;
  static size_t maxlen = 0;
  size_t actlen = 0;

  while (1)
    {
      ssize_t n;

      if (actlen == maxlen)
        {
          /* The buffer is full or does not exist yet: increase it.  The
             old pointer and size stay in place until realloc()
             succeeded so that a failure does not lose the buffer.  */
          size_t newlen = maxlen + 32768;
          char *newbuf = realloc (inbuf, newlen);

          if (newbuf == NULL)
            {
              error (0, errno, _("unable to allocate buffer for input"));
              return -1;
            }

          inbuf = newbuf;
          maxlen = newlen;
        }

      n = read (fd, inbuf + actlen, maxlen - actlen);

      if (n == 0)
        /* No more text to read.  */
        break;

      if (n == -1)
        {
          /* Error while reading.  */
          error (0, errno, _("error while reading the input"));
          return -1;
        }

      actlen += n;
    }

  /* Now we have all the input in the buffer.  Process it in one run.  */
  return process_block (cd, inbuf, actlen, output);
}
/* Convert the contents of the stream INPUT with the conversion
   descriptor CD and write the result to OUTPUT.  The work is done by
   process_fd on the descriptor underlying INPUT; its result is
   returned.  */
static int
process_file (iconv_t cd, FILE *input, FILE *output)
{
  /* Going behind the back of stdio should be safe since we use this
     function only for `stdin' and we haven't read anything so far.  */
  int fd = fileno (input);

  return process_fd (cd, fd, output);
}