/* Header lines retained from the web page this file was extracted from:
     Check for an up-to-date copyright year in coreutils.texi.
     [coreutils/ericb.git] / src / unexpand.c
     blob051fe72e48b092421503b609fec0e118ad3113ce  */
/* unexpand - convert blanks to tabs
   Copyright (C) 89, 91, 1995-2006 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
/* By default, convert only maximal strings of initial blanks and tabs
   into tabs.
   Preserves backspace characters in the output; they decrement the
   column count for tab calculations.
   The default action is equivalent to -8.

   Options:
   --tabs=tab1[,tab2[,...]]
   -t tab1[,tab2[,...]]
   -tab1[,tab2[,...]]   If only one tab stop is given, set the tabs tab1
                        columns apart instead of the default 8.  Otherwise,
                        set the tabs at columns tab1, tab2, etc. (numbered from
                        0); preserve any blanks beyond the tab stops given.
   --all
   -a                   Use tabs wherever they would replace 2 or more blanks,
                        not just at the beginnings of lines.

   David MacKenzie <djm@gnu.ai.mit.edu> */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
43 #include "error.h"
44 #include "quote.h"
45 #include "xstrndup.h"
47 /* The official name of this program (e.g., no `g' prefix). */
48 #define PROGRAM_NAME "unexpand"
50 #define AUTHORS "David MacKenzie"
52 /* The number of bytes added at a time to the amount of memory
53 allocated for the output line. */
54 #define OUTPUT_BLOCK 256
/* The name this program was run with.  */
char *program_name;

/* If true, convert blanks even after nonblank characters have been
   read on the line.  */
static bool convert_entire_line;

/* If nonzero, the size of all tab stops.  If zero, use `tab_list' instead.  */
static size_t tab_size;

/* The maximum distance between tab stops.  */
static size_t max_column_width;

/* Array of the explicit column numbers of the tab stops;
   after `tab_list' is exhausted, the rest of the line is printed
   unchanged.  The first column is column 0.  */
static uintmax_t *tab_list;

/* The number of allocated entries in `tab_list'.  */
static size_t n_tabs_allocated;

/* The index of the first invalid element of `tab_list',
   where the next element can be added.  */
static size_t first_free_tab;

/* Null-terminated array of input filenames.  */
static char **file_list;

/* Default for `file_list' if no files are given on the command line.
   The single entry "-" names standard input.  */
static char *stdin_argv[] =
{
  "-", NULL
};

/* True if we have ever read standard input.  */
static bool have_read_stdin;

/* The desired exit status.  */
static int exit_status;
96 /* For long options that have no equivalent short option, use a
97 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
98 enum
100 CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
103 static struct option const longopts[] =
105 {"tabs", required_argument, NULL, 't'},
106 {"all", no_argument, NULL, 'a'},
107 {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
108 {GETOPT_HELP_OPTION_DECL},
109 {GETOPT_VERSION_OPTION_DECL},
110 {NULL, 0, NULL, 0}
113 void
114 usage (int status)
116 if (status != EXIT_SUCCESS)
117 fprintf (stderr, _("Try `%s --help' for more information.\n"),
118 program_name);
119 else
121 printf (_("\
122 Usage: %s [OPTION]... [FILE]...\n\
124 program_name);
125 fputs (_("\
126 Convert blanks in each FILE to tabs, writing to standard output.\n\
127 With no FILE, or when FILE is -, read standard input.\n\
129 "), stdout);
130 fputs (_("\
131 Mandatory arguments to long options are mandatory for short options too.\n\
132 "), stdout);
133 fputs (_("\
134 -a, --all convert all blanks, instead of just initial blanks\n\
135 --first-only convert only leading sequences of blanks (overrides -a)\n\
136 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
137 -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
138 "), stdout);
139 fputs (HELP_OPTION_DESCRIPTION, stdout);
140 fputs (VERSION_OPTION_DESCRIPTION, stdout);
141 emit_bug_reporting_address ();
143 exit (status);
146 /* Add tab stop TABVAL to the end of `tab_list'. */
148 static void
149 add_tab_stop (uintmax_t tabval)
151 uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
152 uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
154 if (first_free_tab == n_tabs_allocated)
155 tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
156 tab_list[first_free_tab++] = tabval;
158 if (max_column_width < column_width)
160 if (SIZE_MAX < column_width)
161 error (EXIT_FAILURE, 0, _("tabs are too far apart"));
162 max_column_width = column_width;
166 /* Add the comma or blank separated list of tab stops STOPS
167 to the list of tab stops. */
169 static void
170 parse_tab_stops (char const *stops)
172 bool have_tabval = false;
173 uintmax_t tabval IF_LINT (= 0);
174 char const *num_start IF_LINT (= NULL);
175 bool ok = true;
177 for (; *stops; stops++)
179 if (*stops == ',' || isblank (to_uchar (*stops)))
181 if (have_tabval)
182 add_tab_stop (tabval);
183 have_tabval = false;
185 else if (ISDIGIT (*stops))
187 if (!have_tabval)
189 tabval = 0;
190 have_tabval = true;
191 num_start = stops;
194 /* Detect overflow. */
195 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
197 size_t len = strspn (num_start, "0123456789");
198 char *bad_num = xstrndup (num_start, len);
199 error (0, 0, _("tab stop is too large %s"), quote (bad_num));
200 free (bad_num);
201 ok = false;
202 stops = num_start + len - 1;
205 else
207 error (0, 0, _("tab size contains invalid character(s): %s"),
208 quote (stops));
209 ok = false;
210 break;
214 if (!ok)
215 exit (EXIT_FAILURE);
217 if (have_tabval)
218 add_tab_stop (tabval);
/* Check that the list of tab stops TABS, with ENTRIES entries,
   contains only nonzero, ascending values.  Exit with a diagnostic
   on the first entry that is zero or not greater than the entry
   before it.  An empty list is accepted.  */

static void
validate_tab_stops (uintmax_t const *tabs, size_t entries)
{
  uintmax_t prev_tab = 0;
  size_t i;

  for (i = 0; i < entries; i++)
    {
      if (tabs[i] == 0)
        error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
      if (tabs[i] <= prev_tab)
        error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
      prev_tab = tabs[i];
    }
}
240 /* Close the old stream pointer FP if it is non-NULL,
241 and return a new one opened to read the next input file.
242 Open a filename of `-' as the standard input.
243 Return NULL if there are no more input files. */
245 static FILE *
246 next_file (FILE *fp)
248 static char *prev_file;
249 char *file;
251 if (fp)
253 if (ferror (fp))
255 error (0, errno, "%s", prev_file);
256 exit_status = EXIT_FAILURE;
258 if (STREQ (prev_file, "-"))
259 clearerr (fp); /* Also clear EOF. */
260 else if (fclose (fp) != 0)
262 error (0, errno, "%s", prev_file);
263 exit_status = EXIT_FAILURE;
267 while ((file = *file_list++) != NULL)
269 if (STREQ (file, "-"))
271 have_read_stdin = true;
272 prev_file = file;
273 return stdin;
275 fp = fopen (file, "r");
276 if (fp)
278 prev_file = file;
279 return fp;
281 error (0, errno, "%s", file);
282 exit_status = EXIT_FAILURE;
284 return NULL;
287 /* Change blanks to tabs, writing to stdout.
288 Read each file in `file_list', in order. */
290 static void
291 unexpand (void)
293 /* Input stream. */
294 FILE *fp = next_file (NULL);
296 /* The array of pending blanks. In non-POSIX locales, blanks can
297 include characters other than spaces, so the blanks must be
298 stored, not merely counted. */
299 char *pending_blank;
301 if (!fp)
302 return;
304 /* The worst case is a non-blank character, then one blank, then a
305 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
306 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
307 pending_blank = xmalloc (max_column_width);
309 for (;;)
311 /* Input character, or EOF. */
312 int c;
314 /* If true, perform translations. */
315 bool convert = true;
318 /* The following variables have valid values only when CONVERT
319 is true: */
321 /* Column of next input character. */
322 uintmax_t column = 0;
324 /* Column the next input tab stop is on. */
325 uintmax_t next_tab_column = 0;
327 /* Index in TAB_LIST of next tab stop to examine. */
328 size_t tab_index = 0;
330 /* If true, the first pending blank came just before a tab stop. */
331 bool one_blank_before_tab_stop = false;
333 /* If true, the previous input character was a blank. This is
334 initially true, since initial strings of blanks are treated
335 as if the line was preceded by a blank. */
336 bool prev_blank = true;
338 /* Number of pending columns of blanks. */
339 size_t pending = 0;
342 /* Convert a line of text. */
346 while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
347 continue;
349 if (convert)
351 bool blank = !! isblank (c);
353 if (blank)
355 if (next_tab_column <= column)
357 if (tab_size)
358 next_tab_column =
359 column + (tab_size - column % tab_size);
360 else
361 for (;;)
362 if (tab_index == first_free_tab)
364 convert = false;
365 break;
367 else
369 uintmax_t tab = tab_list[tab_index++];
370 if (column < tab)
372 next_tab_column = tab;
373 break;
378 if (convert)
380 if (next_tab_column < column)
381 error (EXIT_FAILURE, 0, _("input line is too long"));
383 if (c == '\t')
385 column = next_tab_column;
387 /* Discard pending blanks, unless it was a single
388 blank just before the previous tab stop. */
389 if (! (pending == 1 && one_blank_before_tab_stop))
391 pending = 0;
392 one_blank_before_tab_stop = false;
395 else
397 column++;
399 if (! (prev_blank && column == next_tab_column))
401 /* It is not yet known whether the pending blanks
402 will be replaced by tabs. */
403 if (column == next_tab_column)
404 one_blank_before_tab_stop = true;
405 pending_blank[pending++] = c;
406 prev_blank = true;
407 continue;
410 /* Replace the pending blanks by a tab or two. */
411 pending_blank[0] = c = '\t';
412 pending = one_blank_before_tab_stop;
416 else if (c == '\b')
418 /* Go back one column, and force recalculation of the
419 next tab stop. */
420 column -= !!column;
421 next_tab_column = column;
422 tab_index -= !!tab_index;
424 else
426 column++;
427 if (!column)
428 error (EXIT_FAILURE, 0, _("input line is too long"));
431 if (pending)
433 if (fwrite (pending_blank, 1, pending, stdout) != pending)
434 error (EXIT_FAILURE, errno, _("write error"));
435 pending = 0;
436 one_blank_before_tab_stop = false;
439 prev_blank = blank;
440 convert &= convert_entire_line | blank;
443 if (c < 0)
445 free (pending_blank);
446 return;
449 if (putchar (c) < 0)
450 error (EXIT_FAILURE, errno, _("write error"));
452 while (c != '\n');
457 main (int argc, char **argv)
459 bool have_tabval = false;
460 uintmax_t tabval IF_LINT (= 0);
461 int c;
463 /* If true, cancel the effect of any -a (explicit or implicit in -t),
464 so that only leading blanks will be considered. */
465 bool convert_first_only = false;
467 initialize_main (&argc, &argv);
468 program_name = argv[0];
469 setlocale (LC_ALL, "");
470 bindtextdomain (PACKAGE, LOCALEDIR);
471 textdomain (PACKAGE);
473 atexit (close_stdout);
475 have_read_stdin = false;
476 exit_status = EXIT_SUCCESS;
477 convert_entire_line = false;
478 tab_list = NULL;
479 first_free_tab = 0;
481 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
482 != -1)
484 switch (c)
486 case '?':
487 usage (EXIT_FAILURE);
488 case 'a':
489 convert_entire_line = true;
490 break;
491 case 't':
492 convert_entire_line = true;
493 parse_tab_stops (optarg);
494 break;
495 case CONVERT_FIRST_ONLY_OPTION:
496 convert_first_only = true;
497 break;
498 case ',':
499 if (have_tabval)
500 add_tab_stop (tabval);
501 have_tabval = false;
502 break;
503 case_GETOPT_HELP_CHAR;
504 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
505 default:
506 if (!have_tabval)
508 tabval = 0;
509 have_tabval = true;
511 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
512 error (EXIT_FAILURE, 0, _("tab stop value is too large"));
513 break;
517 if (convert_first_only)
518 convert_entire_line = false;
520 if (have_tabval)
521 add_tab_stop (tabval);
523 validate_tab_stops (tab_list, first_free_tab);
525 if (first_free_tab == 0)
526 tab_size = max_column_width = 8;
527 else if (first_free_tab == 1)
528 tab_size = tab_list[0];
529 else
530 tab_size = 0;
532 file_list = (optind < argc ? &argv[optind] : stdin_argv);
534 unexpand ();
536 if (have_read_stdin && fclose (stdin) != 0)
537 error (EXIT_FAILURE, errno, "-");
539 exit (exit_status);