maint: revert "build: update gnulib submodule to latest"
[coreutils/ericb.git] / src / unexpand.c
blobe8bf3f0b70265a96130c5f8056e7382db7e55478
1 /* unexpand - convert blanks to tabs
2 Copyright (C) 1989, 1991, 1995-2006, 2008-2011 Free Software Foundation,
3 Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* By default, convert only maximal strings of initial blanks and tabs
19 into tabs.
20 Preserves backspace characters in the output; they decrement the
21 column count for tab calculations.
22 The default action is equivalent to -8.
24 Options:
25 --tabs=tab1[,tab2[,...]]
26 -t tab1[,tab2[,...]]
27 -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
28 columns apart instead of the default 8. Otherwise,
29 set the tabs at columns tab1, tab2, etc. (numbered from
30 0); preserve any blanks beyond the tab stops given.
31 --all
32 -a Use tabs wherever they would replace 2 or more blanks,
33 not just at the beginnings of lines.
35 David MacKenzie <djm@gnu.ai.mit.edu> */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
43 #include "error.h"
44 #include "fadvise.h"
45 #include "quote.h"
46 #include "xstrndup.h"
48 /* The official name of this program (e.g., no `g' prefix). */
49 #define PROGRAM_NAME "unexpand"
51 #define AUTHORS proper_name ("David MacKenzie")
53 /* If true, convert blanks even after nonblank characters have been
54 read on the line. */
55 static bool convert_entire_line;
57 /* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
58 static size_t tab_size;
60 /* The maximum distance between tab stops. */
61 static size_t max_column_width;
63 /* Array of the explicit column numbers of the tab stops;
64 after `tab_list' is exhausted, the rest of the line is printed
65 unchanged. The first column is column 0. */
66 static uintmax_t *tab_list;
68 /* The number of allocated entries in `tab_list'. */
69 static size_t n_tabs_allocated;
71 /* The index of the first invalid element of `tab_list',
72 where the next element can be added. */
73 static size_t first_free_tab;
75 /* Null-terminated array of input filenames. */
76 static char **file_list;
78 /* Default for `file_list' if no files are given on the command line. */
79 static char *stdin_argv[] =
81 (char *) "-", NULL
84 /* True if we have ever read standard input. */
85 static bool have_read_stdin;
87 /* The desired exit status. */
88 static int exit_status;
90 /* For long options that have no equivalent short option, use a
91 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
92 enum
94 CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
97 static struct option const longopts[] =
99 {"tabs", required_argument, NULL, 't'},
100 {"all", no_argument, NULL, 'a'},
101 {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
102 {GETOPT_HELP_OPTION_DECL},
103 {GETOPT_VERSION_OPTION_DECL},
104 {NULL, 0, NULL, 0}
107 void
108 usage (int status)
110 if (status != EXIT_SUCCESS)
111 fprintf (stderr, _("Try `%s --help' for more information.\n"),
112 program_name);
113 else
115 printf (_("\
116 Usage: %s [OPTION]... [FILE]...\n\
118 program_name);
119 fputs (_("\
120 Convert blanks in each FILE to tabs, writing to standard output.\n\
121 With no FILE, or when FILE is -, read standard input.\n\
123 "), stdout);
124 fputs (_("\
125 Mandatory arguments to long options are mandatory for short options too.\n\
126 "), stdout);
127 fputs (_("\
128 -a, --all convert all blanks, instead of just initial blanks\n\
129 --first-only convert only leading sequences of blanks (overrides -a)\n\
130 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
131 -t, --tabs=LIST use comma separated LIST of tab positions (enables -a)\n\
132 "), stdout);
133 fputs (HELP_OPTION_DESCRIPTION, stdout);
134 fputs (VERSION_OPTION_DESCRIPTION, stdout);
135 emit_ancillary_info ();
137 exit (status);
140 /* Add tab stop TABVAL to the end of `tab_list'. */
142 static void
143 add_tab_stop (uintmax_t tabval)
145 uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0;
146 uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0;
148 if (first_free_tab == n_tabs_allocated)
149 tab_list = X2NREALLOC (tab_list, &n_tabs_allocated);
150 tab_list[first_free_tab++] = tabval;
152 if (max_column_width < column_width)
154 if (SIZE_MAX < column_width)
155 error (EXIT_FAILURE, 0, _("tabs are too far apart"));
156 max_column_width = column_width;
160 /* Add the comma or blank separated list of tab stops STOPS
161 to the list of tab stops. */
163 static void
164 parse_tab_stops (char const *stops)
166 bool have_tabval = false;
167 uintmax_t tabval IF_LINT ( = 0);
168 char const *num_start IF_LINT ( = NULL);
169 bool ok = true;
171 for (; *stops; stops++)
173 if (*stops == ',' || isblank (to_uchar (*stops)))
175 if (have_tabval)
176 add_tab_stop (tabval);
177 have_tabval = false;
179 else if (ISDIGIT (*stops))
181 if (!have_tabval)
183 tabval = 0;
184 have_tabval = true;
185 num_start = stops;
188 /* Detect overflow. */
189 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t))
191 size_t len = strspn (num_start, "0123456789");
192 char *bad_num = xstrndup (num_start, len);
193 error (0, 0, _("tab stop is too large %s"), quote (bad_num));
194 free (bad_num);
195 ok = false;
196 stops = num_start + len - 1;
199 else
201 error (0, 0, _("tab size contains invalid character(s): %s"),
202 quote (stops));
203 ok = false;
204 break;
208 if (!ok)
209 exit (EXIT_FAILURE);
211 if (have_tabval)
212 add_tab_stop (tabval);
/* Verify that the ENTRIES tab stops in TABS are all nonzero and
   strictly ascending.  Exit with a diagnostic at the first entry that
   breaks either rule; an empty list is accepted.  */

static void
validate_tab_stops (uintmax_t const *tabs, size_t entries)
{
  uintmax_t previous = 0;
  size_t idx = 0;

  while (idx < entries)
    {
      uintmax_t current = tabs[idx++];

      if (current == 0)
        error (EXIT_FAILURE, 0, _("tab size cannot be 0"));
      if (current <= previous)
        error (EXIT_FAILURE, 0, _("tab sizes must be ascending"));
      previous = current;
    }
}
234 /* Close the old stream pointer FP if it is non-NULL,
235 and return a new one opened to read the next input file.
236 Open a filename of `-' as the standard input.
237 Return NULL if there are no more input files. */
239 static FILE *
240 next_file (FILE *fp)
242 static char *prev_file;
243 char *file;
245 if (fp)
247 if (ferror (fp))
249 error (0, errno, "%s", prev_file);
250 exit_status = EXIT_FAILURE;
252 if (STREQ (prev_file, "-"))
253 clearerr (fp); /* Also clear EOF. */
254 else if (fclose (fp) != 0)
256 error (0, errno, "%s", prev_file);
257 exit_status = EXIT_FAILURE;
261 while ((file = *file_list++) != NULL)
263 if (STREQ (file, "-"))
265 have_read_stdin = true;
266 fp = stdin;
268 else
269 fp = fopen (file, "r");
270 if (fp)
272 prev_file = file;
273 fadvise (fp, FADVISE_SEQUENTIAL);
274 return fp;
276 error (0, errno, "%s", file);
277 exit_status = EXIT_FAILURE;
279 return NULL;
282 /* Change blanks to tabs, writing to stdout.
283 Read each file in `file_list', in order. */
285 static void
286 unexpand (void)
288 /* Input stream. */
289 FILE *fp = next_file (NULL);
291 /* The array of pending blanks. In non-POSIX locales, blanks can
292 include characters other than spaces, so the blanks must be
293 stored, not merely counted. */
294 char *pending_blank;
296 if (!fp)
297 return;
299 /* The worst case is a non-blank character, then one blank, then a
300 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
301 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
302 pending_blank = xmalloc (max_column_width);
304 while (true)
306 /* Input character, or EOF. */
307 int c;
309 /* If true, perform translations. */
310 bool convert = true;
313 /* The following variables have valid values only when CONVERT
314 is true: */
316 /* Column of next input character. */
317 uintmax_t column = 0;
319 /* Column the next input tab stop is on. */
320 uintmax_t next_tab_column = 0;
322 /* Index in TAB_LIST of next tab stop to examine. */
323 size_t tab_index = 0;
325 /* If true, the first pending blank came just before a tab stop. */
326 bool one_blank_before_tab_stop = false;
328 /* If true, the previous input character was a blank. This is
329 initially true, since initial strings of blanks are treated
330 as if the line was preceded by a blank. */
331 bool prev_blank = true;
333 /* Number of pending columns of blanks. */
334 size_t pending = 0;
337 /* Convert a line of text. */
341 while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
342 continue;
344 if (convert)
346 bool blank = !! isblank (c);
348 if (blank)
350 if (next_tab_column <= column)
352 if (tab_size)
353 next_tab_column =
354 column + (tab_size - column % tab_size);
355 else
356 while (true)
357 if (tab_index == first_free_tab)
359 convert = false;
360 break;
362 else
364 uintmax_t tab = tab_list[tab_index++];
365 if (column < tab)
367 next_tab_column = tab;
368 break;
373 if (convert)
375 if (next_tab_column < column)
376 error (EXIT_FAILURE, 0, _("input line is too long"));
378 if (c == '\t')
380 column = next_tab_column;
382 if (pending)
383 pending_blank[0] = '\t';
385 else
387 column++;
389 if (! (prev_blank && column == next_tab_column))
391 /* It is not yet known whether the pending blanks
392 will be replaced by tabs. */
393 if (column == next_tab_column)
394 one_blank_before_tab_stop = true;
395 pending_blank[pending++] = c;
396 prev_blank = true;
397 continue;
400 /* Replace the pending blanks by a tab or two. */
401 pending_blank[0] = c = '\t';
404 /* Discard pending blanks, unless it was a single
405 blank just before the previous tab stop. */
406 pending = one_blank_before_tab_stop;
409 else if (c == '\b')
411 /* Go back one column, and force recalculation of the
412 next tab stop. */
413 column -= !!column;
414 next_tab_column = column;
415 tab_index -= !!tab_index;
417 else
419 column++;
420 if (!column)
421 error (EXIT_FAILURE, 0, _("input line is too long"));
424 if (pending)
426 if (pending > 1 && one_blank_before_tab_stop)
427 pending_blank[0] = '\t';
428 if (fwrite (pending_blank, 1, pending, stdout) != pending)
429 error (EXIT_FAILURE, errno, _("write error"));
430 pending = 0;
431 one_blank_before_tab_stop = false;
434 prev_blank = blank;
435 convert &= convert_entire_line || blank;
438 if (c < 0)
440 free (pending_blank);
441 return;
444 if (putchar (c) < 0)
445 error (EXIT_FAILURE, errno, _("write error"));
447 while (c != '\n');
452 main (int argc, char **argv)
454 bool have_tabval = false;
455 uintmax_t tabval IF_LINT ( = 0);
456 int c;
458 /* If true, cancel the effect of any -a (explicit or implicit in -t),
459 so that only leading blanks will be considered. */
460 bool convert_first_only = false;
462 initialize_main (&argc, &argv);
463 set_program_name (argv[0]);
464 setlocale (LC_ALL, "");
465 bindtextdomain (PACKAGE, LOCALEDIR);
466 textdomain (PACKAGE);
468 atexit (close_stdout);
470 have_read_stdin = false;
471 exit_status = EXIT_SUCCESS;
472 convert_entire_line = false;
473 tab_list = NULL;
474 first_free_tab = 0;
476 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
477 != -1)
479 switch (c)
481 case '?':
482 usage (EXIT_FAILURE);
483 case 'a':
484 convert_entire_line = true;
485 break;
486 case 't':
487 convert_entire_line = true;
488 parse_tab_stops (optarg);
489 break;
490 case CONVERT_FIRST_ONLY_OPTION:
491 convert_first_only = true;
492 break;
493 case ',':
494 if (have_tabval)
495 add_tab_stop (tabval);
496 have_tabval = false;
497 break;
498 case_GETOPT_HELP_CHAR;
499 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
500 default:
501 if (!have_tabval)
503 tabval = 0;
504 have_tabval = true;
506 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
507 error (EXIT_FAILURE, 0, _("tab stop value is too large"));
508 break;
512 if (convert_first_only)
513 convert_entire_line = false;
515 if (have_tabval)
516 add_tab_stop (tabval);
518 validate_tab_stops (tab_list, first_free_tab);
520 if (first_free_tab == 0)
521 tab_size = max_column_width = 8;
522 else if (first_free_tab == 1)
523 tab_size = tab_list[0];
524 else
525 tab_size = 0;
527 file_list = (optind < argc ? &argv[optind] : stdin_argv);
529 unexpand ();
531 if (have_read_stdin && fclose (stdin) != 0)
532 error (EXIT_FAILURE, errno, "-");
534 exit (exit_status);