/* split: port ‘split -n N /dev/null’ better to macOS
   [coreutils.git] / src / unexpand.c
   blob 8c97f0d2d95e4526f120a912ebdd55744a003f15 */
/* unexpand - convert blanks to tabs
   Copyright (C) 1989-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
/* By default, convert only maximal strings of initial blanks and tabs
   into tabs.
   Preserves backspace characters in the output; they decrement the
   column count for tab calculations.
   The default action is equivalent to -8.

   Options:
   --tabs=tab1[,tab2[,...]]
   -t tab1[,tab2[,...]]
   -tab1[,tab2[,...]]   If only one tab stop is given, set the tabs tab1
                        columns apart instead of the default 8.  Otherwise,
                        set the tabs at columns tab1, tab2, etc. (numbered from
                        0); preserve any blanks beyond the tab stops given.
   --all
   -a                   Use tabs wherever they would replace 2 or more blanks,
                        not just at the beginnings of lines.

   David MacKenzie <djm@gnu.ai.mit.edu> */
36 #include <config.h>
38 #include <stdio.h>
39 #include <getopt.h>
40 #include <sys/types.h>
41 #include "system.h"
42 #include "die.h"
44 #include "expand-common.h"
46 /* The official name of this program (e.g., no 'g' prefix). */
47 #define PROGRAM_NAME "unexpand"
49 #define AUTHORS proper_name ("David MacKenzie")
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   Values above CHAR_MAX cannot collide with any character that
   getopt_long returns for a real short option.  */
enum
{
  CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1
};
60 static struct option const longopts[] =
62 {"tabs", required_argument, NULL, 't'},
63 {"all", no_argument, NULL, 'a'},
64 {"first-only", no_argument, NULL, CONVERT_FIRST_ONLY_OPTION},
65 {GETOPT_HELP_OPTION_DECL},
66 {GETOPT_VERSION_OPTION_DECL},
67 {NULL, 0, NULL, 0}
70 void
71 usage (int status)
73 if (status != EXIT_SUCCESS)
74 emit_try_help ();
75 else
77 printf (_("\
78 Usage: %s [OPTION]... [FILE]...\n\
79 "),
80 program_name);
81 fputs (_("\
82 Convert blanks in each FILE to tabs, writing to standard output.\n\
83 "), stdout);
85 emit_stdin_note ();
86 emit_mandatory_arg_note ();
88 fputs (_("\
89 -a, --all convert all blanks, instead of just initial blanks\n\
90 --first-only convert only leading sequences of blanks (overrides -a)\n\
91 -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\
92 "), stdout);
93 emit_tab_list_info ();
94 fputs (HELP_OPTION_DESCRIPTION, stdout);
95 fputs (VERSION_OPTION_DESCRIPTION, stdout);
96 emit_ancillary_info (PROGRAM_NAME);
98 exit (status);
101 /* Change blanks to tabs, writing to stdout.
102 Read each file in 'file_list', in order. */
104 static void
105 unexpand (void)
107 /* Input stream. */
108 FILE *fp = next_file (NULL);
110 /* The array of pending blanks. In non-POSIX locales, blanks can
111 include characters other than spaces, so the blanks must be
112 stored, not merely counted. */
113 char *pending_blank;
115 if (!fp)
116 return;
118 /* The worst case is a non-blank character, then one blank, then a
119 tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so
120 allocate MAX_COLUMN_WIDTH bytes to store the blanks. */
121 pending_blank = xmalloc (max_column_width);
123 while (true)
125 /* Input character, or EOF. */
126 int c;
128 /* If true, perform translations. */
129 bool convert = true;
132 /* The following variables have valid values only when CONVERT
133 is true: */
135 /* Column of next input character. */
136 uintmax_t column = 0;
138 /* Column the next input tab stop is on. */
139 uintmax_t next_tab_column = 0;
141 /* Index in TAB_LIST of next tab stop to examine. */
142 size_t tab_index = 0;
144 /* If true, the first pending blank came just before a tab stop. */
145 bool one_blank_before_tab_stop = false;
147 /* If true, the previous input character was a blank. This is
148 initially true, since initial strings of blanks are treated
149 as if the line was preceded by a blank. */
150 bool prev_blank = true;
152 /* Number of pending columns of blanks. */
153 size_t pending = 0;
156 /* Convert a line of text. */
160 while ((c = getc (fp)) < 0 && (fp = next_file (fp)))
161 continue;
163 if (convert)
165 bool blank = !! isblank (c);
167 if (blank)
169 bool last_tab;
171 next_tab_column = get_next_tab_column (column, &tab_index,
172 &last_tab);
174 if (last_tab)
175 convert = false;
177 if (convert)
179 if (next_tab_column < column)
180 die (EXIT_FAILURE, 0, _("input line is too long"));
182 if (c == '\t')
184 column = next_tab_column;
186 if (pending)
187 pending_blank[0] = '\t';
189 else
191 column++;
193 if (! (prev_blank && column == next_tab_column))
195 /* It is not yet known whether the pending blanks
196 will be replaced by tabs. */
197 if (column == next_tab_column)
198 one_blank_before_tab_stop = true;
199 pending_blank[pending++] = c;
200 prev_blank = true;
201 continue;
204 /* Replace the pending blanks by a tab or two. */
205 pending_blank[0] = c = '\t';
208 /* Discard pending blanks, unless it was a single
209 blank just before the previous tab stop. */
210 pending = one_blank_before_tab_stop;
213 else if (c == '\b')
215 /* Go back one column, and force recalculation of the
216 next tab stop. */
217 column -= !!column;
218 next_tab_column = column;
219 tab_index -= !!tab_index;
221 else
223 column++;
224 if (!column)
225 die (EXIT_FAILURE, 0, _("input line is too long"));
228 if (pending)
230 if (pending > 1 && one_blank_before_tab_stop)
231 pending_blank[0] = '\t';
232 if (fwrite (pending_blank, 1, pending, stdout) != pending)
233 die (EXIT_FAILURE, errno, _("write error"));
234 pending = 0;
235 one_blank_before_tab_stop = false;
238 prev_blank = blank;
239 convert &= convert_entire_line || blank;
242 if (c < 0)
244 free (pending_blank);
245 return;
248 if (putchar (c) < 0)
249 die (EXIT_FAILURE, errno, _("write error"));
251 while (c != '\n');
256 main (int argc, char **argv)
258 bool have_tabval = false;
259 uintmax_t tabval IF_LINT ( = 0);
260 int c;
262 /* If true, cancel the effect of any -a (explicit or implicit in -t),
263 so that only leading blanks will be considered. */
264 bool convert_first_only = false;
266 initialize_main (&argc, &argv);
267 set_program_name (argv[0]);
268 setlocale (LC_ALL, "");
269 bindtextdomain (PACKAGE, LOCALEDIR);
270 textdomain (PACKAGE);
272 atexit (close_stdout);
274 while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, NULL))
275 != -1)
277 switch (c)
279 case '?':
280 usage (EXIT_FAILURE);
281 case 'a':
282 convert_entire_line = true;
283 break;
284 case 't':
285 convert_entire_line = true;
286 parse_tab_stops (optarg);
287 break;
288 case CONVERT_FIRST_ONLY_OPTION:
289 convert_first_only = true;
290 break;
291 case ',':
292 if (have_tabval)
293 add_tab_stop (tabval);
294 have_tabval = false;
295 break;
296 case_GETOPT_HELP_CHAR;
297 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
298 default:
299 if (!have_tabval)
301 tabval = 0;
302 have_tabval = true;
304 if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t))
305 die (EXIT_FAILURE, 0, _("tab stop value is too large"));
306 break;
310 if (convert_first_only)
311 convert_entire_line = false;
313 if (have_tabval)
314 add_tab_stop (tabval);
316 finalize_tab_stops ();
318 set_file_list ((optind < argc) ? &argv[optind] : NULL);
320 unexpand ();
322 cleanup_file_list_stdin ();
324 return exit_status;