dd: synchronize output after write errors
[coreutils.git] / src / cut.c
blob5143c8bd94b9fe0f9f299aa3a7f93c06bcbbc238
1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2022 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
25 #include <config.h>
27 #include <stdio.h>
28 #include <assert.h>
29 #include <getopt.h>
30 #include <sys/types.h>
31 #include "system.h"
33 #include "error.h"
34 #include "fadvise.h"
35 #include "getndelim2.h"
36 #include "hash.h"
38 #include "set-fields.h"
/* Canonical name of this utility, without any installation prefix
   such as 'g'.  */
#define PROGRAM_NAME "cut"

/* The authors, in the argument-list form that the --version handler
   in main expects.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")
/* Print MESSAGE as a diagnostic (exit status 0 and errnum 0 are passed
   to error, so error itself does not exit), then call
   usage (EXIT_FAILURE).

   MESSAGE is handed to error as the argument of a fixed "%s" format
   rather than as the format itself, so a '%' that finds its way into
   the text -- for instance through a translation -- is printed
   literally instead of being interpreted as a conversion.  */
#define FATAL_ERROR(Message) \
  do \
    { \
      error (0, 0, "%s", (Message)); \
      usage (EXIT_FAILURE); \
    } \
  while (0)
/* Cursor into the array of range pairs.  A byte or field index is
   tested against the pair this points at (CURRENT_RP.LO and
   CURRENT_RP.HI); once the index grows past CURRENT_RP.HI the cursor
   is advanced to the following pair.  */
static struct field_range_pair *current_rp;

/* Holding area for the first field of a line.  It is needed to honor
   -s (or its absence) when the field list does (or does not) select
   field 1: in those cases the whole first field has to be read before
   any of it is written, because only then is it known whether a
   delimiter or the end of the line follows it.  In every other case
   cut_fields works without this buffer.  */
static char *field_1_buffer;

/* Allocated size of FIELD_1_BUFFER, in bytes.  */
static size_t field_1_bufsize;
/* What kind of list the user asked for.  */
enum operating_mode
  {
    /* No -b, -c or -f option has been seen yet.  */
    undefined_mode,

    /* Output characters that are in the given bytes.  */
    byte_mode,

    /* Output the given delimiter-separated fields.  */
    field_mode
  };

/* The mode selected on the command line.  */
static enum operating_mode operating_mode;
/* When true, lines that contain no delimiter character are dropped;
   when false they are printed in full.  Only meaningful in field
   mode.  */
static bool suppress_non_delimited;

/* When true, output everything _except_ the bytes, characters or
   fields that were listed.  */
static bool complement;

/* Input field delimiter used in field mode.  */
static unsigned char delim;

/* Byte that terminates each input line/record.  */
static unsigned char line_delim = '\n';

/* Set once --output-delimiter=STRING has been given.  */
static bool output_delimiter_specified;

/* Number of bytes in output_delimiter_string.  */
static size_t output_delimiter_length;

/* String written between output fields.  When not given explicitly it
   is the one-character string holding the input delimiter.  */
static char *output_delimiter_string;

/* Set as soon as standard input has been used as an input file.  */
static bool have_read_stdin;
/* Long options without a short equivalent get a pseudo short-option
   code that cannot collide with a real character: numbering starts
   just above CHAR_MAX.  */
enum
  {
    OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
    COMPLEMENT_OPTION
  };
124 static struct option const longopts[] =
126 {"bytes", required_argument, NULL, 'b'},
127 {"characters", required_argument, NULL, 'c'},
128 {"fields", required_argument, NULL, 'f'},
129 {"delimiter", required_argument, NULL, 'd'},
130 {"only-delimited", no_argument, NULL, 's'},
131 {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
132 {"complement", no_argument, NULL, COMPLEMENT_OPTION},
133 {"zero-terminated", no_argument, NULL, 'z'},
134 {GETOPT_HELP_OPTION_DECL},
135 {GETOPT_VERSION_OPTION_DECL},
136 {NULL, 0, NULL, 0}
/* Print help for the program and call exit (STATUS).
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the usage line, the option summary and the description of
   the LIST syntax are written to standard output.
   NOTE(review): in this copy the help-text literals appear to have lost
   their column padding and some "\n\" continuation lines, and brace-only
   lines are absent -- confirm the exact text and block structure against
   the upstream file before editing the strings.  */
139 void
140 usage (int status)
/* Failure path: short hint only.  */
142 if (status != EXIT_SUCCESS)
143 emit_try_help ();
144 else
146 printf (_("\
147 Usage: %s OPTION... [FILE]...\n\
149 program_name);
150 fputs (_("\
151 Print selected parts of lines from each FILE to standard output.\n\
152 "), stdout);
154 emit_stdin_note ();
155 emit_mandatory_arg_note ();
157 fputs (_("\
158 -b, --bytes=LIST select only these bytes\n\
159 -c, --characters=LIST select only these characters\n\
160 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
161 "), stdout);
162 fputs (_("\
163 -f, --fields=LIST select only these fields; also print any line\n\
164 that contains no delimiter character, unless\n\
165 the -s option is specified\n\
166 -n (ignored)\n\
167 "), stdout);
168 fputs (_("\
169 --complement complement the set of selected bytes, characters\n\
170 or fields\n\
171 "), stdout);
172 fputs (_("\
173 -s, --only-delimited do not print lines not containing delimiters\n\
174 --output-delimiter=STRING use STRING as the output delimiter\n\
175 the default is to use the input delimiter\n\
176 "), stdout);
177 fputs (_("\
178 -z, --zero-terminated line delimiter is NUL, not newline\n\
179 "), stdout);
180 fputs (HELP_OPTION_DESCRIPTION, stdout);
181 fputs (VERSION_OPTION_DESCRIPTION, stdout);
182 fputs (_("\
184 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
185 range, or many ranges separated by commas. Selected input is written\n\
186 in the same order that it is read, and is written exactly once.\n\
187 "), stdout);
188 fputs (_("\
189 Each range is one of:\n\
191 N N'th byte, character or field, counted from 1\n\
192 N- from N'th byte, character or field, to end of line\n\
193 N-M from N'th to M'th (included) byte, character or field\n\
194 -M from first to M'th (included) byte, character or field\n\
195 "), stdout);
196 emit_ancillary_info (PROGRAM_NAME);
/* The process ends here with the caller-supplied status.  */
198 exit (status);
202 /* Increment *ITEM_IDX (i.e., a field or byte index),
203 and if required CURRENT_RP. */
205 static inline void
206 next_item (uintmax_t *item_idx)
208 (*item_idx)++;
209 if ((*item_idx) > current_rp->hi)
210 current_rp++;
213 /* Return nonzero if the K'th field or byte is printable. */
215 static inline bool
216 print_kth (uintmax_t k)
218 return current_rp->lo <= k;
221 /* Return nonzero if K'th byte is the beginning of a range. */
223 static inline bool
224 is_range_start_index (uintmax_t k)
226 return k == current_rp->lo;
229 /* Read from stream STREAM, printing to standard output any selected bytes. */
231 static void
232 cut_bytes (FILE *stream)
234 uintmax_t byte_idx; /* Number of bytes in the line so far. */
235 /* Whether to begin printing delimiters between ranges for the current line.
236 Set after we've begun printing data corresponding to the first range. */
237 bool print_delimiter;
239 byte_idx = 0;
240 print_delimiter = false;
241 current_rp = frp;
242 while (true)
244 int c; /* Each character from the file. */
246 c = getc (stream);
248 if (c == line_delim)
250 putchar (c);
251 byte_idx = 0;
252 print_delimiter = false;
253 current_rp = frp;
255 else if (c == EOF)
257 if (byte_idx > 0)
258 putchar (line_delim);
259 break;
261 else
263 next_item (&byte_idx);
264 if (print_kth (byte_idx))
266 if (output_delimiter_specified)
268 if (print_delimiter && is_range_start_index (byte_idx))
270 fwrite (output_delimiter_string, sizeof (char),
271 output_delimiter_length, stdout);
273 print_delimiter = true;
276 putchar (c);
282 /* Read from stream STREAM, printing to standard output any selected fields. */
284 static void
285 cut_fields (FILE *stream)
287 int c;
288 uintmax_t field_idx = 1;
289 bool found_any_selected_field = false;
290 bool buffer_first_field;
292 current_rp = frp;
294 c = getc (stream);
295 if (c == EOF)
296 return;
298 ungetc (c, stream);
299 c = 0;
301 /* To support the semantics of the -s flag, we may have to buffer
302 all of the first field to determine whether it is 'delimited.'
303 But that is unnecessary if all non-delimited lines must be printed
304 and the first field has been selected, or if non-delimited lines
305 must be suppressed and the first field has *not* been selected.
306 That is because a non-delimited line has exactly one field. */
307 buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
309 while (true)
311 if (field_idx == 1 && buffer_first_field)
313 ssize_t len;
314 size_t n_bytes;
316 len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
317 GETNLINE_NO_LIMIT, delim, line_delim, stream);
318 if (len < 0)
320 free (field_1_buffer);
321 field_1_buffer = NULL;
322 if (ferror (stream) || feof (stream))
323 break;
324 xalloc_die ();
327 n_bytes = len;
328 assert (n_bytes != 0);
330 c = 0;
332 /* If the first field extends to the end of line (it is not
333 delimited) and we are printing all non-delimited lines,
334 print this one. */
335 if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
337 if (suppress_non_delimited)
339 /* Empty. */
341 else
343 fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
344 /* Make sure the output line is newline terminated. */
345 if (field_1_buffer[n_bytes - 1] != line_delim)
346 putchar (line_delim);
347 c = line_delim;
349 continue;
351 if (print_kth (1))
353 /* Print the field, but not the trailing delimiter. */
354 fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
356 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
357 if (delim == line_delim)
359 int last_c = getc (stream);
360 if (last_c != EOF)
362 ungetc (last_c, stream);
363 found_any_selected_field = true;
366 else
367 found_any_selected_field = true;
369 next_item (&field_idx);
372 int prev_c = c;
374 if (print_kth (field_idx))
376 if (found_any_selected_field)
378 fwrite (output_delimiter_string, sizeof (char),
379 output_delimiter_length, stdout);
381 found_any_selected_field = true;
383 while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
385 putchar (c);
386 prev_c = c;
389 else
391 while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
393 prev_c = c;
397 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
398 if (delim == line_delim && c == delim)
400 int last_c = getc (stream);
401 if (last_c != EOF)
402 ungetc (last_c, stream);
403 else
404 c = last_c;
407 if (c == delim)
408 next_item (&field_idx);
409 else if (c == line_delim || c == EOF)
411 if (found_any_selected_field
412 || !(suppress_non_delimited && field_idx == 1))
414 if (c == line_delim || prev_c != line_delim
415 || delim == line_delim)
416 putchar (line_delim);
418 if (c == EOF)
419 break;
420 field_idx = 1;
421 current_rp = frp;
422 found_any_selected_field = false;
427 static void
428 cut_stream (FILE *stream)
430 if (operating_mode == byte_mode)
431 cut_bytes (stream);
432 else
433 cut_fields (stream);
436 /* Process file FILE to standard output.
437 Return true if successful. */
439 static bool
440 cut_file (char const *file)
442 FILE *stream;
444 if (STREQ (file, "-"))
446 have_read_stdin = true;
447 stream = stdin;
449 else
451 stream = fopen (file, "r");
452 if (stream == NULL)
454 error (0, errno, "%s", quotef (file));
455 return false;
459 fadvise (stream, FADVISE_SEQUENTIAL);
461 cut_stream (stream);
463 int err = errno;
464 if (!ferror (stream))
465 err = 0;
466 if (STREQ (file, "-"))
467 clearerr (stream); /* Also clear EOF. */
468 else if (fclose (stream) == EOF)
469 err = errno;
470 if (err)
472 error (0, err, "%s", quotef (file));
473 return false;
475 return true;
479 main (int argc, char **argv)
481 int optc;
482 bool ok;
483 bool delim_specified = false;
484 char *spec_list_string IF_LINT ( = NULL);
486 initialize_main (&argc, &argv);
487 set_program_name (argv[0]);
488 setlocale (LC_ALL, "");
489 bindtextdomain (PACKAGE, LOCALEDIR);
490 textdomain (PACKAGE);
492 atexit (close_stdout);
494 operating_mode = undefined_mode;
496 /* By default, all non-delimited lines are printed. */
497 suppress_non_delimited = false;
499 delim = '\0';
500 have_read_stdin = false;
502 while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)
504 switch (optc)
506 case 'b':
507 case 'c':
508 /* Build the byte list. */
509 if (operating_mode != undefined_mode)
510 FATAL_ERROR (_("only one type of list may be specified"));
511 operating_mode = byte_mode;
512 spec_list_string = optarg;
513 break;
515 case 'f':
516 /* Build the field list. */
517 if (operating_mode != undefined_mode)
518 FATAL_ERROR (_("only one type of list may be specified"));
519 operating_mode = field_mode;
520 spec_list_string = optarg;
521 break;
523 case 'd':
524 /* New delimiter. */
525 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
526 if (optarg[0] != '\0' && optarg[1] != '\0')
527 FATAL_ERROR (_("the delimiter must be a single character"));
528 delim = optarg[0];
529 delim_specified = true;
530 break;
532 case OUTPUT_DELIMITER_OPTION:
533 output_delimiter_specified = true;
534 /* Interpret --output-delimiter='' to mean
535 'use the NUL byte as the delimiter.' */
536 output_delimiter_length = (optarg[0] == '\0'
537 ? 1 : strlen (optarg));
538 output_delimiter_string = xstrdup (optarg);
539 break;
541 case 'n':
542 break;
544 case 's':
545 suppress_non_delimited = true;
546 break;
548 case 'z':
549 line_delim = '\0';
550 break;
552 case COMPLEMENT_OPTION:
553 complement = true;
554 break;
556 case_GETOPT_HELP_CHAR;
558 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
560 default:
561 usage (EXIT_FAILURE);
565 if (operating_mode == undefined_mode)
566 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
568 if (delim_specified && operating_mode != field_mode)
569 FATAL_ERROR (_("an input delimiter may be specified only\
570 when operating on fields"));
572 if (suppress_non_delimited && operating_mode != field_mode)
573 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
574 \tonly when operating on fields"));
576 set_fields (spec_list_string,
577 ( (operating_mode == field_mode) ? 0 : SETFLD_ERRMSG_USE_POS)
578 | (complement ? SETFLD_COMPLEMENT : 0) );
580 if (!delim_specified)
581 delim = '\t';
583 if (output_delimiter_string == NULL)
585 static char dummy[2];
586 dummy[0] = delim;
587 dummy[1] = '\0';
588 output_delimiter_string = dummy;
589 output_delimiter_length = 1;
592 if (optind == argc)
593 ok = cut_file ("-");
594 else
595 for (ok = true; optind < argc; optind++)
596 ok &= cut_file (argv[optind]);
599 if (have_read_stdin && fclose (stdin) == EOF)
601 error (0, errno, "-");
602 ok = false;
605 IF_LINT (reset_fields ());
607 return ok ? EXIT_SUCCESS : EXIT_FAILURE;