du: --apparent counts only symlinks and regular
[coreutils.git] / src / cut.c
blobe8346b7c78a0f53125d5550493cf798591c898b6
1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2023 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
25 #include <config.h>
27 #include <stdio.h>
28 #include <assert.h>
29 #include <getopt.h>
30 #include <sys/types.h>
31 #include "system.h"
33 #include "error.h"
34 #include "fadvise.h"
35 #include "getndelim2.h"
37 #include "set-fields.h"
39 /* The official name of this program (e.g., no 'g' prefix). */
40 #define PROGRAM_NAME "cut"
/* The author list. main hands it, together with PROGRAM_NAME, to
case_GETOPT_VERSION_CHAR when handling the version option. */
42 #define AUTHORS \
43 proper_name ("David M. Ihnat"), \
44 proper_name ("David MacKenzie"), \
45 proper_name ("Jim Meyering")
/* Report MESSAGE as a diagnostic via error (), then call
   usage (EXIT_FAILURE), which prints the "try --help" hint and exits.
   MESSAGE is passed as the argument of a "%s" format rather than as the
   format itself, so a '%' in a (possibly translated) message can never
   be interpreted as a conversion specification.  */
#define FATAL_ERROR(Message)                                            \
  do                                                                    \
    {                                                                   \
      error (0, 0, "%s", (Message));                                    \
      usage (EXIT_FAILURE);                                             \
    }                                                                   \
  while (0)
56 /* Pointer into the range-pair list FRP; cut_bytes and cut_fields reset it
57 to FRP at the start of every line. When checking if a byte or field is
58 selected by a finite range, we check if it is between CURRENT_RP.LO and
59 CURRENT_RP.HI. If the byte or field index is greater than CURRENT_RP.HI
then next_item advances CURRENT_RP to point to the next range pair. */
60 static struct field_range_pair *current_rp;
62 /* This buffer is used to support the semantics of the -s option
63 (or lack of same) when the specified field list includes (does
64 not include) the first field. In both of those cases, the entire
65 first field must be read into this buffer to determine whether it
66 is followed by a delimiter or a newline before any of it may be
67 output. Otherwise, cut_fields can do the job without using this
68 buffer. */
69 static char *field_1_buffer;
71 /* The number of bytes allocated for FIELD_1_BUFFER. */
72 static size_t field_1_bufsize;
74 /* If true, do not output lines containing no delimiter characters.
75 Otherwise, all such lines are printed. This option is valid only
76 with field mode. */
77 static bool suppress_non_delimited;
79 /* If true, print all bytes, characters, or fields _except_
80 those that were specified. */
81 static bool complement;
83 /* The delimiter character for field mode. main sets it to TAB
when no -d option was given. */
84 static unsigned char delim;
86 /* The delimiter for each line/record: newline by default,
NUL when -z is given. */
87 static unsigned char line_delim = '\n';
89 /* The length of output_delimiter_string. */
90 static size_t output_delimiter_length;
92 /* The output field separator string. Defaults to the 1-character
93 string consisting of the input delimiter. */
94 static char *output_delimiter_string;
96 /* Storage for the default output delimiter (a one-byte copy of DELIM).
OUTPUT_DELIMITER_STRING points here when no output delimiter option
was given, and cut_bytes compares against this address to detect that. */
97 static char output_delimiter_default[1];
99 /* True if we have ever read standard input. */
100 static bool have_read_stdin;
102 /* For long options that have no equivalent short option, use a
103 non-character as a pseudo short option, starting with CHAR_MAX + 1.
These are the values getopt_long returns for such options, as listed
in the longopts table. */
104 enum
/* Returned for --output-delimiter. */
106 OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
/* Returned for --complement. */
107 COMPLEMENT_OPTION
/* Long-option table passed to getopt_long by main. Each entry has a NULL
flag pointer, so getopt_long returns the value in the last column: the
matching short option character, or one of the pseudo-option codes
OUTPUT_DELIMITER_OPTION and COMPLEMENT_OPTION. The all-zero entry
terminates the table. */
110 static struct option const longopts[] =
112 {"bytes", required_argument, NULL, 'b'},
113 {"characters", required_argument, NULL, 'c'},
114 {"fields", required_argument, NULL, 'f'},
115 {"delimiter", required_argument, NULL, 'd'},
116 {"only-delimited", no_argument, NULL, 's'},
117 {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
118 {"complement", no_argument, NULL, COMPLEMENT_OPTION},
119 {"zero-terminated", no_argument, NULL, 'z'},
120 {GETOPT_HELP_OPTION_DECL},
121 {GETOPT_VERSION_OPTION_DECL},
122 {NULL, 0, NULL, 0}
/* Print usage information and terminate the program with exit status
STATUS; this function never returns.
When STATUS is not EXIT_SUCCESS, only emit_try_help is called.
Otherwise the synopsis, the option descriptions and the explanation of
the LIST range syntax are written to standard output, followed by
emit_ancillary_info. */
125 void
126 usage (int status)
/* A failing invocation gets only the short hint, not the full help. */
128 if (status != EXIT_SUCCESS)
129 emit_try_help ();
130 else
132 printf (_("\
133 Usage: %s OPTION... [FILE]...\n\
135 program_name);
136 fputs (_("\
137 Print selected parts of lines from each FILE to standard output.\n\
138 "), stdout);
140 emit_stdin_note ();
141 emit_mandatory_arg_note ();
143 fputs (_("\
144 -b, --bytes=LIST select only these bytes\n\
145 -c, --characters=LIST select only these characters\n\
146 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
147 "), stdout);
148 fputs (_("\
149 -f, --fields=LIST select only these fields; also print any line\n\
150 that contains no delimiter character, unless\n\
151 the -s option is specified\n\
152 -n (ignored)\n\
153 "), stdout);
154 fputs (_("\
155 --complement complement the set of selected bytes, characters\n\
156 or fields\n\
157 "), stdout);
158 fputs (_("\
159 -s, --only-delimited do not print lines not containing delimiters\n\
160 --output-delimiter=STRING use STRING as the output delimiter\n\
161 the default is to use the input delimiter\n\
162 "), stdout);
163 fputs (_("\
164 -z, --zero-terminated line delimiter is NUL, not newline\n\
165 "), stdout);
166 fputs (HELP_OPTION_DESCRIPTION, stdout);
167 fputs (VERSION_OPTION_DESCRIPTION, stdout);
168 fputs (_("\
170 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
171 range, or many ranges separated by commas. Selected input is written\n\
172 in the same order that it is read, and is written exactly once.\n\
173 "), stdout);
174 fputs (_("\
175 Each range is one of:\n\
177 N N'th byte, character or field, counted from 1\n\
178 N- from N'th byte, character or field, to end of line\n\
179 N-M from N'th to M'th (included) byte, character or field\n\
180 -M from first to M'th (included) byte, character or field\n\
181 "), stdout);
182 emit_ancillary_info (PROGRAM_NAME);
184 exit (status);
188 /* Increment *ITEM_IDX (i.e., a field or byte index),
189 and if required CURRENT_RP. */
191 static inline void
192 next_item (uintmax_t *item_idx)
194 (*item_idx)++;
195 if ((*item_idx) > current_rp->hi)
196 current_rp++;
199 /* Return nonzero if the K'th field or byte is printable. */
201 static inline bool
202 print_kth (uintmax_t k)
204 return current_rp->lo <= k;
207 /* Return nonzero if K'th byte is the beginning of a range. */
209 static inline bool
210 is_range_start_index (uintmax_t k)
212 return k == current_rp->lo;
215 /* Read from stream STREAM, printing to standard output any selected bytes. */
217 static void
218 cut_bytes (FILE *stream)
220 uintmax_t byte_idx; /* Number of bytes in the line so far. */
221 /* Whether to begin printing delimiters between ranges for the current line.
222 Set after we've begun printing data corresponding to the first range. */
223 bool print_delimiter;
225 byte_idx = 0;
226 print_delimiter = false;
227 current_rp = frp;
228 while (true)
230 int c; /* Each character from the file. */
232 c = getc (stream);
234 if (c == line_delim)
236 putchar (c);
237 byte_idx = 0;
238 print_delimiter = false;
239 current_rp = frp;
241 else if (c == EOF)
243 if (byte_idx > 0)
244 putchar (line_delim);
245 break;
247 else
249 next_item (&byte_idx);
250 if (print_kth (byte_idx))
252 if (output_delimiter_string != output_delimiter_default)
254 if (print_delimiter && is_range_start_index (byte_idx))
256 fwrite (output_delimiter_string, sizeof (char),
257 output_delimiter_length, stdout);
259 print_delimiter = true;
262 putchar (c);
268 /* Read from stream STREAM, printing to standard output any selected fields.
Fields are separated by DELIM in the input and joined with
OUTPUT_DELIMITER_STRING in the output; every printed line ends with
LINE_DELIM. A line without any DELIM is printed whole unless
SUPPRESS_NON_DELIMITED is set. */
270 static void
271 cut_fields (FILE *stream)
273 int c;
274 uintmax_t field_idx = 1;
275 bool found_any_selected_field = false;
276 bool buffer_first_field;
278 current_rp = frp;
/* Peek at one byte so that an empty input produces no output at all;
a byte that was read is pushed back for the main loop. */
280 c = getc (stream);
281 if (c == EOF)
282 return;
284 ungetc (c, stream);
/* C also seeds PREV_C in the loop below, so discard the peeked value. */
285 c = 0;
287 /* To support the semantics of the -s flag, we may have to buffer
288 all of the first field to determine whether it is 'delimited.'
289 But that is unnecessary if all non-delimited lines must be printed
290 and the first field has been selected, or if non-delimited lines
291 must be suppressed and the first field has *not* been selected.
292 That is because a non-delimited line has exactly one field. */
293 buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
295 while (true)
297 if (field_idx == 1 && buffer_first_field)
299 ssize_t len;
300 size_t n_bytes;
/* Read the first field into FIELD_1_BUFFER. NOTE(review): getndelim2
appears to store the terminating DELIM or LINE_DELIM as well, since the
last buffered byte is compared against both below -- confirm against
the getndelim2 documentation. */
302 len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
303 GETNLINE_NO_LIMIT, delim, line_delim, stream);
/* A negative LEN ends processing of this stream on a read error or end
of input; any other failure is presumably an allocation failure and is
handed to xalloc_die. */
304 if (len < 0)
306 free (field_1_buffer);
307 field_1_buffer = NULL;
308 if (ferror (stream) || feof (stream))
309 break;
310 xalloc_die ();
313 n_bytes = len;
314 assert (n_bytes != 0);
316 c = 0;
318 /* If the first field extends to the end of line (it is not
319 delimited) and we are printing all non-delimited lines,
320 print this one. */
321 if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
323 if (suppress_non_delimited)
325 /* Empty. */
327 else
329 fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
330 /* Make sure the output line is newline terminated. */
331 if (field_1_buffer[n_bytes - 1] != line_delim)
332 putchar (line_delim);
333 c = line_delim;
/* The whole line has been consumed and FIELD_IDX is still 1, so the
next iteration buffers the first field of the following line. */
335 continue;
337 if (print_kth (1))
339 /* Print the field, but not the trailing delimiter. */
340 fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
342 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
343 if (delim == line_delim)
345 int last_c = getc (stream);
346 if (last_c != EOF)
348 ungetc (last_c, stream);
349 found_any_selected_field = true;
352 else
353 found_any_selected_field = true;
355 next_item (&field_idx);
/* PREV_C tracks the last byte consumed before the field terminator, so
the end-of-input handling below can tell whether the input already
ended with LINE_DELIM. */
358 int prev_c = c;
/* Consume one field up to DELIM, LINE_DELIM or EOF, echoing it only when
it is selected; a selected field after the first printed one is
preceded by the output delimiter. */
360 if (print_kth (field_idx))
362 if (found_any_selected_field)
364 fwrite (output_delimiter_string, sizeof (char),
365 output_delimiter_length, stdout);
367 found_any_selected_field = true;
369 while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
371 putchar (c);
372 prev_c = c;
375 else
377 while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
379 prev_c = c;
383 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
384 if (delim == line_delim && c == delim)
386 int last_c = getc (stream);
387 if (last_c != EOF)
388 ungetc (last_c, stream);
389 else
390 c = last_c;
/* A field delimiter moves on to the next field; a line delimiter or EOF
finishes the current output line. */
393 if (c == delim)
394 next_item (&field_idx);
395 else if (c == line_delim || c == EOF)
/* Terminate the line if something was printed, or if this line is not a
suppressed non-delimited one. At EOF the terminator is skipped when
the input already ended with LINE_DELIM. */
397 if (found_any_selected_field
398 || !(suppress_non_delimited && field_idx == 1))
400 if (c == line_delim || prev_c != line_delim
401 || delim == line_delim)
402 putchar (line_delim);
404 if (c == EOF)
405 break;
/* Start of a new line: restart field numbering and the range pointer. */
406 field_idx = 1;
407 current_rp = frp;
408 found_any_selected_field = false;
413 /* Process file FILE to standard output, using CUT_STREAM.
414 Return true if successful. */
416 static bool
417 cut_file (char const *file, void (*cut_stream) (FILE *))
419 FILE *stream;
421 if (STREQ (file, "-"))
423 have_read_stdin = true;
424 stream = stdin;
426 else
428 stream = fopen (file, "r");
429 if (stream == NULL)
431 error (0, errno, "%s", quotef (file));
432 return false;
436 fadvise (stream, FADVISE_SEQUENTIAL);
438 cut_stream (stream);
440 int err = errno;
441 if (!ferror (stream))
442 err = 0;
443 if (STREQ (file, "-"))
444 clearerr (stream); /* Also clear EOF. */
445 else if (fclose (stream) == EOF)
446 err = errno;
447 if (err)
449 error (0, err, "%s", quotef (file));
450 return false;
452 return true;
456 main (int argc, char **argv)
458 int optc;
459 bool ok;
460 bool delim_specified = false;
461 bool byte_mode = false;
462 char *spec_list_string = NULL;
464 initialize_main (&argc, &argv);
465 set_program_name (argv[0]);
466 setlocale (LC_ALL, "");
467 bindtextdomain (PACKAGE, LOCALEDIR);
468 textdomain (PACKAGE);
470 atexit (close_stdout);
472 /* By default, all non-delimited lines are printed. */
473 suppress_non_delimited = false;
475 delim = '\0';
476 have_read_stdin = false;
478 while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)
480 switch (optc)
482 case 'b':
483 case 'c':
484 /* Build the byte list. */
485 byte_mode = true;
486 FALLTHROUGH;
487 case 'f':
488 /* Build the field list. */
489 if (spec_list_string)
490 FATAL_ERROR (_("only one list may be specified"));
491 spec_list_string = optarg;
492 break;
494 case 'd':
495 /* New delimiter. */
496 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
497 if (optarg[0] != '\0' && optarg[1] != '\0')
498 FATAL_ERROR (_("the delimiter must be a single character"));
499 delim = optarg[0];
500 delim_specified = true;
501 break;
503 case OUTPUT_DELIMITER_OPTION:
504 /* Interpret --output-delimiter='' to mean
505 'use the NUL byte as the delimiter.' */
506 output_delimiter_length = (optarg[0] == '\0'
507 ? 1 : strlen (optarg));
508 output_delimiter_string = optarg;
509 break;
511 case 'n':
512 break;
514 case 's':
515 suppress_non_delimited = true;
516 break;
518 case 'z':
519 line_delim = '\0';
520 break;
522 case COMPLEMENT_OPTION:
523 complement = true;
524 break;
526 case_GETOPT_HELP_CHAR;
528 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
530 default:
531 usage (EXIT_FAILURE);
535 if (!spec_list_string)
536 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
538 if (byte_mode)
540 if (delim_specified)
541 FATAL_ERROR (_("an input delimiter may be specified only\
542 when operating on fields"));
544 if (suppress_non_delimited)
545 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
546 \tonly when operating on fields"));
549 set_fields (spec_list_string,
550 ((byte_mode ? SETFLD_ERRMSG_USE_POS : 0)
551 | (complement ? SETFLD_COMPLEMENT : 0)));
553 if (!delim_specified)
554 delim = '\t';
556 if (output_delimiter_string == NULL)
558 output_delimiter_default[0] = delim;
559 output_delimiter_string = output_delimiter_default;
560 output_delimiter_length = 1;
563 void (*cut_stream) (FILE *) = byte_mode ? cut_bytes : cut_fields;
564 if (optind == argc)
565 ok = cut_file ("-", cut_stream);
566 else
567 for (ok = true; optind < argc; optind++)
568 ok &= cut_file (argv[optind], cut_stream);
571 if (have_read_stdin && fclose (stdin) == EOF)
573 error (0, errno, "-");
574 ok = false;
577 return ok ? EXIT_SUCCESS : EXIT_FAILURE;