numfmt: prefer signed types
[coreutils.git] / src / cut.c
blob476df0943b203e3c19098f9c51dec76d58cd1c27
1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2023 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
25 #include <config.h>
27 #include <stdio.h>
28 #include <getopt.h>
29 #include <sys/types.h>
30 #include "system.h"
32 #include "assure.h"
33 #include "fadvise.h"
34 #include "getndelim2.h"
36 #include "set-fields.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cut"

/* Author list, in the form expected by case_GETOPT_VERSION_CHAR.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")

/* Report MESSAGE with error (0, 0, ...) and then call
   usage (EXIT_FAILURE).  MESSAGE is handed over as the argument of a
   "%s" format rather than as the format itself, so that a '%' in the
   (possibly translated) text is printed literally instead of being
   interpreted as a conversion specification.  */
#define FATAL_ERROR(Message)						\
  do									\
    {									\
      error (0, 0, "%s", (Message));					\
      usage (EXIT_FAILURE);						\
    }									\
  while (0)
/* Cursor into the list of selected ranges.  A byte or field index is
   tested against CURRENT_RP->lo and CURRENT_RP->hi; as soon as the
   index grows past CURRENT_RP->hi the cursor is moved on to the next
   range pair.  */
static struct field_range_pair *current_rp;

/* Holding area for the first field of a line.  It is needed to
   implement -s (or its absence) when the field list does (does not)
   select field 1: in those cases the whole first field has to be read
   before anything is written, because only then is it known whether
   the field ends in a delimiter or in the line terminator.  In every
   other case cut_fields works without this buffer.  */
static char *field_1_buffer;

/* Allocated size of FIELD_1_BUFFER, in bytes.  */
static size_t field_1_bufsize;

/* When true, lines that contain no delimiter are not output; when
   false they are printed in full.  Meaningful only in field mode.  */
static bool suppress_non_delimited;

/* When true, output everything _except_ the bytes, characters or
   fields named in the list.  */
static bool complement;

/* Input field delimiter (field mode).  */
static unsigned char delim;

/* Terminator of each input line/record.  */
static unsigned char line_delim = '\n';

/* Number of bytes in output_delimiter_string.  */
static size_t output_delimiter_length;

/* String written between output fields.  By default it is the
   one-byte string holding the input delimiter.  */
static char *output_delimiter_string;

/* Storage for the default output delimiter string.  */
static char output_delimiter_default[1];

/* Set once standard input has been read from.  */
static bool have_read_stdin;
/* Pseudo short-option codes for the long options that have no
   single-letter form.  They are numbered from CHAR_MAX + 1 upwards so
   that they can never coincide with a real option character.  */
enum
{
  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
  COMPLEMENT_OPTION = OUTPUT_DELIMITER_OPTION + 1
};
109 static struct option const longopts[] =
111 {"bytes", required_argument, nullptr, 'b'},
112 {"characters", required_argument, nullptr, 'c'},
113 {"fields", required_argument, nullptr, 'f'},
114 {"delimiter", required_argument, nullptr, 'd'},
115 {"only-delimited", no_argument, nullptr, 's'},
116 {"output-delimiter", required_argument, nullptr, OUTPUT_DELIMITER_OPTION},
117 {"complement", no_argument, nullptr, COMPLEMENT_OPTION},
118 {"zero-terminated", no_argument, nullptr, 'z'},
119 {GETOPT_HELP_OPTION_DECL},
120 {GETOPT_VERSION_OPTION_DECL},
121 {nullptr, 0, nullptr, 0}
/* Print usage information and terminate with exit (STATUS).
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the synopsis, the option summary and the description of
   the LIST range syntax are written to standard output, followed by
   emit_ancillary_info (PROGRAM_NAME).
   NOTE(review): in this listing the brace-only lines and a few other
   very short lines (such as the closing of the first printf format)
   are absent and runs of blanks are collapsed, so the help text
   alignment shown here should be checked against the real file
   before editing any of the strings below.  */
124 void
125 usage (int status)
/* Failure path: just point the user at --help.  */
127 if (status != EXIT_SUCCESS)
128 emit_try_help ();
129 else
131 printf (_("\
132 Usage: %s OPTION... [FILE]...\n\
134 program_name);
135 fputs (_("\
136 Print selected parts of lines from each FILE to standard output.\n\
137 "), stdout);
139 emit_stdin_note ();
140 emit_mandatory_arg_note ();
142 fputs (_("\
143 -b, --bytes=LIST select only these bytes\n\
144 -c, --characters=LIST select only these characters\n\
145 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
146 "), stdout);
147 fputs (_("\
148 -f, --fields=LIST select only these fields; also print any line\n\
149 that contains no delimiter character, unless\n\
150 the -s option is specified\n\
151 -n (ignored)\n\
152 "), stdout);
153 fputs (_("\
154 --complement complement the set of selected bytes, characters\n\
155 or fields\n\
156 "), stdout);
157 fputs (_("\
158 -s, --only-delimited do not print lines not containing delimiters\n\
159 --output-delimiter=STRING use STRING as the output delimiter\n\
160 the default is to use the input delimiter\n\
161 "), stdout);
162 fputs (_("\
163 -z, --zero-terminated line delimiter is NUL, not newline\n\
164 "), stdout);
165 fputs (HELP_OPTION_DESCRIPTION, stdout);
166 fputs (VERSION_OPTION_DESCRIPTION, stdout);
167 fputs (_("\
169 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
170 range, or many ranges separated by commas. Selected input is written\n\
171 in the same order that it is read, and is written exactly once.\n\
172 "), stdout);
173 fputs (_("\
174 Each range is one of:\n\
176 N N'th byte, character or field, counted from 1\n\
177 N- from N'th byte, character or field, to end of line\n\
178 N-M from N'th to M'th (included) byte, character or field\n\
179 -M from first to M'th (included) byte, character or field\n\
180 "), stdout);
181 emit_ancillary_info (PROGRAM_NAME);
183 exit (status);
187 /* Increment *ITEM_IDX (i.e., a field or byte index),
188 and if required CURRENT_RP. */
190 static inline void
191 next_item (uintmax_t *item_idx)
193 (*item_idx)++;
194 if ((*item_idx) > current_rp->hi)
195 current_rp++;
198 /* Return nonzero if the K'th field or byte is printable. */
200 static inline bool
201 print_kth (uintmax_t k)
203 return current_rp->lo <= k;
206 /* Return nonzero if K'th byte is the beginning of a range. */
208 static inline bool
209 is_range_start_index (uintmax_t k)
211 return k == current_rp->lo;
214 /* Read from stream STREAM, printing to standard output any selected bytes. */
216 static void
217 cut_bytes (FILE *stream)
219 uintmax_t byte_idx; /* Number of bytes in the line so far. */
220 /* Whether to begin printing delimiters between ranges for the current line.
221 Set after we've begun printing data corresponding to the first range. */
222 bool print_delimiter;
224 byte_idx = 0;
225 print_delimiter = false;
226 current_rp = frp;
227 while (true)
229 int c; /* Each character from the file. */
231 c = getc (stream);
233 if (c == line_delim)
235 if (putchar (c) < 0)
236 write_error ();
237 byte_idx = 0;
238 print_delimiter = false;
239 current_rp = frp;
241 else if (c == EOF)
243 if (byte_idx > 0)
244 putchar (line_delim);
245 break;
247 else
249 next_item (&byte_idx);
250 if (print_kth (byte_idx))
252 if (output_delimiter_string != output_delimiter_default)
254 if (print_delimiter && is_range_start_index (byte_idx))
256 if (fwrite (output_delimiter_string, sizeof (char),
257 output_delimiter_length, stdout)
258 != output_delimiter_length)
259 write_error ();
261 print_delimiter = true;
264 if (putchar (c) < 0)
265 write_error ();
271 /* Read from stream STREAM, printing to standard output any selected fields. */
273 static void
274 cut_fields (FILE *stream)
276 int c;
277 uintmax_t field_idx = 1;
278 bool found_any_selected_field = false;
279 bool buffer_first_field;
281 current_rp = frp;
283 c = getc (stream);
284 if (c == EOF)
285 return;
287 ungetc (c, stream);
288 c = 0;
290 /* To support the semantics of the -s flag, we may have to buffer
291 all of the first field to determine whether it is 'delimited.'
292 But that is unnecessary if all non-delimited lines must be printed
293 and the first field has been selected, or if non-delimited lines
294 must be suppressed and the first field has *not* been selected.
295 That is because a non-delimited line has exactly one field. */
296 buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
298 while (true)
300 if (field_idx == 1 && buffer_first_field)
302 ssize_t len;
303 size_t n_bytes;
305 len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
306 GETNLINE_NO_LIMIT, delim, line_delim, stream);
307 if (len < 0)
309 free (field_1_buffer);
310 field_1_buffer = nullptr;
311 if (ferror (stream) || feof (stream))
312 break;
313 xalloc_die ();
316 n_bytes = len;
317 affirm (n_bytes != 0);
319 c = 0;
321 /* If the first field extends to the end of line (it is not
322 delimited) and we are printing all non-delimited lines,
323 print this one. */
324 if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
326 if (suppress_non_delimited)
328 /* Empty. */
330 else
332 if (fwrite (field_1_buffer, sizeof (char), n_bytes, stdout)
333 != n_bytes)
334 write_error ();
335 /* Make sure the output line is newline terminated. */
336 if (field_1_buffer[n_bytes - 1] != line_delim)
337 putchar (line_delim);
338 c = line_delim;
340 continue;
342 if (print_kth (1))
344 /* Print the field, but not the trailing delimiter. */
345 if (fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout)
346 != n_bytes - 1)
347 write_error ();
349 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
350 if (delim == line_delim)
352 int last_c = getc (stream);
353 if (last_c != EOF)
355 ungetc (last_c, stream);
356 found_any_selected_field = true;
359 else
360 found_any_selected_field = true;
362 next_item (&field_idx);
365 int prev_c = c;
367 if (print_kth (field_idx))
369 if (found_any_selected_field)
371 if (fwrite (output_delimiter_string, sizeof (char),
372 output_delimiter_length, stdout)
373 != output_delimiter_length)
374 write_error ();
376 found_any_selected_field = true;
378 while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
380 if (putchar (c) < 0)
381 write_error ();
382 prev_c = c;
385 else
387 while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
389 prev_c = c;
393 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
394 if (delim == line_delim && c == delim)
396 int last_c = getc (stream);
397 if (last_c != EOF)
398 ungetc (last_c, stream);
399 else
400 c = last_c;
403 if (c == delim)
404 next_item (&field_idx);
405 else if (c == line_delim || c == EOF)
407 if (found_any_selected_field
408 || !(suppress_non_delimited && field_idx == 1))
410 if (c == line_delim || prev_c != line_delim
411 || delim == line_delim)
412 if (putchar (line_delim) < 0)
413 write_error ();
415 if (c == EOF)
416 break;
417 field_idx = 1;
418 current_rp = frp;
419 found_any_selected_field = false;
424 /* Process file FILE to standard output, using CUT_STREAM.
425 Return true if successful. */
427 static bool
428 cut_file (char const *file, void (*cut_stream) (FILE *))
430 FILE *stream;
432 if (STREQ (file, "-"))
434 have_read_stdin = true;
435 stream = stdin;
436 assume (stream); /* Pacify GCC bug#109613. */
438 else
440 stream = fopen (file, "r");
441 if (stream == nullptr)
443 error (0, errno, "%s", quotef (file));
444 return false;
448 fadvise (stream, FADVISE_SEQUENTIAL);
450 cut_stream (stream);
452 int err = errno;
453 if (!ferror (stream))
454 err = 0;
455 if (STREQ (file, "-"))
456 clearerr (stream); /* Also clear EOF. */
457 else if (fclose (stream) == EOF)
458 err = errno;
459 if (err)
461 error (0, err, "%s", quotef (file));
462 return false;
464 return true;
468 main (int argc, char **argv)
470 int optc;
471 bool ok;
472 bool delim_specified = false;
473 bool byte_mode = false;
474 char *spec_list_string = nullptr;
476 initialize_main (&argc, &argv);
477 set_program_name (argv[0]);
478 setlocale (LC_ALL, "");
479 bindtextdomain (PACKAGE, LOCALEDIR);
480 textdomain (PACKAGE);
482 atexit (close_stdout);
484 /* By default, all non-delimited lines are printed. */
485 suppress_non_delimited = false;
487 delim = '\0';
488 have_read_stdin = false;
490 while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, nullptr))
491 != -1)
493 switch (optc)
495 case 'b':
496 case 'c':
497 /* Build the byte list. */
498 byte_mode = true;
499 FALLTHROUGH;
500 case 'f':
501 /* Build the field list. */
502 if (spec_list_string)
503 FATAL_ERROR (_("only one list may be specified"));
504 spec_list_string = optarg;
505 break;
507 case 'd':
508 /* New delimiter. */
509 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
510 if (optarg[0] != '\0' && optarg[1] != '\0')
511 FATAL_ERROR (_("the delimiter must be a single character"));
512 delim = optarg[0];
513 delim_specified = true;
514 break;
516 case OUTPUT_DELIMITER_OPTION:
517 /* Interpret --output-delimiter='' to mean
518 'use the NUL byte as the delimiter.' */
519 output_delimiter_length = (optarg[0] == '\0'
520 ? 1 : strlen (optarg));
521 output_delimiter_string = optarg;
522 break;
524 case 'n':
525 break;
527 case 's':
528 suppress_non_delimited = true;
529 break;
531 case 'z':
532 line_delim = '\0';
533 break;
535 case COMPLEMENT_OPTION:
536 complement = true;
537 break;
539 case_GETOPT_HELP_CHAR;
541 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
543 default:
544 usage (EXIT_FAILURE);
548 if (!spec_list_string)
549 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
551 if (byte_mode)
553 if (delim_specified)
554 FATAL_ERROR (_("an input delimiter may be specified only\
555 when operating on fields"));
557 if (suppress_non_delimited)
558 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
559 \tonly when operating on fields"));
562 set_fields (spec_list_string,
563 ((byte_mode ? SETFLD_ERRMSG_USE_POS : 0)
564 | (complement ? SETFLD_COMPLEMENT : 0)));
566 if (!delim_specified)
567 delim = '\t';
569 if (output_delimiter_string == nullptr)
571 output_delimiter_default[0] = delim;
572 output_delimiter_string = output_delimiter_default;
573 output_delimiter_length = 1;
576 void (*cut_stream) (FILE *) = byte_mode ? cut_bytes : cut_fields;
577 if (optind == argc)
578 ok = cut_file ("-", cut_stream);
579 else
580 for (ok = true; optind < argc; optind++)
581 ok &= cut_file (argv[optind], cut_stream);
584 if (have_read_stdin && fclose (stdin) == EOF)
586 error (0, errno, "-");
587 ok = false;
590 return ok ? EXIT_SUCCESS : EXIT_FAILURE;