1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2023 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
29 #include <sys/types.h>
34 #include "getndelim2.h"
36 #include "set-fields.h"
38 /* The official name of this program (e.g., no 'g' prefix). */
39 #define PROGRAM_NAME "cut"
42 proper_name ("David M. Ihnat"), \
43 proper_name ("David MacKenzie"), \
44 proper_name ("Jim Meyering")
46 #define FATAL_ERROR(Message) \
49 error (0, 0, (Message)); \
50 usage (EXIT_FAILURE); \
55 /* Pointer inside RP. When checking if a byte or field is selected
56 by a finite range, we check if it is between CURRENT_RP.LO
57 and CURRENT_RP.HI. If the byte or field index is greater than
58 CURRENT_RP.HI then we make CURRENT_RP point to the next range pair. */
59 static struct field_range_pair
*current_rp
;
61 /* This buffer is used to support the semantics of the -s option
62 (or lack of same) when the specified field list includes (does
63 not include) the first field. In both of those cases, the entire
64 first field must be read into this buffer to determine whether it
65 is followed by a delimiter or a newline before any of it may be
66 output. Otherwise, cut_fields can do the job without using this
67 buffer. */
68 static char *field_1_buffer
;
70 /* The number of bytes allocated for FIELD_1_BUFFER. */
71 static size_t field_1_bufsize
;
73 /* If true, do not output lines containing no delimiter characters.
74 Otherwise, all such lines are printed. This option is valid only
75 with field mode. */
76 static bool suppress_non_delimited
;
78 /* If true, print all bytes, characters, or fields _except_
79 those that were specified. */
80 static bool complement
;
82 /* The delimiter character for field mode. */
83 static unsigned char delim
;
85 /* The delimiter for each line/record. */
86 static unsigned char line_delim
= '\n';
88 /* The length of output_delimiter_string. */
89 static size_t output_delimiter_length
;
91 /* The output field separator string. Defaults to the 1-character
92 string consisting of the input delimiter. */
93 static char *output_delimiter_string
;
95 /* The output delimiter string contents, if the default. */
96 static char output_delimiter_default
[1];
98 /* True if we have ever read standard input. */
99 static bool have_read_stdin
;
101 /* For long options that have no equivalent short option, use a
102 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
105 OUTPUT_DELIMITER_OPTION
= CHAR_MAX
+ 1,
109 static struct option
const longopts
[] =
111 {"bytes", required_argument
, nullptr, 'b'},
112 {"characters", required_argument
, nullptr, 'c'},
113 {"fields", required_argument
, nullptr, 'f'},
114 {"delimiter", required_argument
, nullptr, 'd'},
115 {"only-delimited", no_argument
, nullptr, 's'},
116 {"output-delimiter", required_argument
, nullptr, OUTPUT_DELIMITER_OPTION
},
117 {"complement", no_argument
, nullptr, COMPLEMENT_OPTION
},
118 {"zero-terminated", no_argument
, nullptr, 'z'},
119 {GETOPT_HELP_OPTION_DECL
},
120 {GETOPT_VERSION_OPTION_DECL
},
121 {nullptr, 0, nullptr, 0}
127 if (status
!= EXIT_SUCCESS
)
132 Usage: %s OPTION... [FILE]...\n\
136 Print selected parts of lines from each FILE to standard output.\n\
140 emit_mandatory_arg_note ();
143 -b, --bytes=LIST select only these bytes\n\
144 -c, --characters=LIST select only these characters\n\
145 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
148 -f, --fields=LIST select only these fields; also print any line\n\
149 that contains no delimiter character, unless\n\
150 the -s option is specified\n\
154 --complement complement the set of selected bytes, characters\n\
158 -s, --only-delimited do not print lines not containing delimiters\n\
159 --output-delimiter=STRING use STRING as the output delimiter\n\
160 the default is to use the input delimiter\n\
163 -z, --zero-terminated line delimiter is NUL, not newline\n\
165 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
166 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
169 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
170 range, or many ranges separated by commas. Selected input is written\n\
171 in the same order that it is read, and is written exactly once.\n\
174 Each range is one of:\n\
176 N N'th byte, character or field, counted from 1\n\
177 N- from N'th byte, character or field, to end of line\n\
178 N-M from N'th to M'th (included) byte, character or field\n\
179 -M from first to M'th (included) byte, character or field\n\
181 emit_ancillary_info (PROGRAM_NAME
);
187 /* Increment *ITEM_IDX (i.e., a field or byte index),
188 and if required CURRENT_RP. */
191 next_item (uintmax_t *item_idx
)
194 if ((*item_idx
) > current_rp
->hi
)
198 /* Return nonzero if the K'th field or byte is printable. */
201 print_kth (uintmax_t k
)
203 return current_rp
->lo
<= k
;
206 /* Return nonzero if K'th byte is the beginning of a range. */
209 is_range_start_index (uintmax_t k
)
211 return k
== current_rp
->lo
;
214 /* Read from stream STREAM, printing to standard output any selected bytes. */
217 cut_bytes (FILE *stream
)
219 uintmax_t byte_idx
; /* Number of bytes in the line so far. */
220 /* Whether to begin printing delimiters between ranges for the current line.
221 Set after we've begun printing data corresponding to the first range. */
222 bool print_delimiter
;
225 print_delimiter
= false;
229 int c
; /* Each character from the file. */
238 print_delimiter
= false;
244 putchar (line_delim
);
249 next_item (&byte_idx
);
250 if (print_kth (byte_idx
))
252 if (output_delimiter_string
!= output_delimiter_default
)
254 if (print_delimiter
&& is_range_start_index (byte_idx
))
256 if (fwrite (output_delimiter_string
, sizeof (char),
257 output_delimiter_length
, stdout
)
258 != output_delimiter_length
)
261 print_delimiter
= true;
271 /* Read from stream STREAM, printing to standard output any selected fields. */
274 cut_fields (FILE *stream
)
277 uintmax_t field_idx
= 1;
278 bool found_any_selected_field
= false;
279 bool buffer_first_field
;
290 /* To support the semantics of the -s flag, we may have to buffer
291 all of the first field to determine whether it is 'delimited.'
292 But that is unnecessary if all non-delimited lines must be printed
293 and the first field has been selected, or if non-delimited lines
294 must be suppressed and the first field has *not* been selected.
295 That is because a non-delimited line has exactly one field. */
296 buffer_first_field
= (suppress_non_delimited
^ !print_kth (1));
300 if (field_idx
== 1 && buffer_first_field
)
305 len
= getndelim2 (&field_1_buffer
, &field_1_bufsize
, 0,
306 GETNLINE_NO_LIMIT
, delim
, line_delim
, stream
);
309 free (field_1_buffer
);
310 field_1_buffer
= nullptr;
311 if (ferror (stream
) || feof (stream
))
317 affirm (n_bytes
!= 0);
321 /* If the first field extends to the end of line (it is not
322 delimited) and we are printing all non-delimited lines,
323 print this one. */
324 if (to_uchar (field_1_buffer
[n_bytes
- 1]) != delim
)
326 if (suppress_non_delimited
)
332 if (fwrite (field_1_buffer
, sizeof (char), n_bytes
, stdout
)
335 /* Make sure the output line ends with the line delimiter (NUL under -z, otherwise newline). */
336 if (field_1_buffer
[n_bytes
- 1] != line_delim
)
337 putchar (line_delim
);
344 /* Print the field, but not the trailing delimiter. */
345 if (fwrite (field_1_buffer
, sizeof (char), n_bytes
- 1, stdout
)
349 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
350 if (delim
== line_delim
)
352 int last_c
= getc (stream
);
355 ungetc (last_c
, stream
);
356 found_any_selected_field
= true;
360 found_any_selected_field
= true;
362 next_item (&field_idx
);
367 if (print_kth (field_idx
))
369 if (found_any_selected_field
)
371 if (fwrite (output_delimiter_string
, sizeof (char),
372 output_delimiter_length
, stdout
)
373 != output_delimiter_length
)
376 found_any_selected_field
= true;
378 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
387 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
393 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
394 if (delim
== line_delim
&& c
== delim
)
396 int last_c
= getc (stream
);
398 ungetc (last_c
, stream
);
404 next_item (&field_idx
);
405 else if (c
== line_delim
|| c
== EOF
)
407 if (found_any_selected_field
408 || !(suppress_non_delimited
&& field_idx
== 1))
410 if (c
== line_delim
|| prev_c
!= line_delim
411 || delim
== line_delim
)
412 if (putchar (line_delim
) < 0)
419 found_any_selected_field
= false;
424 /* Process file FILE to standard output, using CUT_STREAM.
425 Return true if successful. */
428 cut_file (char const *file
, void (*cut_stream
) (FILE *))
432 if (STREQ (file
, "-"))
434 have_read_stdin
= true;
436 assume (stream
); /* Pacify GCC bug#109613. */
440 stream
= fopen (file
, "r");
441 if (stream
== nullptr)
443 error (0, errno
, "%s", quotef (file
));
448 fadvise (stream
, FADVISE_SEQUENTIAL
);
453 if (!ferror (stream
))
455 if (STREQ (file
, "-"))
456 clearerr (stream
); /* Also clear EOF. */
457 else if (fclose (stream
) == EOF
)
461 error (0, err
, "%s", quotef (file
));
468 main (int argc
, char **argv
)
472 bool delim_specified
= false;
473 bool byte_mode
= false;
474 char *spec_list_string
= nullptr;
476 initialize_main (&argc
, &argv
);
477 set_program_name (argv
[0]);
478 setlocale (LC_ALL
, "");
479 bindtextdomain (PACKAGE
, LOCALEDIR
);
480 textdomain (PACKAGE
);
482 atexit (close_stdout
);
484 /* By default, all non-delimited lines are printed. */
485 suppress_non_delimited
= false;
488 have_read_stdin
= false;
490 while ((optc
= getopt_long (argc
, argv
, "b:c:d:f:nsz", longopts
, nullptr))
497 /* Build the byte list. */
501 /* Build the field list. */
502 if (spec_list_string
)
503 FATAL_ERROR (_("only one list may be specified"));
504 spec_list_string
= optarg
;
509 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
510 if (optarg
[0] != '\0' && optarg
[1] != '\0')
511 FATAL_ERROR (_("the delimiter must be a single character"));
513 delim_specified
= true;
516 case OUTPUT_DELIMITER_OPTION
:
517 /* Interpret --output-delimiter='' to mean
518 'use the NUL byte as the delimiter.' */
519 output_delimiter_length
= (optarg
[0] == '\0'
520 ? 1 : strlen (optarg
));
521 output_delimiter_string
= optarg
;
528 suppress_non_delimited
= true;
535 case COMPLEMENT_OPTION
:
539 case_GETOPT_HELP_CHAR
;
541 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
544 usage (EXIT_FAILURE
);
548 if (!spec_list_string
)
549 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
554 FATAL_ERROR (_("an input delimiter may be specified only\
555 when operating on fields"));
557 if (suppress_non_delimited
)
558 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
559 \tonly when operating on fields"));
562 set_fields (spec_list_string
,
563 ((byte_mode
? SETFLD_ERRMSG_USE_POS
: 0)
564 | (complement
? SETFLD_COMPLEMENT
: 0)));
566 if (!delim_specified
)
569 if (output_delimiter_string
== nullptr)
571 output_delimiter_default
[0] = delim
;
572 output_delimiter_string
= output_delimiter_default
;
573 output_delimiter_length
= 1;
576 void (*cut_stream
) (FILE *) = byte_mode
? cut_bytes
: cut_fields
;
578 ok
= cut_file ("-", cut_stream
);
580 for (ok
= true; optind
< argc
; optind
++)
581 ok
&= cut_file (argv
[optind
], cut_stream
);
584 if (have_read_stdin
&& fclose (stdin
) == EOF
)
586 error (0, errno
, "-");
590 return ok
? EXIT_SUCCESS
: EXIT_FAILURE
;