1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2023 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
30 #include <sys/types.h>
35 #include "getndelim2.h"
37 #include "set-fields.h"
39 /* The official name of this program (e.g., no 'g' prefix). */
40 #define PROGRAM_NAME "cut"
43 proper_name ("David M. Ihnat"), \
44 proper_name ("David MacKenzie"), \
45 proper_name ("Jim Meyering")
/* Report MESSAGE by calling error (0, 0, MESSAGE) and then call
   usage (EXIT_FAILURE).
   NOTE(review): the lines that wrap these two statements are not
   visible in this listing.  */
47 #define FATAL_ERROR(Message) \
50 error (0, 0, (Message)); \
51 usage (EXIT_FAILURE); \
56 /* Pointer inside RP. When checking if a byte or field is selected
57 by a finite range, we check if it is between CURRENT_RP.LO
58 and CURRENT_RP.HI. If the byte or field index is greater than
59 CURRENT_RP.HI then we make CURRENT_RP point to the next range pair. */
/* next_item, print_kth and is_range_start_index all read this pointer.
   NOTE(review): RP is not declared in this listing; presumably it is
   provided by set-fields.h -- confirm.  */
60 static struct field_range_pair
*current_rp
;
62 /* This buffer is used to support the semantics of the -s option
63 (or lack of same) when the specified field list includes (does
64 not include) the first field. In both of those cases, the entire
65 first field must be read into this buffer to determine whether it
66 is followed by a delimiter or a newline before any of it may be
67 output. Otherwise, cut_fields can do the job without using this
68 buffer. */
69 static char *field_1_buffer
;
71 /* The number of bytes allocated for FIELD_1_BUFFER.  cut_fields passes
   the address of this size, together with the address of the buffer,
   to getndelim2. */
72 static size_t field_1_bufsize
;
74 /* If true do not output lines containing no delimiter characters.
75 Otherwise, all such lines are printed. This option is valid only
76 with field mode. */
77 static bool suppress_non_delimited
;
79 /* If true, print all bytes, characters, or fields _except_
80 those that were specified. */
/* main turns this into the SETFLD_COMPLEMENT flag passed to set_fields.  */
81 static bool complement
;
83 /* The delimiter character for field mode. */
84 static unsigned char delim
;
86 /* The delimiter for each line/record. */
/* Starts out as newline.
   NOTE(review): the help text documents -z as making the line delimiter
   NUL, but that assignment is not visible in this listing.  */
87 static unsigned char line_delim
= '\n';
89 /* The length of output_delimiter_string. */
/* An empty --output-delimiter argument is given length 1 so that a single
   NUL byte is written; the default delimiter also has length 1.  */
90 static size_t output_delimiter_length
;
92 /* The output field separator string. Defaults to the 1-character
93 string consisting of the input delimiter. */
94 static char *output_delimiter_string
;
96 /* The output delimiter string contents, if the default. */
/* main stores DELIM in element 0 and points output_delimiter_string at
   this array when no output delimiter was given.  cut_bytes compares the
   two pointers to tell whether the user supplied a delimiter.  */
97 static char output_delimiter_default
[1];
99 /* True if we have ever read standard input. */
/* Set by cut_file when FILE is a lone dash; main checks it before
   closing stdin.  */
100 static bool have_read_stdin
;
102 /* For long options that have no equivalent short option, use a
103 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
/* Code that longopts associates with --output-delimiter and that main
   handles in its option switch.
   NOTE(review): the enclosing declaration and any further enumerators
   (COMPLEMENT_OPTION is used elsewhere) are not visible in this listing.  */
106 OUTPUT_DELIMITER_OPTION
= CHAR_MAX
+ 1,
/* Table of long options handed to getopt_long in main.  Each entry maps a
   long name to the short option character it is equivalent to, or to a
   pseudo code (OUTPUT_DELIMITER_OPTION, COMPLEMENT_OPTION) when there is
   no short form.  The third member is always NULL here.  */
110 static struct option
const longopts
[] =
112 {"bytes", required_argument
, NULL
, 'b'},
113 {"characters", required_argument
, NULL
, 'c'},
114 {"fields", required_argument
, NULL
, 'f'},
115 {"delimiter", required_argument
, NULL
, 'd'},
116 {"only-delimited", no_argument
, NULL
, 's'},
117 {"output-delimiter", required_argument
, NULL
, OUTPUT_DELIMITER_OPTION
},
118 {"complement", no_argument
, NULL
, COMPLEMENT_OPTION
},
119 {"zero-terminated", no_argument
, NULL
, 'z'},
120 {GETOPT_HELP_OPTION_DECL
},
121 {GETOPT_VERSION_OPTION_DECL
},
/* Help output: tests STATUS against EXIT_SUCCESS, and contains the usage
   synopsis, the option descriptions, the description of LIST ranges, and
   a final call to emit_ancillary_info (PROGRAM_NAME).
   NOTE(review): the function header and the statements that emit most of
   these strings are not visible in this listing; presumably this is the
   body of the usage routine called from FATAL_ERROR and main -- confirm.  */
128 if (status
!= EXIT_SUCCESS
)
133 Usage: %s OPTION... [FILE]...\n\
137 Print selected parts of lines from each FILE to standard output.\n\
141 emit_mandatory_arg_note ();
144 -b, --bytes=LIST select only these bytes\n\
145 -c, --characters=LIST select only these characters\n\
146 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
149 -f, --fields=LIST select only these fields; also print any line\n\
150 that contains no delimiter character, unless\n\
151 the -s option is specified\n\
155 --complement complement the set of selected bytes, characters\n\
159 -s, --only-delimited do not print lines not containing delimiters\n\
160 --output-delimiter=STRING use STRING as the output delimiter\n\
161 the default is to use the input delimiter\n\
164 -z, --zero-terminated line delimiter is NUL, not newline\n\
166 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
167 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
170 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
171 range, or many ranges separated by commas. Selected input is written\n\
172 in the same order that it is read, and is written exactly once.\n\
175 Each range is one of:\n\
177 N N'th byte, character or field, counted from 1\n\
178 N- from N'th byte, character or field, to end of line\n\
179 N-M from N'th to M'th (included) byte, character or field\n\
180 -M from first to M'th (included) byte, character or field\n\
182 emit_ancillary_info (PROGRAM_NAME
);
188 /* Increment *ITEM_IDX (i.e., a field or byte index),
189 and if required CURRENT_RP. */
/* ITEM_IDX points at the index kept by the caller (cut_bytes passes its
   byte index, cut_fields its field index).  */
192 next_item (uintmax_t *item_idx
)
/* The index has moved past the upper bound of the current range pair.
   NOTE(review): the increment itself and the statement that advances
   CURRENT_RP are not visible in this listing.  */
195 if ((*item_idx
) > current_rp
->hi
)
199 /* Return nonzero if the K'th field or byte is printable. */
/* Only the lower bound of the current range pair is compared with K.
   NOTE(review): this presumably relies on next_item having already moved
   CURRENT_RP past every range that ends before K -- confirm.  */
202 print_kth (uintmax_t k
)
204 return current_rp
->lo
<= k
;
207 /* Return nonzero if K'th byte is the beginning of a range. */
/* True exactly when K equals the lower bound of the current range pair.
   cut_bytes uses this to decide where to write the output delimiter.  */
210 is_range_start_index (uintmax_t k
)
212 return k
== current_rp
->lo
;
215 /* Read from stream STREAM, printing to standard output any selected bytes. */
/* Selection is decided per byte with next_item and print_kth.  The output
   delimiter is written only when the user supplied one, and then only
   between printed ranges.
   NOTE(review): the loop structure and several statements are missing
   from this listing, so only the visible fragments are described.  */
218 cut_bytes (FILE *stream
)
220 uintmax_t byte_idx
; /* Number of bytes in the line so far. */
221 /* Whether to begin printing delimiters between ranges for the current line.
222 Set after we've begun printing data corresponding to the first range. */
223 bool print_delimiter
;
226 print_delimiter
= false;
230 int c
; /* Each character from the file. */
/* NOTE(review): presumably part of the end-of-line handling, where the
   per-line state is reset -- the enclosing test is not visible.  */
238 print_delimiter
= false;
244 putchar (line_delim
);
249 next_item (&byte_idx
);
250 if (print_kth (byte_idx
))
/* With the default delimiter (output_delimiter_string still pointing at
   output_delimiter_default) nothing is inserted between byte ranges.  */
252 if (output_delimiter_string
!= output_delimiter_default
)
/* Write the delimiter only at the first byte of a range, and never
   before the first range printed on the line.  */
254 if (print_delimiter
&& is_range_start_index (byte_idx
))
256 fwrite (output_delimiter_string
, sizeof (char),
257 output_delimiter_length
, stdout
);
259 print_delimiter
= true;
268 /* Read from stream STREAM, printing to standard output any selected fields. */
/* Fields are separated by DELIM and lines by LINE_DELIM.  The first field
   of a line may be read whole into FIELD_1_BUFFER so that the -s option
   can be honoured; later fields are consumed one byte at a time with
   getc.  Printed fields are joined with output_delimiter_string.
   NOTE(review): braces, loop headers and several statements are missing
   from this listing, so the comments below describe only what the visible
   fragments show.  */
271 cut_fields (FILE *stream
)
274 uintmax_t field_idx
= 1;
275 bool found_any_selected_field
= false;
276 bool buffer_first_field
;
287 /* To support the semantics of the -s flag, we may have to buffer
288 all of the first field to determine whether it is 'delimited.'
289 But that is unnecessary if all non-delimited lines must be printed
290 and the first field has been selected, or if non-delimited lines
291 must be suppressed and the first field has *not* been selected.
292 That is because a non-delimited line has exactly one field. */
293 buffer_first_field
= (suppress_non_delimited
^ !print_kth (1));
/* At the first field of a line, and buffering is required.  */
297 if (field_idx
== 1 && buffer_first_field
)
/* Read into FIELD_1_BUFFER with no length limit, stopping at DELIM or
   LINE_DELIM; the code below inspects the last byte stored to see which
   one ended the read.  */
302 len
= getndelim2 (&field_1_buffer
, &field_1_bufsize
, 0,
303 GETNLINE_NO_LIMIT
, delim
, line_delim
, stream
);
/* NOTE(review): presumably the failure path of the read above (its test
   is not visible): release the buffer, then check for error or end of
   input on STREAM.  */
306 free (field_1_buffer
);
307 field_1_buffer
= NULL
;
308 if (ferror (stream
) || feof (stream
))
314 assert (n_bytes
!= 0);
318 /* If the first field extends to the end of line (it is not
319 delimited) and we are printing all non-delimited lines,
321 if (to_uchar (field_1_buffer
[n_bytes
- 1]) != delim
)
323 if (suppress_non_delimited
)
329 fwrite (field_1_buffer
, sizeof (char), n_bytes
, stdout
);
330 /* Make sure the output line is newline terminated. */
331 if (field_1_buffer
[n_bytes
- 1] != line_delim
)
332 putchar (line_delim
);
339 /* Print the field, but not the trailing delimiter. */
340 fwrite (field_1_buffer
, sizeof (char), n_bytes
- 1, stdout
);
342 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
343 if (delim
== line_delim
)
/* Peek at the next byte and push it back.
   NOTE(review): the test made on LAST_C between these two calls is not
   visible in this listing.  */
345 int last_c
= getc (stream
);
348 ungetc (last_c
, stream
);
349 found_any_selected_field
= true;
353 found_any_selected_field
= true;
355 next_item (&field_idx
);
360 if (print_kth (field_idx
))
/* Something was already printed on this line, so separate the new field
   from it with the output delimiter.  */
362 if (found_any_selected_field
)
364 fwrite (output_delimiter_string
, sizeof (char),
365 output_delimiter_length
, stdout
);
367 found_any_selected_field
= true;
/* Consume bytes up to the next DELIM, LINE_DELIM or EOF.
   NOTE(review): the loop body is not visible; presumably this loop copies
   the selected field to standard output -- confirm.  */
369 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
/* Same scan for the other branch.
   NOTE(review): body not visible; presumably the bytes of an unselected
   field are discarded here -- confirm.  */
377 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
383 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
384 if (delim
== line_delim
&& c
== delim
)
386 int last_c
= getc (stream
);
388 ungetc (last_c
, stream
);
394 next_item (&field_idx
);
395 else if (c
== line_delim
|| c
== EOF
)
/* Finish the output line if anything was printed, or unless -s is in
   effect and the line never got past its first field.  */
397 if (found_any_selected_field
398 || !(suppress_non_delimited
&& field_idx
== 1))
/* NOTE(review): PREV_C is not declared in the visible lines; presumably
   it holds the previously read byte, so that no extra terminator is
   written when EOF directly follows a line terminator -- confirm.  */
400 if (c
== line_delim
|| prev_c
!= line_delim
401 || delim
== line_delim
)
402 putchar (line_delim
);
/* Reset the per-line flag.  */
408 found_any_selected_field
= false;
413 /* Process file FILE to standard output, using CUT_STREAM.
414 Return true if successful. */
/* FILE is either a lone dash or a name opened with fopen in mode r.
   CUT_STREAM is the routine that does the cutting; main passes cut_bytes
   or cut_fields.
   NOTE(review): the return statements and several tests are not visible
   in this listing.  */
417 cut_file (char const *file
, void (*cut_stream
) (FILE *))
/* Remember that standard input was used, so that main closes it once at
   the very end.  */
421 if (STREQ (file
, "-"))
423 have_read_stdin
= true;
428 stream
= fopen (file
, "r");
/* NOTE(review): presumably reached when fopen fails (the test is not
   visible): report FILE together with the errno message.  */
431 error (0, errno
, "%s", quotef (file
));
/* Pass the FADVISE_SEQUENTIAL hint for STREAM.  */
436 fadvise (stream
, FADVISE_SEQUENTIAL
);
441 if (!ferror (stream
))
/* Standard input is only reset, not closed; any other stream is closed
   and a failing fclose is detected.  */
443 if (STREQ (file
, "-"))
444 clearerr (stream
); /* Also clear EOF. */
445 else if (fclose (stream
) == EOF
)
/* NOTE(review): ERR is assigned in lines that are not visible; presumably
   it holds the errno value saved from the failing read or close.  */
449 error (0, err
, "%s", quotef (file
));
/* Entry point.  Parses the command line with getopt_long, builds the
   selection list with set_fields, chooses cut_bytes or cut_fields
   according to BYTE_MODE, and runs it through cut_file.  Returns
   EXIT_SUCCESS when OK is true and EXIT_FAILURE otherwise.
   NOTE(review): the switch statement, several case labels and some
   conditions are missing from this listing.  */
456 main (int argc
, char **argv
)
460 bool delim_specified
= false;
461 bool byte_mode
= false;
462 char *spec_list_string
= NULL
;
464 initialize_main (&argc
, &argv
);
465 set_program_name (argv
[0]);
466 setlocale (LC_ALL
, "");
467 bindtextdomain (PACKAGE
, LOCALEDIR
);
468 textdomain (PACKAGE
);
/* Register close_stdout to run at exit.  */
470 atexit (close_stdout
);
472 /* By default, all non-delimited lines are printed. */
473 suppress_non_delimited
= false;
476 have_read_stdin
= false;
/* Short options b, c, d and f take an argument; n, s and z do not.  */
478 while ((optc
= getopt_long (argc
, argv
, "b:c:d:f:nsz", longopts
, NULL
)) != -1)
484 /* Build the byte list. */
488 /* Build the field list. */
/* A second list option is rejected; otherwise the list text is kept for
   set_fields.  */
489 if (spec_list_string
)
490 FATAL_ERROR (_("only one list may be specified"));
491 spec_list_string
= optarg
;
496 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
/* An argument of two or more bytes is rejected.  */
497 if (optarg
[0] != '\0' && optarg
[1] != '\0')
498 FATAL_ERROR (_("the delimiter must be a single character"));
500 delim_specified
= true;
503 case OUTPUT_DELIMITER_OPTION
:
504 /* Interpret --output-delimiter='' to mean
505 'use the NUL byte as the delimiter.' */
506 output_delimiter_length
= (optarg
[0] == '\0'
507 ? 1 : strlen (optarg
));
508 output_delimiter_string
= optarg
;
515 suppress_non_delimited
= true;
522 case COMPLEMENT_OPTION
:
526 case_GETOPT_HELP_CHAR
;
528 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
531 usage (EXIT_FAILURE
);
/* Exactly one list must have been given.  */
535 if (!spec_list_string
)
536 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
/* NOTE(review): the conditions guarding the next two diagnostics are only
   partly visible; judging by the messages they apply when not operating
   on fields -- confirm.  */
541 FATAL_ERROR (_("an input delimiter may be specified only\
542 when operating on fields"));
544 if (suppress_non_delimited
)
545 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
546 \tonly when operating on fields"));
/* Hand the list to set_fields, with flags derived from BYTE_MODE and
   COMPLEMENT.  */
549 set_fields (spec_list_string
,
550 ((byte_mode
? SETFLD_ERRMSG_USE_POS
: 0)
551 | (complement
? SETFLD_COMPLEMENT
: 0)));
/* NOTE(review): the statement controlled by this test is not visible;
   the help text says the default field delimiter is TAB.  */
553 if (!delim_specified
)
/* No output delimiter was given: fall back to a one-byte string holding
   the input delimiter.  */
556 if (output_delimiter_string
== NULL
)
558 output_delimiter_default
[0] = delim
;
559 output_delimiter_string
= output_delimiter_default
;
560 output_delimiter_length
= 1;
563 void (*cut_stream
) (FILE *) = byte_mode
? cut_bytes
: cut_fields
;
/* NOTE(review): presumably taken when no file operands remain (the test
   is not visible): read standard input.  */
565 ok
= cut_file ("-", cut_stream
);
/* Every operand is processed even after a failure; OK accumulates the
   results.  */
567 for (ok
= true; optind
< argc
; optind
++)
568 ok
&= cut_file (argv
[optind
], cut_stream
);
/* Standard input is closed once, here, and a close failure is reported.  */
571 if (have_read_stdin
&& fclose (stdin
) == EOF
)
573 error (0, errno
, "-");
577 return ok
? EXIT_SUCCESS
: EXIT_FAILURE
;