1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2022 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
30 #include <sys/types.h>
35 #include "getndelim2.h"
38 #include "set-fields.h"
40 /* The official name of this program (e.g., no 'g' prefix). */
41 #define PROGRAM_NAME "cut"
/* Author names, each wrapped in proper_name ().  NOTE(review): the macro
   header these continuation lines belong to is not visible in this
   listing; presumably it is the AUTHORS macro that main hands to
   case_GETOPT_VERSION_CHAR -- confirm against the full file.  */
44 proper_name ("David M. Ihnat"), \
45 proper_name ("David MacKenzie"), \
46 proper_name ("Jim Meyering")
/* FATAL_ERROR (MESSAGE): report MESSAGE through error (0, 0, ...) and then
   call usage (EXIT_FAILURE).  NOTE(review): the statement wrapper around
   the two calls is not visible in this listing.  */
48 #define FATAL_ERROR(Message) \
51 error (0, 0, (Message)); \
52 usage (EXIT_FAILURE); \
57 /* Pointer inside RP. When checking if a byte or field is selected
58 by a finite range, we check if it is between CURRENT_RP.LO
59 and CURRENT_RP.HI. If the byte or field index is greater than
60 CURRENT_RP.HI then we advance CURRENT_RP to point to the next range pair. */
61 static struct field_range_pair
*current_rp
;
63 /* This buffer is used to support the semantics of the -s option
64 (or lack of same) when the specified field list includes (does
65 not include) the first field. In both of those cases, the entire
66 first field must be read into this buffer to determine whether it
67 is followed by a delimiter or a newline before any of it may be
68 output. Otherwise, cut_fields can do the job without using this buffer. */
70 static char *field_1_buffer
;
72 /* The number of bytes allocated for FIELD_1_BUFFER. */
73 static size_t field_1_bufsize
;
79 /* Output characters that are in the given bytes. */
82 /* Output the given delimiter-separated fields. */
86 static enum operating_mode operating_mode
;
88 /* If true do not output lines containing no delimiter characters.
89 Otherwise, all such lines are printed. This option is valid only in field mode. */
91 static bool suppress_non_delimited
;
93 /* If true, print all bytes, characters, or fields _except_
94 those that were specified. */
95 static bool complement
;
97 /* The delimiter character for field mode. */
98 static unsigned char delim
;
100 /* The delimiter for each line/record. */
101 static unsigned char line_delim
= '\n';
103 /* True if the --output-delimiter=STRING option was specified. */
104 static bool output_delimiter_specified
;
106 /* The length of output_delimiter_string. */
107 static size_t output_delimiter_length
;
109 /* The output field separator string. Defaults to the 1-character
110 string consisting of the input delimiter. */
111 static char *output_delimiter_string
;
113 /* True if we have ever read standard input. */
114 static bool have_read_stdin
;
116 /* For long options that have no equivalent short option, use a
117 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
/* NOTE(review): the enum header and the COMPLEMENT_OPTION enumerator that
   the table below refers to are not visible in this listing.  */
120 OUTPUT_DELIMITER_OPTION
= CHAR_MAX
+ 1,
/* Long-option table handed to getopt_long () in main.  The last member of
   each visible entry is either a short-option character or one of the
   pseudo options above.  NOTE(review): the braces enclosing the table and
   any terminating entry are not visible in this listing.  */
124 static struct option
const longopts
[] =
126 {"bytes", required_argument
, NULL
, 'b'},
127 {"characters", required_argument
, NULL
, 'c'},
128 {"fields", required_argument
, NULL
, 'f'},
129 {"delimiter", required_argument
, NULL
, 'd'},
130 {"only-delimited", no_argument
, NULL
, 's'},
131 {"output-delimiter", required_argument
, NULL
, OUTPUT_DELIMITER_OPTION
},
132 {"complement", no_argument
, NULL
, COMPLEMENT_OPTION
},
133 {"zero-terminated", no_argument
, NULL
, 'z'},
134 {GETOPT_HELP_OPTION_DECL
},
135 {GETOPT_VERSION_OPTION_DECL
},
/* Body of the help routine.  NOTE(review): the function header is not
   visible in this listing; other parts of the file call it as
   usage (EXIT_FAILURE), so STATUS is presumably its only parameter.
   The text lines below are the remnants of the help message; the calls
   and string delimiters that wrapped them are missing, as is whatever
   the routine does when STATUS is not EXIT_SUCCESS.  */
142 if (status
!= EXIT_SUCCESS
)
147 Usage: %s OPTION... [FILE]...\n\
151 Print selected parts of lines from each FILE to standard output.\n\
155 emit_mandatory_arg_note ();
/* Option summary: the three list selectors and the input delimiter,
   followed by the field-mode modifiers.  */
158 -b, --bytes=LIST select only these bytes\n\
159 -c, --characters=LIST select only these characters\n\
160 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
163 -f, --fields=LIST select only these fields; also print any line\n\
164 that contains no delimiter character, unless\n\
165 the -s option is specified\n\
169 --complement complement the set of selected bytes, characters\n\
173 -s, --only-delimited do not print lines not containing delimiters\n\
174 --output-delimiter=STRING use STRING as the output delimiter\n\
175 the default is to use the input delimiter\n\
178 -z, --zero-terminated line delimiter is NUL, not newline\n\
180 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
181 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
/* Description of the LIST syntax accepted by -b, -c and -f.  */
184 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
185 range, or many ranges separated by commas. Selected input is written\n\
186 in the same order that it is read, and is written exactly once.\n\
189 Each range is one of:\n\
191 N N'th byte, character or field, counted from 1\n\
192 N- from N'th byte, character or field, to end of line\n\
193 N-M from N'th to M'th (included) byte, character or field\n\
194 -M from first to M'th (included) byte, character or field\n\
196 emit_ancillary_info (PROGRAM_NAME
);
202 /* Increment *ITEM_IDX (i.e., a field or byte index),
203 and if required CURRENT_RP. */
206 next_item (uintmax_t *item_idx
)
209 if ((*item_idx
) > current_rp
->hi
)
213 /* Return nonzero if the K'th field or byte is printable. */
216 print_kth (uintmax_t k
)
218 return current_rp
->lo
<= k
;
221 /* Return nonzero if K'th byte is the beginning of a range. */
224 is_range_start_index (uintmax_t k
)
226 return k
== current_rp
->lo
;
229 /* Read from stream STREAM, printing to standard output any selected bytes. */
/* Selection is decided per byte with next_item and print_kth.  When
   --output-delimiter was given, the delimiter string is written between
   consecutive selected ranges of a line.
   NOTE(review): the return type, braces, the read loop and the statement
   that emits each selected byte are missing from this listing; the
   comments below describe only the visible lines.  */
232 cut_bytes (FILE *stream
)
234 uintmax_t byte_idx
; /* Number of bytes in the line so far. */
235 /* Whether to begin printing delimiters between ranges for the current line.
236 Set after we've begun printing data corresponding to the first range. */
237 bool print_delimiter
;
240 print_delimiter
= false;
244 int c
; /* Each character from the file. */
/* Reset so that no delimiter precedes the first range of the next line.
   Presumably part of the end-of-line branch, whose condition is not
   visible here.  */
252 print_delimiter
= false;
258 putchar (line_delim
);
/* Advance the byte index (and CURRENT_RP if needed) before testing
   whether this byte is selected.  */
263 next_item (&byte_idx
);
264 if (print_kth (byte_idx
))
266 if (output_delimiter_specified
)
/* A range other than the first one on this line starts here, so
   separate it from the previously printed range.  */
268 if (print_delimiter
&& is_range_start_index (byte_idx
))
270 fwrite (output_delimiter_string
, sizeof (char),
271 output_delimiter_length
, stdout
);
273 print_delimiter
= true;
282 /* Read from stream STREAM, printing to standard output any selected fields. */
/* Fields are separated by DELIM and lines by LINE_DELIM.  The first field
   may be read whole into FIELD_1_BUFFER so that a line without any
   delimiter can be printed or suppressed as a unit; later fields are
   scanned one character at a time.
   NOTE(review): the return type, braces, several declarations (c, prev_c,
   len, n_bytes), loop headers and loop bodies are missing from this
   listing; the comments below describe only the visible lines.  */
285 cut_fields (FILE *stream
)
288 uintmax_t field_idx
= 1;
289 bool found_any_selected_field
= false;
290 bool buffer_first_field
;
301 /* To support the semantics of the -s flag, we may have to buffer
302 all of the first field to determine whether it is 'delimited.'
303 But that is unnecessary if all non-delimited lines must be printed
304 and the first field has been selected, or if non-delimited lines
305 must be suppressed and the first field has *not* been selected.
306 That is because a non-delimited line has exactly one field. */
307 buffer_first_field
= (suppress_non_delimited
^ !print_kth (1));
311 if (field_idx
== 1 && buffer_first_field
)
/* Read the first field into FIELD_1_BUFFER, passing both DELIM and
   LINE_DELIM as stop characters and no length limit.  */
316 len
= getndelim2 (&field_1_buffer
, &field_1_bufsize
, 0,
317 GETNLINE_NO_LIMIT
, delim
, line_delim
, stream
);
/* NOTE(review): the condition guarding this cleanup is not visible;
   presumably it is the failed-read path, which then checks for a stream
   error or end of file.  */
320 free (field_1_buffer
);
321 field_1_buffer
= NULL
;
322 if (ferror (stream
) || feof (stream
))
328 assert (n_bytes
!= 0);
332 /* If the first field extends to the end of line (it is not
333 delimited) and we are printing all non-delimited lines,
NOTE(review): the end of this comment is missing from this listing, so
the statements that follow are lexically part of it, up to the next
comment terminator.
335 if (to_uchar (field_1_buffer
[n_bytes
- 1]) != delim
)
337 if (suppress_non_delimited
)
343 fwrite (field_1_buffer
, sizeof (char), n_bytes
, stdout
);
344 /* Make sure the output line is newline terminated. */
345 if (field_1_buffer
[n_bytes
- 1] != line_delim
)
346 putchar (line_delim
);
353 /* Print the field, but not the trailing delimiter. */
354 fwrite (field_1_buffer
, sizeof (char), n_bytes
- 1, stdout
);
356 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
357 if (delim
== line_delim
)
/* Peek at the next character and push it back.  The test made between
   these two calls is not visible in this listing.  */
359 int last_c
= getc (stream
);
362 ungetc (last_c
, stream
);
363 found_any_selected_field
= true;
367 found_any_selected_field
= true;
/* Done with the buffered first field; advance the field index.  */
369 next_item (&field_idx
);
374 if (print_kth (field_idx
))
/* A field was already printed on this line, so separate it from the
   current one with the output delimiter.  */
376 if (found_any_selected_field
)
378 fwrite (output_delimiter_string
, sizeof (char),
379 output_delimiter_length
, stdout
);
381 found_any_selected_field
= true;
/* Scan the current field up to the next DELIM, LINE_DELIM or EOF.
   NOTE(review): the loop body is not visible; for a selected field it
   presumably writes each character.  */
383 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
/* The same scan again; presumably the branch for a field that is not
   selected, whose characters are consumed without being written.  */
391 while ((c
= getc (stream
)) != delim
&& c
!= line_delim
&& c
!= EOF
)
397 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
398 if (delim
== line_delim
&& c
== delim
)
400 int last_c
= getc (stream
);
402 ungetc (last_c
, stream
);
408 next_item (&field_idx
);
409 else if (c
== line_delim
|| c
== EOF
)
/* End of line or input: terminate the output line unless -s is in
   effect and this line never got past its first field with nothing
   printed.  */
411 if (found_any_selected_field
412 || !(suppress_non_delimited
&& field_idx
== 1))
414 if (c
== line_delim
|| prev_c
!= line_delim
415 || delim
== line_delim
)
416 putchar (line_delim
);
/* Presumably resets the per-line state before the next line is read;
   the surrounding statements are not visible.  */
422 found_any_selected_field
= false;
/* Process STREAM according to OPERATING_MODE.
   NOTE(review): only the byte_mode test is visible in this listing; the
   return type, braces and the calls made in each branch (presumably
   cut_bytes and cut_fields) are missing -- confirm against the full file.  */
428 cut_stream (FILE *stream
)
430 if (operating_mode
== byte_mode
)
436 /* Process file FILE to standard output.
437 Return true if successful. */
/* NOTE(review): the return type, local declarations (stream, err), braces
   and several statements of this function are missing from this listing;
   the comments below describe only what the visible lines do.  */
440 cut_file (char const *file
)
/* A lone dash as FILE is recorded as a use of standard input, which main
   checks later before closing stdin.  */
444 if (STREQ (file
, "-"))
446 have_read_stdin
= true;
/* Any other name is opened for reading.  */
451 stream
= fopen (file
, "r");
/* Report the failure with errno against the quoted file name.  */
454 error (0, errno
, "%s", quotef (file
));
/* Advise that STREAM will be accessed sequentially.  */
459 fadvise (stream
, FADVISE_SEQUENTIAL
);
464 if (!ferror (stream
))
/* Standard input is only reset here, not closed; any other stream is
   closed and a failing close is detected.  */
466 if (STREQ (file
, "-"))
467 clearerr (stream
); /* Also clear EOF. */
468 else if (fclose (stream
) == EOF
)
/* Report ERR against the quoted file name.  The assignments that set ERR
   and the condition guarding this call are not visible.  */
472 error (0, err
, "%s", quotef (file
));
/* Entry point.  Parses the command line with getopt_long, rejects invalid
   option combinations through FATAL_ERROR, builds the range list with
   set_fields, and runs cut_file on each remaining operand.  Returns
   EXIT_SUCCESS only if OK is still true at the end.
   NOTE(review): the return type, braces, the switch header, most case
   labels, and several declarations and assignments are missing from this
   listing; the comments below describe only the visible lines.  */
479 main (int argc
, char **argv
)
483 bool delim_specified
= false;
484 char *spec_list_string
IF_LINT ( = NULL
);
486 initialize_main (&argc
, &argv
);
487 set_program_name (argv
[0]);
488 setlocale (LC_ALL
, "");
489 bindtextdomain (PACKAGE
, LOCALEDIR
);
490 textdomain (PACKAGE
);
/* Register close_stdout to run at exit.  */
492 atexit (close_stdout
);
494 operating_mode
= undefined_mode
;
496 /* By default, all non-delimited lines are printed. */
497 suppress_non_delimited
= false;
500 have_read_stdin
= false;
502 while ((optc
= getopt_long (argc
, argv
, "b:c:d:f:nsz", longopts
, NULL
)) != -1)
508 /* Build the byte list. */
509 if (operating_mode
!= undefined_mode
)
510 FATAL_ERROR (_("only one type of list may be specified"));
511 operating_mode
= byte_mode
;
/* The LIST text is only remembered here; it is parsed by set_fields
   after all options have been read.  */
512 spec_list_string
= optarg
;
516 /* Build the field list. */
517 if (operating_mode
!= undefined_mode
)
518 FATAL_ERROR (_("only one type of list may be specified"));
519 operating_mode
= field_mode
;
520 spec_list_string
= optarg
;
525 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
526 if (optarg
[0] != '\0' && optarg
[1] != '\0')
527 FATAL_ERROR (_("the delimiter must be a single character"));
/* NOTE(review): the assignment that stores the delimiter itself is not
   visible in this listing.  */
529 delim_specified
= true;
532 case OUTPUT_DELIMITER_OPTION
:
533 output_delimiter_specified
= true;
534 /* Interpret --output-delimiter='' to mean
535 'use the NUL byte as the delimiter.' */
536 output_delimiter_length
= (optarg
[0] == '\0'
537 ? 1 : strlen (optarg
));
538 output_delimiter_string
= xstrdup (optarg
);
/* Presumably the -s (--only-delimited) case; its label is not visible.  */
545 suppress_non_delimited
= true;
/* NOTE(review): the statement of this case (presumably setting the
   complement flag) is not visible in this listing.  */
552 case COMPLEMENT_OPTION
:
556 case_GETOPT_HELP_CHAR
;
558 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
561 usage (EXIT_FAILURE
);
/* Validate the option combination before reading any input: a list is
   mandatory, and -d and -s are accepted only together with -f.  */
565 if (operating_mode
== undefined_mode
)
566 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
568 if (delim_specified
&& operating_mode
!= field_mode
)
569 FATAL_ERROR (_("an input delimiter may be specified only\
570 when operating on fields"));
572 if (suppress_non_delimited
&& operating_mode
!= field_mode
)
573 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
574 \tonly when operating on fields"));
/* Parse the LIST text.  Field mode passes no extra flag, the other modes
   add SETFLD_ERRMSG_USE_POS, and --complement adds SETFLD_COMPLEMENT.  */
576 set_fields (spec_list_string
,
577 ( (operating_mode
== field_mode
) ? 0 : SETFLD_ERRMSG_USE_POS
)
578 | (complement
? SETFLD_COMPLEMENT
: 0) );
/* NOTE(review): the statement run when no -d was given is not visible;
   the usage text says TAB is the default field delimiter.  */
580 if (!delim_specified
)
/* No --output-delimiter was given: fall back to a one-byte delimiter
   string kept in a static buffer.  The statement that fills DUMMY is not
   visible; per the comment on output_delimiter_string it defaults to the
   input delimiter.  */
583 if (output_delimiter_string
== NULL
)
585 static char dummy
[2];
588 output_delimiter_string
= dummy
;
589 output_delimiter_length
= 1;
/* Process every remaining operand.  A failure clears OK but does not
   stop the loop.  */
595 for (ok
= true; optind
< argc
; optind
++)
596 ok
&= cut_file (argv
[optind
]);
/* Close stdin if any operand read from it, reporting a failed close.  */
599 if (have_read_stdin
&& fclose (stdin
) == EOF
)
601 error (0, errno
, "-");
605 IF_LINT (reset_fields ());
607 return ok
? EXIT_SUCCESS
: EXIT_FAILURE
;