stty: fix sane setting of susp to ^z on Solaris
[coreutils.git] / src / cut.c
blob7ab6be4cb435e9d1536c5c3ee4390258f4a16694
1 /* cut - remove parts of lines of files
2 Copyright (C) 1997-2016 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* POSIX changes, bug fixes, long-named options, and cleanup
21 by David MacKenzie <djm@gnu.ai.mit.edu>.
23 Rewrite cut_fields and cut_bytes -- Jim Meyering. */
25 #include <config.h>
27 #include <stdio.h>
28 #include <assert.h>
29 #include <getopt.h>
30 #include <sys/types.h>
31 #include "system.h"
33 #include "error.h"
34 #include "fadvise.h"
35 #include "getndelim2.h"
36 #include "hash.h"
37 #include "xstrndup.h"
39 #include "set-fields.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cut"

/* The authors of this program, as a comma-separated argument list.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie"), \
  proper_name ("Jim Meyering")

/* Issue MESSAGE as a diagnostic and then call usage (EXIT_FAILURE).
   MESSAGE is handed to error () as the argument of a "%s" format, never
   as the format itself, so a '%' appearing in a (possibly translated)
   message cannot be interpreted as a conversion specification.  */
#define FATAL_ERROR(Message) \
  do \
    { \
      error (0, 0, "%s", (Message)); \
      usage (EXIT_FAILURE); \
    } \
  while (0)
/* Cursor into the list of selected ranges.  Whether a byte or field is
   selected by a finite range is decided by comparing its index with
   CURRENT_RP->lo and CURRENT_RP->hi; as soon as the index grows past
   CURRENT_RP->hi, CURRENT_RP is moved on to the next range pair.  */
static struct field_range_pair *current_rp;

/* Holding area for the first field of a line.  It exists to implement
   the -s option (or its absence) when the field list includes (does
   not include) the first field: in both situations the whole first
   field has to be read before any of it may be written, because only
   then is it known whether a delimiter or a newline follows it.  In
   every other situation cut_fields works without this buffer.  */
static char *field_1_buffer;

/* Allocated size of FIELD_1_BUFFER, in bytes.  */
static size_t field_1_bufsize;
/* What kind of list was given on the command line.  */
enum operating_mode
  {
    /* No list option has been seen yet.  */
    undefined_mode,

    /* Output characters that are in the given bytes.  */
    byte_mode,

    /* Output the given delimiter-separated fields.  */
    field_mode
  };

/* The mode selected for this run.  */
static enum operating_mode operating_mode;
/* When true, lines that contain no delimiter character are not output;
   when false, every such line is printed.  Only meaningful in field
   mode.  */
static bool suppress_non_delimited;

/* When true, output every byte, character, or field _except_ the ones
   that were specified.  */
static bool complement;

/* Input delimiter character used in field mode.  */
static unsigned char delim;

/* Delimiter that terminates each line/record.  */
static unsigned char line_delim = '\n';

/* Set once the --output-delimiter=STRING option has been given.  */
static bool output_delimiter_specified;

/* Number of bytes in output_delimiter_string.  */
static size_t output_delimiter_length;

/* String written between output fields.  By default it is the
   1-character string holding the input delimiter.  */
static char *output_delimiter_string;

/* Set once standard input has been read at least one time.  */
static bool have_read_stdin;
/* Long options without a short-option counterpart are identified by
   pseudo short options that are not characters; their values begin
   at CHAR_MAX + 1.  */
enum
{
  OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1,
  COMPLEMENT_OPTION
};
125 static struct option const longopts[] =
127 {"bytes", required_argument, NULL, 'b'},
128 {"characters", required_argument, NULL, 'c'},
129 {"fields", required_argument, NULL, 'f'},
130 {"delimiter", required_argument, NULL, 'd'},
131 {"only-delimited", no_argument, NULL, 's'},
132 {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
133 {"complement", no_argument, NULL, COMPLEMENT_OPTION},
134 {"zero-terminated", no_argument, NULL, 'z'},
135 {GETOPT_HELP_OPTION_DECL},
136 {GETOPT_VERSION_OPTION_DECL},
137 {NULL, 0, NULL, 0}
/* Print usage information and terminate the program with STATUS.
   When STATUS is not EXIT_SUCCESS, only emit_try_help () is called.
   Otherwise the synopsis, the option descriptions and the explanation
   of the LIST syntax are written to standard output.  In both cases
   the function finishes by calling exit (STATUS).
   NOTE(review): the whitespace inside the help-text literals below
   looks collapsed and some short lines appear to be missing -- confirm
   against the pristine source before relying on the column layout.  */
140 void
141 usage (int status)
143 if (status != EXIT_SUCCESS)
144 emit_try_help ();
145 else
/* Synopsis line; %s is filled in with program_name.  */
147 printf (_("\
148 Usage: %s OPTION... [FILE]...\n\
150 program_name);
151 fputs (_("\
152 Print selected parts of lines from each FILE to standard output.\n\
153 "), stdout);
/* Notes emitted by helpers that are defined outside this file.  */
155 emit_stdin_note ();
156 emit_mandatory_arg_note ();
/* Option descriptions, written in several separately translated pieces.  */
158 fputs (_("\
159 -b, --bytes=LIST select only these bytes\n\
160 -c, --characters=LIST select only these characters\n\
161 -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\
162 "), stdout);
163 fputs (_("\
164 -f, --fields=LIST select only these fields; also print any line\n\
165 that contains no delimiter character, unless\n\
166 the -s option is specified\n\
167 -n (ignored)\n\
168 "), stdout);
169 fputs (_("\
170 --complement complement the set of selected bytes, characters\n\
171 or fields\n\
172 "), stdout);
173 fputs (_("\
174 -s, --only-delimited do not print lines not containing delimiters\n\
175 --output-delimiter=STRING use STRING as the output delimiter\n\
176 the default is to use the input delimiter\n\
177 "), stdout);
178 fputs (_("\
179 -z, --zero-terminated line delimiter is NUL, not newline\n\
180 "), stdout);
181 fputs (HELP_OPTION_DESCRIPTION, stdout);
182 fputs (VERSION_OPTION_DESCRIPTION, stdout);
/* Explanation of how a LIST is written and of the range forms.  */
183 fputs (_("\
185 Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\
186 range, or many ranges separated by commas. Selected input is written\n\
187 in the same order that it is read, and is written exactly once.\n\
188 "), stdout);
189 fputs (_("\
190 Each range is one of:\n\
192 N N'th byte, character or field, counted from 1\n\
193 N- from N'th byte, character or field, to end of line\n\
194 N-M from N'th to M'th (included) byte, character or field\n\
195 -M from first to M'th (included) byte, character or field\n\
196 "), stdout);
197 emit_ancillary_info (PROGRAM_NAME);
/* Reached for every STATUS: this function does not return.  */
199 exit (status);
203 /* Increment *ITEM_IDX (i.e., a field or byte index),
204 and if required CURRENT_RP. */
206 static inline void
207 next_item (size_t *item_idx)
209 (*item_idx)++;
210 if ((*item_idx) > current_rp->hi)
211 current_rp++;
214 /* Return nonzero if the K'th field or byte is printable. */
216 static inline bool
217 print_kth (size_t k)
219 return current_rp->lo <= k;
222 /* Return nonzero if K'th byte is the beginning of a range. */
224 static inline bool
225 is_range_start_index (size_t k)
227 return k == current_rp->lo;
230 /* Read from stream STREAM, printing to standard output any selected bytes. */
232 static void
233 cut_bytes (FILE *stream)
235 size_t byte_idx; /* Number of bytes in the line so far. */
236 /* Whether to begin printing delimiters between ranges for the current line.
237 Set after we've begun printing data corresponding to the first range. */
238 bool print_delimiter;
240 byte_idx = 0;
241 print_delimiter = false;
242 current_rp = frp;
243 while (true)
245 int c; /* Each character from the file. */
247 c = getc (stream);
249 if (c == line_delim)
251 putchar (c);
252 byte_idx = 0;
253 print_delimiter = false;
254 current_rp = frp;
256 else if (c == EOF)
258 if (byte_idx > 0)
259 putchar (line_delim);
260 break;
262 else
264 next_item (&byte_idx);
265 if (print_kth (byte_idx))
267 if (output_delimiter_specified)
269 if (print_delimiter && is_range_start_index (byte_idx))
271 fwrite (output_delimiter_string, sizeof (char),
272 output_delimiter_length, stdout);
274 print_delimiter = true;
277 putchar (c);
283 /* Read from stream STREAM, printing to standard output any selected fields. */
285 static void
286 cut_fields (FILE *stream)
288 int c;
289 size_t field_idx = 1;
290 bool found_any_selected_field = false;
291 bool buffer_first_field;
293 current_rp = frp;
295 c = getc (stream);
296 if (c == EOF)
297 return;
299 ungetc (c, stream);
300 c = 0;
302 /* To support the semantics of the -s flag, we may have to buffer
303 all of the first field to determine whether it is 'delimited.'
304 But that is unnecessary if all non-delimited lines must be printed
305 and the first field has been selected, or if non-delimited lines
306 must be suppressed and the first field has *not* been selected.
307 That is because a non-delimited line has exactly one field. */
308 buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
310 while (1)
312 if (field_idx == 1 && buffer_first_field)
314 ssize_t len;
315 size_t n_bytes;
317 len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
318 GETNLINE_NO_LIMIT, delim, line_delim, stream);
319 if (len < 0)
321 free (field_1_buffer);
322 field_1_buffer = NULL;
323 if (ferror (stream) || feof (stream))
324 break;
325 xalloc_die ();
328 n_bytes = len;
329 assert (n_bytes != 0);
331 c = 0;
333 /* If the first field extends to the end of line (it is not
334 delimited) and we are printing all non-delimited lines,
335 print this one. */
336 if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
338 if (suppress_non_delimited)
340 /* Empty. */
342 else
344 fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
345 /* Make sure the output line is newline terminated. */
346 if (field_1_buffer[n_bytes - 1] != line_delim)
347 putchar (line_delim);
348 c = line_delim;
350 continue;
352 if (print_kth (1))
354 /* Print the field, but not the trailing delimiter. */
355 fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
357 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
358 if (delim == line_delim)
360 int last_c = getc (stream);
361 if (last_c != EOF)
363 ungetc (last_c, stream);
364 found_any_selected_field = true;
367 else
368 found_any_selected_field = true;
370 next_item (&field_idx);
373 int prev_c = c;
375 if (print_kth (field_idx))
377 if (found_any_selected_field)
379 fwrite (output_delimiter_string, sizeof (char),
380 output_delimiter_length, stdout);
382 found_any_selected_field = true;
384 while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
386 putchar (c);
387 prev_c = c;
390 else
392 while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
394 prev_c = c;
398 /* With -d$'\n' don't treat the last '\n' as a delimiter. */
399 if (delim == line_delim && c == delim)
401 int last_c = getc (stream);
402 if (last_c != EOF)
403 ungetc (last_c, stream);
404 else
405 c = last_c;
408 if (c == delim)
409 next_item (&field_idx);
410 else if (c == line_delim || c == EOF)
412 if (found_any_selected_field
413 || !(suppress_non_delimited && field_idx == 1))
415 if (c == line_delim || prev_c != line_delim
416 || delim == line_delim)
417 putchar (line_delim);
419 if (c == EOF)
420 break;
421 field_idx = 1;
422 current_rp = frp;
423 found_any_selected_field = false;
428 static void
429 cut_stream (FILE *stream)
431 if (operating_mode == byte_mode)
432 cut_bytes (stream);
433 else
434 cut_fields (stream);
437 /* Process file FILE to standard output.
438 Return true if successful. */
440 static bool
441 cut_file (char const *file)
443 FILE *stream;
445 if (STREQ (file, "-"))
447 have_read_stdin = true;
448 stream = stdin;
450 else
452 stream = fopen (file, "r");
453 if (stream == NULL)
455 error (0, errno, "%s", quotef (file));
456 return false;
460 fadvise (stream, FADVISE_SEQUENTIAL);
462 cut_stream (stream);
464 if (ferror (stream))
466 error (0, errno, "%s", quotef (file));
467 return false;
469 if (STREQ (file, "-"))
470 clearerr (stream); /* Also clear EOF. */
471 else if (fclose (stream) == EOF)
473 error (0, errno, "%s", quotef (file));
474 return false;
476 return true;
480 main (int argc, char **argv)
482 int optc;
483 bool ok;
484 bool delim_specified = false;
485 char *spec_list_string IF_LINT ( = NULL);
487 initialize_main (&argc, &argv);
488 set_program_name (argv[0]);
489 setlocale (LC_ALL, "");
490 bindtextdomain (PACKAGE, LOCALEDIR);
491 textdomain (PACKAGE);
493 atexit (close_stdout);
495 operating_mode = undefined_mode;
497 /* By default, all non-delimited lines are printed. */
498 suppress_non_delimited = false;
500 delim = '\0';
501 have_read_stdin = false;
503 while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)
505 switch (optc)
507 case 'b':
508 case 'c':
509 /* Build the byte list. */
510 if (operating_mode != undefined_mode)
511 FATAL_ERROR (_("only one type of list may be specified"));
512 operating_mode = byte_mode;
513 spec_list_string = optarg;
514 break;
516 case 'f':
517 /* Build the field list. */
518 if (operating_mode != undefined_mode)
519 FATAL_ERROR (_("only one type of list may be specified"));
520 operating_mode = field_mode;
521 spec_list_string = optarg;
522 break;
524 case 'd':
525 /* New delimiter. */
526 /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
527 if (optarg[0] != '\0' && optarg[1] != '\0')
528 FATAL_ERROR (_("the delimiter must be a single character"));
529 delim = optarg[0];
530 delim_specified = true;
531 break;
533 case OUTPUT_DELIMITER_OPTION:
534 output_delimiter_specified = true;
535 /* Interpret --output-delimiter='' to mean
536 'use the NUL byte as the delimiter.' */
537 output_delimiter_length = (optarg[0] == '\0'
538 ? 1 : strlen (optarg));
539 output_delimiter_string = xstrdup (optarg);
540 break;
542 case 'n':
543 break;
545 case 's':
546 suppress_non_delimited = true;
547 break;
549 case 'z':
550 line_delim = '\0';
551 break;
553 case COMPLEMENT_OPTION:
554 complement = true;
555 break;
557 case_GETOPT_HELP_CHAR;
559 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
561 default:
562 usage (EXIT_FAILURE);
566 if (operating_mode == undefined_mode)
567 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
569 if (delim_specified && operating_mode != field_mode)
570 FATAL_ERROR (_("an input delimiter may be specified only\
571 when operating on fields"));
573 if (suppress_non_delimited && operating_mode != field_mode)
574 FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
575 \tonly when operating on fields"));
577 set_fields (spec_list_string,
578 ( (operating_mode == field_mode) ? 0 : SETFLD_ERRMSG_USE_POS)
579 | (complement ? SETFLD_COMPLEMENT : 0) );
581 if (!delim_specified)
582 delim = '\t';
584 if (output_delimiter_string == NULL)
586 static char dummy[2];
587 dummy[0] = delim;
588 dummy[1] = '\0';
589 output_delimiter_string = dummy;
590 output_delimiter_length = 1;
593 if (optind == argc)
594 ok = cut_file ("-");
595 else
596 for (ok = true; optind < argc; optind++)
597 ok &= cut_file (argv[optind]);
600 if (have_read_stdin && fclose (stdin) == EOF)
602 error (0, errno, "-");
603 ok = false;
606 IF_LINT (reset_fields ());
608 return ok ? EXIT_SUCCESS : EXIT_FAILURE;