/* Provenance: coreutils.git, src/cat.c,
   blob 17bc4fab9e3affb9c7864960a9b2f5c02a9acc3d, from the commit
   "cat: with -E fix handling of \r\n spanning buffers".  */

/* cat -- concatenate files and print on the standard output.
   Copyright (C) 1988-2021 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Differences from the Unix cat:
   * Always unbuffered, -u is ignored.
   * Usually much faster than other versions of cat, the difference
   is especially apparent when using the -v option.

   By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman.  */

#include <config.h>

#include <stdio.h>
#include <getopt.h>
#include <sys/types.h>

#if HAVE_STROPTS_H
# include <stropts.h>
#endif
#include <sys/ioctl.h>

#include "system.h"
#include "ioblksize.h"
#include "die.h"
#include "error.h"
#include "fadvise.h"
#include "full-write.h"
#include "safe-read.h"
#include "xbinary-io.h"

/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cat"

#define AUTHORS \
  proper_name ("Torbjorn Granlund"), \
  proper_name ("Richard M. Stallman")

/* Name of input file.  May be "-".  */
static char const *infile;

/* Descriptor on which input file is open.  */
static int input_desc;

/* Buffer for line numbers.
   An 11 digit counter may overflow within an hour on a P2/466,
   an 18 digit counter needs about 1000y.

   Layout: 18 digit positions (blank-padded, initially holding "0" in
   the last one), followed by a TAB separator and the terminating NUL.  */
#define LINE_COUNTER_BUF_LEN 20
static char line_buf[LINE_COUNTER_BUF_LEN] =
  {
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0',
    '\t', '\0'
  };

/* Position in 'line_buf' where printing starts.  This will not change
   unless the number of lines is larger than 999999.  */
static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8;

/* Position of the first digit in 'line_buf'.  */
static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Position of the last digit in 'line_buf'.  */
static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Preserves the 'cat' function's local 'newlines' between invocations,
   so that the line state carries over from one input file to the next.  */
static int newlines2 = 0;

/* Whether there is a pending CR to process, i.e. a '\r' that was the
   last byte of an input buffer and has not been output yet.  */
static bool pending_cr = false;

84 void
85 usage (int status)
87 if (status != EXIT_SUCCESS)
88 emit_try_help ();
89 else
91 printf (_("\
92 Usage: %s [OPTION]... [FILE]...\n\
93 "),
94 program_name);
95 fputs (_("\
96 Concatenate FILE(s) to standard output.\n\
97 "), stdout);
99 emit_stdin_note ();
101 fputs (_("\
103 -A, --show-all equivalent to -vET\n\
104 -b, --number-nonblank number nonempty output lines, overrides -n\n\
105 -e equivalent to -vE\n\
106 -E, --show-ends display $ at end of each line\n\
107 -n, --number number all output lines\n\
108 -s, --squeeze-blank suppress repeated empty output lines\n\
109 "), stdout);
110 fputs (_("\
111 -t equivalent to -vT\n\
112 -T, --show-tabs display TAB characters as ^I\n\
113 -u (ignored)\n\
114 -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB\n\
115 "), stdout);
116 fputs (HELP_OPTION_DESCRIPTION, stdout);
117 fputs (VERSION_OPTION_DESCRIPTION, stdout);
118 printf (_("\
120 Examples:\n\
121 %s f - g Output f's contents, then standard input, then g's contents.\n\
122 %s Copy standard input to standard output.\n\
124 program_name, program_name);
125 emit_ancillary_info (PROGRAM_NAME);
127 exit (status);
130 /* Compute the next line number. */
132 static void
133 next_line_num (void)
135 char *endp = line_num_end;
138 if ((*endp)++ < '9')
139 return;
140 *endp-- = '0';
142 while (endp >= line_num_start);
143 if (line_num_start > line_buf)
144 *--line_num_start = '1';
145 else
146 *line_buf = '>';
147 if (line_num_start < line_num_print)
148 line_num_print--;
151 /* Plain cat. Copies the file behind 'input_desc' to STDOUT_FILENO.
152 Return true if successful. */
154 static bool
155 simple_cat (
156 /* Pointer to the buffer, used by reads and writes. */
157 char *buf,
159 /* Number of characters preferably read or written by each read and write
160 call. */
161 size_t bufsize)
163 /* Actual number of characters read, and therefore written. */
164 size_t n_read;
166 /* Loop until the end of the file. */
168 while (true)
170 /* Read a block of input. */
172 n_read = safe_read (input_desc, buf, bufsize);
173 if (n_read == SAFE_READ_ERROR)
175 error (0, errno, "%s", quotef (infile));
176 return false;
179 /* End of this file? */
181 if (n_read == 0)
182 return true;
184 /* Write this block out. */
187 /* The following is ok, since we know that 0 < n_read. */
188 size_t n = n_read;
189 if (full_write (STDOUT_FILENO, buf, n) != n)
190 die (EXIT_FAILURE, errno, _("write error"));
/* Write any pending output to STDOUT_FILENO.
   Pending is defined to be the *BPOUT - OUTBUF bytes starting at OUTBUF.
   Then set *BPOUT to OUTBUF if it's not already that value.
   A short or failed write is fatal ('die').  */

static inline void
write_pending (char *outbuf, char **bpout)
{
  size_t n_write = *bpout - outbuf;
  if (0 < n_write)
    {
      if (full_write (STDOUT_FILENO, outbuf, n_write) != n_write)
        die (EXIT_FAILURE, errno, _("write error"));
      *bpout = outbuf;
    }
}

211 /* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC.
212 Return true if successful.
213 Called if any option more than -u was specified.
215 A newline character is always put at the end of the buffer, to make
216 an explicit test for buffer end unnecessary. */
218 static bool
219 cat (
220 /* Pointer to the beginning of the input buffer. */
221 char *inbuf,
223 /* Number of characters read in each read call. */
224 size_t insize,
226 /* Pointer to the beginning of the output buffer. */
227 char *outbuf,
229 /* Number of characters written by each write call. */
230 size_t outsize,
232 /* Variables that have values according to the specified options. */
233 bool show_nonprinting,
234 bool show_tabs,
235 bool number,
236 bool number_nonblank,
237 bool show_ends,
238 bool squeeze_blank)
240 /* Last character read from the input buffer. */
241 unsigned char ch;
243 /* Pointer to the next character in the input buffer. */
244 char *bpin;
246 /* Pointer to the first non-valid byte in the input buffer, i.e., the
247 current end of the buffer. */
248 char *eob;
250 /* Pointer to the position where the next character shall be written. */
251 char *bpout;
253 /* Number of characters read by the last read call. */
254 size_t n_read;
256 /* Determines how many consecutive newlines there have been in the
257 input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1,
258 etc. Initially 0 to indicate that we are at the beginning of a
259 new line. The "state" of the procedure is determined by
260 NEWLINES. */
261 int newlines = newlines2;
263 #ifdef FIONREAD
264 /* If nonzero, use the FIONREAD ioctl, as an optimization.
265 (On Ultrix, it is not supported on NFS file systems.) */
266 bool use_fionread = true;
267 #endif
269 /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
270 is read immediately. */
272 eob = inbuf;
273 bpin = eob + 1;
275 bpout = outbuf;
277 while (true)
281 /* Write if there are at least OUTSIZE bytes in OUTBUF. */
283 if (outbuf + outsize <= bpout)
285 char *wp = outbuf;
286 size_t remaining_bytes;
289 if (full_write (STDOUT_FILENO, wp, outsize) != outsize)
290 die (EXIT_FAILURE, errno, _("write error"));
291 wp += outsize;
292 remaining_bytes = bpout - wp;
294 while (outsize <= remaining_bytes);
296 /* Move the remaining bytes to the beginning of the
297 buffer. */
299 memmove (outbuf, wp, remaining_bytes);
300 bpout = outbuf + remaining_bytes;
303 /* Is INBUF empty? */
305 if (bpin > eob)
307 bool input_pending = false;
308 #ifdef FIONREAD
309 int n_to_read = 0;
311 /* Is there any input to read immediately?
312 If not, we are about to wait,
313 so write all buffered output before waiting. */
315 if (use_fionread
316 && ioctl (input_desc, FIONREAD, &n_to_read) < 0)
318 /* Ultrix returns EOPNOTSUPP on NFS;
319 HP-UX returns ENOTTY on pipes.
320 SunOS returns EINVAL and
321 More/BSD returns ENODEV on special files
322 like /dev/null.
323 Irix-5 returns ENOSYS on pipes. */
324 if (errno == EOPNOTSUPP || errno == ENOTTY
325 || errno == EINVAL || errno == ENODEV
326 || errno == ENOSYS)
327 use_fionread = false;
328 else
330 error (0, errno, _("cannot do ioctl on %s"),
331 quoteaf (infile));
332 newlines2 = newlines;
333 return false;
336 if (n_to_read != 0)
337 input_pending = true;
338 #endif
340 if (!input_pending)
341 write_pending (outbuf, &bpout);
343 /* Read more input into INBUF. */
345 n_read = safe_read (input_desc, inbuf, insize);
346 if (n_read == SAFE_READ_ERROR)
348 error (0, errno, "%s", quotef (infile));
349 write_pending (outbuf, &bpout);
350 newlines2 = newlines;
351 return false;
353 if (n_read == 0)
355 write_pending (outbuf, &bpout);
356 newlines2 = newlines;
357 return true;
360 /* Update the pointers and insert a sentinel at the buffer
361 end. */
363 bpin = inbuf;
364 eob = bpin + n_read;
365 *eob = '\n';
367 else
369 /* It was a real (not a sentinel) newline. */
371 /* Was the last line empty?
372 (i.e., have two or more consecutive newlines been read?) */
374 if (++newlines > 0)
376 if (newlines >= 2)
378 /* Limit this to 2 here. Otherwise, with lots of
379 consecutive newlines, the counter could wrap
380 around at INT_MAX. */
381 newlines = 2;
383 /* Are multiple adjacent empty lines to be substituted
384 by single ditto (-s), and this was the second empty
385 line? */
386 if (squeeze_blank)
388 ch = *bpin++;
389 continue;
393 /* Are line numbers to be written at empty lines (-n)? */
395 if (number && !number_nonblank)
397 next_line_num ();
398 bpout = stpcpy (bpout, line_num_print);
402 /* Output a currency symbol if requested (-e). */
403 if (show_ends)
405 if (pending_cr)
407 *bpout++ = '^';
408 *bpout++ = 'M';
409 pending_cr = false;
411 *bpout++ = '$';
414 /* Output the newline. */
416 *bpout++ = '\n';
418 ch = *bpin++;
420 while (ch == '\n');
422 /* Here CH cannot contain a newline character. */
424 if (pending_cr)
426 *bpout++ = '\r';
427 pending_cr = false;
430 /* Are we at the beginning of a line, and line numbers are requested? */
432 if (newlines >= 0 && number)
434 next_line_num ();
435 bpout = stpcpy (bpout, line_num_print);
438 /* The loops below continue until a newline character is found,
439 which means that the buffer is empty or that a proper newline
440 has been found. */
442 /* If quoting, i.e., at least one of -v, -e, or -t specified,
443 scan for chars that need conversion. */
444 if (show_nonprinting)
446 while (true)
448 if (ch >= 32)
450 if (ch < 127)
451 *bpout++ = ch;
452 else if (ch == 127)
454 *bpout++ = '^';
455 *bpout++ = '?';
457 else
459 *bpout++ = 'M';
460 *bpout++ = '-';
461 if (ch >= 128 + 32)
463 if (ch < 128 + 127)
464 *bpout++ = ch - 128;
465 else
467 *bpout++ = '^';
468 *bpout++ = '?';
471 else
473 *bpout++ = '^';
474 *bpout++ = ch - 128 + 64;
478 else if (ch == '\t' && !show_tabs)
479 *bpout++ = '\t';
480 else if (ch == '\n')
482 newlines = -1;
483 break;
485 else
487 *bpout++ = '^';
488 *bpout++ = ch + 64;
491 ch = *bpin++;
494 else
496 /* Not quoting, neither of -v, -e, or -t specified. */
497 while (true)
499 if (ch == '\t' && show_tabs)
501 *bpout++ = '^';
502 *bpout++ = ch + 64;
504 else if (ch != '\n')
506 if (ch == '\r' && *bpin == '\n' && show_ends)
508 if (bpin == eob)
509 pending_cr = true;
510 else
512 *bpout++ = '^';
513 *bpout++ = 'M';
516 else
517 *bpout++ = ch;
519 else
521 newlines = -1;
522 break;
525 ch = *bpin++;
532 main (int argc, char **argv)
534 /* Optimal size of i/o operations of output. */
535 size_t outsize;
537 /* Optimal size of i/o operations of input. */
538 size_t insize;
540 size_t page_size = getpagesize ();
542 /* Pointer to the input buffer. */
543 char *inbuf;
545 /* Pointer to the output buffer. */
546 char *outbuf;
548 bool ok = true;
549 int c;
551 /* Index in argv to processed argument. */
552 int argind;
554 /* Device number of the output (file or whatever). */
555 dev_t out_dev;
557 /* I-node number of the output. */
558 ino_t out_ino;
560 /* True if the output is a regular file. */
561 bool out_isreg;
563 /* Nonzero if we have ever read standard input. */
564 bool have_read_stdin = false;
566 struct stat stat_buf;
568 /* Variables that are set according to the specified options. */
569 bool number = false;
570 bool number_nonblank = false;
571 bool squeeze_blank = false;
572 bool show_ends = false;
573 bool show_nonprinting = false;
574 bool show_tabs = false;
575 int file_open_mode = O_RDONLY;
577 static struct option const long_options[] =
579 {"number-nonblank", no_argument, NULL, 'b'},
580 {"number", no_argument, NULL, 'n'},
581 {"squeeze-blank", no_argument, NULL, 's'},
582 {"show-nonprinting", no_argument, NULL, 'v'},
583 {"show-ends", no_argument, NULL, 'E'},
584 {"show-tabs", no_argument, NULL, 'T'},
585 {"show-all", no_argument, NULL, 'A'},
586 {GETOPT_HELP_OPTION_DECL},
587 {GETOPT_VERSION_OPTION_DECL},
588 {NULL, 0, NULL, 0}
591 initialize_main (&argc, &argv);
592 set_program_name (argv[0]);
593 setlocale (LC_ALL, "");
594 bindtextdomain (PACKAGE, LOCALEDIR);
595 textdomain (PACKAGE);
597 /* Arrange to close stdout if we exit via the
598 case_GETOPT_HELP_CHAR or case_GETOPT_VERSION_CHAR code.
599 Normally STDOUT_FILENO is used rather than stdout, so
600 close_stdout does nothing. */
601 atexit (close_stdout);
603 /* Parse command line options. */
605 while ((c = getopt_long (argc, argv, "benstuvAET", long_options, NULL))
606 != -1)
608 switch (c)
610 case 'b':
611 number = true;
612 number_nonblank = true;
613 break;
615 case 'e':
616 show_ends = true;
617 show_nonprinting = true;
618 break;
620 case 'n':
621 number = true;
622 break;
624 case 's':
625 squeeze_blank = true;
626 break;
628 case 't':
629 show_tabs = true;
630 show_nonprinting = true;
631 break;
633 case 'u':
634 /* We provide the -u feature unconditionally. */
635 break;
637 case 'v':
638 show_nonprinting = true;
639 break;
641 case 'A':
642 show_nonprinting = true;
643 show_ends = true;
644 show_tabs = true;
645 break;
647 case 'E':
648 show_ends = true;
649 break;
651 case 'T':
652 show_tabs = true;
653 break;
655 case_GETOPT_HELP_CHAR;
657 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
659 default:
660 usage (EXIT_FAILURE);
664 /* Get device, i-node number, and optimal blocksize of output. */
666 if (fstat (STDOUT_FILENO, &stat_buf) < 0)
667 die (EXIT_FAILURE, errno, _("standard output"));
669 outsize = io_blksize (stat_buf);
670 out_dev = stat_buf.st_dev;
671 out_ino = stat_buf.st_ino;
672 out_isreg = S_ISREG (stat_buf.st_mode) != 0;
674 if (! (number || show_ends || squeeze_blank))
676 file_open_mode |= O_BINARY;
677 xset_binary_mode (STDOUT_FILENO, O_BINARY);
680 /* Check if any of the input files are the same as the output file. */
682 /* Main loop. */
684 infile = "-";
685 argind = optind;
689 if (argind < argc)
690 infile = argv[argind];
692 if (STREQ (infile, "-"))
694 have_read_stdin = true;
695 input_desc = STDIN_FILENO;
696 if (file_open_mode & O_BINARY)
697 xset_binary_mode (STDIN_FILENO, O_BINARY);
699 else
701 input_desc = open (infile, file_open_mode);
702 if (input_desc < 0)
704 error (0, errno, "%s", quotef (infile));
705 ok = false;
706 continue;
710 if (fstat (input_desc, &stat_buf) < 0)
712 error (0, errno, "%s", quotef (infile));
713 ok = false;
714 goto contin;
716 insize = io_blksize (stat_buf);
718 fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
720 /* Don't copy a nonempty regular file to itself, as that would
721 merely exhaust the output device. It's better to catch this
722 error earlier rather than later. */
724 if (out_isreg
725 && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino
726 && lseek (input_desc, 0, SEEK_CUR) < stat_buf.st_size)
728 error (0, 0, _("%s: input file is output file"), quotef (infile));
729 ok = false;
730 goto contin;
733 /* Select which version of 'cat' to use. If any format-oriented
734 options were given use 'cat'; otherwise use 'simple_cat'. */
736 if (! (number || show_ends || show_nonprinting
737 || show_tabs || squeeze_blank))
739 insize = MAX (insize, outsize);
740 inbuf = xmalloc (insize + page_size - 1);
742 ok &= simple_cat (ptr_align (inbuf, page_size), insize);
744 else
746 inbuf = xmalloc (insize + 1 + page_size - 1);
748 /* Why are
749 (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN + PAGE_SIZE - 1)
750 bytes allocated for the output buffer?
752 A test whether output needs to be written is done when the input
753 buffer empties or when a newline appears in the input. After
754 output is written, at most (OUTSIZE - 1) bytes will remain in the
755 buffer. Now INSIZE bytes of input is read. Each input character
756 may grow by a factor of 4 (by the prepending of M-^). If all
757 characters do, and no newlines appear in this block of input, we
758 will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer.
759 If the last character in the preceding block of input was a
760 newline, a line number may be written (according to the given
761 options) as the first thing in the output buffer. (Done after the
762 new input is read, but before processing of the input begins.)
763 A line number requires seldom more than LINE_COUNTER_BUF_LEN
764 positions.
766 Align the output buffer to a page size boundary, for efficiency
767 on some paging implementations, so add PAGE_SIZE - 1 bytes to the
768 request to make room for the alignment. */
770 outbuf = xmalloc (outsize - 1 + insize * 4 + LINE_COUNTER_BUF_LEN
771 + page_size - 1);
773 ok &= cat (ptr_align (inbuf, page_size), insize,
774 ptr_align (outbuf, page_size), outsize, show_nonprinting,
775 show_tabs, number, number_nonblank, show_ends,
776 squeeze_blank);
778 free (outbuf);
781 free (inbuf);
783 contin:
784 if (!STREQ (infile, "-") && close (input_desc) < 0)
786 error (0, errno, "%s", quotef (infile));
787 ok = false;
790 while (++argind < argc);
792 if (pending_cr)
794 if (full_write (STDOUT_FILENO, "\r", 1) != 1)
795 die (EXIT_FAILURE, errno, _("write error"));
798 if (have_read_stdin && close (STDIN_FILENO) < 0)
799 die (EXIT_FAILURE, errno, _("closing standard input"));
801 return ok ? EXIT_SUCCESS : EXIT_FAILURE;