du: --apparent counts only symlinks and regular
[coreutils.git] / src / cat.c
blobc215473cf22a77322050661798e9f01e9412a0d5
/* cat -- concatenate files and print on the standard output.
   Copyright (C) 1988-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Differences from the Unix cat:
   * Always unbuffered, -u is ignored.
   * Usually much faster than other versions of cat, the difference
   is especially apparent when using the -v option.

   By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman.  */

24 #include <config.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #if HAVE_STROPTS_H
31 # include <stropts.h>
32 #endif
33 #include <sys/ioctl.h>
35 #include "system.h"
36 #include "alignalloc.h"
37 #include "idx.h"
38 #include "ioblksize.h"
39 #include "die.h"
40 #include "error.h"
41 #include "fadvise.h"
42 #include "full-write.h"
43 #include "safe-read.h"
44 #include "xbinary-io.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cat"

#define AUTHORS \
  proper_name ("Torbjorn Granlund"), \
  proper_name ("Richard M. Stallman")

/* Name of input file.  May be "-".  */
static char const *infile;

/* Descriptor on which input file is open.  */
static int input_desc;

/* Buffer for line numbers.
   An 11 digit counter may overflow within an hour on a P2/466,
   an 18 digit counter needs about 1000y.

   Layout: 17 blanks, the initial digit '0', a TAB that separates the
   number from the line text, and the terminating NUL.  The counter
   grows leftwards into the blanks as more digits are needed.  */
#define LINE_COUNTER_BUF_LEN 20
static char line_buf[LINE_COUNTER_BUF_LEN] =
  {
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0',
    '\t', '\0'
  };

/* Position in 'line_buf' where printing starts.  This will not change
   unless the number of lines is larger than 999999.  */
static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8;

/* Position of the first digit in 'line_buf'.  */
static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Position of the last digit in 'line_buf'.  */
static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Preserves the 'cat' function's local 'newlines' between invocations.  */
static int newlines2 = 0;

/* Whether there is a pending CR to process.  */
static bool pending_cr = false;

86 void
87 usage (int status)
89 if (status != EXIT_SUCCESS)
90 emit_try_help ();
91 else
93 printf (_("\
94 Usage: %s [OPTION]... [FILE]...\n\
95 "),
96 program_name);
97 fputs (_("\
98 Concatenate FILE(s) to standard output.\n\
99 "), stdout);
101 emit_stdin_note ();
103 fputs (_("\
105 -A, --show-all equivalent to -vET\n\
106 -b, --number-nonblank number nonempty output lines, overrides -n\n\
107 -e equivalent to -vE\n\
108 -E, --show-ends display $ at end of each line\n\
109 -n, --number number all output lines\n\
110 -s, --squeeze-blank suppress repeated empty output lines\n\
111 "), stdout);
112 fputs (_("\
113 -t equivalent to -vT\n\
114 -T, --show-tabs display TAB characters as ^I\n\
115 -u (ignored)\n\
116 -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB\n\
117 "), stdout);
118 fputs (HELP_OPTION_DESCRIPTION, stdout);
119 fputs (VERSION_OPTION_DESCRIPTION, stdout);
120 printf (_("\
122 Examples:\n\
123 %s f - g Output f's contents, then standard input, then g's contents.\n\
124 %s Copy standard input to standard output.\n\
126 program_name, program_name);
127 emit_ancillary_info (PROGRAM_NAME);
129 exit (status);
132 /* Compute the next line number. */
134 static void
135 next_line_num (void)
137 char *endp = line_num_end;
140 if ((*endp)++ < '9')
141 return;
142 *endp-- = '0';
144 while (endp >= line_num_start);
146 if (line_num_start > line_buf)
147 *--line_num_start = '1';
148 else
149 *line_buf = '>';
150 if (line_num_start < line_num_print)
151 line_num_print--;
154 /* Plain cat. Copy the file behind 'input_desc' to STDOUT_FILENO.
155 BUF (of size BUFSIZE) is the I/O buffer, used by reads and writes.
156 Return true if successful. */
158 static bool
159 simple_cat (char *buf, idx_t bufsize)
161 /* Loop until the end of the file. */
163 while (true)
165 /* Read a block of input. */
167 size_t n_read = safe_read (input_desc, buf, bufsize);
168 if (n_read == SAFE_READ_ERROR)
170 error (0, errno, "%s", quotef (infile));
171 return false;
174 /* End of this file? */
176 if (n_read == 0)
177 return true;
179 /* Write this block out. */
181 if (full_write (STDOUT_FILENO, buf, n_read) != n_read)
182 die (EXIT_FAILURE, errno, _("write error"));
186 /* Write any pending output to STDOUT_FILENO.
187 Pending is defined to be the *BPOUT - OUTBUF bytes starting at OUTBUF.
188 Then set *BPOUT to OUTPUT if it's not already that value. */
190 static inline void
191 write_pending (char *outbuf, char **bpout)
193 idx_t n_write = *bpout - outbuf;
194 if (0 < n_write)
196 if (full_write (STDOUT_FILENO, outbuf, n_write) != n_write)
197 die (EXIT_FAILURE, errno, _("write error"));
198 *bpout = outbuf;
202 /* Copy the file behind 'input_desc' to STDOUT_FILENO.
203 Use INBUF and read INSIZE with each call,
204 and OUTBUF and write OUTSIZE with each call.
205 (The buffers are a bit larger than the I/O sizes.)
206 The remaining boolean args say what 'cat' options to use.
208 Return true if successful.
209 Called if any option more than -u was specified.
211 A newline character is always put at the end of the buffer, to make
212 an explicit test for buffer end unnecessary. */
214 static bool
215 cat (char *inbuf, idx_t insize, char *outbuf, idx_t outsize,
216 bool show_nonprinting, bool show_tabs, bool number, bool number_nonblank,
217 bool show_ends, bool squeeze_blank)
219 /* Last character read from the input buffer. */
220 unsigned char ch;
222 /* Determines how many consecutive newlines there have been in the
223 input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1,
224 etc. Initially 0 to indicate that we are at the beginning of a
225 new line. The "state" of the procedure is determined by
226 NEWLINES. */
227 int newlines = newlines2;
229 #ifdef FIONREAD
230 /* If nonzero, use the FIONREAD ioctl, as an optimization.
231 (On Ultrix, it is not supported on NFS file systems.) */
232 bool use_fionread = true;
233 #endif
235 /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
236 is read immediately. */
238 /* Pointer to the first non-valid byte in the input buffer, i.e., the
239 current end of the buffer. */
240 char *eob = inbuf;
242 /* Pointer to the next character in the input buffer. */
243 char *bpin = eob + 1;
245 /* Pointer to the position where the next character shall be written. */
246 char *bpout = outbuf;
248 while (true)
252 /* Write if there are at least OUTSIZE bytes in OUTBUF. */
254 if (outbuf + outsize <= bpout)
256 char *wp = outbuf;
257 idx_t remaining_bytes;
260 if (full_write (STDOUT_FILENO, wp, outsize) != outsize)
261 die (EXIT_FAILURE, errno, _("write error"));
262 wp += outsize;
263 remaining_bytes = bpout - wp;
265 while (outsize <= remaining_bytes);
267 /* Move the remaining bytes to the beginning of the
268 buffer. */
270 memmove (outbuf, wp, remaining_bytes);
271 bpout = outbuf + remaining_bytes;
274 /* Is INBUF empty? */
276 if (bpin > eob)
278 bool input_pending = false;
279 #ifdef FIONREAD
280 int n_to_read = 0;
282 /* Is there any input to read immediately?
283 If not, we are about to wait,
284 so write all buffered output before waiting. */
286 if (use_fionread
287 && ioctl (input_desc, FIONREAD, &n_to_read) < 0)
289 /* Ultrix returns EOPNOTSUPP on NFS;
290 HP-UX returns ENOTTY on pipes.
291 SunOS returns EINVAL and
292 More/BSD returns ENODEV on special files
293 like /dev/null.
294 Irix-5 returns ENOSYS on pipes. */
295 if (errno == EOPNOTSUPP || errno == ENOTTY
296 || errno == EINVAL || errno == ENODEV
297 || errno == ENOSYS)
298 use_fionread = false;
299 else
301 error (0, errno, _("cannot do ioctl on %s"),
302 quoteaf (infile));
303 newlines2 = newlines;
304 return false;
307 if (n_to_read != 0)
308 input_pending = true;
309 #endif
311 if (!input_pending)
312 write_pending (outbuf, &bpout);
314 /* Read more input into INBUF. */
316 size_t n_read = safe_read (input_desc, inbuf, insize);
317 if (n_read == SAFE_READ_ERROR)
319 error (0, errno, "%s", quotef (infile));
320 write_pending (outbuf, &bpout);
321 newlines2 = newlines;
322 return false;
324 if (n_read == 0)
326 write_pending (outbuf, &bpout);
327 newlines2 = newlines;
328 return true;
331 /* Update the pointers and insert a sentinel at the buffer
332 end. */
334 bpin = inbuf;
335 eob = bpin + n_read;
336 *eob = '\n';
338 else
340 /* It was a real (not a sentinel) newline. */
342 /* Was the last line empty?
343 (i.e., have two or more consecutive newlines been read?) */
345 if (++newlines > 0)
347 if (newlines >= 2)
349 /* Limit this to 2 here. Otherwise, with lots of
350 consecutive newlines, the counter could wrap
351 around at INT_MAX. */
352 newlines = 2;
354 /* Are multiple adjacent empty lines to be substituted
355 by single ditto (-s), and this was the second empty
356 line? */
357 if (squeeze_blank)
359 ch = *bpin++;
360 continue;
364 /* Are line numbers to be written at empty lines (-n)? */
366 if (number && !number_nonblank)
368 next_line_num ();
369 bpout = stpcpy (bpout, line_num_print);
373 /* Output a currency symbol if requested (-e). */
374 if (show_ends)
376 if (pending_cr)
378 *bpout++ = '^';
379 *bpout++ = 'M';
380 pending_cr = false;
382 *bpout++ = '$';
385 /* Output the newline. */
387 *bpout++ = '\n';
389 ch = *bpin++;
391 while (ch == '\n');
393 /* Here CH cannot contain a newline character. */
395 if (pending_cr)
397 *bpout++ = '\r';
398 pending_cr = false;
401 /* Are we at the beginning of a line, and line numbers are requested? */
403 if (newlines >= 0 && number)
405 next_line_num ();
406 bpout = stpcpy (bpout, line_num_print);
409 /* The loops below continue until a newline character is found,
410 which means that the buffer is empty or that a proper newline
411 has been found. */
413 /* If quoting, i.e., at least one of -v, -e, or -t specified,
414 scan for chars that need conversion. */
415 if (show_nonprinting)
417 while (true)
419 if (ch >= 32)
421 if (ch < 127)
422 *bpout++ = ch;
423 else if (ch == 127)
425 *bpout++ = '^';
426 *bpout++ = '?';
428 else
430 *bpout++ = 'M';
431 *bpout++ = '-';
432 if (ch >= 128 + 32)
434 if (ch < 128 + 127)
435 *bpout++ = ch - 128;
436 else
438 *bpout++ = '^';
439 *bpout++ = '?';
442 else
444 *bpout++ = '^';
445 *bpout++ = ch - 128 + 64;
449 else if (ch == '\t' && !show_tabs)
450 *bpout++ = '\t';
451 else if (ch == '\n')
453 newlines = -1;
454 break;
456 else
458 *bpout++ = '^';
459 *bpout++ = ch + 64;
462 ch = *bpin++;
465 else
467 /* Not quoting, neither of -v, -e, or -t specified. */
468 while (true)
470 if (ch == '\t' && show_tabs)
472 *bpout++ = '^';
473 *bpout++ = ch + 64;
475 else if (ch != '\n')
477 if (ch == '\r' && *bpin == '\n' && show_ends)
479 if (bpin == eob)
480 pending_cr = true;
481 else
483 *bpout++ = '^';
484 *bpout++ = 'M';
487 else
488 *bpout++ = ch;
490 else
492 newlines = -1;
493 break;
496 ch = *bpin++;
502 /* Copy data from input to output using copy_file_range if possible.
503 Return 1 if successful, 0 if ordinary read+write should be tried,
504 -1 if a serious problem has been diagnosed. */
506 static int
507 copy_cat (void)
509 /* Copy at most COPY_MAX bytes at a time; this is min
510 (SSIZE_MAX, SIZE_MAX) truncated to a value that is
511 surely aligned well. */
512 ssize_t copy_max = MIN (SSIZE_MAX, SIZE_MAX) >> 30 << 30;
514 /* copy_file_range does not support some cases, and it
515 incorrectly returns 0 when reading from the proc file
516 system on the Linux kernel through at least 5.6.19 (2020),
517 so fall back on read+write if the copy_file_range is
518 unsupported or the input file seems empty. */
520 for (bool some_copied = false; ; some_copied = true)
521 switch (copy_file_range (input_desc, NULL, STDOUT_FILENO, NULL,
522 copy_max, 0))
524 case 0:
525 return some_copied;
527 case -1:
528 if (errno == ENOSYS || is_ENOTSUP (errno) || errno == EINVAL
529 || errno == EBADF || errno == EXDEV || errno == ETXTBSY
530 || errno == EPERM)
531 return 0;
532 error (0, errno, "%s", quotef (infile));
533 return -1;
539 main (int argc, char **argv)
541 /* Nonzero if we have ever read standard input. */
542 bool have_read_stdin = false;
544 struct stat stat_buf;
546 /* Variables that are set according to the specified options. */
547 bool number = false;
548 bool number_nonblank = false;
549 bool squeeze_blank = false;
550 bool show_ends = false;
551 bool show_nonprinting = false;
552 bool show_tabs = false;
553 int file_open_mode = O_RDONLY;
555 static struct option const long_options[] =
557 {"number-nonblank", no_argument, NULL, 'b'},
558 {"number", no_argument, NULL, 'n'},
559 {"squeeze-blank", no_argument, NULL, 's'},
560 {"show-nonprinting", no_argument, NULL, 'v'},
561 {"show-ends", no_argument, NULL, 'E'},
562 {"show-tabs", no_argument, NULL, 'T'},
563 {"show-all", no_argument, NULL, 'A'},
564 {GETOPT_HELP_OPTION_DECL},
565 {GETOPT_VERSION_OPTION_DECL},
566 {NULL, 0, NULL, 0}
569 initialize_main (&argc, &argv);
570 set_program_name (argv[0]);
571 setlocale (LC_ALL, "");
572 bindtextdomain (PACKAGE, LOCALEDIR);
573 textdomain (PACKAGE);
575 /* Arrange to close stdout if we exit via the
576 case_GETOPT_HELP_CHAR or case_GETOPT_VERSION_CHAR code.
577 Normally STDOUT_FILENO is used rather than stdout, so
578 close_stdout does nothing. */
579 atexit (close_stdout);
581 /* Parse command line options. */
583 int c;
584 while ((c = getopt_long (argc, argv, "benstuvAET", long_options, NULL))
585 != -1)
587 switch (c)
589 case 'b':
590 number = true;
591 number_nonblank = true;
592 break;
594 case 'e':
595 show_ends = true;
596 show_nonprinting = true;
597 break;
599 case 'n':
600 number = true;
601 break;
603 case 's':
604 squeeze_blank = true;
605 break;
607 case 't':
608 show_tabs = true;
609 show_nonprinting = true;
610 break;
612 case 'u':
613 /* We provide the -u feature unconditionally. */
614 break;
616 case 'v':
617 show_nonprinting = true;
618 break;
620 case 'A':
621 show_nonprinting = true;
622 show_ends = true;
623 show_tabs = true;
624 break;
626 case 'E':
627 show_ends = true;
628 break;
630 case 'T':
631 show_tabs = true;
632 break;
634 case_GETOPT_HELP_CHAR;
636 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
638 default:
639 usage (EXIT_FAILURE);
643 /* Get device, i-node number, and optimal blocksize of output. */
645 if (fstat (STDOUT_FILENO, &stat_buf) < 0)
646 die (EXIT_FAILURE, errno, _("standard output"));
648 /* Optimal size of i/o operations of output. */
649 idx_t outsize = io_blksize (stat_buf);
651 /* Device and I-node number of the output. */
652 dev_t out_dev = stat_buf.st_dev;
653 ino_t out_ino = stat_buf.st_ino;
655 /* True if the output is a regular file. */
656 bool out_isreg = S_ISREG (stat_buf.st_mode) != 0;
658 if (! (number || show_ends || squeeze_blank))
660 file_open_mode |= O_BINARY;
661 xset_binary_mode (STDOUT_FILENO, O_BINARY);
664 /* Main loop. */
666 infile = "-";
667 int argind = optind;
668 bool ok = true;
669 idx_t page_size = getpagesize ();
673 if (argind < argc)
674 infile = argv[argind];
676 bool reading_stdin = STREQ (infile, "-");
677 if (reading_stdin)
679 have_read_stdin = true;
680 input_desc = STDIN_FILENO;
681 if (file_open_mode & O_BINARY)
682 xset_binary_mode (STDIN_FILENO, O_BINARY);
684 else
686 input_desc = open (infile, file_open_mode);
687 if (input_desc < 0)
689 error (0, errno, "%s", quotef (infile));
690 ok = false;
691 continue;
695 if (fstat (input_desc, &stat_buf) < 0)
697 error (0, errno, "%s", quotef (infile));
698 ok = false;
699 goto contin;
702 /* Optimal size of i/o operations of input. */
703 idx_t insize = io_blksize (stat_buf);
705 fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
707 /* Don't copy a nonempty regular file to itself, as that would
708 merely exhaust the output device. It's better to catch this
709 error earlier rather than later. */
711 if (out_isreg
712 && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino
713 && lseek (input_desc, 0, SEEK_CUR) < stat_buf.st_size)
715 error (0, 0, _("%s: input file is output file"), quotef (infile));
716 ok = false;
717 goto contin;
720 /* Pointer to the input buffer. */
721 char *inbuf;
723 /* Select which version of 'cat' to use. If any format-oriented
724 options were given use 'cat'; if not, use 'copy_cat' if it
725 works, 'simple_cat' otherwise. */
727 if (! (number || show_ends || show_nonprinting
728 || show_tabs || squeeze_blank))
730 int copy_cat_status =
731 out_isreg && S_ISREG (stat_buf.st_mode) ? copy_cat () : 0;
732 if (copy_cat_status != 0)
734 inbuf = NULL;
735 ok &= 0 < copy_cat_status;
737 else
739 insize = MAX (insize, outsize);
740 inbuf = xalignalloc (page_size, insize);
741 ok &= simple_cat (inbuf, insize);
744 else
746 /* Allocate, with an extra byte for a newline sentinel. */
747 inbuf = xalignalloc (page_size, insize + 1);
749 /* Why are
750 (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN)
751 bytes allocated for the output buffer?
753 A test whether output needs to be written is done when the input
754 buffer empties or when a newline appears in the input. After
755 output is written, at most (OUTSIZE - 1) bytes will remain in the
756 buffer. Now INSIZE bytes of input is read. Each input character
757 may grow by a factor of 4 (by the prepending of M-^). If all
758 characters do, and no newlines appear in this block of input, we
759 will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer.
760 If the last character in the preceding block of input was a
761 newline, a line number may be written (according to the given
762 options) as the first thing in the output buffer. (Done after the
763 new input is read, but before processing of the input begins.)
764 A line number requires seldom more than LINE_COUNTER_BUF_LEN
765 positions.
767 Align the output buffer to a page size boundary, for efficiency
768 on some paging implementations. */
770 idx_t bufsize;
771 if (INT_MULTIPLY_WRAPV (insize, 4, &bufsize)
772 || INT_ADD_WRAPV (bufsize, outsize, &bufsize)
773 || INT_ADD_WRAPV (bufsize, LINE_COUNTER_BUF_LEN - 1, &bufsize))
774 xalloc_die ();
775 char *outbuf = xalignalloc (page_size, bufsize);
777 ok &= cat (inbuf, insize, outbuf, outsize, show_nonprinting,
778 show_tabs, number, number_nonblank, show_ends,
779 squeeze_blank);
781 alignfree (outbuf);
784 alignfree (inbuf);
786 contin:
787 if (!reading_stdin && close (input_desc) < 0)
789 error (0, errno, "%s", quotef (infile));
790 ok = false;
793 while (++argind < argc);
795 if (pending_cr)
797 if (full_write (STDOUT_FILENO, "\r", 1) != 1)
798 die (EXIT_FAILURE, errno, _("write error"));
801 if (have_read_stdin && close (STDIN_FILENO) < 0)
802 die (EXIT_FAILURE, errno, _("closing standard input"));
804 return ok ? EXIT_SUCCESS : EXIT_FAILURE;