maint: go back to using ‘error’
[coreutils.git] / src / tac.c
blobe52d4b7f26d93f95fda37f7ade95fb2769d8cce3
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
25 follows in the file.
27 Options:
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
44 #include <regex.h>
46 #include "filenamecat.h"
47 #include "safe-read.h"
48 #include "stdlib--.h"
49 #include "xbinary-io.h"
51 /* The official name of this program (e.g., no 'g' prefix). */
52 #define PROGRAM_NAME "tac"
54 #define AUTHORS \
55 proper_name ("Jay Lepreau"), \
56 proper_name ("David MacKenzie")
58 #if defined __MSDOS__ || defined _WIN32
59 /* Define this to non-zero on systems for which the regular mechanism
60 (of unlinking an open file and expecting to be able to write, seek
61 back to the beginning, then reread it) doesn't work. E.g., on Windows
62 and DOS systems. */
63 # define DONT_UNLINK_WHILE_OPEN 1
64 #endif
67 #ifndef DEFAULT_TMPDIR
68 # define DEFAULT_TMPDIR "/tmp"
69 #endif
71 /* The number of bytes per atomic read. */
72 #define INITIAL_READSIZE 8192
74 /* The number of bytes per atomic write. */
75 #define WRITESIZE 8192
77 /* The string that separates the records of the file. */
78 static char const *separator;
80 /* True if we have ever read standard input. */
81 static bool have_read_stdin = false;
83 /* If true, print 'separator' along with the record preceding it
84 in the file; otherwise with the record following it. */
85 static bool separator_ends_record;
87 /* 0 if 'separator' is to be matched as a regular expression;
88 otherwise, the length of 'separator', used as a sentinel to
89 stop the search. */
90 static size_t sentinel_length;
92 /* The length of a match with 'separator'. If 'sentinel_length' is 0,
93 'match_length' is computed every time a match succeeds;
94 otherwise, it is simply the length of 'separator'. */
95 static size_t match_length;
97 /* The input buffer. */
/* Note: main advances this pointer past the start of the allocation
   (by 'sentinel_length', or by 1 in regex mode), so it must not be
   passed to free/realloc without first undoing that offset. */
98 static char *G_buffer;
100 /* The number of bytes to read at once into 'G_buffer'. */
101 static size_t read_size;
103 /* The size of 'G_buffer'. This is read_size * 2 + sentinel_length + 2.
104 The extra 2 bytes allow 'past_end' to have a value beyond the
105 end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
/* Note: the formula above is what tac_seekable uses when it grows the
   buffer; main's initial allocation is 2 * (read_size + sentinel_length + 1). */
106 static size_t G_buffer_size;
108 /* The compiled regular expression representing 'separator'. */
109 static struct re_pattern_buffer compiled_separator;
/* Fastmap storage that main installs in 'compiled_separator'. */
110 static char compiled_separator_fastmap[UCHAR_MAX + 1];
/* Match registers filled in by re_search in tac_seekable. */
111 static struct re_registers regs;
/* Long options passed to getopt_long in main; the last field of each
   entry is the equivalent short option character. */
113 static struct option const longopts[] =
115 {"before", no_argument, nullptr, 'b'},
116 {"regex", no_argument, nullptr, 'r'},
117 {"separator", required_argument, nullptr, 's'},
118 {GETOPT_HELP_OPTION_DECL},
119 {GETOPT_VERSION_OPTION_DECL},
120 {nullptr, 0, nullptr, 0}
/* Print usage information and exit with STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help is called;
   otherwise the full help text (synopsis, option list and the shared
   help/version/ancillary notes) is written to standard output. */
123 void
124 usage (int status)
126 if (status != EXIT_SUCCESS)
127 emit_try_help ();
128 else
130 printf (_("\
131 Usage: %s [OPTION]... [FILE]...\n\
133 program_name);
134 fputs (_("\
135 Write each FILE to standard output, last line first.\n\
136 "), stdout);
138 emit_stdin_note ();
139 emit_mandatory_arg_note ();
141 fputs (_("\
142 -b, --before attach the separator before instead of after\n\
143 -r, --regex interpret the separator as a regular expression\n\
144 -s, --separator=STRING use STRING as the separator instead of newline\n\
145 "), stdout);
146 fputs (HELP_OPTION_DESCRIPTION, stdout);
147 fputs (VERSION_OPTION_DESCRIPTION, stdout);
148 emit_ancillary_info (PROGRAM_NAME);
150 exit (status);
153 /* Print the characters from START to PAST_END - 1.
154 If START is null, just flush the buffer. */
156 static void
157 output (char const *start, char const *past_end)
159 static char buffer[WRITESIZE];
160 static size_t bytes_in_buffer = 0;
161 size_t bytes_to_add = past_end - start;
162 size_t bytes_available = WRITESIZE - bytes_in_buffer;
164 if (start == 0)
166 fwrite (buffer, 1, bytes_in_buffer, stdout);
167 bytes_in_buffer = 0;
168 return;
171 /* Write out as many full buffers as possible. */
172 while (bytes_to_add >= bytes_available)
174 memcpy (buffer + bytes_in_buffer, start, bytes_available);
175 bytes_to_add -= bytes_available;
176 start += bytes_available;
177 fwrite (buffer, 1, WRITESIZE, stdout);
178 bytes_in_buffer = 0;
179 bytes_available = WRITESIZE;
182 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
183 bytes_in_buffer += bytes_to_add;
186 /* Print in reverse the file open on descriptor FD for reading FILE.
187 The file is already positioned at FILE_POS, which should be near its end.
188 Return true if successful. */
/* INPUT_FD is read with safe_read and repositioned with lseek; FILE is
   used only in diagnostics.  The function works on the file-scope state
   (G_buffer, read_size, separator, sentinel_length, match_length) and may
   enlarge G_buffer and read_size when a record does not fit.
   It returns false after a read error.  A failed lseek is diagnosed with
   error (0, ...) but is not treated as fatal here.  In regex mode an
   over-long record or a regex search failure calls error (EXIT_FAILURE, ...). */
190 static bool
191 tac_seekable (int input_fd, char const *file, off_t file_pos)
193 /* Pointer to the location in 'G_buffer' where the search for
194 the next separator will begin. */
195 char *match_start;
197 /* Pointer to one past the rightmost character in 'G_buffer' that
198 has not been printed yet. */
199 char *past_end;
201 /* Length of the record growing in 'G_buffer'. */
202 size_t saved_record_size;
204 /* True if 'output' has not been called yet for any file.
205 Only used when the separator is attached to the preceding record. */
206 bool first_time = true;
207 char first_char = *separator; /* Speed optimization, non-regexp. */
208 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
209 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
211 /* Arrange for the first read to lop off enough to leave the rest of the
212 file a multiple of 'read_size'. Since 'read_size' can change, this may
213 not always hold during the program run, but since it usually will, leave
214 it here for i/o efficiency (page/sector boundaries and all that).
215 Note: the efficiency gain has not been verified. */
216 size_t remainder = file_pos % read_size;
217 if (remainder != 0)
219 file_pos -= remainder;
220 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
221 error (0, errno, _("%s: seek failed"), quotef (file));
224 /* Scan backward, looking for end of file. This caters to proc-like
225 file systems where the file size is just an estimate. */
226 while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
227 && file_pos != 0)
229 off_t rsize = read_size;
230 if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
231 error (0, errno, _("%s: seek failed"), quotef (file));
232 file_pos -= read_size;
235 /* Now scan forward, looking for end of file. */
236 while (saved_record_size == read_size)
238 size_t nread = safe_read (input_fd, G_buffer, read_size);
239 if (nread == 0)
240 break;
241 saved_record_size = nread;
242 if (saved_record_size == SAFE_READ_ERROR)
243 break;
244 file_pos += nread;
247 if (saved_record_size == SAFE_READ_ERROR)
249 error (0, errno, _("%s: read error"), quotef (file));
250 return false;
253 match_start = past_end = G_buffer + saved_record_size;
254 /* For non-regexp search, move past impossible positions for a match. */
255 if (sentinel_length)
256 match_start -= match_length1;
/* Main loop: each iteration either finds a separator and emits one
   record, or runs off the front of 'G_buffer' and pulls the preceding
   'read_size' bytes of the file in ahead of the still-pending data.
   The only exits are the 'return' statements inside. */
258 while (true)
260 /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
261 with 'separator'; for speed, use strncmp if 'separator' contains no
262 metacharacters.
263 If the match succeeds, set 'match_start' to point to the start of
264 the match and 'match_length' to the length of the match.
265 Otherwise, make 'match_start' < 'G_buffer'. */
266 if (sentinel_length == 0)
268 size_t i = match_start - G_buffer;
269 regoff_t ri = i;
270 regoff_t range = 1 - ri;
271 regoff_t ret;
/* 'range' is 1 - i; it can exceed 1 only if converting 'i' to regoff_t
   produced a negative value, i.e. the pending data is too large for
   the regex offset type. */
273 if (1 < range)
274 error (EXIT_FAILURE, 0, _("record too large"));
/* range == 1 means i == 0: nothing left to search in the buffer. */
276 if (range == 1
277 || ((ret = re_search (&compiled_separator, G_buffer,
278 i, i - 1, range, &regs))
279 == -1))
280 match_start = G_buffer - 1;
281 else if (ret == -2)
282 error (EXIT_FAILURE, 0,
283 _("error in regular expression search"));
284 else
286 match_start = G_buffer + regs.start[0];
287 match_length = regs.end[0] - regs.start[0];
290 else
/* This scan has no explicit lower bound.  main stores a copy of
   'separator' immediately before 'G_buffer', so the loop is expected to
   stop there at the latest, leaving 'match_start' < 'G_buffer'. */
292 /* 'match_length' is constant for non-regexp boundaries. */
293 while (*--match_start != first_char
294 || (match_length1 && !STREQ_LEN (match_start + 1, separator1,
295 match_length1)))
296 /* Do nothing. */ ;
299 /* Check whether we backed off the front of 'G_buffer' without finding
300 a match for 'separator'. */
301 if (match_start < G_buffer)
303 if (file_pos == 0)
305 /* Hit the beginning of the file; print the remaining record. */
306 output (G_buffer, past_end);
307 return true;
310 saved_record_size = past_end - G_buffer;
311 if (saved_record_size > read_size)
313 /* 'G_buffer_size' is about twice 'read_size', so since
314 we want to read in another 'read_size' bytes before
315 the data already in 'G_buffer', we need to increase
316 'G_buffer_size'. */
317 char *newbuffer;
318 size_t offset = sentinel_length ? sentinel_length : 1;
319 size_t old_G_buffer_size = G_buffer_size;
321 read_size *= 2;
322 G_buffer_size = read_size * 2 + sentinel_length + 2;
/* A smaller result means the size computation wrapped around. */
323 if (G_buffer_size < old_G_buffer_size)
324 xalloc_die ();
/* 'G_buffer' points 'offset' bytes into its allocation, so give
   xrealloc the real start and re-apply the offset afterwards. */
325 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
326 newbuffer += offset;
327 G_buffer = newbuffer;
330 /* Back up to the start of the next bufferfull of the file. */
331 if (file_pos >= read_size)
332 file_pos -= read_size;
333 else
335 read_size = file_pos;
336 file_pos = 0;
338 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
339 error (0, errno, _("%s: seek failed"), quotef (file));
341 /* Shift the pending record data right to make room for the new.
342 The source and destination regions probably overlap. */
343 memmove (G_buffer + read_size, G_buffer, saved_record_size);
344 past_end = G_buffer + read_size + saved_record_size;
345 /* For non-regexp searches, avoid unnecessary scanning. */
346 if (sentinel_length)
347 match_start = G_buffer + read_size;
348 else
349 match_start = past_end;
/* A short read is treated as an error here, not only SAFE_READ_ERROR. */
351 if (safe_read (input_fd, G_buffer, read_size) != read_size)
353 error (0, errno, _("%s: read error"), quotef (file));
354 return false;
357 else
359 /* Found a match of 'separator'. */
360 if (separator_ends_record)
362 char *match_end = match_start + match_length;
364 /* If this match of 'separator' isn't at the end of the
365 file, print the record. */
366 if (!first_time || match_end != past_end)
367 output (match_end, past_end);
368 past_end = match_end;
369 first_time = false;
371 else
373 output (match_start, past_end);
374 past_end = match_start;
377 /* For non-regex matching, we can back up. */
378 if (sentinel_length > 0)
379 match_start -= match_length - 1;
384 #if DONT_UNLINK_WHILE_OPEN
386 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
387 Using atexit like this is wrong, since it can fail
388 when called e.g. 32 or more times.
389 But this isn't a big deal, since the code is used only on WOE/DOS
390 systems, and few people invoke tac on that many nonseekable files. */
392 static char const *file_to_remove;
393 static FILE *fp_to_close;
395 static void
396 unlink_tempfile (void)
398 fclose (fp_to_close);
399 unlink (file_to_remove);
402 static void
403 record_or_unlink_tempfile (char const *fn, FILE *fp)
405 if (!file_to_remove)
407 file_to_remove = fn;
408 fp_to_close = fp;
409 atexit (unlink_tempfile);
413 #else
415 static void
416 record_or_unlink_tempfile (char const *fn, MAYBE_UNUSED FILE *fp)
418 unlink (fn);
421 #endif
423 /* A wrapper around mkstemp that gives us both an open stream pointer,
424 FP, and the corresponding FILE_NAME. Always return the same FP/name
425 pair, rewinding/truncating it upon each reuse. */
426 static bool
427 temp_stream (FILE **fp, char **file_name)
429 static char *tempfile = nullptr;
430 static FILE *tmp_fp;
431 if (tempfile == nullptr)
433 char const *t = getenv ("TMPDIR");
434 char const *tempdir = t ? t : DEFAULT_TMPDIR;
435 tempfile = mfile_name_concat (tempdir, "tacXXXXXX", nullptr);
436 if (tempdir == nullptr)
438 error (0, 0, _("memory exhausted"));
439 return false;
442 /* FIXME: there's a small window between a successful mkstemp call
443 and the unlink that's performed by record_or_unlink_tempfile.
444 If we're interrupted in that interval, this code fails to remove
445 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
446 the window is much larger -- it extends to the atexit-called
447 unlink_tempfile.
448 FIXME: clean up upon fatal signal. Don't block them, in case
449 $TMPFILE is a remote file system. */
451 int fd = mkstemp (tempfile);
452 if (fd < 0)
454 error (0, errno, _("failed to create temporary file in %s"),
455 quoteaf (tempdir));
456 goto Reset;
459 tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
460 if (! tmp_fp)
462 error (0, errno, _("failed to open %s for writing"),
463 quoteaf (tempfile));
464 close (fd);
465 unlink (tempfile);
466 Reset:
467 free (tempfile);
468 tempfile = nullptr;
469 return false;
472 record_or_unlink_tempfile (tempfile, tmp_fp);
474 else
476 clearerr (tmp_fp);
477 if (fseeko (tmp_fp, 0, SEEK_SET) < 0
478 || ftruncate (fileno (tmp_fp), 0) < 0)
480 error (0, errno, _("failed to rewind stream for %s"),
481 quoteaf (tempfile));
482 return false;
486 *fp = tmp_fp;
487 *file_name = tempfile;
488 return true;
491 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
492 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
493 and file name. Return the number of bytes copied, or -1 on error. */
495 static off_t
496 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
498 FILE *fp;
499 char *file_name;
500 uintmax_t bytes_copied = 0;
501 if (!temp_stream (&fp, &file_name))
502 return -1;
504 while (true)
506 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
507 if (bytes_read == 0)
508 break;
509 if (bytes_read == SAFE_READ_ERROR)
511 error (0, errno, _("%s: read error"), quotef (file));
512 return -1;
515 if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
517 error (0, errno, _("%s: write error"), quotef (file_name));
518 return -1;
521 /* Implicitly <= OFF_T_MAX due to preceding fwrite(),
522 but unsigned type used to avoid compiler warnings
523 not aware of this fact. */
524 bytes_copied += bytes_read;
527 if (fflush (fp) != 0)
529 error (0, errno, _("%s: write error"), quotef (file_name));
530 return -1;
533 *g_tmp = fp;
534 *g_tempfile = file_name;
535 return bytes_copied;
/* Spool INPUT_FD (named FILE in diagnostics) into a temporary file,
   then print that temporary file in reverse.
   Return true if both steps succeed.  */

static bool
tac_nonseekable (int input_fd, char const *file)
{
  FILE *spool;
  char *spool_name;
  off_t spool_size = copy_to_temp (&spool, &spool_name, input_fd, file);

  /* A negative size means the copy failed and was already diagnosed.  */
  return (0 <= spool_size
          && tac_seekable (fileno (spool), spool_name, spool_size));
}
554 /* Print FILE in reverse, copying it to a temporary
555 file first if it is not seekable.
556 Return true if successful. */
558 static bool
559 tac_file (char const *filename)
561 bool ok;
562 off_t file_size;
563 int fd;
564 bool is_stdin = STREQ (filename, "-");
566 if (is_stdin)
568 have_read_stdin = true;
569 fd = STDIN_FILENO;
570 filename = _("standard input");
571 xset_binary_mode (STDIN_FILENO, O_BINARY);
573 else
575 fd = open (filename, O_RDONLY | O_BINARY);
576 if (fd < 0)
578 error (0, errno, _("failed to open %s for reading"),
579 quoteaf (filename));
580 return false;
584 file_size = lseek (fd, 0, SEEK_END);
586 ok = (file_size < 0 || isatty (fd)
587 ? tac_nonseekable (fd, filename)
588 : tac_seekable (fd, filename, file_size));
590 if (!is_stdin && close (fd) != 0)
592 error (0, errno, _("%s: read error"), quotef (filename));
593 ok = false;
595 return ok;
599 main (int argc, char **argv)
601 char const *error_message; /* Return value from re_compile_pattern. */
602 int optc;
603 bool ok;
604 size_t half_buffer_size;
606 /* Initializer for file_list if no file-arguments
607 were specified on the command line. */
608 static char const *const default_file_list[] = {"-", nullptr};
609 char const *const *file;
611 initialize_main (&argc, &argv);
612 set_program_name (argv[0]);
613 setlocale (LC_ALL, "");
614 bindtextdomain (PACKAGE, LOCALEDIR);
615 textdomain (PACKAGE);
617 atexit (close_stdout);
619 separator = "\n";
620 sentinel_length = 1;
621 separator_ends_record = true;
623 while ((optc = getopt_long (argc, argv, "brs:", longopts, nullptr)) != -1)
625 switch (optc)
627 case 'b':
628 separator_ends_record = false;
629 break;
630 case 'r':
631 sentinel_length = 0;
632 break;
633 case 's':
634 separator = optarg;
635 break;
636 case_GETOPT_HELP_CHAR;
637 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
638 default:
639 usage (EXIT_FAILURE);
643 if (sentinel_length == 0)
645 if (*separator == 0)
646 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
648 compiled_separator.buffer = nullptr;
649 compiled_separator.allocated = 0;
650 compiled_separator.fastmap = compiled_separator_fastmap;
651 compiled_separator.translate = nullptr;
652 error_message = re_compile_pattern (separator, strlen (separator),
653 &compiled_separator);
654 if (error_message)
655 error (EXIT_FAILURE, 0, "%s", (error_message));
657 else
658 match_length = sentinel_length = *separator ? strlen (separator) : 1;
660 read_size = INITIAL_READSIZE;
661 while (sentinel_length >= read_size / 2)
663 if (SIZE_MAX / 2 < read_size)
664 xalloc_die ();
665 read_size *= 2;
667 half_buffer_size = read_size + sentinel_length + 1;
668 G_buffer_size = 2 * half_buffer_size;
669 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
670 xalloc_die ();
671 G_buffer = xmalloc (G_buffer_size);
672 if (sentinel_length)
674 memcpy (G_buffer, separator, sentinel_length + 1);
675 G_buffer += sentinel_length;
677 else
679 ++G_buffer;
682 file = (optind < argc
683 ? (char const *const *) &argv[optind]
684 : default_file_list);
686 xset_binary_mode (STDOUT_FILENO, O_BINARY);
689 ok = true;
690 for (size_t i = 0; file[i]; ++i)
691 ok &= tac_file (file[i]);
694 /* Flush the output buffer. */
695 output ((char *) nullptr, (char *) nullptr);
697 if (have_read_stdin && close (STDIN_FILENO) < 0)
699 error (0, errno, "-");
700 ok = false;
703 main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);