1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
41 #include <sys/types.h>
46 #include "filenamecat.h"
47 #include "safe-read.h"
49 #include "xbinary-io.h"
51 /* The official name of this program (e.g., no 'g' prefix). */
52 #define PROGRAM_NAME "tac"
55 proper_name ("Jay Lepreau"), \
56 proper_name ("David MacKenzie")
58 #if defined __MSDOS__ || defined _WIN32
59 /* Define this to non-zero on systems for which the regular mechanism
60 (of unlinking an open file and expecting to be able to write, seek
61 back to the beginning, then reread it) doesn't work. E.g., on Windows
63 # define DONT_UNLINK_WHILE_OPEN 1
67 #ifndef DEFAULT_TMPDIR
68 # define DEFAULT_TMPDIR "/tmp"
71 /* The number of bytes per atomic read. */
72 #define INITIAL_READSIZE 8192
74 /* The number of bytes per atomic write. */
75 #define WRITESIZE 8192
77 /* The string that separates the records of the file. */
78 static char const *separator
;
80 /* True if we have ever read standard input. */
81 static bool have_read_stdin
= false;
83 /* If true, print 'separator' along with the record preceding it
84 in the file; otherwise with the record following it. */
85 static bool separator_ends_record
;
87 /* 0 if 'separator' is to be matched as a regular expression;
88 otherwise, the length of 'separator', used as a sentinel to
90 static size_t sentinel_length
;
92 /* The length of a match with 'separator'. If 'sentinel_length' is 0,
93 'match_length' is computed every time a match succeeds;
94 otherwise, it is simply the length of 'separator'. */
95 static size_t match_length
;
97 /* The input buffer. */
98 static char *G_buffer
;
100 /* The number of bytes to read at once into 'G_buffer'. */
101 static size_t read_size
;
103 /* The size of 'G_buffer'. This is read_size * 2 + sentinel_length + 2.
104 The extra 2 bytes allow 'past_end' to have a value beyond the
105 end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
106 static size_t G_buffer_size
;
108 /* The compiled regular expression representing 'separator'. */
109 static struct re_pattern_buffer compiled_separator
;
110 static char compiled_separator_fastmap
[UCHAR_MAX
+ 1];
111 static struct re_registers regs
;
/* Long options accepted by getopt_long in main.  The three tac-specific
   entries map onto the short options 'b', 'r' and 's'; the help and
   version entries come from the GETOPT_HELP_OPTION_DECL and
   GETOPT_VERSION_OPTION_DECL macros, and the all-null entry ends the
   table.  */
113 static struct option
const longopts
[] =
115 {"before", no_argument
, nullptr, 'b'},
116 {"regex", no_argument
, nullptr, 'r'},
117 {"separator", required_argument
, nullptr, 's'},
118 {GETOPT_HELP_OPTION_DECL
},
119 {GETOPT_VERSION_OPTION_DECL
},
120 {nullptr, 0, nullptr, 0}
/* Body of the help routine: it branches on STATUS, then emits the
   synopsis, the option list and the standard help/version descriptions.
   NOTE(review): the function header and several statements are missing
   from this view; presumably this is usage (int status), which main
   calls with EXIT_FAILURE -- confirm against the full file.  */
126 if (status
!= EXIT_SUCCESS
)
131 Usage: %s [OPTION]... [FILE]...\n\
135 Write each FILE to standard output, last line first.\n\
139 emit_mandatory_arg_note ();
142 -b, --before attach the separator before instead of after\n\
143 -r, --regex interpret the separator as a regular expression\n\
144 -s, --separator=STRING use STRING as the separator instead of newline\n\
146 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
147 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
148 emit_ancillary_info (PROGRAM_NAME
);
153 /* Print the characters from START to PAST_END - 1.
154 If START is null, just flush the buffer.
    Output is staged in a static WRITESIZE-byte buffer: whenever the
    pending bytes plus the new ones fill it, a full buffer is handed to
    fwrite on stdout, and any tail shorter than a buffer stays pending
    for a later call.  */
157 output (char const *start
, char const *past_end
)
159 static char buffer
[WRITESIZE
];
160 static size_t bytes_in_buffer
= 0;
/* Size of the caller's span, and the free space left in 'buffer'.  */
161 size_t bytes_to_add
= past_end
- start
;
162 size_t bytes_available
= WRITESIZE
- bytes_in_buffer
;
/* Hand the pending bytes to stdout.  */
166 fwrite (buffer
, 1, bytes_in_buffer
, stdout
);
171 /* Write out as many full buffers as possible. */
172 while (bytes_to_add
>= bytes_available
)
174 memcpy (buffer
+ bytes_in_buffer
, start
, bytes_available
);
175 bytes_to_add
-= bytes_available
;
176 start
+= bytes_available
;
177 fwrite (buffer
, 1, WRITESIZE
, stdout
);
179 bytes_available
= WRITESIZE
;
/* Keep the tail that did not fill a whole buffer.  */
182 memcpy (buffer
+ bytes_in_buffer
, start
, bytes_to_add
);
183 bytes_in_buffer
+= bytes_to_add
;
186 /* Print in reverse the file open on descriptor FD for reading FILE.
187 The file is already positioned at FILE_POS, which should be near its end.
188 Return true if successful.
    The data is pulled into 'G_buffer' one 'read_size' chunk at a time,
    working from the end of the file toward its start; each chunk is
    searched backward for 'separator' and every record found is handed
    to 'output'.  */
191 tac_seekable (int input_fd
, char const *file
, off_t file_pos
)
193 /* Pointer to the location in 'G_buffer' where the search for
194 the next separator will begin. */
197 /* Pointer to one past the rightmost character in 'G_buffer' that
198 has not been printed yet. */
201 /* Length of the record growing in 'G_buffer'. */
202 size_t saved_record_size
;
204 /* True if 'output' has not been called yet for any file.
205 Only used when the separator is attached to the preceding record. */
206 bool first_time
= true;
207 char first_char
= *separator
; /* Speed optimization, non-regexp. */
208 char const *separator1
= separator
+ 1; /* Speed optimization, non-regexp. */
209 size_t match_length1
= match_length
- 1; /* Speed optimization, non-regexp. */
211 /* Arrange for the first read to lop off enough to leave the rest of the
212 file a multiple of 'read_size'. Since 'read_size' can change, this may
213 not always hold during the program run, but since it usually will, leave
214 it here for i/o efficiency (page/sector boundaries and all that).
215 Note: the efficiency gain has not been verified. */
216 size_t remainder
= file_pos
% read_size
;
219 file_pos
-= remainder
;
220 if (lseek (input_fd
, file_pos
, SEEK_SET
) < 0)
221 error (0, errno
, _("%s: seek failed"), quotef (file
));
224 /* Scan backward, looking for end of file. This caters to proc-like
225 file systems where the file size is just an estimate. */
226 while ((saved_record_size
= safe_read (input_fd
, G_buffer
, read_size
)) == 0
229 off_t rsize
= read_size
;
230 if (lseek (input_fd
, -rsize
, SEEK_CUR
) < 0)
231 error (0, errno
, _("%s: seek failed"), quotef (file
));
232 file_pos
-= read_size
;
235 /* Now scan forward, looking for end of file. */
236 while (saved_record_size
== read_size
)
238 size_t nread
= safe_read (input_fd
, G_buffer
, read_size
);
241 saved_record_size
= nread
;
242 if (saved_record_size
== SAFE_READ_ERROR
)
247 if (saved_record_size
== SAFE_READ_ERROR
)
249 error (0, errno
, _("%s: read error"), quotef (file
));
/* Both cursors start just past the data that was read.  */
253 match_start
= past_end
= G_buffer
+ saved_record_size
;
254 /* For non-regexp search, move past impossible positions for a match. */
256 match_start
-= match_length1
;
260 /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
261 with 'separator'; for speed, use strncmp if 'separator' contains no
263 If the match succeeds, set 'match_start' to point to the start of
264 the match and 'match_length' to the length of the match.
265 Otherwise, make 'match_start' < 'G_buffer'. */
266 if (sentinel_length
== 0)
/* NOTE(review): in the re_search call below, '®s' looks like a
   mis-decoded '&regs' (the address of the file-scope 'regs' that is
   read right after the search) -- confirm against the original file.  */
268 size_t i
= match_start
- G_buffer
;
270 regoff_t range
= 1 - ri
;
274 error (EXIT_FAILURE
, 0, _("record too large"));
277 || ((ret
= re_search (&compiled_separator
, G_buffer
,
278 i
, i
- 1, range
, ®s
))
280 match_start
= G_buffer
- 1;
282 error (EXIT_FAILURE
, 0,
283 _("error in regular expression search"));
286 match_start
= G_buffer
+ regs
.start
[0];
287 match_length
= regs
.end
[0] - regs
.start
[0];
292 /* 'match_length' is constant for non-regexp boundaries. */
293 while (*--match_start
!= first_char
294 || (match_length1
&& !STREQ_LEN (match_start
+ 1, separator1
,
299 /* Check whether we backed off the front of 'G_buffer' without finding
300 a match for 'separator'. */
301 if (match_start
< G_buffer
)
305 /* Hit the beginning of the file; print the remaining record. */
306 output (G_buffer
, past_end
);
310 saved_record_size
= past_end
- G_buffer
;
311 if (saved_record_size
> read_size
)
313 /* 'G_buffer_size' is about twice 'read_size', so since
314 we want to read in another 'read_size' bytes before
315 the data already in 'G_buffer', we need to increase
318 size_t offset
= sentinel_length
? sentinel_length
: 1;
319 size_t old_G_buffer_size
= G_buffer_size
;
322 G_buffer_size
= read_size
* 2 + sentinel_length
+ 2;
323 if (G_buffer_size
< old_G_buffer_size
)
/* The pointer handed to xrealloc is 'offset' bytes before G_buffer,
   i.e. sentinel_length bytes, or 1 when sentinel_length is 0.  */
325 newbuffer
= xrealloc (G_buffer
- offset
, G_buffer_size
);
327 G_buffer
= newbuffer
;
330 /* Back up to the start of the next bufferfull of the file. */
331 if (file_pos
>= read_size
)
332 file_pos
-= read_size
;
335 read_size
= file_pos
;
338 if (lseek (input_fd
, file_pos
, SEEK_SET
) < 0)
339 error (0, errno
, _("%s: seek failed"), quotef (file
));
341 /* Shift the pending record data right to make room for the new.
342 The source and destination regions probably overlap. */
343 memmove (G_buffer
+ read_size
, G_buffer
, saved_record_size
);
344 past_end
= G_buffer
+ read_size
+ saved_record_size
;
345 /* For non-regexp searches, avoid unnecessary scanning. */
347 match_start
= G_buffer
+ read_size
;
349 match_start
= past_end
;
/* Anything other than a full 'read_size' read is reported as an error.  */
351 if (safe_read (input_fd
, G_buffer
, read_size
) != read_size
)
353 error (0, errno
, _("%s: read error"), quotef (file
));
359 /* Found a match of 'separator'. */
360 if (separator_ends_record
)
362 char *match_end
= match_start
+ match_length
;
364 /* If this match of 'separator' isn't at the end of the
365 file, print the record. */
366 if (!first_time
|| match_end
!= past_end
)
367 output (match_end
, past_end
);
368 past_end
= match_end
;
/* Print from the start of the match to the end of the unprinted data;
   the match start then becomes the new end.  */
373 output (match_start
, past_end
);
374 past_end
= match_start
;
377 /* For non-regex matching, we can back up. */
378 if (sentinel_length
> 0)
379 match_start
-= match_length
- 1;
384 #if DONT_UNLINK_WHILE_OPEN
386 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
387 Using atexit like this is wrong, since it can fail
388 when called e.g. 32 or more times.
389 But this isn't a big deal, since the code is used only on WOE/DOS
390 systems, and few people invoke tac on that many nonseekable files. */
392 static char const *file_to_remove
;
393 static FILE *fp_to_close
;
/* Close 'fp_to_close' and unlink 'file_to_remove'.  Registered with
   atexit by record_or_unlink_tempfile.  */
396 unlink_tempfile (void)
398 fclose (fp_to_close
);
399 unlink (file_to_remove
);
/* Variant used under DONT_UNLINK_WHILE_OPEN: arrange for
   unlink_tempfile to run at exit.
   NOTE(review): the statements that store FN and FP in 'file_to_remove'
   and 'fp_to_close' are not visible in this view -- confirm.  */
403 record_or_unlink_tempfile (char const *fn
, FILE *fp
)
409 atexit (unlink_tempfile
);
/* Variant that does not use FP (it is marked MAYBE_UNUSED).
   NOTE(review): the body is not visible in this view; the FIXME in
   temp_stream says record_or_unlink_tempfile performs the unlink, so
   presumably this one unlinks FN right away -- confirm.  */
416 record_or_unlink_tempfile (char const *fn
, MAYBE_UNUSED
FILE *fp
)
423 /* A wrapper around mkstemp that gives us both an open stream pointer,
424 FP, and the corresponding FILE_NAME. Always return the same FP/name
425 pair, rewinding/truncating it upon each reuse.
    On first use the file is created from the template "tacXXXXXX" in
    $TMPDIR, or in DEFAULT_TMPDIR when TMPDIR is unset; on later calls
    the existing stream is rewound and truncated to zero length.
    Failures are reported through 'error', and the caller treats a
    false result as failure.  */
427 temp_stream (FILE **fp
, char **file_name
)
429 static char *tempfile
= nullptr;
/* First call: no temporary file name has been built yet.  */
431 if (tempfile
== nullptr)
433 char const *t
= getenv ("TMPDIR");
434 char const *tempdir
= t
? t
: DEFAULT_TMPDIR
;
435 tempfile
= mfile_name_concat (tempdir
, "tacXXXXXX", nullptr);
/* Test the freshly built name, not 'tempdir': 'tempdir' is either
   $TMPDIR or DEFAULT_TMPDIR and so is never null here, whereas the
   result of mfile_name_concat is what is null when the name could
   not be allocated.  */
436 if (tempfile
== nullptr)
438 error (0, 0, _("memory exhausted"));
442 /* FIXME: there's a small window between a successful mkstemp call
443 and the unlink that's performed by record_or_unlink_tempfile.
444 If we're interrupted in that interval, this code fails to remove
445 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
446 the window is much larger -- it extends to the atexit-called
    unlink_tempfile.
448 FIXME: clean up upon fatal signal. Don't block them, in case
449 $TMPDIR is a remote file system. */
451 int fd
= mkstemp (tempfile
);
454 error (0, errno
, _("failed to create temporary file in %s"),
/* Wrap the descriptor in a read/write stream, binary where O_BINARY
   is non-zero.  */
459 tmp_fp
= fdopen (fd
, (O_BINARY
? "w+b" : "w+"));
462 error (0, errno
, _("failed to open %s for writing"),
472 record_or_unlink_tempfile (tempfile
, tmp_fp
);
/* Reuse: seek back to the start and drop the old contents.  */
477 if (fseeko (tmp_fp
, 0, SEEK_SET
) < 0
478 || ftruncate (fileno (tmp_fp
), 0) < 0)
480 error (0, errno
, _("failed to rewind stream for %s"),
487 *file_name
= tempfile
;
491 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
492 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
493 and file name. Return the number of bytes copied, or -1 on error.
    The temporary stream comes from temp_stream; data moves through
    'G_buffer' in chunks of up to 'read_size' bytes, and the stream is
    flushed once the copy is complete.  */
496 copy_to_temp (FILE **g_tmp
, char **g_tempfile
, int input_fd
, char const *file
)
500 uintmax_t bytes_copied
= 0;
501 if (!temp_stream (&fp
, &file_name
))
506 size_t bytes_read
= safe_read (input_fd
, G_buffer
, read_size
);
/* A failed read is reported against the input FILE ...  */
509 if (bytes_read
== SAFE_READ_ERROR
)
511 error (0, errno
, _("%s: read error"), quotef (file
));
/* ... whereas a short write is reported against the temporary file.  */
515 if (fwrite (G_buffer
, 1, bytes_read
, fp
) != bytes_read
)
517 error (0, errno
, _("%s: write error"), quotef (file_name
));
521 /* Implicitly <= OFF_T_MAX due to preceding fwrite(),
522 but unsigned type used to avoid compiler warnings
523 not aware of this fact. */
524 bytes_copied
+= bytes_read
;
/* Push the buffered data out to the temporary file; presumably so
   that the descriptor-based reads done later see all of it.  */
527 if (fflush (fp
) != 0)
529 error (0, errno
, _("%s: write error"), quotef (file_name
));
534 *g_tempfile
= file_name
;
538 /* Copy INPUT_FD to a temporary, then tac that file.
539 Return true if successful.
    The byte count returned by copy_to_temp doubles as the starting
    file position handed to tac_seekable.  */
542 tac_nonseekable (int input_fd
, char const *file
)
546 off_t bytes_copied
= copy_to_temp (&tmp_stream
, &tmp_file
, input_fd
, file
);
/* A negative count means copy_to_temp failed.  */
547 if (bytes_copied
< 0)
/* Reverse the temporary file through its underlying descriptor.  */
550 bool ok
= tac_seekable (fileno (tmp_stream
), tmp_file
, bytes_copied
);
554 /* Print FILE in reverse, copying it to a temporary
555 file first if it is not seekable.
556 Return true if successful. */
559 tac_file (char const *filename
)
/* A FILENAME of "-" selects standard input.  */
564 bool is_stdin
= STREQ (filename
, "-");
/* Remember that stdin was used (main closes it at the end) and switch
   to a translatable name for diagnostics.  */
568 have_read_stdin
= true;
570 filename
= _("standard input");
571 xset_binary_mode (STDIN_FILENO
, O_BINARY
);
575 fd
= open (filename
, O_RDONLY
| O_BINARY
);
578 error (0, errno
, _("failed to open %s for reading"),
/* Seeking to the end yields the size, which becomes the starting
   position for tac_seekable.  */
584 file_size
= lseek (fd
, 0, SEEK_END
);
/* A failed seek or a terminal selects the copy-to-temporary path.  */
586 ok
= (file_size
< 0 || isatty (fd
)
587 ? tac_nonseekable (fd
, filename
)
588 : tac_seekable (fd
, filename
, file_size
));
/* Only a descriptor opened here is closed here.  */
590 if (!is_stdin
&& close (fd
) != 0)
592 error (0, errno
, _("%s: read error"), quotef (filename
));
/* Entry point.  Parses the options in "brs:", prepares the separator
   (literal sentinel or compiled regular expression) and the shared
   input buffer, reverses each operand in turn -- or "-" alone when no
   operand is given -- and exits with EXIT_SUCCESS only if 'ok' is still
   true at the end.  */
599 main (int argc
, char **argv
)
601 char const *error_message
; /* Return value from re_compile_pattern. */
604 size_t half_buffer_size
;
606 /* Initializer for file_list if no file-arguments
607 were specified on the command line. */
608 static char const *const default_file_list
[] = {"-", nullptr};
609 char const *const *file
;
611 initialize_main (&argc
, &argv
);
612 set_program_name (argv
[0]);
613 setlocale (LC_ALL
, "");
614 bindtextdomain (PACKAGE
, LOCALEDIR
);
615 textdomain (PACKAGE
);
617 atexit (close_stdout
);
/* Default: the separator ends a record; the option loop below can set
   this to false.  */
621 separator_ends_record
= true;
623 while ((optc
= getopt_long (argc
, argv
, "brs:", longopts
, nullptr)) != -1)
628 separator_ends_record
= false;
636 case_GETOPT_HELP_CHAR
;
637 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
639 usage (EXIT_FAILURE
);
/* sentinel_length == 0 selects regular-expression matching (see the
   comment on its declaration).  */
643 if (sentinel_length
== 0)
646 error (EXIT_FAILURE
, 0, _("separator cannot be empty"));
/* Start from an empty pattern buffer that uses the file-scope fastmap
   and no translation table.  */
648 compiled_separator
.buffer
= nullptr;
649 compiled_separator
.allocated
= 0;
650 compiled_separator
.fastmap
= compiled_separator_fastmap
;
651 compiled_separator
.translate
= nullptr;
652 error_message
= re_compile_pattern (separator
, strlen (separator
),
653 &compiled_separator
);
655 error (EXIT_FAILURE
, 0, "%s", (error_message
));
/* Both lengths become the separator's length, or 1 when the separator
   is empty.  */
658 match_length
= sentinel_length
= *separator
? strlen (separator
) : 1;
660 read_size
= INITIAL_READSIZE
;
/* Loop while the separator is at least half of read_size.
   NOTE(review): the statement that enlarges read_size is not visible
   in this view.  */
661 while (sentinel_length
>= read_size
/ 2)
663 if (SIZE_MAX
/ 2 < read_size
)
/* G_buffer_size is twice (read_size + sentinel_length + 1); the test
   below catches the case where that arithmetic wrapped around.  */
667 half_buffer_size
= read_size
+ sentinel_length
+ 1;
668 G_buffer_size
= 2 * half_buffer_size
;
669 if (! (read_size
< half_buffer_size
&& half_buffer_size
< G_buffer_size
))
671 G_buffer
= xmalloc (G_buffer_size
);
/* Copy sentinel_length + 1 bytes of the separator to the front of the
   allocation (for a non-empty separator: its text plus the terminating
   NUL), then advance G_buffer by sentinel_length.  */
674 memcpy (G_buffer
, separator
, sentinel_length
+ 1);
675 G_buffer
+= sentinel_length
;
/* Use the operands that follow the options, or "-" alone when there
   are none.  */
682 file
= (optind
< argc
683 ? (char const *const *) &argv
[optind
]
684 : default_file_list
);
686 xset_binary_mode (STDOUT_FILENO
, O_BINARY
);
/* One failing operand makes the overall result false without stopping
   the loop.  */
690 for (size_t i
= 0; file
[i
]; ++i
)
691 ok
&= tac_file (file
[i
]);
694 /* Flush the output buffer. */
695 output ((char *) nullptr, (char *) nullptr);
/* Standard input is closed here, once, if any operand used it.  */
697 if (have_read_stdin
&& close (STDIN_FILENO
) < 0)
699 error (0, errno
, "-");
703 main_exit (ok
? EXIT_SUCCESS
: EXIT_FAILURE
);