1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
41 #include <sys/types.h>
47 #include "filenamecat.h"
50 #include "safe-read.h"
54 /* The official name of this program (e.g., no 'g' prefix). */
55 #define PROGRAM_NAME "tac"
58 proper_name ("Jay Lepreau"), \
59 proper_name ("David MacKenzie")
61 #if defined __MSDOS__ || defined _WIN32
62 /* Define this to non-zero on systems for which the regular mechanism
63 (of unlinking an open file and expecting to be able to write, seek
64 back to the beginning, then reread it) doesn't work. E.g., on Windows
66 # define DONT_UNLINK_WHILE_OPEN 1
70 #ifndef DEFAULT_TMPDIR
71 # define DEFAULT_TMPDIR "/tmp"
74 /* The number of bytes per atomic read. */
75 #define INITIAL_READSIZE 8192
77 /* The number of bytes per atomic write. */
78 #define WRITESIZE 8192
80 /* The string that separates the records of the file. */
81 static char const *separator
;
83 /* True if we have ever read standard input. */
84 static bool have_read_stdin
= false;
86 /* If true, print 'separator' along with the record preceding it
87 in the file; otherwise with the record following it. */
88 static bool separator_ends_record
;
90 /* 0 if 'separator' is to be matched as a regular expression;
91 otherwise, the length of 'separator', used as a sentinel to
93 static size_t sentinel_length
;
95 /* The length of a match with 'separator'. If 'sentinel_length' is 0,
96 'match_length' is computed every time a match succeeds;
97 otherwise, it is simply the length of 'separator'. */
98 static size_t match_length
;
100 /* The input buffer. */
101 static char *G_buffer
;
103 /* The number of bytes to read at once into 'buffer'. */
104 static size_t read_size
;
106 /* The size of 'buffer'. This is read_size * 2 + sentinel_length + 2.
107 The extra 2 bytes allow 'past_end' to have a value beyond the
108 end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
109 static size_t G_buffer_size
;
111 /* The compiled regular expression representing 'separator'. */
112 static struct re_pattern_buffer compiled_separator
;
113 static char compiled_separator_fastmap
[UCHAR_MAX
+ 1];
114 static struct re_registers regs
;
116 static struct option
const longopts
[] =
118 {"before", no_argument
, NULL
, 'b'},
119 {"regex", no_argument
, NULL
, 'r'},
120 {"separator", required_argument
, NULL
, 's'},
121 {GETOPT_HELP_OPTION_DECL
},
122 {GETOPT_VERSION_OPTION_DECL
},
129 if (status
!= EXIT_SUCCESS
)
134 Usage: %s [OPTION]... [FILE]...\n\
138 Write each FILE to standard output, last line first.\n\
139 With no FILE, or when FILE is -, read standard input.\n\
143 Mandatory arguments to long options are mandatory for short options too.\n\
146 -b, --before attach the separator before instead of after\n\
147 -r, --regex interpret the separator as a regular expression\n\
148 -s, --separator=STRING use STRING as the separator instead of newline\n\
150 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
151 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
152 emit_ancillary_info ();
157 /* Print the characters from START to PAST_END - 1.
158 If START is NULL, just flush the buffer. */
161 output (const char *start
, const char *past_end
)
163 static char buffer
[WRITESIZE
];
164 static size_t bytes_in_buffer
= 0;
165 size_t bytes_to_add
= past_end
- start
;
166 size_t bytes_available
= WRITESIZE
- bytes_in_buffer
;
170 fwrite (buffer
, 1, bytes_in_buffer
, stdout
);
175 /* Write out as many full buffers as possible. */
176 while (bytes_to_add
>= bytes_available
)
178 memcpy (buffer
+ bytes_in_buffer
, start
, bytes_available
);
179 bytes_to_add
-= bytes_available
;
180 start
+= bytes_available
;
181 fwrite (buffer
, 1, WRITESIZE
, stdout
);
183 bytes_available
= WRITESIZE
;
186 memcpy (buffer
+ bytes_in_buffer
, start
, bytes_to_add
);
187 bytes_in_buffer
+= bytes_to_add
;
190 /* Print in reverse the file open on descriptor FD for reading FILE.
191 Return true if successful. */
194 tac_seekable (int input_fd
, const char *file
)
196 /* Pointer to the location in 'G_buffer' where the search for
197 the next separator will begin. */
200 /* Pointer to one past the rightmost character in 'G_buffer' that
201 has not been printed yet. */
204 /* Length of the record growing in 'G_buffer'. */
205 size_t saved_record_size
;
207 /* Offset in the file of the next read. */
210 /* True if 'output' has not been called yet for any file.
211 Only used when the separator is attached to the preceding record. */
212 bool first_time
= true;
213 char first_char
= *separator
; /* Speed optimization, non-regexp. */
214 char const *separator1
= separator
+ 1; /* Speed optimization, non-regexp. */
215 size_t match_length1
= match_length
- 1; /* Speed optimization, non-regexp. */
217 /* Find the size of the input file. */
218 file_pos
= lseek (input_fd
, 0, SEEK_END
);
220 return true; /* It's an empty file. */
222 /* Arrange for the first read to lop off enough to leave the rest of the
223 file a multiple of 'read_size'. Since 'read_size' can change, this may
224 not always hold during the program run, but since it usually will, leave
225 it here for i/o efficiency (page/sector boundaries and all that).
226 Note: the efficiency gain has not been verified. */
227 saved_record_size
= file_pos
% read_size
;
228 if (saved_record_size
== 0)
229 saved_record_size
= read_size
;
230 file_pos
-= saved_record_size
;
231 /* 'file_pos' now points to the start of the last (probably partial) block
232 in the input file. */
234 if (lseek (input_fd
, file_pos
, SEEK_SET
) < 0)
235 error (0, errno
, _("%s: seek failed"), quotearg_colon (file
));
237 if (safe_read (input_fd
, G_buffer
, saved_record_size
) != saved_record_size
)
239 error (0, errno
, _("%s: read error"), quotearg_colon (file
));
243 match_start
= past_end
= G_buffer
+ saved_record_size
;
244 /* For non-regexp search, move past impossible positions for a match. */
246 match_start
-= match_length1
;
250 /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
251 with 'separator'; for speed, use strncmp if 'separator' contains no
253 If the match succeeds, set 'match_start' to point to the start of
254 the match and 'match_length' to the length of the match.
255 Otherwise, make 'match_start' < 'G_buffer'. */
256 if (sentinel_length
== 0)
258 size_t i
= match_start
- G_buffer
;
260 regoff_t range
= 1 - ri
;
264 error (EXIT_FAILURE
, 0, _("record too large"));
267 || ((ret
= re_search (&compiled_separator
, G_buffer
,
268 i
, i
- 1, range
, ®s
))
270 match_start
= G_buffer
- 1;
273 error (EXIT_FAILURE
, 0,
274 _("error in regular expression search"));
278 match_start
= G_buffer
+ regs
.start
[0];
279 match_length
= regs
.end
[0] - regs
.start
[0];
284 /* 'match_length' is constant for non-regexp boundaries. */
285 while (*--match_start
!= first_char
286 || (match_length1
&& strncmp (match_start
+ 1, separator1
,
291 /* Check whether we backed off the front of 'G_buffer' without finding
292 a match for 'separator'. */
293 if (match_start
< G_buffer
)
297 /* Hit the beginning of the file; print the remaining record. */
298 output (G_buffer
, past_end
);
302 saved_record_size
= past_end
- G_buffer
;
303 if (saved_record_size
> read_size
)
305 /* 'G_buffer_size' is about twice 'read_size', so since
306 we want to read in another 'read_size' bytes before
307 the data already in 'G_buffer', we need to increase
310 size_t offset
= sentinel_length
? sentinel_length
: 1;
311 ptrdiff_t match_start_offset
= match_start
- G_buffer
;
312 ptrdiff_t past_end_offset
= past_end
- G_buffer
;
313 size_t old_G_buffer_size
= G_buffer_size
;
316 G_buffer_size
= read_size
* 2 + sentinel_length
+ 2;
317 if (G_buffer_size
< old_G_buffer_size
)
319 newbuffer
= xrealloc (G_buffer
- offset
, G_buffer_size
);
321 /* Adjust the pointers for the new buffer location. */
322 match_start
= newbuffer
+ match_start_offset
;
323 past_end
= newbuffer
+ past_end_offset
;
324 G_buffer
= newbuffer
;
327 /* Back up to the start of the next bufferfull of the file. */
328 if (file_pos
>= read_size
)
329 file_pos
-= read_size
;
332 read_size
= file_pos
;
335 if (lseek (input_fd
, file_pos
, SEEK_SET
) < 0)
336 error (0, errno
, _("%s: seek failed"), quotearg_colon (file
));
338 /* Shift the pending record data right to make room for the new.
339 The source and destination regions probably overlap. */
340 memmove (G_buffer
+ read_size
, G_buffer
, saved_record_size
);
341 past_end
= G_buffer
+ read_size
+ saved_record_size
;
342 /* For non-regexp searches, avoid unnecessary scanning. */
344 match_start
= G_buffer
+ read_size
;
346 match_start
= past_end
;
348 if (safe_read (input_fd
, G_buffer
, read_size
) != read_size
)
350 error (0, errno
, _("%s: read error"), quotearg_colon (file
));
356 /* Found a match of 'separator'. */
357 if (separator_ends_record
)
359 char *match_end
= match_start
+ match_length
;
361 /* If this match of 'separator' isn't at the end of the
362 file, print the record. */
363 if (!first_time
|| match_end
!= past_end
)
364 output (match_end
, past_end
);
365 past_end
= match_end
;
370 output (match_start
, past_end
);
371 past_end
= match_start
;
374 /* For non-regex matching, we can back up. */
375 if (sentinel_length
> 0)
376 match_start
-= match_length
- 1;
381 #if DONT_UNLINK_WHILE_OPEN
383 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
384 Using atexit like this is wrong, since it can fail
385 when called e.g. 32 or more times.
386 But this isn't a big deal, since the code is used only on WOE/DOS
387 systems, and few people invoke tac on that many nonseekable files. */
389 static const char *file_to_remove
;
390 static FILE *fp_to_close
;
393 unlink_tempfile (void)
395 fclose (fp_to_close
);
396 unlink (file_to_remove
);
400 record_or_unlink_tempfile (char const *fn
, FILE *fp
)
406 atexit (unlink_tempfile
);
413 record_or_unlink_tempfile (char const *fn
, FILE *fp ATTRIBUTE_UNUSED
)
420 /* A wrapper around mkstemp that gives us both an open stream pointer,
421 FP, and the corresponding FILE_NAME. Always return the same FP/name
422 pair, rewinding/truncating it upon each reuse. */
424 temp_stream (FILE **fp
, char **file_name
)
426 static char *tempfile
= NULL
;
428 if (tempfile
== NULL
)
430 char const *t
= getenv ("TMPDIR");
431 char const *tempdir
= t
? t
: DEFAULT_TMPDIR
;
432 tempfile
= mfile_name_concat (tempdir
, "tacXXXXXX", NULL
);
435 error (0, 0, _("memory exhausted"));
439 /* FIXME: there's a small window between a successful mkstemp call
440 and the unlink that's performed by record_or_unlink_tempfile.
441 If we're interrupted in that interval, this code fails to remove
442 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
443 the window is much larger -- it extends to the atexit-called
445 FIXME: clean up upon fatal signal. Don't block them, in case
446 $TMPFILE is a remote file system. */
448 int fd
= mkstemp (tempfile
);
451 error (0, errno
, _("failed to create temporary file in %s"),
456 tmp_fp
= fdopen (fd
, (O_BINARY
? "w+b" : "w+"));
459 error (0, errno
, _("failed to open %s for writing"),
469 record_or_unlink_tempfile (tempfile
, tmp_fp
);
473 if (fseeko (tmp_fp
, 0, SEEK_SET
) < 0
474 || ftruncate (fileno (tmp_fp
), 0) < 0)
476 error (0, errno
, _("failed to rewind stream for %s"),
483 *file_name
= tempfile
;
487 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
488 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
489 and file name. Return true if successful. */
492 copy_to_temp (FILE **g_tmp
, char **g_tempfile
, int input_fd
, char const *file
)
496 if (!temp_stream (&fp
, &file_name
))
501 size_t bytes_read
= safe_read (input_fd
, G_buffer
, read_size
);
504 if (bytes_read
== SAFE_READ_ERROR
)
506 error (0, errno
, _("%s: read error"), quotearg_colon (file
));
510 if (fwrite (G_buffer
, 1, bytes_read
, fp
) != bytes_read
)
512 error (0, errno
, _("%s: write error"), quotearg_colon (file_name
));
517 if (fflush (fp
) != 0)
519 error (0, errno
, _("%s: write error"), quotearg_colon (file_name
));
524 *g_tempfile
= file_name
;
532 /* Copy INPUT_FD to a temporary, then tac that file.
533 Return true if successful. */
536 tac_nonseekable (int input_fd
, const char *file
)
540 if (!copy_to_temp (&tmp_stream
, &tmp_file
, input_fd
, file
))
543 bool ok
= tac_seekable (fileno (tmp_stream
), tmp_file
);
547 /* Print FILE in reverse, copying it to a temporary
548 file first if it is not seekable.
549 Return true if successful. */
552 tac_file (const char *filename
)
557 bool is_stdin
= STREQ (filename
, "-");
561 have_read_stdin
= true;
563 filename
= _("standard input");
564 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
565 xfreopen (NULL
, "rb", stdin
);
569 fd
= open (filename
, O_RDONLY
| O_BINARY
);
572 error (0, errno
, _("failed to open %s for reading"),
578 file_size
= lseek (fd
, 0, SEEK_END
);
580 ok
= (file_size
< 0 || isatty (fd
)
581 ? tac_nonseekable (fd
, filename
)
582 : tac_seekable (fd
, filename
));
584 if (!is_stdin
&& close (fd
) != 0)
586 error (0, errno
, _("%s: read error"), quotearg_colon (filename
));
593 main (int argc
, char **argv
)
595 const char *error_message
; /* Return value from re_compile_pattern. */
598 size_t half_buffer_size
;
600 /* Initializer for file_list if no file-arguments
601 were specified on the command line. */
602 static char const *const default_file_list
[] = {"-", NULL
};
603 char const *const *file
;
605 initialize_main (&argc
, &argv
);
606 set_program_name (argv
[0]);
607 setlocale (LC_ALL
, "");
608 bindtextdomain (PACKAGE
, LOCALEDIR
);
609 textdomain (PACKAGE
);
611 atexit (close_stdout
);
615 separator_ends_record
= true;
617 while ((optc
= getopt_long (argc
, argv
, "brs:", longopts
, NULL
)) != -1)
622 separator_ends_record
= false;
630 error (EXIT_FAILURE
, 0, _("separator cannot be empty"));
632 case_GETOPT_HELP_CHAR
;
633 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
635 usage (EXIT_FAILURE
);
639 if (sentinel_length
== 0)
641 compiled_separator
.buffer
= NULL
;
642 compiled_separator
.allocated
= 0;
643 compiled_separator
.fastmap
= compiled_separator_fastmap
;
644 compiled_separator
.translate
= NULL
;
645 error_message
= re_compile_pattern (separator
, strlen (separator
),
646 &compiled_separator
);
648 error (EXIT_FAILURE
, 0, "%s", error_message
);
651 match_length
= sentinel_length
= strlen (separator
);
653 read_size
= INITIAL_READSIZE
;
654 while (sentinel_length
>= read_size
/ 2)
656 if (SIZE_MAX
/ 2 < read_size
)
660 half_buffer_size
= read_size
+ sentinel_length
+ 1;
661 G_buffer_size
= 2 * half_buffer_size
;
662 if (! (read_size
< half_buffer_size
&& half_buffer_size
< G_buffer_size
))
664 G_buffer
= xmalloc (G_buffer_size
);
667 memcpy (G_buffer
, separator
, sentinel_length
+ 1);
668 G_buffer
+= sentinel_length
;
675 file
= (optind
< argc
676 ? (char const *const *) &argv
[optind
]
677 : default_file_list
);
679 if (O_BINARY
&& ! isatty (STDOUT_FILENO
))
680 xfreopen (NULL
, "wb", stdout
);
685 for (i
= 0; file
[i
]; ++i
)
686 ok
&= tac_file (file
[i
]);
689 /* Flush the output buffer. */
690 output ((char *) NULL
, (char *) NULL
);
692 if (have_read_stdin
&& close (STDIN_FILENO
) < 0)
694 error (0, errno
, "-");
699 size_t offset
= sentinel_length
? sentinel_length
: 1;
700 free (G_buffer
- offset
);
703 exit (ok
? EXIT_SUCCESS
: EXIT_FAILURE
);