shred: enable direct I/O when possible
[coreutils.git] / src / tac.c
blob5bf40a7e8524bbfcda64ae729580a2e609ad72d9
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
25 follows in the file.
27 Options:
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
44 #include <regex.h>
46 #include "error.h"
47 #include "filenamecat.h"
48 #include "quote.h"
49 #include "quotearg.h"
50 #include "safe-read.h"
51 #include "stdlib--.h"
52 #include "xfreopen.h"
54 /* The official name of this program (e.g., no 'g' prefix). */
55 #define PROGRAM_NAME "tac"
57 #define AUTHORS \
58 proper_name ("Jay Lepreau"), \
59 proper_name ("David MacKenzie")
61 #if defined __MSDOS__ || defined _WIN32
62 /* Define this to non-zero on systems for which the regular mechanism
63 (of unlinking an open file and expecting to be able to write, seek
64 back to the beginning, then reread it) doesn't work. E.g., on Windows
65 and DOS systems. */
66 # define DONT_UNLINK_WHILE_OPEN 1
67 #endif
70 #ifndef DEFAULT_TMPDIR
71 # define DEFAULT_TMPDIR "/tmp"
72 #endif
74 /* The number of bytes per atomic read. */
75 #define INITIAL_READSIZE 8192
77 /* The number of bytes per atomic write. */
78 #define WRITESIZE 8192
80 /* The string that separates the records of the file. */
81 static char const *separator;
83 /* True if we have ever read standard input. */
84 static bool have_read_stdin = false;
86 /* If true, print 'separator' along with the record preceding it
87 in the file; otherwise with the record following it. */
88 static bool separator_ends_record;
90 /* 0 if 'separator' is to be matched as a regular expression;
91 otherwise, the length of 'separator', used as a sentinel to
92 stop the search. */
93 static size_t sentinel_length;
95 /* The length of a match with 'separator'. If 'sentinel_length' is 0,
96 'match_length' is computed every time a match succeeds;
97 otherwise, it is simply the length of 'separator'. */
98 static size_t match_length;
100 /* The input buffer. */
101 static char *G_buffer;
103 /* The number of bytes to read at once into 'buffer'. */
104 static size_t read_size;
106 /* The size of 'buffer'. This is read_size * 2 + sentinel_length + 2.
107 The extra 2 bytes allow 'past_end' to have a value beyond the
108 end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
109 static size_t G_buffer_size;
111 /* The compiled regular expression representing 'separator'. */
112 static struct re_pattern_buffer compiled_separator;
113 static char compiled_separator_fastmap[UCHAR_MAX + 1];
114 static struct re_registers regs;
116 static struct option const longopts[] =
118 {"before", no_argument, NULL, 'b'},
119 {"regex", no_argument, NULL, 'r'},
120 {"separator", required_argument, NULL, 's'},
121 {GETOPT_HELP_OPTION_DECL},
122 {GETOPT_VERSION_OPTION_DECL},
123 {NULL, 0, NULL, 0}
126 void
127 usage (int status)
129 if (status != EXIT_SUCCESS)
130 emit_try_help ();
131 else
133 printf (_("\
134 Usage: %s [OPTION]... [FILE]...\n\
136 program_name);
137 fputs (_("\
138 Write each FILE to standard output, last line first.\n\
139 With no FILE, or when FILE is -, read standard input.\n\
140 "), stdout);
142 emit_mandatory_arg_note ();
144 fputs (_("\
145 -b, --before attach the separator before instead of after\n\
146 -r, --regex interpret the separator as a regular expression\n\
147 -s, --separator=STRING use STRING as the separator instead of newline\n\
148 "), stdout);
149 fputs (HELP_OPTION_DESCRIPTION, stdout);
150 fputs (VERSION_OPTION_DESCRIPTION, stdout);
151 emit_ancillary_info ();
153 exit (status);
156 /* Print the characters from START to PAST_END - 1.
157 If START is NULL, just flush the buffer. */
159 static void
160 output (const char *start, const char *past_end)
162 static char buffer[WRITESIZE];
163 static size_t bytes_in_buffer = 0;
164 size_t bytes_to_add = past_end - start;
165 size_t bytes_available = WRITESIZE - bytes_in_buffer;
167 if (start == 0)
169 fwrite (buffer, 1, bytes_in_buffer, stdout);
170 bytes_in_buffer = 0;
171 return;
174 /* Write out as many full buffers as possible. */
175 while (bytes_to_add >= bytes_available)
177 memcpy (buffer + bytes_in_buffer, start, bytes_available);
178 bytes_to_add -= bytes_available;
179 start += bytes_available;
180 fwrite (buffer, 1, WRITESIZE, stdout);
181 bytes_in_buffer = 0;
182 bytes_available = WRITESIZE;
185 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
186 bytes_in_buffer += bytes_to_add;
189 /* Print in reverse the file open on descriptor FD for reading FILE.
190 Return true if successful. */
192 static bool
193 tac_seekable (int input_fd, const char *file)
195 /* Pointer to the location in 'G_buffer' where the search for
196 the next separator will begin. */
197 char *match_start;
199 /* Pointer to one past the rightmost character in 'G_buffer' that
200 has not been printed yet. */
201 char *past_end;
203 /* Length of the record growing in 'G_buffer'. */
204 size_t saved_record_size;
206 /* Offset in the file of the next read. */
207 off_t file_pos;
209 /* True if 'output' has not been called yet for any file.
210 Only used when the separator is attached to the preceding record. */
211 bool first_time = true;
212 char first_char = *separator; /* Speed optimization, non-regexp. */
213 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
214 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
216 /* Find the size of the input file. */
217 file_pos = lseek (input_fd, 0, SEEK_END);
218 if (file_pos < 1)
219 return true; /* It's an empty file. */
221 /* Arrange for the first read to lop off enough to leave the rest of the
222 file a multiple of 'read_size'. Since 'read_size' can change, this may
223 not always hold during the program run, but since it usually will, leave
224 it here for i/o efficiency (page/sector boundaries and all that).
225 Note: the efficiency gain has not been verified. */
226 saved_record_size = file_pos % read_size;
227 if (saved_record_size == 0)
228 saved_record_size = read_size;
229 file_pos -= saved_record_size;
230 /* 'file_pos' now points to the start of the last (probably partial) block
231 in the input file. */
233 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
234 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
236 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
238 error (0, errno, _("%s: read error"), quotearg_colon (file));
239 return false;
242 match_start = past_end = G_buffer + saved_record_size;
243 /* For non-regexp search, move past impossible positions for a match. */
244 if (sentinel_length)
245 match_start -= match_length1;
247 while (true)
249 /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
250 with 'separator'; for speed, use strncmp if 'separator' contains no
251 metacharacters.
252 If the match succeeds, set 'match_start' to point to the start of
253 the match and 'match_length' to the length of the match.
254 Otherwise, make 'match_start' < 'G_buffer'. */
255 if (sentinel_length == 0)
257 size_t i = match_start - G_buffer;
258 regoff_t ri = i;
259 regoff_t range = 1 - ri;
260 regoff_t ret;
262 if (1 < range)
263 error (EXIT_FAILURE, 0, _("record too large"));
265 if (range == 1
266 || ((ret = re_search (&compiled_separator, G_buffer,
267 i, i - 1, range, &regs))
268 == -1))
269 match_start = G_buffer - 1;
270 else if (ret == -2)
272 error (EXIT_FAILURE, 0,
273 _("error in regular expression search"));
275 else
277 match_start = G_buffer + regs.start[0];
278 match_length = regs.end[0] - regs.start[0];
281 else
283 /* 'match_length' is constant for non-regexp boundaries. */
284 while (*--match_start != first_char
285 || (match_length1 && strncmp (match_start + 1, separator1,
286 match_length1)))
287 /* Do nothing. */ ;
290 /* Check whether we backed off the front of 'G_buffer' without finding
291 a match for 'separator'. */
292 if (match_start < G_buffer)
294 if (file_pos == 0)
296 /* Hit the beginning of the file; print the remaining record. */
297 output (G_buffer, past_end);
298 return true;
301 saved_record_size = past_end - G_buffer;
302 if (saved_record_size > read_size)
304 /* 'G_buffer_size' is about twice 'read_size', so since
305 we want to read in another 'read_size' bytes before
306 the data already in 'G_buffer', we need to increase
307 'G_buffer_size'. */
308 char *newbuffer;
309 size_t offset = sentinel_length ? sentinel_length : 1;
310 ptrdiff_t match_start_offset = match_start - G_buffer;
311 ptrdiff_t past_end_offset = past_end - G_buffer;
312 size_t old_G_buffer_size = G_buffer_size;
314 read_size *= 2;
315 G_buffer_size = read_size * 2 + sentinel_length + 2;
316 if (G_buffer_size < old_G_buffer_size)
317 xalloc_die ();
318 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
319 newbuffer += offset;
320 /* Adjust the pointers for the new buffer location. */
321 match_start = newbuffer + match_start_offset;
322 past_end = newbuffer + past_end_offset;
323 G_buffer = newbuffer;
326 /* Back up to the start of the next bufferfull of the file. */
327 if (file_pos >= read_size)
328 file_pos -= read_size;
329 else
331 read_size = file_pos;
332 file_pos = 0;
334 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
335 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
337 /* Shift the pending record data right to make room for the new.
338 The source and destination regions probably overlap. */
339 memmove (G_buffer + read_size, G_buffer, saved_record_size);
340 past_end = G_buffer + read_size + saved_record_size;
341 /* For non-regexp searches, avoid unnecessary scanning. */
342 if (sentinel_length)
343 match_start = G_buffer + read_size;
344 else
345 match_start = past_end;
347 if (safe_read (input_fd, G_buffer, read_size) != read_size)
349 error (0, errno, _("%s: read error"), quotearg_colon (file));
350 return false;
353 else
355 /* Found a match of 'separator'. */
356 if (separator_ends_record)
358 char *match_end = match_start + match_length;
360 /* If this match of 'separator' isn't at the end of the
361 file, print the record. */
362 if (!first_time || match_end != past_end)
363 output (match_end, past_end);
364 past_end = match_end;
365 first_time = false;
367 else
369 output (match_start, past_end);
370 past_end = match_start;
373 /* For non-regex matching, we can back up. */
374 if (sentinel_length > 0)
375 match_start -= match_length - 1;
380 #if DONT_UNLINK_WHILE_OPEN
382 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
383 Using atexit like this is wrong, since it can fail
384 when called e.g. 32 or more times.
385 But this isn't a big deal, since the code is used only on WOE/DOS
386 systems, and few people invoke tac on that many nonseekable files. */
388 static const char *file_to_remove;
389 static FILE *fp_to_close;
391 static void
392 unlink_tempfile (void)
394 fclose (fp_to_close);
395 unlink (file_to_remove);
398 static void
399 record_or_unlink_tempfile (char const *fn, FILE *fp)
401 if (!file_to_remove)
403 file_to_remove = fn;
404 fp_to_close = fp;
405 atexit (unlink_tempfile);
409 #else
411 static void
412 record_or_unlink_tempfile (char const *fn, FILE *fp _GL_UNUSED)
414 unlink (fn);
417 #endif
419 /* A wrapper around mkstemp that gives us both an open stream pointer,
420 FP, and the corresponding FILE_NAME. Always return the same FP/name
421 pair, rewinding/truncating it upon each reuse. */
422 static bool
423 temp_stream (FILE **fp, char **file_name)
425 static char *tempfile = NULL;
426 static FILE *tmp_fp;
427 if (tempfile == NULL)
429 char const *t = getenv ("TMPDIR");
430 char const *tempdir = t ? t : DEFAULT_TMPDIR;
431 tempfile = mfile_name_concat (tempdir, "tacXXXXXX", NULL);
432 if (tempdir == NULL)
434 error (0, 0, _("memory exhausted"));
435 return false;
438 /* FIXME: there's a small window between a successful mkstemp call
439 and the unlink that's performed by record_or_unlink_tempfile.
440 If we're interrupted in that interval, this code fails to remove
441 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
442 the window is much larger -- it extends to the atexit-called
443 unlink_tempfile.
444 FIXME: clean up upon fatal signal. Don't block them, in case
445 $TMPFILE is a remote file system. */
447 int fd = mkstemp (tempfile);
448 if (fd < 0)
450 error (0, errno, _("failed to create temporary file in %s"),
451 quote (tempdir));
452 goto Reset;
455 tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
456 if (! tmp_fp)
458 error (0, errno, _("failed to open %s for writing"),
459 quote (tempfile));
460 close (fd);
461 unlink (tempfile);
462 Reset:
463 free (tempfile);
464 tempfile = NULL;
465 return false;
468 record_or_unlink_tempfile (tempfile, tmp_fp);
470 else
472 if (fseeko (tmp_fp, 0, SEEK_SET) < 0
473 || ftruncate (fileno (tmp_fp), 0) < 0)
475 error (0, errno, _("failed to rewind stream for %s"),
476 quote (tempfile));
477 return false;
481 *fp = tmp_fp;
482 *file_name = tempfile;
483 return true;
486 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
487 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
488 and file name. Return true if successful. */
490 static bool
491 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
493 FILE *fp;
494 char *file_name;
495 if (!temp_stream (&fp, &file_name))
496 return false;
498 while (1)
500 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
501 if (bytes_read == 0)
502 break;
503 if (bytes_read == SAFE_READ_ERROR)
505 error (0, errno, _("%s: read error"), quotearg_colon (file));
506 goto Fail;
509 if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
511 error (0, errno, _("%s: write error"), quotearg_colon (file_name));
512 goto Fail;
516 if (fflush (fp) != 0)
518 error (0, errno, _("%s: write error"), quotearg_colon (file_name));
519 goto Fail;
522 *g_tmp = fp;
523 *g_tempfile = file_name;
524 return true;
526 Fail:
527 fclose (fp);
528 return false;
531 /* Copy INPUT_FD to a temporary, then tac that file.
532 Return true if successful. */
534 static bool
535 tac_nonseekable (int input_fd, const char *file)
537 FILE *tmp_stream;
538 char *tmp_file;
539 if (!copy_to_temp (&tmp_stream, &tmp_file, input_fd, file))
540 return false;
542 bool ok = tac_seekable (fileno (tmp_stream), tmp_file);
543 return ok;
546 /* Print FILE in reverse, copying it to a temporary
547 file first if it is not seekable.
548 Return true if successful. */
550 static bool
551 tac_file (const char *filename)
553 bool ok;
554 off_t file_size;
555 int fd;
556 bool is_stdin = STREQ (filename, "-");
558 if (is_stdin)
560 have_read_stdin = true;
561 fd = STDIN_FILENO;
562 filename = _("standard input");
563 if (O_BINARY && ! isatty (STDIN_FILENO))
564 xfreopen (NULL, "rb", stdin);
566 else
568 fd = open (filename, O_RDONLY | O_BINARY);
569 if (fd < 0)
571 error (0, errno, _("failed to open %s for reading"),
572 quote (filename));
573 return false;
577 file_size = lseek (fd, 0, SEEK_END);
579 ok = (file_size < 0 || isatty (fd)
580 ? tac_nonseekable (fd, filename)
581 : tac_seekable (fd, filename));
583 if (!is_stdin && close (fd) != 0)
585 error (0, errno, _("%s: read error"), quotearg_colon (filename));
586 ok = false;
588 return ok;
592 main (int argc, char **argv)
594 const char *error_message; /* Return value from re_compile_pattern. */
595 int optc;
596 bool ok;
597 size_t half_buffer_size;
599 /* Initializer for file_list if no file-arguments
600 were specified on the command line. */
601 static char const *const default_file_list[] = {"-", NULL};
602 char const *const *file;
604 initialize_main (&argc, &argv);
605 set_program_name (argv[0]);
606 setlocale (LC_ALL, "");
607 bindtextdomain (PACKAGE, LOCALEDIR);
608 textdomain (PACKAGE);
610 atexit (close_stdout);
612 separator = "\n";
613 sentinel_length = 1;
614 separator_ends_record = true;
616 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
618 switch (optc)
620 case 'b':
621 separator_ends_record = false;
622 break;
623 case 'r':
624 sentinel_length = 0;
625 break;
626 case 's':
627 separator = optarg;
628 if (*separator == 0)
629 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
630 break;
631 case_GETOPT_HELP_CHAR;
632 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
633 default:
634 usage (EXIT_FAILURE);
638 if (sentinel_length == 0)
640 compiled_separator.buffer = NULL;
641 compiled_separator.allocated = 0;
642 compiled_separator.fastmap = compiled_separator_fastmap;
643 compiled_separator.translate = NULL;
644 error_message = re_compile_pattern (separator, strlen (separator),
645 &compiled_separator);
646 if (error_message)
647 error (EXIT_FAILURE, 0, "%s", error_message);
649 else
650 match_length = sentinel_length = strlen (separator);
652 read_size = INITIAL_READSIZE;
653 while (sentinel_length >= read_size / 2)
655 if (SIZE_MAX / 2 < read_size)
656 xalloc_die ();
657 read_size *= 2;
659 half_buffer_size = read_size + sentinel_length + 1;
660 G_buffer_size = 2 * half_buffer_size;
661 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
662 xalloc_die ();
663 G_buffer = xmalloc (G_buffer_size);
664 if (sentinel_length)
666 memcpy (G_buffer, separator, sentinel_length + 1);
667 G_buffer += sentinel_length;
669 else
671 ++G_buffer;
674 file = (optind < argc
675 ? (char const *const *) &argv[optind]
676 : default_file_list);
678 if (O_BINARY && ! isatty (STDOUT_FILENO))
679 xfreopen (NULL, "wb", stdout);
682 size_t i;
683 ok = true;
684 for (i = 0; file[i]; ++i)
685 ok &= tac_file (file[i]);
688 /* Flush the output buffer. */
689 output ((char *) NULL, (char *) NULL);
691 if (have_read_stdin && close (STDIN_FILENO) < 0)
693 error (0, errno, "-");
694 ok = false;
697 #ifdef lint
698 size_t offset = sentinel_length ? sentinel_length : 1;
699 free (G_buffer - offset);
700 #endif
702 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);