doc: clarify the operation of wc -L
[coreutils.git] / src / tac.c
blob2d73c6e78bcec65d4d45b6121a0038d2d9896e49
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
25 follows in the file.
27 Options:
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
44 #include <regex.h>
46 #include "error.h"
47 #include "filenamecat.h"
48 #include "quote.h"
49 #include "quotearg.h"
50 #include "safe-read.h"
51 #include "stdlib--.h"
52 #include "xfreopen.h"
54 /* The official name of this program (e.g., no 'g' prefix). */
55 #define PROGRAM_NAME "tac"
/* Author list handed to the --version handling in main. */
57 #define AUTHORS \
58 proper_name ("Jay Lepreau"), \
59 proper_name ("David MacKenzie")
61 #if defined __MSDOS__ || defined _WIN32
62 /* Define this to non-zero on systems for which the regular mechanism
63 (of unlinking an open file and expecting to be able to write, seek
64 back to the beginning, then reread it) doesn't work. E.g., on Windows
65 and DOS systems. */
66 # define DONT_UNLINK_WHILE_OPEN 1
67 #endif
/* Directory used for the temporary file when TMPDIR is not set in the
   environment (see temp_stream). */
70 #ifndef DEFAULT_TMPDIR
71 # define DEFAULT_TMPDIR "/tmp"
72 #endif
74 /* The initial number of bytes per atomic read. 'read_size' starts at
   this value and is doubled when the separator or a record needs more
   room. */
75 #define INITIAL_READSIZE 8192
77 /* The number of bytes per atomic write; also the size of the static
   staging buffer inside 'output'. */
78 #define WRITESIZE 8192
80 /* The string that separates the records of the file. */
81 static char const *separator;
83 /* True if we have ever read standard input. */
84 static bool have_read_stdin = false;
86 /* If true, print 'separator' along with the record preceding it
87 in the file; otherwise with the record following it. */
88 static bool separator_ends_record;
90 /* 0 if 'separator' is to be matched as a regular expression;
91 otherwise, the length of 'separator', used as a sentinel to
92 stop the search. */
93 static size_t sentinel_length;
95 /* The length of a match with 'separator'. If 'sentinel_length' is 0,
96 'match_length' is computed every time a match succeeds;
97 otherwise, it is simply the length of 'separator'. */
98 static size_t match_length;
100 /* The input buffer. main advances this pointer past the start of the
   allocation (by 'sentinel_length', or by 1 in regex mode), so the
   allocated block begins before 'G_buffer'. */
101 static char *G_buffer;
103 /* The number of bytes to read at once into 'G_buffer'. */
104 static size_t read_size;
106 /* The size of the allocation behind 'G_buffer'. main sets it to
   2 * (read_size + sentinel_length + 1); when tac_seekable grows the
   buffer it becomes read_size * 2 + sentinel_length + 2.
107 The extra 2 bytes allow 'past_end' to have a value beyond the
108 end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
109 static size_t G_buffer_size;
111 /* The compiled regular expression representing 'separator'. */
112 static struct re_pattern_buffer compiled_separator;
/* Fastmap storage attached to 'compiled_separator' in main. */
113 static char compiled_separator_fastmap[UCHAR_MAX + 1];
/* Match registers filled in by re_search in tac_seekable. */
114 static struct re_registers regs;
/* Long options passed to getopt_long in main. Each named entry maps to
   the short option character that main's switch handles; the table is
   terminated by an all-zero entry. */
116 static struct option const longopts[] =
118 {"before", no_argument, NULL, 'b'},
119 {"regex", no_argument, NULL, 'r'},
120 {"separator", required_argument, NULL, 's'},
121 {GETOPT_HELP_OPTION_DECL},
122 {GETOPT_VERSION_OPTION_DECL},
123 {NULL, 0, NULL, 0}
/* Print usage information and finish by calling exit (STATUS).
   When STATUS is not EXIT_SUCCESS only the short "try --help" hint is
   emitted; otherwise the full synopsis and option list are written to
   standard output. */
126 void
127 usage (int status)
129 if (status != EXIT_SUCCESS)
130 emit_try_help ();
131 else
133 printf (_("\
134 Usage: %s [OPTION]... [FILE]...\n\
136 program_name);
137 fputs (_("\
138 Write each FILE to standard output, last line first.\n\
139 "), stdout);
141 emit_stdin_note ();
142 emit_mandatory_arg_note ();
144 fputs (_("\
145 -b, --before attach the separator before instead of after\n\
146 -r, --regex interpret the separator as a regular expression\n\
147 -s, --separator=STRING use STRING as the separator instead of newline\n\
148 "), stdout);
149 fputs (HELP_OPTION_DESCRIPTION, stdout);
150 fputs (VERSION_OPTION_DESCRIPTION, stdout);
151 emit_ancillary_info (PROGRAM_NAME);
153 exit (status);
156 /* Print the characters from START to PAST_END - 1.
157 If START is NULL, just flush the buffer. */
159 static void
160 output (const char *start, const char *past_end)
162 static char buffer[WRITESIZE];
163 static size_t bytes_in_buffer = 0;
164 size_t bytes_to_add = past_end - start;
165 size_t bytes_available = WRITESIZE - bytes_in_buffer;
167 if (start == 0)
169 fwrite (buffer, 1, bytes_in_buffer, stdout);
170 bytes_in_buffer = 0;
171 return;
174 /* Write out as many full buffers as possible. */
175 while (bytes_to_add >= bytes_available)
177 memcpy (buffer + bytes_in_buffer, start, bytes_available);
178 bytes_to_add -= bytes_available;
179 start += bytes_available;
180 fwrite (buffer, 1, WRITESIZE, stdout);
181 bytes_in_buffer = 0;
182 bytes_available = WRITESIZE;
185 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
186 bytes_in_buffer += bytes_to_add;
189 /* Print in reverse the file open on descriptor FD for reading FILE.
190 The file is already positioned at FILE_POS, which should be near its end.
191 Return true if successful.
   INPUT_FD is the descriptor that is read and repositioned; FILE is
   used only when building diagnostics.
   Returns false after a read error. Seek failures are reported with
   error () but do not by themselves end the function.
   May enlarge 'G_buffer' (and change 'read_size' and 'G_buffer_size')
   when a single record does not fit. */
193 static bool
194 tac_seekable (int input_fd, const char *file, off_t file_pos)
196 /* Pointer to the location in 'G_buffer' where the search for
197 the next separator will begin. */
198 char *match_start;
200 /* Pointer to one past the rightmost character in 'G_buffer' that
201 has not been printed yet. */
202 char *past_end;
204 /* Length of the record growing in 'G_buffer'. */
205 size_t saved_record_size;
207 /* True if 'output' has not been called yet for any file.
208 Only used when the separator is attached to the preceding record. */
209 bool first_time = true;
210 char first_char = *separator; /* Speed optimization, non-regexp. */
211 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
212 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
214 /* Arrange for the first read to lop off enough to leave the rest of the
215 file a multiple of 'read_size'. Since 'read_size' can change, this may
216 not always hold during the program run, but since it usually will, leave
217 it here for i/o efficiency (page/sector boundaries and all that).
218 Note: the efficiency gain has not been verified. */
219 size_t remainder = file_pos % read_size;
220 if (remainder != 0)
222 file_pos -= remainder;
223 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
224 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
227 /* Scan backward, looking for end of file. This caters to proc-like
228 file systems where the file size is just an estimate. */
229 while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
230 && file_pos != 0)
232 off_t rsize = read_size;
233 if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
234 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
235 file_pos -= read_size;
238 /* Now scan forward, looking for end of file. */
239 while (saved_record_size == read_size)
241 size_t nread = safe_read (input_fd, G_buffer, read_size);
242 if (nread == 0)
243 break;
244 saved_record_size = nread;
245 if (saved_record_size == SAFE_READ_ERROR)
246 break;
247 file_pos += nread;
/* Either read loop above may have stopped on SAFE_READ_ERROR. */
250 if (saved_record_size == SAFE_READ_ERROR)
252 error (0, errno, _("%s: read error"), quotearg_colon (file));
253 return false;
256 match_start = past_end = G_buffer + saved_record_size;
257 /* For non-regexp search, move past impossible positions for a match. */
258 if (sentinel_length)
259 match_start -= match_length1;
/* Main loop. It is left only by the 'return' statements below or by a
   fatal error (EXIT_FAILURE, ...). */
261 while (true)
263 /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
264 with 'separator'; for speed, use strncmp if 'separator' contains no
265 metacharacters.
266 If the match succeeds, set 'match_start' to point to the start of
267 the match and 'match_length' to the length of the match.
268 Otherwise, make 'match_start' < 'G_buffer'. */
269 if (sentinel_length == 0)
271 size_t i = match_start - G_buffer;
272 regoff_t ri = i;
273 regoff_t range = 1 - ri;
274 regoff_t ret;
/* 'range' exceeds 1 only if 'i' did not survive the conversion to
   regoff_t as a non-negative value. */
276 if (1 < range)
277 error (EXIT_FAILURE, 0, _("record too large"));
279 if (range == 1
280 || ((ret = re_search (&compiled_separator, G_buffer,
281 i, i - 1, range, &regs))
282 == -1))
283 match_start = G_buffer - 1;
284 else if (ret == -2)
286 error (EXIT_FAILURE, 0,
287 _("error in regular expression search"));
289 else
291 match_start = G_buffer + regs.start[0];
292 match_length = regs.end[0] - regs.start[0];
295 else
297 /* 'match_length' is constant for non-regexp boundaries. */
/* This scan has no explicit lower bound. It stops at the latest on the
   copy of 'separator' that main stores immediately before 'G_buffer',
   leaving 'match_start' < 'G_buffer'. */
298 while (*--match_start != first_char
299 || (match_length1 && !STREQ_LEN (match_start + 1, separator1,
300 match_length1)))
301 /* Do nothing. */ ;
304 /* Check whether we backed off the front of 'G_buffer' without finding
305 a match for 'separator'. */
306 if (match_start < G_buffer)
308 if (file_pos == 0)
310 /* Hit the beginning of the file; print the remaining record. */
311 output (G_buffer, past_end);
312 return true;
315 saved_record_size = past_end - G_buffer;
316 if (saved_record_size > read_size)
318 /* 'G_buffer_size' is about twice 'read_size', so since
319 we want to read in another 'read_size' bytes before
320 the data already in 'G_buffer', we need to increase
321 'G_buffer_size'. */
322 char *newbuffer;
/* 'offset' is the distance from the start of the allocation to
   'G_buffer'; it mirrors the adjustment made in main. */
323 size_t offset = sentinel_length ? sentinel_length : 1;
324 ptrdiff_t match_start_offset = match_start - G_buffer;
325 ptrdiff_t past_end_offset = past_end - G_buffer;
326 size_t old_G_buffer_size = G_buffer_size;
328 read_size *= 2;
329 G_buffer_size = read_size * 2 + sentinel_length + 2;
/* A smaller result means the size computation wrapped around. */
330 if (G_buffer_size < old_G_buffer_size)
331 xalloc_die ();
332 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
333 newbuffer += offset;
334 /* Adjust the pointers for the new buffer location. */
335 match_start = newbuffer + match_start_offset;
336 past_end = newbuffer + past_end_offset;
337 G_buffer = newbuffer;
340 /* Back up to the start of the next bufferfull of the file. */
341 if (file_pos >= read_size)
342 file_pos -= read_size;
343 else
345 read_size = file_pos;
346 file_pos = 0;
348 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
349 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
351 /* Shift the pending record data right to make room for the new.
352 The source and destination regions probably overlap. */
353 memmove (G_buffer + read_size, G_buffer, saved_record_size);
354 past_end = G_buffer + read_size + saved_record_size;
355 /* For non-regexp searches, avoid unnecessary scanning. */
356 if (sentinel_length)
357 match_start = G_buffer + read_size;
358 else
359 match_start = past_end;
/* A short read is treated as an error here: exactly 'read_size' bytes
   are expected in front of the data already held. */
361 if (safe_read (input_fd, G_buffer, read_size) != read_size)
363 error (0, errno, _("%s: read error"), quotearg_colon (file));
364 return false;
367 else
369 /* Found a match of 'separator'. */
370 if (separator_ends_record)
372 char *match_end = match_start + match_length;
374 /* If this match of 'separator' isn't at the end of the
375 file, print the record. */
376 if (!first_time || match_end != past_end)
377 output (match_end, past_end);
378 past_end = match_end;
379 first_time = false;
381 else
383 output (match_start, past_end);
384 past_end = match_start;
387 /* For non-regex matching, we can back up. */
388 if (sentinel_length > 0)
389 match_start -= match_length - 1;
394 #if DONT_UNLINK_WHILE_OPEN
396 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
397 Using atexit like this is wrong, since it can fail
398 when called e.g. 32 or more times.
399 But this isn't a big deal, since the code is used only on WOE/DOS
400 systems, and few people invoke tac on that many nonseekable files. */
402 static const char *file_to_remove;
403 static FILE *fp_to_close;
405 static void
406 unlink_tempfile (void)
408 fclose (fp_to_close);
409 unlink (file_to_remove);
412 static void
413 record_or_unlink_tempfile (char const *fn, FILE *fp)
415 if (!file_to_remove)
417 file_to_remove = fn;
418 fp_to_close = fp;
419 atexit (unlink_tempfile);
423 #else
425 static void
426 record_or_unlink_tempfile (char const *fn, FILE *fp _GL_UNUSED)
428 unlink (fn);
431 #endif
433 /* A wrapper around mkstemp that gives us both an open stream pointer,
434 FP, and the corresponding FILE_NAME. Always return the same FP/name
435 pair, rewinding/truncating it upon each reuse. */
436 static bool
437 temp_stream (FILE **fp, char **file_name)
439 static char *tempfile = NULL;
440 static FILE *tmp_fp;
441 if (tempfile == NULL)
443 char const *t = getenv ("TMPDIR");
444 char const *tempdir = t ? t : DEFAULT_TMPDIR;
445 tempfile = mfile_name_concat (tempdir, "tacXXXXXX", NULL);
446 if (tempdir == NULL)
448 error (0, 0, _("memory exhausted"));
449 return false;
452 /* FIXME: there's a small window between a successful mkstemp call
453 and the unlink that's performed by record_or_unlink_tempfile.
454 If we're interrupted in that interval, this code fails to remove
455 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
456 the window is much larger -- it extends to the atexit-called
457 unlink_tempfile.
458 FIXME: clean up upon fatal signal. Don't block them, in case
459 $TMPFILE is a remote file system. */
461 int fd = mkstemp (tempfile);
462 if (fd < 0)
464 error (0, errno, _("failed to create temporary file in %s"),
465 quote (tempdir));
466 goto Reset;
469 tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
470 if (! tmp_fp)
472 error (0, errno, _("failed to open %s for writing"),
473 quote (tempfile));
474 close (fd);
475 unlink (tempfile);
476 Reset:
477 free (tempfile);
478 tempfile = NULL;
479 return false;
482 record_or_unlink_tempfile (tempfile, tmp_fp);
484 else
486 if (fseeko (tmp_fp, 0, SEEK_SET) < 0
487 || ftruncate (fileno (tmp_fp), 0) < 0)
489 error (0, errno, _("failed to rewind stream for %s"),
490 quote (tempfile));
491 return false;
495 *fp = tmp_fp;
496 *file_name = tempfile;
497 return true;
500 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
501 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
502 and file name. Return the number of bytes copied, or -1 on error. */
504 static off_t
505 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
507 FILE *fp;
508 char *file_name;
509 uintmax_t bytes_copied = 0;
510 if (!temp_stream (&fp, &file_name))
511 return -1;
513 while (1)
515 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
516 if (bytes_read == 0)
517 break;
518 if (bytes_read == SAFE_READ_ERROR)
520 error (0, errno, _("%s: read error"), quotearg_colon (file));
521 goto Fail;
524 if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
526 error (0, errno, _("%s: write error"), quotearg_colon (file_name));
527 goto Fail;
530 /* Implicitly <= OFF_T_MAX due to preceding fwrite(),
531 but unsigned type used to avoid compiler warnings
532 not aware of this fact. */
533 bytes_copied += bytes_read;
536 if (fflush (fp) != 0)
538 error (0, errno, _("%s: write error"), quotearg_colon (file_name));
539 goto Fail;
542 *g_tmp = fp;
543 *g_tempfile = file_name;
544 return bytes_copied;
546 Fail:
547 fclose (fp);
548 return -1;
/* Handle input that cannot be positioned: spool INPUT_FD (named FILE in
   diagnostics) into the temporary file, then reverse that copy with
   tac_seekable.  Return true if both steps succeed.  */

static bool
tac_nonseekable (int input_fd, const char *file)
{
  FILE *spool;
  char *spool_name;
  off_t spooled = copy_to_temp (&spool, &spool_name, input_fd, file);

  /* A negative count means the copy failed and was already reported.  */
  return 0 <= spooled
         && tac_seekable (fileno (spool), spool_name, spooled);
}
567 /* Print FILE in reverse, copying it to a temporary
568 file first if it is not seekable.
569 Return true if successful. */
571 static bool
572 tac_file (const char *filename)
574 bool ok;
575 off_t file_size;
576 int fd;
577 bool is_stdin = STREQ (filename, "-");
579 if (is_stdin)
581 have_read_stdin = true;
582 fd = STDIN_FILENO;
583 filename = _("standard input");
584 if (O_BINARY && ! isatty (STDIN_FILENO))
585 xfreopen (NULL, "rb", stdin);
587 else
589 fd = open (filename, O_RDONLY | O_BINARY);
590 if (fd < 0)
592 error (0, errno, _("failed to open %s for reading"),
593 quote (filename));
594 return false;
598 file_size = lseek (fd, 0, SEEK_END);
600 ok = (file_size < 0 || isatty (fd)
601 ? tac_nonseekable (fd, filename)
602 : tac_seekable (fd, filename, file_size));
604 if (!is_stdin && close (fd) != 0)
606 error (0, errno, _("%s: read error"), quotearg_colon (filename));
607 ok = false;
609 return ok;
613 main (int argc, char **argv)
615 const char *error_message; /* Return value from re_compile_pattern. */
616 int optc;
617 bool ok;
618 size_t half_buffer_size;
620 /* Initializer for file_list if no file-arguments
621 were specified on the command line. */
622 static char const *const default_file_list[] = {"-", NULL};
623 char const *const *file;
625 initialize_main (&argc, &argv);
626 set_program_name (argv[0]);
627 setlocale (LC_ALL, "");
628 bindtextdomain (PACKAGE, LOCALEDIR);
629 textdomain (PACKAGE);
631 atexit (close_stdout);
633 separator = "\n";
634 sentinel_length = 1;
635 separator_ends_record = true;
637 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
639 switch (optc)
641 case 'b':
642 separator_ends_record = false;
643 break;
644 case 'r':
645 sentinel_length = 0;
646 break;
647 case 's':
648 separator = optarg;
649 if (*separator == 0)
650 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
651 break;
652 case_GETOPT_HELP_CHAR;
653 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
654 default:
655 usage (EXIT_FAILURE);
659 if (sentinel_length == 0)
661 compiled_separator.buffer = NULL;
662 compiled_separator.allocated = 0;
663 compiled_separator.fastmap = compiled_separator_fastmap;
664 compiled_separator.translate = NULL;
665 error_message = re_compile_pattern (separator, strlen (separator),
666 &compiled_separator);
667 if (error_message)
668 error (EXIT_FAILURE, 0, "%s", error_message);
670 else
671 match_length = sentinel_length = strlen (separator);
673 read_size = INITIAL_READSIZE;
674 while (sentinel_length >= read_size / 2)
676 if (SIZE_MAX / 2 < read_size)
677 xalloc_die ();
678 read_size *= 2;
680 half_buffer_size = read_size + sentinel_length + 1;
681 G_buffer_size = 2 * half_buffer_size;
682 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
683 xalloc_die ();
684 G_buffer = xmalloc (G_buffer_size);
685 if (sentinel_length)
687 memcpy (G_buffer, separator, sentinel_length + 1);
688 G_buffer += sentinel_length;
690 else
692 ++G_buffer;
695 file = (optind < argc
696 ? (char const *const *) &argv[optind]
697 : default_file_list);
699 if (O_BINARY && ! isatty (STDOUT_FILENO))
700 xfreopen (NULL, "wb", stdout);
703 size_t i;
704 ok = true;
705 for (i = 0; file[i]; ++i)
706 ok &= tac_file (file[i]);
709 /* Flush the output buffer. */
710 output ((char *) NULL, (char *) NULL);
712 if (have_read_stdin && close (STDIN_FILENO) < 0)
714 error (0, errno, "-");
715 ok = false;
718 #ifdef lint
719 size_t offset = sentinel_length ? sentinel_length : 1;
720 free (G_buffer - offset);
721 #endif
723 return ok ? EXIT_SUCCESS : EXIT_FAILURE;