Test yesterday's change to groups.
[coreutils.git] / src / tac.c
blob262bf875e4e76d60c18930af6a416b7e19fac484
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
25 follows in the file.
27 Options:
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
44 #include <regex.h>
46 #include "error.h"
47 #include "quote.h"
48 #include "quotearg.h"
49 #include "safe-read.h"
50 #include "stdlib--.h"
52 /* The official name of this program (e.g., no `g' prefix). */
53 #define PROGRAM_NAME "tac"
55 #define AUTHORS "Jay Lepreau", "David MacKenzie"
57 #if defined __MSDOS__ || defined _WIN32
58 /* Define this to non-zero on systems for which the regular mechanism
59 (of unlinking an open file and expecting to be able to write, seek
60 back to the beginning, then reread it) doesn't work. E.g., on Windows
61 and DOS systems. */
62 # define DONT_UNLINK_WHILE_OPEN 1
63 #endif
66 #ifndef DEFAULT_TMPDIR
67 # define DEFAULT_TMPDIR "/tmp"
68 #endif
70 /* The number of bytes per atomic read. */
71 #define INITIAL_READSIZE 8192
73 /* The number of bytes per atomic write. */
74 #define WRITESIZE 8192
76 /* The name this program was run with. */
77 char *program_name;
79 /* The string that separates the records of the file. */
80 static char const *separator;
82 /* True if we have ever read standard input. */
83 static bool have_read_stdin = false;
85 /* If true, print `separator' along with the record preceding it
86 in the file; otherwise with the record following it. */
87 static bool separator_ends_record;
89 /* 0 if `separator' is to be matched as a regular expression;
90 otherwise, the length of `separator', used as a sentinel to
91 stop the search. */
92 static size_t sentinel_length;
94 /* The length of a match with `separator'. If `sentinel_length' is 0,
95 `match_length' is computed every time a match succeeds;
96 otherwise, it is simply the length of `separator'. */
97 static size_t match_length;
99 /* The input buffer. */
100 static char *G_buffer;
102 /* The number of bytes to read at once into `buffer'. */
103 static size_t read_size;
105 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
106 The extra 2 bytes allow `past_end' to have a value beyond the
107 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
108 static size_t G_buffer_size;
110 /* The compiled regular expression representing `separator'. */
111 static struct re_pattern_buffer compiled_separator;
112 static char compiled_separator_fastmap[UCHAR_MAX + 1];
114 static struct option const longopts[] =
116 {"before", no_argument, NULL, 'b'},
117 {"regex", no_argument, NULL, 'r'},
118 {"separator", required_argument, NULL, 's'},
119 {GETOPT_HELP_OPTION_DECL},
120 {GETOPT_VERSION_OPTION_DECL},
121 {NULL, 0, NULL, 0}
124 void
125 usage (int status)
127 if (status != EXIT_SUCCESS)
128 fprintf (stderr, _("Try `%s --help' for more information.\n"),
129 program_name);
130 else
132 printf (_("\
133 Usage: %s [OPTION]... [FILE]...\n\
135 program_name);
136 fputs (_("\
137 Write each FILE to standard output, last line first.\n\
138 With no FILE, or when FILE is -, read standard input.\n\
140 "), stdout);
141 fputs (_("\
142 Mandatory arguments to long options are mandatory for short options too.\n\
143 "), stdout);
144 fputs (_("\
145 -b, --before attach the separator before instead of after\n\
146 -r, --regex interpret the separator as a regular expression\n\
147 -s, --separator=STRING use STRING as the separator instead of newline\n\
148 "), stdout);
149 fputs (HELP_OPTION_DESCRIPTION, stdout);
150 fputs (VERSION_OPTION_DESCRIPTION, stdout);
151 emit_bug_reporting_address ();
153 exit (status);
156 /* Print the characters from START to PAST_END - 1.
157 If START is NULL, just flush the buffer. */
159 static void
160 output (const char *start, const char *past_end)
162 static char buffer[WRITESIZE];
163 static size_t bytes_in_buffer = 0;
164 size_t bytes_to_add = past_end - start;
165 size_t bytes_available = WRITESIZE - bytes_in_buffer;
167 if (start == 0)
169 fwrite (buffer, 1, bytes_in_buffer, stdout);
170 bytes_in_buffer = 0;
171 return;
174 /* Write out as many full buffers as possible. */
175 while (bytes_to_add >= bytes_available)
177 memcpy (buffer + bytes_in_buffer, start, bytes_available);
178 bytes_to_add -= bytes_available;
179 start += bytes_available;
180 fwrite (buffer, 1, WRITESIZE, stdout);
181 bytes_in_buffer = 0;
182 bytes_available = WRITESIZE;
185 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
186 bytes_in_buffer += bytes_to_add;
189 /* Print in reverse the file open on descriptor FD for reading FILE.
190 Return true if successful. */
192 static bool
193 tac_seekable (int input_fd, const char *file)
195 /* Pointer to the location in `G_buffer' where the search for
196 the next separator will begin. */
197 char *match_start;
199 /* Pointer to one past the rightmost character in `G_buffer' that
200 has not been printed yet. */
201 char *past_end;
203 /* Length of the record growing in `G_buffer'. */
204 size_t saved_record_size;
206 /* Offset in the file of the next read. */
207 off_t file_pos;
209 /* True if `output' has not been called yet for any file.
210 Only used when the separator is attached to the preceding record. */
211 bool first_time = true;
212 char first_char = *separator; /* Speed optimization, non-regexp. */
213 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
214 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
215 struct re_registers regs;
217 /* Find the size of the input file. */
218 file_pos = lseek (input_fd, (off_t) 0, SEEK_END);
219 if (file_pos < 1)
220 return true; /* It's an empty file. */
222 /* Arrange for the first read to lop off enough to leave the rest of the
223 file a multiple of `read_size'. Since `read_size' can change, this may
224 not always hold during the program run, but since it usually will, leave
225 it here for i/o efficiency (page/sector boundaries and all that).
226 Note: the efficiency gain has not been verified. */
227 saved_record_size = file_pos % read_size;
228 if (saved_record_size == 0)
229 saved_record_size = read_size;
230 file_pos -= saved_record_size;
231 /* `file_pos' now points to the start of the last (probably partial) block
232 in the input file. */
234 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
235 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
237 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
239 error (0, errno, _("%s: read error"), quotearg_colon (file));
240 return false;
243 match_start = past_end = G_buffer + saved_record_size;
244 /* For non-regexp search, move past impossible positions for a match. */
245 if (sentinel_length)
246 match_start -= match_length1;
248 for (;;)
250 /* Search backward from `match_start' - 1 to `G_buffer' for a match
251 with `separator'; for speed, use strncmp if `separator' contains no
252 metacharacters.
253 If the match succeeds, set `match_start' to point to the start of
254 the match and `match_length' to the length of the match.
255 Otherwise, make `match_start' < `G_buffer'. */
256 if (sentinel_length == 0)
258 size_t i = match_start - G_buffer;
259 regoff_t ri = i;
260 regoff_t range = 1 - ri;
261 regoff_t ret;
263 if (1 < range)
264 error (EXIT_FAILURE, 0, _("record too large"));
266 if (range == 1
267 || ((ret = re_search (&compiled_separator, G_buffer,
268 i, i - 1, range, &regs))
269 == -1))
270 match_start = G_buffer - 1;
271 else if (ret == -2)
273 error (EXIT_FAILURE, 0,
274 _("error in regular expression search"));
276 else
278 match_start = G_buffer + regs.start[0];
279 match_length = regs.end[0] - regs.start[0];
282 else
284 /* `match_length' is constant for non-regexp boundaries. */
285 while (*--match_start != first_char
286 || (match_length1 && strncmp (match_start + 1, separator1,
287 match_length1)))
288 /* Do nothing. */ ;
291 /* Check whether we backed off the front of `G_buffer' without finding
292 a match for `separator'. */
293 if (match_start < G_buffer)
295 if (file_pos == 0)
297 /* Hit the beginning of the file; print the remaining record. */
298 output (G_buffer, past_end);
299 return true;
302 saved_record_size = past_end - G_buffer;
303 if (saved_record_size > read_size)
305 /* `G_buffer_size' is about twice `read_size', so since
306 we want to read in another `read_size' bytes before
307 the data already in `G_buffer', we need to increase
308 `G_buffer_size'. */
309 char *newbuffer;
310 size_t offset = sentinel_length ? sentinel_length : 1;
311 ptrdiff_t match_start_offset = match_start - G_buffer;
312 ptrdiff_t past_end_offset = past_end - G_buffer;
313 size_t old_G_buffer_size = G_buffer_size;
315 read_size *= 2;
316 G_buffer_size = read_size * 2 + sentinel_length + 2;
317 if (G_buffer_size < old_G_buffer_size)
318 xalloc_die ();
319 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
320 newbuffer += offset;
321 /* Adjust the pointers for the new buffer location. */
322 match_start = newbuffer + match_start_offset;
323 past_end = newbuffer + past_end_offset;
324 G_buffer = newbuffer;
327 /* Back up to the start of the next bufferfull of the file. */
328 if (file_pos >= read_size)
329 file_pos -= read_size;
330 else
332 read_size = file_pos;
333 file_pos = 0;
335 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
336 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
338 /* Shift the pending record data right to make room for the new.
339 The source and destination regions probably overlap. */
340 memmove (G_buffer + read_size, G_buffer, saved_record_size);
341 past_end = G_buffer + read_size + saved_record_size;
342 /* For non-regexp searches, avoid unneccessary scanning. */
343 if (sentinel_length)
344 match_start = G_buffer + read_size;
345 else
346 match_start = past_end;
348 if (safe_read (input_fd, G_buffer, read_size) != read_size)
350 error (0, errno, _("%s: read error"), quotearg_colon (file));
351 return false;
354 else
356 /* Found a match of `separator'. */
357 if (separator_ends_record)
359 char *match_end = match_start + match_length;
361 /* If this match of `separator' isn't at the end of the
362 file, print the record. */
363 if (!first_time || match_end != past_end)
364 output (match_end, past_end);
365 past_end = match_end;
366 first_time = false;
368 else
370 output (match_start, past_end);
371 past_end = match_start;
374 /* For non-regex matching, we can back up. */
375 if (sentinel_length > 0)
376 match_start -= match_length - 1;
381 #if DONT_UNLINK_WHILE_OPEN
383 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
384 Using atexit like this is wrong, since it can fail
385 when called e.g. 32 or more times.
386 But this isn't a big deal, since the code is used only on WOE/DOS
387 systems, and few people invoke tac on that many nonseekable files. */
389 static const char *file_to_remove;
390 static FILE *fp_to_close;
392 static void
393 unlink_tempfile (void)
395 fclose (fp_to_close);
396 unlink (file_to_remove);
399 static void
400 record_or_unlink_tempfile (char const *fn, FILE *fp)
402 if (!file_to_remove)
404 file_to_remove = fn;
405 fp_to_close = fp;
406 atexit (unlink_tempfile);
410 #else
412 static void
413 record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED)
415 unlink (fn);
418 #endif
420 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
421 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
422 and file name. Return true if successful. */
424 static bool
425 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
427 static char *template = NULL;
428 static char const *tempdir;
429 char *tempfile;
430 FILE *tmp;
431 int fd;
433 if (template == NULL)
435 char const * const Template = "%s/tacXXXXXX";
436 tempdir = getenv ("TMPDIR");
437 if (tempdir == NULL)
438 tempdir = DEFAULT_TMPDIR;
440 /* Subtract 2 for `%s' and add 1 for the trailing NUL byte. */
441 template = xmalloc (strlen (tempdir) + strlen (Template) - 2 + 1);
442 sprintf (template, Template, tempdir);
445 /* FIXME: there's a small window between a successful mkstemp call
446 and the unlink that's performed by record_or_unlink_tempfile.
447 If we're interrupted in that interval, this code fails to remove
448 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
449 the window is much larger -- it extends to the atexit-called
450 unlink_tempfile.
451 FIXME: clean up upon fatal signal. Don't block them, in case
452 $TMPFILE is a remote file system. */
454 tempfile = template;
455 fd = mkstemp (template);
456 if (fd < 0)
458 error (0, errno, _("cannot create temporary file %s"), quote (tempfile));
459 return false;
462 tmp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
463 if (! tmp)
465 error (0, errno, _("cannot open %s for writing"), quote (tempfile));
466 close (fd);
467 unlink (tempfile);
468 return false;
471 record_or_unlink_tempfile (tempfile, tmp);
473 while (1)
475 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
476 if (bytes_read == 0)
477 break;
478 if (bytes_read == SAFE_READ_ERROR)
480 error (0, errno, _("%s: read error"), quotearg_colon (file));
481 goto Fail;
484 if (fwrite (G_buffer, 1, bytes_read, tmp) != bytes_read)
486 error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
487 goto Fail;
491 if (fflush (tmp) != 0)
493 error (0, errno, _("%s: write error"), quotearg_colon (tempfile));
494 goto Fail;
497 *g_tmp = tmp;
498 *g_tempfile = tempfile;
499 return true;
501 Fail:
502 fclose (tmp);
503 return false;
506 /* Copy INPUT_FD to a temporary, then tac that file.
507 Return true if successful. */
509 static bool
510 tac_nonseekable (int input_fd, const char *file)
512 FILE *tmp_stream;
513 char *tmp_file;
514 return (copy_to_temp (&tmp_stream, &tmp_file, input_fd, file)
515 && tac_seekable (fileno (tmp_stream), tmp_file));
518 /* Print FILE in reverse, copying it to a temporary
519 file first if it is not seekable.
520 Return true if successful. */
522 static bool
523 tac_file (const char *filename)
525 bool ok;
526 off_t file_size;
527 int fd;
528 bool is_stdin = STREQ (filename, "-");
530 if (is_stdin)
532 have_read_stdin = true;
533 fd = STDIN_FILENO;
534 filename = _("standard input");
535 if (O_BINARY && ! isatty (STDIN_FILENO))
536 freopen (NULL, "rb", stdin);
538 else
540 fd = open (filename, O_RDONLY | O_BINARY);
541 if (fd < 0)
543 error (0, errno, _("cannot open %s for reading"), quote (filename));
544 return false;
548 file_size = lseek (fd, (off_t) 0, SEEK_END);
550 ok = (file_size < 0 || isatty (fd)
551 ? tac_nonseekable (fd, filename)
552 : tac_seekable (fd, filename));
554 if (!is_stdin && close (fd) != 0)
556 error (0, errno, _("%s: read error"), quotearg_colon (filename));
557 ok = false;
559 return ok;
563 main (int argc, char **argv)
565 const char *error_message; /* Return value from re_compile_pattern. */
566 int optc;
567 bool ok;
568 size_t half_buffer_size;
570 /* Initializer for file_list if no file-arguments
571 were specified on the command line. */
572 static char const *const default_file_list[] = {"-", NULL};
573 char const *const *file;
575 initialize_main (&argc, &argv);
576 program_name = argv[0];
577 setlocale (LC_ALL, "");
578 bindtextdomain (PACKAGE, LOCALEDIR);
579 textdomain (PACKAGE);
581 atexit (close_stdout);
583 separator = "\n";
584 sentinel_length = 1;
585 separator_ends_record = true;
587 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
589 switch (optc)
591 case 'b':
592 separator_ends_record = false;
593 break;
594 case 'r':
595 sentinel_length = 0;
596 break;
597 case 's':
598 separator = optarg;
599 if (*separator == 0)
600 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
601 break;
602 case_GETOPT_HELP_CHAR;
603 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
604 default:
605 usage (EXIT_FAILURE);
609 if (sentinel_length == 0)
611 compiled_separator.buffer = NULL;
612 compiled_separator.allocated = 0;
613 compiled_separator.fastmap = compiled_separator_fastmap;
614 compiled_separator.translate = NULL;
615 error_message = re_compile_pattern (separator, strlen (separator),
616 &compiled_separator);
617 if (error_message)
618 error (EXIT_FAILURE, 0, "%s", error_message);
620 else
621 match_length = sentinel_length = strlen (separator);
623 read_size = INITIAL_READSIZE;
624 while (sentinel_length >= read_size / 2)
626 if (SIZE_MAX / 2 < read_size)
627 xalloc_die ();
628 read_size *= 2;
630 half_buffer_size = read_size + sentinel_length + 1;
631 G_buffer_size = 2 * half_buffer_size;
632 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
633 xalloc_die ();
634 G_buffer = xmalloc (G_buffer_size);
635 if (sentinel_length)
637 strcpy (G_buffer, separator);
638 G_buffer += sentinel_length;
640 else
642 ++G_buffer;
645 file = (optind < argc
646 ? (char const *const *) &argv[optind]
647 : default_file_list);
649 if (O_BINARY && ! isatty (STDOUT_FILENO))
650 freopen (NULL, "wb", stdout);
653 size_t i;
654 ok = true;
655 for (i = 0; file[i]; ++i)
656 ok &= tac_file (file[i]);
659 /* Flush the output buffer. */
660 output ((char *) NULL, (char *) NULL);
662 if (have_read_stdin && close (STDIN_FILENO) < 0)
663 error (EXIT_FAILURE, errno, "-");
664 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);