tests: more automated quote adjustment
[coreutils/ericb.git] / src / tac.c
blob5c183f59a0fe1fce4a23d40991171f42fbb7a2ed
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-1991, 1995-2006, 2008-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
25 follows in the file.
27 Options:
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
44 #include <regex.h>
46 #include "error.h"
47 #include "filenamecat.h"
48 #include "quote.h"
49 #include "quotearg.h"
50 #include "safe-read.h"
51 #include "stdlib--.h"
52 #include "xfreopen.h"
54 /* The official name of this program (e.g., no `g' prefix). */
55 #define PROGRAM_NAME "tac"
57 #define AUTHORS \
58 proper_name ("Jay Lepreau"), \
59 proper_name ("David MacKenzie")
61 #if defined __MSDOS__ || defined _WIN32
62 /* Define this to non-zero on systems for which the regular mechanism
63 (of unlinking an open file and expecting to be able to write, seek
64 back to the beginning, then reread it) doesn't work. E.g., on Windows
65 and DOS systems. */
66 # define DONT_UNLINK_WHILE_OPEN 1
67 #endif
70 #ifndef DEFAULT_TMPDIR
71 # define DEFAULT_TMPDIR "/tmp"
72 #endif
74 /* The number of bytes per atomic read. */
75 #define INITIAL_READSIZE 8192
77 /* The number of bytes per atomic write. */
78 #define WRITESIZE 8192
80 /* The string that separates the records of the file. */
81 static char const *separator;
83 /* True if we have ever read standard input. */
84 static bool have_read_stdin = false;
86 /* If true, print `separator' along with the record preceding it
87 in the file; otherwise with the record following it. */
88 static bool separator_ends_record;
90 /* 0 if `separator' is to be matched as a regular expression;
91 otherwise, the length of `separator', used as a sentinel to
92 stop the search. */
93 static size_t sentinel_length;
95 /* The length of a match with `separator'. If `sentinel_length' is 0,
96 `match_length' is computed every time a match succeeds;
97 otherwise, it is simply the length of `separator'. */
98 static size_t match_length;
100 /* The input buffer. */
101 static char *G_buffer;
103 /* The number of bytes to read at once into `buffer'. */
104 static size_t read_size;
106 /* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
107 The extra 2 bytes allow `past_end' to have a value beyond the
108 end of `G_buffer' and `match_start' to run off the front of `G_buffer'. */
109 static size_t G_buffer_size;
111 /* The compiled regular expression representing `separator'. */
112 static struct re_pattern_buffer compiled_separator;
113 static char compiled_separator_fastmap[UCHAR_MAX + 1];
114 static struct re_registers regs;
116 static struct option const longopts[] =
118 {"before", no_argument, NULL, 'b'},
119 {"regex", no_argument, NULL, 'r'},
120 {"separator", required_argument, NULL, 's'},
121 {GETOPT_HELP_OPTION_DECL},
122 {GETOPT_VERSION_OPTION_DECL},
123 {NULL, 0, NULL, 0}
126 void
127 usage (int status)
129 if (status != EXIT_SUCCESS)
130 emit_try_help ();
131 else
133 printf (_("\
134 Usage: %s [OPTION]... [FILE]...\n\
136 program_name);
137 fputs (_("\
138 Write each FILE to standard output, last line first.\n\
139 With no FILE, or when FILE is -, read standard input.\n\
141 "), stdout);
142 fputs (_("\
143 Mandatory arguments to long options are mandatory for short options too.\n\
144 "), stdout);
145 fputs (_("\
146 -b, --before attach the separator before instead of after\n\
147 -r, --regex interpret the separator as a regular expression\n\
148 -s, --separator=STRING use STRING as the separator instead of newline\n\
149 "), stdout);
150 fputs (HELP_OPTION_DESCRIPTION, stdout);
151 fputs (VERSION_OPTION_DESCRIPTION, stdout);
152 emit_ancillary_info ();
154 exit (status);
157 /* Print the characters from START to PAST_END - 1.
158 If START is NULL, just flush the buffer. */
160 static void
161 output (const char *start, const char *past_end)
163 static char buffer[WRITESIZE];
164 static size_t bytes_in_buffer = 0;
165 size_t bytes_to_add = past_end - start;
166 size_t bytes_available = WRITESIZE - bytes_in_buffer;
168 if (start == 0)
170 fwrite (buffer, 1, bytes_in_buffer, stdout);
171 bytes_in_buffer = 0;
172 return;
175 /* Write out as many full buffers as possible. */
176 while (bytes_to_add >= bytes_available)
178 memcpy (buffer + bytes_in_buffer, start, bytes_available);
179 bytes_to_add -= bytes_available;
180 start += bytes_available;
181 fwrite (buffer, 1, WRITESIZE, stdout);
182 bytes_in_buffer = 0;
183 bytes_available = WRITESIZE;
186 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
187 bytes_in_buffer += bytes_to_add;
190 /* Print in reverse the file open on descriptor FD for reading FILE.
191 Return true if successful. */
193 static bool
194 tac_seekable (int input_fd, const char *file)
196 /* Pointer to the location in `G_buffer' where the search for
197 the next separator will begin. */
198 char *match_start;
200 /* Pointer to one past the rightmost character in `G_buffer' that
201 has not been printed yet. */
202 char *past_end;
204 /* Length of the record growing in `G_buffer'. */
205 size_t saved_record_size;
207 /* Offset in the file of the next read. */
208 off_t file_pos;
210 /* True if `output' has not been called yet for any file.
211 Only used when the separator is attached to the preceding record. */
212 bool first_time = true;
213 char first_char = *separator; /* Speed optimization, non-regexp. */
214 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
215 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
217 /* Find the size of the input file. */
218 file_pos = lseek (input_fd, 0, SEEK_END);
219 if (file_pos < 1)
220 return true; /* It's an empty file. */
222 /* Arrange for the first read to lop off enough to leave the rest of the
223 file a multiple of `read_size'. Since `read_size' can change, this may
224 not always hold during the program run, but since it usually will, leave
225 it here for i/o efficiency (page/sector boundaries and all that).
226 Note: the efficiency gain has not been verified. */
227 saved_record_size = file_pos % read_size;
228 if (saved_record_size == 0)
229 saved_record_size = read_size;
230 file_pos -= saved_record_size;
231 /* `file_pos' now points to the start of the last (probably partial) block
232 in the input file. */
234 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
235 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
237 if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
239 error (0, errno, _("%s: read error"), quotearg_colon (file));
240 return false;
243 match_start = past_end = G_buffer + saved_record_size;
244 /* For non-regexp search, move past impossible positions for a match. */
245 if (sentinel_length)
246 match_start -= match_length1;
248 while (true)
250 /* Search backward from `match_start' - 1 to `G_buffer' for a match
251 with `separator'; for speed, use strncmp if `separator' contains no
252 metacharacters.
253 If the match succeeds, set `match_start' to point to the start of
254 the match and `match_length' to the length of the match.
255 Otherwise, make `match_start' < `G_buffer'. */
256 if (sentinel_length == 0)
258 size_t i = match_start - G_buffer;
259 regoff_t ri = i;
260 regoff_t range = 1 - ri;
261 regoff_t ret;
263 if (1 < range)
264 error (EXIT_FAILURE, 0, _("record too large"));
266 if (range == 1
267 || ((ret = re_search (&compiled_separator, G_buffer,
268 i, i - 1, range, &regs))
269 == -1))
270 match_start = G_buffer - 1;
271 else if (ret == -2)
273 error (EXIT_FAILURE, 0,
274 _("error in regular expression search"));
276 else
278 match_start = G_buffer + regs.start[0];
279 match_length = regs.end[0] - regs.start[0];
282 else
284 /* `match_length' is constant for non-regexp boundaries. */
285 while (*--match_start != first_char
286 || (match_length1 && strncmp (match_start + 1, separator1,
287 match_length1)))
288 /* Do nothing. */ ;
291 /* Check whether we backed off the front of `G_buffer' without finding
292 a match for `separator'. */
293 if (match_start < G_buffer)
295 if (file_pos == 0)
297 /* Hit the beginning of the file; print the remaining record. */
298 output (G_buffer, past_end);
299 return true;
302 saved_record_size = past_end - G_buffer;
303 if (saved_record_size > read_size)
305 /* `G_buffer_size' is about twice `read_size', so since
306 we want to read in another `read_size' bytes before
307 the data already in `G_buffer', we need to increase
308 `G_buffer_size'. */
309 char *newbuffer;
310 size_t offset = sentinel_length ? sentinel_length : 1;
311 ptrdiff_t match_start_offset = match_start - G_buffer;
312 ptrdiff_t past_end_offset = past_end - G_buffer;
313 size_t old_G_buffer_size = G_buffer_size;
315 read_size *= 2;
316 G_buffer_size = read_size * 2 + sentinel_length + 2;
317 if (G_buffer_size < old_G_buffer_size)
318 xalloc_die ();
319 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
320 newbuffer += offset;
321 /* Adjust the pointers for the new buffer location. */
322 match_start = newbuffer + match_start_offset;
323 past_end = newbuffer + past_end_offset;
324 G_buffer = newbuffer;
327 /* Back up to the start of the next bufferfull of the file. */
328 if (file_pos >= read_size)
329 file_pos -= read_size;
330 else
332 read_size = file_pos;
333 file_pos = 0;
335 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
336 error (0, errno, _("%s: seek failed"), quotearg_colon (file));
338 /* Shift the pending record data right to make room for the new.
339 The source and destination regions probably overlap. */
340 memmove (G_buffer + read_size, G_buffer, saved_record_size);
341 past_end = G_buffer + read_size + saved_record_size;
342 /* For non-regexp searches, avoid unneccessary scanning. */
343 if (sentinel_length)
344 match_start = G_buffer + read_size;
345 else
346 match_start = past_end;
348 if (safe_read (input_fd, G_buffer, read_size) != read_size)
350 error (0, errno, _("%s: read error"), quotearg_colon (file));
351 return false;
354 else
356 /* Found a match of `separator'. */
357 if (separator_ends_record)
359 char *match_end = match_start + match_length;
361 /* If this match of `separator' isn't at the end of the
362 file, print the record. */
363 if (!first_time || match_end != past_end)
364 output (match_end, past_end);
365 past_end = match_end;
366 first_time = false;
368 else
370 output (match_start, past_end);
371 past_end = match_start;
374 /* For non-regex matching, we can back up. */
375 if (sentinel_length > 0)
376 match_start -= match_length - 1;
381 #if DONT_UNLINK_WHILE_OPEN
383 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
384 Using atexit like this is wrong, since it can fail
385 when called e.g. 32 or more times.
386 But this isn't a big deal, since the code is used only on WOE/DOS
387 systems, and few people invoke tac on that many nonseekable files. */
389 static const char *file_to_remove;
390 static FILE *fp_to_close;
392 static void
393 unlink_tempfile (void)
395 fclose (fp_to_close);
396 unlink (file_to_remove);
399 static void
400 record_or_unlink_tempfile (char const *fn, FILE *fp)
402 if (!file_to_remove)
404 file_to_remove = fn;
405 fp_to_close = fp;
406 atexit (unlink_tempfile);
410 #else
412 static void
413 record_or_unlink_tempfile (char const *fn, FILE *fp ATTRIBUTE_UNUSED)
415 unlink (fn);
418 #endif
420 /* A wrapper around mkstemp that gives us both an open stream pointer,
421 FP, and the corresponding FILE_NAME. Always return the same FP/name
422 pair, rewinding/truncating it upon each reuse. */
423 static bool
424 temp_stream (FILE **fp, char **file_name)
426 static char *tempfile = NULL;
427 static FILE *tmp_fp;
428 if (tempfile == NULL)
430 char const *t = getenv ("TMPDIR");
431 char const *tempdir = t ? t : DEFAULT_TMPDIR;
432 tempfile = mfile_name_concat (tempdir, "tacXXXXXX", NULL);
433 if (tempdir == NULL)
435 error (0, 0, _("memory exhausted"));
436 return false;
439 /* FIXME: there's a small window between a successful mkstemp call
440 and the unlink that's performed by record_or_unlink_tempfile.
441 If we're interrupted in that interval, this code fails to remove
442 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
443 the window is much larger -- it extends to the atexit-called
444 unlink_tempfile.
445 FIXME: clean up upon fatal signal. Don't block them, in case
446 $TMPFILE is a remote file system. */
448 int fd = mkstemp (tempfile);
449 if (fd < 0)
451 error (0, errno, _("failed to create temporary file in %s"),
452 quote (tempdir));
453 goto Reset;
456 tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
457 if (! tmp_fp)
459 error (0, errno, _("failed to open %s for writing"),
460 quote (tempfile));
461 close (fd);
462 unlink (tempfile);
463 Reset:
464 free (tempfile);
465 tempfile = NULL;
466 return false;
469 record_or_unlink_tempfile (tempfile, tmp_fp);
471 else
473 if (fseek (tmp_fp, 0, SEEK_SET) < 0
474 || ftruncate (fileno (tmp_fp), 0) < 0)
476 error (0, errno, _("failed to rewind stream for %s"),
477 quote (tempfile));
478 return false;
482 *fp = tmp_fp;
483 *file_name = tempfile;
484 return true;
487 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
488 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
489 and file name. Return true if successful. */
491 static bool
492 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
494 FILE *fp;
495 char *file_name;
496 if (!temp_stream (&fp, &file_name))
497 return false;
499 while (1)
501 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
502 if (bytes_read == 0)
503 break;
504 if (bytes_read == SAFE_READ_ERROR)
506 error (0, errno, _("%s: read error"), quotearg_colon (file));
507 goto Fail;
510 if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
512 error (0, errno, _("%s: write error"), quotearg_colon (file_name));
513 goto Fail;
517 if (fflush (fp) != 0)
519 error (0, errno, _("%s: write error"), quotearg_colon (file_name));
520 goto Fail;
523 *g_tmp = fp;
524 *g_tempfile = file_name;
525 return true;
527 Fail:
528 fclose (fp);
529 return false;
532 /* Copy INPUT_FD to a temporary, then tac that file.
533 Return true if successful. */
535 static bool
536 tac_nonseekable (int input_fd, const char *file)
538 FILE *tmp_stream;
539 char *tmp_file;
540 if (!copy_to_temp (&tmp_stream, &tmp_file, input_fd, file))
541 return false;
543 bool ok = tac_seekable (fileno (tmp_stream), tmp_file);
544 return ok;
547 /* Print FILE in reverse, copying it to a temporary
548 file first if it is not seekable.
549 Return true if successful. */
551 static bool
552 tac_file (const char *filename)
554 bool ok;
555 off_t file_size;
556 int fd;
557 bool is_stdin = STREQ (filename, "-");
559 if (is_stdin)
561 have_read_stdin = true;
562 fd = STDIN_FILENO;
563 filename = _("standard input");
564 if (O_BINARY && ! isatty (STDIN_FILENO))
565 xfreopen (NULL, "rb", stdin);
567 else
569 fd = open (filename, O_RDONLY | O_BINARY);
570 if (fd < 0)
572 error (0, errno, _("failed to open %s for reading"),
573 quote (filename));
574 return false;
578 file_size = lseek (fd, 0, SEEK_END);
580 ok = (file_size < 0 || isatty (fd)
581 ? tac_nonseekable (fd, filename)
582 : tac_seekable (fd, filename));
584 if (!is_stdin && close (fd) != 0)
586 error (0, errno, _("%s: read error"), quotearg_colon (filename));
587 ok = false;
589 return ok;
593 main (int argc, char **argv)
595 const char *error_message; /* Return value from re_compile_pattern. */
596 int optc;
597 bool ok;
598 size_t half_buffer_size;
600 /* Initializer for file_list if no file-arguments
601 were specified on the command line. */
602 static char const *const default_file_list[] = {"-", NULL};
603 char const *const *file;
605 initialize_main (&argc, &argv);
606 set_program_name (argv[0]);
607 setlocale (LC_ALL, "");
608 bindtextdomain (PACKAGE, LOCALEDIR);
609 textdomain (PACKAGE);
611 atexit (close_stdout);
613 separator = "\n";
614 sentinel_length = 1;
615 separator_ends_record = true;
617 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
619 switch (optc)
621 case 'b':
622 separator_ends_record = false;
623 break;
624 case 'r':
625 sentinel_length = 0;
626 break;
627 case 's':
628 separator = optarg;
629 if (*separator == 0)
630 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
631 break;
632 case_GETOPT_HELP_CHAR;
633 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
634 default:
635 usage (EXIT_FAILURE);
639 if (sentinel_length == 0)
641 compiled_separator.buffer = NULL;
642 compiled_separator.allocated = 0;
643 compiled_separator.fastmap = compiled_separator_fastmap;
644 compiled_separator.translate = NULL;
645 error_message = re_compile_pattern (separator, strlen (separator),
646 &compiled_separator);
647 if (error_message)
648 error (EXIT_FAILURE, 0, "%s", error_message);
650 else
651 match_length = sentinel_length = strlen (separator);
653 read_size = INITIAL_READSIZE;
654 while (sentinel_length >= read_size / 2)
656 if (SIZE_MAX / 2 < read_size)
657 xalloc_die ();
658 read_size *= 2;
660 half_buffer_size = read_size + sentinel_length + 1;
661 G_buffer_size = 2 * half_buffer_size;
662 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
663 xalloc_die ();
664 G_buffer = xmalloc (G_buffer_size);
665 if (sentinel_length)
667 strcpy (G_buffer, separator);
668 G_buffer += sentinel_length;
670 else
672 ++G_buffer;
675 file = (optind < argc
676 ? (char const *const *) &argv[optind]
677 : default_file_list);
679 if (O_BINARY && ! isatty (STDOUT_FILENO))
680 xfreopen (NULL, "wb", stdout);
683 size_t i;
684 ok = true;
685 for (i = 0; file[i]; ++i)
686 ok &= tac_file (file[i]);
689 /* Flush the output buffer. */
690 output ((char *) NULL, (char *) NULL);
692 if (have_read_stdin && close (STDIN_FILENO) < 0)
694 error (0, errno, "-");
695 ok = false;
698 #ifdef lint
699 size_t offset = sentinel_length ? sentinel_length : 1;
700 free (G_buffer - offset);
701 #endif
703 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);