wc: simplify #if MB_LEN_MAX
[coreutils.git] / src / tac.c
blob4c3655895694b65c8392bfb2b1bd09cb8c6a8883
1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
25 follows in the file.
27 Options:
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
37 #include <config.h>
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
44 #include <regex.h>
46 #include "filenamecat.h"
47 #include "full-read.h"
48 #include "safe-read.h"
49 #include "temp-stream.h"
50 #include "xbinary-io.h"
52 /* The official name of this program (e.g., no 'g' prefix). */
53 #define PROGRAM_NAME "tac"
55 #define AUTHORS \
56 proper_name ("Jay Lepreau"), \
57 proper_name ("David MacKenzie")
60 /* The number of bytes per atomic read. */
61 #define INITIAL_READSIZE 8192
63 /* The number of bytes per atomic write. */
64 #define WRITESIZE 8192
66 /* The string that separates the records of the file. */
67 static char const *separator;
69 /* True if we have ever read standard input. */
70 static bool have_read_stdin = false;
72 /* If true, print 'separator' along with the record preceding it
73 in the file; otherwise with the record following it. */
74 static bool separator_ends_record;
76 /* 0 if 'separator' is to be matched as a regular expression;
77 otherwise, the length of 'separator', used as a sentinel to
78 stop the search. */
79 static size_t sentinel_length;
81 /* The length of a match with 'separator'. If 'sentinel_length' is 0,
82 'match_length' is computed every time a match succeeds;
83 otherwise, it is simply the length of 'separator'. */
84 static size_t match_length;
86 /* The input buffer. */
87 static char *G_buffer;
89 /* The number of bytes to read at once into 'buffer'. */
90 static size_t read_size;
92 /* The size of 'buffer'. This is read_size * 2 + sentinel_length + 2.
93 The extra 2 bytes allow 'past_end' to have a value beyond the
94 end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
95 static size_t G_buffer_size;
97 /* The compiled regular expression representing 'separator'. */
98 static struct re_pattern_buffer compiled_separator;
99 static char compiled_separator_fastmap[UCHAR_MAX + 1];
100 static struct re_registers regs;
102 static struct option const longopts[] =
104 {"before", no_argument, nullptr, 'b'},
105 {"regex", no_argument, nullptr, 'r'},
106 {"separator", required_argument, nullptr, 's'},
107 {GETOPT_HELP_OPTION_DECL},
108 {GETOPT_VERSION_OPTION_DECL},
109 {nullptr, 0, nullptr, 0}
112 void
113 usage (int status)
115 if (status != EXIT_SUCCESS)
116 emit_try_help ();
117 else
119 printf (_("\
120 Usage: %s [OPTION]... [FILE]...\n\
122 program_name);
123 fputs (_("\
124 Write each FILE to standard output, last line first.\n\
125 "), stdout);
127 emit_stdin_note ();
128 emit_mandatory_arg_note ();
130 fputs (_("\
131 -b, --before attach the separator before instead of after\n\
132 -r, --regex interpret the separator as a regular expression\n\
133 -s, --separator=STRING use STRING as the separator instead of newline\n\
134 "), stdout);
135 fputs (HELP_OPTION_DESCRIPTION, stdout);
136 fputs (VERSION_OPTION_DESCRIPTION, stdout);
137 emit_ancillary_info (PROGRAM_NAME);
139 exit (status);
142 /* Print the characters from START to PAST_END - 1.
143 If START is null, just flush the buffer. */
145 static void
146 output (char const *start, char const *past_end)
148 static char buffer[WRITESIZE];
149 static size_t bytes_in_buffer = 0;
150 size_t bytes_to_add = past_end - start;
151 size_t bytes_available = WRITESIZE - bytes_in_buffer;
153 if (start == 0)
155 fwrite (buffer, 1, bytes_in_buffer, stdout);
156 bytes_in_buffer = 0;
157 return;
160 /* Write out as many full buffers as possible. */
161 while (bytes_to_add >= bytes_available)
163 memcpy (buffer + bytes_in_buffer, start, bytes_available);
164 bytes_to_add -= bytes_available;
165 start += bytes_available;
166 fwrite (buffer, 1, WRITESIZE, stdout);
167 bytes_in_buffer = 0;
168 bytes_available = WRITESIZE;
171 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
172 bytes_in_buffer += bytes_to_add;
175 /* Print in reverse the file open on descriptor FD for reading FILE.
176 The file is already positioned at FILE_POS, which should be near its end.
177 Return true if successful. */
179 static bool
180 tac_seekable (int input_fd, char const *file, off_t file_pos)
182 /* Pointer to the location in 'G_buffer' where the search for
183 the next separator will begin. */
184 char *match_start;
186 /* Pointer to one past the rightmost character in 'G_buffer' that
187 has not been printed yet. */
188 char *past_end;
190 /* Length of the record growing in 'G_buffer'. */
191 size_t saved_record_size;
193 /* True if 'output' has not been called yet for any file.
194 Only used when the separator is attached to the preceding record. */
195 bool first_time = true;
196 char first_char = *separator; /* Speed optimization, non-regexp. */
197 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
198 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
200 /* Arrange for the first read to lop off enough to leave the rest of the
201 file a multiple of 'read_size'. Since 'read_size' can change, this may
202 not always hold during the program run, but since it usually will, leave
203 it here for i/o efficiency (page/sector boundaries and all that).
204 Note: the efficiency gain has not been verified. */
205 size_t remainder = file_pos % read_size;
206 if (remainder != 0)
208 file_pos -= remainder;
209 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
210 error (0, errno, _("%s: seek failed"), quotef (file));
213 /* Scan backward, looking for end of file. This caters to proc-like
214 file systems where the file size is just an estimate. */
215 while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
216 && file_pos != 0)
218 off_t rsize = read_size;
219 if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
220 error (0, errno, _("%s: seek failed"), quotef (file));
221 file_pos -= read_size;
224 /* Now scan forward, looking for end of file. */
225 while (saved_record_size == read_size)
227 size_t nread = safe_read (input_fd, G_buffer, read_size);
228 if (nread == 0)
229 break;
230 saved_record_size = nread;
231 if (saved_record_size == SAFE_READ_ERROR)
232 break;
233 file_pos += nread;
236 if (saved_record_size == SAFE_READ_ERROR)
238 error (0, errno, _("%s: read error"), quotef (file));
239 return false;
242 match_start = past_end = G_buffer + saved_record_size;
243 /* For non-regexp search, move past impossible positions for a match. */
244 if (sentinel_length)
245 match_start -= match_length1;
247 while (true)
249 /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
250 with 'separator'; for speed, use strncmp if 'separator' contains no
251 metacharacters.
252 If the match succeeds, set 'match_start' to point to the start of
253 the match and 'match_length' to the length of the match.
254 Otherwise, make 'match_start' < 'G_buffer'. */
255 if (sentinel_length == 0)
257 size_t i = match_start - G_buffer;
258 regoff_t ri = i;
259 regoff_t range = 1 - ri;
260 regoff_t ret;
262 if (1 < range)
263 error (EXIT_FAILURE, 0, _("record too large"));
265 if (range == 1
266 || ((ret = re_search (&compiled_separator, G_buffer,
267 i, i - 1, range, &regs))
268 == -1))
269 match_start = G_buffer - 1;
270 else if (ret == -2)
271 error (EXIT_FAILURE, 0,
272 _("error in regular expression search"));
273 else
275 match_start = G_buffer + regs.start[0];
276 match_length = regs.end[0] - regs.start[0];
279 else
281 /* 'match_length' is constant for non-regexp boundaries. */
282 while (*--match_start != first_char
283 || (match_length1 && !STREQ_LEN (match_start + 1, separator1,
284 match_length1)))
285 /* Do nothing. */ ;
288 /* Check whether we backed off the front of 'G_buffer' without finding
289 a match for 'separator'. */
290 if (match_start < G_buffer)
292 if (file_pos == 0)
294 /* Hit the beginning of the file; print the remaining record. */
295 output (G_buffer, past_end);
296 return true;
299 saved_record_size = past_end - G_buffer;
300 if (saved_record_size > read_size)
302 /* 'G_buffer_size' is about twice 'read_size', so since
303 we want to read in another 'read_size' bytes before
304 the data already in 'G_buffer', we need to increase
305 'G_buffer_size'. */
306 char *newbuffer;
307 size_t offset = sentinel_length ? sentinel_length : 1;
308 size_t old_G_buffer_size = G_buffer_size;
310 read_size *= 2;
311 G_buffer_size = read_size * 2 + sentinel_length + 2;
312 if (G_buffer_size < old_G_buffer_size)
313 xalloc_die ();
314 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
315 newbuffer += offset;
316 G_buffer = newbuffer;
319 /* Back up to the start of the next bufferfull of the file. */
320 if (file_pos >= read_size)
321 file_pos -= read_size;
322 else
324 read_size = file_pos;
325 file_pos = 0;
327 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
328 error (0, errno, _("%s: seek failed"), quotef (file));
330 /* Shift the pending record data right to make room for the new.
331 The source and destination regions probably overlap. */
332 memmove (G_buffer + read_size, G_buffer, saved_record_size);
333 past_end = G_buffer + read_size + saved_record_size;
334 /* For non-regexp searches, avoid unnecessary scanning. */
335 if (sentinel_length)
336 match_start = G_buffer + read_size;
337 else
338 match_start = past_end;
340 if (full_read (input_fd, G_buffer, read_size) != read_size)
342 error (0, errno, _("%s: read error"), quotef (file));
343 return false;
346 else
348 /* Found a match of 'separator'. */
349 if (separator_ends_record)
351 char *match_end = match_start + match_length;
353 /* If this match of 'separator' isn't at the end of the
354 file, print the record. */
355 if (!first_time || match_end != past_end)
356 output (match_end, past_end);
357 past_end = match_end;
358 first_time = false;
360 else
362 output (match_start, past_end);
363 past_end = match_start;
366 /* For non-regex matching, we can back up. */
367 if (sentinel_length > 0)
368 match_start -= match_length - 1;
373 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
374 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
375 and file name. Return the number of bytes copied, or -1 on error. */
377 static off_t
378 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
380 FILE *fp;
381 char *file_name;
382 uintmax_t bytes_copied = 0;
383 if (!temp_stream (&fp, &file_name))
384 return -1;
386 while (true)
388 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
389 if (bytes_read == 0)
390 break;
391 if (bytes_read == SAFE_READ_ERROR)
393 error (0, errno, _("%s: read error"), quotef (file));
394 return -1;
397 if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
399 error (0, errno, _("%s: write error"), quotef (file_name));
400 return -1;
403 /* Implicitly <= OFF_T_MAX due to preceding fwrite(),
404 but unsigned type used to avoid compiler warnings
405 not aware of this fact. */
406 bytes_copied += bytes_read;
409 if (fflush (fp) != 0)
411 error (0, errno, _("%s: write error"), quotef (file_name));
412 return -1;
415 *g_tmp = fp;
416 *g_tempfile = file_name;
417 return bytes_copied;
420 /* Copy INPUT_FD to a temporary, then tac that file.
421 Return true if successful. */
423 static bool
424 tac_nonseekable (int input_fd, char const *file)
426 FILE *tmp_stream;
427 char *tmp_file;
428 off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
429 if (bytes_copied < 0)
430 return false;
432 bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied);
433 return ok;
436 /* Print FILE in reverse, copying it to a temporary
437 file first if it is not seekable.
438 Return true if successful. */
440 static bool
441 tac_file (char const *filename)
443 bool ok;
444 off_t file_size;
445 int fd;
446 bool is_stdin = STREQ (filename, "-");
448 if (is_stdin)
450 have_read_stdin = true;
451 fd = STDIN_FILENO;
452 filename = _("standard input");
453 xset_binary_mode (STDIN_FILENO, O_BINARY);
455 else
457 fd = open (filename, O_RDONLY | O_BINARY);
458 if (fd < 0)
460 error (0, errno, _("failed to open %s for reading"),
461 quoteaf (filename));
462 return false;
466 file_size = lseek (fd, 0, SEEK_END);
468 ok = (file_size < 0 || isatty (fd)
469 ? tac_nonseekable (fd, filename)
470 : tac_seekable (fd, filename, file_size));
472 if (!is_stdin && close (fd) != 0)
474 error (0, errno, _("%s: read error"), quotef (filename));
475 ok = false;
477 return ok;
481 main (int argc, char **argv)
483 char const *error_message; /* Return value from re_compile_pattern. */
484 int optc;
485 bool ok;
486 size_t half_buffer_size;
488 /* Initializer for file_list if no file-arguments
489 were specified on the command line. */
490 static char const *const default_file_list[] = {"-", nullptr};
491 char const *const *file;
493 initialize_main (&argc, &argv);
494 set_program_name (argv[0]);
495 setlocale (LC_ALL, "");
496 bindtextdomain (PACKAGE, LOCALEDIR);
497 textdomain (PACKAGE);
499 atexit (close_stdout);
501 separator = "\n";
502 sentinel_length = 1;
503 separator_ends_record = true;
505 while ((optc = getopt_long (argc, argv, "brs:", longopts, nullptr)) != -1)
507 switch (optc)
509 case 'b':
510 separator_ends_record = false;
511 break;
512 case 'r':
513 sentinel_length = 0;
514 break;
515 case 's':
516 separator = optarg;
517 break;
518 case_GETOPT_HELP_CHAR;
519 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
520 default:
521 usage (EXIT_FAILURE);
525 if (sentinel_length == 0)
527 if (*separator == 0)
528 error (EXIT_FAILURE, 0, _("separator cannot be empty"));
530 compiled_separator.buffer = nullptr;
531 compiled_separator.allocated = 0;
532 compiled_separator.fastmap = compiled_separator_fastmap;
533 compiled_separator.translate = nullptr;
534 error_message = re_compile_pattern (separator, strlen (separator),
535 &compiled_separator);
536 if (error_message)
537 error (EXIT_FAILURE, 0, "%s", (error_message));
539 else
540 match_length = sentinel_length = *separator ? strlen (separator) : 1;
542 read_size = INITIAL_READSIZE;
543 while (sentinel_length >= read_size / 2)
545 if (SIZE_MAX / 2 < read_size)
546 xalloc_die ();
547 read_size *= 2;
549 half_buffer_size = read_size + sentinel_length + 1;
550 G_buffer_size = 2 * half_buffer_size;
551 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
552 xalloc_die ();
553 G_buffer = xmalloc (G_buffer_size);
554 if (sentinel_length)
556 memcpy (G_buffer, separator, sentinel_length + 1);
557 G_buffer += sentinel_length;
559 else
561 ++G_buffer;
564 file = (optind < argc
565 ? (char const *const *) &argv[optind]
566 : default_file_list);
568 xset_binary_mode (STDOUT_FILENO, O_BINARY);
571 ok = true;
572 for (size_t i = 0; file[i]; ++i)
573 ok &= tac_file (file[i]);
576 /* Flush the output buffer. */
577 output ((char *) nullptr, (char *) nullptr);
579 if (have_read_stdin && close (STDIN_FILENO) < 0)
581 error (0, errno, "-");
582 ok = false;
585 main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);