maint: prefer C23-style nullptr
[coreutils.git] / src / csplit.c
blob8104bab95dc33ee98dd0588329e278c40fff4a32
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <assert.h>
23 #include <getopt.h>
24 #include <sys/types.h>
25 #include <signal.h>
27 #include "system.h"
29 #include <regex.h>
31 #include "die.h"
32 #include "error.h"
33 #include "fd-reopen.h"
34 #include "idx.h"
35 #include "quote.h"
36 #include "safe-read.h"
37 #include "stdio--.h"
38 #include "xdectoint.h"
39 #include "xstrtol.h"
41 /* The official name of this program (e.g., no 'g' prefix). */
42 #define PROGRAM_NAME "csplit"
44 #define AUTHORS \
45 proper_name ("Stuart Kemp"), \
46 proper_name ("David MacKenzie")
48 /* The default prefix for output file names. */
49 #define DEFAULT_PREFIX "xx"
/* One pattern argument from the command line, after parsing.
   A record describes either a line-number split (REGEXPR false,
   LINES_REQUIRED set) or a regular-expression split (REGEXPR true,
   RE_COMPILED and OFFSET set).  */
struct control
{
  intmax_t offset;		/* Line offset applied to a regexp match. */
  intmax_t lines_required;	/* Line number to split at. */
  intmax_t repeat;		/* How many extra times to apply this. */
  int argnum;			/* Index of the pattern in ARGV. */
  bool repeat_forever;		/* Set when the repeat count was '*'. */
  bool ignore;			/* Regexp only: skip, do not output. */
  bool regexpr;			/* Set when this is a regexp pattern. */
  struct re_pattern_buffer re_compiled;	/* The compiled regexp. */
};
/* START_SIZE is the initial size of the data area in buffers.
   CTRL_SIZE is the number of lines kept in each node of the line list.

   Exactly one pair of definitions is selected.  Defining the normal
   values and then redefining them under DEBUG (without #undef) would
   be an incompatible macro redefinition, so the two sets are kept in
   mutually exclusive branches.  */
#ifdef DEBUG
/* Some small values to test the algorithms. */
# define START_SIZE	200
# define CTRL_SIZE	1
#else
# define START_SIZE	8191
# define CTRL_SIZE	80
#endif
76 /* A string with a length count. */
77 struct cstring
79 idx_t len;
80 char *str;
83 /* Pointers to the beginnings of lines in the buffer area.
84 These structures are linked together if needed. */
85 struct line
87 idx_t used; /* Number of offsets used in this struct. */
88 idx_t insert_index; /* Next offset to use when inserting line. */
89 idx_t retrieve_index; /* Next index to use when retrieving line. */
90 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
91 struct line *next; /* Next in linked list. */
94 /* The structure to hold the input lines.
95 Contains a pointer to the data area and a list containing
96 pointers to the individual lines. */
97 struct buffer_record
99 idx_t bytes_alloc; /* Size of the buffer area. */
100 idx_t bytes_used; /* Bytes used in the buffer area. */
101 intmax_t start_line; /* First line number in this buffer. */
102 intmax_t first_available; /* First line that can be retrieved. */
103 idx_t num_lines; /* Number of complete lines in this buffer. */
104 char *buffer; /* Data area. */
105 struct line *line_start; /* Head of list of pointers to lines. */
106 struct line *curr_line; /* The line start record currently in use. */
107 struct buffer_record *next;
110 static void close_output_file (void);
111 static void create_output_file (void);
112 static void delete_all_files (bool);
113 static void save_line_to_file (const struct cstring *line);
115 /* Start of buffer list. */
116 static struct buffer_record *head = nullptr;
118 /* Partially read line. */
119 static char *hold_area = nullptr;
121 /* Number of bytes in 'hold_area'. */
122 static idx_t hold_count = 0;
124 /* Number of the last line in the buffers. */
125 static intmax_t last_line_number = 0;
127 /* Number of the line currently being examined. */
128 static intmax_t current_line = 0;
130 /* If true, we have read EOF. */
131 static bool have_read_eof = false;
133 /* Name of output files. */
134 static char *volatile filename_space = nullptr;
136 /* Prefix part of output file names. */
137 static char const *volatile prefix = nullptr;
139 /* Suffix part of output file names. */
140 static char *volatile suffix = nullptr;
142 /* Number of digits to use in output file names. */
143 static int volatile digits = 2;
145 /* Number of files created so far. */
146 static int volatile files_created = 0;
148 /* Number of bytes written to current file. */
149 static intmax_t bytes_written;
151 /* Output file pointer. */
152 static FILE *output_stream = nullptr;
154 /* Output file name. */
155 static char *output_filename = nullptr;
157 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
158 static char **global_argv;
160 /* If true, do not print the count of bytes in each output file. */
161 static bool suppress_count;
163 /* If true, remove output files on error. */
164 static bool volatile remove_files;
166 /* If true, remove all output files which have a zero length. */
167 static bool elide_empty_files;
169 /* If true, suppress the lines that match the PATTERN */
170 static bool suppress_matched;
172 /* The compiled pattern arguments, which determine how to split
173 the input file. */
174 static struct control *controls;
176 /* Number of elements in 'controls'. */
177 static idx_t control_used;
179 /* The set of signals that are caught. */
180 static sigset_t caught_signals;
/* Codes for long options that have no single-letter form.  They start
   at CHAR_MAX + 1 so that they cannot collide with any character used
   as a short option.  */
enum
{
  SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1
};
189 static struct option const longopts[] =
191 {"digits", required_argument, nullptr, 'n'},
192 {"quiet", no_argument, nullptr, 'q'},
193 {"silent", no_argument, nullptr, 's'},
194 {"keep-files", no_argument, nullptr, 'k'},
195 {"elide-empty-files", no_argument, nullptr, 'z'},
196 {"prefix", required_argument, nullptr, 'f'},
197 {"suffix-format", required_argument, nullptr, 'b'},
198 {"suppress-matched", no_argument, nullptr, SUPPRESS_MATCHED_OPTION},
199 {GETOPT_HELP_OPTION_DECL},
200 {GETOPT_VERSION_OPTION_DECL},
201 {nullptr, 0, nullptr, 0}
204 /* Optionally remove files created so far; then exit.
205 Called when an error detected. */
207 static void
208 cleanup (void)
210 sigset_t oldset;
212 close_output_file ();
214 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
215 delete_all_files (false);
216 sigprocmask (SIG_SETMASK, &oldset, nullptr);
/* Run cleanup, then terminate the program with EXIT_FAILURE.
   Never returns.  */

static _Noreturn void
cleanup_fatal (void)
{
  cleanup ();
  exit (EXIT_FAILURE);
}
/* Report memory exhaustion and exit through cleanup_fatal, so that
   output files are removed when appropriate.
   NOTE(review): presumably this is the hook the x*alloc allocation
   helpers call on failure -- confirm against the xalloc header.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
233 static void
234 interrupt_handler (int sig)
236 delete_all_files (true);
237 signal (sig, SIG_DFL);
238 /* The signal has been reset to SIG_DFL, but blocked during this
239 handler. Force the default action of this signal once the
240 handler returns and the block is removed. */
241 raise (sig);
244 /* Keep track of NUM bytes of a partial line in buffer START.
245 These bytes will be retrieved later when another large buffer is read. */
247 static void
248 save_to_hold_area (char *start, idx_t num)
250 free (hold_area);
251 hold_area = start;
252 hold_count = num;
255 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
256 Return the number of bytes read. */
258 static idx_t
259 read_input (char *dest, idx_t max_n_bytes)
261 idx_t bytes_read;
263 if (max_n_bytes == 0)
264 return 0;
266 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
268 if (bytes_read == 0)
269 have_read_eof = true;
271 if (bytes_read == SAFE_READ_ERROR)
273 error (0, errno, _("read error"));
274 cleanup_fatal ();
277 return bytes_read;
280 /* Initialize existing line record P. */
282 static void
283 clear_line_control (struct line *p)
285 p->used = 0;
286 p->insert_index = 0;
287 p->retrieve_index = 0;
290 /* Return a new, initialized line record. */
292 static struct line *
293 new_line_control (void)
295 struct line *p = xmalloc (sizeof *p);
297 p->next = nullptr;
298 clear_line_control (p);
300 return p;
303 /* Record LINE_START, which is the address of the start of a line
304 of length LINE_LEN in the large buffer, in the lines buffer of B. */
306 static void
307 keep_new_line (struct buffer_record *b, char *line_start, idx_t line_len)
309 struct line *l;
311 /* If there is no existing area to keep line info, get some. */
312 if (b->line_start == nullptr)
313 b->line_start = b->curr_line = new_line_control ();
315 /* If existing area for lines is full, get more. */
316 if (b->curr_line->used == CTRL_SIZE)
318 b->curr_line->next = new_line_control ();
319 b->curr_line = b->curr_line->next;
322 l = b->curr_line;
324 /* Record the start of the line, and update counters. */
325 l->starts[l->insert_index].str = line_start;
326 l->starts[l->insert_index].len = line_len;
327 l->used++;
328 l->insert_index++;
331 /* Scan the buffer in B for newline characters
332 and record the line start locations and lengths in B.
333 Return the number of lines found in this buffer.
335 There may be an incomplete line at the end of the buffer;
336 a pointer is kept to this area, which will be used when
337 the next buffer is filled. */
339 static idx_t
340 record_line_starts (struct buffer_record *b)
342 char *line_start; /* Start of current line. */
343 idx_t lines; /* Number of lines found. */
344 idx_t line_length; /* Length of each line found. */
346 if (b->bytes_used == 0)
347 return 0;
349 lines = 0;
350 line_start = b->buffer;
351 char *buffer_end = line_start + b->bytes_used;
352 *buffer_end = '\n';
354 while (true)
356 char *line_end = rawmemchr (line_start, '\n');
357 if (line_end == buffer_end)
358 break;
359 line_length = line_end - line_start + 1;
360 keep_new_line (b, line_start, line_length);
361 line_start = line_end + 1;
362 lines++;
365 /* Check for an incomplete last line. */
366 idx_t bytes_left = buffer_end - line_start;
367 if (bytes_left)
369 if (have_read_eof)
371 keep_new_line (b, line_start, bytes_left);
372 lines++;
374 else
375 save_to_hold_area (ximemdup (line_start, bytes_left), bytes_left);
378 b->num_lines = lines;
379 b->first_available = b->start_line = last_line_number + 1;
380 last_line_number += lines;
382 return lines;
385 /* Work around <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109614>. */
386 #if 13 <= __GNUC__
387 # pragma GCC diagnostic ignored "-Wanalyzer-mismatching-deallocation"
388 # pragma GCC diagnostic ignored "-Wanalyzer-use-after-free"
389 # pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
390 #endif
392 static void
393 free_buffer (struct buffer_record *buf)
395 for (struct line *l = buf->line_start; l;)
397 struct line *n = l->next;
398 free (l);
399 l = n;
401 free (buf->buffer);
402 free (buf);
405 /* Return a new buffer of at least MINSIZE bytes. */
407 static ATTRIBUTE_DEALLOC (free_buffer, 1)
408 struct buffer_record *
409 get_new_buffer (idx_t min_size)
411 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
412 new_buffer->bytes_alloc = 0;
413 new_buffer->buffer = xpalloc (nullptr, &new_buffer->bytes_alloc, min_size,
414 -1, 1);
415 new_buffer->bytes_used = 0;
416 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
417 new_buffer->num_lines = 0;
418 new_buffer->line_start = new_buffer->curr_line = nullptr;
419 new_buffer->next = nullptr;
421 return new_buffer;
424 /* Append buffer BUF to the linked list of buffers that contain
425 some data yet to be processed. */
427 static void
428 save_buffer (struct buffer_record *buf)
430 struct buffer_record *p;
432 buf->next = nullptr;
433 buf->curr_line = buf->line_start;
435 if (head == nullptr)
436 head = buf;
437 else
439 for (p = head; p->next; p = p->next)
440 /* Do nothing. */ ;
441 p->next = buf;
445 /* Fill a buffer of input.
447 Set the initial size of the buffer to a default.
448 Fill the buffer (from the hold area and input stream)
449 and find the individual lines.
450 If no lines are found (the buffer is too small to hold the next line),
451 release the current buffer (whose contents would have been put in the
452 hold area) and repeat the process with another large buffer until at least
453 one entire line has been read.
455 Return true if a new buffer was obtained, otherwise false
456 (in which case end-of-file must have been encountered). */
458 static bool
459 load_buffer (void)
461 if (have_read_eof)
462 return false;
464 /* We must make the buffer at least as large as the amount of data
465 in the partial line left over from the last call,
466 plus room for a sentinel '\n'. */
467 idx_t bytes_wanted = MAX (START_SIZE, hold_count + 1);
469 while (true)
471 struct buffer_record *b = get_new_buffer (bytes_wanted);
472 idx_t bytes_alloc = b->bytes_alloc;
473 idx_t bytes_avail = bytes_alloc;
474 char *p = b->buffer;
476 /* First check the 'holding' area for a partial line. */
477 if (hold_count)
479 p = mempcpy (p, hold_area, hold_count);
480 b->bytes_used += hold_count;
481 bytes_avail -= hold_count;
482 hold_count = 0;
485 b->bytes_used += read_input (p, bytes_avail - 1);
487 if (record_line_starts (b) != 0)
489 save_buffer (b);
490 return true;
493 free_buffer (b);
494 if (have_read_eof)
495 return false;
496 if (INT_ADD_WRAPV (bytes_alloc, bytes_alloc >> 1, &bytes_wanted))
497 xalloc_die ();
501 /* Return the line number of the first line that has not yet been retrieved. */
503 static intmax_t
504 get_first_line_in_buffer (void)
506 if (head == nullptr && !load_buffer ())
507 die (EXIT_FAILURE, errno, _("input disappeared"));
509 return head->first_available;
512 /* Return a pointer to the logical first line in the buffer and make the
513 next line the logical first line.
514 Return nullptr if there is no more input. */
516 static struct cstring *
517 remove_line (void)
519 /* If non-null, this is the buffer for which the previous call
520 returned the final line. So now, presuming that line has been
521 processed, we can free the buffer and reset this pointer. */
522 static struct buffer_record *prev_buf = nullptr;
524 struct cstring *line; /* Return value. */
525 struct line *l; /* For convenience. */
527 if (prev_buf)
529 free_buffer (prev_buf);
530 prev_buf = nullptr;
533 if (head == nullptr && !load_buffer ())
534 return nullptr;
536 if (current_line < head->first_available)
537 current_line = head->first_available;
539 ++(head->first_available);
541 l = head->curr_line;
543 line = &l->starts[l->retrieve_index];
545 /* Advance index to next line. */
546 if (++l->retrieve_index == l->used)
548 /* Go on to the next line record. */
549 head->curr_line = l->next;
550 if (head->curr_line == nullptr || head->curr_line->used == 0)
552 /* Go on to the next data block.
553 but first record the current one so we can free it
554 once the line we're returning has been processed. */
555 prev_buf = head;
556 head = head->next;
560 return line;
563 /* Search the buffers for line LINENUM, reading more input if necessary.
564 Return a pointer to the line, or nullptr if it is not found in the file. */
566 static struct cstring *
567 find_line (intmax_t linenum)
569 struct buffer_record *b;
571 if (head == nullptr && !load_buffer ())
572 return nullptr;
574 if (linenum < head->start_line)
575 return nullptr;
577 for (b = head;;)
579 assert (b);
580 if (linenum < b->start_line + b->num_lines)
582 /* The line is in this buffer. */
583 struct line *l;
584 idx_t offset; /* How far into the buffer the line is. */
586 l = b->line_start;
587 offset = linenum - b->start_line;
588 /* Find the control record. */
589 while (offset >= CTRL_SIZE)
591 l = l->next;
592 offset -= CTRL_SIZE;
594 return &l->starts[offset];
596 if (b->next == nullptr && !load_buffer ())
597 return nullptr;
598 b = b->next; /* Try the next data block. */
602 /* Return true if at least one more line is available for input. */
604 static bool
605 no_more_lines (void)
607 return find_line (current_line + 1) == nullptr;
/* Arrange for standard input to read from the file NAME.  The name
   "-" leaves standard input as it is.  Exit with a diagnostic if the
   file cannot be opened.  */

static void
set_input_file (char const *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
         quoteaf (name));
}
620 /* Write all lines from the beginning of the buffer up to, but
621 not including, line LAST_LINE, to the current output file.
622 If IGNORE is true, do not output lines selected here.
623 ARGNUM is the index in ARGV of the current pattern. */
625 static void
626 write_to_file (intmax_t last_line, bool ignore, int argnum)
628 struct cstring *line;
629 intmax_t first_line; /* First available input line. */
630 intmax_t lines; /* Number of lines to output. */
631 intmax_t i;
633 first_line = get_first_line_in_buffer ();
635 if (first_line > last_line)
637 error (0, 0, _("%s: line number out of range"),
638 quote (global_argv[argnum]));
639 cleanup_fatal ();
642 lines = last_line - first_line;
644 for (i = 0; i < lines; i++)
646 line = remove_line ();
647 if (line == nullptr)
649 error (0, 0, _("%s: line number out of range"),
650 quote (global_argv[argnum]));
651 cleanup_fatal ();
653 if (!ignore)
654 save_line_to_file (line);
658 /* Output any lines left after all regexps have been processed. */
660 static void
661 dump_rest_of_file (void)
663 struct cstring *line;
665 while ((line = remove_line ()) != nullptr)
666 save_line_to_file (line);
669 /* Handle an attempt to read beyond EOF under the control of record P,
670 on iteration REPETITION if nonzero. */
672 static void
673 handle_line_error (const struct control *p, intmax_t repetition)
675 char buf[INT_BUFSIZE_BOUND (intmax_t)];
677 fprintf (stderr, _("%s: %s: line number out of range"),
678 program_name, quote (imaxtostr (p->lines_required, buf)));
679 if (repetition)
680 fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf));
681 else
682 fprintf (stderr, "\n");
684 cleanup_fatal ();
687 /* Determine the line number that marks the end of this file,
688 then get those lines and save them to the output file.
689 P is the control record.
690 REPETITION is the repetition number. */
692 static void
693 process_line_count (const struct control *p, intmax_t repetition)
695 intmax_t linenum;
696 intmax_t last_line_to_save = p->lines_required * (repetition + 1);
698 create_output_file ();
700 /* Ensure that the line number specified is not 1 greater than
701 the number of lines in the file.
702 When suppressing matched lines, check before the loop. */
703 if (no_more_lines () && suppress_matched)
704 handle_line_error (p, repetition);
706 linenum = get_first_line_in_buffer ();
707 while (linenum++ < last_line_to_save)
709 struct cstring *line = remove_line ();
710 if (line == nullptr)
711 handle_line_error (p, repetition);
712 save_line_to_file (line);
715 close_output_file ();
717 if (suppress_matched)
718 remove_line ();
720 /* Ensure that the line number specified is not 1 greater than
721 the number of lines in the file. */
722 if (no_more_lines () && !suppress_matched)
723 handle_line_error (p, repetition);
726 static void
727 regexp_error (struct control *p, intmax_t repetition, bool ignore)
729 fprintf (stderr, _("%s: %s: match not found"),
730 program_name, quote (global_argv[p->argnum]));
732 if (repetition)
734 char buf[INT_BUFSIZE_BOUND (intmax_t)];
735 fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf));
737 else
738 fprintf (stderr, "\n");
740 if (!ignore)
742 dump_rest_of_file ();
743 close_output_file ();
745 cleanup_fatal ();
748 /* Read the input until a line matches the regexp in P, outputting
749 it unless P->IGNORE is true.
750 REPETITION is this repeat-count; 0 means the first time. */
752 static void
753 process_regexp (struct control *p, intmax_t repetition)
755 struct cstring *line; /* From input file. */
756 idx_t line_len; /* To make "$" in regexps work. */
757 intmax_t break_line; /* First line number of next file. */
758 bool ignore = p->ignore; /* If true, skip this section. */
759 regoff_t ret;
761 if (!ignore)
762 create_output_file ();
764 /* If there is no offset for the regular expression, or
765 it is positive, then it is not necessary to buffer the lines. */
767 if (p->offset >= 0)
769 while (true)
771 line = find_line (++current_line);
772 if (line == nullptr)
774 if (p->repeat_forever)
776 if (!ignore)
778 dump_rest_of_file ();
779 close_output_file ();
781 exit (EXIT_SUCCESS);
783 else
784 regexp_error (p, repetition, ignore);
786 line_len = line->len;
787 if (line->str[line_len - 1] == '\n')
788 line_len--;
789 ret = re_search (&p->re_compiled, line->str, line_len,
790 0, line_len, nullptr);
791 if (ret == -2)
793 error (0, 0, _("error in regular expression search"));
794 cleanup_fatal ();
796 if (ret == -1)
798 line = remove_line ();
799 if (!ignore)
800 save_line_to_file (line);
802 else
803 break;
806 else
808 /* Buffer the lines. */
809 while (true)
811 line = find_line (++current_line);
812 if (line == nullptr)
814 if (p->repeat_forever)
816 if (!ignore)
818 dump_rest_of_file ();
819 close_output_file ();
821 exit (EXIT_SUCCESS);
823 else
824 regexp_error (p, repetition, ignore);
826 line_len = line->len;
827 if (line->str[line_len - 1] == '\n')
828 line_len--;
829 ret = re_search (&p->re_compiled, line->str, line_len,
830 0, line_len, nullptr);
831 if (ret == -2)
833 error (0, 0, _("error in regular expression search"));
834 cleanup_fatal ();
836 if (ret != -1)
837 break;
841 /* Account for any offset from this regexp. */
842 break_line = current_line + p->offset;
844 write_to_file (break_line, ignore, p->argnum);
846 if (!ignore)
847 close_output_file ();
849 if (p->offset > 0)
850 current_line = break_line;
852 if (suppress_matched)
853 remove_line ();
856 /* Split the input file according to the control records we have built. */
858 static void
859 split_file (void)
861 for (idx_t i = 0; i < control_used; i++)
863 intmax_t j;
864 if (controls[i].regexpr)
866 for (j = 0; (controls[i].repeat_forever
867 || j <= controls[i].repeat); j++)
868 process_regexp (&controls[i], j);
870 else
872 for (j = 0; (controls[i].repeat_forever
873 || j <= controls[i].repeat); j++)
874 process_line_count (&controls[i], j);
878 create_output_file ();
879 dump_rest_of_file ();
880 close_output_file ();
883 /* Return the name of output file number NUM.
885 This function is called from a signal handler, so it should invoke
886 only reentrant functions that are async-signal-safe. POSIX does
887 not guarantee this for the functions called below, but we don't
888 know of any hosts where this implementation isn't safe. */
890 static char *
891 make_filename (int num)
893 strcpy (filename_space, prefix);
894 if (suffix)
895 sprintf (filename_space + strlen (prefix), suffix, num);
896 else
897 sprintf (filename_space + strlen (prefix), "%0*d", digits, num);
898 return filename_space;
901 /* Create the next output file. */
903 static void
904 create_output_file (void)
906 int nfiles = files_created;
907 bool fopen_ok;
908 int fopen_errno;
910 output_filename = make_filename (nfiles);
912 if (nfiles == INT_MAX)
914 fopen_ok = false;
915 fopen_errno = EOVERFLOW;
917 else
919 /* Create the output file in a critical section, to avoid races. */
920 sigset_t oldset;
921 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
922 output_stream = fopen (output_filename, "w");
923 fopen_ok = (output_stream != nullptr);
924 fopen_errno = errno;
925 files_created = nfiles + fopen_ok;
926 sigprocmask (SIG_SETMASK, &oldset, nullptr);
929 if (! fopen_ok)
931 error (0, fopen_errno, "%s", quotef (output_filename));
932 cleanup_fatal ();
934 bytes_written = 0;
937 /* If requested, delete all the files we have created. This function
938 must be called only from critical sections. */
940 static void
941 delete_all_files (bool in_signal_handler)
943 if (! remove_files)
944 return;
946 for (int i = files_created; 0 <= --i; )
948 char const *name = make_filename (i);
949 if (unlink (name) != 0 && errno != ENOENT && !in_signal_handler)
950 error (0, errno, "%s", quotef (name));
953 files_created = 0;
956 /* Close the current output file and print the count
957 of characters in this file. */
959 static void
960 close_output_file (void)
962 if (output_stream)
964 if (ferror (output_stream))
966 error (0, 0, _("write error for %s"), quoteaf (output_filename));
967 output_stream = nullptr;
968 cleanup_fatal ();
970 if (fclose (output_stream) != 0)
972 error (0, errno, "%s", quotef (output_filename));
973 output_stream = nullptr;
974 cleanup_fatal ();
976 if (bytes_written == 0 && elide_empty_files)
978 sigset_t oldset;
979 bool unlink_ok;
980 int unlink_errno;
982 /* Remove the output file in a critical section, to avoid races. */
983 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
984 unlink_ok = (unlink (output_filename) == 0);
985 unlink_errno = errno;
986 files_created--;
987 sigprocmask (SIG_SETMASK, &oldset, nullptr);
989 if (! unlink_ok && unlink_errno != ENOENT)
990 error (0, unlink_errno, "%s", quotef (output_filename));
992 else
994 if (!suppress_count)
996 char buf[INT_BUFSIZE_BOUND (intmax_t)];
997 fprintf (stdout, "%s\n", imaxtostr (bytes_written, buf));
1000 output_stream = nullptr;
1004 /* Save line LINE to the output file and
1005 increment the character count for the current file. */
1007 static void
1008 save_line_to_file (const struct cstring *line)
1010 idx_t l = fwrite (line->str, sizeof (char), line->len, output_stream);
1011 if (l != line->len)
1013 error (0, errno, _("write error for %s"), quoteaf (output_filename));
1014 output_stream = nullptr;
1015 cleanup_fatal ();
1017 bytes_written += line->len;
1020 /* Return a new, initialized control record. */
1022 static struct control *
1023 new_control_record (void)
1025 static idx_t control_allocated = 0; /* Total space allocated. */
1026 struct control *p;
1028 if (control_used == control_allocated)
1029 controls = xpalloc (controls, &control_allocated, 1, -1, sizeof *controls);
1030 p = &controls[control_used++];
1031 p->regexpr = false;
1032 p->repeat = 0;
1033 p->repeat_forever = false;
1034 p->lines_required = 0;
1035 p->offset = 0;
1036 return p;
1039 /* Check if there is a numeric offset after a regular expression.
1040 STR is the entire command line argument.
1041 P is the control record for this regular expression.
1042 NUM is the numeric part of STR. */
1044 static void
1045 check_for_offset (struct control *p, char const *str, char const *num)
1047 if (xstrtoimax (num, nullptr, 10, &p->offset, "") != LONGINT_OK)
1048 die (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"),
1049 quote (str));
1052 /* Given that the first character of command line arg STR is '{',
1053 make sure that the rest of the string is a valid repeat count
1054 and store its value in P.
1055 ARGNUM is the ARGV index of STR. */
1057 static void
1058 parse_repeat_count (int argnum, struct control *p, char *str)
1060 char *end;
1062 end = str + strlen (str) - 1;
1063 if (*end != '}')
1064 die (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"),
1065 quote (str));
1066 *end = '\0';
1068 if (str + 1 == end - 1 && *(str + 1) == '*')
1069 p->repeat_forever = true;
1070 else
1072 uintmax_t val;
1073 if (xstrtoumax (str + 1, nullptr, 10, &val, "") != LONGINT_OK
1074 || INTMAX_MAX < val)
1076 die (EXIT_FAILURE, 0,
1077 _("%s}: integer required between '{' and '}'"),
1078 quote (global_argv[argnum]));
1080 p->repeat = val;
1083 *end = '}';
1086 /* Extract the regular expression from STR and check for a numeric offset.
1087 STR should start with the regexp delimiter character.
1088 Return a new control record for the regular expression.
1089 ARGNUM is the ARGV index of STR.
1090 Unless IGNORE is true, mark these lines for output. */
1092 static struct control *
1093 extract_regexp (int argnum, bool ignore, char const *str)
1095 idx_t len; /* Number of bytes in this regexp. */
1096 char delim = *str;
1097 char const *closing_delim;
1098 struct control *p;
1099 char const *err;
1101 closing_delim = strrchr (str + 1, delim);
1102 if (closing_delim == nullptr)
1103 die (EXIT_FAILURE, 0,
1104 _("%s: closing delimiter '%c' missing"), str, delim);
1106 len = closing_delim - str - 1;
1107 p = new_control_record ();
1108 p->argnum = argnum;
1109 p->ignore = ignore;
1111 p->regexpr = true;
1112 p->re_compiled.buffer = nullptr;
1113 p->re_compiled.allocated = 0;
1114 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1115 p->re_compiled.translate = nullptr;
1116 re_syntax_options =
1117 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1118 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1119 if (err)
1121 error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err);
1122 cleanup_fatal ();
1125 if (closing_delim[1])
1126 check_for_offset (p, str, closing_delim + 1);
1128 return p;
1131 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1132 After each pattern, check if the next argument is a repeat count. */
1134 static void
1135 parse_patterns (int argc, int start, char **argv)
1137 struct control *p; /* New control record created. */
1138 static intmax_t last_val = 0;
1140 for (int i = start; i < argc; i++)
1142 if (*argv[i] == '/' || *argv[i] == '%')
1144 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1146 else
1148 p = new_control_record ();
1149 p->argnum = i;
1151 uintmax_t val;
1152 if (xstrtoumax (argv[i], nullptr, 10, &val, "") != LONGINT_OK
1153 || INTMAX_MAX < val)
1154 die (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i]));
1155 if (val == 0)
1156 die (EXIT_FAILURE, 0,
1157 _("%s: line number must be greater than zero"), argv[i]);
1158 if (val < last_val)
1160 char buf[INT_BUFSIZE_BOUND (intmax_t)];
1161 die (EXIT_FAILURE, 0,
1162 _("line number %s is smaller than preceding line number, %s"),
1163 quote (argv[i]), imaxtostr (last_val, buf));
1166 if (val == last_val)
1167 error (0, 0,
1168 _("warning: line number %s is the same as preceding line number"),
1169 quote (argv[i]));
1171 last_val = val;
1173 p->lines_required = val;
1176 if (i + 1 < argc && *argv[i + 1] == '{')
1178 /* We have a repeat count. */
1179 i++;
1180 parse_repeat_count (i, p, argv[i]);
/* Bit values recording which of the printf format flags ' and # were
   seen.  They are distinct bits and may be ORed together.  */
enum
{
  FLAG_THOUSANDS = 1,		/* The ' flag. */
  FLAG_ALTERNATIVE = 2		/* The # flag. */
};
1190 /* Scan the printf format flags in FORMAT, storing info about the
1191 flags into *FLAGS_PTR. Return the number of flags found. */
1192 static idx_t
1193 get_format_flags (char const *format, int *flags_ptr)
1195 int flags = 0;
1197 for (idx_t count = 0; ; count++)
1199 switch (format[count])
1201 case '-':
1202 case '0':
1203 break;
1205 case '\'':
1206 flags |= FLAG_THOUSANDS;
1207 break;
1209 case '#':
1210 flags |= FLAG_ALTERNATIVE;
1211 break;
1213 default:
1214 *flags_ptr = flags;
1215 return count;
1220 /* Check that the printf format conversion specifier *FORMAT is valid
1221 and compatible with FLAGS. Change it to 'd' if it is 'u',
1222 since the format will be used with a signed value. */
1223 static void
1224 check_format_conv_type (char *format, int flags)
1226 unsigned char ch = *format;
1227 int compatible_flags = FLAG_THOUSANDS;
1229 switch (ch)
1231 case 'd':
1232 case 'i':
1233 break;
1235 case 'u':
1236 *format = 'd';
1237 break;
1239 case 'o':
1240 case 'x':
1241 case 'X':
1242 compatible_flags = FLAG_ALTERNATIVE;
1243 break;
1245 case 0:
1246 die (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1248 default:
1249 if (isprint (ch))
1250 die (EXIT_FAILURE, 0,
1251 _("invalid conversion specifier in suffix: %c"), ch);
1252 else
1253 die (EXIT_FAILURE, 0,
1254 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1257 if (flags & ~ compatible_flags)
1258 die (EXIT_FAILURE, 0,
1259 _("invalid flags in conversion specification: %%%c%c"),
1260 (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
1263 /* Return the maximum number of bytes that can be generated by
1264 applying FORMAT to an int value. If the format is
1265 invalid, diagnose the problem and exit. */
1266 static idx_t
1267 max_out (char *format)
1269 bool percent = false;
1271 for (char *f = format; *f; f++)
1272 if (*f == '%' && *++f != '%')
1274 if (percent)
1275 die (EXIT_FAILURE, 0,
1276 _("too many %% conversion specifications in suffix"));
1277 percent = true;
1278 int flags;
1279 f += get_format_flags (f, &flags);
1280 while (ISDIGIT (*f))
1281 f++;
1282 if (*f == '.')
1283 while (ISDIGIT (*++f))
1284 continue;
1285 check_format_conv_type (f, flags);
1288 if (! percent)
1289 die (EXIT_FAILURE, 0,
1290 _("missing %% conversion specification in suffix"));
1292 int maxlen = snprintf (nullptr, 0, format, INT_MAX);
1293 if (maxlen < 0)
1294 xalloc_die ();
1295 return maxlen;
1299 main (int argc, char **argv)
1301 int optc;
1303 initialize_main (&argc, &argv);
1304 set_program_name (argv[0]);
1305 setlocale (LC_ALL, "");
1306 bindtextdomain (PACKAGE, LOCALEDIR);
1307 textdomain (PACKAGE);
1309 atexit (close_stdout);
1311 global_argv = argv;
1312 controls = nullptr;
1313 control_used = 0;
1314 suppress_count = false;
1315 remove_files = true;
1316 suppress_matched = false;
1317 prefix = DEFAULT_PREFIX;
1319 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, nullptr))
1320 != -1)
1321 switch (optc)
1323 case 'f':
1324 prefix = optarg;
1325 break;
1327 case 'b':
1328 suffix = optarg;
1329 break;
1331 case 'k':
1332 remove_files = false;
1333 break;
1335 case 'n':
1336 digits = xdectoimax (optarg, 0, MIN (INT_MAX, IDX_MAX), "",
1337 _("invalid number"), 0);
1338 break;
1340 case 's':
1341 case 'q':
1342 suppress_count = true;
1343 break;
1345 case 'z':
1346 elide_empty_files = true;
1347 break;
1349 case SUPPRESS_MATCHED_OPTION:
1350 suppress_matched = true;
1351 break;
1353 case_GETOPT_HELP_CHAR;
1355 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1357 default:
1358 usage (EXIT_FAILURE);
1361 if (argc - optind < 2)
1363 if (argc <= optind)
1364 error (0, 0, _("missing operand"));
1365 else
1366 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1367 usage (EXIT_FAILURE);
1370 idx_t prefix_len = strlen (prefix);
1371 idx_t max_digit_string_len
1372 = (suffix
1373 ? max_out (suffix)
1374 : MAX (INT_STRLEN_BOUND (int), digits));
1375 idx_t filename_size;
1376 if (INT_ADD_WRAPV (prefix_len, max_digit_string_len + 1, &filename_size))
1377 xalloc_die ();
1378 filename_space = ximalloc (filename_size);
1380 set_input_file (argv[optind++]);
1382 parse_patterns (argc, optind, argv);
1385 int i;
1386 static int const sig[] =
1388 /* The usual suspects. */
1389 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1390 #ifdef SIGPOLL
1391 SIGPOLL,
1392 #endif
1393 #ifdef SIGPROF
1394 SIGPROF,
1395 #endif
1396 #ifdef SIGVTALRM
1397 SIGVTALRM,
1398 #endif
1399 #ifdef SIGXCPU
1400 SIGXCPU,
1401 #endif
1402 #ifdef SIGXFSZ
1403 SIGXFSZ,
1404 #endif
1406 enum { nsigs = ARRAY_CARDINALITY (sig) };
1408 struct sigaction act;
1410 sigemptyset (&caught_signals);
1411 for (i = 0; i < nsigs; i++)
1413 sigaction (sig[i], nullptr, &act);
1414 if (act.sa_handler != SIG_IGN)
1415 sigaddset (&caught_signals, sig[i]);
1418 act.sa_handler = interrupt_handler;
1419 act.sa_mask = caught_signals;
1420 act.sa_flags = 0;
1422 for (i = 0; i < nsigs; i++)
1423 if (sigismember (&caught_signals, sig[i]))
1424 sigaction (sig[i], &act, nullptr);
1427 split_file ();
1429 if (close (STDIN_FILENO) != 0)
1431 error (0, errno, _("read error"));
1432 cleanup_fatal ();
1435 return EXIT_SUCCESS;
/* Print usage information and terminate the program.
   When STATUS is not EXIT_SUCCESS, only emit_try_help () is called.
   Otherwise the full help text is written to stdout: a synopsis line
   formatted with program_name, a description of the program, the
   option summary, the accepted PATTERN forms, and whatever
   emit_mandatory_arg_note () and emit_ancillary_info () add.
   The function ends with exit (STATUS).
   NOTE(review): brace-only lines and other very short lines (such as
   the "\n\" blank-line pieces of the help text) are absent from this
   listing, and column spacing inside the help strings looks collapsed;
   exit (status) appears to follow the whole if/else so that both paths
   terminate -- confirm against the pristine source before editing the
   message text.  */
1438 void
1439 usage (int status)
1441 if (status != EXIT_SUCCESS)
1442 emit_try_help ();
1443 else
1445 printf (_("\
1446 Usage: %s [OPTION]... FILE PATTERN...\n\
1448 program_name);
1449 fputs (_("\
1450 Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\
1451 and output byte counts of each piece to standard output.\n\
1452 "), stdout);
1453 fputs (_("\
1455 Read standard input if FILE is -\n\
1456 "), stdout);
1458 emit_mandatory_arg_note ();
1460 fputs (_("\
1461 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1462 -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\
1463 -k, --keep-files do not remove output files on errors\n\
1464 "), stdout);
1465 fputs (_("\
1466 --suppress-matched suppress the lines matching PATTERN\n\
1467 "), stdout);
1468 fputs (_("\
1469 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1470 -s, --quiet, --silent do not print counts of output file sizes\n\
1471 -z, --elide-empty-files suppress empty output files\n\
1472 "), stdout);
1473 fputs (HELP_OPTION_DESCRIPTION, stdout);
1474 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1475 fputs (_("\
1477 Each PATTERN may be:\n\
1478 INTEGER copy up to but not including specified line number\n\
1479 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1480 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1481 {INTEGER} repeat the previous pattern specified number of times\n\
1482 {*} repeat the previous pattern as many times as possible\n\
1484 A line OFFSET is an integer optionally preceded by '+' or '-'\n\
1485 "), stdout);
1486 emit_ancillary_info (PROGRAM_NAME);
1488 exit (status);