dd: output final progress before syncing
[coreutils.git] / src / csplit.c
blob8f21414e927b282b5ff48f50200c0e8f0ca56e18
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991-2022 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <assert.h>
23 #include <getopt.h>
24 #include <sys/types.h>
25 #include <signal.h>
27 #include "system.h"
29 #include <regex.h>
31 #include "die.h"
32 #include "error.h"
33 #include "fd-reopen.h"
34 #include "idx.h"
35 #include "quote.h"
36 #include "safe-read.h"
37 #include "stdio--.h"
38 #include "xdectoint.h"
39 #include "xstrtol.h"
41 /* The official name of this program (e.g., no 'g' prefix). */
42 #define PROGRAM_NAME "csplit"
44 #define AUTHORS \
45 proper_name ("Stuart Kemp"), \
46 proper_name ("David MacKenzie")
48 /* The default prefix for output file names. */
49 #define DEFAULT_PREFIX "xx"
/* One parsed pattern argument: either a line number or a regular
   expression, together with its optional offset and repeat count.  */
struct control
{
  /* Integer that followed the closing regexp delimiter.  */
  intmax_t offset;
  /* Line number given by a non-regexp pattern.  */
  intmax_t lines_required;
  /* Value N taken from a trailing '{N}' argument.  */
  intmax_t repeat;
  /* Index in ARGV of the text this record was built from.  */
  int argnum;
  /* True if the repeat count was written as '{*}'.  */
  bool repeat_forever;
  /* If true, the section found by the regexp produces no output.  */
  bool ignore;
  /* True if this record holds a regular expression.  */
  bool regexpr;
  /* The compiled regular expression.  */
  struct re_pattern_buffer re_compiled;
};
/* START_SIZE is the initial size of the data area in buffers.
   CTRL_SIZE is the number of lines kept in each node of the line list.

   Building with DEBUG defined selects some small values to test the
   algorithms.  The two sets of values live in separate preprocessor
   branches so that neither macro is ever defined twice with different
   replacement lists, which would be a constraint violation.  */
#ifdef DEBUG
# define START_SIZE 200
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define CTRL_SIZE 80
#endif
76 /* A string with a length count. */
77 struct cstring
79 idx_t len;
80 char *str;
83 /* Pointers to the beginnings of lines in the buffer area.
84 These structures are linked together if needed. */
85 struct line
87 idx_t used; /* Number of offsets used in this struct. */
88 idx_t insert_index; /* Next offset to use when inserting line. */
89 idx_t retrieve_index; /* Next index to use when retrieving line. */
90 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
91 struct line *next; /* Next in linked list. */
94 /* The structure to hold the input lines.
95 Contains a pointer to the data area and a list containing
96 pointers to the individual lines. */
97 struct buffer_record
99 idx_t bytes_alloc; /* Size of the buffer area. */
100 idx_t bytes_used; /* Bytes used in the buffer area. */
101 intmax_t start_line; /* First line number in this buffer. */
102 intmax_t first_available; /* First line that can be retrieved. */
103 idx_t num_lines; /* Number of complete lines in this buffer. */
104 char *buffer; /* Data area. */
105 struct line *line_start; /* Head of list of pointers to lines. */
106 struct line *curr_line; /* The line start record currently in use. */
107 struct buffer_record *next;
/* Forward declarations for functions used before their definitions.  */
static void create_output_file (void);
static void save_line_to_file (const struct cstring *line);
static void close_output_file (void);
static void delete_all_files (bool);
115 /* Start of buffer list. */
116 static struct buffer_record *head = NULL;
118 /* Partially read line. */
119 static char *hold_area = NULL;
121 /* Number of bytes in 'hold_area'. */
122 static idx_t hold_count = 0;
124 /* Number of the last line in the buffers. */
125 static intmax_t last_line_number = 0;
127 /* Number of the line currently being examined. */
128 static intmax_t current_line = 0;
130 /* If true, we have read EOF. */
131 static bool have_read_eof = false;
133 /* Name of output files. */
134 static char *volatile filename_space = NULL;
136 /* Prefix part of output file names. */
137 static char const *volatile prefix = NULL;
139 /* Suffix part of output file names. */
140 static char *volatile suffix = NULL;
142 /* Number of digits to use in output file names. */
143 static int volatile digits = 2;
145 /* Number of files created so far. */
146 static int volatile files_created = 0;
148 /* Number of bytes written to current file. */
149 static intmax_t bytes_written;
151 /* Output file pointer. */
152 static FILE *output_stream = NULL;
154 /* Output file name. */
155 static char *output_filename = NULL;
157 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
158 static char **global_argv;
160 /* If true, do not print the count of bytes in each output file. */
161 static bool suppress_count;
163 /* If true, remove output files on error. */
164 static bool volatile remove_files;
166 /* If true, remove all output files which have a zero length. */
167 static bool elide_empty_files;
169 /* If true, suppress the lines that match the PATTERN */
170 static bool suppress_matched;
172 /* The compiled pattern arguments, which determine how to split
173 the input file. */
174 static struct control *controls;
176 /* Number of elements in 'controls'. */
177 static idx_t control_used;
179 /* The set of signals that are caught. */
180 static sigset_t caught_signals;
/* Long options without a short-option equivalent are identified by
   pseudo short options.  These are numbered from CHAR_MAX + 1 so that
   they cannot collide with any real option character.  */
enum
{
  SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1
};
189 static struct option const longopts[] =
191 {"digits", required_argument, NULL, 'n'},
192 {"quiet", no_argument, NULL, 'q'},
193 {"silent", no_argument, NULL, 's'},
194 {"keep-files", no_argument, NULL, 'k'},
195 {"elide-empty-files", no_argument, NULL, 'z'},
196 {"prefix", required_argument, NULL, 'f'},
197 {"suffix-format", required_argument, NULL, 'b'},
198 {"suppress-matched", no_argument, NULL, SUPPRESS_MATCHED_OPTION},
199 {GETOPT_HELP_OPTION_DECL},
200 {GETOPT_VERSION_OPTION_DECL},
201 {NULL, 0, NULL, 0}
204 /* Optionally remove files created so far; then exit.
205 Called when an error detected. */
207 static void
208 cleanup (void)
210 sigset_t oldset;
212 close_output_file ();
214 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
215 delete_all_files (false);
216 sigprocmask (SIG_SETMASK, &oldset, NULL);
/* Run cleanup, then terminate with a failure status.  */

static _Noreturn void
cleanup_fatal (void)
{
  cleanup ();
  exit (EXIT_FAILURE);
}
/* Report that memory is exhausted, then clean up and exit with a
   failure status.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Handler for caught signal SIG: remove the output files if that was
   requested, then let SIG take its default action.  */

static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  /* Restore the default disposition and re-raise.  SIG is blocked
     while this handler runs, so the default action is taken once the
     handler returns and the block is removed.  */
  signal (sig, SIG_DFL);
  raise (sig);
}
244 /* Keep track of NUM bytes of a partial line in buffer START.
245 These bytes will be retrieved later when another large buffer is read. */
247 static void
248 save_to_hold_area (char *start, idx_t num)
250 free (hold_area);
251 hold_area = start;
252 hold_count = num;
255 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
256 Return the number of bytes read. */
258 static idx_t
259 read_input (char *dest, idx_t max_n_bytes)
261 idx_t bytes_read;
263 if (max_n_bytes == 0)
264 return 0;
266 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
268 if (bytes_read == 0)
269 have_read_eof = true;
271 if (bytes_read == SAFE_READ_ERROR)
273 error (0, errno, _("read error"));
274 cleanup_fatal ();
277 return bytes_read;
280 /* Initialize existing line record P. */
282 static void
283 clear_line_control (struct line *p)
285 p->used = 0;
286 p->insert_index = 0;
287 p->retrieve_index = 0;
290 /* Return a new, initialized line record. */
292 static struct line *
293 new_line_control (void)
295 struct line *p = xmalloc (sizeof *p);
297 p->next = NULL;
298 clear_line_control (p);
300 return p;
303 /* Record LINE_START, which is the address of the start of a line
304 of length LINE_LEN in the large buffer, in the lines buffer of B. */
306 static void
307 keep_new_line (struct buffer_record *b, char *line_start, idx_t line_len)
309 struct line *l;
311 /* If there is no existing area to keep line info, get some. */
312 if (b->line_start == NULL)
313 b->line_start = b->curr_line = new_line_control ();
315 /* If existing area for lines is full, get more. */
316 if (b->curr_line->used == CTRL_SIZE)
318 b->curr_line->next = new_line_control ();
319 b->curr_line = b->curr_line->next;
322 l = b->curr_line;
324 /* Record the start of the line, and update counters. */
325 l->starts[l->insert_index].str = line_start;
326 l->starts[l->insert_index].len = line_len;
327 l->used++;
328 l->insert_index++;
331 /* Scan the buffer in B for newline characters
332 and record the line start locations and lengths in B.
333 Return the number of lines found in this buffer.
335 There may be an incomplete line at the end of the buffer;
336 a pointer is kept to this area, which will be used when
337 the next buffer is filled. */
339 static idx_t
340 record_line_starts (struct buffer_record *b)
342 char *line_start; /* Start of current line. */
343 idx_t lines; /* Number of lines found. */
344 idx_t line_length; /* Length of each line found. */
346 if (b->bytes_used == 0)
347 return 0;
349 lines = 0;
350 line_start = b->buffer;
351 char *buffer_end = line_start + b->bytes_used;
352 *buffer_end = '\n';
354 while (true)
356 char *line_end = rawmemchr (line_start, '\n');
357 if (line_end == buffer_end)
358 break;
359 line_length = line_end - line_start + 1;
360 keep_new_line (b, line_start, line_length);
361 line_start = line_end + 1;
362 lines++;
365 /* Check for an incomplete last line. */
366 idx_t bytes_left = buffer_end - line_start;
367 if (bytes_left)
369 if (have_read_eof)
371 keep_new_line (b, line_start, bytes_left);
372 lines++;
374 else
375 save_to_hold_area (ximemdup (line_start, bytes_left), bytes_left);
378 b->num_lines = lines;
379 b->first_available = b->start_line = last_line_number + 1;
380 last_line_number += lines;
382 return lines;
385 static void
386 free_buffer (struct buffer_record *buf)
388 for (struct line *l = buf->line_start; l;)
390 struct line *n = l->next;
391 free (l);
392 l = n;
394 free (buf->buffer);
395 free (buf);
398 /* Return a new buffer of at least MINSIZE bytes. */
400 static ATTRIBUTE_DEALLOC (free_buffer, 1)
401 struct buffer_record *
402 get_new_buffer (idx_t min_size)
404 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
405 new_buffer->bytes_alloc = 0;
406 new_buffer->buffer = xpalloc (NULL, &new_buffer->bytes_alloc, min_size,
407 -1, 1);
408 new_buffer->bytes_used = 0;
409 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
410 new_buffer->num_lines = 0;
411 new_buffer->line_start = new_buffer->curr_line = NULL;
412 new_buffer->next = NULL;
414 return new_buffer;
417 /* Append buffer BUF to the linked list of buffers that contain
418 some data yet to be processed. */
420 static void
421 save_buffer (struct buffer_record *buf)
423 struct buffer_record *p;
425 buf->next = NULL;
426 buf->curr_line = buf->line_start;
428 if (head == NULL)
429 head = buf;
430 else
432 for (p = head; p->next; p = p->next)
433 /* Do nothing. */ ;
434 p->next = buf;
438 /* Fill a buffer of input.
440 Set the initial size of the buffer to a default.
441 Fill the buffer (from the hold area and input stream)
442 and find the individual lines.
443 If no lines are found (the buffer is too small to hold the next line),
444 release the current buffer (whose contents would have been put in the
445 hold area) and repeat the process with another large buffer until at least
446 one entire line has been read.
448 Return true if a new buffer was obtained, otherwise false
449 (in which case end-of-file must have been encountered). */
451 static bool
452 load_buffer (void)
454 struct buffer_record *b;
455 idx_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
456 idx_t bytes_avail; /* Size of new buffer created. */
457 idx_t lines_found; /* Number of lines in this new buffer. */
458 char *p; /* Place to load into buffer. */
460 if (have_read_eof)
461 return false;
463 /* We must make the buffer at least as large as the amount of data
464 in the partial line left over from the last call,
465 plus room for a sentinel '\n'. */
466 if (bytes_wanted <= hold_count)
467 bytes_wanted = hold_count + 1;
469 while (true)
471 b = get_new_buffer (bytes_wanted);
472 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
473 p = b->buffer;
475 /* First check the 'holding' area for a partial line. */
476 if (hold_count)
478 memcpy (p, hold_area, hold_count);
479 p += hold_count;
480 b->bytes_used += hold_count;
481 bytes_avail -= hold_count;
482 hold_count = 0;
485 b->bytes_used += read_input (p, bytes_avail - 1);
487 lines_found = record_line_starts (b);
489 if (lines_found || have_read_eof)
490 break;
492 if (INT_MULTIPLY_WRAPV (b->bytes_alloc, 2, &bytes_wanted))
493 xalloc_die ();
494 free_buffer (b);
497 if (lines_found)
498 save_buffer (b);
499 else
500 free_buffer (b);
502 return lines_found != 0;
505 /* Return the line number of the first line that has not yet been retrieved. */
507 static intmax_t
508 get_first_line_in_buffer (void)
510 if (head == NULL && !load_buffer ())
511 die (EXIT_FAILURE, errno, _("input disappeared"));
513 return head->first_available;
516 /* Return a pointer to the logical first line in the buffer and make the
517 next line the logical first line.
518 Return NULL if there is no more input. */
520 static struct cstring *
521 remove_line (void)
523 /* If non-NULL, this is the buffer for which the previous call
524 returned the final line. So now, presuming that line has been
525 processed, we can free the buffer and reset this pointer. */
526 static struct buffer_record *prev_buf = NULL;
528 struct cstring *line; /* Return value. */
529 struct line *l; /* For convenience. */
531 if (prev_buf)
533 free_buffer (prev_buf);
534 prev_buf = NULL;
537 if (head == NULL && !load_buffer ())
538 return NULL;
540 if (current_line < head->first_available)
541 current_line = head->first_available;
543 ++(head->first_available);
545 l = head->curr_line;
547 line = &l->starts[l->retrieve_index];
549 /* Advance index to next line. */
550 if (++l->retrieve_index == l->used)
552 /* Go on to the next line record. */
553 head->curr_line = l->next;
554 if (head->curr_line == NULL || head->curr_line->used == 0)
556 /* Go on to the next data block.
557 but first record the current one so we can free it
558 once the line we're returning has been processed. */
559 prev_buf = head;
560 head = head->next;
564 return line;
567 /* Search the buffers for line LINENUM, reading more input if necessary.
568 Return a pointer to the line, or NULL if it is not found in the file. */
570 static struct cstring *
571 find_line (intmax_t linenum)
573 struct buffer_record *b;
575 if (head == NULL && !load_buffer ())
576 return NULL;
578 if (linenum < head->start_line)
579 return NULL;
581 for (b = head;;)
583 assert (b);
584 if (linenum < b->start_line + b->num_lines)
586 /* The line is in this buffer. */
587 struct line *l;
588 idx_t offset; /* How far into the buffer the line is. */
590 l = b->line_start;
591 offset = linenum - b->start_line;
592 /* Find the control record. */
593 while (offset >= CTRL_SIZE)
595 l = l->next;
596 offset -= CTRL_SIZE;
598 return &l->starts[offset];
600 if (b->next == NULL && !load_buffer ())
601 return NULL;
602 b = b->next; /* Try the next data block. */
606 /* Return true if at least one more line is available for input. */
608 static bool
609 no_more_lines (void)
611 return find_line (current_line + 1) == NULL;
/* Make file NAME the standard input.  The name "-" leaves standard
   input as it is.  Exit with a failure status if NAME cannot be
   opened for reading.  */

static void
set_input_file (char const *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
         quoteaf (name));
}
624 /* Write all lines from the beginning of the buffer up to, but
625 not including, line LAST_LINE, to the current output file.
626 If IGNORE is true, do not output lines selected here.
627 ARGNUM is the index in ARGV of the current pattern. */
629 static void
630 write_to_file (intmax_t last_line, bool ignore, int argnum)
632 struct cstring *line;
633 intmax_t first_line; /* First available input line. */
634 intmax_t lines; /* Number of lines to output. */
635 intmax_t i;
637 first_line = get_first_line_in_buffer ();
639 if (first_line > last_line)
641 error (0, 0, _("%s: line number out of range"),
642 quote (global_argv[argnum]));
643 cleanup_fatal ();
646 lines = last_line - first_line;
648 for (i = 0; i < lines; i++)
650 line = remove_line ();
651 if (line == NULL)
653 error (0, 0, _("%s: line number out of range"),
654 quote (global_argv[argnum]));
655 cleanup_fatal ();
657 if (!ignore)
658 save_line_to_file (line);
662 /* Output any lines left after all regexps have been processed. */
664 static void
665 dump_rest_of_file (void)
667 struct cstring *line;
669 while ((line = remove_line ()) != NULL)
670 save_line_to_file (line);
673 /* Handle an attempt to read beyond EOF under the control of record P,
674 on iteration REPETITION if nonzero. */
676 static void
677 handle_line_error (const struct control *p, intmax_t repetition)
679 char buf[INT_BUFSIZE_BOUND (intmax_t)];
681 fprintf (stderr, _("%s: %s: line number out of range"),
682 program_name, quote (imaxtostr (p->lines_required, buf)));
683 if (repetition)
684 fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf));
685 else
686 fprintf (stderr, "\n");
688 cleanup_fatal ();
691 /* Determine the line number that marks the end of this file,
692 then get those lines and save them to the output file.
693 P is the control record.
694 REPETITION is the repetition number. */
696 static void
697 process_line_count (const struct control *p, intmax_t repetition)
699 intmax_t linenum;
700 intmax_t last_line_to_save = p->lines_required * (repetition + 1);
702 create_output_file ();
704 /* Ensure that the line number specified is not 1 greater than
705 the number of lines in the file.
706 When suppressing matched lines, check before the loop. */
707 if (no_more_lines () && suppress_matched)
708 handle_line_error (p, repetition);
710 linenum = get_first_line_in_buffer ();
711 while (linenum++ < last_line_to_save)
713 struct cstring *line = remove_line ();
714 if (line == NULL)
715 handle_line_error (p, repetition);
716 save_line_to_file (line);
719 close_output_file ();
721 if (suppress_matched)
722 remove_line ();
724 /* Ensure that the line number specified is not 1 greater than
725 the number of lines in the file. */
726 if (no_more_lines () && !suppress_matched)
727 handle_line_error (p, repetition);
730 static void
731 regexp_error (struct control *p, intmax_t repetition, bool ignore)
733 fprintf (stderr, _("%s: %s: match not found"),
734 program_name, quote (global_argv[p->argnum]));
736 if (repetition)
738 char buf[INT_BUFSIZE_BOUND (intmax_t)];
739 fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf));
741 else
742 fprintf (stderr, "\n");
744 if (!ignore)
746 dump_rest_of_file ();
747 close_output_file ();
749 cleanup_fatal ();
752 /* Read the input until a line matches the regexp in P, outputting
753 it unless P->IGNORE is true.
754 REPETITION is this repeat-count; 0 means the first time. */
756 static void
757 process_regexp (struct control *p, intmax_t repetition)
759 struct cstring *line; /* From input file. */
760 idx_t line_len; /* To make "$" in regexps work. */
761 intmax_t break_line; /* First line number of next file. */
762 bool ignore = p->ignore; /* If true, skip this section. */
763 regoff_t ret;
765 if (!ignore)
766 create_output_file ();
768 /* If there is no offset for the regular expression, or
769 it is positive, then it is not necessary to buffer the lines. */
771 if (p->offset >= 0)
773 while (true)
775 line = find_line (++current_line);
776 if (line == NULL)
778 if (p->repeat_forever)
780 if (!ignore)
782 dump_rest_of_file ();
783 close_output_file ();
785 exit (EXIT_SUCCESS);
787 else
788 regexp_error (p, repetition, ignore);
790 line_len = line->len;
791 if (line->str[line_len - 1] == '\n')
792 line_len--;
793 ret = re_search (&p->re_compiled, line->str, line_len,
794 0, line_len, NULL);
795 if (ret == -2)
797 error (0, 0, _("error in regular expression search"));
798 cleanup_fatal ();
800 if (ret == -1)
802 line = remove_line ();
803 if (!ignore)
804 save_line_to_file (line);
806 else
807 break;
810 else
812 /* Buffer the lines. */
813 while (true)
815 line = find_line (++current_line);
816 if (line == NULL)
818 if (p->repeat_forever)
820 if (!ignore)
822 dump_rest_of_file ();
823 close_output_file ();
825 exit (EXIT_SUCCESS);
827 else
828 regexp_error (p, repetition, ignore);
830 line_len = line->len;
831 if (line->str[line_len - 1] == '\n')
832 line_len--;
833 ret = re_search (&p->re_compiled, line->str, line_len,
834 0, line_len, NULL);
835 if (ret == -2)
837 error (0, 0, _("error in regular expression search"));
838 cleanup_fatal ();
840 if (ret != -1)
841 break;
845 /* Account for any offset from this regexp. */
846 break_line = current_line + p->offset;
848 write_to_file (break_line, ignore, p->argnum);
850 if (!ignore)
851 close_output_file ();
853 if (p->offset > 0)
854 current_line = break_line;
856 if (suppress_matched)
857 remove_line ();
860 /* Split the input file according to the control records we have built. */
862 static void
863 split_file (void)
865 for (idx_t i = 0; i < control_used; i++)
867 intmax_t j;
868 if (controls[i].regexpr)
870 for (j = 0; (controls[i].repeat_forever
871 || j <= controls[i].repeat); j++)
872 process_regexp (&controls[i], j);
874 else
876 for (j = 0; (controls[i].repeat_forever
877 || j <= controls[i].repeat); j++)
878 process_line_count (&controls[i], j);
882 create_output_file ();
883 dump_rest_of_file ();
884 close_output_file ();
887 /* Return the name of output file number NUM.
889 This function is called from a signal handler, so it should invoke
890 only reentrant functions that are async-signal-safe. POSIX does
891 not guarantee this for the functions called below, but we don't
892 know of any hosts where this implementation isn't safe. */
894 static char *
895 make_filename (int num)
897 strcpy (filename_space, prefix);
898 if (suffix)
899 sprintf (filename_space + strlen (prefix), suffix, num);
900 else
901 sprintf (filename_space + strlen (prefix), "%0*d", digits, num);
902 return filename_space;
905 /* Create the next output file. */
907 static void
908 create_output_file (void)
910 int nfiles = files_created;
911 bool fopen_ok;
912 int fopen_errno;
914 output_filename = make_filename (nfiles);
916 if (nfiles == INT_MAX)
918 fopen_ok = false;
919 fopen_errno = EOVERFLOW;
921 else
923 /* Create the output file in a critical section, to avoid races. */
924 sigset_t oldset;
925 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
926 output_stream = fopen (output_filename, "w");
927 fopen_ok = (output_stream != NULL);
928 fopen_errno = errno;
929 files_created = nfiles + fopen_ok;
930 sigprocmask (SIG_SETMASK, &oldset, NULL);
933 if (! fopen_ok)
935 error (0, fopen_errno, "%s", quotef (output_filename));
936 cleanup_fatal ();
938 bytes_written = 0;
941 /* If requested, delete all the files we have created. This function
942 must be called only from critical sections. */
944 static void
945 delete_all_files (bool in_signal_handler)
947 if (! remove_files)
948 return;
950 for (int i = files_created; 0 <= --i; )
952 char const *name = make_filename (i);
953 if (unlink (name) != 0 && errno != ENOENT && !in_signal_handler)
954 error (0, errno, "%s", quotef (name));
957 files_created = 0;
960 /* Close the current output file and print the count
961 of characters in this file. */
963 static void
964 close_output_file (void)
966 if (output_stream)
968 if (ferror (output_stream))
970 error (0, 0, _("write error for %s"), quoteaf (output_filename));
971 output_stream = NULL;
972 cleanup_fatal ();
974 if (fclose (output_stream) != 0)
976 error (0, errno, "%s", quotef (output_filename));
977 output_stream = NULL;
978 cleanup_fatal ();
980 if (bytes_written == 0 && elide_empty_files)
982 sigset_t oldset;
983 bool unlink_ok;
984 int unlink_errno;
986 /* Remove the output file in a critical section, to avoid races. */
987 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
988 unlink_ok = (unlink (output_filename) == 0);
989 unlink_errno = errno;
990 files_created--;
991 sigprocmask (SIG_SETMASK, &oldset, NULL);
993 if (! unlink_ok && unlink_errno != ENOENT)
994 error (0, unlink_errno, "%s", quotef (output_filename));
996 else
998 if (!suppress_count)
1000 char buf[INT_BUFSIZE_BOUND (intmax_t)];
1001 fprintf (stdout, "%s\n", imaxtostr (bytes_written, buf));
1004 output_stream = NULL;
1008 /* Save line LINE to the output file and
1009 increment the character count for the current file. */
1011 static void
1012 save_line_to_file (const struct cstring *line)
1014 idx_t l = fwrite (line->str, sizeof (char), line->len, output_stream);
1015 if (l != line->len)
1017 error (0, errno, _("write error for %s"), quoteaf (output_filename));
1018 output_stream = NULL;
1019 cleanup_fatal ();
1021 bytes_written += line->len;
1024 /* Return a new, initialized control record. */
1026 static struct control *
1027 new_control_record (void)
1029 static idx_t control_allocated = 0; /* Total space allocated. */
1030 struct control *p;
1032 if (control_used == control_allocated)
1033 controls = xpalloc (controls, &control_allocated, 1, -1, sizeof *controls);
1034 p = &controls[control_used++];
1035 p->regexpr = false;
1036 p->repeat = 0;
1037 p->repeat_forever = false;
1038 p->lines_required = 0;
1039 p->offset = 0;
1040 return p;
1043 /* Check if there is a numeric offset after a regular expression.
1044 STR is the entire command line argument.
1045 P is the control record for this regular expression.
1046 NUM is the numeric part of STR. */
1048 static void
1049 check_for_offset (struct control *p, char const *str, char const *num)
1051 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1052 die (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"),
1053 quote (str));
1056 /* Given that the first character of command line arg STR is '{',
1057 make sure that the rest of the string is a valid repeat count
1058 and store its value in P.
1059 ARGNUM is the ARGV index of STR. */
1061 static void
1062 parse_repeat_count (int argnum, struct control *p, char *str)
1064 char *end;
1066 end = str + strlen (str) - 1;
1067 if (*end != '}')
1068 die (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"),
1069 quote (str));
1070 *end = '\0';
1072 if (str + 1 == end - 1 && *(str + 1) == '*')
1073 p->repeat_forever = true;
1074 else
1076 uintmax_t val;
1077 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK
1078 || INTMAX_MAX < val)
1080 die (EXIT_FAILURE, 0,
1081 _("%s}: integer required between '{' and '}'"),
1082 quote (global_argv[argnum]));
1084 p->repeat = val;
1087 *end = '}';
1090 /* Extract the regular expression from STR and check for a numeric offset.
1091 STR should start with the regexp delimiter character.
1092 Return a new control record for the regular expression.
1093 ARGNUM is the ARGV index of STR.
1094 Unless IGNORE is true, mark these lines for output. */
1096 static struct control *
1097 extract_regexp (int argnum, bool ignore, char const *str)
1099 idx_t len; /* Number of bytes in this regexp. */
1100 char delim = *str;
1101 char const *closing_delim;
1102 struct control *p;
1103 char const *err;
1105 closing_delim = strrchr (str + 1, delim);
1106 if (closing_delim == NULL)
1107 die (EXIT_FAILURE, 0,
1108 _("%s: closing delimiter '%c' missing"), str, delim);
1110 len = closing_delim - str - 1;
1111 p = new_control_record ();
1112 p->argnum = argnum;
1113 p->ignore = ignore;
1115 p->regexpr = true;
1116 p->re_compiled.buffer = NULL;
1117 p->re_compiled.allocated = 0;
1118 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1119 p->re_compiled.translate = NULL;
1120 re_syntax_options =
1121 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1122 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1123 if (err)
1125 error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err);
1126 cleanup_fatal ();
1129 if (closing_delim[1])
1130 check_for_offset (p, str, closing_delim + 1);
1132 return p;
1135 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1136 After each pattern, check if the next argument is a repeat count. */
1138 static void
1139 parse_patterns (int argc, int start, char **argv)
1141 struct control *p; /* New control record created. */
1142 static intmax_t last_val = 0;
1144 for (int i = start; i < argc; i++)
1146 if (*argv[i] == '/' || *argv[i] == '%')
1148 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1150 else
1152 p = new_control_record ();
1153 p->argnum = i;
1155 uintmax_t val;
1156 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK
1157 || INTMAX_MAX < val)
1158 die (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i]));
1159 if (val == 0)
1160 die (EXIT_FAILURE, 0,
1161 _("%s: line number must be greater than zero"), argv[i]);
1162 if (val < last_val)
1164 char buf[INT_BUFSIZE_BOUND (intmax_t)];
1165 die (EXIT_FAILURE, 0,
1166 _("line number %s is smaller than preceding line number, %s"),
1167 quote (argv[i]), imaxtostr (last_val, buf));
1170 if (val == last_val)
1171 error (0, 0,
1172 _("warning: line number %s is the same as preceding line number"),
1173 quote (argv[i]));
1175 last_val = val;
1177 p->lines_required = val;
1180 if (i + 1 < argc && *argv[i + 1] == '{')
1182 /* We have a repeat count. */
1183 i++;
1184 parse_repeat_count (i, p, argv[i]);
/* Bit values naming the printf format flags ' and #.  They can be ORed
   together.  */
enum
{
  FLAG_THOUSANDS = 1,           /* The ' flag.  */
  FLAG_ALTERNATIVE = 2          /* The # flag.  */
};
1194 /* Scan the printf format flags in FORMAT, storing info about the
1195 flags into *FLAGS_PTR. Return the number of flags found. */
1196 static idx_t
1197 get_format_flags (char const *format, int *flags_ptr)
1199 int flags = 0;
1201 for (idx_t count = 0; ; count++)
1203 switch (format[count])
1205 case '-':
1206 case '0':
1207 break;
1209 case '\'':
1210 flags |= FLAG_THOUSANDS;
1211 break;
1213 case '#':
1214 flags |= FLAG_ALTERNATIVE;
1215 break;
1217 default:
1218 *flags_ptr = flags;
1219 return count;
1224 /* Check that the printf format conversion specifier *FORMAT is valid
1225 and compatible with FLAGS. Change it to 'd' if it is 'u',
1226 since the format will be used with a signed value. */
1227 static void
1228 check_format_conv_type (char *format, int flags)
1230 unsigned char ch = *format;
1231 int compatible_flags = FLAG_THOUSANDS;
1233 switch (ch)
1235 case 'd':
1236 case 'i':
1237 break;
1239 case 'u':
1240 *format = 'd';
1241 break;
1243 case 'o':
1244 case 'x':
1245 case 'X':
1246 compatible_flags = FLAG_ALTERNATIVE;
1247 break;
1249 case 0:
1250 die (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1252 default:
1253 if (isprint (ch))
1254 die (EXIT_FAILURE, 0,
1255 _("invalid conversion specifier in suffix: %c"), ch);
1256 else
1257 die (EXIT_FAILURE, 0,
1258 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1261 if (flags & ~ compatible_flags)
1262 die (EXIT_FAILURE, 0,
1263 _("invalid flags in conversion specification: %%%c%c"),
1264 (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
1267 /* Return the maximum number of bytes that can be generated by
1268 applying FORMAT to an int value. If the format is
1269 invalid, diagnose the problem and exit. */
1270 static idx_t
1271 max_out (char *format)
1273 bool percent = false;
1275 for (char *f = format; *f; f++)
1276 if (*f == '%' && *++f != '%')
1278 if (percent)
1279 die (EXIT_FAILURE, 0,
1280 _("too many %% conversion specifications in suffix"));
1281 percent = true;
1282 int flags;
1283 f += get_format_flags (f, &flags);
1284 while (ISDIGIT (*f))
1285 f++;
1286 if (*f == '.')
1287 while (ISDIGIT (*++f))
1288 continue;
1289 check_format_conv_type (f, flags);
1292 if (! percent)
1293 die (EXIT_FAILURE, 0,
1294 _("missing %% conversion specification in suffix"));
1296 int maxlen = snprintf (NULL, 0, format, INT_MAX);
1297 if (maxlen < 0)
1298 xalloc_die ();
1299 return maxlen;
1303 main (int argc, char **argv)
1305 int optc;
1307 initialize_main (&argc, &argv);
1308 set_program_name (argv[0]);
1309 setlocale (LC_ALL, "");
1310 bindtextdomain (PACKAGE, LOCALEDIR);
1311 textdomain (PACKAGE);
1313 atexit (close_stdout);
1315 global_argv = argv;
1316 controls = NULL;
1317 control_used = 0;
1318 suppress_count = false;
1319 remove_files = true;
1320 suppress_matched = false;
1321 prefix = DEFAULT_PREFIX;
1323 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1324 switch (optc)
1326 case 'f':
1327 prefix = optarg;
1328 break;
1330 case 'b':
1331 suffix = optarg;
1332 break;
1334 case 'k':
1335 remove_files = false;
1336 break;
1338 case 'n':
1339 digits = xdectoimax (optarg, 0, MIN (INT_MAX, IDX_MAX), "",
1340 _("invalid number"), 0);
1341 break;
1343 case 's':
1344 case 'q':
1345 suppress_count = true;
1346 break;
1348 case 'z':
1349 elide_empty_files = true;
1350 break;
1352 case SUPPRESS_MATCHED_OPTION:
1353 suppress_matched = true;
1354 break;
1356 case_GETOPT_HELP_CHAR;
1358 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1360 default:
1361 usage (EXIT_FAILURE);
1364 if (argc - optind < 2)
1366 if (argc <= optind)
1367 error (0, 0, _("missing operand"));
1368 else
1369 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1370 usage (EXIT_FAILURE);
1373 idx_t prefix_len = strlen (prefix);
1374 idx_t max_digit_string_len
1375 = (suffix
1376 ? max_out (suffix)
1377 : MAX (INT_STRLEN_BOUND (int), digits));
1378 idx_t filename_size;
1379 if (INT_ADD_WRAPV (prefix_len, max_digit_string_len + 1, &filename_size))
1380 xalloc_die ();
1381 filename_space = ximalloc (filename_size);
1383 set_input_file (argv[optind++]);
1385 parse_patterns (argc, optind, argv);
1388 int i;
1389 static int const sig[] =
1391 /* The usual suspects. */
1392 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1393 #ifdef SIGPOLL
1394 SIGPOLL,
1395 #endif
1396 #ifdef SIGPROF
1397 SIGPROF,
1398 #endif
1399 #ifdef SIGVTALRM
1400 SIGVTALRM,
1401 #endif
1402 #ifdef SIGXCPU
1403 SIGXCPU,
1404 #endif
1405 #ifdef SIGXFSZ
1406 SIGXFSZ,
1407 #endif
1409 enum { nsigs = ARRAY_CARDINALITY (sig) };
1411 struct sigaction act;
1413 sigemptyset (&caught_signals);
1414 for (i = 0; i < nsigs; i++)
1416 sigaction (sig[i], NULL, &act);
1417 if (act.sa_handler != SIG_IGN)
1418 sigaddset (&caught_signals, sig[i]);
1421 act.sa_handler = interrupt_handler;
1422 act.sa_mask = caught_signals;
1423 act.sa_flags = 0;
1425 for (i = 0; i < nsigs; i++)
1426 if (sigismember (&caught_signals, sig[i]))
1427 sigaction (sig[i], &act, NULL);
1430 split_file ();
1432 if (close (STDIN_FILENO) != 0)
1434 error (0, errno, _("read error"));
1435 cleanup_fatal ();
1438 return EXIT_SUCCESS;
/* Print usage information and exit with STATUS.
   If STATUS is not EXIT_SUCCESS, only emit_try_help is called.
   Otherwise the synopsis, the option table and the PATTERN syntax are
   written to standard output, followed by emit_ancillary_info.
   Either way the function ends by calling exit (STATUS).

   NOTE(review): the listing numbers in this copy skip (for example
   1449 -> 1451 and 1456 -> 1458), so brace-only and continuation-only
   lines appear to have been lost, and the column spacing inside the
   help strings looks collapsed.  Compare against the upstream file
   before relying on the exact help text or msgids.  */
1441 void
1442 usage (int status)
/* Failure path: emit only the short hint from emit_try_help.  */
1444 if (status != EXIT_SUCCESS)
1445 emit_try_help ();
1446 else
1448 printf (_("\
1449 Usage: %s [OPTION]... FILE PATTERN...\n\
1451 program_name);
1452 fputs (_("\
1453 Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\
1454 and output byte counts of each piece to standard output.\n\
1455 "), stdout);
1456 fputs (_("\
1458 Read standard input if FILE is -\n\
1459 "), stdout);
1461 emit_mandatory_arg_note ();
1463 fputs (_("\
1464 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1465 -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\
1466 -k, --keep-files do not remove output files on errors\n\
1467 "), stdout);
1468 fputs (_("\
1469 --suppress-matched suppress the lines matching PATTERN\n\
1470 "), stdout);
1471 fputs (_("\
1472 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1473 -s, --quiet, --silent do not print counts of output file sizes\n\
1474 -z, --elide-empty-files remove empty output files\n\
1475 "), stdout);
1476 fputs (HELP_OPTION_DESCRIPTION, stdout);
1477 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1478 fputs (_("\
1480 Each PATTERN may be:\n\
1481 INTEGER copy up to but not including specified line number\n\
1482 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1483 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1484 {INTEGER} repeat the previous pattern specified number of times\n\
1485 {*} repeat the previous pattern as many times as possible\n\
1487 A line OFFSET is an integer optionally preceded by '+' or '-'\n\
1488 "), stdout);
1489 emit_ancillary_info (PROGRAM_NAME);
/* Terminate the process with the caller-supplied status.  */
1491 exit (status);