maint: update all copyright year number ranges
[coreutils.git] / src / csplit.c
bloba3f30e8745e3e70f7229c733b2c90c8b5d31d81b
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <assert.h>
23 #include <getopt.h>
24 #include <sys/types.h>
25 #include <signal.h>
27 #include "system.h"
29 #include <regex.h>
31 #include "die.h"
32 #include "error.h"
33 #include "fd-reopen.h"
34 #include "quote.h"
35 #include "safe-read.h"
36 #include "stdio--.h"
37 #include "xdectoint.h"
38 #include "xstrtol.h"
/* Official program name; it never carries an installation prefix
   such as 'g'.  */
#define PROGRAM_NAME "csplit"

/* The people credited as authors of this program.  */
#define AUTHORS \
  proper_name ("Stuart Kemp"), \
  proper_name ("David MacKenzie")

/* Prefix given to output file names by default.  */
#define DEFAULT_PREFIX "xx"
/* One pattern argument from the command line, in compiled form.
   A record describes either a line-count split (REGEXPR false, see
   LINES_REQUIRED) or a regular-expression split (REGEXPR true, see
   RE_COMPILED, OFFSET and IGNORE).  */
struct control
{
  /* Offset from regexp to split at.  */
  intmax_t offset;

  /* Number of lines required.  */
  uintmax_t lines_required;

  /* Repeat count.  */
  uintmax_t repeat;

  /* ARGV index of the argument this record was built from.  */
  int argnum;

  /* True if '*' was used as the repeat count.  */
  bool repeat_forever;

  /* If true, produce no output (for regexp).  */
  bool ignore;

  /* True if a regular expression was used.  */
  bool regexpr;

  /* Compiled regular expression.  */
  struct re_pattern_buffer re_compiled;
};
/* Buffer sizing parameters.

   START_SIZE is the initial size of the data area in buffers,
   INCR_SIZE is the increment by which a data area grows, and
   CTRL_SIZE is the number of lines kept in each node of a line list.

   Building with DEBUG defined selects some small values to test the
   algorithms.  Each macro is defined exactly once, because redefining
   a macro with a different replacement list is a constraint violation
   and breaks builds that treat warnings as errors.  */
#ifdef DEBUG
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
/* A counted string: LEN bytes starting at STR.  */
struct cstring
{
  /* Number of bytes in the string.  */
  size_t len;

  /* First byte of the string.  */
  char *str;
};
86 /* Pointers to the beginnings of lines in the buffer area.
87 These structures are linked together if needed. */
88 struct line
90 size_t used; /* Number of offsets used in this struct. */
91 size_t insert_index; /* Next offset to use when inserting line. */
92 size_t retrieve_index; /* Next index to use when retrieving line. */
93 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
94 struct line *next; /* Next in linked list. */
/* One chunk of buffered input: a data area plus a list of records
   pointing at the individual lines inside that area.  Buffers are
   chained through NEXT.  */
struct buffer_record
{
  /* Size of the buffer area.  */
  size_t bytes_alloc;

  /* Bytes used in the buffer area.  */
  size_t bytes_used;

  /* First line number in this buffer.  */
  uintmax_t start_line;

  /* First line that can be retrieved.  */
  uintmax_t first_available;

  /* Number of complete lines in this buffer.  */
  size_t num_lines;

  /* Data area.  */
  char *buffer;

  /* Head of the list of pointers to lines.  */
  struct line *line_start;

  /* The line start record currently in use.  */
  struct line *curr_line;

  /* Next buffer in the chain.  */
  struct buffer_record *next;
};
/* Forward declarations for the output-file helpers, which are used
   before their definitions appear.  */
static void close_output_file (void);
static void create_output_file (void);
static void delete_all_files (bool in_signal_handler);
static void save_line_to_file (const struct cstring *line);
/* File-scope state.  Objects without an explicit initializer have
   static storage duration and therefore start out as zero, NULL or
   false.  */

/* Start of buffer list.  */
static struct buffer_record *head;

/* Partially read line.  */
static char *hold_area;

/* Number of bytes in 'hold_area'.  */
static size_t hold_count;

/* Number of the last line in the buffers.  */
static uintmax_t last_line_number;

/* Number of the line currently being examined.  */
static uintmax_t current_line;

/* If true, we have read EOF.  */
static bool have_read_eof;

/* Name of output files.  */
static char *volatile filename_space;

/* Prefix part of output file names.  */
static char const *volatile prefix;

/* Suffix part of output file names.  */
static char *volatile suffix;

/* Number of digits to use in output file names.  */
static int volatile digits = 2;

/* Number of files created so far.  */
static unsigned int volatile files_created;

/* Number of bytes written to current file.  */
static uintmax_t bytes_written;

/* Output file pointer.  */
static FILE *output_stream;

/* Output file name.  */
static char *output_filename;

/* Perhaps it would be cleaner to pass arg values instead of indexes.  */
static char **global_argv;

/* If true, do not print the count of bytes in each output file.  */
static bool suppress_count;

/* If true, remove output files on error.  */
static bool volatile remove_files;

/* If true, remove all output files which have a zero length.  */
static bool elide_empty_files;

/* If true, suppress the lines that match the PATTERN.  */
static bool suppress_matched;

/* The compiled pattern arguments, which determine how to split
   the input file.  */
static struct control *controls;

/* Number of elements in 'controls'.  */
static size_t control_used;
182 /* The set of signals that are caught. */
183 static sigset_t caught_signals;
/* Long options that have no equivalent short option use a
   non-character value as a pseudo short option; such values start
   at CHAR_MAX + 1.  */
enum
{
  SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1
};
192 static struct option const longopts[] =
194 {"digits", required_argument, NULL, 'n'},
195 {"quiet", no_argument, NULL, 'q'},
196 {"silent", no_argument, NULL, 's'},
197 {"keep-files", no_argument, NULL, 'k'},
198 {"elide-empty-files", no_argument, NULL, 'z'},
199 {"prefix", required_argument, NULL, 'f'},
200 {"suffix-format", required_argument, NULL, 'b'},
201 {"suppress-matched", no_argument, NULL, SUPPRESS_MATCHED_OPTION},
202 {GETOPT_HELP_OPTION_DECL},
203 {GETOPT_VERSION_OPTION_DECL},
204 {NULL, 0, NULL, 0}
207 /* Optionally remove files created so far; then exit.
208 Called when an error detected. */
210 static void
211 cleanup (void)
213 sigset_t oldset;
215 close_output_file ();
217 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
218 delete_all_files (false);
219 sigprocmask (SIG_SETMASK, &oldset, NULL);
222 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
223 static void
224 cleanup_fatal (void)
226 cleanup ();
227 exit (EXIT_FAILURE);
/* Report that memory is exhausted, then clean up and exit with a
   failure status.  Also called directly by load_buffer when a buffer
   size would overflow.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Signal handler: optionally delete the output files created so far,
   then let signal SIG take its default action.  */

static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  /* Restore the default disposition.  SIG stays blocked while this
     handler runs, so raising it here forces the default action once
     the handler returns and the block is removed.  */
  signal (sig, SIG_DFL);
  raise (sig);
}
248 /* Keep track of NUM bytes of a partial line in buffer START.
249 These bytes will be retrieved later when another large buffer is read. */
251 static void
252 save_to_hold_area (char *start, size_t num)
254 free (hold_area);
255 hold_area = start;
256 hold_count = num;
259 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
260 Return the number of bytes read. */
262 static size_t
263 read_input (char *dest, size_t max_n_bytes)
265 size_t bytes_read;
267 if (max_n_bytes == 0)
268 return 0;
270 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
272 if (bytes_read == 0)
273 have_read_eof = true;
275 if (bytes_read == SAFE_READ_ERROR)
277 error (0, errno, _("read error"));
278 cleanup_fatal ();
281 return bytes_read;
284 /* Initialize existing line record P. */
286 static void
287 clear_line_control (struct line *p)
289 p->used = 0;
290 p->insert_index = 0;
291 p->retrieve_index = 0;
294 /* Return a new, initialized line record. */
296 static struct line *
297 new_line_control (void)
299 struct line *p = xmalloc (sizeof *p);
301 p->next = NULL;
302 clear_line_control (p);
304 return p;
307 /* Record LINE_START, which is the address of the start of a line
308 of length LINE_LEN in the large buffer, in the lines buffer of B. */
310 static void
311 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
313 struct line *l;
315 /* If there is no existing area to keep line info, get some. */
316 if (b->line_start == NULL)
317 b->line_start = b->curr_line = new_line_control ();
319 /* If existing area for lines is full, get more. */
320 if (b->curr_line->used == CTRL_SIZE)
322 b->curr_line->next = new_line_control ();
323 b->curr_line = b->curr_line->next;
326 l = b->curr_line;
328 /* Record the start of the line, and update counters. */
329 l->starts[l->insert_index].str = line_start;
330 l->starts[l->insert_index].len = line_len;
331 l->used++;
332 l->insert_index++;
335 /* Scan the buffer in B for newline characters
336 and record the line start locations and lengths in B.
337 Return the number of lines found in this buffer.
339 There may be an incomplete line at the end of the buffer;
340 a pointer is kept to this area, which will be used when
341 the next buffer is filled. */
343 static size_t
344 record_line_starts (struct buffer_record *b)
346 char *line_start; /* Start of current line. */
347 char *line_end; /* End of each line found. */
348 size_t bytes_left; /* Length of incomplete last line. */
349 size_t lines; /* Number of lines found. */
350 size_t line_length; /* Length of each line found. */
352 if (b->bytes_used == 0)
353 return 0;
355 lines = 0;
356 line_start = b->buffer;
357 bytes_left = b->bytes_used;
359 while (true)
361 line_end = memchr (line_start, '\n', bytes_left);
362 if (line_end == NULL)
363 break;
364 line_length = line_end - line_start + 1;
365 keep_new_line (b, line_start, line_length);
366 bytes_left -= line_length;
367 line_start = line_end + 1;
368 lines++;
371 /* Check for an incomplete last line. */
372 if (bytes_left)
374 if (have_read_eof)
376 keep_new_line (b, line_start, bytes_left);
377 lines++;
379 else
380 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
383 b->num_lines = lines;
384 b->first_available = b->start_line = last_line_number + 1;
385 last_line_number += lines;
387 return lines;
390 /* Return a new buffer with room to store SIZE bytes, plus
391 an extra byte for safety. */
393 static struct buffer_record *
394 create_new_buffer (size_t size)
396 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
398 new_buffer->buffer = xmalloc (size + 1);
400 new_buffer->bytes_alloc = size;
401 new_buffer->line_start = new_buffer->curr_line = NULL;
403 return new_buffer;
406 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
407 least that size is currently free, use it, otherwise create a new one. */
409 static struct buffer_record *
410 get_new_buffer (size_t min_size)
412 struct buffer_record *new_buffer; /* Buffer to return. */
413 size_t alloc_size; /* Actual size that will be requested. */
415 alloc_size = START_SIZE;
416 if (alloc_size < min_size)
418 size_t s = min_size - alloc_size + INCR_SIZE - 1;
419 alloc_size += s - s % INCR_SIZE;
422 new_buffer = create_new_buffer (alloc_size);
424 new_buffer->num_lines = 0;
425 new_buffer->bytes_used = 0;
426 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
427 new_buffer->next = NULL;
429 return new_buffer;
432 static void
433 free_buffer (struct buffer_record *buf)
435 struct line *l;
436 for (l = buf->line_start; l;)
438 struct line *n = l->next;
439 free (l);
440 l = n;
442 buf->line_start = NULL;
443 free (buf->buffer);
444 buf->buffer = NULL;
447 /* Append buffer BUF to the linked list of buffers that contain
448 some data yet to be processed. */
450 static void
451 save_buffer (struct buffer_record *buf)
453 struct buffer_record *p;
455 buf->next = NULL;
456 buf->curr_line = buf->line_start;
458 if (head == NULL)
459 head = buf;
460 else
462 for (p = head; p->next; p = p->next)
463 /* Do nothing. */ ;
464 p->next = buf;
468 /* Fill a buffer of input.
470 Set the initial size of the buffer to a default.
471 Fill the buffer (from the hold area and input stream)
472 and find the individual lines.
473 If no lines are found (the buffer is too small to hold the next line),
474 release the current buffer (whose contents would have been put in the
475 hold area) and repeat the process with another large buffer until at least
476 one entire line has been read.
478 Return true if a new buffer was obtained, otherwise false
479 (in which case end-of-file must have been encountered). */
481 static bool
482 load_buffer (void)
484 struct buffer_record *b;
485 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
486 size_t bytes_avail; /* Size of new buffer created. */
487 size_t lines_found; /* Number of lines in this new buffer. */
488 char *p; /* Place to load into buffer. */
490 if (have_read_eof)
491 return false;
493 /* We must make the buffer at least as large as the amount of data
494 in the partial line left over from the last call. */
495 if (bytes_wanted < hold_count)
496 bytes_wanted = hold_count;
498 while (1)
500 b = get_new_buffer (bytes_wanted);
501 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
502 p = b->buffer;
504 /* First check the 'holding' area for a partial line. */
505 if (hold_count)
507 memcpy (p, hold_area, hold_count);
508 p += hold_count;
509 b->bytes_used += hold_count;
510 bytes_avail -= hold_count;
511 hold_count = 0;
514 b->bytes_used += read_input (p, bytes_avail);
516 lines_found = record_line_starts (b);
518 if (lines_found || have_read_eof)
519 break;
521 if (xalloc_oversized (2, b->bytes_alloc))
522 xalloc_die ();
523 bytes_wanted = 2 * b->bytes_alloc;
524 free_buffer (b);
525 free (b);
528 if (lines_found)
529 save_buffer (b);
530 else
532 free_buffer (b);
533 free (b);
536 return lines_found != 0;
539 /* Return the line number of the first line that has not yet been retrieved. */
541 static uintmax_t
542 get_first_line_in_buffer (void)
544 if (head == NULL && !load_buffer ())
545 die (EXIT_FAILURE, errno, _("input disappeared"));
547 return head->first_available;
550 /* Return a pointer to the logical first line in the buffer and make the
551 next line the logical first line.
552 Return NULL if there is no more input. */
554 static struct cstring *
555 remove_line (void)
557 /* If non-NULL, this is the buffer for which the previous call
558 returned the final line. So now, presuming that line has been
559 processed, we can free the buffer and reset this pointer. */
560 static struct buffer_record *prev_buf = NULL;
562 struct cstring *line; /* Return value. */
563 struct line *l; /* For convenience. */
565 if (prev_buf)
567 free_buffer (prev_buf);
568 free (prev_buf);
569 prev_buf = NULL;
572 if (head == NULL && !load_buffer ())
573 return NULL;
575 if (current_line < head->first_available)
576 current_line = head->first_available;
578 ++(head->first_available);
580 l = head->curr_line;
582 line = &l->starts[l->retrieve_index];
584 /* Advance index to next line. */
585 if (++l->retrieve_index == l->used)
587 /* Go on to the next line record. */
588 head->curr_line = l->next;
589 if (head->curr_line == NULL || head->curr_line->used == 0)
591 /* Go on to the next data block.
592 but first record the current one so we can free it
593 once the line we're returning has been processed. */
594 prev_buf = head;
595 head = head->next;
599 return line;
602 /* Search the buffers for line LINENUM, reading more input if necessary.
603 Return a pointer to the line, or NULL if it is not found in the file. */
605 static struct cstring *
606 find_line (uintmax_t linenum)
608 struct buffer_record *b;
610 if (head == NULL && !load_buffer ())
611 return NULL;
613 if (linenum < head->start_line)
614 return NULL;
616 for (b = head;;)
618 assert (b);
619 if (linenum < b->start_line + b->num_lines)
621 /* The line is in this buffer. */
622 struct line *l;
623 size_t offset; /* How far into the buffer the line is. */
625 l = b->line_start;
626 offset = linenum - b->start_line;
627 /* Find the control record. */
628 while (offset >= CTRL_SIZE)
630 l = l->next;
631 offset -= CTRL_SIZE;
633 return &l->starts[offset];
635 if (b->next == NULL && !load_buffer ())
636 return NULL;
637 b = b->next; /* Try the next data block. */
641 /* Return true if at least one more line is available for input. */
643 static bool
644 no_more_lines (void)
646 return find_line (current_line + 1) == NULL;
/* Open NAME as standard input.  The name "-" leaves standard input
   as it is.  Failure to open NAME is fatal.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
         quoteaf (name));
}
659 /* Write all lines from the beginning of the buffer up to, but
660 not including, line LAST_LINE, to the current output file.
661 If IGNORE is true, do not output lines selected here.
662 ARGNUM is the index in ARGV of the current pattern. */
664 static void
665 write_to_file (uintmax_t last_line, bool ignore, int argnum)
667 struct cstring *line;
668 uintmax_t first_line; /* First available input line. */
669 uintmax_t lines; /* Number of lines to output. */
670 uintmax_t i;
672 first_line = get_first_line_in_buffer ();
674 if (first_line > last_line)
676 error (0, 0, _("%s: line number out of range"),
677 quote (global_argv[argnum]));
678 cleanup_fatal ();
681 lines = last_line - first_line;
683 for (i = 0; i < lines; i++)
685 line = remove_line ();
686 if (line == NULL)
688 error (0, 0, _("%s: line number out of range"),
689 quote (global_argv[argnum]));
690 cleanup_fatal ();
692 if (!ignore)
693 save_line_to_file (line);
/* Write every remaining input line to the current output file.
   Used for the lines left after all patterns have been processed.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
        break;
      save_line_to_file (line);
    }
}
708 /* Handle an attempt to read beyond EOF under the control of record P,
709 on iteration REPETITION if nonzero. */
711 static void handle_line_error (const struct control *, uintmax_t)
712 ATTRIBUTE_NORETURN;
713 static void
714 handle_line_error (const struct control *p, uintmax_t repetition)
716 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
718 fprintf (stderr, _("%s: %s: line number out of range"),
719 program_name, quote (umaxtostr (p->lines_required, buf)));
720 if (repetition)
721 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
722 else
723 fprintf (stderr, "\n");
725 cleanup_fatal ();
728 /* Determine the line number that marks the end of this file,
729 then get those lines and save them to the output file.
730 P is the control record.
731 REPETITION is the repetition number. */
733 static void
734 process_line_count (const struct control *p, uintmax_t repetition)
736 uintmax_t linenum;
737 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
739 create_output_file ();
741 /* Ensure that the line number specified is not 1 greater than
742 the number of lines in the file.
743 When suppressing matched lines, check before the loop. */
744 if (no_more_lines () && suppress_matched)
745 handle_line_error (p, repetition);
747 linenum = get_first_line_in_buffer ();
748 while (linenum++ < last_line_to_save)
750 struct cstring *line = remove_line ();
751 if (line == NULL)
752 handle_line_error (p, repetition);
753 save_line_to_file (line);
756 close_output_file ();
758 if (suppress_matched)
759 remove_line ();
761 /* Ensure that the line number specified is not 1 greater than
762 the number of lines in the file. */
763 if (no_more_lines () && !suppress_matched)
764 handle_line_error (p, repetition);
767 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
768 static void
769 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
771 fprintf (stderr, _("%s: %s: match not found"),
772 program_name, quote (global_argv[p->argnum]));
774 if (repetition)
776 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
777 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
779 else
780 fprintf (stderr, "\n");
782 if (!ignore)
784 dump_rest_of_file ();
785 close_output_file ();
787 cleanup_fatal ();
790 /* Read the input until a line matches the regexp in P, outputting
791 it unless P->IGNORE is true.
792 REPETITION is this repeat-count; 0 means the first time. */
794 static void
795 process_regexp (struct control *p, uintmax_t repetition)
797 struct cstring *line; /* From input file. */
798 size_t line_len; /* To make "$" in regexps work. */
799 uintmax_t break_line; /* First line number of next file. */
800 bool ignore = p->ignore; /* If true, skip this section. */
801 regoff_t ret;
803 if (!ignore)
804 create_output_file ();
806 if (suppress_matched && current_line > 0)
807 remove_line ();
809 /* If there is no offset for the regular expression, or
810 it is positive, then it is not necessary to buffer the lines. */
812 if (p->offset >= 0)
814 while (true)
816 line = find_line (++current_line);
817 if (line == NULL)
819 if (p->repeat_forever)
821 if (!ignore)
823 dump_rest_of_file ();
824 close_output_file ();
826 exit (EXIT_SUCCESS);
828 else
829 regexp_error (p, repetition, ignore);
831 line_len = line->len;
832 if (line->str[line_len - 1] == '\n')
833 line_len--;
834 ret = re_search (&p->re_compiled, line->str, line_len,
835 0, line_len, NULL);
836 if (ret == -2)
838 error (0, 0, _("error in regular expression search"));
839 cleanup_fatal ();
841 if (ret == -1)
843 line = remove_line ();
844 if (!ignore)
845 save_line_to_file (line);
847 else
848 break;
851 else
853 /* Buffer the lines. */
854 while (true)
856 line = find_line (++current_line);
857 if (line == NULL)
859 if (p->repeat_forever)
861 if (!ignore)
863 dump_rest_of_file ();
864 close_output_file ();
866 exit (EXIT_SUCCESS);
868 else
869 regexp_error (p, repetition, ignore);
871 line_len = line->len;
872 if (line->str[line_len - 1] == '\n')
873 line_len--;
874 ret = re_search (&p->re_compiled, line->str, line_len,
875 0, line_len, NULL);
876 if (ret == -2)
878 error (0, 0, _("error in regular expression search"));
879 cleanup_fatal ();
881 if (ret != -1)
882 break;
886 /* Account for any offset from this regexp. */
887 break_line = current_line + p->offset;
889 write_to_file (break_line, ignore, p->argnum);
891 if (!ignore)
892 close_output_file ();
894 if (p->offset > 0)
895 current_line = break_line;
898 /* Split the input file according to the control records we have built. */
900 static void
901 split_file (void)
903 size_t i;
905 for (i = 0; i < control_used; i++)
907 uintmax_t j;
908 if (controls[i].regexpr)
910 for (j = 0; (controls[i].repeat_forever
911 || j <= controls[i].repeat); j++)
912 process_regexp (&controls[i], j);
914 else
916 for (j = 0; (controls[i].repeat_forever
917 || j <= controls[i].repeat); j++)
918 process_line_count (&controls[i], j);
922 create_output_file ();
923 dump_rest_of_file ();
924 close_output_file ();
927 /* Return the name of output file number NUM.
929 This function is called from a signal handler, so it should invoke
930 only reentrant functions that are async-signal-safe. POSIX does
931 not guarantee this for the functions called below, but we don't
932 know of any hosts where this implementation isn't safe. */
934 static char *
935 make_filename (unsigned int num)
937 strcpy (filename_space, prefix);
938 if (suffix)
939 sprintf (filename_space + strlen (prefix), suffix, num);
940 else
941 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
942 return filename_space;
945 /* Create the next output file. */
947 static void
948 create_output_file (void)
950 bool fopen_ok;
951 int fopen_errno;
953 output_filename = make_filename (files_created);
955 if (files_created == UINT_MAX)
957 fopen_ok = false;
958 fopen_errno = EOVERFLOW;
960 else
962 /* Create the output file in a critical section, to avoid races. */
963 sigset_t oldset;
964 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
965 output_stream = fopen (output_filename, "w");
966 fopen_ok = (output_stream != NULL);
967 fopen_errno = errno;
968 files_created += fopen_ok;
969 sigprocmask (SIG_SETMASK, &oldset, NULL);
972 if (! fopen_ok)
974 error (0, fopen_errno, "%s", quotef (output_filename));
975 cleanup_fatal ();
977 bytes_written = 0;
980 /* If requested, delete all the files we have created. This function
981 must be called only from critical sections. */
983 static void
984 delete_all_files (bool in_signal_handler)
986 unsigned int i;
988 if (! remove_files)
989 return;
991 for (i = 0; i < files_created; i++)
993 const char *name = make_filename (i);
994 if (unlink (name) != 0 && !in_signal_handler)
995 error (0, errno, "%s", quotef (name));
998 files_created = 0;
1001 /* Close the current output file and print the count
1002 of characters in this file. */
1004 static void
1005 close_output_file (void)
1007 if (output_stream)
1009 if (ferror (output_stream))
1011 error (0, 0, _("write error for %s"), quoteaf (output_filename));
1012 output_stream = NULL;
1013 cleanup_fatal ();
1015 if (fclose (output_stream) != 0)
1017 error (0, errno, "%s", quotef (output_filename));
1018 output_stream = NULL;
1019 cleanup_fatal ();
1021 if (bytes_written == 0 && elide_empty_files)
1023 sigset_t oldset;
1024 bool unlink_ok;
1025 int unlink_errno;
1027 /* Remove the output file in a critical section, to avoid races. */
1028 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1029 unlink_ok = (unlink (output_filename) == 0);
1030 unlink_errno = errno;
1031 files_created -= unlink_ok;
1032 sigprocmask (SIG_SETMASK, &oldset, NULL);
1034 if (! unlink_ok)
1035 error (0, unlink_errno, "%s", quotef (output_filename));
1037 else
1039 if (!suppress_count)
1041 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1042 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1045 output_stream = NULL;
1049 /* Save line LINE to the output file and
1050 increment the character count for the current file. */
1052 static void
1053 save_line_to_file (const struct cstring *line)
1055 size_t l = fwrite (line->str, sizeof (char), line->len, output_stream);
1056 if (l != line->len)
1058 error (0, errno, _("write error for %s"), quoteaf (output_filename));
1059 output_stream = NULL;
1060 cleanup_fatal ();
1062 bytes_written += line->len;
1065 /* Return a new, initialized control record. */
1067 static struct control *
1068 new_control_record (void)
1070 static size_t control_allocated = 0; /* Total space allocated. */
1071 struct control *p;
1073 if (control_used == control_allocated)
1074 controls = X2NREALLOC (controls, &control_allocated);
1075 p = &controls[control_used++];
1076 p->regexpr = false;
1077 p->repeat = 0;
1078 p->repeat_forever = false;
1079 p->lines_required = 0;
1080 p->offset = 0;
1081 return p;
1084 /* Check if there is a numeric offset after a regular expression.
1085 STR is the entire command line argument.
1086 P is the control record for this regular expression.
1087 NUM is the numeric part of STR. */
1089 static void
1090 check_for_offset (struct control *p, const char *str, const char *num)
1092 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1093 die (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"),
1094 quote (str));
1097 /* Given that the first character of command line arg STR is '{',
1098 make sure that the rest of the string is a valid repeat count
1099 and store its value in P.
1100 ARGNUM is the ARGV index of STR. */
1102 static void
1103 parse_repeat_count (int argnum, struct control *p, char *str)
1105 uintmax_t val;
1106 char *end;
1108 end = str + strlen (str) - 1;
1109 if (*end != '}')
1110 die (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"),
1111 quote (str));
1112 *end = '\0';
1114 if (str+1 == end-1 && *(str+1) == '*')
1115 p->repeat_forever = true;
1116 else
1118 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1120 die (EXIT_FAILURE, 0,
1121 _("%s}: integer required between '{' and '}'"),
1122 quote (global_argv[argnum]));
1124 p->repeat = val;
1127 *end = '}';
1130 /* Extract the regular expression from STR and check for a numeric offset.
1131 STR should start with the regexp delimiter character.
1132 Return a new control record for the regular expression.
1133 ARGNUM is the ARGV index of STR.
1134 Unless IGNORE is true, mark these lines for output. */
1136 static struct control *
1137 extract_regexp (int argnum, bool ignore, char const *str)
1139 size_t len; /* Number of bytes in this regexp. */
1140 char delim = *str;
1141 char const *closing_delim;
1142 struct control *p;
1143 const char *err;
1145 closing_delim = strrchr (str + 1, delim);
1146 if (closing_delim == NULL)
1147 die (EXIT_FAILURE, 0,
1148 _("%s: closing delimiter '%c' missing"), str, delim);
1150 len = closing_delim - str - 1;
1151 p = new_control_record ();
1152 p->argnum = argnum;
1153 p->ignore = ignore;
1155 p->regexpr = true;
1156 p->re_compiled.buffer = NULL;
1157 p->re_compiled.allocated = 0;
1158 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1159 p->re_compiled.translate = NULL;
1160 re_syntax_options =
1161 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1162 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1163 if (err)
1165 error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err);
1166 cleanup_fatal ();
1169 if (closing_delim[1])
1170 check_for_offset (p, str, closing_delim + 1);
1172 return p;
1175 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1176 After each pattern, check if the next argument is a repeat count. */
1178 static void
1179 parse_patterns (int argc, int start, char **argv)
1181 int i; /* Index into ARGV. */
1182 struct control *p; /* New control record created. */
1183 uintmax_t val;
1184 static uintmax_t last_val = 0;
1186 for (i = start; i < argc; i++)
1188 if (*argv[i] == '/' || *argv[i] == '%')
1190 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1192 else
1194 p = new_control_record ();
1195 p->argnum = i;
1197 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1198 die (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i]));
1199 if (val == 0)
1200 die (EXIT_FAILURE, 0,
1201 _("%s: line number must be greater than zero"), argv[i]);
1202 if (val < last_val)
1204 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1205 die (EXIT_FAILURE, 0,
1206 _("line number %s is smaller than preceding line number, %s"),
1207 quote (argv[i]), umaxtostr (last_val, buf));
1210 if (val == last_val)
1211 error (0, 0,
1212 _("warning: line number %s is the same as preceding line number"),
1213 quote (argv[i]));
1215 last_val = val;
1217 p->lines_required = val;
1220 if (i + 1 < argc && *argv[i + 1] == '{')
1222 /* We have a repeat count. */
1223 i++;
1224 parse_repeat_count (i, p, argv[i]);
/* Names for the printf format flags ' and #.  These can be ORed together.  */
enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };

/* Scan the run of printf format flag characters at the start of
   FORMAT.  The characters '-', '0', ''' and '#' count as flags; the
   last two are also recorded as FLAG_THOUSANDS and FLAG_ALTERNATIVE
   in *FLAGS_PTR.  Return the number of flag characters found.  */
static size_t
get_format_flags (char const *format, int *flags_ptr)
{
  int seen = 0;
  size_t n = 0;

  for (;; n++)
    {
      char c = format[n];

      if (c == '\'')
        seen |= FLAG_THOUSANDS;
      else if (c == '#')
        seen |= FLAG_ALTERNATIVE;
      else if (c != '-' && c != '0')
        break;
    }

  *flags_ptr = seen;
  return n;
}
1264 /* Check that the printf format conversion specifier *FORMAT is valid
1265 and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
1266 since the format will be used with an unsigned value. */
1267 static void
1268 check_format_conv_type (char *format, int flags)
1270 unsigned char ch = *format;
1271 int compatible_flags = FLAG_THOUSANDS;
1273 switch (ch)
1275 case 'd':
1276 case 'i':
1277 *format = 'u';
1278 break;
1280 case 'u':
1281 break;
1283 case 'o':
1284 case 'x':
1285 case 'X':
1286 compatible_flags = FLAG_ALTERNATIVE;
1287 break;
1289 case 0:
1290 die (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1292 default:
1293 if (isprint (ch))
1294 die (EXIT_FAILURE, 0,
1295 _("invalid conversion specifier in suffix: %c"), ch);
1296 else
1297 die (EXIT_FAILURE, 0,
1298 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1301 if (flags & ~ compatible_flags)
1302 die (EXIT_FAILURE, 0,
1303 _("invalid flags in conversion specification: %%%c%c"),
1304 (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
/* Return the maximum number of bytes that can be generated by
   applying FORMAT to an unsigned int value.  If the format is
   invalid, diagnose the problem and exit.

   FORMAT must contain exactly one conversion specification other
   than "%%".  Its conversion character may be rewritten in place by
   check_format_conv_type.  */
static size_t
max_out (char *format)
{
  bool percent = false;

  for (char *f = format; *f; f++)
    if (*f == '%' && *++f != '%')
      {
        if (percent)
          die (EXIT_FAILURE, 0,
               _("too many %% conversion specifications in suffix"));
        percent = true;

        /* Skip the flags, the field width and an optional precision,
           leaving F at the conversion character.  */
        int flags;
        f += get_format_flags (f, &flags);
        while (ISDIGIT (*f))
          f++;
        if (*f == '.')
          while (ISDIGIT (*++f))
            continue;
        check_format_conv_type (f, flags);
      }

  if (! percent)
    die (EXIT_FAILURE, 0,
         _("missing %% conversion specification in suffix"));

  /* With a null buffer and zero size, snprintf only reports the length
     the output would have.  */
  int maxlen = snprintf (NULL, 0, format, UINT_MAX);
  if (! (0 <= maxlen && maxlen <= SIZE_MAX))
    xalloc_die ();
  return maxlen;
}
1343 main (int argc, char **argv)
1345 int optc;
1347 initialize_main (&argc, &argv);
1348 set_program_name (argv[0]);
1349 setlocale (LC_ALL, "");
1350 bindtextdomain (PACKAGE, LOCALEDIR);
1351 textdomain (PACKAGE);
1353 atexit (close_stdout);
1355 global_argv = argv;
1356 controls = NULL;
1357 control_used = 0;
1358 suppress_count = false;
1359 remove_files = true;
1360 suppress_matched = false;
1361 prefix = DEFAULT_PREFIX;
1363 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1364 switch (optc)
1366 case 'f':
1367 prefix = optarg;
1368 break;
1370 case 'b':
1371 suffix = optarg;
1372 break;
1374 case 'k':
1375 remove_files = false;
1376 break;
1378 case 'n':
1379 digits = xdectoimax (optarg, 0, MIN (INT_MAX, SIZE_MAX), "",
1380 _("invalid number"), 0);
1381 break;
1383 case 's':
1384 case 'q':
1385 suppress_count = true;
1386 break;
1388 case 'z':
1389 elide_empty_files = true;
1390 break;
1392 case SUPPRESS_MATCHED_OPTION:
1393 suppress_matched = true;
1394 break;
1396 case_GETOPT_HELP_CHAR;
1398 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1400 default:
1401 usage (EXIT_FAILURE);
1404 if (argc - optind < 2)
1406 if (argc <= optind)
1407 error (0, 0, _("missing operand"));
1408 else
1409 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1410 usage (EXIT_FAILURE);
1413 size_t prefix_len = strlen (prefix);
1414 size_t max_digit_string_len
1415 = (suffix
1416 ? max_out (suffix)
1417 : MAX (INT_STRLEN_BOUND (unsigned int), digits));
1418 if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
1419 xalloc_die ();
1420 filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
1422 set_input_file (argv[optind++]);
1424 parse_patterns (argc, optind, argv);
1427 int i;
1428 static int const sig[] =
1430 /* The usual suspects. */
1431 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1432 #ifdef SIGPOLL
1433 SIGPOLL,
1434 #endif
1435 #ifdef SIGPROF
1436 SIGPROF,
1437 #endif
1438 #ifdef SIGVTALRM
1439 SIGVTALRM,
1440 #endif
1441 #ifdef SIGXCPU
1442 SIGXCPU,
1443 #endif
1444 #ifdef SIGXFSZ
1445 SIGXFSZ,
1446 #endif
1448 enum { nsigs = ARRAY_CARDINALITY (sig) };
1450 struct sigaction act;
1452 sigemptyset (&caught_signals);
1453 for (i = 0; i < nsigs; i++)
1455 sigaction (sig[i], NULL, &act);
1456 if (act.sa_handler != SIG_IGN)
1457 sigaddset (&caught_signals, sig[i]);
1460 act.sa_handler = interrupt_handler;
1461 act.sa_mask = caught_signals;
1462 act.sa_flags = 0;
1464 for (i = 0; i < nsigs; i++)
1465 if (sigismember (&caught_signals, sig[i]))
1466 sigaction (sig[i], &act, NULL);
1469 split_file ();
1471 if (close (STDIN_FILENO) != 0)
1473 error (0, errno, _("read error"));
1474 cleanup_fatal ();
1477 return EXIT_SUCCESS;
/* Print usage information and exit with STATUS.
   When STATUS is not EXIT_SUCCESS, only emit_try_help is called.
   Otherwise the synopsis, the option summary and the description of
   the PATTERN forms are written to stdout, followed by
   emit_ancillary_info.  The function ends by calling exit (STATUS).  */
1480 void
1481 usage (int status)
1483 if (status != EXIT_SUCCESS)
1484 emit_try_help ();
1485 else
1487 printf (_("\
1488 Usage: %s [OPTION]... FILE PATTERN...\n\
1490 program_name);
1491 fputs (_("\
1492 Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\
1493 and output byte counts of each piece to standard output.\n\
1494 "), stdout);
1495 fputs (_("\
1497 Read standard input if FILE is -\n\
1498 "), stdout);
1500 emit_mandatory_arg_note ();
1502 fputs (_("\
1503 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1504 -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\
1505 -k, --keep-files do not remove output files on errors\n\
1506 "), stdout);
1507 fputs (_("\
1508 --suppress-matched suppress the lines matching PATTERN\n\
1509 "), stdout);
1510 fputs (_("\
1511 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1512 -s, --quiet, --silent do not print counts of output file sizes\n\
1513 -z, --elide-empty-files remove empty output files\n\
1514 "), stdout);
1515 fputs (HELP_OPTION_DESCRIPTION, stdout);
1516 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1517 fputs (_("\
1519 Each PATTERN may be:\n\
1520 INTEGER copy up to but not including specified line number\n\
1521 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1522 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1523 {INTEGER} repeat the previous pattern specified number of times\n\
1524 {*} repeat the previous pattern as many times as possible\n\
1526 A line OFFSET is a required '+' or '-' followed by a positive integer.\n\
1527 "), stdout);
1528 emit_ancillary_info (PROGRAM_NAME);
1530 exit (status);