maint: move two small functions, so we can remove a fwd decl
[coreutils.git] / src / csplit.c
blob438d888a51b3f2179dd5056d1d15c5c52b5cf927
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991, 1995-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <getopt.h>
23 #include <sys/types.h>
24 #include <signal.h>
26 #include "system.h"
28 #include <regex.h>
30 #include "error.h"
31 #include "fd-reopen.h"
32 #include "quote.h"
33 #include "safe-read.h"
34 #include "stdio--.h"
35 #include "xstrtol.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "csplit"
40 #define AUTHORS \
41 proper_name ("Stuart Kemp"), \
42 proper_name ("David MacKenzie")
44 /* The default prefix for output file names. */
45 #define DEFAULT_PREFIX "xx"
/* One parsed pattern argument: either a line number or a regular
   expression, together with its optional offset and repeat count.  */
struct control
{
  intmax_t offset;		/* Signed line offset added to a regexp match.  */
  uintmax_t lines_required;	/* Line number given by a numeric pattern.  */
  uintmax_t repeat;		/* Extra times to apply the pattern.  */
  int argnum;			/* Index of the pattern in ARGV.  */
  bool repeat_forever;		/* True when the repeat count was `*'.  */
  bool ignore;			/* True if the matched section is not output.  */
  bool regexpr;			/* True for a regexp, false for a line number.  */
  struct re_pattern_buffer re_compiled;	/* Compiled form of the regexp.  */
};
/* Buffer sizing parameters.

   START_SIZE is the initial size of the data area in a buffer,
   INCR_SIZE is the step by which a data area is enlarged, and
   CTRL_SIZE is the number of lines kept in each node of the line list.

   Building with DEBUG defined selects deliberately tiny values so the
   buffer-growing and list-chaining paths are exercised by small inputs.
   The two sets are mutually exclusive: redefining a macro with a
   different replacement list is not valid C.  */
#ifdef DEBUG
# define START_SIZE	200
# define INCR_SIZE	10
# define CTRL_SIZE	1
#else
# define START_SIZE	8191
# define INCR_SIZE	2048
# define CTRL_SIZE	80
#endif
/* A counted string: STR points at LEN bytes and need not be
   NUL-terminated.  */
struct cstring
{
  size_t len;			/* Number of bytes at STR.  */
  char *str;			/* First byte of the string.  */
};
83 /* Pointers to the beginnings of lines in the buffer area.
84 These structures are linked together if needed. */
85 struct line
87 size_t used; /* Number of offsets used in this struct. */
88 size_t insert_index; /* Next offset to use when inserting line. */
89 size_t retrieve_index; /* Next index to use when retrieving line. */
90 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
91 struct line *next; /* Next in linked list. */
/* One chunk of buffered input: the raw bytes plus an index (a list of
   `struct line' nodes) locating every line inside them.  Records are
   chained through NEXT in input order.  */
struct buffer_record
{
  size_t bytes_alloc;		/* Capacity of BUFFER, in bytes.  */
  size_t bytes_used;		/* Bytes of BUFFER that hold data.  */
  uintmax_t start_line;		/* Number of the first line stored here.  */
  uintmax_t first_available;	/* Number of the first line not yet removed.  */
  size_t num_lines;		/* Lines indexed in this buffer.  */
  char *buffer;			/* The data area itself.  */
  struct line *line_start;	/* First node of the line index.  */
  struct line *curr_line;	/* Index node currently being used.  */
  struct buffer_record *next;	/* Following buffer, or NULL.  */
};
110 static void close_output_file (void);
111 static void create_output_file (void);
112 static void delete_all_files (bool);
113 static void save_line_to_file (const struct cstring *line);
114 void usage (int status);
116 /* Start of buffer list. */
117 static struct buffer_record *head = NULL;
119 /* Partially read line. */
120 static char *hold_area = NULL;
122 /* Number of bytes in `hold_area'. */
123 static size_t hold_count = 0;
125 /* Number of the last line in the buffers. */
126 static uintmax_t last_line_number = 0;
128 /* Number of the line currently being examined. */
129 static uintmax_t current_line = 0;
131 /* If true, we have read EOF. */
132 static bool have_read_eof = false;
134 /* Name of output files. */
135 static char *volatile filename_space = NULL;
137 /* Prefix part of output file names. */
138 static char const *volatile prefix = NULL;
140 /* Suffix part of output file names. */
141 static char *volatile suffix = NULL;
143 /* Number of digits to use in output file names. */
144 static int volatile digits = 2;
146 /* Number of files created so far. */
147 static unsigned int volatile files_created = 0;
149 /* Number of bytes written to current file. */
150 static uintmax_t bytes_written;
152 /* Output file pointer. */
153 static FILE *output_stream = NULL;
155 /* Output file name. */
156 static char *output_filename = NULL;
158 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
159 static char **global_argv;
161 /* If true, do not print the count of bytes in each output file. */
162 static bool suppress_count;
164 /* If true, remove output files on error. */
165 static bool volatile remove_files;
167 /* If true, remove all output files which have a zero length. */
168 static bool elide_empty_files;
170 /* The compiled pattern arguments, which determine how to split
171 the input file. */
172 static struct control *controls;
174 /* Number of elements in `controls'. */
175 static size_t control_used;
177 /* The set of signals that are caught. */
178 static sigset_t caught_signals;
180 static struct option const longopts[] =
182 {"digits", required_argument, NULL, 'n'},
183 {"quiet", no_argument, NULL, 'q'},
184 {"silent", no_argument, NULL, 's'},
185 {"keep-files", no_argument, NULL, 'k'},
186 {"elide-empty-files", no_argument, NULL, 'z'},
187 {"prefix", required_argument, NULL, 'f'},
188 {"suffix-format", required_argument, NULL, 'b'},
189 {GETOPT_HELP_OPTION_DECL},
190 {GETOPT_VERSION_OPTION_DECL},
191 {NULL, 0, NULL, 0}
194 /* Optionally remove files created so far; then exit.
195 Called when an error detected. */
197 static void
198 cleanup (void)
200 sigset_t oldset;
202 close_output_file ();
204 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
205 delete_all_files (false);
206 sigprocmask (SIG_SETMASK, &oldset, NULL);
209 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
210 static void
211 cleanup_fatal (void)
213 cleanup ();
214 exit (EXIT_FAILURE);
/* Report memory exhaustion, then clean up and exit with a failure
   status instead of returning.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Signal handler: delete the output files (if requested) and then
   re-raise SIG.  */

static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  /* SIG has been reset to SIG_DFL but stays blocked while this handler
     runs.  Raising it again makes the default action take effect as
     soon as the handler returns and the block is lifted.  */
  raise (sig);
}
234 /* Keep track of NUM bytes of a partial line in buffer START.
235 These bytes will be retrieved later when another large buffer is read. */
237 static void
238 save_to_hold_area (char *start, size_t num)
240 free (hold_area);
241 hold_area = start;
242 hold_count = num;
245 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
246 Return the number of bytes read. */
248 static size_t
249 read_input (char *dest, size_t max_n_bytes)
251 size_t bytes_read;
253 if (max_n_bytes == 0)
254 return 0;
256 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
258 if (bytes_read == 0)
259 have_read_eof = true;
261 if (bytes_read == SAFE_READ_ERROR)
263 error (0, errno, _("read error"));
264 cleanup_fatal ();
267 return bytes_read;
270 /* Initialize existing line record P. */
272 static void
273 clear_line_control (struct line *p)
275 p->used = 0;
276 p->insert_index = 0;
277 p->retrieve_index = 0;
280 /* Return a new, initialized line record. */
282 static struct line *
283 new_line_control (void)
285 struct line *p = xmalloc (sizeof *p);
287 p->next = NULL;
288 clear_line_control (p);
290 return p;
293 /* Record LINE_START, which is the address of the start of a line
294 of length LINE_LEN in the large buffer, in the lines buffer of B. */
296 static void
297 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
299 struct line *l;
301 /* If there is no existing area to keep line info, get some. */
302 if (b->line_start == NULL)
303 b->line_start = b->curr_line = new_line_control ();
305 /* If existing area for lines is full, get more. */
306 if (b->curr_line->used == CTRL_SIZE)
308 b->curr_line->next = new_line_control ();
309 b->curr_line = b->curr_line->next;
312 l = b->curr_line;
314 /* Record the start of the line, and update counters. */
315 l->starts[l->insert_index].str = line_start;
316 l->starts[l->insert_index].len = line_len;
317 l->used++;
318 l->insert_index++;
321 /* Scan the buffer in B for newline characters
322 and record the line start locations and lengths in B.
323 Return the number of lines found in this buffer.
325 There may be an incomplete line at the end of the buffer;
326 a pointer is kept to this area, which will be used when
327 the next buffer is filled. */
329 static size_t
330 record_line_starts (struct buffer_record *b)
332 char *line_start; /* Start of current line. */
333 char *line_end; /* End of each line found. */
334 size_t bytes_left; /* Length of incomplete last line. */
335 size_t lines; /* Number of lines found. */
336 size_t line_length; /* Length of each line found. */
338 if (b->bytes_used == 0)
339 return 0;
341 lines = 0;
342 line_start = b->buffer;
343 bytes_left = b->bytes_used;
345 while (true)
347 line_end = memchr (line_start, '\n', bytes_left);
348 if (line_end == NULL)
349 break;
350 line_length = line_end - line_start + 1;
351 keep_new_line (b, line_start, line_length);
352 bytes_left -= line_length;
353 line_start = line_end + 1;
354 lines++;
357 /* Check for an incomplete last line. */
358 if (bytes_left)
360 if (have_read_eof)
362 keep_new_line (b, line_start, bytes_left);
363 lines++;
365 else
366 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
369 b->num_lines = lines;
370 b->first_available = b->start_line = last_line_number + 1;
371 last_line_number += lines;
373 return lines;
376 /* Return a new buffer with room to store SIZE bytes, plus
377 an extra byte for safety. */
379 static struct buffer_record *
380 create_new_buffer (size_t size)
382 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
384 new_buffer->buffer = xmalloc (size + 1);
386 new_buffer->bytes_alloc = size;
387 new_buffer->line_start = new_buffer->curr_line = NULL;
389 return new_buffer;
392 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
393 least that size is currently free, use it, otherwise create a new one. */
395 static struct buffer_record *
396 get_new_buffer (size_t min_size)
398 struct buffer_record *new_buffer; /* Buffer to return. */
399 size_t alloc_size; /* Actual size that will be requested. */
401 alloc_size = START_SIZE;
402 if (alloc_size < min_size)
404 size_t s = min_size - alloc_size + INCR_SIZE - 1;
405 alloc_size += s - s % INCR_SIZE;
408 new_buffer = create_new_buffer (alloc_size);
410 new_buffer->num_lines = 0;
411 new_buffer->bytes_used = 0;
412 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
413 new_buffer->next = NULL;
415 return new_buffer;
418 static void
419 free_buffer (struct buffer_record *buf)
421 struct line *l;
422 for (l = buf->line_start; l;)
424 struct line *n = l->next;
425 free (l);
426 l = n;
428 free (buf->buffer);
429 buf->buffer = NULL;
432 /* Append buffer BUF to the linked list of buffers that contain
433 some data yet to be processed. */
435 static void
436 save_buffer (struct buffer_record *buf)
438 struct buffer_record *p;
440 buf->next = NULL;
441 buf->curr_line = buf->line_start;
443 if (head == NULL)
444 head = buf;
445 else
447 for (p = head; p->next; p = p->next)
448 /* Do nothing. */ ;
449 p->next = buf;
453 /* Fill a buffer of input.
455 Set the initial size of the buffer to a default.
456 Fill the buffer (from the hold area and input stream)
457 and find the individual lines.
458 If no lines are found (the buffer is too small to hold the next line),
459 release the current buffer (whose contents would have been put in the
460 hold area) and repeat the process with another large buffer until at least
461 one entire line has been read.
463 Return true if a new buffer was obtained, otherwise false
464 (in which case end-of-file must have been encountered). */
466 static bool
467 load_buffer (void)
469 struct buffer_record *b;
470 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
471 size_t bytes_avail; /* Size of new buffer created. */
472 size_t lines_found; /* Number of lines in this new buffer. */
473 char *p; /* Place to load into buffer. */
475 if (have_read_eof)
476 return false;
478 /* We must make the buffer at least as large as the amount of data
479 in the partial line left over from the last call. */
480 if (bytes_wanted < hold_count)
481 bytes_wanted = hold_count;
483 while (1)
485 b = get_new_buffer (bytes_wanted);
486 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
487 p = b->buffer;
489 /* First check the `holding' area for a partial line. */
490 if (hold_count)
492 memcpy (p, hold_area, hold_count);
493 p += hold_count;
494 b->bytes_used += hold_count;
495 bytes_avail -= hold_count;
496 hold_count = 0;
499 b->bytes_used += read_input (p, bytes_avail);
501 lines_found = record_line_starts (b);
502 if (!lines_found)
503 free_buffer (b);
505 if (lines_found || have_read_eof)
506 break;
508 if (xalloc_oversized (2, b->bytes_alloc))
509 xalloc_die ();
510 bytes_wanted = 2 * b->bytes_alloc;
511 free_buffer (b);
512 free (b);
515 if (lines_found)
516 save_buffer (b);
517 else
518 free (b);
520 return lines_found != 0;
523 /* Return the line number of the first line that has not yet been retrieved. */
525 static uintmax_t
526 get_first_line_in_buffer (void)
528 if (head == NULL && !load_buffer ())
529 error (EXIT_FAILURE, errno, _("input disappeared"));
531 return head->first_available;
534 /* Return a pointer to the logical first line in the buffer and make the
535 next line the logical first line.
536 Return NULL if there is no more input. */
538 static struct cstring *
539 remove_line (void)
541 /* If non-NULL, this is the buffer for which the previous call
542 returned the final line. So now, presuming that line has been
543 processed, we can free the buffer and reset this pointer. */
544 static struct buffer_record *prev_buf = NULL;
546 struct cstring *line; /* Return value. */
547 struct line *l; /* For convenience. */
549 if (prev_buf)
551 free_buffer (prev_buf);
552 free (prev_buf);
553 prev_buf = NULL;
556 if (head == NULL && !load_buffer ())
557 return NULL;
559 if (current_line < head->first_available)
560 current_line = head->first_available;
562 ++(head->first_available);
564 l = head->curr_line;
566 line = &l->starts[l->retrieve_index];
568 /* Advance index to next line. */
569 if (++l->retrieve_index == l->used)
571 /* Go on to the next line record. */
572 head->curr_line = l->next;
573 if (head->curr_line == NULL || head->curr_line->used == 0)
575 /* Go on to the next data block.
576 but first record the current one so we can free it
577 once the line we're returning has been processed. */
578 prev_buf = head;
579 head = head->next;
583 return line;
586 /* Search the buffers for line LINENUM, reading more input if necessary.
587 Return a pointer to the line, or NULL if it is not found in the file. */
589 static struct cstring *
590 find_line (uintmax_t linenum)
592 struct buffer_record *b;
594 if (head == NULL && !load_buffer ())
595 return NULL;
597 if (linenum < head->start_line)
598 return NULL;
600 for (b = head;;)
602 if (linenum < b->start_line + b->num_lines)
604 /* The line is in this buffer. */
605 struct line *l;
606 size_t offset; /* How far into the buffer the line is. */
608 l = b->line_start;
609 offset = linenum - b->start_line;
610 /* Find the control record. */
611 while (offset >= CTRL_SIZE)
613 l = l->next;
614 offset -= CTRL_SIZE;
616 return &l->starts[offset];
618 if (b->next == NULL && !load_buffer ())
619 return NULL;
620 b = b->next; /* Try the next data block. */
624 /* Return true if at least one more line is available for input. */
626 static bool
627 no_more_lines (void)
629 return find_line (current_line + 1) == NULL;
/* Arrange for standard input to read from the file NAME.  The name
   "-" leaves standard input as it is.  Exit with a diagnostic if NAME
   cannot be opened.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
}
641 /* Write all lines from the beginning of the buffer up to, but
642 not including, line LAST_LINE, to the current output file.
643 If IGNORE is true, do not output lines selected here.
644 ARGNUM is the index in ARGV of the current pattern. */
646 static void
647 write_to_file (uintmax_t last_line, bool ignore, int argnum)
649 struct cstring *line;
650 uintmax_t first_line; /* First available input line. */
651 uintmax_t lines; /* Number of lines to output. */
652 uintmax_t i;
654 first_line = get_first_line_in_buffer ();
656 if (first_line > last_line)
658 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
659 cleanup_fatal ();
662 lines = last_line - first_line;
664 for (i = 0; i < lines; i++)
666 line = remove_line ();
667 if (line == NULL)
669 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
670 cleanup_fatal ();
672 if (!ignore)
673 save_line_to_file (line);
/* Write every remaining input line to the current output file.  */

static void
dump_rest_of_file (void)
{
  struct cstring *line;

  for (line = remove_line (); line != NULL; line = remove_line ())
    save_line_to_file (line);
}
688 /* Handle an attempt to read beyond EOF under the control of record P,
689 on iteration REPETITION if nonzero. */
691 static void handle_line_error (const struct control *, uintmax_t)
692 ATTRIBUTE_NORETURN;
693 static void
694 handle_line_error (const struct control *p, uintmax_t repetition)
696 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
698 fprintf (stderr, _("%s: %s: line number out of range"),
699 program_name, quote (umaxtostr (p->lines_required, buf)));
700 if (repetition)
701 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
702 else
703 fprintf (stderr, "\n");
705 cleanup_fatal ();
708 /* Determine the line number that marks the end of this file,
709 then get those lines and save them to the output file.
710 P is the control record.
711 REPETITION is the repetition number. */
713 static void
714 process_line_count (const struct control *p, uintmax_t repetition)
716 uintmax_t linenum;
717 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
718 struct cstring *line;
720 create_output_file ();
722 linenum = get_first_line_in_buffer ();
724 while (linenum++ < last_line_to_save)
726 line = remove_line ();
727 if (line == NULL)
728 handle_line_error (p, repetition);
729 save_line_to_file (line);
732 close_output_file ();
734 /* Ensure that the line number specified is not 1 greater than
735 the number of lines in the file. */
736 if (no_more_lines ())
737 handle_line_error (p, repetition);
740 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
741 static void
742 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
744 fprintf (stderr, _("%s: %s: match not found"),
745 program_name, quote (global_argv[p->argnum]));
747 if (repetition)
749 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
750 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
752 else
753 fprintf (stderr, "\n");
755 if (!ignore)
757 dump_rest_of_file ();
758 close_output_file ();
760 cleanup_fatal ();
763 /* Read the input until a line matches the regexp in P, outputting
764 it unless P->IGNORE is true.
765 REPETITION is this repeat-count; 0 means the first time. */
767 static void
768 process_regexp (struct control *p, uintmax_t repetition)
770 struct cstring *line; /* From input file. */
771 size_t line_len; /* To make "$" in regexps work. */
772 uintmax_t break_line; /* First line number of next file. */
773 bool ignore = p->ignore; /* If true, skip this section. */
774 regoff_t ret;
776 if (!ignore)
777 create_output_file ();
779 /* If there is no offset for the regular expression, or
780 it is positive, then it is not necessary to buffer the lines. */
782 if (p->offset >= 0)
784 while (true)
786 line = find_line (++current_line);
787 if (line == NULL)
789 if (p->repeat_forever)
791 if (!ignore)
793 dump_rest_of_file ();
794 close_output_file ();
796 exit (EXIT_SUCCESS);
798 else
799 regexp_error (p, repetition, ignore);
801 line_len = line->len;
802 if (line->str[line_len - 1] == '\n')
803 line_len--;
804 ret = re_search (&p->re_compiled, line->str, line_len,
805 0, line_len, NULL);
806 if (ret == -2)
808 error (0, 0, _("error in regular expression search"));
809 cleanup_fatal ();
811 if (ret == -1)
813 line = remove_line ();
814 if (!ignore)
815 save_line_to_file (line);
817 else
818 break;
821 else
823 /* Buffer the lines. */
824 while (true)
826 line = find_line (++current_line);
827 if (line == NULL)
829 if (p->repeat_forever)
831 if (!ignore)
833 dump_rest_of_file ();
834 close_output_file ();
836 exit (EXIT_SUCCESS);
838 else
839 regexp_error (p, repetition, ignore);
841 line_len = line->len;
842 if (line->str[line_len - 1] == '\n')
843 line_len--;
844 ret = re_search (&p->re_compiled, line->str, line_len,
845 0, line_len, NULL);
846 if (ret == -2)
848 error (0, 0, _("error in regular expression search"));
849 cleanup_fatal ();
851 if (ret != -1)
852 break;
856 /* Account for any offset from this regexp. */
857 break_line = current_line + p->offset;
859 write_to_file (break_line, ignore, p->argnum);
861 if (!ignore)
862 close_output_file ();
864 if (p->offset > 0)
865 current_line = break_line;
868 /* Split the input file according to the control records we have built. */
870 static void
871 split_file (void)
873 size_t i;
875 for (i = 0; i < control_used; i++)
877 uintmax_t j;
878 if (controls[i].regexpr)
880 for (j = 0; (controls[i].repeat_forever
881 || j <= controls[i].repeat); j++)
882 process_regexp (&controls[i], j);
884 else
886 for (j = 0; (controls[i].repeat_forever
887 || j <= controls[i].repeat); j++)
888 process_line_count (&controls[i], j);
892 create_output_file ();
893 dump_rest_of_file ();
894 close_output_file ();
897 /* Return the name of output file number NUM.
899 This function is called from a signal handler, so it should invoke
900 only reentrant functions that are async-signal-safe. POSIX does
901 not guarantee this for the functions called below, but we don't
902 know of any hosts where this implementation isn't safe. */
904 static char *
905 make_filename (unsigned int num)
907 strcpy (filename_space, prefix);
908 if (suffix)
909 sprintf (filename_space + strlen (prefix), suffix, num);
910 else
911 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
912 return filename_space;
915 /* Create the next output file. */
917 static void
918 create_output_file (void)
920 bool fopen_ok;
921 int fopen_errno;
923 output_filename = make_filename (files_created);
925 if (files_created == UINT_MAX)
927 fopen_ok = false;
928 fopen_errno = EOVERFLOW;
930 else
932 /* Create the output file in a critical section, to avoid races. */
933 sigset_t oldset;
934 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
935 output_stream = fopen (output_filename, "w");
936 fopen_ok = (output_stream != NULL);
937 fopen_errno = errno;
938 files_created += fopen_ok;
939 sigprocmask (SIG_SETMASK, &oldset, NULL);
942 if (! fopen_ok)
944 error (0, fopen_errno, "%s", output_filename);
945 cleanup_fatal ();
947 bytes_written = 0;
950 /* If requested, delete all the files we have created. This function
951 must be called only from critical sections. */
953 static void
954 delete_all_files (bool in_signal_handler)
956 unsigned int i;
958 if (! remove_files)
959 return;
961 for (i = 0; i < files_created; i++)
963 const char *name = make_filename (i);
964 if (unlink (name) != 0 && !in_signal_handler)
965 error (0, errno, "%s", name);
968 files_created = 0;
971 /* Close the current output file and print the count
972 of characters in this file. */
974 static void
975 close_output_file (void)
977 if (output_stream)
979 if (ferror (output_stream))
981 error (0, 0, _("write error for %s"), quote (output_filename));
982 output_stream = NULL;
983 cleanup_fatal ();
985 if (fclose (output_stream) != 0)
987 error (0, errno, "%s", output_filename);
988 output_stream = NULL;
989 cleanup_fatal ();
991 if (bytes_written == 0 && elide_empty_files)
993 sigset_t oldset;
994 bool unlink_ok;
995 int unlink_errno;
997 /* Remove the output file in a critical section, to avoid races. */
998 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
999 unlink_ok = (unlink (output_filename) == 0);
1000 unlink_errno = errno;
1001 files_created -= unlink_ok;
1002 sigprocmask (SIG_SETMASK, &oldset, NULL);
1004 if (! unlink_ok)
1005 error (0, unlink_errno, "%s", output_filename);
1007 else
1009 if (!suppress_count)
1011 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1012 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1015 output_stream = NULL;
1019 /* Save line LINE to the output file and
1020 increment the character count for the current file. */
1022 static void
1023 save_line_to_file (const struct cstring *line)
1025 fwrite (line->str, sizeof (char), line->len, output_stream);
1026 bytes_written += line->len;
1029 /* Return a new, initialized control record. */
1031 static struct control *
1032 new_control_record (void)
1034 static size_t control_allocated = 0; /* Total space allocated. */
1035 struct control *p;
1037 if (control_used == control_allocated)
1038 controls = X2NREALLOC (controls, &control_allocated);
1039 p = &controls[control_used++];
1040 p->regexpr = false;
1041 p->repeat = 0;
1042 p->repeat_forever = false;
1043 p->lines_required = 0;
1044 p->offset = 0;
1045 return p;
1048 /* Check if there is a numeric offset after a regular expression.
1049 STR is the entire command line argument.
1050 P is the control record for this regular expression.
1051 NUM is the numeric part of STR. */
1053 static void
1054 check_for_offset (struct control *p, const char *str, const char *num)
1056 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1057 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1060 /* Given that the first character of command line arg STR is '{',
1061 make sure that the rest of the string is a valid repeat count
1062 and store its value in P.
1063 ARGNUM is the ARGV index of STR. */
1065 static void
1066 parse_repeat_count (int argnum, struct control *p, char *str)
1068 uintmax_t val;
1069 char *end;
1071 end = str + strlen (str) - 1;
1072 if (*end != '}')
1073 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1074 *end = '\0';
1076 if (str+1 == end-1 && *(str+1) == '*')
1077 p->repeat_forever = true;
1078 else
1080 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1082 error (EXIT_FAILURE, 0,
1083 _("%s}: integer required between `{' and `}'"),
1084 global_argv[argnum]);
1086 p->repeat = val;
1089 *end = '}';
1092 /* Extract the regular expression from STR and check for a numeric offset.
1093 STR should start with the regexp delimiter character.
1094 Return a new control record for the regular expression.
1095 ARGNUM is the ARGV index of STR.
1096 Unless IGNORE is true, mark these lines for output. */
1098 static struct control *
1099 extract_regexp (int argnum, bool ignore, char const *str)
1101 size_t len; /* Number of bytes in this regexp. */
1102 char delim = *str;
1103 char const *closing_delim;
1104 struct control *p;
1105 const char *err;
1107 closing_delim = strrchr (str + 1, delim);
1108 if (closing_delim == NULL)
1109 error (EXIT_FAILURE, 0,
1110 _("%s: closing delimiter `%c' missing"), str, delim);
1112 len = closing_delim - str - 1;
1113 p = new_control_record ();
1114 p->argnum = argnum;
1115 p->ignore = ignore;
1117 p->regexpr = true;
1118 p->re_compiled.buffer = NULL;
1119 p->re_compiled.allocated = 0;
1120 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1121 p->re_compiled.translate = NULL;
1122 re_syntax_options =
1123 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1124 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1125 if (err)
1127 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1128 cleanup_fatal ();
1131 if (closing_delim[1])
1132 check_for_offset (p, str, closing_delim + 1);
1134 return p;
1137 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1138 After each pattern, check if the next argument is a repeat count. */
1140 static void
1141 parse_patterns (int argc, int start, char **argv)
1143 int i; /* Index into ARGV. */
1144 struct control *p; /* New control record created. */
1145 uintmax_t val;
1146 static uintmax_t last_val = 0;
1148 for (i = start; i < argc; i++)
1150 if (*argv[i] == '/' || *argv[i] == '%')
1152 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1154 else
1156 p = new_control_record ();
1157 p->argnum = i;
1159 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1160 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1161 if (val == 0)
1162 error (EXIT_FAILURE, 0,
1163 _("%s: line number must be greater than zero"),
1164 argv[i]);
1165 if (val < last_val)
1167 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1168 error (EXIT_FAILURE, 0,
1169 _("line number %s is smaller than preceding line number, %s"),
1170 quote (argv[i]), umaxtostr (last_val, buf));
1173 if (val == last_val)
1174 error (0, 0,
1175 _("warning: line number %s is the same as preceding line number"),
1176 quote (argv[i]));
1178 last_val = val;
1180 p->lines_required = val;
1183 if (i + 1 < argc && *argv[i + 1] == '{')
1185 /* We have a repeat count. */
1186 i++;
1187 parse_repeat_count (i, p, argv[i]);
/* Bit values for the printf format flags ' and #.  They can be ORed
   together.  */
enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };

/* Skip over the run of printf flag characters (- 0 ' #) at the start
   of FORMAT.  Store in *FLAGS_PTR the FLAG_* bits for the ' and #
   flags that were seen, and return the number of flag characters
   skipped.  */
static size_t
get_format_flags (char const *format, int *flags_ptr)
{
  char const *p = format;
  int seen = 0;

  for (;; p++)
    {
      if (*p == '\'')
        seen |= FLAG_THOUSANDS;
      else if (*p == '#')
        seen |= FLAG_ALTERNATIVE;
      else if (*p != '-' && *p != '0')
        break;			/* First character that is not a flag.  */
    }

  *flags_ptr = seen;
  return p - format;
}
1227 /* Check that the printf format conversion specifier *FORMAT is valid
1228 and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
1229 since the format will be used with an unsigned value. */
1230 static void
1231 check_format_conv_type (char *format, int flags)
1233 unsigned char ch = *format;
1234 int compatible_flags = FLAG_THOUSANDS;
1236 switch (ch)
1238 case 'd':
1239 case 'i':
1240 *format = 'u';
1241 break;
1243 case 'u':
1244 break;
1246 case 'o':
1247 case 'x':
1248 case 'X':
1249 compatible_flags = FLAG_ALTERNATIVE;
1250 break;
1252 case 0:
1253 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1254 break;
1256 default:
1257 if (isprint (ch))
1258 error (EXIT_FAILURE, 0,
1259 _("invalid conversion specifier in suffix: %c"), ch);
1260 else
1261 error (EXIT_FAILURE, 0,
1262 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1265 if (flags & ~ compatible_flags)
1266 error (EXIT_FAILURE, 0,
1267 _("invalid flags in conversion specification: %%%c%c"),
1268 (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
/* Check that FORMAT contains exactly one % conversion specification
   (not counting `%%') and that it is acceptable, then return the
   number of bytes FORMAT produces when applied to UINT_MAX.  Exit
   with a diagnostic if the format is invalid.  */
static size_t
max_out (char *format)
{
  bool seen_conversion = false;
  char *f;
  int maxlen;

  for (f = format; *f; f++)
    {
      int flags;

      if (*f != '%')
        continue;

      /* Look at the character after the '%'; "%%" is a literal.  */
      f++;
      if (*f == '%')
        continue;

      if (seen_conversion)
        error (EXIT_FAILURE, 0,
               _("too many %% conversion specifications in suffix"));
      seen_conversion = true;

      /* Skip flags, field width and precision.  */
      f += get_format_flags (f, &flags);
      while (ISDIGIT (*f))
        f++;
      if (*f == '.')
        {
          do
            f++;
          while (ISDIGIT (*f));
        }

      check_format_conv_type (f, flags);
    }

  if (! seen_conversion)
    error (EXIT_FAILURE, 0,
           _("missing %% conversion specification in suffix"));

  maxlen = snprintf (NULL, 0, format, UINT_MAX);
  if (maxlen < 0 || SIZE_MAX < maxlen)
    xalloc_die ();
  return maxlen;
}
1307 main (int argc, char **argv)
1309 int optc;
1310 unsigned long int val;
1312 initialize_main (&argc, &argv);
1313 set_program_name (argv[0]);
1314 setlocale (LC_ALL, "");
1315 bindtextdomain (PACKAGE, LOCALEDIR);
1316 textdomain (PACKAGE);
1318 atexit (close_stdout);
1320 global_argv = argv;
1321 controls = NULL;
1322 control_used = 0;
1323 suppress_count = false;
1324 remove_files = true;
1325 prefix = DEFAULT_PREFIX;
1327 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1328 switch (optc)
1330 case 'f':
1331 prefix = optarg;
1332 break;
1334 case 'b':
1335 suffix = optarg;
1336 break;
1338 case 'k':
1339 remove_files = false;
1340 break;
1342 case 'n':
1343 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1344 || MIN (INT_MAX, SIZE_MAX) < val)
1345 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1346 digits = val;
1347 break;
1349 case 's':
1350 case 'q':
1351 suppress_count = true;
1352 break;
1354 case 'z':
1355 elide_empty_files = true;
1356 break;
1358 case_GETOPT_HELP_CHAR;
1360 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1362 default:
1363 usage (EXIT_FAILURE);
1366 if (argc - optind < 2)
1368 if (argc <= optind)
1369 error (0, 0, _("missing operand"));
1370 else
1371 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1372 usage (EXIT_FAILURE);
1375 size_t prefix_len = strlen (prefix);
1376 size_t max_digit_string_len
1377 = (suffix
1378 ? max_out (suffix)
1379 : MAX (INT_STRLEN_BOUND (unsigned int), digits));
1380 if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
1381 xalloc_die ();
1382 filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
1384 set_input_file (argv[optind++]);
1386 parse_patterns (argc, optind, argv);
1389 int i;
1390 static int const sig[] =
1392 /* The usual suspects. */
1393 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1394 #ifdef SIGPOLL
1395 SIGPOLL,
1396 #endif
1397 #ifdef SIGPROF
1398 SIGPROF,
1399 #endif
1400 #ifdef SIGVTALRM
1401 SIGVTALRM,
1402 #endif
1403 #ifdef SIGXCPU
1404 SIGXCPU,
1405 #endif
1406 #ifdef SIGXFSZ
1407 SIGXFSZ,
1408 #endif
1410 enum { nsigs = ARRAY_CARDINALITY (sig) };
1412 struct sigaction act;
1414 sigemptyset (&caught_signals);
1415 for (i = 0; i < nsigs; i++)
1417 sigaction (sig[i], NULL, &act);
1418 if (act.sa_handler != SIG_IGN)
1419 sigaddset (&caught_signals, sig[i]);
1422 act.sa_handler = interrupt_handler;
1423 act.sa_mask = caught_signals;
1424 act.sa_flags = SA_NODEFER | SA_RESETHAND;
1426 for (i = 0; i < nsigs; i++)
1427 if (sigismember (&caught_signals, sig[i]))
1428 sigaction (sig[i], &act, NULL);
1431 split_file ();
1433 if (close (STDIN_FILENO) != 0)
1435 error (0, errno, _("read error"));
1436 cleanup_fatal ();
1439 exit (EXIT_SUCCESS);
/* Print usage information and exit with STATUS.  A STATUS other than
   EXIT_SUCCESS produces only a one-line hint on stderr that names
   program_name; EXIT_SUCCESS produces the full help text on stdout.
   This function does not return: it always ends by calling exit.
   NOTE(review): in this copy the help strings appear to have lost
   their column-aligning spaces and some continuation lines; confirm
   the exact text against the upstream file before editing them.  */
1442 void
1443 usage (int status)
/* Failure: point the user at --help rather than printing everything.  */
1445 if (status != EXIT_SUCCESS)
1446 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1447 program_name);
1448 else
/* Success: the help text is emitted as several separately translated
   pieces, starting with the synopsis line.  */
1450 printf (_("\
1451 Usage: %s [OPTION]... FILE PATTERN...\n\
1453 program_name);
1454 fputs (_("\
1455 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1456 and output byte counts of each piece to standard output.\n\
1458 "), stdout);
1459 fputs (_("\
1460 Mandatory arguments to long options are mandatory for short options too.\n\
1461 "), stdout);
1462 fputs (_("\
1463 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1464 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1465 -k, --keep-files do not remove output files on errors\n\
1466 "), stdout);
1467 fputs (_("\
1468 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1469 -s, --quiet, --silent do not print counts of output file sizes\n\
1470 -z, --elide-empty-files remove empty output files\n\
1471 "), stdout);
1472 fputs (HELP_OPTION_DESCRIPTION, stdout);
1473 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1474 fputs (_("\
1476 Read standard input if FILE is -. Each PATTERN may be:\n\
1477 "), stdout);
1478 fputs (_("\
1480 INTEGER copy up to but not including specified line number\n\
1481 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1482 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1483 {INTEGER} repeat the previous pattern specified number of times\n\
1484 {*} repeat the previous pattern as many times as possible\n\
1486 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1487 "), stdout);
1488 emit_ancillary_info ();
/* Both branches end here; STATUS becomes the process exit status.  */
1490 exit (status);