maint: cleanup up various uses of __attribute__
[coreutils.git] / src / csplit.c
blob22f3ad4b12b85661d9ba9e2446c1f937cccbf3bf
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <getopt.h>
23 #include <sys/types.h>
24 #include <signal.h>
26 #include "system.h"
28 #include <regex.h>
30 #include "error.h"
31 #include "fd-reopen.h"
32 #include "quote.h"
33 #include "safe-read.h"
34 #include "stdio--.h"
35 #include "xstrtol.h"
37 /* The official name of this program (e.g., no 'g' prefix). */
38 #define PROGRAM_NAME "csplit"
40 #define AUTHORS \
41 proper_name ("Stuart Kemp"), \
42 proper_name ("David MacKenzie")
44 /* The default prefix for output file names. */
45 #define DEFAULT_PREFIX "xx"
/* One split instruction taken from the command line, in parsed form.
   It is either a line count or a compiled regular expression.  */
struct control
{
  intmax_t offset;              /* Line offset applied to a regexp match.  */
  uintmax_t lines_required;     /* Line count given for a numeric pattern.  */
  uintmax_t repeat;             /* Extra times to apply this instruction.  */
  int argnum;                   /* Index of the argument in ARGV.  */
  bool repeat_forever;          /* Set when the repeat count is '*'.  */
  bool ignore;                  /* Set to write no output for the section.  */
  bool regexpr;                 /* Set for a regexp, clear for a count.  */
  struct re_pattern_buffer re_compiled; /* The compiled regexp.  */
};
/* Tuning constants for the input buffers.
     START_SIZE  initial size of the data area in a buffer
     INCR_SIZE   granularity by which a larger data area is sized
     CTRL_SIZE   number of lines recorded in each node of the line list
   Building with DEBUG defined selects tiny values instead, so that the
   buffer-growing and node-chaining paths are exercised by small inputs.
   The two sets are mutually exclusive; defining the defaults first and
   then defining them again under DEBUG would redefine each macro with
   a different value.  */
#ifdef DEBUG
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
/* A counted byte string: LEN bytes starting at STR.  */
struct cstring
{
  size_t len;                   /* Number of bytes.  */
  char *str;                    /* Address of the first byte.  */
};
83 /* Pointers to the beginnings of lines in the buffer area.
84 These structures are linked together if needed. */
85 struct line
87 size_t used; /* Number of offsets used in this struct. */
88 size_t insert_index; /* Next offset to use when inserting line. */
89 size_t retrieve_index; /* Next index to use when retrieving line. */
90 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
91 struct line *next; /* Next in linked list. */
/* A chunk of buffered input: the raw data area together with the list
   of line records that point into it.  Records are chained via NEXT.  */
struct buffer_record
{
  size_t bytes_alloc;           /* Capacity of BUFFER, in bytes.  */
  size_t bytes_used;            /* Bytes of BUFFER that hold data.  */
  uintmax_t start_line;         /* Number of the first line stored here.  */
  uintmax_t first_available;    /* Number of the first unretrieved line.  */
  size_t num_lines;             /* Count of lines recorded for BUFFER.  */
  char *buffer;                 /* The data area itself.  */
  struct line *line_start;      /* First node of the line list.  */
  struct line *curr_line;       /* Node of the line list now in use.  */
  struct buffer_record *next;   /* Following record, or NULL.  */
};
/* Output-file helpers that are defined further down in this file but
   are called by code that appears before their definitions.  */
static void close_output_file (void);
static void create_output_file (void);
static void delete_all_files (bool in_signal_handler);
static void save_line_to_file (const struct cstring *line);
/* ---- Input buffering state ---- */

/* First record in the list of buffers still holding unprocessed input.  */
static struct buffer_record *head = NULL;

/* Bytes of an incomplete line carried over to the next buffer fill.  */
static char *hold_area = NULL;

/* How many bytes 'hold_area' holds.  */
static size_t hold_count = 0;

/* Line number of the last line recorded in any buffer.  */
static uintmax_t last_line_number = 0;

/* Line number of the line being examined.  */
static uintmax_t current_line = 0;

/* Set once a read has returned end of file.  */
static bool have_read_eof = false;

/* ---- Output naming state ----
   The volatile objects here are the ones read by make_filename and
   delete_all_files, which the signal handler reaches.  */

/* Storage in which output file names are built.  */
static char *volatile filename_space = NULL;

/* Leading part of every output file name.  */
static char const *volatile prefix = NULL;

/* printf-style format for the trailing part of output file names,
   or NULL to use a zero-padded number of 'digits' digits.  */
static char *volatile suffix = NULL;

/* Minimum width of the number in an output file name.  */
static int volatile digits = 2;

/* Count of output files created so far.  */
static unsigned int volatile files_created = 0;

/* ---- Current output file ---- */

/* Bytes written to the current output file.  */
static uintmax_t bytes_written;

/* Stream for the current output file, or NULL if none is open.  */
static FILE *output_stream = NULL;

/* Name of the current output file.  */
static char *output_filename = NULL;

/* ---- Command-line state ---- */

/* Saved ARGV, so that diagnostics can quote a pattern by its index.  */
static char **global_argv;

/* When set, the byte count of each output file is not printed.  */
static bool suppress_count;

/* When set, output files are removed on error.  */
static bool volatile remove_files;

/* When set, output files that end up empty are removed.  */
static bool elide_empty_files;

/* Array of parsed pattern arguments that say where to split.  */
static struct control *controls;

/* Number of entries of 'controls' that are in use.  */
static size_t control_used;
176 /* The set of signals that are caught. */
177 static sigset_t caught_signals;
179 static struct option const longopts[] =
181 {"digits", required_argument, NULL, 'n'},
182 {"quiet", no_argument, NULL, 'q'},
183 {"silent", no_argument, NULL, 's'},
184 {"keep-files", no_argument, NULL, 'k'},
185 {"elide-empty-files", no_argument, NULL, 'z'},
186 {"prefix", required_argument, NULL, 'f'},
187 {"suffix-format", required_argument, NULL, 'b'},
188 {GETOPT_HELP_OPTION_DECL},
189 {GETOPT_VERSION_OPTION_DECL},
190 {NULL, 0, NULL, 0}
193 /* Optionally remove files created so far; then exit.
194 Called when an error detected. */
196 static void
197 cleanup (void)
199 sigset_t oldset;
201 close_output_file ();
203 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
204 delete_all_files (false);
205 sigprocmask (SIG_SETMASK, &oldset, NULL);
208 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
209 static void
210 cleanup_fatal (void)
212 cleanup ();
213 exit (EXIT_FAILURE);
/* Report memory exhaustion and exit through cleanup_fatal, so that the
   usual output-file cleanup happens first.
   NOTE(review): this has external linkage; presumably it replaces the
   allocation library's default out-of-memory handler -- confirm.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Signal handler for SIG: delete the output files (if removal is
   enabled), then let the signal take its default action.  */

static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  /* Restore the default disposition and re-send the signal.  It stays
     blocked while this handler runs, so the default action happens
     once the handler has returned and the block is lifted.  */
  signal (sig, SIG_DFL);
  raise (sig);
}
234 /* Keep track of NUM bytes of a partial line in buffer START.
235 These bytes will be retrieved later when another large buffer is read. */
237 static void
238 save_to_hold_area (char *start, size_t num)
240 free (hold_area);
241 hold_area = start;
242 hold_count = num;
245 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
246 Return the number of bytes read. */
248 static size_t
249 read_input (char *dest, size_t max_n_bytes)
251 size_t bytes_read;
253 if (max_n_bytes == 0)
254 return 0;
256 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
258 if (bytes_read == 0)
259 have_read_eof = true;
261 if (bytes_read == SAFE_READ_ERROR)
263 error (0, errno, _("read error"));
264 cleanup_fatal ();
267 return bytes_read;
270 /* Initialize existing line record P. */
272 static void
273 clear_line_control (struct line *p)
275 p->used = 0;
276 p->insert_index = 0;
277 p->retrieve_index = 0;
280 /* Return a new, initialized line record. */
282 static struct line *
283 new_line_control (void)
285 struct line *p = xmalloc (sizeof *p);
287 p->next = NULL;
288 clear_line_control (p);
290 return p;
293 /* Record LINE_START, which is the address of the start of a line
294 of length LINE_LEN in the large buffer, in the lines buffer of B. */
296 static void
297 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
299 struct line *l;
301 /* If there is no existing area to keep line info, get some. */
302 if (b->line_start == NULL)
303 b->line_start = b->curr_line = new_line_control ();
305 /* If existing area for lines is full, get more. */
306 if (b->curr_line->used == CTRL_SIZE)
308 b->curr_line->next = new_line_control ();
309 b->curr_line = b->curr_line->next;
312 l = b->curr_line;
314 /* Record the start of the line, and update counters. */
315 l->starts[l->insert_index].str = line_start;
316 l->starts[l->insert_index].len = line_len;
317 l->used++;
318 l->insert_index++;
321 /* Scan the buffer in B for newline characters
322 and record the line start locations and lengths in B.
323 Return the number of lines found in this buffer.
325 There may be an incomplete line at the end of the buffer;
326 a pointer is kept to this area, which will be used when
327 the next buffer is filled. */
329 static size_t
330 record_line_starts (struct buffer_record *b)
332 char *line_start; /* Start of current line. */
333 char *line_end; /* End of each line found. */
334 size_t bytes_left; /* Length of incomplete last line. */
335 size_t lines; /* Number of lines found. */
336 size_t line_length; /* Length of each line found. */
338 if (b->bytes_used == 0)
339 return 0;
341 lines = 0;
342 line_start = b->buffer;
343 bytes_left = b->bytes_used;
345 while (true)
347 line_end = memchr (line_start, '\n', bytes_left);
348 if (line_end == NULL)
349 break;
350 line_length = line_end - line_start + 1;
351 keep_new_line (b, line_start, line_length);
352 bytes_left -= line_length;
353 line_start = line_end + 1;
354 lines++;
357 /* Check for an incomplete last line. */
358 if (bytes_left)
360 if (have_read_eof)
362 keep_new_line (b, line_start, bytes_left);
363 lines++;
365 else
366 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
369 b->num_lines = lines;
370 b->first_available = b->start_line = last_line_number + 1;
371 last_line_number += lines;
373 return lines;
376 /* Return a new buffer with room to store SIZE bytes, plus
377 an extra byte for safety. */
379 static struct buffer_record *
380 create_new_buffer (size_t size)
382 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
384 new_buffer->buffer = xmalloc (size + 1);
386 new_buffer->bytes_alloc = size;
387 new_buffer->line_start = new_buffer->curr_line = NULL;
389 return new_buffer;
392 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
393 least that size is currently free, use it, otherwise create a new one. */
395 static struct buffer_record *
396 get_new_buffer (size_t min_size)
398 struct buffer_record *new_buffer; /* Buffer to return. */
399 size_t alloc_size; /* Actual size that will be requested. */
401 alloc_size = START_SIZE;
402 if (alloc_size < min_size)
404 size_t s = min_size - alloc_size + INCR_SIZE - 1;
405 alloc_size += s - s % INCR_SIZE;
408 new_buffer = create_new_buffer (alloc_size);
410 new_buffer->num_lines = 0;
411 new_buffer->bytes_used = 0;
412 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
413 new_buffer->next = NULL;
415 return new_buffer;
418 static void
419 free_buffer (struct buffer_record *buf)
421 struct line *l;
422 for (l = buf->line_start; l;)
424 struct line *n = l->next;
425 free (l);
426 l = n;
428 buf->line_start = NULL;
429 free (buf->buffer);
430 buf->buffer = NULL;
433 /* Append buffer BUF to the linked list of buffers that contain
434 some data yet to be processed. */
436 static void
437 save_buffer (struct buffer_record *buf)
439 struct buffer_record *p;
441 buf->next = NULL;
442 buf->curr_line = buf->line_start;
444 if (head == NULL)
445 head = buf;
446 else
448 for (p = head; p->next; p = p->next)
449 /* Do nothing. */ ;
450 p->next = buf;
454 /* Fill a buffer of input.
456 Set the initial size of the buffer to a default.
457 Fill the buffer (from the hold area and input stream)
458 and find the individual lines.
459 If no lines are found (the buffer is too small to hold the next line),
460 release the current buffer (whose contents would have been put in the
461 hold area) and repeat the process with another large buffer until at least
462 one entire line has been read.
464 Return true if a new buffer was obtained, otherwise false
465 (in which case end-of-file must have been encountered). */
467 static bool
468 load_buffer (void)
470 struct buffer_record *b;
471 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
472 size_t bytes_avail; /* Size of new buffer created. */
473 size_t lines_found; /* Number of lines in this new buffer. */
474 char *p; /* Place to load into buffer. */
476 if (have_read_eof)
477 return false;
479 /* We must make the buffer at least as large as the amount of data
480 in the partial line left over from the last call. */
481 if (bytes_wanted < hold_count)
482 bytes_wanted = hold_count;
484 while (1)
486 b = get_new_buffer (bytes_wanted);
487 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
488 p = b->buffer;
490 /* First check the 'holding' area for a partial line. */
491 if (hold_count)
493 memcpy (p, hold_area, hold_count);
494 p += hold_count;
495 b->bytes_used += hold_count;
496 bytes_avail -= hold_count;
497 hold_count = 0;
500 b->bytes_used += read_input (p, bytes_avail);
502 lines_found = record_line_starts (b);
504 if (lines_found || have_read_eof)
505 break;
507 if (xalloc_oversized (2, b->bytes_alloc))
508 xalloc_die ();
509 bytes_wanted = 2 * b->bytes_alloc;
510 free_buffer (b);
511 free (b);
514 if (lines_found)
515 save_buffer (b);
516 else
518 free_buffer (b);
519 free (b);
522 return lines_found != 0;
525 /* Return the line number of the first line that has not yet been retrieved. */
527 static uintmax_t
528 get_first_line_in_buffer (void)
530 if (head == NULL && !load_buffer ())
531 error (EXIT_FAILURE, errno, _("input disappeared"));
533 return head->first_available;
536 /* Return a pointer to the logical first line in the buffer and make the
537 next line the logical first line.
538 Return NULL if there is no more input. */
540 static struct cstring *
541 remove_line (void)
543 /* If non-NULL, this is the buffer for which the previous call
544 returned the final line. So now, presuming that line has been
545 processed, we can free the buffer and reset this pointer. */
546 static struct buffer_record *prev_buf = NULL;
548 struct cstring *line; /* Return value. */
549 struct line *l; /* For convenience. */
551 if (prev_buf)
553 free_buffer (prev_buf);
554 free (prev_buf);
555 prev_buf = NULL;
558 if (head == NULL && !load_buffer ())
559 return NULL;
561 if (current_line < head->first_available)
562 current_line = head->first_available;
564 ++(head->first_available);
566 l = head->curr_line;
568 line = &l->starts[l->retrieve_index];
570 /* Advance index to next line. */
571 if (++l->retrieve_index == l->used)
573 /* Go on to the next line record. */
574 head->curr_line = l->next;
575 if (head->curr_line == NULL || head->curr_line->used == 0)
577 /* Go on to the next data block.
578 but first record the current one so we can free it
579 once the line we're returning has been processed. */
580 prev_buf = head;
581 head = head->next;
585 return line;
588 /* Search the buffers for line LINENUM, reading more input if necessary.
589 Return a pointer to the line, or NULL if it is not found in the file. */
591 static struct cstring *
592 find_line (uintmax_t linenum)
594 struct buffer_record *b;
596 if (head == NULL && !load_buffer ())
597 return NULL;
599 if (linenum < head->start_line)
600 return NULL;
602 for (b = head;;)
604 if (linenum < b->start_line + b->num_lines)
606 /* The line is in this buffer. */
607 struct line *l;
608 size_t offset; /* How far into the buffer the line is. */
610 l = b->line_start;
611 offset = linenum - b->start_line;
612 /* Find the control record. */
613 while (offset >= CTRL_SIZE)
615 l = l->next;
616 offset -= CTRL_SIZE;
618 return &l->starts[offset];
620 if (b->next == NULL && !load_buffer ())
621 return NULL;
622 b = b->next; /* Try the next data block. */
626 /* Return true if at least one more line is available for input. */
628 static bool
629 no_more_lines (void)
631 return find_line (current_line + 1) == NULL;
/* Arrange for standard input to read from the file NAME.  The name
   "-" leaves standard input as it is.  Exit with a diagnostic if the
   file cannot be opened.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
}
643 /* Write all lines from the beginning of the buffer up to, but
644 not including, line LAST_LINE, to the current output file.
645 If IGNORE is true, do not output lines selected here.
646 ARGNUM is the index in ARGV of the current pattern. */
648 static void
649 write_to_file (uintmax_t last_line, bool ignore, int argnum)
651 struct cstring *line;
652 uintmax_t first_line; /* First available input line. */
653 uintmax_t lines; /* Number of lines to output. */
654 uintmax_t i;
656 first_line = get_first_line_in_buffer ();
658 if (first_line > last_line)
660 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
661 cleanup_fatal ();
664 lines = last_line - first_line;
666 for (i = 0; i < lines; i++)
668 line = remove_line ();
669 if (line == NULL)
671 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
672 cleanup_fatal ();
674 if (!ignore)
675 save_line_to_file (line);
/* Retrieve all remaining input lines and write them to the current
   output file.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
        break;
      save_line_to_file (line);
    }
}
690 /* Handle an attempt to read beyond EOF under the control of record P,
691 on iteration REPETITION if nonzero. */
693 static void handle_line_error (const struct control *, uintmax_t)
694 ATTRIBUTE_NORETURN;
695 static void
696 handle_line_error (const struct control *p, uintmax_t repetition)
698 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
700 fprintf (stderr, _("%s: %s: line number out of range"),
701 program_name, quote (umaxtostr (p->lines_required, buf)));
702 if (repetition)
703 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
704 else
705 fprintf (stderr, "\n");
707 cleanup_fatal ();
710 /* Determine the line number that marks the end of this file,
711 then get those lines and save them to the output file.
712 P is the control record.
713 REPETITION is the repetition number. */
715 static void
716 process_line_count (const struct control *p, uintmax_t repetition)
718 uintmax_t linenum;
719 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
720 struct cstring *line;
722 create_output_file ();
724 linenum = get_first_line_in_buffer ();
726 while (linenum++ < last_line_to_save)
728 line = remove_line ();
729 if (line == NULL)
730 handle_line_error (p, repetition);
731 save_line_to_file (line);
734 close_output_file ();
736 /* Ensure that the line number specified is not 1 greater than
737 the number of lines in the file. */
738 if (no_more_lines ())
739 handle_line_error (p, repetition);
742 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
743 static void
744 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
746 fprintf (stderr, _("%s: %s: match not found"),
747 program_name, quote (global_argv[p->argnum]));
749 if (repetition)
751 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
752 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
754 else
755 fprintf (stderr, "\n");
757 if (!ignore)
759 dump_rest_of_file ();
760 close_output_file ();
762 cleanup_fatal ();
765 /* Read the input until a line matches the regexp in P, outputting
766 it unless P->IGNORE is true.
767 REPETITION is this repeat-count; 0 means the first time. */
769 static void
770 process_regexp (struct control *p, uintmax_t repetition)
772 struct cstring *line; /* From input file. */
773 size_t line_len; /* To make "$" in regexps work. */
774 uintmax_t break_line; /* First line number of next file. */
775 bool ignore = p->ignore; /* If true, skip this section. */
776 regoff_t ret;
778 if (!ignore)
779 create_output_file ();
781 /* If there is no offset for the regular expression, or
782 it is positive, then it is not necessary to buffer the lines. */
784 if (p->offset >= 0)
786 while (true)
788 line = find_line (++current_line);
789 if (line == NULL)
791 if (p->repeat_forever)
793 if (!ignore)
795 dump_rest_of_file ();
796 close_output_file ();
798 exit (EXIT_SUCCESS);
800 else
801 regexp_error (p, repetition, ignore);
803 line_len = line->len;
804 if (line->str[line_len - 1] == '\n')
805 line_len--;
806 ret = re_search (&p->re_compiled, line->str, line_len,
807 0, line_len, NULL);
808 if (ret == -2)
810 error (0, 0, _("error in regular expression search"));
811 cleanup_fatal ();
813 if (ret == -1)
815 line = remove_line ();
816 if (!ignore)
817 save_line_to_file (line);
819 else
820 break;
823 else
825 /* Buffer the lines. */
826 while (true)
828 line = find_line (++current_line);
829 if (line == NULL)
831 if (p->repeat_forever)
833 if (!ignore)
835 dump_rest_of_file ();
836 close_output_file ();
838 exit (EXIT_SUCCESS);
840 else
841 regexp_error (p, repetition, ignore);
843 line_len = line->len;
844 if (line->str[line_len - 1] == '\n')
845 line_len--;
846 ret = re_search (&p->re_compiled, line->str, line_len,
847 0, line_len, NULL);
848 if (ret == -2)
850 error (0, 0, _("error in regular expression search"));
851 cleanup_fatal ();
853 if (ret != -1)
854 break;
858 /* Account for any offset from this regexp. */
859 break_line = current_line + p->offset;
861 write_to_file (break_line, ignore, p->argnum);
863 if (!ignore)
864 close_output_file ();
866 if (p->offset > 0)
867 current_line = break_line;
870 /* Split the input file according to the control records we have built. */
872 static void
873 split_file (void)
875 size_t i;
877 for (i = 0; i < control_used; i++)
879 uintmax_t j;
880 if (controls[i].regexpr)
882 for (j = 0; (controls[i].repeat_forever
883 || j <= controls[i].repeat); j++)
884 process_regexp (&controls[i], j);
886 else
888 for (j = 0; (controls[i].repeat_forever
889 || j <= controls[i].repeat); j++)
890 process_line_count (&controls[i], j);
894 create_output_file ();
895 dump_rest_of_file ();
896 close_output_file ();
899 /* Return the name of output file number NUM.
901 This function is called from a signal handler, so it should invoke
902 only reentrant functions that are async-signal-safe. POSIX does
903 not guarantee this for the functions called below, but we don't
904 know of any hosts where this implementation isn't safe. */
906 static char *
907 make_filename (unsigned int num)
909 strcpy (filename_space, prefix);
910 if (suffix)
911 sprintf (filename_space + strlen (prefix), suffix, num);
912 else
913 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
914 return filename_space;
917 /* Create the next output file. */
919 static void
920 create_output_file (void)
922 bool fopen_ok;
923 int fopen_errno;
925 output_filename = make_filename (files_created);
927 if (files_created == UINT_MAX)
929 fopen_ok = false;
930 fopen_errno = EOVERFLOW;
932 else
934 /* Create the output file in a critical section, to avoid races. */
935 sigset_t oldset;
936 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
937 output_stream = fopen (output_filename, "w");
938 fopen_ok = (output_stream != NULL);
939 fopen_errno = errno;
940 files_created += fopen_ok;
941 sigprocmask (SIG_SETMASK, &oldset, NULL);
944 if (! fopen_ok)
946 error (0, fopen_errno, "%s", output_filename);
947 cleanup_fatal ();
949 bytes_written = 0;
952 /* If requested, delete all the files we have created. This function
953 must be called only from critical sections. */
955 static void
956 delete_all_files (bool in_signal_handler)
958 unsigned int i;
960 if (! remove_files)
961 return;
963 for (i = 0; i < files_created; i++)
965 const char *name = make_filename (i);
966 if (unlink (name) != 0 && !in_signal_handler)
967 error (0, errno, "%s", name);
970 files_created = 0;
973 /* Close the current output file and print the count
974 of characters in this file. */
976 static void
977 close_output_file (void)
979 if (output_stream)
981 if (ferror (output_stream))
983 error (0, 0, _("write error for %s"), quote (output_filename));
984 output_stream = NULL;
985 cleanup_fatal ();
987 if (fclose (output_stream) != 0)
989 error (0, errno, "%s", output_filename);
990 output_stream = NULL;
991 cleanup_fatal ();
993 if (bytes_written == 0 && elide_empty_files)
995 sigset_t oldset;
996 bool unlink_ok;
997 int unlink_errno;
999 /* Remove the output file in a critical section, to avoid races. */
1000 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1001 unlink_ok = (unlink (output_filename) == 0);
1002 unlink_errno = errno;
1003 files_created -= unlink_ok;
1004 sigprocmask (SIG_SETMASK, &oldset, NULL);
1006 if (! unlink_ok)
1007 error (0, unlink_errno, "%s", output_filename);
1009 else
1011 if (!suppress_count)
1013 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1014 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1017 output_stream = NULL;
1021 /* Save line LINE to the output file and
1022 increment the character count for the current file. */
1024 static void
1025 save_line_to_file (const struct cstring *line)
1027 fwrite (line->str, sizeof (char), line->len, output_stream);
1028 bytes_written += line->len;
1031 /* Return a new, initialized control record. */
1033 static struct control *
1034 new_control_record (void)
1036 static size_t control_allocated = 0; /* Total space allocated. */
1037 struct control *p;
1039 if (control_used == control_allocated)
1040 controls = X2NREALLOC (controls, &control_allocated);
1041 p = &controls[control_used++];
1042 p->regexpr = false;
1043 p->repeat = 0;
1044 p->repeat_forever = false;
1045 p->lines_required = 0;
1046 p->offset = 0;
1047 return p;
1050 /* Check if there is a numeric offset after a regular expression.
1051 STR is the entire command line argument.
1052 P is the control record for this regular expression.
1053 NUM is the numeric part of STR. */
1055 static void
1056 check_for_offset (struct control *p, const char *str, const char *num)
1058 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1059 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1062 /* Given that the first character of command line arg STR is '{',
1063 make sure that the rest of the string is a valid repeat count
1064 and store its value in P.
1065 ARGNUM is the ARGV index of STR. */
1067 static void
1068 parse_repeat_count (int argnum, struct control *p, char *str)
1070 uintmax_t val;
1071 char *end;
1073 end = str + strlen (str) - 1;
1074 if (*end != '}')
1075 error (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"), str);
1076 *end = '\0';
1078 if (str+1 == end-1 && *(str+1) == '*')
1079 p->repeat_forever = true;
1080 else
1082 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1084 error (EXIT_FAILURE, 0,
1085 _("%s}: integer required between '{' and '}'"),
1086 global_argv[argnum]);
1088 p->repeat = val;
1091 *end = '}';
1094 /* Extract the regular expression from STR and check for a numeric offset.
1095 STR should start with the regexp delimiter character.
1096 Return a new control record for the regular expression.
1097 ARGNUM is the ARGV index of STR.
1098 Unless IGNORE is true, mark these lines for output. */
1100 static struct control *
1101 extract_regexp (int argnum, bool ignore, char const *str)
1103 size_t len; /* Number of bytes in this regexp. */
1104 char delim = *str;
1105 char const *closing_delim;
1106 struct control *p;
1107 const char *err;
1109 closing_delim = strrchr (str + 1, delim);
1110 if (closing_delim == NULL)
1111 error (EXIT_FAILURE, 0,
1112 _("%s: closing delimiter '%c' missing"), str, delim);
1114 len = closing_delim - str - 1;
1115 p = new_control_record ();
1116 p->argnum = argnum;
1117 p->ignore = ignore;
1119 p->regexpr = true;
1120 p->re_compiled.buffer = NULL;
1121 p->re_compiled.allocated = 0;
1122 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1123 p->re_compiled.translate = NULL;
1124 re_syntax_options =
1125 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1126 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1127 if (err)
1129 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1130 cleanup_fatal ();
1133 if (closing_delim[1])
1134 check_for_offset (p, str, closing_delim + 1);
1136 return p;
1139 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1140 After each pattern, check if the next argument is a repeat count. */
1142 static void
1143 parse_patterns (int argc, int start, char **argv)
1145 int i; /* Index into ARGV. */
1146 struct control *p; /* New control record created. */
1147 uintmax_t val;
1148 static uintmax_t last_val = 0;
1150 for (i = start; i < argc; i++)
1152 if (*argv[i] == '/' || *argv[i] == '%')
1154 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1156 else
1158 p = new_control_record ();
1159 p->argnum = i;
1161 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1162 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1163 if (val == 0)
1164 error (EXIT_FAILURE, 0,
1165 _("%s: line number must be greater than zero"),
1166 argv[i]);
1167 if (val < last_val)
1169 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1170 error (EXIT_FAILURE, 0,
1171 _("line number %s is smaller than preceding line number, %s"),
1172 quote (argv[i]), umaxtostr (last_val, buf));
1175 if (val == last_val)
1176 error (0, 0,
1177 _("warning: line number %s is the same as preceding line number"),
1178 quote (argv[i]));
1180 last_val = val;
1182 p->lines_required = val;
1185 if (i + 1 < argc && *argv[i + 1] == '{')
1187 /* We have a repeat count. */
1188 i++;
1189 parse_repeat_count (i, p, argv[i]);
/* Bit values that record the printf format flags ' and #.
   They can be ORed together.  */
enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };

/* Skip over the run of printf flag characters (- 0 ' #) at the start
   of FORMAT.  Store in *FLAGS_PTR the FLAG_ bits for the ' and #
   flags seen, and return the number of flag characters skipped.  */
static size_t
get_format_flags (char const *format, int *flags_ptr)
{
  char const *p = format;
  int seen = 0;

  for (;; p++)
    {
      if (*p == '\'')
        seen |= FLAG_THOUSANDS;
      else if (*p == '#')
        seen |= FLAG_ALTERNATIVE;
      else if (*p != '-' && *p != '0')
        break;                  /* Not a flag character.  */
    }

  *flags_ptr = seen;
  return p - format;
}
1229 /* Check that the printf format conversion specifier *FORMAT is valid
1230 and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
1231 since the format will be used with an unsigned value. */
1232 static void
1233 check_format_conv_type (char *format, int flags)
1235 unsigned char ch = *format;
1236 int compatible_flags = FLAG_THOUSANDS;
1238 switch (ch)
1240 case 'd':
1241 case 'i':
1242 *format = 'u';
1243 break;
1245 case 'u':
1246 break;
1248 case 'o':
1249 case 'x':
1250 case 'X':
1251 compatible_flags = FLAG_ALTERNATIVE;
1252 break;
1254 case 0:
1255 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1256 break;
1258 default:
1259 if (isprint (ch))
1260 error (EXIT_FAILURE, 0,
1261 _("invalid conversion specifier in suffix: %c"), ch);
1262 else
1263 error (EXIT_FAILURE, 0,
1264 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1267 if (flags & ~ compatible_flags)
1268 error (EXIT_FAILURE, 0,
1269 _("invalid flags in conversion specification: %%%c%c"),
1270 (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
/* Validate FORMAT, which must contain exactly one % conversion
   specification (a doubled %% does not count as one), and return the
   number of bytes produced by formatting UINT_MAX with it.
   An invalid format is diagnosed and the program exits.  The
   specifier in FORMAT may be rewritten by check_format_conv_type.  */
static size_t
max_out (char *format)
{
  bool have_spec = false;       /* True once a specification was seen.  */
  char *f = format;

  while (*f)
    {
      if (*f != '%')
        {
          f++;
          continue;
        }

      /* Look at the character after the '%'.  */
      f++;
      if (*f == '%')
        {
          f++;                  /* A literal percent sign.  */
          continue;
        }

      if (have_spec)
        error (EXIT_FAILURE, 0,
               _("too many %% conversion specifications in suffix"));
      have_spec = true;

      /* Skip flags, field width and precision.  */
      int flags;
      f += get_format_flags (f, &flags);
      while (ISDIGIT (*f))
        f++;
      if (*f == '.')
        {
          do
            f++;
          while (ISDIGIT (*f));
        }

      /* This exits if *F is not an acceptable specifier, so *F is
         known to be nonzero when it is stepped over.  */
      check_format_conv_type (f, flags);
      f++;
    }

  if (!have_spec)
    error (EXIT_FAILURE, 0,
           _("missing %% conversion specification in suffix"));

  int maxlen = snprintf (NULL, 0, format, UINT_MAX);
  if (maxlen < 0 || SIZE_MAX < maxlen)
    xalloc_die ();
  return maxlen;
}
1309 main (int argc, char **argv)
1311 int optc;
1312 unsigned long int val;
1314 initialize_main (&argc, &argv);
1315 set_program_name (argv[0]);
1316 setlocale (LC_ALL, "");
1317 bindtextdomain (PACKAGE, LOCALEDIR);
1318 textdomain (PACKAGE);
1320 atexit (close_stdout);
1322 global_argv = argv;
1323 controls = NULL;
1324 control_used = 0;
1325 suppress_count = false;
1326 remove_files = true;
1327 prefix = DEFAULT_PREFIX;
1329 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1330 switch (optc)
1332 case 'f':
1333 prefix = optarg;
1334 break;
1336 case 'b':
1337 suffix = optarg;
1338 break;
1340 case 'k':
1341 remove_files = false;
1342 break;
1344 case 'n':
1345 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1346 || MIN (INT_MAX, SIZE_MAX) < val)
1347 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1348 digits = val;
1349 break;
1351 case 's':
1352 case 'q':
1353 suppress_count = true;
1354 break;
1356 case 'z':
1357 elide_empty_files = true;
1358 break;
1360 case_GETOPT_HELP_CHAR;
1362 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1364 default:
1365 usage (EXIT_FAILURE);
1368 if (argc - optind < 2)
1370 if (argc <= optind)
1371 error (0, 0, _("missing operand"));
1372 else
1373 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1374 usage (EXIT_FAILURE);
1377 size_t prefix_len = strlen (prefix);
1378 size_t max_digit_string_len
1379 = (suffix
1380 ? max_out (suffix)
1381 : MAX (INT_STRLEN_BOUND (unsigned int), digits));
1382 if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
1383 xalloc_die ();
1384 filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
1386 set_input_file (argv[optind++]);
1388 parse_patterns (argc, optind, argv);
1391 int i;
1392 static int const sig[] =
1394 /* The usual suspects. */
1395 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1396 #ifdef SIGPOLL
1397 SIGPOLL,
1398 #endif
1399 #ifdef SIGPROF
1400 SIGPROF,
1401 #endif
1402 #ifdef SIGVTALRM
1403 SIGVTALRM,
1404 #endif
1405 #ifdef SIGXCPU
1406 SIGXCPU,
1407 #endif
1408 #ifdef SIGXFSZ
1409 SIGXFSZ,
1410 #endif
1412 enum { nsigs = ARRAY_CARDINALITY (sig) };
1414 struct sigaction act;
1416 sigemptyset (&caught_signals);
1417 for (i = 0; i < nsigs; i++)
1419 sigaction (sig[i], NULL, &act);
1420 if (act.sa_handler != SIG_IGN)
1421 sigaddset (&caught_signals, sig[i]);
1424 act.sa_handler = interrupt_handler;
1425 act.sa_mask = caught_signals;
1426 act.sa_flags = 0;
1428 for (i = 0; i < nsigs; i++)
1429 if (sigismember (&caught_signals, sig[i]))
1430 sigaction (sig[i], &act, NULL);
1433 split_file ();
1435 if (close (STDIN_FILENO) != 0)
1437 error (0, errno, _("read error"));
1438 cleanup_fatal ();
1441 exit (EXIT_SUCCESS);
1444 void
1445 usage (int status)
1447 if (status != EXIT_SUCCESS)
1448 emit_try_help ();
1449 else
1451 printf (_("\
1452 Usage: %s [OPTION]... FILE PATTERN...\n\
1454 program_name);
1455 fputs (_("\
1456 Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\
1457 and output byte counts of each piece to standard output.\n\
1458 "), stdout);
1460 emit_mandatory_arg_note ();
1462 fputs (_("\
1463 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1464 -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\
1465 -k, --keep-files do not remove output files on errors\n\
1466 "), stdout);
1467 fputs (_("\
1468 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1469 -s, --quiet, --silent do not print counts of output file sizes\n\
1470 -z, --elide-empty-files remove empty output files\n\
1471 "), stdout);
1472 fputs (HELP_OPTION_DESCRIPTION, stdout);
1473 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1474 fputs (_("\
1476 Read standard input if FILE is -. Each PATTERN may be:\n\
1477 "), stdout);
1478 fputs (_("\
1480 INTEGER copy up to but not including specified line number\n\
1481 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1482 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1483 {INTEGER} repeat the previous pattern specified number of times\n\
1484 {*} repeat the previous pattern as many times as possible\n\
1486 A line OFFSET is a required '+' or '-' followed by a positive integer.\n\
1487 "), stdout);
1488 emit_ancillary_info ();
1490 exit (status);