maint: revert "build: update gnulib submodule to latest"
[coreutils/ericb.git] / src / csplit.c
blob5d84c4580ae2651bf0afd96776df0c35c5782349
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 1991, 1995-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <getopt.h>
23 #include <sys/types.h>
24 #include <signal.h>
26 #include "system.h"
28 #include <regex.h>
30 #include "error.h"
31 #include "fd-reopen.h"
32 #include "quote.h"
33 #include "safe-read.h"
34 #include "stdio--.h"
35 #include "xstrtol.h"
/* Canonical program name passed to the --version handler; it never
   carries an installation prefix such as `g'.  */
#define PROGRAM_NAME "csplit"

/* Authors passed to the --version handler.  */
#define AUTHORS \
  proper_name ("Stuart Kemp"), \
  proper_name ("David MacKenzie")

/* Output file name prefix used unless -f/--prefix overrides it.  */
#define DEFAULT_PREFIX "xx"
/* One parsed pattern argument: either a line number or a regular
   expression, together with its optional offset and repeat count.  */
struct control
{
  intmax_t offset;              /* Signed offset from the regexp match.  */
  uintmax_t lines_required;     /* Line number of a line-count pattern.  */
  uintmax_t repeat;             /* Extra repetitions requested via {N}.  */
  int argnum;                   /* Index of this pattern in ARGV.  */
  bool repeat_forever;          /* True when the repeat count is `*'.  */
  bool ignore;                  /* True to produce no output (regexp).  */
  bool regexpr;                 /* True when this is a regexp pattern.  */
  struct re_pattern_buffer re_compiled; /* The compiled regexp.  */
};
/* Sizing constants for the input buffers:
     START_SIZE - initial size of the data area in a buffer,
     INCR_SIZE  - granularity by which the data area size is rounded up,
     CTRL_SIZE  - number of lines tracked by each `struct line' node.

   A DEBUG build uses small values to test the algorithms.  Each macro
   is defined exactly once: defining the production values first and
   then redefining them under DEBUG without an #undef would be an
   incompatible macro redefinition.  */
#ifdef DEBUG
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define INCR_SIZE 2048
# define CTRL_SIZE 80
#endif
/* A run of LEN bytes starting at STR.  */
struct cstring
{
  size_t len;   /* Number of bytes.  */
  char *str;    /* First byte.  */
};
83 /* Pointers to the beginnings of lines in the buffer area.
84 These structures are linked together if needed. */
85 struct line
87 size_t used; /* Number of offsets used in this struct. */
88 size_t insert_index; /* Next offset to use when inserting line. */
89 size_t retrieve_index; /* Next index to use when retrieving line. */
90 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
91 struct line *next; /* Next in linked list. */
/* One block of input: the raw data area plus the list of line tables
   that index the individual lines inside it.  */
struct buffer_record
{
  size_t bytes_alloc;           /* Capacity of BUFFER.  */
  size_t bytes_used;            /* Bytes of BUFFER holding data.  */
  uintmax_t start_line;         /* Number of the first line stored here.  */
  uintmax_t first_available;    /* First line not yet retrieved.  */
  size_t num_lines;             /* Count of lines recorded here.  */
  char *buffer;                 /* The data area.  */
  struct line *line_start;      /* First line table.  */
  struct line *curr_line;       /* Line table currently in use.  */
  struct buffer_record *next;   /* Following buffer, or NULL.  */
};
110 static void close_output_file (void);
111 static void create_output_file (void);
112 static void delete_all_files (bool);
113 static void save_line_to_file (const struct cstring *line);
114 void usage (int status);
116 /* Start of buffer list. */
117 static struct buffer_record *head = NULL;
119 /* Partially read line. */
120 static char *hold_area = NULL;
122 /* Number of bytes in `hold_area'. */
123 static size_t hold_count = 0;
125 /* Number of the last line in the buffers. */
126 static uintmax_t last_line_number = 0;
128 /* Number of the line currently being examined. */
129 static uintmax_t current_line = 0;
131 /* If true, we have read EOF. */
132 static bool have_read_eof = false;
134 /* Name of output files. */
135 static char *volatile filename_space = NULL;
137 /* Prefix part of output file names. */
138 static char const *volatile prefix = NULL;
140 /* Suffix part of output file names. */
141 static char *volatile suffix = NULL;
143 /* Number of digits to use in output file names. */
144 static int volatile digits = 2;
146 /* Number of files created so far. */
147 static unsigned int volatile files_created = 0;
149 /* Number of bytes written to current file. */
150 static uintmax_t bytes_written;
152 /* Output file pointer. */
153 static FILE *output_stream = NULL;
155 /* Output file name. */
156 static char *output_filename = NULL;
158 /* Perhaps it would be cleaner to pass arg values instead of indexes. */
159 static char **global_argv;
161 /* If true, do not print the count of bytes in each output file. */
162 static bool suppress_count;
164 /* If true, remove output files on error. */
165 static bool volatile remove_files;
167 /* If true, remove all output files which have a zero length. */
168 static bool elide_empty_files;
170 /* The compiled pattern arguments, which determine how to split
171 the input file. */
172 static struct control *controls;
174 /* Number of elements in `controls'. */
175 static size_t control_used;
177 /* The set of signals that are caught. */
178 static sigset_t caught_signals;
180 static struct option const longopts[] =
182 {"digits", required_argument, NULL, 'n'},
183 {"quiet", no_argument, NULL, 'q'},
184 {"silent", no_argument, NULL, 's'},
185 {"keep-files", no_argument, NULL, 'k'},
186 {"elide-empty-files", no_argument, NULL, 'z'},
187 {"prefix", required_argument, NULL, 'f'},
188 {"suffix-format", required_argument, NULL, 'b'},
189 {GETOPT_HELP_OPTION_DECL},
190 {GETOPT_VERSION_OPTION_DECL},
191 {NULL, 0, NULL, 0}
194 /* Optionally remove files created so far; then exit.
195 Called when an error detected. */
197 static void
198 cleanup (void)
200 sigset_t oldset;
202 close_output_file ();
204 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
205 delete_all_files (false);
206 sigprocmask (SIG_SETMASK, &oldset, NULL);
209 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
210 static void
211 cleanup_fatal (void)
213 cleanup ();
214 exit (EXIT_FAILURE);
/* Diagnose memory exhaustion and exit through cleanup_fatal so that
   the usual cleanup runs.
   NOTE(review): presumably this overrides the out-of-memory hook used
   by the x*alloc functions -- confirm against the allocator library.  */
extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Signal handler: delete the output files if removal is enabled, then
   let signal SIG take its default action.  */
static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  /* Restore the default disposition and re-raise.  SIG stays blocked
     while this handler runs, so the default action is carried out
     once the handler returns and the block is lifted.  */
  signal (sig, SIG_DFL);
  raise (sig);
}
235 /* Keep track of NUM bytes of a partial line in buffer START.
236 These bytes will be retrieved later when another large buffer is read. */
238 static void
239 save_to_hold_area (char *start, size_t num)
241 free (hold_area);
242 hold_area = start;
243 hold_count = num;
246 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
247 Return the number of bytes read. */
249 static size_t
250 read_input (char *dest, size_t max_n_bytes)
252 size_t bytes_read;
254 if (max_n_bytes == 0)
255 return 0;
257 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
259 if (bytes_read == 0)
260 have_read_eof = true;
262 if (bytes_read == SAFE_READ_ERROR)
264 error (0, errno, _("read error"));
265 cleanup_fatal ();
268 return bytes_read;
271 /* Initialize existing line record P. */
273 static void
274 clear_line_control (struct line *p)
276 p->used = 0;
277 p->insert_index = 0;
278 p->retrieve_index = 0;
281 /* Return a new, initialized line record. */
283 static struct line *
284 new_line_control (void)
286 struct line *p = xmalloc (sizeof *p);
288 p->next = NULL;
289 clear_line_control (p);
291 return p;
294 /* Record LINE_START, which is the address of the start of a line
295 of length LINE_LEN in the large buffer, in the lines buffer of B. */
297 static void
298 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
300 struct line *l;
302 /* If there is no existing area to keep line info, get some. */
303 if (b->line_start == NULL)
304 b->line_start = b->curr_line = new_line_control ();
306 /* If existing area for lines is full, get more. */
307 if (b->curr_line->used == CTRL_SIZE)
309 b->curr_line->next = new_line_control ();
310 b->curr_line = b->curr_line->next;
313 l = b->curr_line;
315 /* Record the start of the line, and update counters. */
316 l->starts[l->insert_index].str = line_start;
317 l->starts[l->insert_index].len = line_len;
318 l->used++;
319 l->insert_index++;
322 /* Scan the buffer in B for newline characters
323 and record the line start locations and lengths in B.
324 Return the number of lines found in this buffer.
326 There may be an incomplete line at the end of the buffer;
327 a pointer is kept to this area, which will be used when
328 the next buffer is filled. */
330 static size_t
331 record_line_starts (struct buffer_record *b)
333 char *line_start; /* Start of current line. */
334 char *line_end; /* End of each line found. */
335 size_t bytes_left; /* Length of incomplete last line. */
336 size_t lines; /* Number of lines found. */
337 size_t line_length; /* Length of each line found. */
339 if (b->bytes_used == 0)
340 return 0;
342 lines = 0;
343 line_start = b->buffer;
344 bytes_left = b->bytes_used;
346 while (true)
348 line_end = memchr (line_start, '\n', bytes_left);
349 if (line_end == NULL)
350 break;
351 line_length = line_end - line_start + 1;
352 keep_new_line (b, line_start, line_length);
353 bytes_left -= line_length;
354 line_start = line_end + 1;
355 lines++;
358 /* Check for an incomplete last line. */
359 if (bytes_left)
361 if (have_read_eof)
363 keep_new_line (b, line_start, bytes_left);
364 lines++;
366 else
367 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
370 b->num_lines = lines;
371 b->first_available = b->start_line = last_line_number + 1;
372 last_line_number += lines;
374 return lines;
377 /* Return a new buffer with room to store SIZE bytes, plus
378 an extra byte for safety. */
380 static struct buffer_record *
381 create_new_buffer (size_t size)
383 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
385 new_buffer->buffer = xmalloc (size + 1);
387 new_buffer->bytes_alloc = size;
388 new_buffer->line_start = new_buffer->curr_line = NULL;
390 return new_buffer;
393 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
394 least that size is currently free, use it, otherwise create a new one. */
396 static struct buffer_record *
397 get_new_buffer (size_t min_size)
399 struct buffer_record *new_buffer; /* Buffer to return. */
400 size_t alloc_size; /* Actual size that will be requested. */
402 alloc_size = START_SIZE;
403 if (alloc_size < min_size)
405 size_t s = min_size - alloc_size + INCR_SIZE - 1;
406 alloc_size += s - s % INCR_SIZE;
409 new_buffer = create_new_buffer (alloc_size);
411 new_buffer->num_lines = 0;
412 new_buffer->bytes_used = 0;
413 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
414 new_buffer->next = NULL;
416 return new_buffer;
419 static void
420 free_buffer (struct buffer_record *buf)
422 struct line *l;
423 for (l = buf->line_start; l;)
425 struct line *n = l->next;
426 free (l);
427 l = n;
429 free (buf->buffer);
430 buf->buffer = NULL;
433 /* Append buffer BUF to the linked list of buffers that contain
434 some data yet to be processed. */
436 static void
437 save_buffer (struct buffer_record *buf)
439 struct buffer_record *p;
441 buf->next = NULL;
442 buf->curr_line = buf->line_start;
444 if (head == NULL)
445 head = buf;
446 else
448 for (p = head; p->next; p = p->next)
449 /* Do nothing. */ ;
450 p->next = buf;
454 /* Fill a buffer of input.
456 Set the initial size of the buffer to a default.
457 Fill the buffer (from the hold area and input stream)
458 and find the individual lines.
459 If no lines are found (the buffer is too small to hold the next line),
460 release the current buffer (whose contents would have been put in the
461 hold area) and repeat the process with another large buffer until at least
462 one entire line has been read.
464 Return true if a new buffer was obtained, otherwise false
465 (in which case end-of-file must have been encountered). */
467 static bool
468 load_buffer (void)
470 struct buffer_record *b;
471 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
472 size_t bytes_avail; /* Size of new buffer created. */
473 size_t lines_found; /* Number of lines in this new buffer. */
474 char *p; /* Place to load into buffer. */
476 if (have_read_eof)
477 return false;
479 /* We must make the buffer at least as large as the amount of data
480 in the partial line left over from the last call. */
481 if (bytes_wanted < hold_count)
482 bytes_wanted = hold_count;
484 while (1)
486 b = get_new_buffer (bytes_wanted);
487 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
488 p = b->buffer;
490 /* First check the `holding' area for a partial line. */
491 if (hold_count)
493 memcpy (p, hold_area, hold_count);
494 p += hold_count;
495 b->bytes_used += hold_count;
496 bytes_avail -= hold_count;
497 hold_count = 0;
500 b->bytes_used += read_input (p, bytes_avail);
502 lines_found = record_line_starts (b);
503 if (!lines_found)
504 free_buffer (b);
506 if (lines_found || have_read_eof)
507 break;
509 if (xalloc_oversized (2, b->bytes_alloc))
510 xalloc_die ();
511 bytes_wanted = 2 * b->bytes_alloc;
512 free_buffer (b);
513 free (b);
516 if (lines_found)
517 save_buffer (b);
518 else
519 free (b);
521 return lines_found != 0;
524 /* Return the line number of the first line that has not yet been retrieved. */
526 static uintmax_t
527 get_first_line_in_buffer (void)
529 if (head == NULL && !load_buffer ())
530 error (EXIT_FAILURE, errno, _("input disappeared"));
532 return head->first_available;
535 /* Return a pointer to the logical first line in the buffer and make the
536 next line the logical first line.
537 Return NULL if there is no more input. */
539 static struct cstring *
540 remove_line (void)
542 /* If non-NULL, this is the buffer for which the previous call
543 returned the final line. So now, presuming that line has been
544 processed, we can free the buffer and reset this pointer. */
545 static struct buffer_record *prev_buf = NULL;
547 struct cstring *line; /* Return value. */
548 struct line *l; /* For convenience. */
550 if (prev_buf)
552 free_buffer (prev_buf);
553 free (prev_buf);
554 prev_buf = NULL;
557 if (head == NULL && !load_buffer ())
558 return NULL;
560 if (current_line < head->first_available)
561 current_line = head->first_available;
563 ++(head->first_available);
565 l = head->curr_line;
567 line = &l->starts[l->retrieve_index];
569 /* Advance index to next line. */
570 if (++l->retrieve_index == l->used)
572 /* Go on to the next line record. */
573 head->curr_line = l->next;
574 if (head->curr_line == NULL || head->curr_line->used == 0)
576 /* Go on to the next data block.
577 but first record the current one so we can free it
578 once the line we're returning has been processed. */
579 prev_buf = head;
580 head = head->next;
584 return line;
587 /* Search the buffers for line LINENUM, reading more input if necessary.
588 Return a pointer to the line, or NULL if it is not found in the file. */
590 static struct cstring *
591 find_line (uintmax_t linenum)
593 struct buffer_record *b;
595 if (head == NULL && !load_buffer ())
596 return NULL;
598 if (linenum < head->start_line)
599 return NULL;
601 for (b = head;;)
603 if (linenum < b->start_line + b->num_lines)
605 /* The line is in this buffer. */
606 struct line *l;
607 size_t offset; /* How far into the buffer the line is. */
609 l = b->line_start;
610 offset = linenum - b->start_line;
611 /* Find the control record. */
612 while (offset >= CTRL_SIZE)
614 l = l->next;
615 offset -= CTRL_SIZE;
617 return &l->starts[offset];
619 if (b->next == NULL && !load_buffer ())
620 return NULL;
621 b = b->next; /* Try the next data block. */
625 /* Return true if at least one more line is available for input. */
627 static bool
628 no_more_lines (void)
630 return find_line (current_line + 1) == NULL;
/* Make file NAME the standard input.  The name "-" leaves standard
   input as it is.  Exit with a diagnostic if NAME cannot be opened.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
}
642 /* Write all lines from the beginning of the buffer up to, but
643 not including, line LAST_LINE, to the current output file.
644 If IGNORE is true, do not output lines selected here.
645 ARGNUM is the index in ARGV of the current pattern. */
647 static void
648 write_to_file (uintmax_t last_line, bool ignore, int argnum)
650 struct cstring *line;
651 uintmax_t first_line; /* First available input line. */
652 uintmax_t lines; /* Number of lines to output. */
653 uintmax_t i;
655 first_line = get_first_line_in_buffer ();
657 if (first_line > last_line)
659 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
660 cleanup_fatal ();
663 lines = last_line - first_line;
665 for (i = 0; i < lines; i++)
667 line = remove_line ();
668 if (line == NULL)
670 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
671 cleanup_fatal ();
673 if (!ignore)
674 save_line_to_file (line);
/* Write every remaining input line to the current output file.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
        break;
      save_line_to_file (line);
    }
}
689 /* Handle an attempt to read beyond EOF under the control of record P,
690 on iteration REPETITION if nonzero. */
692 static void handle_line_error (const struct control *, uintmax_t)
693 ATTRIBUTE_NORETURN;
694 static void
695 handle_line_error (const struct control *p, uintmax_t repetition)
697 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
699 fprintf (stderr, _("%s: %s: line number out of range"),
700 program_name, quote (umaxtostr (p->lines_required, buf)));
701 if (repetition)
702 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
703 else
704 fprintf (stderr, "\n");
706 cleanup_fatal ();
709 /* Determine the line number that marks the end of this file,
710 then get those lines and save them to the output file.
711 P is the control record.
712 REPETITION is the repetition number. */
714 static void
715 process_line_count (const struct control *p, uintmax_t repetition)
717 uintmax_t linenum;
718 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
719 struct cstring *line;
721 create_output_file ();
723 linenum = get_first_line_in_buffer ();
725 while (linenum++ < last_line_to_save)
727 line = remove_line ();
728 if (line == NULL)
729 handle_line_error (p, repetition);
730 save_line_to_file (line);
733 close_output_file ();
735 /* Ensure that the line number specified is not 1 greater than
736 the number of lines in the file. */
737 if (no_more_lines ())
738 handle_line_error (p, repetition);
741 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
742 static void
743 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
745 fprintf (stderr, _("%s: %s: match not found"),
746 program_name, quote (global_argv[p->argnum]));
748 if (repetition)
750 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
751 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
753 else
754 fprintf (stderr, "\n");
756 if (!ignore)
758 dump_rest_of_file ();
759 close_output_file ();
761 cleanup_fatal ();
764 /* Read the input until a line matches the regexp in P, outputting
765 it unless P->IGNORE is true.
766 REPETITION is this repeat-count; 0 means the first time. */
768 static void
769 process_regexp (struct control *p, uintmax_t repetition)
771 struct cstring *line; /* From input file. */
772 size_t line_len; /* To make "$" in regexps work. */
773 uintmax_t break_line; /* First line number of next file. */
774 bool ignore = p->ignore; /* If true, skip this section. */
775 regoff_t ret;
777 if (!ignore)
778 create_output_file ();
780 /* If there is no offset for the regular expression, or
781 it is positive, then it is not necessary to buffer the lines. */
783 if (p->offset >= 0)
785 while (true)
787 line = find_line (++current_line);
788 if (line == NULL)
790 if (p->repeat_forever)
792 if (!ignore)
794 dump_rest_of_file ();
795 close_output_file ();
797 exit (EXIT_SUCCESS);
799 else
800 regexp_error (p, repetition, ignore);
802 line_len = line->len;
803 if (line->str[line_len - 1] == '\n')
804 line_len--;
805 ret = re_search (&p->re_compiled, line->str, line_len,
806 0, line_len, NULL);
807 if (ret == -2)
809 error (0, 0, _("error in regular expression search"));
810 cleanup_fatal ();
812 if (ret == -1)
814 line = remove_line ();
815 if (!ignore)
816 save_line_to_file (line);
818 else
819 break;
822 else
824 /* Buffer the lines. */
825 while (true)
827 line = find_line (++current_line);
828 if (line == NULL)
830 if (p->repeat_forever)
832 if (!ignore)
834 dump_rest_of_file ();
835 close_output_file ();
837 exit (EXIT_SUCCESS);
839 else
840 regexp_error (p, repetition, ignore);
842 line_len = line->len;
843 if (line->str[line_len - 1] == '\n')
844 line_len--;
845 ret = re_search (&p->re_compiled, line->str, line_len,
846 0, line_len, NULL);
847 if (ret == -2)
849 error (0, 0, _("error in regular expression search"));
850 cleanup_fatal ();
852 if (ret != -1)
853 break;
857 /* Account for any offset from this regexp. */
858 break_line = current_line + p->offset;
860 write_to_file (break_line, ignore, p->argnum);
862 if (!ignore)
863 close_output_file ();
865 if (p->offset > 0)
866 current_line = break_line;
869 /* Split the input file according to the control records we have built. */
871 static void
872 split_file (void)
874 size_t i;
876 for (i = 0; i < control_used; i++)
878 uintmax_t j;
879 if (controls[i].regexpr)
881 for (j = 0; (controls[i].repeat_forever
882 || j <= controls[i].repeat); j++)
883 process_regexp (&controls[i], j);
885 else
887 for (j = 0; (controls[i].repeat_forever
888 || j <= controls[i].repeat); j++)
889 process_line_count (&controls[i], j);
893 create_output_file ();
894 dump_rest_of_file ();
895 close_output_file ();
898 /* Return the name of output file number NUM.
900 This function is called from a signal handler, so it should invoke
901 only reentrant functions that are async-signal-safe. POSIX does
902 not guarantee this for the functions called below, but we don't
903 know of any hosts where this implementation isn't safe. */
905 static char *
906 make_filename (unsigned int num)
908 strcpy (filename_space, prefix);
909 if (suffix)
910 sprintf (filename_space + strlen (prefix), suffix, num);
911 else
912 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
913 return filename_space;
916 /* Create the next output file. */
918 static void
919 create_output_file (void)
921 bool fopen_ok;
922 int fopen_errno;
924 output_filename = make_filename (files_created);
926 if (files_created == UINT_MAX)
928 fopen_ok = false;
929 fopen_errno = EOVERFLOW;
931 else
933 /* Create the output file in a critical section, to avoid races. */
934 sigset_t oldset;
935 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
936 output_stream = fopen (output_filename, "w");
937 fopen_ok = (output_stream != NULL);
938 fopen_errno = errno;
939 files_created += fopen_ok;
940 sigprocmask (SIG_SETMASK, &oldset, NULL);
943 if (! fopen_ok)
945 error (0, fopen_errno, "%s", output_filename);
946 cleanup_fatal ();
948 bytes_written = 0;
951 /* If requested, delete all the files we have created. This function
952 must be called only from critical sections. */
954 static void
955 delete_all_files (bool in_signal_handler)
957 unsigned int i;
959 if (! remove_files)
960 return;
962 for (i = 0; i < files_created; i++)
964 const char *name = make_filename (i);
965 if (unlink (name) != 0 && !in_signal_handler)
966 error (0, errno, "%s", name);
969 files_created = 0;
972 /* Close the current output file and print the count
973 of characters in this file. */
975 static void
976 close_output_file (void)
978 if (output_stream)
980 if (ferror (output_stream))
982 error (0, 0, _("write error for %s"), quote (output_filename));
983 output_stream = NULL;
984 cleanup_fatal ();
986 if (fclose (output_stream) != 0)
988 error (0, errno, "%s", output_filename);
989 output_stream = NULL;
990 cleanup_fatal ();
992 if (bytes_written == 0 && elide_empty_files)
994 sigset_t oldset;
995 bool unlink_ok;
996 int unlink_errno;
998 /* Remove the output file in a critical section, to avoid races. */
999 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
1000 unlink_ok = (unlink (output_filename) == 0);
1001 unlink_errno = errno;
1002 files_created -= unlink_ok;
1003 sigprocmask (SIG_SETMASK, &oldset, NULL);
1005 if (! unlink_ok)
1006 error (0, unlink_errno, "%s", output_filename);
1008 else
1010 if (!suppress_count)
1012 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1013 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
1016 output_stream = NULL;
1020 /* Save line LINE to the output file and
1021 increment the character count for the current file. */
1023 static void
1024 save_line_to_file (const struct cstring *line)
1026 fwrite (line->str, sizeof (char), line->len, output_stream);
1027 bytes_written += line->len;
1030 /* Return a new, initialized control record. */
1032 static struct control *
1033 new_control_record (void)
1035 static size_t control_allocated = 0; /* Total space allocated. */
1036 struct control *p;
1038 if (control_used == control_allocated)
1039 controls = X2NREALLOC (controls, &control_allocated);
1040 p = &controls[control_used++];
1041 p->regexpr = false;
1042 p->repeat = 0;
1043 p->repeat_forever = false;
1044 p->lines_required = 0;
1045 p->offset = 0;
1046 return p;
1049 /* Check if there is a numeric offset after a regular expression.
1050 STR is the entire command line argument.
1051 P is the control record for this regular expression.
1052 NUM is the numeric part of STR. */
1054 static void
1055 check_for_offset (struct control *p, const char *str, const char *num)
1057 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1058 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1061 /* Given that the first character of command line arg STR is '{',
1062 make sure that the rest of the string is a valid repeat count
1063 and store its value in P.
1064 ARGNUM is the ARGV index of STR. */
1066 static void
1067 parse_repeat_count (int argnum, struct control *p, char *str)
1069 uintmax_t val;
1070 char *end;
1072 end = str + strlen (str) - 1;
1073 if (*end != '}')
1074 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1075 *end = '\0';
1077 if (str+1 == end-1 && *(str+1) == '*')
1078 p->repeat_forever = true;
1079 else
1081 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1083 error (EXIT_FAILURE, 0,
1084 _("%s}: integer required between `{' and `}'"),
1085 global_argv[argnum]);
1087 p->repeat = val;
1090 *end = '}';
1093 /* Extract the regular expression from STR and check for a numeric offset.
1094 STR should start with the regexp delimiter character.
1095 Return a new control record for the regular expression.
1096 ARGNUM is the ARGV index of STR.
1097 Unless IGNORE is true, mark these lines for output. */
1099 static struct control *
1100 extract_regexp (int argnum, bool ignore, char const *str)
1102 size_t len; /* Number of bytes in this regexp. */
1103 char delim = *str;
1104 char const *closing_delim;
1105 struct control *p;
1106 const char *err;
1108 closing_delim = strrchr (str + 1, delim);
1109 if (closing_delim == NULL)
1110 error (EXIT_FAILURE, 0,
1111 _("%s: closing delimiter `%c' missing"), str, delim);
1113 len = closing_delim - str - 1;
1114 p = new_control_record ();
1115 p->argnum = argnum;
1116 p->ignore = ignore;
1118 p->regexpr = true;
1119 p->re_compiled.buffer = NULL;
1120 p->re_compiled.allocated = 0;
1121 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1122 p->re_compiled.translate = NULL;
1123 re_syntax_options =
1124 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1125 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1126 if (err)
1128 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1129 cleanup_fatal ();
1132 if (closing_delim[1])
1133 check_for_offset (p, str, closing_delim + 1);
1135 return p;
1138 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1139 After each pattern, check if the next argument is a repeat count. */
1141 static void
1142 parse_patterns (int argc, int start, char **argv)
1144 int i; /* Index into ARGV. */
1145 struct control *p; /* New control record created. */
1146 uintmax_t val;
1147 static uintmax_t last_val = 0;
1149 for (i = start; i < argc; i++)
1151 if (*argv[i] == '/' || *argv[i] == '%')
1153 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1155 else
1157 p = new_control_record ();
1158 p->argnum = i;
1160 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1161 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1162 if (val == 0)
1163 error (EXIT_FAILURE, 0,
1164 _("%s: line number must be greater than zero"),
1165 argv[i]);
1166 if (val < last_val)
1168 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1169 error (EXIT_FAILURE, 0,
1170 _("line number %s is smaller than preceding line number, %s"),
1171 quote (argv[i]), umaxtostr (last_val, buf));
1174 if (val == last_val)
1175 error (0, 0,
1176 _("warning: line number %s is the same as preceding line number"),
1177 quote (argv[i]));
1179 last_val = val;
1181 p->lines_required = val;
1184 if (i + 1 < argc && *argv[i + 1] == '{')
1186 /* We have a repeat count. */
1187 i++;
1188 parse_repeat_count (i, p, argv[i]);
/* Bit values for the printf format flags ' and #; they can be ORed
   together.  */
enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };

/* Skip the leading printf flag characters (`-', `0', `'', `#') of
   FORMAT.  Store in *FLAGS_PTR the FLAG_* bits for the ' and # flags
   seen, and return how many flag characters were skipped.  */
static size_t
get_format_flags (char const *format, int *flags_ptr)
{
  char const *f = format;
  int flags = 0;

  for (;; f++)
    {
      if (*f == '\'')
        flags |= FLAG_THOUSANDS;
      else if (*f == '#')
        flags |= FLAG_ALTERNATIVE;
      else if (*f != '-' && *f != '0')
        break;
    }

  *flags_ptr = flags;
  return f - format;
}
1228 /* Check that the printf format conversion specifier *FORMAT is valid
1229 and compatible with FLAGS. Change it to 'u' if it is 'd' or 'i',
1230 since the format will be used with an unsigned value. */
1231 static void
1232 check_format_conv_type (char *format, int flags)
1234 unsigned char ch = *format;
1235 int compatible_flags = FLAG_THOUSANDS;
1237 switch (ch)
1239 case 'd':
1240 case 'i':
1241 *format = 'u';
1242 break;
1244 case 'u':
1245 break;
1247 case 'o':
1248 case 'x':
1249 case 'X':
1250 compatible_flags = FLAG_ALTERNATIVE;
1251 break;
1253 case 0:
1254 error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
1255 break;
1257 default:
1258 if (isprint (ch))
1259 error (EXIT_FAILURE, 0,
1260 _("invalid conversion specifier in suffix: %c"), ch);
1261 else
1262 error (EXIT_FAILURE, 0,
1263 _("invalid conversion specifier in suffix: \\%.3o"), ch);
1266 if (flags & ~ compatible_flags)
1267 error (EXIT_FAILURE, 0,
1268 _("invalid flags in conversion specification: %%%c%c"),
1269 (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
/* Return the maximum number of bytes that can be generated by
   applying FORMAT to an unsigned int value.  If the format is
   invalid, diagnose the problem and exit.

   FORMAT must contain exactly one conversion specification; "%%" is
   treated as a literal percent sign and does not count.  After the
   '%', flags, an optional decimal width and an optional ".precision"
   are skipped, and the specifier itself is validated (and possibly
   rewritten in place) by check_format_conv_type.  */
static size_t
max_out (char *format)
{
  bool percent = false;

  for (char *f = format; *f; f++)
    if (*f == '%' && *++f != '%')
      {
        if (percent)
          error (EXIT_FAILURE, 0,
                 _("too many %% conversion specifications in suffix"));
        percent = true;
        int flags;
        f += get_format_flags (f, &flags);
        /* Skip the field width.  */
        while (ISDIGIT (*f))
          f++;
        /* Skip the precision, if any.  */
        if (*f == '.')
          while (ISDIGIT (*++f))
            continue;
        check_format_conv_type (f, flags);
      }

  if (! percent)
    error (EXIT_FAILURE, 0,
           _("missing %% conversion specification in suffix"));

  /* Measure the output for UINT_MAX without writing anything; a
     negative result means snprintf failed.  */
  int maxlen = snprintf (NULL, 0, format, UINT_MAX);
  if (! (0 <= maxlen && maxlen <= SIZE_MAX))
    xalloc_die ();
  return maxlen;
}
1308 main (int argc, char **argv)
1310 int optc;
1311 unsigned long int val;
1313 initialize_main (&argc, &argv);
1314 set_program_name (argv[0]);
1315 setlocale (LC_ALL, "");
1316 bindtextdomain (PACKAGE, LOCALEDIR);
1317 textdomain (PACKAGE);
1319 atexit (close_stdout);
1321 global_argv = argv;
1322 controls = NULL;
1323 control_used = 0;
1324 suppress_count = false;
1325 remove_files = true;
1326 prefix = DEFAULT_PREFIX;
1328 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1329 switch (optc)
1331 case 'f':
1332 prefix = optarg;
1333 break;
1335 case 'b':
1336 suffix = optarg;
1337 break;
1339 case 'k':
1340 remove_files = false;
1341 break;
1343 case 'n':
1344 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1345 || MIN (INT_MAX, SIZE_MAX) < val)
1346 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1347 digits = val;
1348 break;
1350 case 's':
1351 case 'q':
1352 suppress_count = true;
1353 break;
1355 case 'z':
1356 elide_empty_files = true;
1357 break;
1359 case_GETOPT_HELP_CHAR;
1361 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1363 default:
1364 usage (EXIT_FAILURE);
1367 if (argc - optind < 2)
1369 if (argc <= optind)
1370 error (0, 0, _("missing operand"));
1371 else
1372 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1373 usage (EXIT_FAILURE);
1376 size_t prefix_len = strlen (prefix);
1377 size_t max_digit_string_len
1378 = (suffix
1379 ? max_out (suffix)
1380 : MAX (INT_STRLEN_BOUND (unsigned int), digits));
1381 if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
1382 xalloc_die ();
1383 filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
1385 set_input_file (argv[optind++]);
1387 parse_patterns (argc, optind, argv);
1390 int i;
1391 static int const sig[] =
1393 /* The usual suspects. */
1394 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1395 #ifdef SIGPOLL
1396 SIGPOLL,
1397 #endif
1398 #ifdef SIGPROF
1399 SIGPROF,
1400 #endif
1401 #ifdef SIGVTALRM
1402 SIGVTALRM,
1403 #endif
1404 #ifdef SIGXCPU
1405 SIGXCPU,
1406 #endif
1407 #ifdef SIGXFSZ
1408 SIGXFSZ,
1409 #endif
1411 enum { nsigs = ARRAY_CARDINALITY (sig) };
1413 struct sigaction act;
1415 sigemptyset (&caught_signals);
1416 for (i = 0; i < nsigs; i++)
1418 sigaction (sig[i], NULL, &act);
1419 if (act.sa_handler != SIG_IGN)
1420 sigaddset (&caught_signals, sig[i]);
1423 act.sa_handler = interrupt_handler;
1424 act.sa_mask = caught_signals;
1425 act.sa_flags = 0;
1427 for (i = 0; i < nsigs; i++)
1428 if (sigismember (&caught_signals, sig[i]))
1429 sigaction (sig[i], &act, NULL);
1432 split_file ();
1434 if (close (STDIN_FILENO) != 0)
1436 error (0, errno, _("read error"));
1437 cleanup_fatal ();
1440 exit (EXIT_SUCCESS);
/* Print usage information and exit with STATUS.
   If STATUS is not EXIT_SUCCESS, write only a one-line hint pointing
   at --help to stderr.  Otherwise write the full help text (synopsis,
   option list, and the PATTERN syntax summary) to stdout and then
   call emit_ancillary_info.  Either way the function ends by calling
   exit (STATUS).
   NOTE(review): this listing carries fused line-number prefixes and
   appears to be missing brace-only and string-continuation lines, and
   the column spacing inside the help strings looks collapsed; confirm
   the literals against the upstream file before relying on them.  */
1443 void
1444 usage (int status)
/* Failure path: short hint on stderr only.  */
1446 if (status != EXIT_SUCCESS)
1447 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1448 program_name);
1449 else
/* Success path (--help): full help text on stdout, emitted in
   several separately translated pieces.  */
1451 printf (_("\
1452 Usage: %s [OPTION]... FILE PATTERN...\n\
1454 program_name);
1455 fputs (_("\
1456 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1457 and output byte counts of each piece to standard output.\n\
1459 "), stdout);
1460 fputs (_("\
1461 Mandatory arguments to long options are mandatory for short options too.\n\
1462 "), stdout);
1463 fputs (_("\
1464 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1465 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1466 -k, --keep-files do not remove output files on errors\n\
1467 "), stdout);
1468 fputs (_("\
1469 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1470 -s, --quiet, --silent do not print counts of output file sizes\n\
1471 -z, --elide-empty-files remove empty output files\n\
1472 "), stdout);
1473 fputs (HELP_OPTION_DESCRIPTION, stdout);
1474 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1475 fputs (_("\
1477 Read standard input if FILE is -. Each PATTERN may be:\n\
1478 "), stdout);
1479 fputs (_("\
1481 INTEGER copy up to but not including specified line number\n\
1482 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1483 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1484 {INTEGER} repeat the previous pattern specified number of times\n\
1485 {*} repeat the previous pattern as many times as possible\n\
1487 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1488 "), stdout);
1489 emit_ancillary_info ();
1491 exit (status);