doc: improve various BLOCKSIZE and SIZE help
[coreutils.git] / src / csplit.c
blobba93d2b593329da528cdd60a0c1e851204a6f63c
1 /* csplit - split a file into sections determined by context lines
2 Copyright (C) 91, 1995-2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
18 Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
20 #include <config.h>
22 #include <getopt.h>
23 #include <sys/types.h>
24 #include <signal.h>
26 #include "system.h"
28 #include <regex.h>
30 #include "error.h"
31 #include "fd-reopen.h"
32 #include "quote.h"
33 #include "safe-read.h"
34 #include "stdio--.h"
35 #include "xstrtol.h"
/* Canonical name of this program, passed to the --version handling.
   It never carries an installation prefix such as `g'.  */
#define PROGRAM_NAME "csplit"

/* Author list passed to the --version handling together with
   PROGRAM_NAME.  */
#define AUTHORS \
  proper_name ("Stuart Kemp"), \
  proper_name ("David MacKenzie")

/* Prefix of output file names when -f/--prefix is not given.  */
#define DEFAULT_PREFIX "xx"
/* One parsed pattern argument.  It describes either a line-number
   split point or a regular expression, plus the optional offset and
   repeat count that accompany it.  */
struct control
{
  /* Offset from regexp to split at.  */
  intmax_t offset;

  /* Number of lines required (line-number patterns).  */
  uintmax_t lines_required;

  /* Repeat count.  */
  uintmax_t repeat;

  /* ARGV index of the argument this record was built from.  */
  int argnum;

  /* True if `*' was used as the repeat count.  */
  bool repeat_forever;

  /* If true, produce no output for the matched section (regexp only).  */
  bool ignore;

  /* True if this record holds a regular expression.  */
  bool regexpr;

  /* Compiled regular expression.  */
  struct re_pattern_buffer re_compiled;
};
/* Buffer geometry.

   A DEBUG build substitutes deliberately tiny values so that small
   inputs exercise the buffer-growing and record-chaining code.  The
   two sets of definitions are mutually exclusive: defining a macro a
   second time with a different replacement list is a constraint
   violation in ISO C, so DEBUG must select the values rather than
   override them.  */
#ifdef DEBUG
/* Some small values to test the algorithms.  */
# define START_SIZE 200
# define INCR_SIZE 10
# define CTRL_SIZE 1
#else
/* Initial size of data area in buffers.  */
# define START_SIZE 8191

/* Increment size for data area.  */
# define INCR_SIZE 2048

/* Number of lines kept in each node in line list.  */
# define CTRL_SIZE 80
#endif
/* A counted string: LEN bytes starting at STR.  The length is carried
   explicitly, so the bytes need not be NUL-terminated.  */
struct cstring
{
  size_t len;   /* Number of bytes.  */
  char *str;    /* First byte.  */
};
83 /* Pointers to the beginnings of lines in the buffer area.
84 These structures are linked together if needed. */
85 struct line
87 size_t used; /* Number of offsets used in this struct. */
88 size_t insert_index; /* Next offset to use when inserting line. */
89 size_t retrieve_index; /* Next index to use when retrieving line. */
90 struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
91 struct line *next; /* Next in linked list. */
/* One chunk of buffered input: a data area together with the list of
   line records that point into it.  Chunks that still hold
   unprocessed lines are linked through NEXT.  */
struct buffer_record
{
  /* Size of the buffer area.  */
  size_t bytes_alloc;

  /* Bytes used in the buffer area.  */
  size_t bytes_used;

  /* First line number in this buffer.  */
  uintmax_t start_line;

  /* First line that can be retrieved.  */
  uintmax_t first_available;

  /* Number of complete lines in this buffer.  */
  size_t num_lines;

  /* Data area.  */
  char *buffer;

  /* Head of list of pointers to lines.  */
  struct line *line_start;

  /* The line record currently in use.  */
  struct line *curr_line;

  /* Next buffer in the list.  */
  struct buffer_record *next;
};
/* Forward declarations of routines that are referenced before their
   definitions appear.  */
static void close_output_file (void);
static void create_output_file (void);
static void delete_all_files (bool in_signal_handler);
static void save_line_to_file (const struct cstring *line);
void usage (int status);
/* ---- Input buffering state ---- */

/* Start of buffer list.  */
static struct buffer_record *head = NULL;

/* Partially read line carried over to the next buffer fill.  */
static char *hold_area = NULL;

/* Number of bytes in `hold_area'.  */
static size_t hold_count = 0;

/* Number of the last line in the buffers.  */
static uintmax_t last_line_number = 0;

/* Number of the line currently being examined.  */
static uintmax_t current_line = 0;

/* If true, we have read EOF.  */
static bool have_read_eof = false;

/* ---- Output file naming ----
   The volatile objects below are read by make_filename and
   delete_all_files, which can run from the signal handler.  */

/* Storage in which output file names are assembled.  */
static char *volatile filename_space = NULL;

/* Prefix part of output file names.  */
static char const *volatile prefix = NULL;

/* Suffix format of output file names, or NULL to use DIGITS.  */
static char *volatile suffix = NULL;

/* Number of digits to use in output file names.  */
static int volatile digits = 2;

/* Number of files created so far.  */
static unsigned int volatile files_created = 0;

/* ---- Current output file ---- */

/* Number of bytes written to current file.  */
static uintmax_t bytes_written;

/* Output file pointer.  */
static FILE *output_stream = NULL;

/* Output file name.  */
static char *output_filename = NULL;

/* ---- Command line ---- */

/* Perhaps it would be cleaner to pass arg values instead of indexes.  */
static char **global_argv;

/* If true, do not print the count of bytes in each output file.  */
static bool suppress_count;

/* If true, remove output files on error.  */
static bool volatile remove_files;

/* If true, remove all output files which have a zero length.  */
static bool elide_empty_files;

/* The compiled pattern arguments, which determine how to split
   the input file.  */
static struct control *controls;

/* Number of elements in `controls'.  */
static size_t control_used;
177 /* The set of signals that are caught. */
178 static sigset_t caught_signals;
180 static struct option const longopts[] =
182 {"digits", required_argument, NULL, 'n'},
183 {"quiet", no_argument, NULL, 'q'},
184 {"silent", no_argument, NULL, 's'},
185 {"keep-files", no_argument, NULL, 'k'},
186 {"elide-empty-files", no_argument, NULL, 'z'},
187 {"prefix", required_argument, NULL, 'f'},
188 {"suffix-format", required_argument, NULL, 'b'},
189 {GETOPT_HELP_OPTION_DECL},
190 {GETOPT_VERSION_OPTION_DECL},
191 {NULL, 0, NULL, 0}
194 /* Optionally remove files created so far; then exit.
195 Called when an error detected. */
197 static void
198 cleanup (void)
200 sigset_t oldset;
202 close_output_file ();
204 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
205 delete_all_files (false);
206 sigprocmask (SIG_SETMASK, &oldset, NULL);
209 static void cleanup_fatal (void) ATTRIBUTE_NORETURN;
210 static void
211 cleanup_fatal (void)
213 cleanup ();
214 exit (EXIT_FAILURE);
/* Report memory exhaustion and exit through cleanup_fatal, so that
   output files are cleaned up the same way as for any other fatal
   error.  */

extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));
  cleanup_fatal ();
}
/* Signal handler: remove the output files (if removal is enabled)
   without printing diagnostics, then re-deliver SIG.  */

static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  /* The signal has been reset to SIG_DFL, but blocked during this
     handler.  Force the default action of this signal once the
     handler returns and the block is removed.  */
  raise (sig);
}
234 /* Keep track of NUM bytes of a partial line in buffer START.
235 These bytes will be retrieved later when another large buffer is read. */
237 static void
238 save_to_hold_area (char *start, size_t num)
240 free (hold_area);
241 hold_area = start;
242 hold_count = num;
245 /* Read up to MAX_N_BYTES bytes from the input stream into DEST.
246 Return the number of bytes read. */
248 static size_t
249 read_input (char *dest, size_t max_n_bytes)
251 size_t bytes_read;
253 if (max_n_bytes == 0)
254 return 0;
256 bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes);
258 if (bytes_read == 0)
259 have_read_eof = true;
261 if (bytes_read == SAFE_READ_ERROR)
263 error (0, errno, _("read error"));
264 cleanup_fatal ();
267 return bytes_read;
270 /* Initialize existing line record P. */
272 static void
273 clear_line_control (struct line *p)
275 p->used = 0;
276 p->insert_index = 0;
277 p->retrieve_index = 0;
280 /* Return a new, initialized line record. */
282 static struct line *
283 new_line_control (void)
285 struct line *p = xmalloc (sizeof *p);
287 p->next = NULL;
288 clear_line_control (p);
290 return p;
293 /* Record LINE_START, which is the address of the start of a line
294 of length LINE_LEN in the large buffer, in the lines buffer of B. */
296 static void
297 keep_new_line (struct buffer_record *b, char *line_start, size_t line_len)
299 struct line *l;
301 /* If there is no existing area to keep line info, get some. */
302 if (b->line_start == NULL)
303 b->line_start = b->curr_line = new_line_control ();
305 /* If existing area for lines is full, get more. */
306 if (b->curr_line->used == CTRL_SIZE)
308 b->curr_line->next = new_line_control ();
309 b->curr_line = b->curr_line->next;
312 l = b->curr_line;
314 /* Record the start of the line, and update counters. */
315 l->starts[l->insert_index].str = line_start;
316 l->starts[l->insert_index].len = line_len;
317 l->used++;
318 l->insert_index++;
321 /* Scan the buffer in B for newline characters
322 and record the line start locations and lengths in B.
323 Return the number of lines found in this buffer.
325 There may be an incomplete line at the end of the buffer;
326 a pointer is kept to this area, which will be used when
327 the next buffer is filled. */
329 static size_t
330 record_line_starts (struct buffer_record *b)
332 char *line_start; /* Start of current line. */
333 char *line_end; /* End of each line found. */
334 size_t bytes_left; /* Length of incomplete last line. */
335 size_t lines; /* Number of lines found. */
336 size_t line_length; /* Length of each line found. */
338 if (b->bytes_used == 0)
339 return 0;
341 lines = 0;
342 line_start = b->buffer;
343 bytes_left = b->bytes_used;
345 for (;;)
347 line_end = memchr (line_start, '\n', bytes_left);
348 if (line_end == NULL)
349 break;
350 line_length = line_end - line_start + 1;
351 keep_new_line (b, line_start, line_length);
352 bytes_left -= line_length;
353 line_start = line_end + 1;
354 lines++;
357 /* Check for an incomplete last line. */
358 if (bytes_left)
360 if (have_read_eof)
362 keep_new_line (b, line_start, bytes_left);
363 lines++;
365 else
366 save_to_hold_area (xmemdup (line_start, bytes_left), bytes_left);
369 b->num_lines = lines;
370 b->first_available = b->start_line = last_line_number + 1;
371 last_line_number += lines;
373 return lines;
376 /* Return a new buffer with room to store SIZE bytes, plus
377 an extra byte for safety. */
379 static struct buffer_record *
380 create_new_buffer (size_t size)
382 struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer);
384 new_buffer->buffer = xmalloc (size + 1);
386 new_buffer->bytes_alloc = size;
387 new_buffer->line_start = new_buffer->curr_line = NULL;
389 return new_buffer;
392 /* Return a new buffer of at least MINSIZE bytes. If a buffer of at
393 least that size is currently free, use it, otherwise create a new one. */
395 static struct buffer_record *
396 get_new_buffer (size_t min_size)
398 struct buffer_record *new_buffer; /* Buffer to return. */
399 size_t alloc_size; /* Actual size that will be requested. */
401 alloc_size = START_SIZE;
402 if (alloc_size < min_size)
404 size_t s = min_size - alloc_size + INCR_SIZE - 1;
405 alloc_size += s - s % INCR_SIZE;
408 new_buffer = create_new_buffer (alloc_size);
410 new_buffer->num_lines = 0;
411 new_buffer->bytes_used = 0;
412 new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
413 new_buffer->next = NULL;
415 return new_buffer;
418 static void
419 free_buffer (struct buffer_record *buf)
421 free (buf->buffer);
422 buf->buffer = NULL;
425 /* Append buffer BUF to the linked list of buffers that contain
426 some data yet to be processed. */
428 static void
429 save_buffer (struct buffer_record *buf)
431 struct buffer_record *p;
433 buf->next = NULL;
434 buf->curr_line = buf->line_start;
436 if (head == NULL)
437 head = buf;
438 else
440 for (p = head; p->next; p = p->next)
441 /* Do nothing. */ ;
442 p->next = buf;
446 /* Fill a buffer of input.
448 Set the initial size of the buffer to a default.
449 Fill the buffer (from the hold area and input stream)
450 and find the individual lines.
451 If no lines are found (the buffer is too small to hold the next line),
452 release the current buffer (whose contents would have been put in the
453 hold area) and repeat the process with another large buffer until at least
454 one entire line has been read.
456 Return true if a new buffer was obtained, otherwise false
457 (in which case end-of-file must have been encountered). */
459 static bool
460 load_buffer (void)
462 struct buffer_record *b;
463 size_t bytes_wanted = START_SIZE; /* Minimum buffer size. */
464 size_t bytes_avail; /* Size of new buffer created. */
465 size_t lines_found; /* Number of lines in this new buffer. */
466 char *p; /* Place to load into buffer. */
468 if (have_read_eof)
469 return false;
471 /* We must make the buffer at least as large as the amount of data
472 in the partial line left over from the last call. */
473 if (bytes_wanted < hold_count)
474 bytes_wanted = hold_count;
476 while (1)
478 b = get_new_buffer (bytes_wanted);
479 bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
480 p = b->buffer;
482 /* First check the `holding' area for a partial line. */
483 if (hold_count)
485 memcpy (p, hold_area, hold_count);
486 p += hold_count;
487 b->bytes_used += hold_count;
488 bytes_avail -= hold_count;
489 hold_count = 0;
492 b->bytes_used += read_input (p, bytes_avail);
494 lines_found = record_line_starts (b);
495 if (!lines_found)
496 free_buffer (b);
498 if (lines_found || have_read_eof)
499 break;
501 if (xalloc_oversized (2, b->bytes_alloc))
502 xalloc_die ();
503 bytes_wanted = 2 * b->bytes_alloc;
504 free_buffer (b);
505 free (b);
508 if (lines_found)
509 save_buffer (b);
510 else
511 free (b);
513 return lines_found != 0;
516 /* Return the line number of the first line that has not yet been retrieved. */
518 static uintmax_t
519 get_first_line_in_buffer (void)
521 if (head == NULL && !load_buffer ())
522 error (EXIT_FAILURE, errno, _("input disappeared"));
524 return head->first_available;
527 /* Return a pointer to the logical first line in the buffer and make the
528 next line the logical first line.
529 Return NULL if there is no more input. */
531 static struct cstring *
532 remove_line (void)
534 /* If non-NULL, this is the buffer for which the previous call
535 returned the final line. So now, presuming that line has been
536 processed, we can free the buffer and reset this pointer. */
537 static struct buffer_record *prev_buf = NULL;
539 struct cstring *line; /* Return value. */
540 struct line *l; /* For convenience. */
542 if (prev_buf)
544 free_buffer (prev_buf);
545 prev_buf = NULL;
548 if (head == NULL && !load_buffer ())
549 return NULL;
551 if (current_line < head->first_available)
552 current_line = head->first_available;
554 ++(head->first_available);
556 l = head->curr_line;
558 line = &l->starts[l->retrieve_index];
560 /* Advance index to next line. */
561 if (++l->retrieve_index == l->used)
563 /* Go on to the next line record. */
564 head->curr_line = l->next;
565 if (head->curr_line == NULL || head->curr_line->used == 0)
567 /* Go on to the next data block.
568 but first record the current one so we can free it
569 once the line we're returning has been processed. */
570 prev_buf = head;
571 head = head->next;
575 return line;
578 /* Search the buffers for line LINENUM, reading more input if necessary.
579 Return a pointer to the line, or NULL if it is not found in the file. */
581 static struct cstring *
582 find_line (uintmax_t linenum)
584 struct buffer_record *b;
586 if (head == NULL && !load_buffer ())
587 return NULL;
589 if (linenum < head->start_line)
590 return NULL;
592 for (b = head;;)
594 if (linenum < b->start_line + b->num_lines)
596 /* The line is in this buffer. */
597 struct line *l;
598 size_t offset; /* How far into the buffer the line is. */
600 l = b->line_start;
601 offset = linenum - b->start_line;
602 /* Find the control record. */
603 while (offset >= CTRL_SIZE)
605 l = l->next;
606 offset -= CTRL_SIZE;
608 return &l->starts[offset];
610 if (b->next == NULL && !load_buffer ())
611 return NULL;
612 b = b->next; /* Try the next data block. */
616 /* Return true if at least one more line is available for input. */
618 static bool
619 no_more_lines (void)
621 return find_line (current_line + 1) == NULL;
/* Make NAME the standard input.  The name "-" leaves standard input
   as it is.  Failure to open the file is fatal.  */

static void
set_input_file (const char *name)
{
  if (STREQ (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"), quote (name));
}
633 /* Write all lines from the beginning of the buffer up to, but
634 not including, line LAST_LINE, to the current output file.
635 If IGNORE is true, do not output lines selected here.
636 ARGNUM is the index in ARGV of the current pattern. */
638 static void
639 write_to_file (uintmax_t last_line, bool ignore, int argnum)
641 struct cstring *line;
642 uintmax_t first_line; /* First available input line. */
643 uintmax_t lines; /* Number of lines to output. */
644 uintmax_t i;
646 first_line = get_first_line_in_buffer ();
648 if (first_line > last_line)
650 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
651 cleanup_fatal ();
654 lines = last_line - first_line;
656 for (i = 0; i < lines; i++)
658 line = remove_line ();
659 if (line == NULL)
661 error (0, 0, _("%s: line number out of range"), global_argv[argnum]);
662 cleanup_fatal ();
664 if (!ignore)
665 save_line_to_file (line);
/* Write every remaining input line to the current output file.  Used
   once all patterns have been processed.  */

static void
dump_rest_of_file (void)
{
  for (;;)
    {
      struct cstring *line = remove_line ();

      if (line == NULL)
        break;
      save_line_to_file (line);
    }
}
680 /* Handle an attempt to read beyond EOF under the control of record P,
681 on iteration REPETITION if nonzero. */
683 static void handle_line_error (const struct control *, uintmax_t)
684 ATTRIBUTE_NORETURN;
685 static void
686 handle_line_error (const struct control *p, uintmax_t repetition)
688 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
690 fprintf (stderr, _("%s: %s: line number out of range"),
691 program_name, quote (umaxtostr (p->lines_required, buf)));
692 if (repetition)
693 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
694 else
695 fprintf (stderr, "\n");
697 cleanup_fatal ();
700 /* Determine the line number that marks the end of this file,
701 then get those lines and save them to the output file.
702 P is the control record.
703 REPETITION is the repetition number. */
705 static void
706 process_line_count (const struct control *p, uintmax_t repetition)
708 uintmax_t linenum;
709 uintmax_t last_line_to_save = p->lines_required * (repetition + 1);
710 struct cstring *line;
712 create_output_file ();
714 linenum = get_first_line_in_buffer ();
716 while (linenum++ < last_line_to_save)
718 line = remove_line ();
719 if (line == NULL)
720 handle_line_error (p, repetition);
721 save_line_to_file (line);
724 close_output_file ();
726 /* Ensure that the line number specified is not 1 greater than
727 the number of lines in the file. */
728 if (no_more_lines ())
729 handle_line_error (p, repetition);
732 static void regexp_error (struct control *, uintmax_t, bool) ATTRIBUTE_NORETURN;
733 static void
734 regexp_error (struct control *p, uintmax_t repetition, bool ignore)
736 fprintf (stderr, _("%s: %s: match not found"),
737 program_name, quote (global_argv[p->argnum]));
739 if (repetition)
741 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
742 fprintf (stderr, _(" on repetition %s\n"), umaxtostr (repetition, buf));
744 else
745 fprintf (stderr, "\n");
747 if (!ignore)
749 dump_rest_of_file ();
750 close_output_file ();
752 cleanup_fatal ();
755 /* Read the input until a line matches the regexp in P, outputting
756 it unless P->IGNORE is true.
757 REPETITION is this repeat-count; 0 means the first time. */
759 static void
760 process_regexp (struct control *p, uintmax_t repetition)
762 struct cstring *line; /* From input file. */
763 size_t line_len; /* To make "$" in regexps work. */
764 uintmax_t break_line; /* First line number of next file. */
765 bool ignore = p->ignore; /* If true, skip this section. */
766 regoff_t ret;
768 if (!ignore)
769 create_output_file ();
771 /* If there is no offset for the regular expression, or
772 it is positive, then it is not necessary to buffer the lines. */
774 if (p->offset >= 0)
776 for (;;)
778 line = find_line (++current_line);
779 if (line == NULL)
781 if (p->repeat_forever)
783 if (!ignore)
785 dump_rest_of_file ();
786 close_output_file ();
788 exit (EXIT_SUCCESS);
790 else
791 regexp_error (p, repetition, ignore);
793 line_len = line->len;
794 if (line->str[line_len - 1] == '\n')
795 line_len--;
796 ret = re_search (&p->re_compiled, line->str, line_len,
797 0, line_len, NULL);
798 if (ret == -2)
800 error (0, 0, _("error in regular expression search"));
801 cleanup_fatal ();
803 if (ret == -1)
805 line = remove_line ();
806 if (!ignore)
807 save_line_to_file (line);
809 else
810 break;
813 else
815 /* Buffer the lines. */
816 for (;;)
818 line = find_line (++current_line);
819 if (line == NULL)
821 if (p->repeat_forever)
823 if (!ignore)
825 dump_rest_of_file ();
826 close_output_file ();
828 exit (EXIT_SUCCESS);
830 else
831 regexp_error (p, repetition, ignore);
833 line_len = line->len;
834 if (line->str[line_len - 1] == '\n')
835 line_len--;
836 ret = re_search (&p->re_compiled, line->str, line_len,
837 0, line_len, NULL);
838 if (ret == -2)
840 error (0, 0, _("error in regular expression search"));
841 cleanup_fatal ();
843 if (ret != -1)
844 break;
848 /* Account for any offset from this regexp. */
849 break_line = current_line + p->offset;
851 write_to_file (break_line, ignore, p->argnum);
853 if (!ignore)
854 close_output_file ();
856 if (p->offset > 0)
857 current_line = break_line;
860 /* Split the input file according to the control records we have built. */
862 static void
863 split_file (void)
865 size_t i;
867 for (i = 0; i < control_used; i++)
869 uintmax_t j;
870 if (controls[i].regexpr)
872 for (j = 0; (controls[i].repeat_forever
873 || j <= controls[i].repeat); j++)
874 process_regexp (&controls[i], j);
876 else
878 for (j = 0; (controls[i].repeat_forever
879 || j <= controls[i].repeat); j++)
880 process_line_count (&controls[i], j);
884 create_output_file ();
885 dump_rest_of_file ();
886 close_output_file ();
889 /* Return the name of output file number NUM.
891 This function is called from a signal handler, so it should invoke
892 only reentrant functions that are async-signal-safe. POSIX does
893 not guarantee this for the functions called below, but we don't
894 know of any hosts where this implementation isn't safe. */
896 static char *
897 make_filename (unsigned int num)
899 strcpy (filename_space, prefix);
900 if (suffix)
901 sprintf (filename_space + strlen (prefix), suffix, num);
902 else
903 sprintf (filename_space + strlen (prefix), "%0*u", digits, num);
904 return filename_space;
907 /* Create the next output file. */
909 static void
910 create_output_file (void)
912 sigset_t oldset;
913 bool fopen_ok;
914 int fopen_errno;
916 output_filename = make_filename (files_created);
918 /* Create the output file in a critical section, to avoid races. */
919 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
920 output_stream = fopen (output_filename, "w");
921 fopen_ok = (output_stream != NULL);
922 fopen_errno = errno;
923 files_created += fopen_ok;
924 sigprocmask (SIG_SETMASK, &oldset, NULL);
926 if (! fopen_ok)
928 error (0, fopen_errno, "%s", output_filename);
929 cleanup_fatal ();
931 bytes_written = 0;
934 /* If requested, delete all the files we have created. This function
935 must be called only from critical sections. */
937 static void
938 delete_all_files (bool in_signal_handler)
940 unsigned int i;
942 if (! remove_files)
943 return;
945 for (i = 0; i < files_created; i++)
947 const char *name = make_filename (i);
948 if (unlink (name) != 0 && !in_signal_handler)
949 error (0, errno, "%s", name);
952 files_created = 0;
955 /* Close the current output file and print the count
956 of characters in this file. */
958 static void
959 close_output_file (void)
961 if (output_stream)
963 if (ferror (output_stream))
965 error (0, 0, _("write error for %s"), quote (output_filename));
966 output_stream = NULL;
967 cleanup_fatal ();
969 if (fclose (output_stream) != 0)
971 error (0, errno, "%s", output_filename);
972 output_stream = NULL;
973 cleanup_fatal ();
975 if (bytes_written == 0 && elide_empty_files)
977 sigset_t oldset;
978 bool unlink_ok;
979 int unlink_errno;
981 /* Remove the output file in a critical section, to avoid races. */
982 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
983 unlink_ok = (unlink (output_filename) == 0);
984 unlink_errno = errno;
985 files_created -= unlink_ok;
986 sigprocmask (SIG_SETMASK, &oldset, NULL);
988 if (! unlink_ok)
989 error (0, unlink_errno, "%s", output_filename);
991 else
993 if (!suppress_count)
995 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
996 fprintf (stdout, "%s\n", umaxtostr (bytes_written, buf));
999 output_stream = NULL;
1003 /* Save line LINE to the output file and
1004 increment the character count for the current file. */
1006 static void
1007 save_line_to_file (const struct cstring *line)
1009 fwrite (line->str, sizeof (char), line->len, output_stream);
1010 bytes_written += line->len;
1013 /* Return a new, initialized control record. */
1015 static struct control *
1016 new_control_record (void)
1018 static size_t control_allocated = 0; /* Total space allocated. */
1019 struct control *p;
1021 if (control_used == control_allocated)
1022 controls = X2NREALLOC (controls, &control_allocated);
1023 p = &controls[control_used++];
1024 p->regexpr = false;
1025 p->repeat = 0;
1026 p->repeat_forever = false;
1027 p->lines_required = 0;
1028 p->offset = 0;
1029 return p;
1032 /* Check if there is a numeric offset after a regular expression.
1033 STR is the entire command line argument.
1034 P is the control record for this regular expression.
1035 NUM is the numeric part of STR. */
1037 static void
1038 check_for_offset (struct control *p, const char *str, const char *num)
1040 if (xstrtoimax (num, NULL, 10, &p->offset, "") != LONGINT_OK)
1041 error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), str);
1044 /* Given that the first character of command line arg STR is '{',
1045 make sure that the rest of the string is a valid repeat count
1046 and store its value in P.
1047 ARGNUM is the ARGV index of STR. */
1049 static void
1050 parse_repeat_count (int argnum, struct control *p, char *str)
1052 uintmax_t val;
1053 char *end;
1055 end = str + strlen (str) - 1;
1056 if (*end != '}')
1057 error (EXIT_FAILURE, 0, _("%s: `}' is required in repeat count"), str);
1058 *end = '\0';
1060 if (str+1 == end-1 && *(str+1) == '*')
1061 p->repeat_forever = true;
1062 else
1064 if (xstrtoumax (str + 1, NULL, 10, &val, "") != LONGINT_OK)
1066 error (EXIT_FAILURE, 0,
1067 _("%s}: integer required between `{' and `}'"),
1068 global_argv[argnum]);
1070 p->repeat = val;
1073 *end = '}';
1076 /* Extract the regular expression from STR and check for a numeric offset.
1077 STR should start with the regexp delimiter character.
1078 Return a new control record for the regular expression.
1079 ARGNUM is the ARGV index of STR.
1080 Unless IGNORE is true, mark these lines for output. */
1082 static struct control *
1083 extract_regexp (int argnum, bool ignore, char const *str)
1085 size_t len; /* Number of bytes in this regexp. */
1086 char delim = *str;
1087 char const *closing_delim;
1088 struct control *p;
1089 const char *err;
1091 closing_delim = strrchr (str + 1, delim);
1092 if (closing_delim == NULL)
1093 error (EXIT_FAILURE, 0,
1094 _("%s: closing delimiter `%c' missing"), str, delim);
1096 len = closing_delim - str - 1;
1097 p = new_control_record ();
1098 p->argnum = argnum;
1099 p->ignore = ignore;
1101 p->regexpr = true;
1102 p->re_compiled.buffer = NULL;
1103 p->re_compiled.allocated = 0;
1104 p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1);
1105 p->re_compiled.translate = NULL;
1106 re_syntax_options =
1107 RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES;
1108 err = re_compile_pattern (str + 1, len, &p->re_compiled);
1109 if (err)
1111 error (0, 0, _("%s: invalid regular expression: %s"), str, err);
1112 cleanup_fatal ();
1115 if (closing_delim[1])
1116 check_for_offset (p, str, closing_delim + 1);
1118 return p;
1121 /* Extract the break patterns from args START through ARGC - 1 of ARGV.
1122 After each pattern, check if the next argument is a repeat count. */
1124 static void
1125 parse_patterns (int argc, int start, char **argv)
1127 int i; /* Index into ARGV. */
1128 struct control *p; /* New control record created. */
1129 uintmax_t val;
1130 static uintmax_t last_val = 0;
1132 for (i = start; i < argc; i++)
1134 if (*argv[i] == '/' || *argv[i] == '%')
1136 p = extract_regexp (i, *argv[i] == '%', argv[i]);
1138 else
1140 p = new_control_record ();
1141 p->argnum = i;
1143 if (xstrtoumax (argv[i], NULL, 10, &val, "") != LONGINT_OK)
1144 error (EXIT_FAILURE, 0, _("%s: invalid pattern"), argv[i]);
1145 if (val == 0)
1146 error (EXIT_FAILURE, 0,
1147 _("%s: line number must be greater than zero"),
1148 argv[i]);
1149 if (val < last_val)
1151 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
1152 error (EXIT_FAILURE, 0,
1153 _("line number %s is smaller than preceding line number, %s"),
1154 quote (argv[i]), umaxtostr (last_val, buf));
1157 if (val == last_val)
1158 error (0, 0,
1159 _("warning: line number %s is the same as preceding line number"),
1160 quote (argv[i]));
1162 last_val = val;
1164 p->lines_required = val;
1167 if (i + 1 < argc && *argv[i + 1] == '{')
1169 /* We have a repeat count. */
1170 i++;
1171 parse_repeat_count (i, p, argv[i]);
/* Skip the printf flag characters at *FORMAT_PTR, leaving *FORMAT_PTR
   at the first character that is not a flag.  Return the number of
   extra output bytes the flags may cause, built from a bit mask:
   bit 0 for a sign or space ('+' or ' '), bit 1 for '#'.  The '-'
   flag is accepted but adds nothing.  */

static unsigned int
get_format_flags (char **format_ptr)
{
  unsigned int extra = 0;
  char *cursor = *format_ptr;

  for (;;)
    {
      char c = *cursor;

      if (c == '+' || c == ' ')
        extra |= 1;
      else if (c == '#')
        extra |= 2;     /* Allow for 0x prefix preceding an `x' conversion.  */
      else if (c != '-')
        break;          /* Not a flag; this includes the terminating NUL.  */

      cursor++;
    }

  *format_ptr = cursor;
  return extra;
}
1204 static size_t
1205 get_format_width (char **format_ptr)
1207 unsigned long int val = 0;
1209 if (ISDIGIT (**format_ptr)
1210 && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1211 || SIZE_MAX < val))
1212 error (EXIT_FAILURE, 0, _("invalid format width"));
1214 /* Allow for enough octal digits to represent the value of UINT_MAX,
1215 even if the field width is less than that. */
1216 return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
1219 static size_t
1220 get_format_prec (char **format_ptr)
1222 if (**format_ptr != '.')
1223 return 0;
1224 (*format_ptr)++;
1226 if (! ISDIGIT (**format_ptr))
1227 return 0;
1228 else
1230 unsigned long int val;
1231 if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
1232 || SIZE_MAX < val)
1233 error (EXIT_FAILURE, 0, _("invalid format precision"));
1234 return val;
/* Consume the conversion specifier character at *FORMAT_PTR.  Only
   the integer conversions d, i, o, u, x and X are accepted.  A missing
   specifier (end of string) or any other character is a fatal
   error.  */

static void
get_format_conv_type (char **format_ptr)
{
  unsigned char ch = **format_ptr;

  (*format_ptr)++;

  switch (ch)
    {
    case 0:
      error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix"));
      break;

    case 'd': case 'i':
    case 'o': case 'u':
    case 'x': case 'X':
      break;

    default:
      /* Show an unprintable character as an octal escape.  */
      if (!isprint (ch))
        error (EXIT_FAILURE, 0,
               _("invalid conversion specifier in suffix: \\%.3o"), ch);
      else
        error (EXIT_FAILURE, 0,
               _("invalid conversion specifier in suffix: %c"), ch);
    }
}
/* Validate the suffix format FORMAT and return an upper bound on the
   number of bytes it can produce for one unsigned int argument.
   FORMAT must contain exactly one conversion specification; `%%'
   stands for a literal percent sign.  Zero or several conversion
   specifications are a fatal error.  */

static size_t
max_out (char *format)
{
  size_t out_count = 0;
  bool seen_conversion = false;

  while (*format)
    {
      char c = *format++;

      /* An ordinary character yields one byte.  */
      if (c != '%')
        {
          out_count++;
          continue;
        }

      /* `%%' yields one byte as well.  */
      if (*format == '%')
        {
          format++;
          out_count++;
          continue;
        }

      if (seen_conversion)
        error (EXIT_FAILURE, 0,
               _("too many %% conversion specifications in suffix"));
      seen_conversion = true;

      out_count += get_format_flags (&format);
      {
        size_t width = get_format_width (&format);
        size_t prec = get_format_prec (&format);

        out_count += MAX (width, prec);
      }
      get_format_conv_type (&format);
    }

  if (! seen_conversion)
    error (EXIT_FAILURE, 0,
           _("missing %% conversion specification in suffix"));

  return out_count;
}
1307 main (int argc, char **argv)
1309 int optc;
1310 unsigned long int val;
1312 initialize_main (&argc, &argv);
1313 set_program_name (argv[0]);
1314 setlocale (LC_ALL, "");
1315 bindtextdomain (PACKAGE, LOCALEDIR);
1316 textdomain (PACKAGE);
1318 atexit (close_stdout);
1320 global_argv = argv;
1321 controls = NULL;
1322 control_used = 0;
1323 suppress_count = false;
1324 remove_files = true;
1325 prefix = DEFAULT_PREFIX;
1327 while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, NULL)) != -1)
1328 switch (optc)
1330 case 'f':
1331 prefix = optarg;
1332 break;
1334 case 'b':
1335 suffix = optarg;
1336 break;
1338 case 'k':
1339 remove_files = false;
1340 break;
1342 case 'n':
1343 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1344 || val > INT_MAX)
1345 error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
1346 digits = val;
1347 break;
1349 case 's':
1350 case 'q':
1351 suppress_count = true;
1352 break;
1354 case 'z':
1355 elide_empty_files = true;
1356 break;
1358 case_GETOPT_HELP_CHAR;
1360 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1362 default:
1363 usage (EXIT_FAILURE);
1366 if (argc - optind < 2)
1368 if (argc <= optind)
1369 error (0, 0, _("missing operand"));
1370 else
1371 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1372 usage (EXIT_FAILURE);
1375 if (suffix)
1376 filename_space = xmalloc (strlen (prefix) + max_out (suffix) + 2);
1377 else
1378 filename_space = xmalloc (strlen (prefix) + digits + 2);
1380 set_input_file (argv[optind++]);
1382 parse_patterns (argc, optind, argv);
1385 int i;
1386 static int const sig[] =
1388 /* The usual suspects. */
1389 SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM,
1390 #ifdef SIGPOLL
1391 SIGPOLL,
1392 #endif
1393 #ifdef SIGPROF
1394 SIGPROF,
1395 #endif
1396 #ifdef SIGVTALRM
1397 SIGVTALRM,
1398 #endif
1399 #ifdef SIGXCPU
1400 SIGXCPU,
1401 #endif
1402 #ifdef SIGXFSZ
1403 SIGXFSZ,
1404 #endif
1406 enum { nsigs = ARRAY_CARDINALITY (sig) };
1408 struct sigaction act;
1410 sigemptyset (&caught_signals);
1411 for (i = 0; i < nsigs; i++)
1413 sigaction (sig[i], NULL, &act);
1414 if (act.sa_handler != SIG_IGN)
1415 sigaddset (&caught_signals, sig[i]);
1418 act.sa_handler = interrupt_handler;
1419 act.sa_mask = caught_signals;
1420 act.sa_flags = SA_NODEFER | SA_RESETHAND;
1422 for (i = 0; i < nsigs; i++)
1423 if (sigismember (&caught_signals, sig[i]))
1424 sigaction (sig[i], &act, NULL);
1427 split_file ();
1429 if (close (STDIN_FILENO) != 0)
1431 error (0, errno, _("read error"));
1432 cleanup_fatal ();
1435 exit (EXIT_SUCCESS);
/* Report how to invoke the program, then exit with STATUS.
   If STATUS is not EXIT_SUCCESS, print only a one-line hint on
   standard error that points at --help.  Otherwise print the full
   help text on standard output: the synopsis, a summary, the option
   list, the accepted PATTERN forms, and the bug-reporting address.  */
1438 void
1439 usage (int status)
1441 if (status != EXIT_SUCCESS)
1442 fprintf (stderr, _("Try `%s --help' for more information.\n"),
1443 program_name);
1444 else
1446 printf (_("\
1447 Usage: %s [OPTION]... FILE PATTERN...\n\
1449 program_name);
1450 fputs (_("\
1451 Output pieces of FILE separated by PATTERN(s) to files `xx00', `xx01', ...,\n\
1452 and output byte counts of each piece to standard output.\n\
1454 "), stdout);
1455 fputs (_("\
1456 Mandatory arguments to long options are mandatory for short options too.\n\
1457 "), stdout);
1458 fputs (_("\
1459 -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\
1460 -f, --prefix=PREFIX use PREFIX instead of `xx'\n\
1461 -k, --keep-files do not remove output files on errors\n\
1462 "), stdout);
1463 fputs (_("\
1464 -n, --digits=DIGITS use specified number of digits instead of 2\n\
1465 -s, --quiet, --silent do not print counts of output file sizes\n\
1466 -z, --elide-empty-files remove empty output files\n\
1467 "), stdout);
1468 fputs (HELP_OPTION_DESCRIPTION, stdout);
1469 fputs (VERSION_OPTION_DESCRIPTION, stdout);
1470 fputs (_("\
1472 Read standard input if FILE is -. Each PATTERN may be:\n\
1473 "), stdout);
1474 fputs (_("\
1476 INTEGER copy up to but not including specified line number\n\
1477 /REGEXP/[OFFSET] copy up to but not including a matching line\n\
1478 %REGEXP%[OFFSET] skip to, but not including a matching line\n\
1479 {INTEGER} repeat the previous pattern specified number of times\n\
1480 {*} repeat the previous pattern as many times as possible\n\
1482 A line OFFSET is a required `+' or `-' followed by a positive integer.\n\
1483 "), stdout);
1484 emit_bug_reporting_address ();
1486 exit (status);