1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
20 * Implement -t CHAR or -t REGEX to specify break characters other
29 #include <sys/types.h>
34 #include "fd-reopen.h"
36 #include "full-read.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
40 #include "safe-read.h"
45 /* The official name of this program (e.g., no `g' prefix). */
46 #define PROGRAM_NAME "split"
49 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
50 proper_name ("Richard M. Stallman")
/* Filter-related file-scope state.  filter_command doubles as the
   filter-mode flag: it is tested for non-NULL where EPIPE is classified
   as ignorable, and it is the command string handed to the shell by
   execl in create.  */
52 /* Shell command to filter through, instead of creating files. */
53 static char const *filter_command
;
/* Assigned from child_pid in create and passed to closeout by cwrite
   and main.  */
55 /* Process ID of the filter. */
56 static int filter_pid
;
/* open_pipes receives descriptors via open_pipes[n_open_pipes++];
   open_pipes_alloc is compared against n_open_pipes before the array
   is grown with x2nrealloc.  */
58 /* Array of open pipes. */
59 static int *open_pipes
;
60 static size_t open_pipes_alloc
;
61 static size_t n_open_pipes
;
/* newblocked is the set handed to sigprocmask (SIG_BLOCK, ...) in main;
   oldblocked receives the previous mask there and is reinstated with
   SIG_SETMASK in create just before execl.  */
63 /* Blocked signals. */
64 static sigset_t oldblocked
;
65 static sigset_t newblocked
;
/* Output naming state and behaviour flags.
   NOTE(review): the embedded numbering skips 71-72, 84-85 and 91-92, so
   three commented declarations (output file name, input file name and
   the verbose flag) have lost their declaration lines in this listing.  */
67 /* Base name of output files. */
68 static char const *outbase
;
70 /* Name of output files. */
73 /* Pointer to the end of the prefix in OUTFILE.
74 Suffixes are inserted here. */
75 static char *outfile_mid
;
/* Zero means the user did not set a length; set_suffix_length tests it
   for that purpose and assigns a computed value.  */
77 /* Length of OUTFILE's suffix. */
78 static size_t suffix_length
;
/* main can switch this to the ten decimal digits.  */
80 /* Alphabet of characters to use in suffix. */
81 static char const *suffix_alphabet
= "abcdefghijklmnopqrstuvwxyz";
83 /* Name of input file. May be "-". */
/* Initialised to -1; presumably that marks no output file open yet
   (TODO confirm against the test in cwrite, which is not visible).  */
86 /* Descriptor on which output file is open. */
87 static int output_desc
= -1;
/* NOTE(review): the diagnostics visible in create are emitted with
   fprintf (stdout, ...), not on standard error; confirm which stream
   the following comment should name.  */
89 /* If true, print a diagnostic on standard error just before each
90 output file is opened. */
/* Consulted by cwrite for placeholder writes and by the final loop of
   lines_rr.  */
93 /* If true, don't generate zero length output files. */
94 static bool elide_empty_files
;
/* Read in lines_rr to choose full_write on STDOUT_FILENO over fwrite
   on stdout.  */
96 /* If true, in round robin mode, immediately copy
97 input to output, which is much slower, so disabled by default. */
98 static bool unbuffered
;
/* Enumerators of enum Split_type (the enum header line is not in this
   listing).  As used in main: type_undef is the initial value and is
   replaced by type_lines when no mode option was given; type_rr is
   chosen when the -n argument starts with r/, type_chunk_lines when it
   starts with l/, and type_chunk_bytes otherwise; type_byteslines
   dispatches to line_bytes_split; type_digits records that a bare
   digit option was seen.  */
100 /* The split mode to use. */
103 type_undef
, type_bytes
, type_byteslines
, type_lines
, type_digits
,
104 type_chunk_bytes
, type_chunk_lines
, type_rr
/* NOTE(review): only VERBOSE_OPTION is visible here.  longopts also
   refers to FILTER_OPTION and IO_BLKSIZE_OPTION, whose enumerators are
   presumably declared on lines missing from this listing.  */
107 /* For long options that have no equivalent short option, use a
108 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
111 VERBOSE_OPTION
= CHAR_MAX
+ 1,
/* Long option table.  Entries with a short equivalent carry that letter
   as their value; filter, verbose and the undocumented -io-blksize
   entry (typed with three leading dashes on the command line, since
   the stored name itself starts with a dash) carry pseudo short option
   values instead.  The flag member is NULL throughout.
   NOTE(review): the terminating all-zero entry and the closing brace
   are not in this listing.  */
116 static struct option
const longopts
[] =
118 {"bytes", required_argument
, NULL
, 'b'},
119 {"lines", required_argument
, NULL
, 'l'},
120 {"line-bytes", required_argument
, NULL
, 'C'},
121 {"number", required_argument
, NULL
, 'n'},
122 {"elide-empty-files", no_argument
, NULL
, 'e'},
123 {"unbuffered", no_argument
, NULL
, 'u'},
124 {"suffix-length", required_argument
, NULL
, 'a'},
125 {"numeric-suffixes", no_argument
, NULL
, 'd'},
126 {"filter", required_argument
, NULL
, FILTER_OPTION
},
127 {"verbose", no_argument
, NULL
, VERBOSE_OPTION
},
128 {"-io-blksize", required_argument
, NULL
,
129 IO_BLKSIZE_OPTION
}, /* do not document */
130 {GETOPT_HELP_OPTION_DECL
},
131 {GETOPT_VERSION_OPTION_DECL
},
135 /* Return true if the errno value, ERR, is ignorable. */
/* Concretely: ERR is ignorable only when it equals EPIPE and a filter
   command is in use (filter_command is non-NULL).
   NOTE(review): the function header is not in this listing; callers
   elsewhere in the file invoke it as ignorable (errno).  */
139 return filter_command
&& err
== EPIPE
;
/* Decide the length of the output file name suffix.

   N_UNITS is the unit count parsed from the command line and SPLIT_TYPE
   the selected mode.  For the three -n modes (type_chunk_bytes,
   type_chunk_lines, type_rr) a required length is derived from N_UNITS
   and the number of characters in suffix_alphabet.  If the user already
   set suffix_length and it is smaller than the required length, the
   program exits with a diagnostic.  The last visible statement assigns
   the larger of DEFAULT_SUFFIX_LENGTH and the required length.

   NOTE(review): the embedded numbering skips several lines here, so the
   branch structure is only partly visible; see the notes below.  */
143 set_suffix_length (uintmax_t n_units
, enum Split_type split_type
)
145 #define DEFAULT_SUFFIX_LENGTH 2
147 size_t suffix_needed
= 0;
149 /* Auto-calculate the suffix length if the number of files is given. */
150 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
151 || split_type
== type_rr
)
153 size_t alphabet_len
= strlen (suffix_alphabet
);
/* True when N_UNITS is not an exact multiple of the alphabet size.  */
154 bool alphabet_slop
= (n_units
% alphabet_len
) != 0;
/* NOTE(review): numbering jumps from 155 to 157, so the body of this
   loop is not shown.  Do not read the += statement that follows as the
   loop body without checking the complete source.  */
155 while (n_units
/= alphabet_len
)
157 suffix_needed
+= alphabet_slop
;
160 if (suffix_length
) /* set by user */
162 if (suffix_length
< suffix_needed
)
/* NOTE(review): the argument matching %zu is on a line missing from
   this listing.  */
164 error (EXIT_FAILURE
, 0,
165 _("the suffix length needs to be at least %zu"),
/* NOTE(review): numbering jumps from 165 to 171; presumably this
   assignment belongs to the branch taken when the user gave no length
   (TODO confirm).  */
171 suffix_length
= MAX (DEFAULT_SUFFIX_LENGTH
, suffix_needed
);
/* Body fragment of the help routine.  It tests STATUS against
   EXIT_SUCCESS, writes the option summary to stdout with fprintf
   (substituting DEFAULT_SUFFIX_LENGTH for the %d in the -a line) and
   fputs, and finishes with emit_ancillary_info.
   NOTE(review): the function header, the statements guarded by the
   STATUS test, and the opening and closing delimiters of most help
   strings are not in this listing, so several text lines below appear
   outside any string literal.  From the calls usage (EXIT_FAILURE)
   elsewhere in the file this is presumably usage (int status).  */
177 if (status
!= EXIT_SUCCESS
)
182 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
186 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
187 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
188 is -, read standard input.\n\
192 Mandatory arguments to long options are mandatory for short options too.\n\
194 fprintf (stdout
, _("\
195 -a, --suffix-length=N use suffixes of length N (default %d)\n\
196 -b, --bytes=SIZE put SIZE bytes per output file\n\
197 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
198 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
199 -e, --elide-empty-files do not generate empty output files with `-n'\n\
200 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
201 -l, --lines=NUMBER put NUMBER lines per output file\n\
202 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
203 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
204 "), DEFAULT_SUFFIX_LENGTH
);
206 --verbose print a diagnostic just before each\n\
207 output file is opened\n\
209 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
210 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
214 N split into N files based on size of input\n\
215 K/N output Kth of N to stdout\n\
216 l/N split into N files without splitting lines\n\
217 l/K/N output Kth of N to stdout without splitting lines\n\
218 r/N like `l' but use round robin distribution\n\
219 r/K/N likewise but only output Kth of N to stdout\n\
221 emit_ancillary_info ();
/* next_file_name: produce the next output file name in OUTFILE.

   The first visible part allocates OUTFILE as OUTBASE followed by
   suffix_length copies of the first alphabet character, records
   outfile_mid as the start of the suffix, and allocates the zeroed
   index array sufindex with one slot per suffix character.  The later
   part rewrites suffix characters from sufindex and exits with
   "output file suffixes exhausted" when it cannot advance.

   NOTE(review): the original header comment below is cut off in this
   listing (its closing line is missing), and the tests that separate
   the first-call path from the increment path, as well as the
   increment loop itself, are not shown.  */
226 /* Compute the next sequential output file name and store it into the
230 next_file_name (void)
232 /* Index in suffix_alphabet of each character in the suffix. */
233 static size_t *sufindex
;
237 /* Allocate and initialize the first file name. */
239 size_t outbase_length
= strlen (outbase
);
240 size_t outfile_length
= outbase_length
+ suffix_length
;
/* Wraparound check on the size_t length arithmetic.
   NOTE(review): the statement it guards is not in this listing.  */
241 if (outfile_length
+ 1 < outbase_length
)
243 outfile
= xmalloc (outfile_length
+ 1);
244 outfile_mid
= outfile
+ outbase_length
;
245 memcpy (outfile
, outbase
, outbase_length
);
/* Every suffix position starts at alphabet index 0, which matches the
   zero-filled sufindex allocated below.  */
246 memset (outfile_mid
, suffix_alphabet
[0], suffix_length
);
247 outfile
[outfile_length
] = 0;
248 sufindex
= xcalloc (suffix_length
, sizeof *sufindex
);
250 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
251 /* POSIX requires that if the output file name is too long for
252 its directory, `split' must fail without creating any files.
253 This must be checked for explicitly on operating systems that
254 silently truncate file names. */
256 char *dir
= dir_name (outfile
);
/* A negative pathconf result is treated as no known limit.  */
257 long name_max
= pathconf (dir
, _PC_NAME_MAX
);
258 if (0 <= name_max
&& name_max
< base_len (last_component (outfile
)))
259 error (EXIT_FAILURE
, ENAMETOOLONG
, "%s", outfile
);
266 /* Increment the suffix in place, if possible. */
268 size_t i
= suffix_length
;
272 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
276 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
278 error (EXIT_FAILURE
, 0, _("output file suffixes exhausted"));
282 /* Create or truncate a file. */
/* Two paths are visible.  The plain path opens NAME write-only with
   O_CREAT | O_TRUNC | O_BINARY and read/write permission bits for
   user, group and other, returning the result of open.  The filter
   path exports NAME in the FILE environment variable, creates a pipe,
   and in the child runs the filter command through the shell with the
   pipe read end as standard input; the parent keeps the write end,
   records it in open_pipes and stores the child id in filter_pid.
   NOTE(review): the test selecting between the two paths, the fork
   call, the tests guarding the two diagnostics printed to stdout, and
   the final return of the filter path are not in this listing.  */
285 create (const char *name
)
290 fprintf (stdout
, _("creating file %s\n"), quote (name
));
291 return open (name
, O_WRONLY
| O_CREAT
| O_TRUNC
| O_BINARY
,
292 (S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IWGRP
| S_IROTH
| S_IWOTH
));
/* The shell comes from the SHELL environment variable, with /bin/sh as
   the fallback when it is unset.  */
298 char const *shell_prog
= getenv ("SHELL");
299 if (shell_prog
== NULL
)
300 shell_prog
= "/bin/sh";
/* The third setenv argument is 1, so an existing FILE value is
   overwritten.  */
301 if (setenv ("FILE", name
, 1) != 0)
302 error (EXIT_FAILURE
, errno
,
303 _("failed to set FILE environment variable"));
305 fprintf (stdout
, _("executing with FILE=%s\n"), quote (name
));
306 if (pipe (fd_pair
) != 0)
307 error (EXIT_FAILURE
, errno
, _("failed to create pipe"));
311 /* This is the child process. If an error occurs here, the
312 parent will eventually learn about it after doing a wait,
313 at which time it will emit its own error message. */
315 /* We have to close any pipes that were opened during an
316 earlier call, otherwise this process will be holding a
317 write-pipe that will prevent the earlier process from
318 reading an EOF on the corresponding read-pipe. */
319 for (j
= 0; j
< n_open_pipes
; ++j
)
320 if (close (open_pipes
[j
]) != 0)
321 error (EXIT_FAILURE
, errno
, _("closing prior pipe"));
/* The child only reads: drop its copy of the write end.  */
322 if (close (fd_pair
[1]))
323 error (EXIT_FAILURE
, errno
, _("closing output pipe"));
/* Move the read end onto standard input unless it is already there.  */
324 if (fd_pair
[0] != STDIN_FILENO
)
326 if (dup2 (fd_pair
[0], STDIN_FILENO
) != STDIN_FILENO
)
327 error (EXIT_FAILURE
, errno
, _("moving input pipe"));
328 if (close (fd_pair
[0]) != 0)
329 error (EXIT_FAILURE
, errno
, _("closing input pipe"));
/* Reinstate the signal mask saved in oldblocked before running the
   filter.  */
331 sigprocmask (SIG_SETMASK
, &oldblocked
, NULL
);
332 execl (shell_prog
, last_component (shell_prog
), "-c",
333 filter_command
, (char *) NULL
);
/* Reached only if execl returned, i.e. it failed.  */
334 error (EXIT_FAILURE
, errno
, _("failed to run command: \"%s -c %s\""),
335 shell_prog
, filter_command
);
338 error (EXIT_FAILURE
, errno
, _("fork system call failed"));
/* Parent side: the read end belongs to the child only.  */
339 if (close (fd_pair
[0]) != 0)
340 error (EXIT_FAILURE
, errno
, _("failed to close input pipe"));
341 filter_pid
= child_pid
;
/* Grow the pipe table when it is full, then remember the write end.
   NOTE(review): the last argument line of x2nrealloc is not in this
   listing.  */
342 if (n_open_pipes
== open_pipes_alloc
)
343 open_pipes
= x2nrealloc (open_pipes
, &open_pipes_alloc
,
345 open_pipes
[n_open_pipes
++] = fd_pair
[1];
350 /* Close the output file, and do any associated cleanup.
351 If FP and FD are both specified, they refer to the same open file;
352 in this case FP is closed, but FD is still used in cleanup. */
/* FP may be NULL, in which case FD is closed directly with close and
   any failure is fatal.  An fclose failure is fatal unless
   ignorable (errno) holds.  NAME is used only in diagnostics.  A slot
   of open_pipes equal to FD is overwritten with the last entry while
   n_open_pipes is decremented.  The child PID is then waited for and
   its termination reported, naming filter_command in the message.
   NOTE(review): the tests that decide when the close, the table
   cleanup and the wait are performed are on lines missing from this
   listing, as is the declaration of wstatus.  */
354 closeout (FILE *fp
, int fd
, pid_t pid
, char const *name
)
356 if (fp
!= NULL
&& fclose (fp
) != 0 && ! ignorable (errno
))
357 error (EXIT_FAILURE
, errno
, "%s", name
);
360 if (fp
== NULL
&& close (fd
) < 0)
361 error (EXIT_FAILURE
, errno
, "%s", name
);
363 for (j
= 0; j
< n_open_pipes
; ++j
)
365 if (open_pipes
[j
] == fd
)
/* Unordered removal: the last entry takes the freed slot.  */
367 open_pipes
[j
] = open_pipes
[--n_open_pipes
];
/* A waitpid failure with ECHILD is tolerated; any other is fatal.  */
375 if (waitpid (pid
, &wstatus
, 0) == -1 && errno
!= ECHILD
)
376 error (EXIT_FAILURE
, errno
, _("waiting for child process"));
377 if (WIFSIGNALED (wstatus
))
379 int sig
= WTERMSIG (wstatus
);
/* The buffer is sized for either a signal name or a decimal int; the
   number is printed when sig2str cannot name the signal.  */
382 char signame
[MAX (SIG2STR_MAX
, INT_BUFSIZE_BOUND (int))];
383 if (sig2str (sig
, signame
) != 0)
384 sprintf (signame
, "%d", sig
);
386 _("with FILE=%s, signal %s from command: %s"),
387 name
, signame
, filter_command
);
390 else if (WIFEXITED (wstatus
))
392 int ex
= WEXITSTATUS (wstatus
);
/* The exit status of the filter is passed as the status argument of
   error.  NOTE(review): any guard around this call is not in this
   listing.  */
394 error (ex
, 0, _("with FILE=%s, exit %d from command: %s"),
395 name
, ex
, filter_command
);
399 /* shouldn't happen. */
400 error (EXIT_FAILURE
, 0,
401 _("unknown status from command (0x%X)"), wstatus
);
406 /* Write BYTES bytes at BP to an output file.
407 If NEW_FILE_FLAG is true, open the next output file.
408 Otherwise add to the same output file already in use. */
/* Callers in this file also pass BP == NULL with BYTES == 0 as a
   placeholder write whose purpose is to make an output file exist;
   that combination is tested together with elide_empty_files first.
   The previous output is closed with closeout before create (outfile)
   supplies the new descriptor in output_desc.  A short full_write is
   fatal unless ignorable (errno) holds.
   NOTE(review): the test on NEW_FILE_FLAG, the statement guarded by
   the elide_empty_files test (presumably an early return) and the call
   that advances the file name are not in this listing.  */
411 cwrite (bool new_file_flag
, const char *bp
, size_t bytes
)
415 if (!bp
&& bytes
== 0 && elide_empty_files
)
417 closeout (NULL
, output_desc
, filter_pid
, outfile
);
419 if ((output_desc
= create (outfile
)) < 0)
420 error (EXIT_FAILURE
, errno
, "%s", outfile
);
422 if (full_write (output_desc
, bp
, bytes
) != bytes
&& ! ignorable (errno
))
423 error (EXIT_FAILURE
, errno
, "%s", outfile
);
426 /* Split into pieces of exactly N_BYTES bytes.
427 Use buffer BUF, whose size is BUFSIZE. */
/* MAX_FILES, when nonzero, caps the number of output files started:
   after each completed piece new_file_flag becomes
   !max_files || opened < max_files, so once the cap is reached the
   remaining data keeps going to the current file.  OPENED counts files
   started, incremented by the flag value passed to cwrite.  Input is
   read with full_read until a read returns fewer than BUFSIZE bytes; a
   short read with errno set is fatal.  After the loop, placeholder
   writes are issued until OPENED reaches MAX_FILES.
   NOTE(review): the declarations of n_read, to_read, bp_out and w, the
   inner loop, and the statements that update to_write are on lines
   missing from this listing.  */
430 bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
, uintmax_t max_files
)
433 bool new_file_flag
= true;
/* Bytes still owed to the current piece.  */
435 uintmax_t to_write
= n_bytes
;
437 uintmax_t opened
= 0;
441 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
442 if (n_read
< bufsize
&& errno
)
443 error (EXIT_FAILURE
, errno
, "%s", infile
);
/* What remains in the buffer does not complete the current piece.  */
448 if (to_read
< to_write
)
450 if (to_read
) /* do not write 0 bytes! */
452 cwrite (new_file_flag
, bp_out
, to_read
);
453 opened
+= new_file_flag
;
455 new_file_flag
= false;
462 cwrite (new_file_flag
, bp_out
, w
);
463 opened
+= new_file_flag
;
464 new_file_flag
= !max_files
|| (opened
< max_files
);
465 if (!new_file_flag
&& ignorable (errno
))
467 /* If filter no longer accepting input, stop reading. */
477 while (n_read
== bufsize
);
479 /* Ensure NUMBER files are created, which truncates
480 any existing files or notifies any consumers on fifos.
481 FIXME: Should we do this before EXIT_FAILURE? */
482 while (opened
++ < max_files
)
483 cwrite (true, NULL
, 0);
486 /* Split into pieces of exactly N_LINES lines.
487 Use buffer BUF, whose size is BUFSIZE. */
/* Input is read with full_read until a read returns fewer than BUFSIZE
   bytes; a short read with errno set is fatal.  Newlines are located
   with memchr.  Data left at the end of a buffer is written to the
   current file with new_file_flag then cleared; the later cwrite that
   ends at BP is followed by setting new_file_flag so that the next
   write starts a new file.
   NOTE(review): the line counter, the setup of bp, bp_out and eob, and
   the loop structure are on lines missing from this listing.  */
490 lines_split (uintmax_t n_lines
, char *buf
, size_t bufsize
)
493 char *bp
, *bp_out
, *eob
;
494 bool new_file_flag
= true;
499 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
500 if (n_read
< bufsize
&& errno
)
501 error (EXIT_FAILURE
, errno
, "%s", infile
);
/* NOTE(review): the search length is eob - bp + 1, one byte beyond
   EOB.  This presumably relies on a sentinel newline stored at *eob by
   a line not shown here; main allocates room for in_blk_size + 1
   bytes.  TODO confirm.  */
507 bp
= memchr (bp
, '\n', eob
- bp
+ 1);
510 if (eob
!= bp_out
) /* do not write 0 bytes! */
512 size_t len
= eob
- bp_out
;
513 cwrite (new_file_flag
, bp_out
, len
);
514 new_file_flag
= false;
522 cwrite (new_file_flag
, bp_out
, bp
- bp_out
);
524 new_file_flag
= true;
529 while (n_read
== bufsize
);
532 /* Split into pieces that are as large as possible while still not more
533 than N_BYTES bytes, and are split on line boundaries except
534 where lines longer than N_BYTES bytes occur.
535 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
536 buffer of size N_BYTES, in case N_BYTES is very large. */
/* Unlike the other splitters this one allocates its own N_BYTES byte
   buffer.  Each round tops the buffer up with full_read, picks an end
   point BP, writes buf up to BP as a new output file, and moves the
   remainder to the front of the buffer for the next round.
   NOTE(review): the enclosing loop, its exit test and any release of
   BUF are on lines missing from this listing.  */
539 line_bytes_split (size_t n_bytes
)
/* Bytes currently held in BUF.  */
543 size_t n_buffered
= 0;
544 char *buf
= xmalloc (n_bytes
);
548 /* Fill up the full buffer size from the input file. */
550 size_t to_read
= n_bytes
- n_buffered
;
551 size_t n_read
= full_read (STDIN_FILENO
, buf
+ n_buffered
, to_read
);
552 if (n_read
< to_read
&& errno
)
553 error (EXIT_FAILURE
, errno
, "%s", infile
);
555 n_buffered
+= n_read
;
556 if (n_buffered
!= n_bytes
)
563 /* Find where to end this chunk. */
564 bp
= buf
+ n_buffered
;
/* Only a full buffer is cut back to a line boundary: BP moves left
   until the byte before it is a newline or the buffer start is
   reached.  */
565 if (n_buffered
== n_bytes
)
567 while (bp
> buf
&& bp
[-1] != '\n')
571 /* If chunk has no newlines, use all the chunk. */
573 bp
= buf
+ n_buffered
;
575 /* Output the chars as one output file. */
576 cwrite (true, buf
, bp
- buf
);
578 /* Discard the chars we just output; move rest of chunk
579 down to be the start of the next chunk. Source and
580 destination probably overlap. */
581 n_buffered
-= bp
- buf
;
583 memmove (buf
, bp
, n_buffered
);
589 /* -n l/[K/]N: Write lines to files of approximately file size / N.
590 The file is partitioned into file size / N sized portions, with the
591 last assigned any excess. If a line _starts_ within a partition
592 it is written completely to the corresponding file. Since lines
593 are not split even if they overlap a partition, the files written
594 can be larger or smaller than the partition size, and even empty
595 if a line is so long as to completely overlap the partition. */
/* K selects a single chunk to copy to standard output; the final loop,
   guarded by !k, shows that K == 0 means all N chunks are written to
   files.  FILE_SIZE is asserted to be at least N, so chunk_size is
   never zero.  chunk_end is the offset of the last byte of the current
   chunk, and n_written is the count of input bytes consumed so far as
   far as this listing shows.
   NOTE(review): the last parameter line, the declarations of
   n_written, to_write and next, and most branch tests are on lines
   missing from this listing.  */
598 lines_chunk_split (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
601 assert (n
&& k
<= n
&& n
<= file_size
);
603 const off_t chunk_size
= file_size
/ n
;
604 uintmax_t chunk_no
= 1;
605 off_t chunk_end
= chunk_size
- 1;
607 bool new_file_flag
= true;
608 bool chunk_truncated
= false;
612 /* Start reading 1 byte before kth chunk of file. */
/* NOTE(review): k is uintmax_t, so for k == 1 the expression yields
   -1 only after conversion to off_t.  Presumably a test on k that is
   not visible here keeps this path to k > 1; TODO confirm.  */
613 off_t start
= (k
- 1) * chunk_size
- 1;
614 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
615 error (EXIT_FAILURE
, errno
, "%s", infile
);
618 chunk_end
= chunk_no
* chunk_size
- 1;
621 while (n_written
< file_size
)
623 char *bp
= buf
, *eob
;
624 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
625 if (n_read
< bufsize
&& errno
)
626 error (EXIT_FAILURE
, errno
, "%s", infile
);
627 else if (n_read
== 0)
/* Never process more than the size measured up front.  */
629 n_read
= MIN (n_read
, file_size
- n_written
);
630 chunk_truncated
= false;
638 /* Begin looking for '\n' at last byte of chunk. */
/* SKIP is the distance to the end of the current chunk, clamped to the
   range from 0 to n_read.  */
639 off_t skip
= MIN (n_read
, MAX (0, chunk_end
- n_written
));
640 char *bp_out
= memchr (bp
+ skip
, '\n', n_read
- skip
);
645 to_write
= bp_out
- bp
;
649 /* We don't use the stdout buffer here since we're writing
650 large chunks from an existing file, so it's more efficient
651 to write out directly. */
652 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
653 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
656 cwrite (new_file_flag
, bp
, to_write
);
657 n_written
+= to_write
;
660 new_file_flag
= next
;
662 /* A line could have been so long that it skipped
663 entire chunks. So create empty files in that case. */
664 while (next
|| chunk_end
<= n_written
- 1)
666 if (!next
&& bp
== eob
)
668 /* replenish buf, before going to next chunk. */
669 chunk_truncated
= true;
673 if (k
&& chunk_no
> k
)
676 chunk_end
= file_size
- 1; /* >= chunk_size. */
678 chunk_end
+= chunk_size
;
679 if (chunk_end
<= n_written
- 1)
682 cwrite (true, NULL
, 0);
693 /* Ensure NUMBER files are created, which truncates
694 any existing files or notifies any consumers on fifos.
695 FIXME: Should we do this before EXIT_FAILURE? */
696 while (!k
&& chunk_no
++ <= n
)
697 cwrite (true, NULL
, 0);
700 /* -n K/N: Extract Kth of N chunks. */
/* Chunk K covers the byte range from (K - 1) * (FILE_SIZE / N) up to
   but not including K * (FILE_SIZE / N); the last chunk (K == N)
   extends to FILE_SIZE instead, so it absorbs the division remainder.
   K and N must be nonzero with K <= N <= FILE_SIZE (asserted).  The
   input is positioned with a relative seek (SEEK_CUR) of START bytes
   and the data is copied to standard output with full_write.
   NOTE(review): the last parameter line, the declarations of start and
   end, the copy loop and the update of start between reads are on
   lines missing from this listing.  */
703 bytes_chunk_extract (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
709 assert (k
&& n
&& k
<= n
&& n
<= file_size
);
711 start
= (k
- 1) * (file_size
/ n
);
712 end
= (k
== n
) ? file_size
: k
* (file_size
/ n
);
714 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
715 error (EXIT_FAILURE
, errno
, "%s", infile
);
719 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
720 if (n_read
< bufsize
&& errno
)
721 error (EXIT_FAILURE
, errno
, "%s", infile
);
722 else if (n_read
== 0)
/* Do not copy past the end of the requested chunk.  */
724 n_read
= MIN (n_read
, end
- start
);
/* A short write is fatal unless ignorable (errno) holds.  */
725 if (full_write (STDOUT_FILENO
, buf
, n_read
) != n_read
726 && ! ignorable (errno
))
727 error (EXIT_FAILURE
, errno
, "%s", quote ("-"));
/* Per-output-file record used by ofile_open and lines_rr.
   NOTE(review): the member list and the typedef name are not in this
   listing.  The members referenced elsewhere in the file are of_name,
   ofd, ofile and opid, through the type name of_t; ofd is also compared
   against the markers OFD_NEW and OFD_APPEND.  */
732 typedef struct of_info
746 /* Rotate file descriptors when we're writing to more output files than we
747 have available file descriptors.
748 Return whether we came under file resource pressure.
749 If so, it's probably best to close each file when finished with it. */
/* FILES has NFILES entries and I_CHECK is the entry that must end up
   open.  Work is needed only when files[i_check].ofd <= OFD_NEW.  An
   OFD_NEW entry is opened through create; any other such entry is
   reopened for appending.  When the open fails with EMFILE or ENFILE,
   the function walks backwards with wraparound, starting at the entry
   before I_CHECK, to find an entry with a nonnegative ofd, closes its
   stream and marks it OFD_APPEND.  Any other open failure is fatal, as
   is reaching I_CHECK again without finding anything to close.  On
   success the descriptor is stored, wrapped with fdopen in append
   mode, and opid is set from filter_pid.
   NOTE(review): the retry loop, the test on fd, the assignment that
   sets file_limit and the return statement are on lines missing from
   this listing.  */
752 ofile_open (of_t
*files
, size_t i_check
, size_t nfiles
)
754 bool file_limit
= false;
756 if (files
[i_check
].ofd
<= OFD_NEW
)
/* First candidate to close: the entry just before I_CHECK, wrapping
   to the last entry.  */
759 size_t i_reopen
= i_check
? i_check
- 1 : nfiles
- 1;
761 /* Another process could have opened a file in between the calls to
762 close and open, so we should keep trying until open succeeds or
763 we've closed all of our files. */
766 if (files
[i_check
].ofd
== OFD_NEW
)
767 fd
= create (files
[i_check
].of_name
);
768 else /* OFD_APPEND */
770 /* Attempt to append to previously opened file.
771 We use O_NONBLOCK to support writing to fifos,
772 where the other end has closed because of our
773 previous close. In that case we'll immediately
774 get an error, rather than waiting indefinitely.
775 In specialised cases the consumer can keep reading
776 from the fifo, terminating on conditions in the data
777 itself, or perhaps never in the case of `tail -f`.
778 I.E. for fifos it is valid to attempt this reopen.
780 We don't handle the filter_command case here, as create()
781 will exit if there are not enough files in that case.
782 I.E. we don't support restarting filters, as that would
783 put too much burden on users specifying --filter commands. */
784 fd
= open (files
[i_check
].of_name
,
785 O_WRONLY
| O_BINARY
| O_APPEND
| O_NONBLOCK
);
/* Only descriptor exhaustion is recoverable.  */
791 if (!(errno
== EMFILE
|| errno
== ENFILE
))
792 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
796 /* Search backwards for an open file to close. */
797 while (files
[i_reopen
].ofd
< 0)
799 i_reopen
= i_reopen
? i_reopen
- 1 : nfiles
- 1;
800 /* No more open files to close, exit with E[NM]FILE. */
801 if (i_reopen
== i_check
)
802 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
805 if (fclose (files
[i_reopen
].ofile
) != 0)
806 error (EXIT_FAILURE
, errno
, "%s", files
[i_reopen
].of_name
);
/* Mark the victim as closed but previously created, so that a later
   call reopens it for append instead of truncating it.  */
807 files
[i_reopen
].ofile
= NULL
;
808 files
[i_reopen
].ofd
= OFD_APPEND
;
811 files
[i_check
].ofd
= fd
;
812 if (!(files
[i_check
].ofile
= fdopen (fd
, "a")))
813 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
814 files
[i_check
].opid
= filter_pid
;
821 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
822 When K == 0, we try to keep the files open in parallel.
823 If we run out of file resources, then we revert
824 to opening and closing each file for each line. */
/* BUF of BUFSIZE bytes is the read buffer.  An array of N of_t records
   is allocated, each given a copy of the current OUTFILE name and
   marked OFD_NEW.  Input is read with safe_read and cut into lines
   with memchr.  On the extraction path a line is written to standard
   output when line_no equals K, and line_no cycles from 1 to N.  On
   the file path ofile_open makes files[i_file] usable before each
   write, and a stream is closed again and marked OFD_APPEND in the
   visible fclose block.  The final loop creates the not yet created
   files (unless elide_empty_files is set) and closes every open one.
   NOTE(review): the tests choosing between the two paths, the call
   that advances OUTFILE between names, and the declarations of i_file,
   line_no, to_write, next and file_limit are on lines missing from
   this listing.  */
827 lines_rr (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
)
829 bool wrapped
= false;
833 of_t
*files
IF_LINT (= NULL
);
/* NOTE(review): the condition guarding this diagnostic is not shown;
   presumably it is a size overflow check on n made before the
   allocation below.  */
841 error (exit_failure
, 0, "%s", _("memory exhausted"));
842 files
= xnmalloc (n
, sizeof *files
);
844 /* Generate output file names. */
845 for (i_file
= 0; i_file
< n
; i_file
++)
848 files
[i_file
].of_name
= xstrdup (outfile
);
849 files
[i_file
].ofd
= OFD_NEW
;
850 files
[i_file
].ofile
= NULL
;
851 files
[i_file
].opid
= 0;
859 char *bp
= buf
, *eob
;
860 /* Use safe_read() rather than full_read() here
861 so that we process available data immediately. */
862 size_t n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
863 if (n_read
== SAFE_READ_ERROR
)
864 error (EXIT_FAILURE
, errno
, "%s", infile
);
865 else if (n_read
== 0)
874 /* Find end of line. */
875 char *bp_out
= memchr (bp
, '\n', eob
- bp
);
883 to_write
= bp_out
- bp
;
/* Extraction path: unbuffered mode writes straight to the descriptor,
   otherwise the stdout stream is used.  */
887 if (line_no
== k
&& unbuffered
)
889 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
890 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
892 else if (line_no
== k
&& fwrite (bp
, to_write
, 1, stdout
) != 1)
894 clearerr (stdout
); /* To silence close_stdout(). */
895 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
898 line_no
= (line_no
== n
) ? 1 : line_no
+ 1;
902 /* Secure file descriptor. */
903 file_limit
|= ofile_open (files
, i_file
, n
);
906 /* Note writing to fd, rather than flushing the FILE gives
907 an 8% performance benefit, due to reduced data copying. */
908 if (full_write (files
[i_file
].ofd
, bp
, to_write
) != to_write
909 && ! ignorable (errno
))
910 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
912 else if (fwrite (bp
, to_write
, 1, files
[i_file
].ofile
) != 1
913 && ! ignorable (errno
))
914 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
915 if (! ignorable (errno
))
920 if (fclose (files
[i_file
].ofile
) != 0)
921 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
922 files
[i_file
].ofile
= NULL
;
923 files
[i_file
].ofd
= OFD_APPEND
;
/* Advance to the next file at the end of a line; reaching N marks the
   end of one full round.  */
925 if (next
&& ++i_file
== n
)
928 /* If no filters are accepting input, stop reading. */
941 /* Ensure all files created, so that any existing files are truncated,
942 and to signal any waiting fifo consumers.
943 Also, close any open file descriptors.
944 FIXME: Should we do this before EXIT_FAILURE? */
/* Files with an index below CEILING have already been reached by the
   round robin.
   NOTE(review): ceiling is an int, yet it is initialised from n, which
   is uintmax_t; values beyond INT_MAX would not be represented.  */
947 int ceiling
= (wrapped
? n
: i_file
);
948 for (i_file
= 0; i_file
< n
; i_file
++)
950 if (i_file
>= ceiling
&& !elide_empty_files
)
951 file_limit
|= ofile_open (files
, i_file
, n
);
952 if (files
[i_file
].ofd
>= 0)
953 closeout (files
[i_file
].ofile
, files
[i_file
].ofd
,
954 files
[i_file
].opid
, files
[i_file
].of_name
);
955 files
[i_file
].ofd
= OFD_APPEND
;
/* Used by main when a second split mode option is seen: print the
   diagnostic without exiting (status 0), then call
   usage (EXIT_FAILURE).
   NOTE(review): the last visible line still ends with a continuation
   backslash and the numbering skips 961-962, so the wrapper lines of
   the macro body are not in this listing.  */
960 #define FAIL_ONLY_ONE_WAY() \
963 error (0, 0, _("cannot split in more than one way")); \
964 usage (EXIT_FAILURE); \
968 /* Parse K/N syntax of chunk options. */
/* SLASH points at the slash found inside optarg by the caller.  The
   text after the slash is parsed as a decimal number into *N_UNITS.
   When the slash is not the first character of optarg, the leading
   text is parsed into *K_UNITS, which must be nonzero and no greater
   than *N_UNITS.  Every violation ends the program with a diagnostic.
   NOTE(review): the rest of the first condition (numbering skips 975)
   and any statement that splits optarg at the slash before the second
   parse are on lines missing from this listing.  */
971 parse_chunk (uintmax_t *k_units
, uintmax_t *n_units
, char *slash
)
974 if (xstrtoumax (slash
+ 1, NULL
, 10, n_units
, "") != LONGINT_OK
976 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), slash
+ 1);
977 if (slash
!= optarg
/* a leading number is specified. */
978 && (xstrtoumax (optarg
, NULL
, 10, k_units
, "") != LONGINT_OK
979 || *k_units
== 0 || *n_units
< *k_units
))
980 error (EXIT_FAILURE
, 0, _("%s: invalid chunk number"), optarg
);
/* Program entry point.  In order, the visible code sets up the locale
   and the exit hook, parses the options into split_type, n_units,
   k_units and the file-scope flags, applies the default mode, fixes
   the suffix length, takes the INPUT and PREFIX operands, reopens
   standard input on INPUT unless it is a lone dash, sizes and
   allocates the I/O buffer, blocks SIGPIPE for filter use, dispatches
   to the splitter for the chosen mode, and finally closes standard
   input and the last output.
   NOTE(review): the option loop, the switch and most case labels, and
   the declarations of n_units, file_size, c, tmp and slash are on
   lines missing from this listing.  The remarks below name options
   only where a diagnostic or assignment makes the option evident.  */
985 main (int argc
, char **argv
)
987 struct stat stat_buf
;
988 enum Split_type split_type
= type_undef
;
989 size_t in_blk_size
= 0; /* optimal block size of input file device */
990 char *buf
; /* file i/o buffer */
991 size_t page_size
= getpagesize ();
/* Stays zero unless a K/N form is parsed by parse_chunk.  */
992 uintmax_t k_units
= 0;
/* Size suffixes accepted by the byte count and block size parsers.  */
995 static char const multipliers
[] = "bEGKkMmPTYZ0";
997 int digits_optind
= 0;
1000 initialize_main (&argc
, &argv
);
1001 set_program_name (argv
[0]);
1002 setlocale (LC_ALL
, "");
1003 bindtextdomain (PACKAGE
, LOCALEDIR
);
1004 textdomain (PACKAGE
);
1006 atexit (close_stdout
);
1008 /* Parse command line options. */
/* Defaults: read standard input, prefix x.  */
1010 infile
= bad_cast ("-");
1011 outbase
= bad_cast ("x");
1015 /* This is the argv-index of the option we will read next. */
1016 int this_optind
= optind
? optind
: 1;
1019 c
= getopt_long (argc
, argv
, "0123456789C:a:b:del:n:u",
/* Suffix length: the value is rejected when it would overflow an array
   of size_t of that length.  */
1029 if (xstrtoul (optarg
, NULL
, 10, &tmp
, "") != LONGINT_OK
1030 || SIZE_MAX
/ sizeof (size_t) < tmp
)
1032 error (0, 0, _("%s: invalid suffix length"), optarg
);
1033 usage (EXIT_FAILURE
);
1035 suffix_length
= tmp
;
/* Byte count mode.  */
1040 if (split_type
!= type_undef
)
1041 FAIL_ONLY_ONE_WAY ();
1042 split_type
= type_bytes
;
1043 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
1046 error (0, 0, _("%s: invalid number of bytes"), optarg
);
1047 usage (EXIT_FAILURE
);
1049 /* If input is a pipe, we could get more data than is possible
1050 to write to a single file, so indicate that immediately
1051 rather than having possibly future invocations fail. */
1052 if (OFF_T_MAX
< n_units
)
1053 error (EXIT_FAILURE
, EFBIG
,
1054 _("%s: invalid number of bytes"), optarg
);
/* Line count mode.  */
1059 if (split_type
!= type_undef
)
1060 FAIL_ONLY_ONE_WAY ();
1061 split_type
= type_lines
;
1062 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
1065 error (0, 0, _("%s: invalid number of lines"), optarg
);
1066 usage (EXIT_FAILURE
);
/* Line-bytes mode: the count must be nonzero and fit in size_t, since
   line_bytes_split allocates a buffer of that many bytes.  */
1071 if (split_type
!= type_undef
)
1072 FAIL_ONLY_ONE_WAY ();
1073 split_type
= type_byteslines
;
1074 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
1075 || n_units
== 0 || SIZE_MAX
< n_units
)
1077 error (0, 0, _("%s: invalid number of bytes"), optarg
);
1078 usage (EXIT_FAILURE
);
1080 if (OFF_T_MAX
< n_units
)
1081 error (EXIT_FAILURE
, EFBIG
,
1082 _("%s: invalid number of bytes"), optarg
);
/* Chunk mode: an r/ prefix selects round robin, an l/ prefix selects
   line-preserving chunks, anything else byte chunks.  */
1086 if (split_type
!= type_undef
)
1087 FAIL_ONLY_ONE_WAY ();
1088 /* skip any whitespace */
1089 while (isspace (to_uchar (*optarg
)))
1091 if (STRNCMP_LIT (optarg
, "r/") == 0)
1093 split_type
= type_rr
;
1096 else if (STRNCMP_LIT (optarg
, "l/") == 0)
1098 split_type
= type_chunk_lines
;
1102 split_type
= type_chunk_bytes
;
1103 if ((slash
= strchr (optarg
, '/')))
1104 parse_chunk (&k_units
, &n_units
, slash
);
1105 else if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
1107 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), optarg
);
/* Bare digit options: digits within one argv element accumulate into
   n_units; digits from a different element restart the count.  */
1124 if (split_type
== type_undef
)
1126 split_type
= type_digits
;
1129 if (split_type
!= type_undef
&& split_type
!= type_digits
)
1130 FAIL_ONLY_ONE_WAY ();
1131 if (digits_optind
!= 0 && digits_optind
!= this_optind
)
1132 n_units
= 0; /* More than one number given; ignore other. */
1133 digits_optind
= this_optind
;
1134 if (!DECIMAL_DIGIT_ACCUMULATE (n_units
, c
- '0', uintmax_t))
1136 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1137 error (EXIT_FAILURE
, 0,
1138 _("line count option -%s%c... is too large"),
1139 umaxtostr (n_units
, buffer
), c
);
1144 suffix_alphabet
= "0123456789";
1148 elide_empty_files
= true;
1152 filter_command
= optarg
;
1155 case IO_BLKSIZE_OPTION
:
1157 uintmax_t tmp_blk_size
;
/* The upper bound leaves room for the page_size padding added when the
   buffer is allocated.
   NOTE(review): the diagnostic below uses status 0 and so does not
   exit; numbering skips 1162, so whether the assignment that follows
   is in an else branch cannot be seen here.  */
1158 if (xstrtoumax (optarg
, NULL
, 10, &tmp_blk_size
,
1159 multipliers
) != LONGINT_OK
1160 || tmp_blk_size
== 0 || SIZE_MAX
- page_size
< tmp_blk_size
)
1161 error (0, 0, _("%s: invalid IO block size"), optarg
);
1163 in_blk_size
= tmp_blk_size
;
1167 case VERBOSE_OPTION
:
1171 case_GETOPT_HELP_CHAR
;
1173 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1176 usage (EXIT_FAILURE
);
/* Extracting one chunk to standard output cannot be combined with a
   filter command.  */
1180 if (k_units
!= 0 && filter_command
)
1182 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1183 usage (EXIT_FAILURE
);
1186 /* Handle default case. */
1187 if (split_type
== type_undef
)
1189 split_type
= type_lines
;
/* NOTE(review): the condition guarding this diagnostic is not shown;
   presumably it rejects n_units == 0, which would otherwise reach the
   division by n_units in the chunk-bytes dispatch below.  */
1195 error (0, 0, _("%s: invalid number of lines"), "0");
1196 usage (EXIT_FAILURE
);
1199 set_suffix_length (n_units
, split_type
);
1201 /* Get out the filename arguments. */
1204 infile
= argv
[optind
++];
1207 outbase
= argv
[optind
++];
1211 error (0, 0, _("extra operand %s"), quote (argv
[optind
]));
1212 usage (EXIT_FAILURE
);
1215 /* Open the input file. */
1216 if (! STREQ (infile
, "-")
1217 && fd_reopen (STDIN_FILENO
, infile
, O_RDONLY
, 0) < 0)
1218 error (EXIT_FAILURE
, errno
, _("cannot open %s for reading"),
1221 /* Binary I/O is safer when byte counts are used. */
1222 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
1223 xfreopen (NULL
, "rb", stdin
);
1225 /* Get the optimal block size of input device and make a buffer. */
1227 if (fstat (STDIN_FILENO
, &stat_buf
) != 0)
1228 error (EXIT_FAILURE
, errno
, "%s", infile
);
/* A block size given on the command line takes precedence.  */
1229 if (in_blk_size
== 0)
1230 in_blk_size
= io_blksize (stat_buf
);
1231 file_size
= stat_buf
.st_size
;
/* The size-based chunk modes need the amount of data remaining from
   the current offset, so the input must be seekable.  */
1233 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
)
1235 off_t input_offset
= lseek (STDIN_FILENO
, 0, SEEK_CUR
);
1236 if (input_offset
< 0)
1237 error (EXIT_FAILURE
, 0, _("%s: cannot determine file size"),
1239 file_size
-= input_offset
;
1240 /* Overflow, and sanity checking. */
1241 if (OFF_T_MAX
< n_units
)
1243 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1244 error (EXIT_FAILURE
, EFBIG
, _("%s: invalid number of chunks"),
1245 umaxtostr (n_units
, buffer
));
1247 /* increase file_size to n_units here, so that we still process
1248 any input data, and create empty files for the rest. */
1249 file_size
= MAX (file_size
, n_units
);
/* One byte beyond in_blk_size is reserved, plus page_size - 1 bytes of
   slack so that ptr_align can return a page-aligned start.  */
1252 buf
= ptr_align (xmalloc (in_blk_size
+ 1 + page_size
- 1), page_size
);
1254 /* When filtering, closure of one pipe must not terminate the process,
1255 as there may still be other streams expecting input from us. */
1258 struct sigaction act
;
1259 sigemptyset (&newblocked
);
1260 sigaction (SIGPIPE
, NULL
, &act
);
/* SIGPIPE is added to the blocked set only when it is not already
   being ignored.  */
1261 if (act
.sa_handler
!= SIG_IGN
)
1262 sigaddset (&newblocked
, SIGPIPE
);
1263 sigprocmask (SIG_BLOCK
, &newblocked
, &oldblocked
);
/* Dispatch on split_type.  */
1270 lines_split (n_units
, buf
, in_blk_size
);
1274 bytes_split (n_units
, buf
, in_blk_size
, 0);
1277 case type_byteslines
:
1278 line_bytes_split (n_units
);
1281 case type_chunk_bytes
:
/* Two calls are visible for this mode: a capped bytes_split and
   bytes_chunk_extract.  NOTE(review): the test choosing between them
   is not shown; presumably it is on k_units.  */
1283 bytes_split (file_size
/ n_units
, buf
, in_blk_size
, n_units
);
1285 bytes_chunk_extract (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1288 case type_chunk_lines
:
1289 lines_chunk_split (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1293 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1294 but the functionality is provided for symmetry. */
1295 lines_rr (k_units
, n_units
, buf
, in_blk_size
);
1302 if (close (STDIN_FILENO
) != 0)
1303 error (EXIT_FAILURE
, errno
, "%s", infile
);
/* Close the last output, which also reaps its filter child.  */
1304 closeout (NULL
, output_desc
, filter_pid
, outfile
);
1306 exit (EXIT_SUCCESS
);