split: support split -n on larger pipe input
[coreutils.git] / src / split.c
blobd872ec56a251b71fb347b545e010676d66e040d3
1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
19 TODO:
20 * support -p REGEX as in BSD's split.
21 * support --suppress-matched as in csplit. */
22 #include <config.h>
24 #include <assert.h>
25 #include <stdio.h>
26 #include <getopt.h>
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
31 #include "system.h"
32 #include "alignalloc.h"
33 #include "die.h"
34 #include "error.h"
35 #include "fd-reopen.h"
36 #include "fcntl--.h"
37 #include "full-write.h"
38 #include "idx.h"
39 #include "ioblksize.h"
40 #include "quote.h"
41 #include "sig2str.h"
42 #include "sys-limits.h"
43 #include "xbinary-io.h"
44 #include "xdectoint.h"
45 #include "xstrtol.h"
47 /* The official name of this program (e.g., no 'g' prefix). */
48 #define PROGRAM_NAME "split"
50 #define AUTHORS \
51 proper_name ("Torbjorn Granlund"), \
52 proper_name ("Richard M. Stallman")
54 /* Shell command to filter through, instead of creating files. */
55 static char const *filter_command;
57 /* Process ID of the filter. */
/* create () stores here the PID of the child it forked most recently. */
58 static pid_t filter_pid;
60 /* Array of open pipes. */
/* Each entry is the write end of a pipe that create () opened to a
   filter child; closeout () drops the entry that matches the
   descriptor it is given. */
61 static int *open_pipes;
/* Number of slots allocated for OPEN_PIPES (grown by create ()). */
62 static idx_t open_pipes_alloc;
/* Number of slots of OPEN_PIPES currently in use. */
63 static int n_open_pipes;
65 /* Whether SIGPIPE has the default action, when --filter is used. */
66 static bool default_SIGPIPE;
68 /* Base name of output files. */
69 static char const *outbase;
71 /* Name of output files. */
/* Allocated and updated in place by next_file_name (). */
72 static char *outfile;
74 /* Pointer to the end of the prefix in OUTFILE.
75 Suffixes are inserted here. */
76 static char *outfile_mid;
78 /* Generate new suffix when suffixes are exhausted. */
79 static bool suffix_auto = true;
81 /* Length of OUTFILE's suffix. */
/* Zero means "not set by the user"; set_suffix_length () then picks
   a value. */
82 static idx_t suffix_length;
84 /* Alphabet of characters to use in suffix. */
85 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
87 /* Numerical suffix start value. */
88 static char const *numeric_suffix_start;
90 /* Additional suffix to append to output file names. */
91 static char const *additional_suffix;
93 /* Name of input file. May be "-". */
94 static char *infile;
96 /* stat buf for input file. */
/* create () compares this against an existing output file so that
   the input is never overwritten. */
97 static struct stat in_stat_buf;
99 /* Descriptor on which output file is open. */
/* Stays -1 until cwrite () opens the first output. */
100 static int output_desc = -1;
102 /* If true, print a diagnostic on standard error just before each
103 output file is opened. */
104 static bool verbose;
106 /* If true, don't generate zero length output files. */
107 static bool elide_empty_files;
109 /* If true, in round robin mode, immediately copy
110 input to output, which is much slower, so disabled by default. */
111 static bool unbuffered;
113 /* The character marking end of line. Defaults to \n below. */
114 static int eolchar = -1;
/* The split mode to use.  The enumerators keep their original order,
   so their numeric values are unchanged.  */
enum Split_type
{
  type_undef,
  type_bytes,
  type_byteslines,
  type_lines,
  type_digits,
  type_chunk_bytes,
  type_chunk_lines,
  type_rr
};
/* Long options without a short equivalent are identified by pseudo
   short options: non-character values that start just above
   CHAR_MAX.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  FILTER_OPTION = VERBOSE_OPTION + 1,
  IO_BLKSIZE_OPTION = FILTER_OPTION + 1,
  ADDITIONAL_SUFFIX_OPTION = IO_BLKSIZE_OPTION + 1
};
133 static struct option const longopts[] =
135 {"bytes", required_argument, NULL, 'b'},
136 {"lines", required_argument, NULL, 'l'},
137 {"line-bytes", required_argument, NULL, 'C'},
138 {"number", required_argument, NULL, 'n'},
139 {"elide-empty-files", no_argument, NULL, 'e'},
140 {"unbuffered", no_argument, NULL, 'u'},
141 {"suffix-length", required_argument, NULL, 'a'},
142 {"additional-suffix", required_argument, NULL,
143 ADDITIONAL_SUFFIX_OPTION},
144 {"numeric-suffixes", optional_argument, NULL, 'd'},
145 {"hex-suffixes", optional_argument, NULL, 'x'},
146 {"filter", required_argument, NULL, FILTER_OPTION},
147 {"verbose", no_argument, NULL, VERBOSE_OPTION},
148 {"separator", required_argument, NULL, 't'},
149 {"-io-blksize", required_argument, NULL,
150 IO_BLKSIZE_OPTION}, /* do not document */
151 {GETOPT_HELP_OPTION_DECL},
152 {GETOPT_VERSION_OPTION_DECL},
153 {NULL, 0, NULL, 0}
156 /* Return true if the errno value, ERR, is ignorable. */
157 static inline bool
158 ignorable (int err)
160 return filter_command && err == EPIPE;
163 static void
164 set_suffix_length (intmax_t n_units, enum Split_type split_type)
166 #define DEFAULT_SUFFIX_LENGTH 2
168 int suffix_length_needed = 0;
170 /* The suffix auto length feature is incompatible with
171 a user specified start value as the generated suffixes
172 are not all consecutive. */
173 if (numeric_suffix_start)
174 suffix_auto = false;
176 /* Auto-calculate the suffix length if the number of files is given. */
177 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
178 || split_type == type_rr)
180 intmax_t n_units_end = n_units - 1;
181 if (numeric_suffix_start)
183 intmax_t n_start;
184 strtol_error e = xstrtoimax (numeric_suffix_start, NULL, 10,
185 &n_start, "");
186 if (e == LONGINT_OK && n_start < n_units)
188 /* Restrict auto adjustment so we don't keep
189 incrementing a suffix size arbitrarily,
190 as that would break sort order for files
191 generated from multiple split runs. */
192 if (INT_ADD_WRAPV (n_units_end, n_start, &n_units_end))
193 n_units_end = INTMAX_MAX;
197 idx_t alphabet_len = strlen (suffix_alphabet);
199 suffix_length_needed++;
200 while (n_units_end /= alphabet_len);
202 suffix_auto = false;
205 if (suffix_length) /* set by user */
207 if (suffix_length < suffix_length_needed)
209 die (EXIT_FAILURE, 0,
210 _("the suffix length needs to be at least %d"),
211 suffix_length_needed);
213 suffix_auto = false;
214 return;
216 else
217 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_length_needed);
/* Print help and terminate the program.
   When STATUS is not EXIT_SUCCESS this calls emit_try_help ();
   otherwise the synopsis, the option descriptions and the CHUNKS
   explanation are written to standard output.  The function finishes
   by calling exit (STATUS).  */
220 void
221 usage (int status)
223 if (status != EXIT_SUCCESS)
224 emit_try_help ();
225 else
227 printf (_("\
228 Usage: %s [OPTION]... [FILE [PREFIX]]\n\
230 program_name);
231 fputs (_("\
232 Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
233 default size is 1000 lines, and default PREFIX is 'x'.\n\
234 "), stdout);
236 emit_stdin_note ();
237 emit_mandatory_arg_note ();
239 fprintf (stdout, _("\
240 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
241 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
242 -b, --bytes=SIZE put SIZE bytes per output file\n\
243 -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
244 -d use numeric suffixes starting at 0, not alphabetic\n\
245 --numeric-suffixes[=FROM] same as -d, but allow setting the start value\
247 -x use hex suffixes starting at 0, not alphabetic\n\
248 --hex-suffixes[=FROM] same as -x, but allow setting the start value\n\
249 -e, --elide-empty-files do not generate empty output files with '-n'\n\
250 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
251 -l, --lines=NUMBER put NUMBER lines/records per output file\n\
252 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
253 -t, --separator=SEP use SEP instead of newline as the record separator;\n\
254 '\\0' (zero) specifies the NUL character\n\
255 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
256 "), DEFAULT_SUFFIX_LENGTH);
257 fputs (_("\
258 --verbose print a diagnostic just before each\n\
259 output file is opened\n\
260 "), stdout);
261 fputs (HELP_OPTION_DESCRIPTION, stdout);
262 fputs (VERSION_OPTION_DESCRIPTION, stdout);
263 emit_size_note ();
264 fputs (_("\n\
265 CHUNKS may be:\n\
266 N split into N files based on size of input\n\
267 K/N output Kth of N to stdout\n\
268 l/N split into N files without splitting lines/records\n\
269 l/K/N output Kth of N to stdout without splitting lines/records\n\
270 r/N like 'l' but use round robin distribution\n\
271 r/K/N likewise but only output Kth of N to stdout\n\
272 "), stdout);
273 emit_ancillary_info (PROGRAM_NAME);
275 exit (status);
278 /* Copy the data in FD to a temporary file, then make that file FD.
279 Use BUF, of size BUFSIZE, to copy. Return the number of
280 bytes copied, or -1 (setting errno) on error. */
281 static off_t
282 copy_to_tmpfile (int fd, char *buf, idx_t bufsize)
284 FILE *tmp = tmpfile ();
285 if (!tmp)
286 return -1;
287 off_t copied = 0;
288 off_t r;
290 while (0 < (r = read (fd, buf, bufsize)))
292 if (fwrite (buf, 1, r, tmp) != r)
293 return -1;
294 if (INT_ADD_WRAPV (copied, r, &copied))
296 errno = EOVERFLOW;
297 return -1;
301 if (r < 0)
302 return r;
303 r = dup2 (fileno (tmp), fd);
304 if (r < 0)
305 return r;
306 if (fclose (tmp) < 0)
307 return -1;
308 return copied;
311 /* Return the number of bytes that can be read from FD with status ST.
312 Store up to the first BUFSIZE bytes of the file's data into BUF,
313 and advance the file position by the number of bytes read. On
314 input error, set errno and return -1. */
316 static off_t
317 input_file_size (int fd, struct stat const *st, char *buf, idx_t bufsize)
319 off_t size = 0;
322 ssize_t n_read = read (fd, buf + size, bufsize - size);
323 if (n_read <= 0)
324 return n_read < 0 ? n_read : size;
325 size += n_read;
327 while (size < bufsize);
329 off_t cur, end;
330 if ((usable_st_size (st) && st->st_size < size)
331 || (cur = lseek (fd, 0, SEEK_CUR)) < 0
332 || cur < size /* E.g., /dev/zero on GNU/Linux. */
333 || (end = lseek (fd, 0, SEEK_END)) < 0)
335 char *tmpbuf = xmalloc (bufsize);
336 end = copy_to_tmpfile (fd, tmpbuf, bufsize);
337 free (tmpbuf);
338 if (end < 0)
339 return end;
340 cur = 0;
343 if (end == OFF_T_MAX /* E.g., /dev/zero on GNU/Hurd. */
344 || (cur < end && INT_ADD_WRAPV (size, end - cur, &size)))
346 errno = EOVERFLOW;
347 return -1;
350 if (cur < end)
352 off_t r = lseek (fd, cur, SEEK_SET);
353 if (r < 0)
354 return r;
357 return size;
360 /* Compute the next sequential output file name and store it into the
361 string 'outfile'. */
/* The first call (OUTFILE still null) allocates OUTFILE and builds
   the initial name from OUTBASE, SUFFIX_LENGTH copies of the first
   alphabet character, and ADDITIONAL_SUFFIX.  Later calls increment
   the suffix in place.  When SUFFIX_AUTO is set and the leading
   suffix character has just advanced to the last alphabet character,
   control jumps back to new_name to build a wider name.  When the
   suffix cannot be advanced any further the program dies with
   "output file suffixes exhausted". */
363 static void
364 next_file_name (void)
366 /* Index in suffix_alphabet of each character in the suffix. */
367 static idx_t *sufindex;
/* Cached lengths of the prefix, of the whole name, and of the
   additional suffix.  OUTFILE_LENGTH is zero until the first name
   has been built, which is how WIDEN is detected below. */
368 static idx_t outbase_length;
369 static idx_t outfile_length;
370 static idx_t addsuf_length;
372 if (! outfile)
374 bool overflow, widen;
376 new_name:
377 widen = !! outfile_length;
379 if (! widen)
381 /* Allocate and initialize the first file name. */
383 outbase_length = strlen (outbase);
384 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
385 overflow = INT_ADD_WRAPV (outbase_length + addsuf_length,
386 suffix_length, &outfile_length);
388 else
390 /* Reallocate and initialize a new wider file name.
391 We do this by subsuming the unchanging part of
392 the generated suffix into the prefix (base), and
393 reinitializing the now one longer suffix. */
/* The name grows by two characters: one is absorbed into the prefix
   and the suffix itself becomes one longer. */
395 overflow = INT_ADD_WRAPV (outfile_length, 2, &outfile_length);
396 suffix_length++;
/* One extra byte for the terminating NUL. */
399 idx_t outfile_size;
400 overflow |= INT_ADD_WRAPV (outfile_length, 1, &outfile_size);
401 if (overflow)
402 xalloc_die ();
403 outfile = xirealloc (outfile, outfile_size);
405 if (! widen)
406 memcpy (outfile, outbase, outbase_length);
407 else
409 /* Append the last alphabet character to the file name prefix. */
410 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
411 outbase_length++;
/* Fill the suffix with the first alphabet character, then place the
   additional suffix and the terminator after it. */
414 outfile_mid = outfile + outbase_length;
415 memset (outfile_mid, suffix_alphabet[0], suffix_length);
416 if (additional_suffix)
417 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
418 outfile[outfile_length] = 0;
/* Start over with an all-zero index array sized for the new suffix. */
420 free (sufindex);
421 sufindex = xicalloc (suffix_length, sizeof *sufindex);
423 if (numeric_suffix_start)
425 assert (! widen);
/* The start value is right-aligned within the suffix, both in the
   name and in the index array. */
427 /* Update the output file name. */
428 idx_t i = strlen (numeric_suffix_start);
429 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
431 /* Update the suffix index. */
432 idx_t *sufindex_end = sufindex + suffix_length;
433 while (i-- != 0)
434 *--sufindex_end = numeric_suffix_start[i] - '0';
437 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
438 /* POSIX requires that if the output file name is too long for
439 its directory, 'split' must fail without creating any files.
440 This must be checked for explicitly on operating systems that
441 silently truncate file names. */
443 char *dir = dir_name (outfile);
444 long name_max = pathconf (dir, _PC_NAME_MAX);
445 if (0 <= name_max && name_max < base_len (last_component (outfile)))
446 die (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile));
447 free (dir);
449 #endif
451 else
453 /* Increment the suffix in place, if possible. */
/* Work from the last suffix position towards the first.  A position
   whose incremented index runs off the end of the alphabet (the
   character fetched is the NUL terminator) is reset to the first
   character and the carry moves one position to the left. */
455 idx_t i = suffix_length;
456 while (i-- != 0)
458 sufindex[i]++;
459 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
460 goto new_name;
461 outfile_mid[i] = suffix_alphabet[sufindex[i]];
462 if (outfile_mid[i])
463 return;
464 sufindex[i] = 0;
465 outfile_mid[i] = suffix_alphabet[sufindex[i]];
467 die (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
471 /* Create or truncate a file. */
473 static int
474 create (char const *name)
476 if (!filter_command)
478 if (verbose)
479 fprintf (stdout, _("creating file %s\n"), quoteaf (name));
481 int oflags = O_WRONLY | O_CREAT | O_BINARY;
482 int fd = open (name, oflags | O_EXCL, MODE_RW_UGO);
483 if (0 <= fd || errno != EEXIST)
484 return fd;
485 fd = open (name, oflags, MODE_RW_UGO);
486 if (fd < 0)
487 return fd;
488 struct stat out_stat_buf;
489 if (fstat (fd, &out_stat_buf) != 0)
490 die (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name));
491 if (SAME_INODE (in_stat_buf, out_stat_buf))
492 die (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
493 quoteaf (name));
494 bool regularish
495 = S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf);
496 if (! (regularish && out_stat_buf.st_size == 0)
497 && ftruncate (fd, 0) < 0 && regularish)
498 die (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
500 return fd;
502 else
504 int fd_pair[2];
505 pid_t child_pid;
506 char const *shell_prog = getenv ("SHELL");
507 if (shell_prog == NULL)
508 shell_prog = "/bin/sh";
509 if (setenv ("FILE", name, 1) != 0)
510 die (EXIT_FAILURE, errno,
511 _("failed to set FILE environment variable"));
512 if (verbose)
513 fprintf (stdout, _("executing with FILE=%s\n"), quotef (name));
514 if (pipe (fd_pair) != 0)
515 die (EXIT_FAILURE, errno, _("failed to create pipe"));
516 child_pid = fork ();
517 if (child_pid == 0)
519 /* This is the child process. If an error occurs here, the
520 parent will eventually learn about it after doing a wait,
521 at which time it will emit its own error message. */
522 int j;
523 /* We have to close any pipes that were opened during an
524 earlier call, otherwise this process will be holding a
525 write-pipe that will prevent the earlier process from
526 reading an EOF on the corresponding read-pipe. */
527 for (j = 0; j < n_open_pipes; ++j)
528 if (close (open_pipes[j]) != 0)
529 die (EXIT_FAILURE, errno, _("closing prior pipe"));
530 if (close (fd_pair[1]))
531 die (EXIT_FAILURE, errno, _("closing output pipe"));
532 if (fd_pair[0] != STDIN_FILENO)
534 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
535 die (EXIT_FAILURE, errno, _("moving input pipe"));
536 if (close (fd_pair[0]) != 0)
537 die (EXIT_FAILURE, errno, _("closing input pipe"));
539 if (default_SIGPIPE)
540 signal (SIGPIPE, SIG_DFL);
541 execl (shell_prog, last_component (shell_prog), "-c",
542 filter_command, (char *) NULL);
543 die (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
544 shell_prog, filter_command);
546 if (child_pid < 0)
547 die (EXIT_FAILURE, errno, _("fork system call failed"));
548 if (close (fd_pair[0]) != 0)
549 die (EXIT_FAILURE, errno, _("failed to close input pipe"));
550 filter_pid = child_pid;
551 if (n_open_pipes == open_pipes_alloc)
552 open_pipes = xpalloc (open_pipes, &open_pipes_alloc, 1,
553 MIN (INT_MAX, IDX_MAX), sizeof *open_pipes);
554 open_pipes[n_open_pipes++] = fd_pair[1];
555 return fd_pair[1];
559 /* Close the output file, and do any associated cleanup.
560 If FP and FD are both specified, they refer to the same open file;
561 in this case FP is closed, but FD is still used in cleanup. */
562 static void
563 closeout (FILE *fp, int fd, pid_t pid, char const *name)
565 if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
566 die (EXIT_FAILURE, errno, "%s", quotef (name));
567 if (fd >= 0)
569 if (fp == NULL && close (fd) < 0)
570 die (EXIT_FAILURE, errno, "%s", quotef (name));
571 int j;
572 for (j = 0; j < n_open_pipes; ++j)
574 if (open_pipes[j] == fd)
576 open_pipes[j] = open_pipes[--n_open_pipes];
577 break;
581 if (pid > 0)
583 int wstatus;
584 if (waitpid (pid, &wstatus, 0) < 0)
585 die (EXIT_FAILURE, errno, _("waiting for child process"));
586 else if (WIFSIGNALED (wstatus))
588 int sig = WTERMSIG (wstatus);
589 if (sig != SIGPIPE)
591 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
592 if (sig2str (sig, signame) != 0)
593 sprintf (signame, "%d", sig);
594 error (sig + 128, 0,
595 _("with FILE=%s, signal %s from command: %s"),
596 quotef (name), signame, filter_command);
599 else if (WIFEXITED (wstatus))
601 int ex = WEXITSTATUS (wstatus);
602 if (ex != 0)
603 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
604 quotef (name), ex, filter_command);
606 else
608 /* shouldn't happen. */
609 die (EXIT_FAILURE, 0,
610 _("unknown status from command (0x%X)"), wstatus + 0u);
615 /* Write BYTES bytes at BP to an output file.
616 If NEW_FILE_FLAG is true, open the next output file.
617 Otherwise add to the same output file already in use.
618 Return true if successful. */
620 static bool
621 cwrite (bool new_file_flag, char const *bp, idx_t bytes)
623 if (new_file_flag)
625 if (!bp && bytes == 0 && elide_empty_files)
626 return true;
627 closeout (NULL, output_desc, filter_pid, outfile);
628 next_file_name ();
629 output_desc = create (outfile);
630 if (output_desc < 0)
631 die (EXIT_FAILURE, errno, "%s", quotef (outfile));
634 if (full_write (output_desc, bp, bytes) == bytes)
635 return true;
636 else
638 if (! ignorable (errno))
639 die (EXIT_FAILURE, errno, "%s", quotef (outfile));
640 return false;
644 /* Split into pieces of exactly N_BYTES bytes.
645 However, the first REM_BYTES pieces should be 1 byte longer.
646 Use buffer BUF, whose size is BUFSIZE.
647 If INITIAL_READ is nonnegative,
648 BUF contains the first INITIAL_READ input bytes. */
/* If MAX_FILES is nonzero, no new output file is started once
   MAX_FILES have been opened, and the loop at the end creates any of
   the MAX_FILES outputs that were never reached.  Input is read from
   standard input. */
650 static void
651 bytes_split (intmax_t n_bytes, intmax_t rem_bytes,
652 char *buf, idx_t bufsize, ssize_t initial_read,
653 intmax_t max_files)
655 bool new_file_flag = true;
/* False once cwrite () has reported an ignorable write failure. */
656 bool filter_ok = true;
/* Number of output files opened so far. */
657 intmax_t opened = 0;
/* Bytes still owed to the current piece. */
658 intmax_t to_write = n_bytes + (0 < rem_bytes);
659 bool eof = ! to_write;
661 while (! eof)
663 ssize_t n_read;
664 if (0 <= initial_read)
/* Consume the data already in BUF; a short initial read means the
   whole input has been seen. */
666 n_read = initial_read;
667 initial_read = -1;
668 eof = n_read < bufsize;
670 else
/* The current output stopped accepting data: try to skip the rest
   of this piece by seeking instead of reading it, then start the
   next piece. */
672 if (! filter_ok
673 && 0 <= lseek (STDIN_FILENO, to_write, SEEK_CUR))
675 to_write = n_bytes + (opened + 1 < rem_bytes);
676 new_file_flag = true;
679 n_read = read (STDIN_FILENO, buf, bufsize);
680 if (n_read < 0)
681 die (EXIT_FAILURE, errno, "%s", quotef (infile));
682 eof = n_read == 0;
/* Emit every piece that ends within the data just obtained. */
684 char *bp_out = buf;
685 while (0 < to_write && to_write <= n_read)
687 if (filter_ok || new_file_flag)
688 filter_ok = cwrite (new_file_flag, bp_out, to_write);
689 opened += new_file_flag;
690 new_file_flag = !max_files || (opened < max_files);
691 if (! filter_ok && ! new_file_flag)
693 /* If filters no longer accepting input, stop reading. */
694 n_read = 0;
695 eof = true;
696 break;
698 bp_out += to_write;
699 n_read -= to_write;
700 to_write = n_bytes + (opened < rem_bytes);
/* Whatever is left in BUF is the beginning of a piece that
   continues in the next buffer. */
702 if (0 < n_read)
704 if (filter_ok || new_file_flag)
705 filter_ok = cwrite (new_file_flag, bp_out, n_read);
706 opened += new_file_flag;
707 new_file_flag = false;
708 if (! filter_ok && opened == max_files)
710 /* If filters no longer accepting input, stop reading. */
711 break;
713 to_write -= n_read;
717 /* Ensure NUMBER files are created, which truncates
718 any existing files or notifies any consumers on fifos.
719 FIXME: Should we do this before EXIT_FAILURE? */
720 while (opened++ < max_files)
721 cwrite (true, NULL, 0);
724 /* Split into pieces of exactly N_LINES lines.
725 Use buffer BUF, whose size is BUFSIZE. */
727 static void
728 lines_split (intmax_t n_lines, char *buf, idx_t bufsize)
730 ssize_t n_read;
731 char *bp, *bp_out, *eob;
732 bool new_file_flag = true;
733 intmax_t n = 0;
737 n_read = read (STDIN_FILENO, buf, bufsize);
738 if (n_read < 0)
739 die (EXIT_FAILURE, errno, "%s", quotef (infile));
740 bp = bp_out = buf;
741 eob = bp + n_read;
742 *eob = eolchar;
743 while (true)
745 bp = rawmemchr (bp, eolchar);
746 if (bp == eob)
748 if (eob != bp_out) /* do not write 0 bytes! */
750 idx_t len = eob - bp_out;
751 cwrite (new_file_flag, bp_out, len);
752 new_file_flag = false;
754 break;
757 ++bp;
758 if (++n >= n_lines)
760 cwrite (new_file_flag, bp_out, bp - bp_out);
761 bp_out = bp;
762 new_file_flag = true;
763 n = 0;
767 while (n_read);
770 /* Split into pieces that are as large as possible while still not more
771 than N_BYTES bytes, and are split on line boundaries except
772 where lines longer than N_BYTES bytes occur. */
774 static void
775 line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize)
777 ssize_t n_read;
778 intmax_t n_out = 0; /* for each split. */
779 idx_t n_hold = 0;
780 char *hold = NULL; /* for lines > bufsize. */
781 idx_t hold_size = 0;
782 bool split_line = false; /* Whether a \n was output in a split. */
786 n_read = read (STDIN_FILENO, buf, bufsize);
787 if (n_read < 0)
788 die (EXIT_FAILURE, errno, "%s", quotef (infile));
789 idx_t n_left = n_read;
790 char *sob = buf;
791 while (n_left)
793 idx_t split_rest = 0;
794 char *eoc = NULL;
795 char *eol;
797 /* Determine End Of Chunk and/or End of Line,
798 which are used below to select what to write or buffer. */
799 if (n_bytes - n_out - n_hold <= n_left)
801 /* Have enough for split. */
802 split_rest = n_bytes - n_out - n_hold;
803 eoc = sob + split_rest - 1;
804 eol = memrchr (sob, eolchar, split_rest);
806 else
807 eol = memrchr (sob, eolchar, n_left);
809 /* Output hold space if possible. */
810 if (n_hold && !(!eol && n_out))
812 cwrite (n_out == 0, hold, n_hold);
813 n_out += n_hold;
814 if (n_hold > bufsize)
815 hold = xirealloc (hold, bufsize);
816 n_hold = 0;
817 hold_size = bufsize;
820 /* Output to eol if present. */
821 if (eol)
823 split_line = true;
824 idx_t n_write = eol - sob + 1;
825 cwrite (n_out == 0, sob, n_write);
826 n_out += n_write;
827 n_left -= n_write;
828 sob += n_write;
829 if (eoc)
830 split_rest -= n_write;
833 /* Output to eoc or eob if possible. */
834 if (n_left && !split_line)
836 idx_t n_write = eoc ? split_rest : n_left;
837 cwrite (n_out == 0, sob, n_write);
838 n_out += n_write;
839 n_left -= n_write;
840 sob += n_write;
841 if (eoc)
842 split_rest -= n_write;
845 /* Update hold if needed. */
846 if ((eoc && split_rest) || (!eoc && n_left))
848 idx_t n_buf = eoc ? split_rest : n_left;
849 if (hold_size - n_hold < n_buf)
850 hold = xpalloc (hold, &hold_size, n_buf - (hold_size - n_hold),
851 -1, sizeof *hold);
852 memcpy (hold + n_hold, sob, n_buf);
853 n_hold += n_buf;
854 n_left -= n_buf;
855 sob += n_buf;
858 /* Reset for new split. */
859 if (eoc)
861 n_out = 0;
862 split_line = false;
866 while (n_read);
868 /* Handle no eol at end of file. */
869 if (n_hold)
870 cwrite (n_out == 0, hold, n_hold);
872 free (hold);
875 /* -n l/[K/]N: Write lines to files of approximately file size / N.
876 The file is partitioned into file size / N sized portions, with the
877 last assigned any excess. If a line _starts_ within a partition
878 it is written completely to the corresponding file. Since lines
879 are not split even if they overlap a partition, the files written
880 can be larger or smaller than the partition size, and even empty
881 if a line is so long as to completely overlap the partition. */
/* K is the 1-based number of the single chunk to write to standard
   output, or 0 to write every chunk to its own output file through
   cwrite ().  N is the number of chunks and FILE_SIZE the total
   input size.  BUF, of size BUFSIZE, is the I/O buffer; when
   INITIAL_READ is nonnegative BUF already holds that many input
   bytes.  Further input is read from standard input. */
883 static void
884 lines_chunk_split (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
885 ssize_t initial_read, off_t file_size)
887 assert (n && k <= n);
/* The first REM_BYTES chunks are one byte larger than CHUNK_SIZE.
   CHUNK_END is the offset just past the current chunk, and N_WRITTEN
   the offset of the next input byte to process. */
889 intmax_t rem_bytes = file_size % n;
890 off_t chunk_size = file_size / n;
891 intmax_t chunk_no = 1;
892 off_t chunk_end = chunk_size + (0 < rem_bytes);
893 off_t n_written = 0;
894 bool new_file_flag = true;
895 bool chunk_truncated = false;
897 if (k > 1 && 0 < file_size)
899 /* Start reading 1 byte before kth chunk of file. */
900 off_t start = (k - 1) * chunk_size + MIN (k - 1, rem_bytes) - 1;
/* Reuse the part of BUF that lies at or after START when there is
   one; otherwise seek forward past the bytes not yet read. */
901 if (start < initial_read)
903 memmove (buf, buf + start, initial_read - start);
904 initial_read -= start;
906 else
908 if (initial_read < start
909 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
910 die (EXIT_FAILURE, errno, "%s", quotef (infile));
911 initial_read = -1;
/* Treat the byte at START as the last byte of chunk K - 1, so that
   whatever remains of a line begun in an earlier chunk is consumed
   without being output. */
913 n_written = start;
914 chunk_no = k - 1;
915 chunk_end = start + 1;
918 while (n_written < file_size)
920 char *bp = buf, *eob;
921 ssize_t n_read;
922 if (0 <= initial_read)
924 n_read = initial_read;
925 initial_read = -1;
927 else
929 n_read = read (STDIN_FILENO, buf,
930 MIN (bufsize, file_size - n_written));
931 if (n_read < 0)
932 die (EXIT_FAILURE, errno, "%s", quotef (infile));
934 if (n_read == 0)
935 break; /* eof. */
936 chunk_truncated = false;
937 eob = buf + n_read;
939 while (bp != eob)
941 idx_t to_write;
/* NEXT is set when a line end was found, i.e. when the data written
   below completes the current chunk's last line. */
942 bool next = false;
944 /* Begin looking for '\n' at last byte of chunk. */
945 off_t skip = MIN (n_read, MAX (0, chunk_end - 1 - n_written));
946 char *bp_out = memchr (bp + skip, eolchar, n_read - skip);
947 if (bp_out)
949 bp_out++;
950 next = true;
952 else
953 bp_out = eob;
954 to_write = bp_out - bp;
956 if (k == chunk_no)
958 /* We don't use the stdout buffer here since we're writing
959 large chunks from an existing file, so it's more efficient
960 to write out directly. */
961 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
962 die (EXIT_FAILURE, errno, "%s", _("write error"));
964 else if (! k)
965 cwrite (new_file_flag, bp, to_write);
966 n_written += to_write;
967 bp += to_write;
968 n_read -= to_write;
969 new_file_flag = next;
971 /* A line could have been so long that it skipped
972 entire chunks. So create empty files in that case. */
973 while (next || chunk_end <= n_written)
975 if (!next && bp == eob)
977 /* replenish buf, before going to next chunk. */
978 chunk_truncated = true;
979 break;
/* The requested chunk has been written completely. */
981 if (k == chunk_no)
982 return;
983 chunk_end += chunk_size + (chunk_no < rem_bytes);
984 chunk_no++;
985 if (chunk_end <= n_written)
987 if (! k)
988 cwrite (true, NULL, 0);
990 else
991 next = false;
/* If the buffer ran out exactly at a chunk boundary and no further
   data arrived, the chunk counter was never advanced for that
   chunk; do so now. */
996 if (chunk_truncated)
997 chunk_no++;
999 /* Ensure NUMBER files are created, which truncates
1000 any existing files or notifies any consumers on fifos.
1001 FIXME: Should we do this before EXIT_FAILURE? */
1002 if (!k)
1003 while (chunk_no++ <= n)
1004 cwrite (true, NULL, 0);
1007 /* -n K/N: Extract Kth of N chunks. */
1009 static void
1010 bytes_chunk_extract (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
1011 ssize_t initial_read, off_t file_size)
1013 off_t start;
1014 off_t end;
1016 assert (0 < k && k <= n);
1018 start = (k - 1) * (file_size / n) + MIN (k - 1, file_size % n);
1019 end = k == n ? file_size : k * (file_size / n) + MIN (k, file_size % n);
1021 if (start < initial_read)
1023 memmove (buf, buf + start, initial_read - start);
1024 initial_read -= start;
1026 else
1028 if (initial_read < start
1029 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
1030 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1031 initial_read = -1;
1034 while (start < end)
1036 ssize_t n_read;
1037 if (0 <= initial_read)
1039 n_read = initial_read;
1040 initial_read = -1;
1042 else
1044 n_read = read (STDIN_FILENO, buf, bufsize);
1045 if (n_read < 0)
1046 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1048 if (n_read == 0)
1049 break; /* eof. */
1050 n_read = MIN (n_read, end - start);
1051 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
1052 && ! ignorable (errno))
1053 die (EXIT_FAILURE, errno, "%s", quotef ("-"));
1054 start += n_read;
/* Per-output-file state for round robin mode.  */
struct of_info
{
  char *of_name;   /* Name of the output file.  */
  int ofd;         /* Open descriptor, or a negative OFD_* marker.  */
  FILE *ofile;     /* Stream opened on OFD, or null when closed.  */
  pid_t opid;      /* Filter PID recorded when the file was opened.  */
};
typedef struct of_info of_t;
/* Negative markers stored in the ofd member while no descriptor is
   open for the file.  */
enum
{
  OFD_NEW = -1,     /* Not opened yet; must be created.  */
  OFD_APPEND = -2   /* Closed earlier; reopen for appending.  */
};
1072 /* Rotate file descriptors when we're writing to more output files than we
1073 have available file descriptors.
1074 Return whether we came under file resource pressure.
1075 If so, it's probably best to close each file when finished with it. */
1077 static bool
1078 ofile_open (of_t *files, idx_t i_check, idx_t nfiles)
1080 bool file_limit = false;
1082 if (files[i_check].ofd <= OFD_NEW)
1084 int fd;
1085 idx_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
1087 /* Another process could have opened a file in between the calls to
1088 close and open, so we should keep trying until open succeeds or
1089 we've closed all of our files. */
1090 while (true)
1092 if (files[i_check].ofd == OFD_NEW)
1093 fd = create (files[i_check].of_name);
1094 else /* OFD_APPEND */
1096 /* Attempt to append to previously opened file.
1097 We use O_NONBLOCK to support writing to fifos,
1098 where the other end has closed because of our
1099 previous close. In that case we'll immediately
1100 get an error, rather than waiting indefinitely.
1101 In specialised cases the consumer can keep reading
1102 from the fifo, terminating on conditions in the data
1103 itself, or perhaps never in the case of 'tail -f'.
1104 I.e., for fifos it is valid to attempt this reopen.
1106 We don't handle the filter_command case here, as create()
1107 will exit if there are not enough files in that case.
1108 I.e., we don't support restarting filters, as that would
1109 put too much burden on users specifying --filter commands. */
1110 fd = open (files[i_check].of_name,
1111 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
1114 if (0 <= fd)
1115 break;
1117 if (!(errno == EMFILE || errno == ENFILE))
1118 die (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1120 file_limit = true;
1122 /* Search backwards for an open file to close. */
1123 while (files[i_reopen].ofd < 0)
1125 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
1126 /* No more open files to close, exit with E[NM]FILE. */
1127 if (i_reopen == i_check)
1128 die (EXIT_FAILURE, errno, "%s",
1129 quotef (files[i_check].of_name));
1132 if (fclose (files[i_reopen].ofile) != 0)
1133 die (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name));
1134 files[i_reopen].ofile = NULL;
1135 files[i_reopen].ofd = OFD_APPEND;
1138 files[i_check].ofd = fd;
1139 FILE *ofile = fdopen (fd, "a");
1140 if (!ofile)
1141 die (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1142 files[i_check].ofile = ofile;
1143 files[i_check].opid = filter_pid;
1144 filter_pid = 0;
1147 return file_limit;
1150 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1151 Use BUF of size BUFSIZE for the buffer, and if allocating storage
1152 put its address into *FILESP to pacify -fsanitize=leak.
1153 When K == 0, we try to keep the files open in parallel.
1154 If we run out of file resources, then we revert
1155 to opening and closing each file for each line. */
1157 static void
1158 lines_rr (intmax_t k, intmax_t n, char *buf, idx_t bufsize, of_t **filesp)
1160 bool wrapped = false;
1161 bool wrote = false;
1162 bool file_limit;
1163 idx_t i_file;
1164 of_t *files IF_LINT (= NULL);
1165 intmax_t line_no;
1167 if (k)
1168 line_no = 1;
1169 else
1171 if (IDX_MAX < n)
1172 xalloc_die ();
1173 files = *filesp = xinmalloc (n, sizeof *files);
1175 /* Generate output file names. */
1176 for (i_file = 0; i_file < n; i_file++)
1178 next_file_name ();
1179 files[i_file].of_name = xstrdup (outfile);
1180 files[i_file].ofd = OFD_NEW;
1181 files[i_file].ofile = NULL;
1182 files[i_file].opid = 0;
1184 i_file = 0;
1185 file_limit = false;
1188 while (true)
1190 char *bp = buf, *eob;
1191 ssize_t n_read = read (STDIN_FILENO, buf, bufsize);
1192 if (n_read < 0)
1193 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1194 else if (n_read == 0)
1195 break; /* eof. */
1196 eob = buf + n_read;
1198 while (bp != eob)
1200 idx_t to_write;
1201 bool next = false;
1203 /* Find end of line. */
1204 char *bp_out = memchr (bp, eolchar, eob - bp);
1205 if (bp_out)
1207 bp_out++;
1208 next = true;
1210 else
1211 bp_out = eob;
1212 to_write = bp_out - bp;
1214 if (k)
1216 if (line_no == k && unbuffered)
1218 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
1219 die (EXIT_FAILURE, errno, "%s", _("write error"));
1221 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
1223 clearerr (stdout); /* To silence close_stdout(). */
1224 die (EXIT_FAILURE, errno, "%s", _("write error"));
1226 if (next)
1227 line_no = (line_no == n) ? 1 : line_no + 1;
1229 else
1231 /* Secure file descriptor. */
1232 file_limit |= ofile_open (files, i_file, n);
1233 if (unbuffered)
1235 /* Note writing to fd, rather than flushing the FILE gives
1236 an 8% performance benefit, due to reduced data copying. */
1237 if (full_write (files[i_file].ofd, bp, to_write) != to_write
1238 && ! ignorable (errno))
1240 die (EXIT_FAILURE, errno, "%s",
1241 quotef (files[i_file].of_name));
1244 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1245 && ! ignorable (errno))
1247 die (EXIT_FAILURE, errno, "%s",
1248 quotef (files[i_file].of_name));
1251 if (! ignorable (errno))
1252 wrote = true;
1254 if (file_limit)
1256 if (fclose (files[i_file].ofile) != 0)
1258 die (EXIT_FAILURE, errno, "%s",
1259 quotef (files[i_file].of_name));
1261 files[i_file].ofile = NULL;
1262 files[i_file].ofd = OFD_APPEND;
1264 if (next && ++i_file == n)
1266 wrapped = true;
1267 /* If no filters are accepting input, stop reading. */
1268 if (! wrote)
1269 goto no_filters;
1270 wrote = false;
1271 i_file = 0;
1275 bp = bp_out;
1279 no_filters:
1280 /* Ensure all files created, so that any existing files are truncated,
1281 and to signal any waiting fifo consumers.
1282 Also, close any open file descriptors.
1283 FIXME: Should we do this before EXIT_FAILURE? */
1284 if (!k)
1286 idx_t ceiling = wrapped ? n : i_file;
1287 for (i_file = 0; i_file < n; i_file++)
1289 if (i_file >= ceiling && !elide_empty_files)
1290 file_limit |= ofile_open (files, i_file, n);
1291 if (files[i_file].ofd >= 0)
1292 closeout (files[i_file].ofile, files[i_file].ofd,
1293 files[i_file].opid, files[i_file].of_name);
1294 files[i_file].ofd = OFD_APPEND;
/* Diagnose a command line that selects more than one splitting method,
   then exit through usage () with a failure status.  */
#define FAIL_ONLY_ONE_WAY()                                     \
  do {                                                          \
    error (0, 0, _("cannot split in more than one way"));       \
    usage (EXIT_FAILURE);                                       \
  } while (0)

/* Exit with a diagnostic for a failed string-to-integer conversion.
   MSGID is the untranslated description of what was being parsed and
   ARG is the offending text.  The current errno is appended to the
   message, except that EINVAL is suppressed.  */

static _Noreturn void
strtoint_die (char const *msgid, char const *arg)
{
  int err = errno;
  if (err == EINVAL)
    err = 0;
  die (EXIT_FAILURE, err, "%s: %s", gettext (msgid), quote (arg));
}

1316 /* Use OVERFLOW_OK when it is OK to ignore LONGINT_OVERFLOW errors, since the
1317 extreme value will do the right thing anyway on any practical platform. */
1318 #define OVERFLOW_OK LONGINT_OVERFLOW
1320 /* Parse ARG for number of bytes or lines. The number can be followed
1321 by MULTIPLIERS, and the resulting value must be positive.
1322 If the number cannot be parsed, diagnose with MSG.
1323 Return the number parsed, or an INTMAX_MAX on overflow. */
1325 static intmax_t
1326 parse_n_units (char const *arg, char const *multipliers, char const *msgid)
1328 intmax_t n;
1329 if (OVERFLOW_OK < xstrtoimax (arg, NULL, 10, &n, multipliers) || n < 1)
1330 strtoint_die (msgid, arg);
1331 return n;
1334 /* Parse K/N syntax of chunk options. */
1336 static void
1337 parse_chunk (intmax_t *k_units, intmax_t *n_units, char const *arg)
1339 char *argend;
1340 strtol_error e = xstrtoimax (arg, &argend, 10, n_units, "");
1341 if (e == LONGINT_INVALID_SUFFIX_CHAR && *argend == '/')
1343 *k_units = *n_units;
1344 *n_units = parse_n_units (argend + 1, "",
1345 N_("invalid number of chunks"));
1346 if (! (0 < *k_units && *k_units <= *n_units))
1347 die (EXIT_FAILURE, 0, "%s: %s", _("invalid chunk number"),
1348 quote_mem (arg, argend - arg));
1350 else if (! (e <= OVERFLOW_OK && 0 < *n_units))
1351 strtoint_die (N_("invalid number of chunks"), arg);
1356 main (int argc, char **argv)
1358 enum Split_type split_type = type_undef;
1359 idx_t in_blk_size = 0; /* optimal block size of input file device */
1360 idx_t page_size = getpagesize ();
1361 intmax_t k_units = 0;
1362 intmax_t n_units = 0;
1364 static char const multipliers[] = "bEGKkMmPQRTYZ0";
1365 int c;
1366 int digits_optind = 0;
1367 off_t file_size = OFF_T_MAX;
1369 initialize_main (&argc, &argv);
1370 set_program_name (argv[0]);
1371 setlocale (LC_ALL, "");
1372 bindtextdomain (PACKAGE, LOCALEDIR);
1373 textdomain (PACKAGE);
1375 atexit (close_stdout);
1377 /* Parse command line options. */
1379 infile = bad_cast ("-");
1380 outbase = bad_cast ("x");
1382 while (true)
1384 /* This is the argv-index of the option we will read next. */
1385 int this_optind = optind ? optind : 1;
1387 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:ux",
1388 longopts, NULL);
1389 if (c == -1)
1390 break;
1392 switch (c)
1394 case 'a':
1395 suffix_length = xdectoimax (optarg, 0, IDX_MAX,
1396 "", _("invalid suffix length"), 0);
1397 break;
1399 case ADDITIONAL_SUFFIX_OPTION:
1400 if (last_component (optarg) != optarg)
1402 error (0, 0,
1403 _("invalid suffix %s, contains directory separator"),
1404 quote (optarg));
1405 usage (EXIT_FAILURE);
1407 additional_suffix = optarg;
1408 break;
1410 case 'b':
1411 if (split_type != type_undef)
1412 FAIL_ONLY_ONE_WAY ();
1413 split_type = type_bytes;
1414 n_units = parse_n_units (optarg, multipliers,
1415 N_("invalid number of bytes"));
1416 break;
1418 case 'l':
1419 if (split_type != type_undef)
1420 FAIL_ONLY_ONE_WAY ();
1421 split_type = type_lines;
1422 n_units = parse_n_units (optarg, "", N_("invalid number of lines"));
1423 break;
1425 case 'C':
1426 if (split_type != type_undef)
1427 FAIL_ONLY_ONE_WAY ();
1428 split_type = type_byteslines;
1429 n_units = parse_n_units (optarg, multipliers,
1430 N_("invalid number of lines"));
1431 break;
1433 case 'n':
1434 if (split_type != type_undef)
1435 FAIL_ONLY_ONE_WAY ();
1436 /* skip any whitespace */
1437 while (isspace (to_uchar (*optarg)))
1438 optarg++;
1439 if (STRNCMP_LIT (optarg, "r/") == 0)
1441 split_type = type_rr;
1442 optarg += 2;
1444 else if (STRNCMP_LIT (optarg, "l/") == 0)
1446 split_type = type_chunk_lines;
1447 optarg += 2;
1449 else
1450 split_type = type_chunk_bytes;
1451 parse_chunk (&k_units, &n_units, optarg);
1452 break;
1454 case 'u':
1455 unbuffered = true;
1456 break;
1458 case 't':
1460 char neweol = optarg[0];
1461 if (! neweol)
1462 die (EXIT_FAILURE, 0, _("empty record separator"));
1463 if (optarg[1])
1465 if (STREQ (optarg, "\\0"))
1466 neweol = '\0';
1467 else
1469 /* Provoke with 'split -txx'. Complain about
1470 "multi-character tab" instead of "multibyte tab", so
1471 that the diagnostic's wording does not need to be
1472 changed once multibyte characters are supported. */
1473 die (EXIT_FAILURE, 0, _("multi-character separator %s"),
1474 quote (optarg));
1477 /* Make it explicit we don't support multiple separators. */
1478 if (0 <= eolchar && neweol != eolchar)
1480 die (EXIT_FAILURE, 0,
1481 _("multiple separator characters specified"));
1484 eolchar = neweol;
1486 break;
1488 case '0':
1489 case '1':
1490 case '2':
1491 case '3':
1492 case '4':
1493 case '5':
1494 case '6':
1495 case '7':
1496 case '8':
1497 case '9':
1498 if (split_type == type_undef)
1500 split_type = type_digits;
1501 n_units = 0;
1503 if (split_type != type_undef && split_type != type_digits)
1504 FAIL_ONLY_ONE_WAY ();
1505 if (digits_optind != 0 && digits_optind != this_optind)
1506 n_units = 0; /* More than one number given; ignore other. */
1507 digits_optind = this_optind;
1508 if (INT_MULTIPLY_WRAPV (n_units, 10, &n_units)
1509 || INT_ADD_WRAPV (n_units, c - '0', &n_units))
1510 n_units = INTMAX_MAX;
1511 break;
1513 case 'd':
1514 case 'x':
1515 if (c == 'd')
1516 suffix_alphabet = "0123456789";
1517 else
1518 suffix_alphabet = "0123456789abcdef";
1519 if (optarg)
1521 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1523 error (0, 0,
1524 (c == 'd') ?
1525 _("%s: invalid start value for numerical suffix") :
1526 _("%s: invalid start value for hexadecimal suffix"),
1527 quote (optarg));
1528 usage (EXIT_FAILURE);
1530 else
1532 /* Skip any leading zero. */
1533 while (*optarg == '0' && *(optarg + 1) != '\0')
1534 optarg++;
1535 numeric_suffix_start = optarg;
1538 break;
1540 case 'e':
1541 elide_empty_files = true;
1542 break;
1544 case FILTER_OPTION:
1545 filter_command = optarg;
1546 break;
1548 case IO_BLKSIZE_OPTION:
1549 in_blk_size = xdectoumax (optarg, 1,
1550 MIN (SYS_BUFSIZE_MAX,
1551 MIN (IDX_MAX, SIZE_MAX) - 1),
1552 multipliers, _("invalid IO block size"), 0);
1553 break;
1555 case VERBOSE_OPTION:
1556 verbose = true;
1557 break;
1559 case_GETOPT_HELP_CHAR;
1561 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1563 default:
1564 usage (EXIT_FAILURE);
1568 if (k_units != 0 && filter_command)
1570 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1571 usage (EXIT_FAILURE);
1574 /* Handle default case. */
1575 if (split_type == type_undef)
1577 split_type = type_lines;
1578 n_units = 1000;
1581 if (n_units == 0)
1583 error (0, 0, _("invalid number of lines: %s"), quote ("0"));
1584 usage (EXIT_FAILURE);
1587 if (eolchar < 0)
1588 eolchar = '\n';
1590 set_suffix_length (n_units, split_type);
1592 /* Get out the filename arguments. */
1594 if (optind < argc)
1595 infile = argv[optind++];
1597 if (optind < argc)
1598 outbase = argv[optind++];
1600 if (optind < argc)
1602 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1603 usage (EXIT_FAILURE);
1606 /* Check that the suffix length is large enough for the numerical
1607 suffix start value. */
1608 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1610 error (0, 0, _("numerical suffix start value is too large "
1611 "for the suffix length"));
1612 usage (EXIT_FAILURE);
1615 /* Open the input file. */
1616 if (! STREQ (infile, "-")
1617 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1618 die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1619 quoteaf (infile));
1621 /* Binary I/O is safer when byte counts are used. */
1622 xset_binary_mode (STDIN_FILENO, O_BINARY);
1624 /* Get the optimal block size of input device and make a buffer. */
1626 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1627 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1629 if (in_blk_size == 0)
1631 in_blk_size = io_blksize (in_stat_buf);
1632 if (SYS_BUFSIZE_MAX < in_blk_size)
1633 in_blk_size = SYS_BUFSIZE_MAX;
1636 char *buf = xalignalloc (page_size, in_blk_size + 1);
1637 ssize_t initial_read = -1;
1639 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1641 file_size = input_file_size (STDIN_FILENO, &in_stat_buf,
1642 buf, in_blk_size);
1643 if (file_size < 0)
1644 die (EXIT_FAILURE, errno, _("%s: cannot determine file size"),
1645 quotef (infile));
1646 initial_read = MIN (file_size, in_blk_size);
1649 /* When filtering, closure of one pipe must not terminate the process,
1650 as there may still be other streams expecting input from us. */
1651 if (filter_command)
1652 default_SIGPIPE = signal (SIGPIPE, SIG_IGN) == SIG_DFL;
1654 switch (split_type)
1656 case type_digits:
1657 case type_lines:
1658 lines_split (n_units, buf, in_blk_size);
1659 break;
1661 case type_bytes:
1662 bytes_split (n_units, 0, buf, in_blk_size, -1, 0);
1663 break;
1665 case type_byteslines:
1666 line_bytes_split (n_units, buf, in_blk_size);
1667 break;
1669 case type_chunk_bytes:
1670 if (k_units == 0)
1671 bytes_split (file_size / n_units, file_size % n_units,
1672 buf, in_blk_size, initial_read, n_units);
1673 else
1674 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
1675 file_size);
1676 break;
1678 case type_chunk_lines:
1679 lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
1680 file_size);
1681 break;
1683 case type_rr:
1684 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1685 but the functionality is provided for symmetry. */
1687 of_t *files;
1688 lines_rr (k_units, n_units, buf, in_blk_size, &files);
1690 break;
1692 default:
1693 abort ();
1696 if (close (STDIN_FILENO) != 0)
1697 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1698 closeout (NULL, output_desc, filter_pid, outfile);
1700 main_exit (EXIT_SUCCESS);