split: port ‘split -n N /dev/null’ better to macOS
[coreutils.git] / src / split.c
blob424ca9fe0b6216b3e2c6b8323e6aa35fac0cb808
/* split.c -- split a file into pieces.
   Copyright (C) 1988-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
/* By tege@sics.se, with rms.

   TODO:
   * support -p REGEX as in BSD's split.
   * support --suppress-matched as in csplit.  */
22 #include <config.h>
24 #include <assert.h>
25 #include <stdio.h>
26 #include <getopt.h>
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
31 #include "system.h"
32 #include "alignalloc.h"
33 #include "die.h"
34 #include "error.h"
35 #include "fd-reopen.h"
36 #include "fcntl--.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
39 #include "quote.h"
40 #include "safe-read.h"
41 #include "sig2str.h"
42 #include "xbinary-io.h"
43 #include "xdectoint.h"
44 #include "xstrtol.h"
/* Name under which this program is officially known
   (for instance, without any 'g' prefix).  */
#define PROGRAM_NAME "split"

/* Author list; each name is wrapped in proper_name wherever AUTHORS
   is expanded.  */
#define AUTHORS \
  proper_name ("Torbjorn Granlund"), \
  proper_name ("Richard M. Stallman")
/* Shell command to filter through, instead of creating files.
   When it is null, output goes to ordinary files.  */
static char const *filter_command;

/* Process ID of the filter most recently started by create.
   Kept as pid_t, the type fork returns and closeout expects, so the
   value is never narrowed on a platform where pid_t is wider than int.  */
static pid_t filter_pid;
59 /* Write ends of the pipes feeding filter children.  create appends
   each new pipe here and closeout drops the entry matching the
   descriptor it closes.  open_pipes_alloc is the allocated capacity
   and n_open_pipes the number of entries in use.  */
60 static int *open_pipes;
61 static size_t open_pipes_alloc;
62 static size_t n_open_pipes;
64 /* Blocked signals.  create restores OLDBLOCKED in the filter child
   just before running the shell.  */
65 static sigset_t oldblocked;
66 static sigset_t newblocked;
68 /* Base name (prefix) of output files.  */
69 static char const *outbase;
71 /* Name of the current output file; null until next_file_name has
   built the first name.  */
72 static char *outfile;
74 /* Pointer to the end of the prefix in OUTFILE.
   Suffixes are inserted here.  */
76 static char *outfile_mid;
78 /* Generate a new, longer suffix when suffixes are exhausted.  */
79 static bool suffix_auto = true;
81 /* Length of the suffix in OUTFILE; zero until set.  */
82 static size_t suffix_length;
84 /* Alphabet of characters to use in suffix.  */
85 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
87 /* Numerical suffix start value, as a string, or null if none.  */
88 static char const *numeric_suffix_start;
90 /* Additional suffix to append to output file names, or null.  */
91 static char const *additional_suffix;
93 /* Name of input file.  May be "-".  */
94 static char *infile;
96 /* stat buf for input file; create compares it with each output
   file to refuse overwriting the input.  */
97 static struct stat in_stat_buf;
99 /* Descriptor on which output file is open, or -1 if none yet.  */
100 static int output_desc = -1;
102 /* If true, print a diagnostic on standard output just before each
   output file is opened (create writes it with fprintf to stdout).  */
104 static bool verbose;
106 /* If true, do not generate zero length output files.  */
107 static bool elide_empty_files;
109 /* If true, in round robin mode, immediately copy
   input to output, which is much slower, so disabled by default.  */
111 static bool unbuffered;
113 /* The character marking end of line.  Starts as -1 (unset); the
   default of \n is applied later in the file.  */
114 static int eolchar = -1;
/* The split mode to use.  set_suffix_length treats the last three
   modes as ones where the number of output files is known up front.  */
enum Split_type
{
  type_undef,
  type_bytes,
  type_byteslines,
  type_lines,
  type_digits,
  type_chunk_bytes,
  type_chunk_lines,
  type_rr
};
/* Long options with no short equivalent are identified by values
   that no character can have, numbered upward from CHAR_MAX + 1.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  FILTER_OPTION,
  IO_BLKSIZE_OPTION,
  ADDITIONAL_SUFFIX_OPTION
};
133 static struct option const longopts[] =
135 {"bytes", required_argument, NULL, 'b'},
136 {"lines", required_argument, NULL, 'l'},
137 {"line-bytes", required_argument, NULL, 'C'},
138 {"number", required_argument, NULL, 'n'},
139 {"elide-empty-files", no_argument, NULL, 'e'},
140 {"unbuffered", no_argument, NULL, 'u'},
141 {"suffix-length", required_argument, NULL, 'a'},
142 {"additional-suffix", required_argument, NULL,
143 ADDITIONAL_SUFFIX_OPTION},
144 {"numeric-suffixes", optional_argument, NULL, 'd'},
145 {"hex-suffixes", optional_argument, NULL, 'x'},
146 {"filter", required_argument, NULL, FILTER_OPTION},
147 {"verbose", no_argument, NULL, VERBOSE_OPTION},
148 {"separator", required_argument, NULL, 't'},
149 {"-io-blksize", required_argument, NULL,
150 IO_BLKSIZE_OPTION}, /* do not document */
151 {GETOPT_HELP_OPTION_DECL},
152 {GETOPT_VERSION_OPTION_DECL},
153 {NULL, 0, NULL, 0}
156 /* Return true if the errno value, ERR, is ignorable. */
157 static inline bool
158 ignorable (int err)
160 return filter_command && err == EPIPE;
163 static void
164 set_suffix_length (uintmax_t n_units, enum Split_type split_type)
166 #define DEFAULT_SUFFIX_LENGTH 2
168 uintmax_t suffix_length_needed = 0;
170 /* The suffix auto length feature is incompatible with
171 a user specified start value as the generated suffixes
172 are not all consecutive. */
173 if (numeric_suffix_start)
174 suffix_auto = false;
176 /* Auto-calculate the suffix length if the number of files is given. */
177 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
178 || split_type == type_rr)
180 uintmax_t n_units_end = n_units - 1;
181 if (numeric_suffix_start)
183 uintmax_t n_start;
184 strtol_error e = xstrtoumax (numeric_suffix_start, NULL, 10,
185 &n_start, "");
186 if (e == LONGINT_OK && n_start <= UINTMAX_MAX - n_units)
188 /* Restrict auto adjustment so we don't keep
189 incrementing a suffix size arbitrarily,
190 as that would break sort order for files
191 generated from multiple split runs. */
192 if (n_start < n_units)
193 n_units_end += n_start;
197 size_t alphabet_len = strlen (suffix_alphabet);
199 suffix_length_needed++;
200 while (n_units_end /= alphabet_len);
202 suffix_auto = false;
205 if (suffix_length) /* set by user */
207 if (suffix_length < suffix_length_needed)
209 die (EXIT_FAILURE, 0,
210 _("the suffix length needs to be at least %"PRIuMAX),
211 suffix_length_needed);
213 suffix_auto = false;
214 return;
216 else
217 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_length_needed);
/* Print help for this program and terminate with exit (STATUS).
   When STATUS is not EXIT_SUCCESS only emit_try_help is called;
   otherwise the synopsis, the option summary (which interpolates
   DEFAULT_SUFFIX_LENGTH) and the CHUNKS explanation are written to
   standard output.  */
220 void
221 usage (int status)
223 if (status != EXIT_SUCCESS)
224 emit_try_help ();
225 else
227 printf (_("\
228 Usage: %s [OPTION]... [FILE [PREFIX]]\n\
230 program_name);
231 fputs (_("\
232 Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
233 default size is 1000 lines, and default PREFIX is 'x'.\n\
234 "), stdout);
236 emit_stdin_note ();
237 emit_mandatory_arg_note ();
239 fprintf (stdout, _("\
240 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
241 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
242 -b, --bytes=SIZE put SIZE bytes per output file\n\
243 -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
244 -d use numeric suffixes starting at 0, not alphabetic\n\
245 --numeric-suffixes[=FROM] same as -d, but allow setting the start value\
247 -x use hex suffixes starting at 0, not alphabetic\n\
248 --hex-suffixes[=FROM] same as -x, but allow setting the start value\n\
249 -e, --elide-empty-files do not generate empty output files with '-n'\n\
250 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
251 -l, --lines=NUMBER put NUMBER lines/records per output file\n\
252 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
253 -t, --separator=SEP use SEP instead of newline as the record separator;\n\
254 '\\0' (zero) specifies the NUL character\n\
255 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
256 "), DEFAULT_SUFFIX_LENGTH);
257 fputs (_("\
258 --verbose print a diagnostic just before each\n\
259 output file is opened\n\
260 "), stdout);
261 fputs (HELP_OPTION_DESCRIPTION, stdout);
262 fputs (VERSION_OPTION_DESCRIPTION, stdout);
263 emit_size_note ();
264 fputs (_("\n\
265 CHUNKS may be:\n\
266 N split into N files based on size of input\n\
267 K/N output Kth of N to stdout\n\
268 l/N split into N files without splitting lines/records\n\
269 l/K/N output Kth of N to stdout without splitting lines/records\n\
270 r/N like 'l' but use round robin distribution\n\
271 r/K/N likewise but only output Kth of N to stdout\n\
272 "), stdout);
273 emit_ancillary_info (PROGRAM_NAME);
275 exit (status);
278 /* Return the number of bytes that can be read from FD with status ST.
279 Store up to the first BUFSIZE bytes of the file's data into BUF,
280 and advance the file position by the number of bytes read. On
281 input error, set errno and return -1. */
283 static off_t
284 input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize)
286 off_t size = 0;
289 size_t n_read = safe_read (fd, buf + size, bufsize - size);
290 if (n_read == 0)
291 return size;
292 if (n_read == SAFE_READ_ERROR)
293 return -1;
294 size += n_read;
296 while (size < bufsize);
298 off_t cur = lseek (fd, 0, SEEK_CUR);
299 if (cur < 0)
301 if (errno == ESPIPE)
302 errno = 0; /* Suppress confusing seek error. */
303 return cur;
306 off_t end;
307 if (usable_st_size (st))
308 end = st->st_size;
309 else
311 end = lseek (fd, 0, SEEK_END);
312 if (end < 0)
313 return end;
314 if (end == OFF_T_MAX)
315 goto overflow; /* E.g., /dev/zero on GNU/Hurd. */
316 if (cur < end)
318 off_t cur1 = lseek (fd, cur, SEEK_SET);
319 if (cur1 < 0)
320 return cur1;
324 /* Report overflow if we filled the buffer from a file with more
325 bytes than stat or lseek reports. This can happen with mutating
326 (e.g., /proc) files that are larger than the input block size.
327 FIXME: Handle this properly, e.g., by copying the growing file's
328 data into the first output file, and then splitting that output
329 file (which should not grow) into the other output files. */
330 if (end < size)
331 goto overflow;
333 if (cur < end && INT_ADD_WRAPV (size, end - cur, &size))
334 goto overflow;
336 return size;
338 overflow:
339 errno = EOVERFLOW;
340 return -1;
343 /* Compute the next sequential output file name and store it into the
344 string 'outfile'. */
346 static void
347 next_file_name (void)
349 /* Index in suffix_alphabet of each character in the suffix. */
350 static size_t *sufindex;
351 static size_t outbase_length;
352 static size_t outfile_length;
353 static size_t addsuf_length;
355 if (! outfile)
357 bool widen;
359 new_name:
360 widen = !! outfile_length;
362 if (! widen)
364 /* Allocate and initialize the first file name. */
366 outbase_length = strlen (outbase);
367 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
368 outfile_length = outbase_length + suffix_length + addsuf_length;
370 else
372 /* Reallocate and initialize a new wider file name.
373 We do this by subsuming the unchanging part of
374 the generated suffix into the prefix (base), and
375 reinitializing the now one longer suffix. */
377 outfile_length += 2;
378 suffix_length++;
381 if (outfile_length + 1 < outbase_length)
382 xalloc_die ();
383 outfile = xrealloc (outfile, outfile_length + 1);
385 if (! widen)
386 memcpy (outfile, outbase, outbase_length);
387 else
389 /* Append the last alphabet character to the file name prefix. */
390 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
391 outbase_length++;
394 outfile_mid = outfile + outbase_length;
395 memset (outfile_mid, suffix_alphabet[0], suffix_length);
396 if (additional_suffix)
397 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
398 outfile[outfile_length] = 0;
400 free (sufindex);
401 sufindex = xcalloc (suffix_length, sizeof *sufindex);
403 if (numeric_suffix_start)
405 assert (! widen);
407 /* Update the output file name. */
408 size_t i = strlen (numeric_suffix_start);
409 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
411 /* Update the suffix index. */
412 size_t *sufindex_end = sufindex + suffix_length;
413 while (i-- != 0)
414 *--sufindex_end = numeric_suffix_start[i] - '0';
417 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
418 /* POSIX requires that if the output file name is too long for
419 its directory, 'split' must fail without creating any files.
420 This must be checked for explicitly on operating systems that
421 silently truncate file names. */
423 char *dir = dir_name (outfile);
424 long name_max = pathconf (dir, _PC_NAME_MAX);
425 if (0 <= name_max && name_max < base_len (last_component (outfile)))
426 die (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile));
427 free (dir);
429 #endif
431 else
433 /* Increment the suffix in place, if possible. */
435 size_t i = suffix_length;
436 while (i-- != 0)
438 sufindex[i]++;
439 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
440 goto new_name;
441 outfile_mid[i] = suffix_alphabet[sufindex[i]];
442 if (outfile_mid[i])
443 return;
444 sufindex[i] = 0;
445 outfile_mid[i] = suffix_alphabet[sufindex[i]];
447 die (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
451 /* Create or truncate a file. */
453 static int
454 create (char const *name)
456 if (!filter_command)
458 if (verbose)
459 fprintf (stdout, _("creating file %s\n"), quoteaf (name));
461 int fd = open (name, O_WRONLY | O_CREAT | O_BINARY, MODE_RW_UGO);
462 if (fd < 0)
463 return fd;
464 struct stat out_stat_buf;
465 if (fstat (fd, &out_stat_buf) != 0)
466 die (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name));
467 if (SAME_INODE (in_stat_buf, out_stat_buf))
468 die (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
469 quoteaf (name));
470 if (ftruncate (fd, 0) != 0
471 && (S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf)))
472 die (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
474 return fd;
476 else
478 int fd_pair[2];
479 pid_t child_pid;
480 char const *shell_prog = getenv ("SHELL");
481 if (shell_prog == NULL)
482 shell_prog = "/bin/sh";
483 if (setenv ("FILE", name, 1) != 0)
484 die (EXIT_FAILURE, errno,
485 _("failed to set FILE environment variable"));
486 if (verbose)
487 fprintf (stdout, _("executing with FILE=%s\n"), quotef (name));
488 if (pipe (fd_pair) != 0)
489 die (EXIT_FAILURE, errno, _("failed to create pipe"));
490 child_pid = fork ();
491 if (child_pid == 0)
493 /* This is the child process. If an error occurs here, the
494 parent will eventually learn about it after doing a wait,
495 at which time it will emit its own error message. */
496 int j;
497 /* We have to close any pipes that were opened during an
498 earlier call, otherwise this process will be holding a
499 write-pipe that will prevent the earlier process from
500 reading an EOF on the corresponding read-pipe. */
501 for (j = 0; j < n_open_pipes; ++j)
502 if (close (open_pipes[j]) != 0)
503 die (EXIT_FAILURE, errno, _("closing prior pipe"));
504 if (close (fd_pair[1]))
505 die (EXIT_FAILURE, errno, _("closing output pipe"));
506 if (fd_pair[0] != STDIN_FILENO)
508 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
509 die (EXIT_FAILURE, errno, _("moving input pipe"));
510 if (close (fd_pair[0]) != 0)
511 die (EXIT_FAILURE, errno, _("closing input pipe"));
513 sigprocmask (SIG_SETMASK, &oldblocked, NULL);
514 execl (shell_prog, last_component (shell_prog), "-c",
515 filter_command, (char *) NULL);
516 die (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
517 shell_prog, filter_command);
519 if (child_pid == -1)
520 die (EXIT_FAILURE, errno, _("fork system call failed"));
521 if (close (fd_pair[0]) != 0)
522 die (EXIT_FAILURE, errno, _("failed to close input pipe"));
523 filter_pid = child_pid;
524 if (n_open_pipes == open_pipes_alloc)
525 open_pipes = x2nrealloc (open_pipes, &open_pipes_alloc,
526 sizeof *open_pipes);
527 open_pipes[n_open_pipes++] = fd_pair[1];
528 return fd_pair[1];
532 /* Close the output file, and do any associated cleanup.
533 If FP and FD are both specified, they refer to the same open file;
534 in this case FP is closed, but FD is still used in cleanup. */
535 static void
536 closeout (FILE *fp, int fd, pid_t pid, char const *name)
538 if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
539 die (EXIT_FAILURE, errno, "%s", quotef (name));
540 if (fd >= 0)
542 if (fp == NULL && close (fd) < 0)
543 die (EXIT_FAILURE, errno, "%s", quotef (name));
544 int j;
545 for (j = 0; j < n_open_pipes; ++j)
547 if (open_pipes[j] == fd)
549 open_pipes[j] = open_pipes[--n_open_pipes];
550 break;
554 if (pid > 0)
556 int wstatus = 0;
557 if (waitpid (pid, &wstatus, 0) == -1 && errno != ECHILD)
558 die (EXIT_FAILURE, errno, _("waiting for child process"));
559 if (WIFSIGNALED (wstatus))
561 int sig = WTERMSIG (wstatus);
562 if (sig != SIGPIPE)
564 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
565 if (sig2str (sig, signame) != 0)
566 sprintf (signame, "%d", sig);
567 error (sig + 128, 0,
568 _("with FILE=%s, signal %s from command: %s"),
569 quotef (name), signame, filter_command);
572 else if (WIFEXITED (wstatus))
574 int ex = WEXITSTATUS (wstatus);
575 if (ex != 0)
576 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
577 quotef (name), ex, filter_command);
579 else
581 /* shouldn't happen. */
582 die (EXIT_FAILURE, 0,
583 _("unknown status from command (0x%X)"), wstatus + 0u);
588 /* Write BYTES bytes at BP to an output file.
589 If NEW_FILE_FLAG is true, open the next output file.
590 Otherwise add to the same output file already in use.
591 Return true if successful. */
593 static bool
594 cwrite (bool new_file_flag, char const *bp, size_t bytes)
596 if (new_file_flag)
598 if (!bp && bytes == 0 && elide_empty_files)
599 return true;
600 closeout (NULL, output_desc, filter_pid, outfile);
601 next_file_name ();
602 output_desc = create (outfile);
603 if (output_desc < 0)
604 die (EXIT_FAILURE, errno, "%s", quotef (outfile));
607 if (full_write (output_desc, bp, bytes) == bytes)
608 return true;
609 else
611 if (! ignorable (errno))
612 die (EXIT_FAILURE, errno, "%s", quotef (outfile));
613 return false;
/* Overview of the control flow in bytes_split:
   - TO_WRITE is the number of bytes still owed to the piece being
     written.  It starts at N_BYTES, plus one while pieces remain that
     REM_BYTES says must be one byte longer; a zero value means there
     is nothing to do.
   - The first pass of the outer loop consumes the INITIAL_READ bytes
     already in BUF; INITIAL_READ is then set to SIZE_MAX so that later
     passes read standard input with safe_read.
   - OPENED counts the pieces begun.  When MAX_FILES is nonzero no new
     piece is begun once OPENED reaches it, and the final loop calls
     cwrite (true, NULL, 0) until MAX_FILES pieces have been begun.
   - FILTER_OK is false after cwrite reports a failed (ignorable)
     write; data for that piece is then skipped rather than written.  */
617 /* Split into pieces of exactly N_BYTES bytes.
618 However, the first REM_BYTES pieces should be 1 byte longer.
619 Use buffer BUF, whose size is BUFSIZE.
620 BUF contains the first INITIAL_READ input bytes. */
622 static void
623 bytes_split (uintmax_t n_bytes, uintmax_t rem_bytes,
624 char *buf, size_t bufsize, size_t initial_read,
625 uintmax_t max_files)
627 size_t n_read;
628 bool new_file_flag = true;
629 bool filter_ok = true;
630 uintmax_t opened = 0;
631 uintmax_t to_write = n_bytes + (0 < rem_bytes);
632 bool eof = ! to_write;
634 while (! eof)
/* First pass only: use the data that is already in BUF.  A short
   initial read means the input is exhausted.  */
636 if (initial_read != SIZE_MAX)
638 n_read = initial_read;
639 initial_read = SIZE_MAX;
640 eof = n_read < bufsize;
642 else
/* The current piece is being discarded: if the rest of it can be
   seeked over, move straight on to the next piece.  */
644 if (! filter_ok
645 && lseek (STDIN_FILENO, to_write, SEEK_CUR) != -1)
647 to_write = n_bytes + (opened + 1 < rem_bytes);
648 new_file_flag = true;
651 n_read = safe_read (STDIN_FILENO, buf, bufsize);
652 if (n_read == SAFE_READ_ERROR)
653 die (EXIT_FAILURE, errno, "%s", quotef (infile));
654 eof = n_read == 0;
656 char *bp_out = buf;
/* Emit every piece that ends inside the data now in BUF.  */
657 while (0 < to_write && to_write <= n_read)
659 if (filter_ok || new_file_flag)
660 filter_ok = cwrite (new_file_flag, bp_out, to_write);
661 opened += new_file_flag;
662 new_file_flag = !max_files || (opened < max_files);
663 if (! filter_ok && ! new_file_flag)
665 /* If filters no longer accepting input, stop reading. */
666 n_read = 0;
667 eof = true;
668 break;
670 bp_out += to_write;
671 n_read -= to_write;
672 to_write = n_bytes + (opened < rem_bytes);
/* Whatever is left belongs to a piece that continues in the next
   buffer; write it and reduce TO_WRITE by the amount consumed.  */
674 if (n_read != 0)
676 if (filter_ok || new_file_flag)
677 filter_ok = cwrite (new_file_flag, bp_out, n_read);
678 opened += new_file_flag;
679 new_file_flag = false;
680 if (! filter_ok && opened == max_files)
682 /* If filters no longer accepting input, stop reading. */
683 break;
685 to_write -= n_read;
689 /* Ensure NUMBER files are created, which truncates
690 any existing files or notifies any consumers on fifos.
691 FIXME: Should we do this before EXIT_FAILURE? */
692 while (opened++ < max_files)
693 cwrite (true, NULL, 0);
696 /* Split into pieces of exactly N_LINES lines.
697 Use buffer BUF, whose size is BUFSIZE. */
699 static void
700 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
702 size_t n_read;
703 char *bp, *bp_out, *eob;
704 bool new_file_flag = true;
705 uintmax_t n = 0;
709 n_read = safe_read (STDIN_FILENO, buf, bufsize);
710 if (n_read == SAFE_READ_ERROR)
711 die (EXIT_FAILURE, errno, "%s", quotef (infile));
712 bp = bp_out = buf;
713 eob = bp + n_read;
714 *eob = eolchar;
715 while (true)
717 bp = rawmemchr (bp, eolchar);
718 if (bp == eob)
720 if (eob != bp_out) /* do not write 0 bytes! */
722 size_t len = eob - bp_out;
723 cwrite (new_file_flag, bp_out, len);
724 new_file_flag = false;
726 break;
729 ++bp;
730 if (++n >= n_lines)
732 cwrite (new_file_flag, bp_out, bp - bp_out);
733 bp_out = bp;
734 new_file_flag = true;
735 n = 0;
739 while (n_read);
/* Overview of the state kept by line_bytes_split:
   - N_OUT is the number of bytes written so far to the current piece;
     every cwrite call passes N_OUT == 0 as its new-file flag, so a new
     output file is begun exactly when nothing has been written to the
     current piece yet.
   - HOLD is a heap buffer of HOLD_SIZE bytes, N_HOLD of them in use,
     holding input that has been read but not yet written.  It is
     grown in BUFSIZE steps and freed before returning.
   - SPLIT_LINE records that data up to a line end has already been
     written to the current piece.
   - Within one buffer, SOB points at the first unconsumed byte and
     N_LEFT counts the unconsumed bytes.  EOC is non-null when the
     buffer holds enough data to complete the current piece, and
     SPLIT_REST counts the bytes of that piece still unaccounted for.  */
742 /* Split into pieces that are as large as possible while still not more
743 than N_BYTES bytes, and are split on line boundaries except
744 where lines longer than N_BYTES bytes occur. */
746 static void
747 line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
749 size_t n_read;
750 uintmax_t n_out = 0; /* for each split. */
751 size_t n_hold = 0;
752 char *hold = NULL; /* for lines > bufsize. */
753 size_t hold_size = 0;
754 bool split_line = false; /* Whether a \n was output in a split. */
758 n_read = safe_read (STDIN_FILENO, buf, bufsize);
759 if (n_read == SAFE_READ_ERROR)
760 die (EXIT_FAILURE, errno, "%s", quotef (infile));
761 size_t n_left = n_read;
762 char *sob = buf;
763 while (n_left)
765 size_t split_rest = 0;
766 char *eoc = NULL;
767 char *eol;
769 /* Determine End Of Chunk and/or End of Line,
770 which are used below to select what to write or buffer. */
771 if (n_bytes - n_out - n_hold <= n_left)
773 /* Have enough for split. */
774 split_rest = n_bytes - n_out - n_hold;
775 eoc = sob + split_rest - 1;
776 eol = memrchr (sob, eolchar, split_rest);
778 else
779 eol = memrchr (sob, eolchar, n_left);
781 /* Output hold space if possible. */
782 if (n_hold && !(!eol && n_out))
784 cwrite (n_out == 0, hold, n_hold);
785 n_out += n_hold;
786 if (n_hold > bufsize)
787 hold = xrealloc (hold, bufsize);
788 n_hold = 0;
789 hold_size = bufsize;
792 /* Output to eol if present. */
793 if (eol)
795 split_line = true;
796 size_t n_write = eol - sob + 1;
797 cwrite (n_out == 0, sob, n_write);
798 n_out += n_write;
799 n_left -= n_write;
800 sob += n_write;
801 if (eoc)
802 split_rest -= n_write;
805 /* Output to eoc or eob if possible. */
806 if (n_left && !split_line)
808 size_t n_write = eoc ? split_rest : n_left;
809 cwrite (n_out == 0, sob, n_write);
810 n_out += n_write;
811 n_left -= n_write;
812 sob += n_write;
813 if (eoc)
814 split_rest -= n_write;
817 /* Update hold if needed. */
818 if ((eoc && split_rest) || (!eoc && n_left))
820 size_t n_buf = eoc ? split_rest : n_left;
/* Grow HOLD by one BUFSIZE step when the free space is too small,
   dying rather than letting HOLD_SIZE wrap around.  */
821 if (hold_size - n_hold < n_buf)
823 if (hold_size <= SIZE_MAX - bufsize)
824 hold_size += bufsize;
825 else
826 xalloc_die ();
827 hold = xrealloc (hold, hold_size);
829 memcpy (hold + n_hold, sob, n_buf);
830 n_hold += n_buf;
831 n_left -= n_buf;
832 sob += n_buf;
835 /* Reset for new split. */
836 if (eoc)
838 n_out = 0;
839 split_line = false;
843 while (n_read);
845 /* Handle no eol at end of file. */
846 if (n_hold)
847 cwrite (n_out == 0, hold, n_hold);
849 free (hold);
/* Notes on the parameters and state of lines_chunk_split:
   - K is zero to write every chunk to its own output file through
     cwrite.  A nonzero K selects the single chunk that is written,
     with full_write to standard output; the function returns as soon
     as that chunk is complete.
   - BUF holds INITIAL_READ bytes of input on entry; SIZE_MAX in
     INITIAL_READ means that data has been used up and safe_read is
     called instead.  FILE_SIZE is the total number of input bytes.
   - CHUNK_SIZE is FILE_SIZE / N, and the first FILE_SIZE % N chunks
     are one byte longer.  CHUNK_NO is the chunk being filled,
     CHUNK_END the offset at which it ends, and N_WRITTEN the number
     of input bytes consumed so far.  */
852 /* -n l/[K/]N: Write lines to files of approximately file size / N.
853 The file is partitioned into file size / N sized portions, with the
854 last assigned any excess. If a line _starts_ within a partition
855 it is written completely to the corresponding file. Since lines
856 are not split even if they overlap a partition, the files written
857 can be larger or smaller than the partition size, and even empty
858 if a line is so long as to completely overlap the partition. */
860 static void
861 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
862 size_t initial_read, off_t file_size)
864 assert (n && k <= n);
866 uintmax_t rem_bytes = file_size % n;
867 off_t chunk_size = file_size / n;
868 uintmax_t chunk_no = 1;
869 off_t chunk_end = chunk_size + (0 < rem_bytes);
870 off_t n_written = 0;
871 bool new_file_flag = true;
872 bool chunk_truncated = false;
874 if (k > 1 && 0 < file_size)
876 /* Start reading 1 byte before kth chunk of file. */
877 off_t start = (k - 1) * chunk_size + MIN (k - 1, rem_bytes) - 1;
/* Reach START either by shifting the data already in BUF or, when
   START lies beyond it, by seeking forward on standard input.  */
878 if (start < initial_read)
880 memmove (buf, buf + start, initial_read - start);
881 initial_read -= start;
883 else
885 if (initial_read < start
886 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
887 die (EXIT_FAILURE, errno, "%s", quotef (infile));
888 initial_read = SIZE_MAX;
890 n_written = start;
891 chunk_no = k - 1;
892 chunk_end = start + 1;
895 while (n_written < file_size)
897 char *bp = buf, *eob;
898 size_t n_read;
899 if (initial_read != SIZE_MAX)
901 n_read = initial_read;
902 initial_read = SIZE_MAX;
904 else
906 n_read = safe_read (STDIN_FILENO, buf,
907 MIN (bufsize, file_size - n_written));
908 if (n_read == SAFE_READ_ERROR)
909 die (EXIT_FAILURE, errno, "%s", quotef (infile));
911 if (n_read == 0)
912 break; /* eof. */
913 chunk_truncated = false;
914 eob = buf + n_read;
916 while (bp != eob)
918 size_t to_write;
919 bool next = false;
921 /* Begin looking for '\n' at last byte of chunk. */
922 off_t skip = MIN (n_read, MAX (0, chunk_end - 1 - n_written));
923 char *bp_out = memchr (bp + skip, eolchar, n_read - skip);
924 if (bp_out)
926 bp_out++;
927 next = true;
929 else
930 bp_out = eob;
931 to_write = bp_out - bp;
933 if (k == chunk_no)
935 /* We don't use the stdout buffer here since we're writing
936 large chunks from an existing file, so it's more efficient
937 to write out directly. */
938 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
939 die (EXIT_FAILURE, errno, "%s", _("write error"));
941 else if (! k)
942 cwrite (new_file_flag, bp, to_write);
943 n_written += to_write;
944 bp += to_write;
945 n_read -= to_write;
946 new_file_flag = next;
948 /* A line could have been so long that it skipped
949 entire chunks. So create empty files in that case. */
950 while (next || chunk_end <= n_written)
952 if (!next && bp == eob)
954 /* replenish buf, before going to next chunk. */
955 chunk_truncated = true;
956 break;
958 if (k == chunk_no)
959 return;
960 chunk_end += chunk_size + (chunk_no < rem_bytes);
961 chunk_no++;
962 if (chunk_end <= n_written)
964 if (! k)
965 cwrite (true, NULL, 0);
967 else
968 next = false;
973 if (chunk_truncated)
974 chunk_no++;
976 /* Ensure NUMBER files are created, which truncates
977 any existing files or notifies any consumers on fifos.
978 FIXME: Should we do this before EXIT_FAILURE? */
979 if (!k)
980 while (chunk_no++ <= n)
981 cwrite (true, NULL, 0);
984 /* -n K/N: Extract Kth of N chunks. */
986 static void
987 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
988 size_t initial_read, off_t file_size)
990 off_t start;
991 off_t end;
993 assert (0 < k && k <= n);
995 start = (k - 1) * (file_size / n) + MIN (k - 1, file_size % n);
996 end = k == n ? file_size : k * (file_size / n) + MIN (k, file_size % n);
998 if (start < initial_read)
1000 memmove (buf, buf + start, initial_read - start);
1001 initial_read -= start;
1003 else
1005 if (initial_read < start
1006 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
1007 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1008 initial_read = SIZE_MAX;
1011 while (start < end)
1013 size_t n_read;
1014 if (initial_read != SIZE_MAX)
1016 n_read = initial_read;
1017 initial_read = SIZE_MAX;
1019 else
1021 n_read = safe_read (STDIN_FILENO, buf, bufsize);
1022 if (n_read == SAFE_READ_ERROR)
1023 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1025 if (n_read == 0)
1026 break; /* eof. */
1027 n_read = MIN (n_read, end - start);
1028 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
1029 && ! ignorable (errno))
1030 die (EXIT_FAILURE, errno, "%s", quotef ("-"));
1031 start += n_read;
/* Bookkeeping for one output of the round robin split.  */
typedef struct of_info
{
  char *of_name;   /* Output file name.  */
  int ofd;         /* Open descriptor, or a negative OFD_* marker.  */
  FILE *ofile;     /* Stream opened on OFD, or NULL while closed.  */
  pid_t opid;      /* Filter process ID saved by ofile_open; stored as
                      pid_t, the type closeout takes, so the value is
                      never narrowed.  */
} of_t;
/* Markers kept in the ofd member of an of_t in place of a real
   descriptor.  ofile_open creates an OFD_NEW output and reopens an
   OFD_APPEND one for appending.  */
enum
{
  OFD_NEW = -1,
  OFD_APPEND = -2
};
1049 /* Rotate file descriptors when we're writing to more output files than we
1050 have available file descriptors.
1051 Return whether we came under file resource pressure.
1052 If so, it's probably best to close each file when finished with it. */
1054 static bool
1055 ofile_open (of_t *files, size_t i_check, size_t nfiles)
1057 bool file_limit = false;
1059 if (files[i_check].ofd <= OFD_NEW)
1061 int fd;
1062 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
1064 /* Another process could have opened a file in between the calls to
1065 close and open, so we should keep trying until open succeeds or
1066 we've closed all of our files. */
1067 while (true)
1069 if (files[i_check].ofd == OFD_NEW)
1070 fd = create (files[i_check].of_name);
1071 else /* OFD_APPEND */
1073 /* Attempt to append to previously opened file.
1074 We use O_NONBLOCK to support writing to fifos,
1075 where the other end has closed because of our
1076 previous close. In that case we'll immediately
1077 get an error, rather than waiting indefinitely.
1078 In specialised cases the consumer can keep reading
1079 from the fifo, terminating on conditions in the data
1080 itself, or perhaps never in the case of 'tail -f'.
1081 I.e., for fifos it is valid to attempt this reopen.
1083 We don't handle the filter_command case here, as create()
1084 will exit if there are not enough files in that case.
1085 I.e., we don't support restarting filters, as that would
1086 put too much burden on users specifying --filter commands. */
1087 fd = open (files[i_check].of_name,
1088 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
1091 if (-1 < fd)
1092 break;
1094 if (!(errno == EMFILE || errno == ENFILE))
1095 die (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1097 file_limit = true;
1099 /* Search backwards for an open file to close. */
1100 while (files[i_reopen].ofd < 0)
1102 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
1103 /* No more open files to close, exit with E[NM]FILE. */
1104 if (i_reopen == i_check)
1105 die (EXIT_FAILURE, errno, "%s",
1106 quotef (files[i_check].of_name));
1109 if (fclose (files[i_reopen].ofile) != 0)
1110 die (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name));
1111 files[i_reopen].ofile = NULL;
1112 files[i_reopen].ofd = OFD_APPEND;
1115 files[i_check].ofd = fd;
1116 if (!(files[i_check].ofile = fdopen (fd, "a")))
1117 die (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1118 files[i_check].opid = filter_pid;
1119 filter_pid = 0;
1122 return file_limit;
1125 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1126 Use BUF of size BUFSIZE for the buffer, and if allocating storage
1127 put its address into *FILESP to pacify -fsanitize=leak.
1128 When K == 0, we try to keep the files open in parallel.
1129 If we run out of file resources, then we revert
1130 to opening and closing each file for each line. */
1132 static void
1133 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, of_t **filesp)
1135 bool wrapped = false;
1136 bool wrote = false;
1137 bool file_limit;
1138 size_t i_file;
1139 of_t *files IF_LINT (= NULL);
1140 uintmax_t line_no;
1142 if (k)
1143 line_no = 1;
1144 else
1146 if (SIZE_MAX < n)
1147 xalloc_die ();
1148 files = *filesp = xnmalloc (n, sizeof *files);
1150 /* Generate output file names. */
1151 for (i_file = 0; i_file < n; i_file++)
1153 next_file_name ();
1154 files[i_file].of_name = xstrdup (outfile);
1155 files[i_file].ofd = OFD_NEW;
1156 files[i_file].ofile = NULL;
1157 files[i_file].opid = 0;
1159 i_file = 0;
1160 file_limit = false;
1163 while (true)
1165 char *bp = buf, *eob;
1166 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
1167 if (n_read == SAFE_READ_ERROR)
1168 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1169 else if (n_read == 0)
1170 break; /* eof. */
1171 eob = buf + n_read;
1173 while (bp != eob)
1175 size_t to_write;
1176 bool next = false;
1178 /* Find end of line. */
1179 char *bp_out = memchr (bp, eolchar, eob - bp);
1180 if (bp_out)
1182 bp_out++;
1183 next = true;
1185 else
1186 bp_out = eob;
1187 to_write = bp_out - bp;
1189 if (k)
1191 if (line_no == k && unbuffered)
1193 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
1194 die (EXIT_FAILURE, errno, "%s", _("write error"));
1196 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
1198 clearerr (stdout); /* To silence close_stdout(). */
1199 die (EXIT_FAILURE, errno, "%s", _("write error"));
1201 if (next)
1202 line_no = (line_no == n) ? 1 : line_no + 1;
1204 else
1206 /* Secure file descriptor. */
1207 file_limit |= ofile_open (files, i_file, n);
1208 if (unbuffered)
1210 /* Note writing to fd, rather than flushing the FILE gives
1211 an 8% performance benefit, due to reduced data copying. */
1212 if (full_write (files[i_file].ofd, bp, to_write) != to_write
1213 && ! ignorable (errno))
1215 die (EXIT_FAILURE, errno, "%s",
1216 quotef (files[i_file].of_name));
1219 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1220 && ! ignorable (errno))
1222 die (EXIT_FAILURE, errno, "%s",
1223 quotef (files[i_file].of_name));
1226 if (! ignorable (errno))
1227 wrote = true;
1229 if (file_limit)
1231 if (fclose (files[i_file].ofile) != 0)
1233 die (EXIT_FAILURE, errno, "%s",
1234 quotef (files[i_file].of_name));
1236 files[i_file].ofile = NULL;
1237 files[i_file].ofd = OFD_APPEND;
1239 if (next && ++i_file == n)
1241 wrapped = true;
1242 /* If no filters are accepting input, stop reading. */
1243 if (! wrote)
1244 goto no_filters;
1245 wrote = false;
1246 i_file = 0;
1250 bp = bp_out;
1254 no_filters:
1255 /* Ensure all files created, so that any existing files are truncated,
1256 and to signal any waiting fifo consumers.
1257 Also, close any open file descriptors.
1258 FIXME: Should we do this before EXIT_FAILURE? */
1259 if (!k)
1261 int ceiling = (wrapped ? n : i_file);
1262 for (i_file = 0; i_file < n; i_file++)
1264 if (i_file >= ceiling && !elide_empty_files)
1265 file_limit |= ofile_open (files, i_file, n);
1266 if (files[i_file].ofd >= 0)
1267 closeout (files[i_file].ofile, files[i_file].ofd,
1268 files[i_file].opid, files[i_file].of_name);
1269 files[i_file].ofd = OFD_APPEND;
/* Diagnose a second, conflicting splitting-mode option and then call
   usage (EXIT_FAILURE).  Wrapped in do ... while (0) so the expansion
   behaves as a single statement, e.g. as the body of an unbraced 'if'.  */
#define FAIL_ONLY_ONE_WAY() \
  do { \
    error (0, 0, _("cannot split in more than one way")); \
    usage (EXIT_FAILURE); \
  } while (0)
1283 /* Parse K/N syntax of chunk options. */
1285 static void
1286 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
1288 *n_units = xdectoumax (slash + 1, 1, UINTMAX_MAX, "",
1289 _("invalid number of chunks"), 0);
1290 if (slash != optarg) /* a leading number is specified. */
1292 *slash = '\0';
1293 *k_units = xdectoumax (optarg, 1, *n_units, "",
1294 _("invalid chunk number"), 0);
/* Program entry point.
   Parse the command line to select exactly one splitting mode
   (split_type) together with its counts (n_units, and k_units for the
   K/N chunk forms), then reopen standard input on the named input file
   unless it is "-", allocate the I/O buffer, and dispatch to the
   splitting routine matching split_type.
   Defaults set below: input "-", output prefix "x", and type_lines
   with n_units = 1000 when no mode option was given.
   Finishes by closing standard input and the last output, then calling
   main_exit (EXIT_SUCCESS).  */
1300 main (int argc, char **argv)
1302 enum Split_type split_type = type_undef;
1303 idx_t in_blk_size = 0; /* optimal block size of input file device */
1304 size_t page_size = getpagesize ();
1305 uintmax_t k_units = 0;
1306 uintmax_t n_units = 0;
/* Suffix characters passed to xdectoumax for the size-valued options.  */
1308 static char const multipliers[] = "bEGKkMmPQRTYZ0";
1309 int c;
/* argv index of the most recent -DIGITS option, or 0 if none yet.  */
1310 int digits_optind = 0;
1311 off_t file_size = OFF_T_MAX;
1313 initialize_main (&argc, &argv);
1314 set_program_name (argv[0]);
1315 setlocale (LC_ALL, "");
1316 bindtextdomain (PACKAGE, LOCALEDIR);
1317 textdomain (PACKAGE);
1319 atexit (close_stdout);
1321 /* Parse command line options. */
1323 infile = bad_cast ("-");
1324 outbase = bad_cast ("x");
1326 while (true)
1328 /* This is the argv-index of the option we will read next. */
1329 int this_optind = optind ? optind : 1;
1330 char *slash;
1332 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:ux",
1333 longopts, NULL);
1334 if (c == -1)
1335 break;
1337 switch (c)
1339 case 'a':
1340 suffix_length = xdectoumax (optarg, 0, SIZE_MAX / sizeof (size_t),
1341 "", _("invalid suffix length"), 0);
1342 break;
1344 case ADDITIONAL_SUFFIX_OPTION:
1345 if (last_component (optarg) != optarg)
1347 error (0, 0,
1348 _("invalid suffix %s, contains directory separator"),
1349 quote (optarg));
1350 usage (EXIT_FAILURE);
1352 additional_suffix = optarg;
1353 break;
1355 case 'b':
1356 if (split_type != type_undef)
1357 FAIL_ONLY_ONE_WAY ();
1358 split_type = type_bytes;
1359 /* Limit to OFF_T_MAX, because if input is a pipe, we could get more
1360 data than is possible to write to a single file, so indicate that
1361 immediately rather than having possibly future invocations fail. */
1362 n_units = xdectoumax (optarg, 1, OFF_T_MAX, multipliers,
1363 _("invalid number of bytes"), 0);
1364 break;
1366 case 'l':
1367 if (split_type != type_undef)
1368 FAIL_ONLY_ONE_WAY ();
1369 split_type = type_lines;
1370 n_units = xdectoumax (optarg, 1, UINTMAX_MAX, "",
1371 _("invalid number of lines"), 0);
1372 break;
1374 case 'C':
1375 if (split_type != type_undef)
1376 FAIL_ONLY_ONE_WAY ();
1377 split_type = type_byteslines;
1378 n_units = xdectoumax (optarg, 1, MIN (SIZE_MAX, OFF_T_MAX),
1379 multipliers, _("invalid number of bytes"), 0);
1380 break;
1382 case 'n':
1383 if (split_type != type_undef)
1384 FAIL_ONLY_ONE_WAY ();
1385 /* skip any whitespace */
1386 while (isspace (to_uchar (*optarg)))
1387 optarg++;
/* An "r/" prefix selects round robin, "l/" selects line-preserving
   chunks, and no prefix selects byte chunks.  */
1388 if (STRNCMP_LIT (optarg, "r/") == 0)
1390 split_type = type_rr;
1391 optarg += 2;
1393 else if (STRNCMP_LIT (optarg, "l/") == 0)
1395 split_type = type_chunk_lines;
1396 optarg += 2;
1398 else
1399 split_type = type_chunk_bytes;
/* A remaining '/' means the K/N form; otherwise the argument is just N.  */
1400 if ((slash = strchr (optarg, '/')))
1401 parse_chunk (&k_units, &n_units, slash);
1402 else
1403 n_units = xdectoumax (optarg, 1, UINTMAX_MAX, "",
1404 _("invalid number of chunks"), 0);
1405 break;
1407 case 'u':
1408 unbuffered = true;
1409 break;
1411 case 't':
1413 char neweol = optarg[0];
1414 if (! neweol)
1415 die (EXIT_FAILURE, 0, _("empty record separator"));
1416 if (optarg[1])
/* The only multi-character argument accepted is the two-character
   text \0 (backslash, zero), which selects NUL as the separator.  */
1418 if (STREQ (optarg, "\\0"))
1419 neweol = '\0';
1420 else
1422 /* Provoke with 'split -txx'. Complain about
1423 "multi-character separator" instead of "multibyte separator", so
1424 that the diagnostic's wording does not need to be
1425 changed once multibyte characters are supported. */
1426 die (EXIT_FAILURE, 0, _("multi-character separator %s"),
1427 quote (optarg));
1430 /* Make it explicit we don't support multiple separators. */
1431 if (0 <= eolchar && neweol != eolchar)
1433 die (EXIT_FAILURE, 0,
1434 _("multiple separator characters specified"));
1437 eolchar = neweol;
1439 break;
1441 case '0':
1442 case '1':
1443 case '2':
1444 case '3':
1445 case '4':
1446 case '5':
1447 case '6':
1448 case '7':
1449 case '8':
1450 case '9':
/* A bare -DIGITS option: accumulate the number into n_units one
   digit at a time.  */
1451 if (split_type == type_undef)
1453 split_type = type_digits;
1454 n_units = 0;
1456 if (split_type != type_undef && split_type != type_digits)
1457 FAIL_ONLY_ONE_WAY ();
1458 if (digits_optind != 0 && digits_optind != this_optind)
1459 n_units = 0; /* More than one number given; ignore other. */
1460 digits_optind = this_optind;
1461 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
1463 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1464 die (EXIT_FAILURE, 0,
1465 _("line count option -%s%c... is too large"),
1466 umaxtostr (n_units, buffer), c);
1468 break;
1470 case 'd':
1471 case 'x':
1472 if (c == 'd')
1473 suffix_alphabet = "0123456789";
1474 else
1475 suffix_alphabet = "0123456789abcdef";
/* An optional argument gives the starting suffix value; it must
   consist solely of characters from the chosen alphabet.  */
1476 if (optarg)
1478 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1480 error (0, 0,
1481 (c == 'd') ?
1482 _("%s: invalid start value for numerical suffix") :
1483 _("%s: invalid start value for hexadecimal suffix"),
1484 quote (optarg));
1485 usage (EXIT_FAILURE);
1487 else
1489 /* Skip any leading zero. */
1490 while (*optarg == '0' && *(optarg + 1) != '\0')
1491 optarg++;
1492 numeric_suffix_start = optarg;
1495 break;
1497 case 'e':
1498 elide_empty_files = true;
1499 break;
1501 case FILTER_OPTION:
1502 filter_command = optarg;
1503 break;
1505 case IO_BLKSIZE_OPTION:
1506 in_blk_size = xdectoumax (optarg, 1, MIN (IDX_MAX, SIZE_MAX) - 1,
1507 multipliers, _("invalid IO block size"), 0);
1508 break;
1510 case VERBOSE_OPTION:
1511 verbose = true;
1512 break;
1514 case_GETOPT_HELP_CHAR;
1516 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1518 default:
1519 usage (EXIT_FAILURE);
/* k_units is nonzero only when a K/N chunk was requested, and that
   chunk is written to standard output rather than through a filter.  */
1523 if (k_units != 0 && filter_command)
1525 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1526 usage (EXIT_FAILURE);
1529 /* Handle default case. */
1530 if (split_type == type_undef)
1532 split_type = type_lines;
1533 n_units = 1000;
/* NOTE(review): presumably reachable only through the -DIGITS form,
   since the other option parsers above pass 1 as the minimum to
   xdectoumax -- confirm.  */
1536 if (n_units == 0)
1538 error (0, 0, "%s: %s", _("invalid number of lines"), quote ("0"));
1539 usage (EXIT_FAILURE);
/* A negative eolchar means -t was not given; default to newline.  */
1542 if (eolchar < 0)
1543 eolchar = '\n';
1545 set_suffix_length (n_units, split_type);
1547 /* Get out the filename arguments. */
1549 if (optind < argc)
1550 infile = argv[optind++];
1552 if (optind < argc)
1553 outbase = argv[optind++];
1555 if (optind < argc)
1557 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1558 usage (EXIT_FAILURE);
1561 /* Check that the suffix length is large enough for the numerical
1562 suffix start value. */
1563 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1565 error (0, 0, _("numerical suffix start value is too large "
1566 "for the suffix length"));
1567 usage (EXIT_FAILURE);
1570 /* Open the input file. */
1571 if (! STREQ (infile, "-")
1572 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1573 die (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1574 quoteaf (infile));
1576 /* Binary I/O is safer when byte counts are used. */
1577 xset_binary_mode (STDIN_FILENO, O_BINARY);
1579 /* Get the optimal block size of input device and make a buffer. */
1581 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1582 die (EXIT_FAILURE, errno, "%s", quotef (infile));
/* An explicit IO block size from the command line takes precedence
   over the size derived from the input's stat data.  */
1584 bool specified_buf_size = !! in_blk_size;
1585 if (! specified_buf_size)
1586 in_blk_size = io_blksize (in_stat_buf);
1588 char *buf = xalignalloc (page_size, in_blk_size + 1);
1589 size_t initial_read = SIZE_MAX;
/* The chunk modes handled here need the total input size up front.  */
1591 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1593 file_size = input_file_size (STDIN_FILENO, &in_stat_buf,
1594 buf, in_blk_size);
1595 if (file_size < 0)
1596 die (EXIT_FAILURE, errno, _("%s: cannot determine file size"),
1597 quotef (infile));
1598 initial_read = MIN (file_size, in_blk_size);
1599 /* Overflow, and sanity checking. */
1600 if (OFF_T_MAX < n_units)
1602 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1603 die (EXIT_FAILURE, EOVERFLOW, "%s: %s",
1604 _("invalid number of chunks"),
1605 quote (umaxtostr (n_units, buffer)));
1609 /* When filtering, closure of one pipe must not terminate the process,
1610 as there may still be other streams expecting input from us. */
1611 if (filter_command)
1613 struct sigaction act;
1614 sigemptyset (&newblocked);
1615 sigaction (SIGPIPE, NULL, &act);
/* Add SIGPIPE to the blocked set only if it is not already ignored.  */
1616 if (act.sa_handler != SIG_IGN)
1617 sigaddset (&newblocked, SIGPIPE);
1618 sigprocmask (SIG_BLOCK, &newblocked, &oldblocked);
/* Dispatch to the routine implementing the selected mode.  */
1621 switch (split_type)
1623 case type_digits:
1624 case type_lines:
1625 lines_split (n_units, buf, in_blk_size);
1626 break;
1628 case type_bytes:
1629 bytes_split (n_units, 0, buf, in_blk_size, SIZE_MAX, 0);
1630 break;
1632 case type_byteslines:
1633 line_bytes_split (n_units, buf, in_blk_size);
1634 break;
1636 case type_chunk_bytes:
1637 if (k_units == 0)
1638 bytes_split (file_size / n_units, file_size % n_units,
1639 buf, in_blk_size, initial_read, n_units);
1640 else
1641 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
1642 file_size);
1643 break;
1645 case type_chunk_lines:
1646 lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
1647 file_size);
1648 break;
1650 case type_rr:
1651 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1652 but the functionality is provided for symmetry. */
1654 of_t *files;
1655 lines_rr (k_units, n_units, buf, in_blk_size, &files);
1657 break;
1659 default:
1660 abort ();
1663 if (close (STDIN_FILENO) != 0)
1664 die (EXIT_FAILURE, errno, "%s", quotef (infile));
1665 closeout (NULL, output_desc, filter_pid, outfile);
1667 main_exit (EXIT_SUCCESS);