doc: remove older ChangeLog items
[coreutils.git] / src / split.c
blob037960a5940b54ab410bcb8528b1c4ec35c5c3a8
/* split.c -- split a file into pieces.
   Copyright (C) 1988-2024 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
/* By tege@sics.se, with rms.

   TODO:
   * support -p REGEX as in BSD's split.
   * support --suppress-matched as in csplit.  */
22 #include <config.h>
24 #include <ctype.h>
25 #include <stdio.h>
26 #include <getopt.h>
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
31 #include "system.h"
32 #include "alignalloc.h"
33 #include "assure.h"
34 #include "fadvise.h"
35 #include "fd-reopen.h"
36 #include "fcntl--.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
39 #include "quote.h"
40 #include "sig2str.h"
41 #include "sys-limits.h"
42 #include "temp-stream.h"
43 #include "xbinary-io.h"
44 #include "xdectoint.h"
45 #include "xstrtol.h"
/* Canonical program name, i.e. without any installation prefix
   such as 'g'.  */
#define PROGRAM_NAME "split"

/* Author list, as an argument sequence built from the proper_name
   helpers.  */
#define AUTHORS \
  proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
  proper_name ("Richard M. Stallman")
54 /* Shell command to filter through, instead of creating files. */
55 static char const *filter_command;
57 /* Process ID of the filter. */
58 static pid_t filter_pid;
60 /* Array of open pipes. */
61 static int *open_pipes;
62 static idx_t open_pipes_alloc;
63 static int n_open_pipes;
65 /* Whether SIGPIPE has the default action, when --filter is used. */
66 static bool default_SIGPIPE;
68 /* Base name of output files. */
69 static char const *outbase;
71 /* Name of output files. */
72 static char *outfile;
74 /* Pointer to the end of the prefix in OUTFILE.
75 Suffixes are inserted here. */
76 static char *outfile_mid;
78 /* Generate new suffix when suffixes are exhausted. */
79 static bool suffix_auto = true;
81 /* Length of OUTFILE's suffix. */
82 static idx_t suffix_length;
84 /* Alphabet of characters to use in suffix. */
85 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
87 /* Numerical suffix start value. */
88 static char const *numeric_suffix_start;
90 /* Additional suffix to append to output file names. */
91 static char const *additional_suffix;
93 /* Name of input file. May be "-". */
94 static char const *infile;
96 /* stat buf for input file. */
97 static struct stat in_stat_buf;
99 /* Descriptor on which output file is open. */
100 static int output_desc = -1;
102 /* If true, print a diagnostic on standard error just before each
103 output file is opened. */
104 static bool verbose;
106 /* If true, don't generate zero length output files. */
107 static bool elide_empty_files;
109 /* If true, in round robin mode, immediately copy
110 input to output, which is much slower, so disabled by default. */
111 static bool unbuffered;
113 /* The character marking end of line. Defaults to \n below. */
114 static int eolchar = -1;
/* The splitting mode in effect.  */
enum Split_type
{
  type_undef,
  type_bytes,
  type_byteslines,
  type_lines,
  type_digits,
  type_chunk_bytes,
  type_chunk_lines,
  type_rr
};
/* Long options lacking a short equivalent are identified by values
   that cannot be characters, numbered from CHAR_MAX + 1 upwards.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  FILTER_OPTION,
  IO_BLKSIZE_OPTION,
  ADDITIONAL_SUFFIX_OPTION
};
133 static struct option const longopts[] =
135 {"bytes", required_argument, nullptr, 'b'},
136 {"lines", required_argument, nullptr, 'l'},
137 {"line-bytes", required_argument, nullptr, 'C'},
138 {"number", required_argument, nullptr, 'n'},
139 {"elide-empty-files", no_argument, nullptr, 'e'},
140 {"unbuffered", no_argument, nullptr, 'u'},
141 {"suffix-length", required_argument, nullptr, 'a'},
142 {"additional-suffix", required_argument, nullptr,
143 ADDITIONAL_SUFFIX_OPTION},
144 {"numeric-suffixes", optional_argument, nullptr, 'd'},
145 {"hex-suffixes", optional_argument, nullptr, 'x'},
146 {"filter", required_argument, nullptr, FILTER_OPTION},
147 {"verbose", no_argument, nullptr, VERBOSE_OPTION},
148 {"separator", required_argument, nullptr, 't'},
149 {"-io-blksize", required_argument, nullptr,
150 IO_BLKSIZE_OPTION}, /* do not document */
151 {GETOPT_HELP_OPTION_DECL},
152 {GETOPT_VERSION_OPTION_DECL},
153 {nullptr, 0, nullptr, 0}
156 /* Return true if the errno value, ERR, is ignorable. */
157 static inline bool
158 ignorable (int err)
160 return filter_command && err == EPIPE;
163 static void
164 set_suffix_length (intmax_t n_units, enum Split_type split_type)
166 #define DEFAULT_SUFFIX_LENGTH 2
168 int suffix_length_needed = 0;
170 /* The suffix auto length feature is incompatible with
171 a user specified start value as the generated suffixes
172 are not all consecutive. */
173 if (numeric_suffix_start)
174 suffix_auto = false;
176 /* Auto-calculate the suffix length if the number of files is given. */
177 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
178 || split_type == type_rr)
180 intmax_t n_units_end = n_units - 1;
181 if (numeric_suffix_start)
183 intmax_t n_start;
184 strtol_error e = xstrtoimax (numeric_suffix_start, nullptr, 10,
185 &n_start, "");
186 if (e == LONGINT_OK && n_start < n_units)
188 /* Restrict auto adjustment so we don't keep
189 incrementing a suffix size arbitrarily,
190 as that would break sort order for files
191 generated from multiple split runs. */
192 if (ckd_add (&n_units_end, n_units_end, n_start))
193 n_units_end = INTMAX_MAX;
197 idx_t alphabet_len = strlen (suffix_alphabet);
199 suffix_length_needed++;
200 while (n_units_end /= alphabet_len);
202 suffix_auto = false;
205 if (suffix_length) /* set by user */
207 if (suffix_length < suffix_length_needed)
208 error (EXIT_FAILURE, 0,
209 _("the suffix length needs to be at least %d"),
210 suffix_length_needed);
211 suffix_auto = false;
212 return;
214 else
215 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_length_needed);
/* Print usage information, then terminate via exit (STATUS).
   If STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the synopsis, the option summary, the explanation of the
   CHUNKS argument and the ancillary notes are written to stdout.
   NOTE(review): this listing looks like a web extraction -- each line
   carries a stray leading line number, brace-only lines are absent and
   blanks inside the help strings appear collapsed.  Restore the text
   from the upstream file rather than editing the strings here.  */
218 void
219 usage (int status)
221 if (status != EXIT_SUCCESS)
222 emit_try_help ();
223 else
225 printf (_("\
226 Usage: %s [OPTION]... [FILE [PREFIX]]\n\
228 program_name);
229 fputs (_("\
230 Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
231 default size is 1000 lines, and default PREFIX is 'x'.\n\
232 "), stdout);
234 emit_stdin_note ();
235 emit_mandatory_arg_note ();
237 fprintf (stdout, _("\
238 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
239 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
240 -b, --bytes=SIZE put SIZE bytes per output file\n\
241 -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
242 -d use numeric suffixes starting at 0, not alphabetic\n\
243 --numeric-suffixes[=FROM] same as -d, but allow setting the start value\
245 -x use hex suffixes starting at 0, not alphabetic\n\
246 --hex-suffixes[=FROM] same as -x, but allow setting the start value\n\
247 -e, --elide-empty-files do not generate empty output files with '-n'\n\
248 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
249 -l, --lines=NUMBER put NUMBER lines/records per output file\n\
250 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
251 -t, --separator=SEP use SEP instead of newline as the record separator;\n\
252 '\\0' (zero) specifies the NUL character\n\
253 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
254 "), DEFAULT_SUFFIX_LENGTH);
255 fputs (_("\
256 --verbose print a diagnostic just before each\n\
257 output file is opened\n\
258 "), stdout);
259 fputs (HELP_OPTION_DESCRIPTION, stdout);
260 fputs (VERSION_OPTION_DESCRIPTION, stdout);
261 emit_size_note ();
262 fputs (_("\n\
263 CHUNKS may be:\n\
264 N split into N files based on size of input\n\
265 K/N output Kth of N to stdout\n\
266 l/N split into N files without splitting lines/records\n\
267 l/K/N output Kth of N to stdout without splitting lines/records\n\
268 r/N like 'l' but use round robin distribution\n\
269 r/K/N likewise but only output Kth of N to stdout\n\
270 "), stdout);
271 emit_ancillary_info (PROGRAM_NAME);
273 exit (status);
276 /* Copy the data in FD to a temporary file, then make that file FD.
277 Use BUF, of size BUFSIZE, to copy. Return the number of
278 bytes copied, or -1 (setting errno) on error. */
279 static off_t
280 copy_to_tmpfile (int fd, char *buf, idx_t bufsize)
282 FILE *tmp;
283 if (!temp_stream (&tmp, nullptr))
284 return -1;
285 off_t copied = 0;
286 off_t r;
288 while (0 < (r = read (fd, buf, bufsize)))
290 if (fwrite (buf, 1, r, tmp) != r)
291 return -1;
292 if (ckd_add (&copied, copied, r))
294 errno = EOVERFLOW;
295 return -1;
299 if (r < 0)
300 return r;
301 r = dup2 (fileno (tmp), fd);
302 if (r < 0)
303 return r;
304 if (fclose (tmp) < 0)
305 return -1;
306 return copied;
309 /* Return the number of bytes that can be read from FD with status ST.
310 Store up to the first BUFSIZE bytes of the file's data into BUF,
311 and advance the file position by the number of bytes read. On
312 input error, set errno and return -1. */
314 static off_t
315 input_file_size (int fd, struct stat const *st, char *buf, idx_t bufsize)
317 off_t size = 0;
320 ssize_t n_read = read (fd, buf + size, bufsize - size);
321 if (n_read <= 0)
322 return n_read < 0 ? n_read : size;
323 size += n_read;
325 while (size < bufsize);
327 off_t cur, end;
328 if ((usable_st_size (st) && st->st_size < size)
329 || (cur = lseek (fd, 0, SEEK_CUR)) < 0
330 || cur < size /* E.g., /dev/zero on GNU/Linux. */
331 || (end = lseek (fd, 0, SEEK_END)) < 0)
333 char *tmpbuf = xmalloc (bufsize);
334 end = copy_to_tmpfile (fd, tmpbuf, bufsize);
335 free (tmpbuf);
336 if (end < 0)
337 return end;
338 cur = 0;
341 if (end == OFF_T_MAX /* E.g., /dev/zero on GNU/Hurd. */
342 || (cur < end && ckd_add (&size, size, end - cur)))
344 errno = EOVERFLOW;
345 return -1;
348 if (cur < end)
350 off_t r = lseek (fd, cur, SEEK_SET);
351 if (r < 0)
352 return r;
355 return size;
358 /* Compute the next sequential output file name and store it into the
359 string 'outfile'. */
361 static void
362 next_file_name (void)
364 /* Index in suffix_alphabet of each character in the suffix. */
365 static idx_t *sufindex;
366 static idx_t outbase_length;
367 static idx_t outfile_length;
368 static idx_t addsuf_length;
370 if (! outfile)
372 bool overflow, widen;
374 new_name:
375 widen = !! outfile_length;
377 if (! widen)
379 /* Allocate and initialize the first file name. */
381 outbase_length = strlen (outbase);
382 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
383 overflow = ckd_add (&outfile_length, outbase_length + addsuf_length,
384 suffix_length);
386 else
388 /* Reallocate and initialize a new wider file name.
389 We do this by subsuming the unchanging part of
390 the generated suffix into the prefix (base), and
391 reinitializing the now one longer suffix. */
393 overflow = ckd_add (&outfile_length, outfile_length, 2);
394 suffix_length++;
397 idx_t outfile_size;
398 overflow |= ckd_add (&outfile_size, outfile_length, 1);
399 if (overflow)
400 xalloc_die ();
401 outfile = xirealloc (outfile, outfile_size);
403 if (! widen)
404 memcpy (outfile, outbase, outbase_length);
405 else
407 /* Append the last alphabet character to the file name prefix. */
408 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
409 outbase_length++;
412 outfile_mid = outfile + outbase_length;
413 memset (outfile_mid, suffix_alphabet[0], suffix_length);
414 if (additional_suffix)
415 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
416 outfile[outfile_length] = 0;
418 free (sufindex);
419 sufindex = xicalloc (suffix_length, sizeof *sufindex);
421 if (numeric_suffix_start)
423 affirm (! widen);
425 /* Update the output file name. */
426 idx_t i = strlen (numeric_suffix_start);
427 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
429 /* Update the suffix index. */
430 idx_t *sufindex_end = sufindex + suffix_length;
431 while (i-- != 0)
432 *--sufindex_end = numeric_suffix_start[i] - '0';
435 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
436 /* POSIX requires that if the output file name is too long for
437 its directory, 'split' must fail without creating any files.
438 This must be checked for explicitly on operating systems that
439 silently truncate file names. */
441 char *dir = dir_name (outfile);
442 long name_max = pathconf (dir, _PC_NAME_MAX);
443 if (0 <= name_max && name_max < base_len (last_component (outfile)))
444 error (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile));
445 free (dir);
447 #endif
449 else
451 /* Increment the suffix in place, if possible. */
453 idx_t i = suffix_length;
454 while (i-- != 0)
456 sufindex[i]++;
457 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
458 goto new_name;
459 outfile_mid[i] = suffix_alphabet[sufindex[i]];
460 if (outfile_mid[i])
461 return;
462 sufindex[i] = 0;
463 outfile_mid[i] = suffix_alphabet[sufindex[i]];
465 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
469 /* Create or truncate a file. */
471 static int
472 create (char const *name)
474 if (!filter_command)
476 if (verbose)
477 fprintf (stdout, _("creating file %s\n"), quoteaf (name));
479 int oflags = O_WRONLY | O_CREAT | O_BINARY;
480 int fd = open (name, oflags | O_EXCL, MODE_RW_UGO);
481 if (0 <= fd || errno != EEXIST)
482 return fd;
483 fd = open (name, oflags, MODE_RW_UGO);
484 if (fd < 0)
485 return fd;
486 struct stat out_stat_buf;
487 if (fstat (fd, &out_stat_buf) != 0)
488 error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name));
489 if (psame_inode (&in_stat_buf, &out_stat_buf))
490 error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
491 quoteaf (name));
492 bool regularish
493 = S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf);
494 if (! (regularish && out_stat_buf.st_size == 0)
495 && ftruncate (fd, 0) < 0 && regularish)
496 error (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
498 return fd;
500 else
502 int fd_pair[2];
503 pid_t child_pid;
504 char const *shell_prog = getenv ("SHELL");
505 if (shell_prog == nullptr)
506 shell_prog = "/bin/sh";
507 if (setenv ("FILE", name, 1) != 0)
508 error (EXIT_FAILURE, errno,
509 _("failed to set FILE environment variable"));
510 if (verbose)
511 fprintf (stdout, _("executing with FILE=%s\n"), quotef (name));
512 if (pipe (fd_pair) != 0)
513 error (EXIT_FAILURE, errno, _("failed to create pipe"));
514 child_pid = fork ();
515 if (child_pid == 0)
517 /* This is the child process. If an error occurs here, the
518 parent will eventually learn about it after doing a wait,
519 at which time it will emit its own error message. */
520 int j;
521 /* We have to close any pipes that were opened during an
522 earlier call, otherwise this process will be holding a
523 write-pipe that will prevent the earlier process from
524 reading an EOF on the corresponding read-pipe. */
525 for (j = 0; j < n_open_pipes; ++j)
526 if (close (open_pipes[j]) != 0)
527 error (EXIT_FAILURE, errno, _("closing prior pipe"));
528 if (close (fd_pair[1]))
529 error (EXIT_FAILURE, errno, _("closing output pipe"));
530 if (fd_pair[0] != STDIN_FILENO)
532 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
533 error (EXIT_FAILURE, errno, _("moving input pipe"));
534 if (close (fd_pair[0]) != 0)
535 error (EXIT_FAILURE, errno, _("closing input pipe"));
537 if (default_SIGPIPE)
538 signal (SIGPIPE, SIG_DFL);
539 execl (shell_prog, last_component (shell_prog), "-c",
540 filter_command, (char *) nullptr);
541 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
542 shell_prog, filter_command);
544 if (child_pid < 0)
545 error (EXIT_FAILURE, errno, _("fork system call failed"));
546 if (close (fd_pair[0]) != 0)
547 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
548 filter_pid = child_pid;
549 if (n_open_pipes == open_pipes_alloc)
550 open_pipes = xpalloc (open_pipes, &open_pipes_alloc, 1,
551 MIN (INT_MAX, IDX_MAX), sizeof *open_pipes);
552 open_pipes[n_open_pipes++] = fd_pair[1];
553 return fd_pair[1];
557 /* Close the output file, and do any associated cleanup.
558 If FP and FD are both specified, they refer to the same open file;
559 in this case FP is closed, but FD is still used in cleanup. */
560 static void
561 closeout (FILE *fp, int fd, pid_t pid, char const *name)
563 if (fp != nullptr && fclose (fp) != 0 && ! ignorable (errno))
564 error (EXIT_FAILURE, errno, "%s", quotef (name));
565 if (fd >= 0)
567 if (fp == nullptr && close (fd) < 0)
568 error (EXIT_FAILURE, errno, "%s", quotef (name));
569 int j;
570 for (j = 0; j < n_open_pipes; ++j)
572 if (open_pipes[j] == fd)
574 open_pipes[j] = open_pipes[--n_open_pipes];
575 break;
579 if (pid > 0)
581 int wstatus;
582 if (waitpid (pid, &wstatus, 0) < 0)
583 error (EXIT_FAILURE, errno, _("waiting for child process"));
584 else if (WIFSIGNALED (wstatus))
586 int sig = WTERMSIG (wstatus);
587 if (sig != SIGPIPE)
589 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
590 if (sig2str (sig, signame) != 0)
591 sprintf (signame, "%d", sig);
592 error (sig + 128, 0,
593 _("with FILE=%s, signal %s from command: %s"),
594 quotef (name), signame, filter_command);
597 else if (WIFEXITED (wstatus))
599 int ex = WEXITSTATUS (wstatus);
600 if (ex != 0)
601 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
602 quotef (name), ex, filter_command);
604 else
606 /* shouldn't happen. */
607 error (EXIT_FAILURE, 0,
608 _("unknown status from command (0x%X)"), wstatus + 0u);
613 /* Write BYTES bytes at BP to an output file.
614 If NEW_FILE_FLAG is true, open the next output file.
615 Otherwise add to the same output file already in use.
616 Return true if successful. */
618 static bool
619 cwrite (bool new_file_flag, char const *bp, idx_t bytes)
621 if (new_file_flag)
623 if (!bp && bytes == 0 && elide_empty_files)
624 return true;
625 closeout (nullptr, output_desc, filter_pid, outfile);
626 next_file_name ();
627 output_desc = create (outfile);
628 if (output_desc < 0)
629 error (EXIT_FAILURE, errno, "%s", quotef (outfile));
632 if (full_write (output_desc, bp, bytes) == bytes)
633 return true;
634 else
636 if (! ignorable (errno))
637 error (EXIT_FAILURE, errno, "%s", quotef (outfile));
638 return false;
642 /* Split into pieces of exactly N_BYTES bytes.
643 However, the first REM_BYTES pieces should be 1 byte longer.
644 Use buffer BUF, whose size is BUFSIZE.
645 If INITIAL_READ is nonnegative,
646 BUF contains the first INITIAL_READ input bytes. */
648 static void
649 bytes_split (intmax_t n_bytes, intmax_t rem_bytes,
650 char *buf, idx_t bufsize, ssize_t initial_read,
651 intmax_t max_files)
653 bool new_file_flag = true;
654 bool filter_ok = true;
655 intmax_t opened = 0;
656 intmax_t to_write = n_bytes + (0 < rem_bytes);
657 bool eof = ! to_write;
659 while (! eof)
661 ssize_t n_read;
662 if (0 <= initial_read)
664 n_read = initial_read;
665 initial_read = -1;
666 eof = n_read < bufsize;
668 else
670 if (! filter_ok
671 && 0 <= lseek (STDIN_FILENO, to_write, SEEK_CUR))
673 to_write = n_bytes + (opened + 1 < rem_bytes);
674 new_file_flag = true;
677 n_read = read (STDIN_FILENO, buf, bufsize);
678 if (n_read < 0)
679 error (EXIT_FAILURE, errno, "%s", quotef (infile));
680 eof = n_read == 0;
682 char *bp_out = buf;
683 while (0 < to_write && to_write <= n_read)
685 if (filter_ok || new_file_flag)
686 filter_ok = cwrite (new_file_flag, bp_out, to_write);
687 opened += new_file_flag;
688 new_file_flag = !max_files || (opened < max_files);
689 if (! filter_ok && ! new_file_flag)
691 /* If filters no longer accepting input, stop reading. */
692 n_read = 0;
693 eof = true;
694 break;
696 bp_out += to_write;
697 n_read -= to_write;
698 to_write = n_bytes + (opened < rem_bytes);
700 if (0 < n_read)
702 if (filter_ok || new_file_flag)
703 filter_ok = cwrite (new_file_flag, bp_out, n_read);
704 opened += new_file_flag;
705 new_file_flag = false;
706 if (! filter_ok && opened == max_files)
708 /* If filters no longer accepting input, stop reading. */
709 break;
711 to_write -= n_read;
715 /* Ensure NUMBER files are created, which truncates
716 any existing files or notifies any consumers on fifos.
717 FIXME: Should we do this before EXIT_FAILURE? */
718 while (opened++ < max_files)
719 cwrite (true, nullptr, 0);
722 /* Split into pieces of exactly N_LINES lines.
723 Use buffer BUF, whose size is BUFSIZE. */
725 static void
726 lines_split (intmax_t n_lines, char *buf, idx_t bufsize)
728 ssize_t n_read;
729 char *bp, *bp_out, *eob;
730 bool new_file_flag = true;
731 intmax_t n = 0;
735 n_read = read (STDIN_FILENO, buf, bufsize);
736 if (n_read < 0)
737 error (EXIT_FAILURE, errno, "%s", quotef (infile));
738 bp = bp_out = buf;
739 eob = bp + n_read;
740 *eob = eolchar;
741 while (true)
743 bp = rawmemchr (bp, eolchar);
744 if (bp == eob)
746 if (eob != bp_out) /* do not write 0 bytes! */
748 idx_t len = eob - bp_out;
749 cwrite (new_file_flag, bp_out, len);
750 new_file_flag = false;
752 break;
755 ++bp;
756 if (++n >= n_lines)
758 cwrite (new_file_flag, bp_out, bp - bp_out);
759 bp_out = bp;
760 new_file_flag = true;
761 n = 0;
765 while (n_read);
768 /* Split into pieces that are as large as possible while still not more
769 than N_BYTES bytes, and are split on line boundaries except
770 where lines longer than N_BYTES bytes occur. */
772 static void
773 line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize)
775 ssize_t n_read;
776 intmax_t n_out = 0; /* for each split. */
777 idx_t n_hold = 0;
778 char *hold = nullptr; /* for lines > bufsize. */
779 idx_t hold_size = 0;
780 bool split_line = false; /* Whether a \n was output in a split. */
784 n_read = read (STDIN_FILENO, buf, bufsize);
785 if (n_read < 0)
786 error (EXIT_FAILURE, errno, "%s", quotef (infile));
787 idx_t n_left = n_read;
788 char *sob = buf;
789 while (n_left)
791 idx_t split_rest = 0;
792 char *eoc = nullptr;
793 char *eol;
795 /* Determine End Of Chunk and/or End of Line,
796 which are used below to select what to write or buffer. */
797 if (n_bytes - n_out - n_hold <= n_left)
799 /* Have enough for split. */
800 split_rest = n_bytes - n_out - n_hold;
801 eoc = sob + split_rest - 1;
802 eol = memrchr (sob, eolchar, split_rest);
804 else
805 eol = memrchr (sob, eolchar, n_left);
807 /* Output hold space if possible. */
808 if (n_hold && !(!eol && n_out))
810 cwrite (n_out == 0, hold, n_hold);
811 n_out += n_hold;
812 n_hold = 0;
815 /* Output to eol if present. */
816 if (eol)
818 split_line = true;
819 idx_t n_write = eol - sob + 1;
820 cwrite (n_out == 0, sob, n_write);
821 n_out += n_write;
822 n_left -= n_write;
823 sob += n_write;
824 if (eoc)
825 split_rest -= n_write;
828 /* Output to eoc or eob if possible. */
829 if (n_left && !split_line)
831 idx_t n_write = eoc ? split_rest : n_left;
832 cwrite (n_out == 0, sob, n_write);
833 n_out += n_write;
834 n_left -= n_write;
835 sob += n_write;
836 if (eoc)
837 split_rest -= n_write;
840 /* Update hold if needed. */
841 if ((eoc && split_rest) || (!eoc && n_left))
843 idx_t n_buf = eoc ? split_rest : n_left;
844 if (hold_size - n_hold < n_buf)
845 hold = xpalloc (hold, &hold_size, n_buf - (hold_size - n_hold),
846 -1, sizeof *hold);
847 memcpy (hold + n_hold, sob, n_buf);
848 n_hold += n_buf;
849 n_left -= n_buf;
850 sob += n_buf;
853 /* Reset for new split. */
854 if (eoc)
856 n_out = 0;
857 split_line = false;
861 while (n_read);
863 /* Handle no eol at end of file. */
864 if (n_hold)
865 cwrite (n_out == 0, hold, n_hold);
867 free (hold);
870 /* -n l/[K/]N: Write lines to files of approximately file size / N.
871 The file is partitioned into file size / N sized portions, with the
872 last assigned any excess. If a line _starts_ within a partition
873 it is written completely to the corresponding file. Since lines
874 are not split even if they overlap a partition, the files written
875 can be larger or smaller than the partition size, and even empty
876 if a line is so long as to completely overlap the partition. */
878 static void
879 lines_chunk_split (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
880 ssize_t initial_read, off_t file_size)
882 affirm (n && k <= n);
884 intmax_t rem_bytes = file_size % n;
885 off_t chunk_size = file_size / n;
886 intmax_t chunk_no = 1;
887 off_t chunk_end = chunk_size + (0 < rem_bytes);
888 off_t n_written = 0;
889 bool new_file_flag = true;
890 bool chunk_truncated = false;
892 if (k > 1 && 0 < file_size)
894 /* Start reading 1 byte before kth chunk of file. */
895 off_t start = (k - 1) * chunk_size + MIN (k - 1, rem_bytes) - 1;
896 if (start < initial_read)
898 memmove (buf, buf + start, initial_read - start);
899 initial_read -= start;
901 else
903 if (initial_read < start
904 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
905 error (EXIT_FAILURE, errno, "%s", quotef (infile));
906 initial_read = -1;
908 n_written = start;
909 chunk_no = k - 1;
910 chunk_end = start + 1;
913 while (n_written < file_size)
915 char *bp = buf, *eob;
916 ssize_t n_read;
917 if (0 <= initial_read)
919 n_read = initial_read;
920 initial_read = -1;
922 else
924 n_read = read (STDIN_FILENO, buf,
925 MIN (bufsize, file_size - n_written));
926 if (n_read < 0)
927 error (EXIT_FAILURE, errno, "%s", quotef (infile));
929 if (n_read == 0)
930 break; /* eof. */
931 chunk_truncated = false;
932 eob = buf + n_read;
934 while (bp != eob)
936 idx_t to_write;
937 bool next = false;
939 /* Begin looking for '\n' at last byte of chunk. */
940 off_t skip = MIN (n_read, MAX (0, chunk_end - 1 - n_written));
941 char *bp_out = memchr (bp + skip, eolchar, n_read - skip);
942 if (bp_out)
944 bp_out++;
945 next = true;
947 else
948 bp_out = eob;
949 to_write = bp_out - bp;
951 if (k == chunk_no)
953 /* We don't use the stdout buffer here since we're writing
954 large chunks from an existing file, so it's more efficient
955 to write out directly. */
956 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
957 write_error ();
959 else if (! k)
960 cwrite (new_file_flag, bp, to_write);
961 n_written += to_write;
962 bp += to_write;
963 n_read -= to_write;
964 new_file_flag = next;
966 /* A line could have been so long that it skipped
967 entire chunks. So create empty files in that case. */
968 while (next || chunk_end <= n_written)
970 if (!next && bp == eob)
972 /* replenish buf, before going to next chunk. */
973 chunk_truncated = true;
974 break;
976 if (k == chunk_no)
977 return;
978 chunk_end += chunk_size + (chunk_no < rem_bytes);
979 chunk_no++;
980 if (chunk_end <= n_written)
982 if (! k)
983 cwrite (true, nullptr, 0);
985 else
986 next = false;
991 if (chunk_truncated)
992 chunk_no++;
994 /* Ensure NUMBER files are created, which truncates
995 any existing files or notifies any consumers on fifos.
996 FIXME: Should we do this before EXIT_FAILURE? */
997 if (!k)
998 while (chunk_no++ <= n)
999 cwrite (true, nullptr, 0);
1002 /* -n K/N: Extract Kth of N chunks. */
1004 static void
1005 bytes_chunk_extract (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
1006 ssize_t initial_read, off_t file_size)
1008 off_t start;
1009 off_t end;
1011 assert (0 < k && k <= n);
1013 start = (k - 1) * (file_size / n) + MIN (k - 1, file_size % n);
1014 end = k == n ? file_size : k * (file_size / n) + MIN (k, file_size % n);
1016 if (start < initial_read)
1018 memmove (buf, buf + start, initial_read - start);
1019 initial_read -= start;
1021 else
1023 if (initial_read < start
1024 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
1025 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1026 initial_read = -1;
1029 while (start < end)
1031 ssize_t n_read;
1032 if (0 <= initial_read)
1034 n_read = initial_read;
1035 initial_read = -1;
1037 else
1039 n_read = read (STDIN_FILENO, buf, bufsize);
1040 if (n_read < 0)
1041 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1043 if (n_read == 0)
1044 break; /* eof. */
1045 n_read = MIN (n_read, end - start);
1046 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
1047 && ! ignorable (errno))
1048 error (EXIT_FAILURE, errno, "%s", quotef ("-"));
1049 start += n_read;
/* Bookkeeping for one output file.  */
typedef struct of_info
{
  char *of_name;        /* File name.  */
  int ofd;              /* Descriptor, or one of the negative OFD_*
                           markers when none is open.  */
  FILE *ofile;          /* Stream for the file, or a null pointer.  */
  pid_t opid;           /* Taken from filter_pid when the file is opened.  */
} of_t;
/* Markers stored in place of a descriptor in of_t.ofd.  */
enum
{
  OFD_NEW = -1,         /* Not opened yet: create it.  */
  OFD_APPEND = -2       /* Closed after use: reopen for appending.  */
};
1067 /* Rotate file descriptors when we're writing to more output files than we
1068 have available file descriptors.
1069 Return whether we came under file resource pressure.
1070 If so, it's probably best to close each file when finished with it. */
1072 static bool
1073 ofile_open (of_t *files, idx_t i_check, idx_t nfiles)
1075 bool file_limit = false;
1077 if (files[i_check].ofd <= OFD_NEW)
1079 int fd;
1080 idx_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
1082 /* Another process could have opened a file in between the calls to
1083 close and open, so we should keep trying until open succeeds or
1084 we've closed all of our files. */
1085 while (true)
1087 if (files[i_check].ofd == OFD_NEW)
1088 fd = create (files[i_check].of_name);
1089 else /* OFD_APPEND */
1091 /* Attempt to append to previously opened file.
1092 We use O_NONBLOCK to support writing to fifos,
1093 where the other end has closed because of our
1094 previous close. In that case we'll immediately
1095 get an error, rather than waiting indefinitely.
1096 In specialized cases the consumer can keep reading
1097 from the fifo, terminating on conditions in the data
1098 itself, or perhaps never in the case of 'tail -f'.
1099 I.e., for fifos it is valid to attempt this reopen.
1101 We don't handle the filter_command case here, as create()
1102 will exit if there are not enough files in that case.
1103 I.e., we don't support restarting filters, as that would
1104 put too much burden on users specifying --filter commands. */
1105 fd = open (files[i_check].of_name,
1106 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
1109 if (0 <= fd)
1110 break;
1112 if (!(errno == EMFILE || errno == ENFILE))
1113 error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1115 file_limit = true;
1117 /* Search backwards for an open file to close. */
1118 while (files[i_reopen].ofd < 0)
1120 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
1121 /* No more open files to close, exit with E[NM]FILE. */
1122 if (i_reopen == i_check)
1123 error (EXIT_FAILURE, errno, "%s",
1124 quotef (files[i_check].of_name));
1127 if (fclose (files[i_reopen].ofile) != 0)
1128 error (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name));
1129 files[i_reopen].ofile = nullptr;
1130 files[i_reopen].ofd = OFD_APPEND;
1133 files[i_check].ofd = fd;
1134 FILE *ofile = fdopen (fd, "a");
1135 if (!ofile)
1136 error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1137 files[i_check].ofile = ofile;
1138 files[i_check].opid = filter_pid;
1139 filter_pid = 0;
1142 return file_limit;
1145 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1146 Use BUF of size BUFSIZE for the buffer, and if allocating storage
1147 put its address into *FILESP to pacify -fsanitize=leak.
1148 When K == 0, we try to keep the files open in parallel.
1149 If we run out of file resources, then we revert
1150 to opening and closing each file for each line. */
1152 static void
1153 lines_rr (intmax_t k, intmax_t n, char *buf, idx_t bufsize, of_t **filesp)
1155 bool wrapped = false;
1156 bool wrote = false;
1157 bool file_limit;
1158 idx_t i_file;
1159 of_t *files IF_LINT (= nullptr);
1160 intmax_t line_no;
1162 if (k)
1163 line_no = 1;
1164 else
1166 if (IDX_MAX < n)
1167 xalloc_die ();
1168 files = *filesp = xinmalloc (n, sizeof *files);
1170 /* Generate output file names. */
1171 for (i_file = 0; i_file < n; i_file++)
1173 next_file_name ();
1174 files[i_file].of_name = xstrdup (outfile);
1175 files[i_file].ofd = OFD_NEW;
1176 files[i_file].ofile = nullptr;
1177 files[i_file].opid = 0;
1179 i_file = 0;
1180 file_limit = false;
1183 while (true)
1185 char *bp = buf, *eob;
1186 ssize_t n_read = read (STDIN_FILENO, buf, bufsize);
1187 if (n_read < 0)
1188 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1189 else if (n_read == 0)
1190 break; /* eof. */
1191 eob = buf + n_read;
1193 while (bp != eob)
1195 idx_t to_write;
1196 bool next = false;
1198 /* Find end of line. */
1199 char *bp_out = memchr (bp, eolchar, eob - bp);
1200 if (bp_out)
1202 bp_out++;
1203 next = true;
1205 else
1206 bp_out = eob;
1207 to_write = bp_out - bp;
1209 if (k)
1211 if (line_no == k && unbuffered)
1213 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
1214 write_error ();
1216 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
1218 write_error ();
1220 if (next)
1221 line_no = (line_no == n) ? 1 : line_no + 1;
1223 else
1225 /* Secure file descriptor. */
1226 file_limit |= ofile_open (files, i_file, n);
1227 if (unbuffered)
1229 /* Note writing to fd, rather than flushing the FILE gives
1230 an 8% performance benefit, due to reduced data copying. */
1231 if (full_write (files[i_file].ofd, bp, to_write) != to_write
1232 && ! ignorable (errno))
1233 error (EXIT_FAILURE, errno, "%s",
1234 quotef (files[i_file].of_name));
1236 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1237 && ! ignorable (errno))
1238 error (EXIT_FAILURE, errno, "%s",
1239 quotef (files[i_file].of_name));
1241 if (! ignorable (errno))
1242 wrote = true;
1244 if (file_limit)
1246 if (fclose (files[i_file].ofile) != 0)
1247 error (EXIT_FAILURE, errno, "%s",
1248 quotef (files[i_file].of_name));
1249 files[i_file].ofile = nullptr;
1250 files[i_file].ofd = OFD_APPEND;
1252 if (next && ++i_file == n)
1254 wrapped = true;
1255 /* If no filters are accepting input, stop reading. */
1256 if (! wrote)
1257 goto no_filters;
1258 wrote = false;
1259 i_file = 0;
1263 bp = bp_out;
1267 no_filters:
1268 /* Ensure all files created, so that any existing files are truncated,
1269 and to signal any waiting fifo consumers.
1270 Also, close any open file descriptors.
1271 FIXME: Should we do this before EXIT_FAILURE? */
1272 if (!k)
1274 idx_t ceiling = wrapped ? n : i_file;
1275 for (i_file = 0; i_file < n; i_file++)
1277 if (i_file >= ceiling && !elide_empty_files)
1278 file_limit |= ofile_open (files, i_file, n);
1279 if (files[i_file].ofd >= 0)
1280 closeout (files[i_file].ofile, files[i_file].ofd,
1281 files[i_file].opid, files[i_file].of_name);
1282 files[i_file].ofd = OFD_APPEND;
/* Diagnose a second, conflicting request for how to split, then call
   usage (EXIT_FAILURE).  Wrapped in do/while (0) so that the expansion
   is a single statement and is safe after an unbraced 'if'.  */
#define FAIL_ONLY_ONE_WAY()                                     \
  do {                                                          \
    error (0, 0, _("cannot split in more than one way"));       \
    usage (EXIT_FAILURE);                                       \
  } while (0)
/* Report a string-to-integer conversion failure and exit with
   EXIT_FAILURE.  MSGID is an untranslated message id (it is passed
   through gettext here) and ARG is the offending argument, which is
   quoted in the diagnostic.  The current errno is appended to the
   message unless it is EINVAL, in which case no errno text is shown.  */

static _Noreturn void
strtoint_die (char const *msgid, char const *arg)
{
  int errnum = errno;
  if (errnum == EINVAL)
    errnum = 0;

  error (EXIT_FAILURE, errnum, "%s: %s", gettext (msgid), quote (arg));
}
1304 /* Use OVERFLOW_OK when it is OK to ignore LONGINT_OVERFLOW errors, since the
1305 extreme value will do the right thing anyway on any practical platform. */
1306 #define OVERFLOW_OK LONGINT_OVERFLOW
1308 /* Parse ARG for number of bytes or lines. The number can be followed
1309 by MULTIPLIERS, and the resulting value must be positive.
1310 If the number cannot be parsed, diagnose with MSG.
1311 Return the number parsed, or an INTMAX_MAX on overflow. */
1313 static intmax_t
1314 parse_n_units (char const *arg, char const *multipliers, char const *msgid)
1316 intmax_t n;
1317 if (OVERFLOW_OK < xstrtoimax (arg, nullptr, 10, &n, multipliers) || n < 1)
1318 strtoint_die (msgid, arg);
1319 return n;
1322 /* Parse K/N syntax of chunk options. */
1324 static void
1325 parse_chunk (intmax_t *k_units, intmax_t *n_units, char const *arg)
1327 char *argend;
1328 strtol_error e = xstrtoimax (arg, &argend, 10, n_units, "");
1329 if (e == LONGINT_INVALID_SUFFIX_CHAR && *argend == '/')
1331 *k_units = *n_units;
1332 *n_units = parse_n_units (argend + 1, "",
1333 N_("invalid number of chunks"));
1334 if (! (0 < *k_units && *k_units <= *n_units))
1335 error (EXIT_FAILURE, 0, "%s: %s", _("invalid chunk number"),
1336 quote_mem (arg, argend - arg));
1338 else if (! (e <= OVERFLOW_OK && 0 < *n_units))
1339 strtoint_die (N_("invalid number of chunks"), arg);
1344 main (int argc, char **argv)
1346 enum Split_type split_type = type_undef;
1347 idx_t in_blk_size = 0; /* optimal block size of input file device */
1348 idx_t page_size = getpagesize ();
1349 intmax_t k_units = 0;
1350 intmax_t n_units = 0;
1352 static char const multipliers[] = "bEGKkMmPQRTYZ0";
1353 int c;
1354 int digits_optind = 0;
1355 off_t file_size = OFF_T_MAX;
1357 initialize_main (&argc, &argv);
1358 set_program_name (argv[0]);
1359 setlocale (LC_ALL, "");
1360 bindtextdomain (PACKAGE, LOCALEDIR);
1361 textdomain (PACKAGE);
1363 atexit (close_stdout);
1365 /* Parse command line options. */
1367 infile = "-";
1368 outbase = "x";
1370 while (true)
1372 /* This is the argv-index of the option we will read next. */
1373 int this_optind = optind ? optind : 1;
1375 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:ux",
1376 longopts, nullptr);
1377 if (c == -1)
1378 break;
1380 switch (c)
1382 case 'a':
1383 suffix_length = xdectoimax (optarg, 0, IDX_MAX,
1384 "", _("invalid suffix length"), 0);
1385 break;
1387 case ADDITIONAL_SUFFIX_OPTION:
1389 int suffix_len = strlen (optarg);
1390 if (last_component (optarg) != optarg
1391 || (suffix_len && ISSLASH (optarg[suffix_len - 1])))
1393 error (0, 0,
1394 _("invalid suffix %s, contains directory separator"),
1395 quote (optarg));
1396 usage (EXIT_FAILURE);
1399 additional_suffix = optarg;
1400 break;
1402 case 'b':
1403 if (split_type != type_undef)
1404 FAIL_ONLY_ONE_WAY ();
1405 split_type = type_bytes;
1406 n_units = parse_n_units (optarg, multipliers,
1407 N_("invalid number of bytes"));
1408 break;
1410 case 'l':
1411 if (split_type != type_undef)
1412 FAIL_ONLY_ONE_WAY ();
1413 split_type = type_lines;
1414 n_units = parse_n_units (optarg, "", N_("invalid number of lines"));
1415 break;
1417 case 'C':
1418 if (split_type != type_undef)
1419 FAIL_ONLY_ONE_WAY ();
1420 split_type = type_byteslines;
1421 n_units = parse_n_units (optarg, multipliers,
1422 N_("invalid number of lines"));
1423 break;
1425 case 'n':
1426 if (split_type != type_undef)
1427 FAIL_ONLY_ONE_WAY ();
1428 /* skip any whitespace */
1429 while (isspace (to_uchar (*optarg)))
1430 optarg++;
1431 if (STRNCMP_LIT (optarg, "r/") == 0)
1433 split_type = type_rr;
1434 optarg += 2;
1436 else if (STRNCMP_LIT (optarg, "l/") == 0)
1438 split_type = type_chunk_lines;
1439 optarg += 2;
1441 else
1442 split_type = type_chunk_bytes;
1443 parse_chunk (&k_units, &n_units, optarg);
1444 break;
1446 case 'u':
1447 unbuffered = true;
1448 break;
1450 case 't':
1452 char neweol = optarg[0];
1453 if (! neweol)
1454 error (EXIT_FAILURE, 0, _("empty record separator"));
1455 if (optarg[1])
1457 if (STREQ (optarg, "\\0"))
1458 neweol = '\0';
1459 else
1461 /* Provoke with 'split -txx'. Complain about
1462 "multi-character tab" instead of "multibyte tab", so
1463 that the diagnostic's wording does not need to be
1464 changed once multibyte characters are supported. */
1465 error (EXIT_FAILURE, 0, _("multi-character separator %s"),
1466 quote (optarg));
1469 /* Make it explicit we don't support multiple separators. */
1470 if (0 <= eolchar && neweol != eolchar)
1472 error (EXIT_FAILURE, 0,
1473 _("multiple separator characters specified"));
1476 eolchar = neweol;
1478 break;
1480 case '0':
1481 case '1':
1482 case '2':
1483 case '3':
1484 case '4':
1485 case '5':
1486 case '6':
1487 case '7':
1488 case '8':
1489 case '9':
1490 if (split_type == type_undef)
1492 split_type = type_digits;
1493 n_units = 0;
1495 if (split_type != type_undef && split_type != type_digits)
1496 FAIL_ONLY_ONE_WAY ();
1497 if (digits_optind != 0 && digits_optind != this_optind)
1498 n_units = 0; /* More than one number given; ignore other. */
1499 digits_optind = this_optind;
1500 if (ckd_mul (&n_units, n_units, 10)
1501 || ckd_add (&n_units, n_units, c - '0'))
1502 n_units = INTMAX_MAX;
1503 break;
1505 case 'd':
1506 case 'x':
1507 if (c == 'd')
1508 suffix_alphabet = "0123456789";
1509 else
1510 suffix_alphabet = "0123456789abcdef";
1511 if (optarg)
1513 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1515 error (0, 0,
1516 (c == 'd') ?
1517 _("%s: invalid start value for numerical suffix") :
1518 _("%s: invalid start value for hexadecimal suffix"),
1519 quote (optarg));
1520 usage (EXIT_FAILURE);
1522 else
1524 /* Skip any leading zero. */
1525 while (*optarg == '0' && *(optarg + 1) != '\0')
1526 optarg++;
1527 numeric_suffix_start = optarg;
1530 break;
1532 case 'e':
1533 elide_empty_files = true;
1534 break;
1536 case FILTER_OPTION:
1537 filter_command = optarg;
1538 break;
1540 case IO_BLKSIZE_OPTION:
1541 in_blk_size = xdectoumax (optarg, 1,
1542 MIN (SYS_BUFSIZE_MAX,
1543 MIN (IDX_MAX, SIZE_MAX) - 1),
1544 multipliers, _("invalid IO block size"), 0);
1545 break;
1547 case VERBOSE_OPTION:
1548 verbose = true;
1549 break;
1551 case_GETOPT_HELP_CHAR;
1553 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1555 default:
1556 usage (EXIT_FAILURE);
1560 if (k_units != 0 && filter_command)
1562 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1563 usage (EXIT_FAILURE);
1566 /* Handle default case. */
1567 if (split_type == type_undef)
1569 split_type = type_lines;
1570 n_units = 1000;
1573 if (n_units == 0)
1575 error (0, 0, _("invalid number of lines: %s"), quote ("0"));
1576 usage (EXIT_FAILURE);
1579 if (eolchar < 0)
1580 eolchar = '\n';
1582 set_suffix_length (n_units, split_type);
1584 /* Get out the filename arguments. */
1586 if (optind < argc)
1587 infile = argv[optind++];
1589 if (optind < argc)
1590 outbase = argv[optind++];
1592 if (optind < argc)
1594 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1595 usage (EXIT_FAILURE);
1598 /* Check that the suffix length is large enough for the numerical
1599 suffix start value. */
1600 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1602 error (0, 0, _("numerical suffix start value is too large "
1603 "for the suffix length"));
1604 usage (EXIT_FAILURE);
1607 /* Open the input file. */
1608 if (! STREQ (infile, "-")
1609 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1610 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1611 quoteaf (infile));
1613 /* Binary I/O is safer when byte counts are used. */
1614 xset_binary_mode (STDIN_FILENO, O_BINARY);
1616 /* Advise the kernel of our access pattern. */
1617 fdadvise (STDIN_FILENO, 0, 0, FADVISE_SEQUENTIAL);
1619 /* Get the optimal block size of input device and make a buffer. */
1621 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1622 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1624 if (in_blk_size == 0)
1626 in_blk_size = io_blksize (&in_stat_buf);
1627 if (SYS_BUFSIZE_MAX < in_blk_size)
1628 in_blk_size = SYS_BUFSIZE_MAX;
1631 char *buf = xalignalloc (page_size, in_blk_size + 1);
1632 ssize_t initial_read = -1;
1634 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1636 file_size = input_file_size (STDIN_FILENO, &in_stat_buf,
1637 buf, in_blk_size);
1638 if (file_size < 0)
1639 error (EXIT_FAILURE, errno, _("%s: cannot determine file size"),
1640 quotef (infile));
1641 initial_read = MIN (file_size, in_blk_size);
1644 /* When filtering, closure of one pipe must not terminate the process,
1645 as there may still be other streams expecting input from us. */
1646 if (filter_command)
1647 default_SIGPIPE = signal (SIGPIPE, SIG_IGN) == SIG_DFL;
1649 switch (split_type)
1651 case type_digits:
1652 case type_lines:
1653 lines_split (n_units, buf, in_blk_size);
1654 break;
1656 case type_bytes:
1657 bytes_split (n_units, 0, buf, in_blk_size, -1, 0);
1658 break;
1660 case type_byteslines:
1661 line_bytes_split (n_units, buf, in_blk_size);
1662 break;
1664 case type_chunk_bytes:
1665 if (k_units == 0)
1666 bytes_split (file_size / n_units, file_size % n_units,
1667 buf, in_blk_size, initial_read, n_units);
1668 else
1669 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
1670 file_size);
1671 break;
1673 case type_chunk_lines:
1674 lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
1675 file_size);
1676 break;
1678 case type_rr:
1679 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1680 but the functionality is provided for symmetry. */
1682 of_t *files;
1683 lines_rr (k_units, n_units, buf, in_blk_size, &files);
1685 break;
1687 default:
1688 affirm (false);
1691 if (close (STDIN_FILENO) != 0)
1692 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1693 closeout (nullptr, output_desc, filter_pid, outfile);
1695 main_exit (EXIT_SUCCESS);