doc: sort: give example for sorting on the last field
[coreutils.git] / src / split.c
blob f82a7f74b40160c27cbed0a4cc219220d5c5cefb
1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
19 TODO:
20 * support -p REGEX as in BSD's split.
21 * support --suppress-matched as in csplit. */
22 #include <config.h>
24 #include <ctype.h>
25 #include <stdio.h>
26 #include <getopt.h>
27 #include <signal.h>
28 #include <sys/types.h>
29 #include <sys/wait.h>
31 #include "system.h"
32 #include "alignalloc.h"
33 #include "assure.h"
34 #include "fadvise.h"
35 #include "fd-reopen.h"
36 #include "fcntl--.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
39 #include "quote.h"
40 #include "sig2str.h"
41 #include "sys-limits.h"
42 #include "temp-stream.h"
43 #include "xbinary-io.h"
44 #include "xdectoint.h"
45 #include "xstrtol.h"
47 /* The official name of this program (e.g., no 'g' prefix). */
48 #define PROGRAM_NAME "split"
50 #define AUTHORS \
51 proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
52 proper_name ("Richard M. Stallman")
54 /* Shell command to filter through, instead of creating files. */
55 static char const *filter_command;
57 /* Process ID of the filter. */
58 static pid_t filter_pid;
60 /* Array of open pipes. */
61 static int *open_pipes;
62 static idx_t open_pipes_alloc;
63 static int n_open_pipes;
65 /* Whether SIGPIPE has the default action, when --filter is used. */
66 static bool default_SIGPIPE;
68 /* Base name of output files. */
69 static char const *outbase;
71 /* Name of output files. */
72 static char *outfile;
74 /* Pointer to the end of the prefix in OUTFILE.
75 Suffixes are inserted here. */
76 static char *outfile_mid;
78 /* Generate new suffix when suffixes are exhausted. */
79 static bool suffix_auto = true;
81 /* Length of OUTFILE's suffix. */
82 static idx_t suffix_length;
84 /* Alphabet of characters to use in suffix. */
85 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
87 /* Numerical suffix start value. */
88 static char const *numeric_suffix_start;
90 /* Additional suffix to append to output file names. */
91 static char const *additional_suffix;
93 /* Name of input file. May be "-". */
94 static char const *infile;
96 /* stat buf for input file. */
97 static struct stat in_stat_buf;
99 /* Descriptor on which output file is open. */
100 static int output_desc = -1;
102 /* If true, print a diagnostic on standard error just before each
103 output file is opened. */
104 static bool verbose;
106 /* If true, don't generate zero length output files. */
107 static bool elide_empty_files;
109 /* If true, in round robin mode, immediately copy
110 input to output, which is much slower, so disabled by default. */
111 static bool unbuffered;
113 /* The character marking end of line. Defaults to \n below. */
114 static int eolchar = -1;
/* The ways in which the input can be split.  */
enum Split_type
{
  type_undef,
  type_bytes,
  type_byteslines,
  type_lines,
  type_digits,
  type_chunk_bytes,
  type_chunk_lines,
  type_rr
};
/* Long options without a short equivalent get a pseudo short option
   that is not a character; the values start at CHAR_MAX + 1.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  FILTER_OPTION,
  IO_BLKSIZE_OPTION,
  ADDITIONAL_SUFFIX_OPTION
};
133 static struct option const longopts[] =
135 {"bytes", required_argument, nullptr, 'b'},
136 {"lines", required_argument, nullptr, 'l'},
137 {"line-bytes", required_argument, nullptr, 'C'},
138 {"number", required_argument, nullptr, 'n'},
139 {"elide-empty-files", no_argument, nullptr, 'e'},
140 {"unbuffered", no_argument, nullptr, 'u'},
141 {"suffix-length", required_argument, nullptr, 'a'},
142 {"additional-suffix", required_argument, nullptr,
143 ADDITIONAL_SUFFIX_OPTION},
144 {"numeric-suffixes", optional_argument, nullptr, 'd'},
145 {"hex-suffixes", optional_argument, nullptr, 'x'},
146 {"filter", required_argument, nullptr, FILTER_OPTION},
147 {"verbose", no_argument, nullptr, VERBOSE_OPTION},
148 {"separator", required_argument, nullptr, 't'},
149 {"-io-blksize", required_argument, nullptr,
150 IO_BLKSIZE_OPTION}, /* do not document */
151 {GETOPT_HELP_OPTION_DECL},
152 {GETOPT_VERSION_OPTION_DECL},
153 {nullptr, 0, nullptr, 0}
156 /* Return true if the errno value, ERR, is ignorable. */
157 static inline bool
158 ignorable (int err)
160 return filter_command && err == EPIPE;
163 static void
164 set_suffix_length (intmax_t n_units, enum Split_type split_type)
166 #define DEFAULT_SUFFIX_LENGTH 2
168 int suffix_length_needed = 0;
170 /* The suffix auto length feature is incompatible with
171 a user specified start value as the generated suffixes
172 are not all consecutive. */
173 if (numeric_suffix_start)
174 suffix_auto = false;
176 /* Auto-calculate the suffix length if the number of files is given. */
177 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
178 || split_type == type_rr)
180 intmax_t n_units_end = n_units - 1;
181 if (numeric_suffix_start)
183 intmax_t n_start;
184 strtol_error e = xstrtoimax (numeric_suffix_start, nullptr, 10,
185 &n_start, "");
186 if (e == LONGINT_OK && n_start < n_units)
188 /* Restrict auto adjustment so we don't keep
189 incrementing a suffix size arbitrarily,
190 as that would break sort order for files
191 generated from multiple split runs. */
192 if (ckd_add (&n_units_end, n_units_end, n_start))
193 n_units_end = INTMAX_MAX;
197 idx_t alphabet_len = strlen (suffix_alphabet);
199 suffix_length_needed++;
200 while (n_units_end /= alphabet_len);
202 suffix_auto = false;
205 if (suffix_length) /* set by user */
207 if (suffix_length < suffix_length_needed)
208 error (EXIT_FAILURE, 0,
209 _("the suffix length needs to be at least %d"),
210 suffix_length_needed);
211 suffix_auto = false;
212 return;
214 else
215 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_length_needed);
/* Exit with STATUS after printing help.  When STATUS is not
   EXIT_SUCCESS only emit_try_help () is called; otherwise the usage
   line, the option summary and the description of CHUNKS are written
   to standard output.  DEFAULT_SUFFIX_LENGTH supplies the %d in the
   -a description.  */
218 void
219 usage (int status)
221 if (status != EXIT_SUCCESS)
222 emit_try_help ();
223 else
225 printf (_("\
226 Usage: %s [OPTION]... [FILE [PREFIX]]\n\
228 program_name);
229 fputs (_("\
230 Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
231 default size is 1000 lines, and default PREFIX is 'x'.\n\
232 "), stdout);
234 emit_stdin_note ();
235 emit_mandatory_arg_note ();
237 fprintf (stdout, _("\
238 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
239 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
240 -b, --bytes=SIZE put SIZE bytes per output file\n\
241 -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
242 -d use numeric suffixes starting at 0, not alphabetic\n\
243 --numeric-suffixes[=FROM] same as -d, but allow setting the start value\
245 -x use hex suffixes starting at 0, not alphabetic\n\
246 --hex-suffixes[=FROM] same as -x, but allow setting the start value\n\
247 -e, --elide-empty-files do not generate empty output files with '-n'\n\
248 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
249 -l, --lines=NUMBER put NUMBER lines/records per output file\n\
250 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
251 -t, --separator=SEP use SEP instead of newline as the record separator;\n\
252 '\\0' (zero) specifies the NUL character\n\
253 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
254 "), DEFAULT_SUFFIX_LENGTH);
255 fputs (_("\
256 --verbose print a diagnostic just before each\n\
257 output file is opened\n\
258 "), stdout);
259 fputs (HELP_OPTION_DESCRIPTION, stdout);
260 fputs (VERSION_OPTION_DESCRIPTION, stdout);
261 emit_size_note ();
262 fputs (_("\n\
263 CHUNKS may be:\n\
264 N split into N files based on size of input\n\
265 K/N output Kth of N to stdout\n\
266 l/N split into N files without splitting lines/records\n\
267 l/K/N output Kth of N to stdout without splitting lines/records\n\
268 r/N like 'l' but use round robin distribution\n\
269 r/K/N likewise but only output Kth of N to stdout\n\
270 "), stdout);
271 emit_ancillary_info (PROGRAM_NAME);
273 exit (status);
276 /* Copy the data in FD to a temporary file, then make that file FD.
277 Use BUF, of size BUFSIZE, to copy. Return the number of
278 bytes copied, or -1 (setting errno) on error. */
279 static off_t
280 copy_to_tmpfile (int fd, char *buf, idx_t bufsize)
282 FILE *tmp;
283 if (!temp_stream (&tmp, nullptr))
284 return -1;
285 off_t copied = 0;
286 off_t r;
288 while (0 < (r = read (fd, buf, bufsize)))
290 if (fwrite (buf, 1, r, tmp) != r)
291 return -1;
292 if (ckd_add (&copied, copied, r))
294 errno = EOVERFLOW;
295 return -1;
299 if (r < 0)
300 return r;
301 r = dup2 (fileno (tmp), fd);
302 if (r < 0)
303 return r;
304 if (fclose (tmp) < 0)
305 return -1;
306 return copied;
309 /* Return the number of bytes that can be read from FD with status ST.
310 Store up to the first BUFSIZE bytes of the file's data into BUF,
311 and advance the file position by the number of bytes read. On
312 input error, set errno and return -1. */
314 static off_t
315 input_file_size (int fd, struct stat const *st, char *buf, idx_t bufsize)
317 off_t size = 0;
320 ssize_t n_read = read (fd, buf + size, bufsize - size);
321 if (n_read <= 0)
322 return n_read < 0 ? n_read : size;
323 size += n_read;
325 while (size < bufsize);
327 off_t cur, end;
328 if ((usable_st_size (st) && st->st_size < size)
329 || (cur = lseek (fd, 0, SEEK_CUR)) < 0
330 || cur < size /* E.g., /dev/zero on GNU/Linux. */
331 || (end = lseek (fd, 0, SEEK_END)) < 0)
333 char *tmpbuf = xmalloc (bufsize);
334 end = copy_to_tmpfile (fd, tmpbuf, bufsize);
335 free (tmpbuf);
336 if (end < 0)
337 return end;
338 cur = 0;
341 if (end == OFF_T_MAX /* E.g., /dev/zero on GNU/Hurd. */
342 || (cur < end && ckd_add (&size, size, end - cur)))
344 errno = EOVERFLOW;
345 return -1;
348 if (cur < end)
350 off_t r = lseek (fd, cur, SEEK_SET);
351 if (r < 0)
352 return r;
355 return size;
358 /* Compute the next sequential output file name and store it into the
359 string 'outfile'. */
361 static void
362 next_file_name (void)
364 /* Index in suffix_alphabet of each character in the suffix. */
365 static idx_t *sufindex;
366 static idx_t outbase_length;
367 static idx_t outfile_length;
368 static idx_t addsuf_length;
370 if (! outfile)
372 bool overflow, widen;
374 new_name:
375 widen = !! outfile_length;
377 if (! widen)
379 /* Allocate and initialize the first file name. */
381 outbase_length = strlen (outbase);
382 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
383 overflow = ckd_add (&outfile_length, outbase_length + addsuf_length,
384 suffix_length);
386 else
388 /* Reallocate and initialize a new wider file name.
389 We do this by subsuming the unchanging part of
390 the generated suffix into the prefix (base), and
391 reinitializing the now one longer suffix. */
393 overflow = ckd_add (&outfile_length, outfile_length, 2);
394 suffix_length++;
397 idx_t outfile_size;
398 overflow |= ckd_add (&outfile_size, outfile_length, 1);
399 if (overflow)
400 xalloc_die ();
401 outfile = xirealloc (outfile, outfile_size);
403 if (! widen)
404 memcpy (outfile, outbase, outbase_length);
405 else
407 /* Append the last alphabet character to the file name prefix. */
408 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
409 outbase_length++;
412 outfile_mid = outfile + outbase_length;
413 memset (outfile_mid, suffix_alphabet[0], suffix_length);
414 if (additional_suffix)
415 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
416 outfile[outfile_length] = 0;
418 free (sufindex);
419 sufindex = xicalloc (suffix_length, sizeof *sufindex);
421 if (numeric_suffix_start)
423 affirm (! widen);
425 /* Update the output file name. */
426 idx_t i = strlen (numeric_suffix_start);
427 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
429 /* Update the suffix index. */
430 idx_t *sufindex_end = sufindex + suffix_length;
431 while (i-- != 0)
432 *--sufindex_end = numeric_suffix_start[i] - '0';
435 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
436 /* POSIX requires that if the output file name is too long for
437 its directory, 'split' must fail without creating any files.
438 This must be checked for explicitly on operating systems that
439 silently truncate file names. */
441 char *dir = dir_name (outfile);
442 long name_max = pathconf (dir, _PC_NAME_MAX);
443 if (0 <= name_max && name_max < base_len (last_component (outfile)))
444 error (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile));
445 free (dir);
447 #endif
449 else
451 /* Increment the suffix in place, if possible. */
453 idx_t i = suffix_length;
454 while (i-- != 0)
456 sufindex[i]++;
457 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
458 goto new_name;
459 outfile_mid[i] = suffix_alphabet[sufindex[i]];
460 if (outfile_mid[i])
461 return;
462 sufindex[i] = 0;
463 outfile_mid[i] = suffix_alphabet[sufindex[i]];
465 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
469 /* Create or truncate a file. */
471 static int
472 create (char const *name)
474 if (!filter_command)
476 if (verbose)
477 fprintf (stdout, _("creating file %s\n"), quoteaf (name));
479 int oflags = O_WRONLY | O_CREAT | O_BINARY;
480 int fd = open (name, oflags | O_EXCL, MODE_RW_UGO);
481 if (0 <= fd || errno != EEXIST)
482 return fd;
483 fd = open (name, oflags, MODE_RW_UGO);
484 if (fd < 0)
485 return fd;
486 struct stat out_stat_buf;
487 if (fstat (fd, &out_stat_buf) != 0)
488 error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name));
489 if (psame_inode (&in_stat_buf, &out_stat_buf))
490 error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
491 quoteaf (name));
492 if (ftruncate (fd, 0) < 0
493 && (S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf)))
494 error (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
496 return fd;
498 else
500 int fd_pair[2];
501 pid_t child_pid;
502 char const *shell_prog = getenv ("SHELL");
503 if (shell_prog == nullptr)
504 shell_prog = "/bin/sh";
505 if (setenv ("FILE", name, 1) != 0)
506 error (EXIT_FAILURE, errno,
507 _("failed to set FILE environment variable"));
508 if (verbose)
509 fprintf (stdout, _("executing with FILE=%s\n"), quotef (name));
510 if (pipe (fd_pair) != 0)
511 error (EXIT_FAILURE, errno, _("failed to create pipe"));
512 child_pid = fork ();
513 if (child_pid == 0)
515 /* This is the child process. If an error occurs here, the
516 parent will eventually learn about it after doing a wait,
517 at which time it will emit its own error message. */
518 int j;
519 /* We have to close any pipes that were opened during an
520 earlier call, otherwise this process will be holding a
521 write-pipe that will prevent the earlier process from
522 reading an EOF on the corresponding read-pipe. */
523 for (j = 0; j < n_open_pipes; ++j)
524 if (close (open_pipes[j]) != 0)
525 error (EXIT_FAILURE, errno, _("closing prior pipe"));
526 if (close (fd_pair[1]))
527 error (EXIT_FAILURE, errno, _("closing output pipe"));
528 if (fd_pair[0] != STDIN_FILENO)
530 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
531 error (EXIT_FAILURE, errno, _("moving input pipe"));
532 if (close (fd_pair[0]) != 0)
533 error (EXIT_FAILURE, errno, _("closing input pipe"));
535 if (default_SIGPIPE)
536 signal (SIGPIPE, SIG_DFL);
537 execl (shell_prog, last_component (shell_prog), "-c",
538 filter_command, (char *) nullptr);
539 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
540 shell_prog, filter_command);
542 if (child_pid < 0)
543 error (EXIT_FAILURE, errno, _("fork system call failed"));
544 if (close (fd_pair[0]) != 0)
545 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
546 filter_pid = child_pid;
547 if (n_open_pipes == open_pipes_alloc)
548 open_pipes = xpalloc (open_pipes, &open_pipes_alloc, 1,
549 MIN (INT_MAX, IDX_MAX), sizeof *open_pipes);
550 open_pipes[n_open_pipes++] = fd_pair[1];
551 return fd_pair[1];
555 /* Close the output file, and do any associated cleanup.
556 If FP and FD are both specified, they refer to the same open file;
557 in this case FP is closed, but FD is still used in cleanup. */
558 static void
559 closeout (FILE *fp, int fd, pid_t pid, char const *name)
561 if (fp != nullptr && fclose (fp) != 0 && ! ignorable (errno))
562 error (EXIT_FAILURE, errno, "%s", quotef (name));
563 if (fd >= 0)
565 if (fp == nullptr && close (fd) < 0)
566 error (EXIT_FAILURE, errno, "%s", quotef (name));
567 int j;
568 for (j = 0; j < n_open_pipes; ++j)
570 if (open_pipes[j] == fd)
572 open_pipes[j] = open_pipes[--n_open_pipes];
573 break;
577 if (pid > 0)
579 int wstatus;
580 if (waitpid (pid, &wstatus, 0) < 0)
581 error (EXIT_FAILURE, errno, _("waiting for child process"));
582 else if (WIFSIGNALED (wstatus))
584 int sig = WTERMSIG (wstatus);
585 if (sig != SIGPIPE)
587 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
588 if (sig2str (sig, signame) != 0)
589 sprintf (signame, "%d", sig);
590 error (sig + 128, 0,
591 _("with FILE=%s, signal %s from command: %s"),
592 quotef (name), signame, filter_command);
595 else if (WIFEXITED (wstatus))
597 int ex = WEXITSTATUS (wstatus);
598 if (ex != 0)
599 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
600 quotef (name), ex, filter_command);
602 else
604 /* shouldn't happen. */
605 error (EXIT_FAILURE, 0,
606 _("unknown status from command (0x%X)"), wstatus + 0u);
611 /* Write BYTES bytes at BP to an output file.
612 If NEW_FILE_FLAG is true, open the next output file.
613 Otherwise add to the same output file already in use.
614 Return true if successful. */
616 static bool
617 cwrite (bool new_file_flag, char const *bp, idx_t bytes)
619 if (new_file_flag)
621 if (!bp && bytes == 0 && elide_empty_files)
622 return true;
623 closeout (nullptr, output_desc, filter_pid, outfile);
624 next_file_name ();
625 output_desc = create (outfile);
626 if (output_desc < 0)
627 error (EXIT_FAILURE, errno, "%s", quotef (outfile));
630 if (full_write (output_desc, bp, bytes) == bytes)
631 return true;
632 else
634 if (! ignorable (errno))
635 error (EXIT_FAILURE, errno, "%s", quotef (outfile));
636 return false;
640 /* Split into pieces of exactly N_BYTES bytes.
641 However, the first REM_BYTES pieces should be 1 byte longer.
642 Use buffer BUF, whose size is BUFSIZE.
643 If INITIAL_READ is nonnegative,
644 BUF contains the first INITIAL_READ input bytes. */
646 static void
647 bytes_split (intmax_t n_bytes, intmax_t rem_bytes,
648 char *buf, idx_t bufsize, ssize_t initial_read,
649 intmax_t max_files)
651 bool new_file_flag = true;
652 bool filter_ok = true;
653 intmax_t opened = 0;
654 intmax_t to_write = n_bytes + (0 < rem_bytes);
655 bool eof = ! to_write;
657 while (! eof)
659 ssize_t n_read;
660 if (0 <= initial_read)
662 n_read = initial_read;
663 initial_read = -1;
664 eof = n_read < bufsize;
666 else
668 if (! filter_ok
669 && 0 <= lseek (STDIN_FILENO, to_write, SEEK_CUR))
671 to_write = n_bytes + (opened + 1 < rem_bytes);
672 new_file_flag = true;
675 n_read = read (STDIN_FILENO, buf, bufsize);
676 if (n_read < 0)
677 error (EXIT_FAILURE, errno, "%s", quotef (infile));
678 eof = n_read == 0;
680 char *bp_out = buf;
681 while (0 < to_write && to_write <= n_read)
683 if (filter_ok || new_file_flag)
684 filter_ok = cwrite (new_file_flag, bp_out, to_write);
685 opened += new_file_flag;
686 new_file_flag = !max_files || (opened < max_files);
687 if (! filter_ok && ! new_file_flag)
689 /* If filters no longer accepting input, stop reading. */
690 n_read = 0;
691 eof = true;
692 break;
694 bp_out += to_write;
695 n_read -= to_write;
696 to_write = n_bytes + (opened < rem_bytes);
698 if (0 < n_read)
700 if (filter_ok || new_file_flag)
701 filter_ok = cwrite (new_file_flag, bp_out, n_read);
702 opened += new_file_flag;
703 new_file_flag = false;
704 if (! filter_ok && opened == max_files)
706 /* If filters no longer accepting input, stop reading. */
707 break;
709 to_write -= n_read;
713 /* Ensure NUMBER files are created, which truncates
714 any existing files or notifies any consumers on fifos.
715 FIXME: Should we do this before EXIT_FAILURE? */
716 while (opened++ < max_files)
717 cwrite (true, nullptr, 0);
720 /* Split into pieces of exactly N_LINES lines.
721 Use buffer BUF, whose size is BUFSIZE. */
723 static void
724 lines_split (intmax_t n_lines, char *buf, idx_t bufsize)
726 ssize_t n_read;
727 char *bp, *bp_out, *eob;
728 bool new_file_flag = true;
729 intmax_t n = 0;
733 n_read = read (STDIN_FILENO, buf, bufsize);
734 if (n_read < 0)
735 error (EXIT_FAILURE, errno, "%s", quotef (infile));
736 bp = bp_out = buf;
737 eob = bp + n_read;
738 *eob = eolchar;
739 while (true)
741 bp = rawmemchr (bp, eolchar);
742 if (bp == eob)
744 if (eob != bp_out) /* do not write 0 bytes! */
746 idx_t len = eob - bp_out;
747 cwrite (new_file_flag, bp_out, len);
748 new_file_flag = false;
750 break;
753 ++bp;
754 if (++n >= n_lines)
756 cwrite (new_file_flag, bp_out, bp - bp_out);
757 bp_out = bp;
758 new_file_flag = true;
759 n = 0;
763 while (n_read);
766 /* Split into pieces that are as large as possible while still not more
767 than N_BYTES bytes, and are split on line boundaries except
768 where lines longer than N_BYTES bytes occur. */
770 static void
771 line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize)
773 ssize_t n_read;
774 intmax_t n_out = 0; /* for each split. */
775 idx_t n_hold = 0;
776 char *hold = nullptr; /* for lines > bufsize. */
777 idx_t hold_size = 0;
778 bool split_line = false; /* Whether a \n was output in a split. */
782 n_read = read (STDIN_FILENO, buf, bufsize);
783 if (n_read < 0)
784 error (EXIT_FAILURE, errno, "%s", quotef (infile));
785 idx_t n_left = n_read;
786 char *sob = buf;
787 while (n_left)
789 idx_t split_rest = 0;
790 char *eoc = nullptr;
791 char *eol;
793 /* Determine End Of Chunk and/or End of Line,
794 which are used below to select what to write or buffer. */
795 if (n_bytes - n_out - n_hold <= n_left)
797 /* Have enough for split. */
798 split_rest = n_bytes - n_out - n_hold;
799 eoc = sob + split_rest - 1;
800 eol = memrchr (sob, eolchar, split_rest);
802 else
803 eol = memrchr (sob, eolchar, n_left);
805 /* Output hold space if possible. */
806 if (n_hold && !(!eol && n_out))
808 cwrite (n_out == 0, hold, n_hold);
809 n_out += n_hold;
810 n_hold = 0;
813 /* Output to eol if present. */
814 if (eol)
816 split_line = true;
817 idx_t n_write = eol - sob + 1;
818 cwrite (n_out == 0, sob, n_write);
819 n_out += n_write;
820 n_left -= n_write;
821 sob += n_write;
822 if (eoc)
823 split_rest -= n_write;
826 /* Output to eoc or eob if possible. */
827 if (n_left && !split_line)
829 idx_t n_write = eoc ? split_rest : n_left;
830 cwrite (n_out == 0, sob, n_write);
831 n_out += n_write;
832 n_left -= n_write;
833 sob += n_write;
834 if (eoc)
835 split_rest -= n_write;
838 /* Update hold if needed. */
839 if ((eoc && split_rest) || (!eoc && n_left))
841 idx_t n_buf = eoc ? split_rest : n_left;
842 if (hold_size - n_hold < n_buf)
843 hold = xpalloc (hold, &hold_size, n_buf - (hold_size - n_hold),
844 -1, sizeof *hold);
845 memcpy (hold + n_hold, sob, n_buf);
846 n_hold += n_buf;
847 n_left -= n_buf;
848 sob += n_buf;
851 /* Reset for new split. */
852 if (eoc)
854 n_out = 0;
855 split_line = false;
859 while (n_read);
861 /* Handle no eol at end of file. */
862 if (n_hold)
863 cwrite (n_out == 0, hold, n_hold);
865 free (hold);
868 /* -n l/[K/]N: Write lines to files of approximately file size / N.
869 The file is partitioned into file size / N sized portions, with the
870 last assigned any excess. If a line _starts_ within a partition
871 it is written completely to the corresponding file. Since lines
872 are not split even if they overlap a partition, the files written
873 can be larger or smaller than the partition size, and even empty
874 if a line is so long as to completely overlap the partition. */
876 static void
877 lines_chunk_split (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
878 ssize_t initial_read, off_t file_size)
880 affirm (n && k <= n);
882 intmax_t rem_bytes = file_size % n;
883 off_t chunk_size = file_size / n;
884 intmax_t chunk_no = 1;
885 off_t chunk_end = chunk_size + (0 < rem_bytes);
886 off_t n_written = 0;
887 bool new_file_flag = true;
888 bool chunk_truncated = false;
890 if (k > 1 && 0 < file_size)
892 /* Start reading 1 byte before kth chunk of file. */
893 off_t start = (k - 1) * chunk_size + MIN (k - 1, rem_bytes) - 1;
894 if (start < initial_read)
896 memmove (buf, buf + start, initial_read - start);
897 initial_read -= start;
899 else
901 if (initial_read < start
902 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
903 error (EXIT_FAILURE, errno, "%s", quotef (infile));
904 initial_read = -1;
906 n_written = start;
907 chunk_no = k - 1;
908 chunk_end = start + 1;
911 while (n_written < file_size)
913 char *bp = buf, *eob;
914 ssize_t n_read;
915 if (0 <= initial_read)
917 n_read = initial_read;
918 initial_read = -1;
920 else
922 n_read = read (STDIN_FILENO, buf,
923 MIN (bufsize, file_size - n_written));
924 if (n_read < 0)
925 error (EXIT_FAILURE, errno, "%s", quotef (infile));
927 if (n_read == 0)
928 break; /* eof. */
929 chunk_truncated = false;
930 eob = buf + n_read;
932 while (bp != eob)
934 idx_t to_write;
935 bool next = false;
937 /* Begin looking for '\n' at last byte of chunk. */
938 off_t skip = MIN (n_read, MAX (0, chunk_end - 1 - n_written));
939 char *bp_out = memchr (bp + skip, eolchar, n_read - skip);
940 if (bp_out)
942 bp_out++;
943 next = true;
945 else
946 bp_out = eob;
947 to_write = bp_out - bp;
949 if (k == chunk_no)
951 /* We don't use the stdout buffer here since we're writing
952 large chunks from an existing file, so it's more efficient
953 to write out directly. */
954 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
955 write_error ();
957 else if (! k)
958 cwrite (new_file_flag, bp, to_write);
959 n_written += to_write;
960 bp += to_write;
961 n_read -= to_write;
962 new_file_flag = next;
964 /* A line could have been so long that it skipped
965 entire chunks. So create empty files in that case. */
966 while (next || chunk_end <= n_written)
968 if (!next && bp == eob)
970 /* replenish buf, before going to next chunk. */
971 chunk_truncated = true;
972 break;
974 if (k == chunk_no)
975 return;
976 chunk_end += chunk_size + (chunk_no < rem_bytes);
977 chunk_no++;
978 if (chunk_end <= n_written)
980 if (! k)
981 cwrite (true, nullptr, 0);
983 else
984 next = false;
989 if (chunk_truncated)
990 chunk_no++;
992 /* Ensure NUMBER files are created, which truncates
993 any existing files or notifies any consumers on fifos.
994 FIXME: Should we do this before EXIT_FAILURE? */
995 if (!k)
996 while (chunk_no++ <= n)
997 cwrite (true, nullptr, 0);
1000 /* -n K/N: Extract Kth of N chunks. */
1002 static void
1003 bytes_chunk_extract (intmax_t k, intmax_t n, char *buf, idx_t bufsize,
1004 ssize_t initial_read, off_t file_size)
1006 off_t start;
1007 off_t end;
1009 assert (0 < k && k <= n);
1011 start = (k - 1) * (file_size / n) + MIN (k - 1, file_size % n);
1012 end = k == n ? file_size : k * (file_size / n) + MIN (k, file_size % n);
1014 if (start < initial_read)
1016 memmove (buf, buf + start, initial_read - start);
1017 initial_read -= start;
1019 else
1021 if (initial_read < start
1022 && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
1023 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1024 initial_read = -1;
1027 while (start < end)
1029 ssize_t n_read;
1030 if (0 <= initial_read)
1032 n_read = initial_read;
1033 initial_read = -1;
1035 else
1037 n_read = read (STDIN_FILENO, buf, bufsize);
1038 if (n_read < 0)
1039 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1041 if (n_read == 0)
1042 break; /* eof. */
1043 n_read = MIN (n_read, end - start);
1044 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
1045 && ! ignorable (errno))
1046 error (EXIT_FAILURE, errno, "%s", quotef ("-"));
1047 start += n_read;
/* State kept for one output file in round robin mode.  */
typedef struct of_info
{
  char *of_name;   /* File name.  */
  int ofd;         /* Open descriptor, or one of the negative OFD_*
                      values.  */
  FILE *ofile;     /* Stream opened on OFD, or a null pointer.  */
  pid_t opid;      /* Filter process ID recorded when the file was
                      opened.  */
} of_t;
/* Values of the ofd member for files that are not currently open.  */
enum
{
  /* The file has not been opened yet.  */
  OFD_NEW = -1,

  /* The file was opened before and then closed; reopen it for
     appending.  */
  OFD_APPEND = -2
};
1065 /* Rotate file descriptors when we're writing to more output files than we
1066 have available file descriptors.
1067 Return whether we came under file resource pressure.
1068 If so, it's probably best to close each file when finished with it. */
1070 static bool
1071 ofile_open (of_t *files, idx_t i_check, idx_t nfiles)
1073 bool file_limit = false;
1075 if (files[i_check].ofd <= OFD_NEW)
1077 int fd;
1078 idx_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
1080 /* Another process could have opened a file in between the calls to
1081 close and open, so we should keep trying until open succeeds or
1082 we've closed all of our files. */
1083 while (true)
1085 if (files[i_check].ofd == OFD_NEW)
1086 fd = create (files[i_check].of_name);
1087 else /* OFD_APPEND */
1089 /* Attempt to append to previously opened file.
1090 We use O_NONBLOCK to support writing to fifos,
1091 where the other end has closed because of our
1092 previous close. In that case we'll immediately
1093 get an error, rather than waiting indefinitely.
1094 In specialized cases the consumer can keep reading
1095 from the fifo, terminating on conditions in the data
1096 itself, or perhaps never in the case of 'tail -f'.
1097 I.e., for fifos it is valid to attempt this reopen.
1099 We don't handle the filter_command case here, as create()
1100 will exit if there are not enough files in that case.
1101 I.e., we don't support restarting filters, as that would
1102 put too much burden on users specifying --filter commands. */
1103 fd = open (files[i_check].of_name,
1104 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
1107 if (0 <= fd)
1108 break;
1110 if (!(errno == EMFILE || errno == ENFILE))
1111 error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1113 file_limit = true;
1115 /* Search backwards for an open file to close. */
1116 while (files[i_reopen].ofd < 0)
1118 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
1119 /* No more open files to close, exit with E[NM]FILE. */
1120 if (i_reopen == i_check)
1121 error (EXIT_FAILURE, errno, "%s",
1122 quotef (files[i_check].of_name));
1125 if (fclose (files[i_reopen].ofile) != 0)
1126 error (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name));
1127 files[i_reopen].ofile = nullptr;
1128 files[i_reopen].ofd = OFD_APPEND;
1131 files[i_check].ofd = fd;
1132 FILE *ofile = fdopen (fd, "a");
1133 if (!ofile)
1134 error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
1135 files[i_check].ofile = ofile;
1136 files[i_check].opid = filter_pid;
1137 filter_pid = 0;
1140 return file_limit;
1143 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
1144 Use BUF of size BUFSIZE for the buffer, and if allocating storage
1145 put its address into *FILESP to pacify -fsanitize=leak.
1146 When K == 0, we try to keep the files open in parallel.
1147 If we run out of file resources, then we revert
1148 to opening and closing each file for each line. */
1150 static void
1151 lines_rr (intmax_t k, intmax_t n, char *buf, idx_t bufsize, of_t **filesp)
1153 bool wrapped = false;
1154 bool wrote = false;
1155 bool file_limit;
1156 idx_t i_file;
1157 of_t *files IF_LINT (= nullptr);
1158 intmax_t line_no;
1160 if (k)
1161 line_no = 1;
1162 else
1164 if (IDX_MAX < n)
1165 xalloc_die ();
1166 files = *filesp = xinmalloc (n, sizeof *files);
1168 /* Generate output file names. */
1169 for (i_file = 0; i_file < n; i_file++)
1171 next_file_name ();
1172 files[i_file].of_name = xstrdup (outfile);
1173 files[i_file].ofd = OFD_NEW;
1174 files[i_file].ofile = nullptr;
1175 files[i_file].opid = 0;
1177 i_file = 0;
1178 file_limit = false;
1181 while (true)
1183 char *bp = buf, *eob;
1184 ssize_t n_read = read (STDIN_FILENO, buf, bufsize);
1185 if (n_read < 0)
1186 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1187 else if (n_read == 0)
1188 break; /* eof. */
1189 eob = buf + n_read;
1191 while (bp != eob)
1193 idx_t to_write;
1194 bool next = false;
1196 /* Find end of line. */
1197 char *bp_out = memchr (bp, eolchar, eob - bp);
1198 if (bp_out)
1200 bp_out++;
1201 next = true;
1203 else
1204 bp_out = eob;
1205 to_write = bp_out - bp;
1207 if (k)
1209 if (line_no == k && unbuffered)
1211 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
1212 write_error ();
1214 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
1216 write_error ();
1218 if (next)
1219 line_no = (line_no == n) ? 1 : line_no + 1;
1221 else
1223 /* Secure file descriptor. */
1224 file_limit |= ofile_open (files, i_file, n);
1225 if (unbuffered)
1227 /* Note writing to fd, rather than flushing the FILE gives
1228 an 8% performance benefit, due to reduced data copying. */
1229 if (full_write (files[i_file].ofd, bp, to_write) != to_write
1230 && ! ignorable (errno))
1231 error (EXIT_FAILURE, errno, "%s",
1232 quotef (files[i_file].of_name));
1234 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1235 && ! ignorable (errno))
1236 error (EXIT_FAILURE, errno, "%s",
1237 quotef (files[i_file].of_name));
1239 if (! ignorable (errno))
1240 wrote = true;
1242 if (file_limit)
1244 if (fclose (files[i_file].ofile) != 0)
1245 error (EXIT_FAILURE, errno, "%s",
1246 quotef (files[i_file].of_name));
1247 files[i_file].ofile = nullptr;
1248 files[i_file].ofd = OFD_APPEND;
1250 if (next && ++i_file == n)
1252 wrapped = true;
1253 /* If no filters are accepting input, stop reading. */
1254 if (! wrote)
1255 goto no_filters;
1256 wrote = false;
1257 i_file = 0;
1261 bp = bp_out;
1265 no_filters:
1266 /* Ensure all files created, so that any existing files are truncated,
1267 and to signal any waiting fifo consumers.
1268 Also, close any open file descriptors.
1269 FIXME: Should we do this before EXIT_FAILURE? */
1270 if (!k)
1272 idx_t ceiling = wrapped ? n : i_file;
1273 for (i_file = 0; i_file < n; i_file++)
1275 if (i_file >= ceiling && !elide_empty_files)
1276 file_limit |= ofile_open (files, i_file, n);
1277 if (files[i_file].ofd >= 0)
1278 closeout (files[i_file].ofile, files[i_file].ofd,
1279 files[i_file].opid, files[i_file].of_name);
1280 files[i_file].ofd = OFD_APPEND;
/* Diagnose a second, conflicting way of splitting and then call
   usage (EXIT_FAILURE).  The do/while (0) wrapper lets an invocation
   followed by a semicolon act as a single statement.  */
#define FAIL_ONLY_ONE_WAY() \
  do { \
    error (0, 0, _("cannot split in more than one way")); \
    usage (EXIT_FAILURE); \
  } while (0)
/* Report a string-to-integer conversion failure and exit.
   MSGID is the untranslated diagnostic; it is translated here.
   ARG is the offending text, shown quoted after the message.
   An errno of EINVAL is not reported, so that no system error
   string is appended in that case.  */

static _Noreturn void
strtoint_die (char const *msgid, char const *arg)
{
  int reported_errno = errno;
  if (reported_errno == EINVAL)
    reported_errno = 0;

  error (EXIT_FAILURE, reported_errno, "%s: %s",
         gettext (msgid), quote (arg));
}
1302 /* Use OVERFLOW_OK when it is OK to ignore LONGINT_OVERFLOW errors, since the
1303 extreme value will do the right thing anyway on any practical platform. */
1304 #define OVERFLOW_OK LONGINT_OVERFLOW
1306 /* Parse ARG for number of bytes or lines. The number can be followed
1307 by MULTIPLIERS, and the resulting value must be positive.
1308 If the number cannot be parsed, diagnose with MSG.
1309 Return the number parsed, or an INTMAX_MAX on overflow. */
1311 static intmax_t
1312 parse_n_units (char const *arg, char const *multipliers, char const *msgid)
1314 intmax_t n;
1315 if (OVERFLOW_OK < xstrtoimax (arg, nullptr, 10, &n, multipliers) || n < 1)
1316 strtoint_die (msgid, arg);
1317 return n;
1320 /* Parse K/N syntax of chunk options. */
1322 static void
1323 parse_chunk (intmax_t *k_units, intmax_t *n_units, char const *arg)
1325 char *argend;
1326 strtol_error e = xstrtoimax (arg, &argend, 10, n_units, "");
1327 if (e == LONGINT_INVALID_SUFFIX_CHAR && *argend == '/')
1329 *k_units = *n_units;
1330 *n_units = parse_n_units (argend + 1, "",
1331 N_("invalid number of chunks"));
1332 if (! (0 < *k_units && *k_units <= *n_units))
1333 error (EXIT_FAILURE, 0, "%s: %s", _("invalid chunk number"),
1334 quote_mem (arg, argend - arg));
1336 else if (! (e <= OVERFLOW_OK && 0 < *n_units))
1337 strtoint_die (N_("invalid number of chunks"), arg);
1342 main (int argc, char **argv)
1344 enum Split_type split_type = type_undef;
1345 idx_t in_blk_size = 0; /* optimal block size of input file device */
1346 idx_t page_size = getpagesize ();
1347 intmax_t k_units = 0;
1348 intmax_t n_units = 0;
1350 static char const multipliers[] = "bEGKkMmPQRTYZ0";
1351 int c;
1352 int digits_optind = 0;
1353 off_t file_size = OFF_T_MAX;
1355 initialize_main (&argc, &argv);
1356 set_program_name (argv[0]);
1357 setlocale (LC_ALL, "");
1358 bindtextdomain (PACKAGE, LOCALEDIR);
1359 textdomain (PACKAGE);
1361 atexit (close_stdout);
1363 /* Parse command line options. */
1365 infile = "-";
1366 outbase = "x";
1368 while (true)
1370 /* This is the argv-index of the option we will read next. */
1371 int this_optind = optind ? optind : 1;
1373 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:ux",
1374 longopts, nullptr);
1375 if (c == -1)
1376 break;
1378 switch (c)
1380 case 'a':
1381 suffix_length = xdectoimax (optarg, 0, IDX_MAX,
1382 "", _("invalid suffix length"), 0);
1383 break;
1385 case ADDITIONAL_SUFFIX_OPTION:
1387 int suffix_len = strlen (optarg);
1388 if (last_component (optarg) != optarg
1389 || (suffix_len && ISSLASH (optarg[suffix_len - 1])))
1391 error (0, 0,
1392 _("invalid suffix %s, contains directory separator"),
1393 quote (optarg));
1394 usage (EXIT_FAILURE);
1397 additional_suffix = optarg;
1398 break;
1400 case 'b':
1401 if (split_type != type_undef)
1402 FAIL_ONLY_ONE_WAY ();
1403 split_type = type_bytes;
1404 n_units = parse_n_units (optarg, multipliers,
1405 N_("invalid number of bytes"));
1406 break;
1408 case 'l':
1409 if (split_type != type_undef)
1410 FAIL_ONLY_ONE_WAY ();
1411 split_type = type_lines;
1412 n_units = parse_n_units (optarg, "", N_("invalid number of lines"));
1413 break;
1415 case 'C':
1416 if (split_type != type_undef)
1417 FAIL_ONLY_ONE_WAY ();
1418 split_type = type_byteslines;
1419 n_units = parse_n_units (optarg, multipliers,
1420 N_("invalid number of lines"));
1421 break;
1423 case 'n':
1424 if (split_type != type_undef)
1425 FAIL_ONLY_ONE_WAY ();
1426 /* skip any whitespace */
1427 while (isspace (to_uchar (*optarg)))
1428 optarg++;
1429 if (STRNCMP_LIT (optarg, "r/") == 0)
1431 split_type = type_rr;
1432 optarg += 2;
1434 else if (STRNCMP_LIT (optarg, "l/") == 0)
1436 split_type = type_chunk_lines;
1437 optarg += 2;
1439 else
1440 split_type = type_chunk_bytes;
1441 parse_chunk (&k_units, &n_units, optarg);
1442 break;
1444 case 'u':
1445 unbuffered = true;
1446 break;
1448 case 't':
1450 char neweol = optarg[0];
1451 if (! neweol)
1452 error (EXIT_FAILURE, 0, _("empty record separator"));
1453 if (optarg[1])
1455 if (STREQ (optarg, "\\0"))
1456 neweol = '\0';
1457 else
1459 /* Provoke with 'split -txx'. Complain about
1460 "multi-character tab" instead of "multibyte tab", so
1461 that the diagnostic's wording does not need to be
1462 changed once multibyte characters are supported. */
1463 error (EXIT_FAILURE, 0, _("multi-character separator %s"),
1464 quote (optarg));
1467 /* Make it explicit we don't support multiple separators. */
1468 if (0 <= eolchar && neweol != eolchar)
1470 error (EXIT_FAILURE, 0,
1471 _("multiple separator characters specified"));
1474 eolchar = neweol;
1476 break;
1478 case '0':
1479 case '1':
1480 case '2':
1481 case '3':
1482 case '4':
1483 case '5':
1484 case '6':
1485 case '7':
1486 case '8':
1487 case '9':
1488 if (split_type == type_undef)
1490 split_type = type_digits;
1491 n_units = 0;
1493 if (split_type != type_undef && split_type != type_digits)
1494 FAIL_ONLY_ONE_WAY ();
1495 if (digits_optind != 0 && digits_optind != this_optind)
1496 n_units = 0; /* More than one number given; ignore other. */
1497 digits_optind = this_optind;
1498 if (ckd_mul (&n_units, n_units, 10)
1499 || ckd_add (&n_units, n_units, c - '0'))
1500 n_units = INTMAX_MAX;
1501 break;
1503 case 'd':
1504 case 'x':
1505 if (c == 'd')
1506 suffix_alphabet = "0123456789";
1507 else
1508 suffix_alphabet = "0123456789abcdef";
1509 if (optarg)
1511 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1513 error (0, 0,
1514 (c == 'd') ?
1515 _("%s: invalid start value for numerical suffix") :
1516 _("%s: invalid start value for hexadecimal suffix"),
1517 quote (optarg));
1518 usage (EXIT_FAILURE);
1520 else
1522 /* Skip any leading zero. */
1523 while (*optarg == '0' && *(optarg + 1) != '\0')
1524 optarg++;
1525 numeric_suffix_start = optarg;
1528 break;
1530 case 'e':
1531 elide_empty_files = true;
1532 break;
1534 case FILTER_OPTION:
1535 filter_command = optarg;
1536 break;
1538 case IO_BLKSIZE_OPTION:
1539 in_blk_size = xdectoumax (optarg, 1,
1540 MIN (SYS_BUFSIZE_MAX,
1541 MIN (IDX_MAX, SIZE_MAX) - 1),
1542 multipliers, _("invalid IO block size"), 0);
1543 break;
1545 case VERBOSE_OPTION:
1546 verbose = true;
1547 break;
1549 case_GETOPT_HELP_CHAR;
1551 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1553 default:
1554 usage (EXIT_FAILURE);
1558 if (k_units != 0 && filter_command)
1560 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1561 usage (EXIT_FAILURE);
1564 /* Handle default case. */
1565 if (split_type == type_undef)
1567 split_type = type_lines;
1568 n_units = 1000;
1571 if (n_units == 0)
1573 error (0, 0, _("invalid number of lines: %s"), quote ("0"));
1574 usage (EXIT_FAILURE);
1577 if (eolchar < 0)
1578 eolchar = '\n';
1580 set_suffix_length (n_units, split_type);
1582 /* Get out the filename arguments. */
1584 if (optind < argc)
1585 infile = argv[optind++];
1587 if (optind < argc)
1588 outbase = argv[optind++];
1590 if (optind < argc)
1592 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1593 usage (EXIT_FAILURE);
1596 /* Check that the suffix length is large enough for the numerical
1597 suffix start value. */
1598 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1600 error (0, 0, _("numerical suffix start value is too large "
1601 "for the suffix length"));
1602 usage (EXIT_FAILURE);
1605 /* Open the input file. */
1606 if (! STREQ (infile, "-")
1607 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1608 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1609 quoteaf (infile));
1611 /* Binary I/O is safer when byte counts are used. */
1612 xset_binary_mode (STDIN_FILENO, O_BINARY);
1614 /* Advise the kernel of our access pattern. */
1615 fdadvise (STDIN_FILENO, 0, 0, FADVISE_SEQUENTIAL);
1617 /* Get the optimal block size of input device and make a buffer. */
1619 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1620 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1622 if (in_blk_size == 0)
1624 in_blk_size = io_blksize (&in_stat_buf);
1625 if (SYS_BUFSIZE_MAX < in_blk_size)
1626 in_blk_size = SYS_BUFSIZE_MAX;
1629 char *buf = xalignalloc (page_size, in_blk_size + 1);
1630 ssize_t initial_read = -1;
1632 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1634 file_size = input_file_size (STDIN_FILENO, &in_stat_buf,
1635 buf, in_blk_size);
1636 if (file_size < 0)
1637 error (EXIT_FAILURE, errno, _("%s: cannot determine file size"),
1638 quotef (infile));
1639 initial_read = MIN (file_size, in_blk_size);
1642 /* When filtering, closure of one pipe must not terminate the process,
1643 as there may still be other streams expecting input from us. */
1644 if (filter_command)
1645 default_SIGPIPE = signal (SIGPIPE, SIG_IGN) == SIG_DFL;
1647 switch (split_type)
1649 case type_digits:
1650 case type_lines:
1651 lines_split (n_units, buf, in_blk_size);
1652 break;
1654 case type_bytes:
1655 bytes_split (n_units, 0, buf, in_blk_size, -1, 0);
1656 break;
1658 case type_byteslines:
1659 line_bytes_split (n_units, buf, in_blk_size);
1660 break;
1662 case type_chunk_bytes:
1663 if (k_units == 0)
1664 bytes_split (file_size / n_units, file_size % n_units,
1665 buf, in_blk_size, initial_read, n_units);
1666 else
1667 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
1668 file_size);
1669 break;
1671 case type_chunk_lines:
1672 lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
1673 file_size);
1674 break;
1676 case type_rr:
1677 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1678 but the functionality is provided for symmetry. */
1680 of_t *files;
1681 lines_rr (k_units, n_units, buf, in_blk_size, &files);
1683 break;
1685 default:
1686 affirm (false);
1689 if (close (STDIN_FILENO) != 0)
1690 error (EXIT_FAILURE, errno, "%s", quotef (infile));
1691 closeout (nullptr, output_desc, filter_pid, outfile);
1693 main_exit (EXIT_SUCCESS);