shred: increase I/O block size for periodic pattern case
[coreutils.git] / src / split.c
blobf740652ddbed27acf97be185231f71b4e8856003
1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
19 To do:
20 * Implement -t CHAR or -t REGEX to specify break characters other
21 than newline. */
23 #include <config.h>
25 #include <assert.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <signal.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
32 #include "system.h"
33 #include "error.h"
34 #include "fd-reopen.h"
35 #include "fcntl--.h"
36 #include "full-read.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
39 #include "quote.h"
40 #include "safe-read.h"
41 #include "sig2str.h"
42 #include "xfreopen.h"
43 #include "xstrtol.h"
45 /* The official name of this program (e.g., no 'g' prefix). */
46 #define PROGRAM_NAME "split"
48 #define AUTHORS \
49 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
50 proper_name ("Richard M. Stallman")
52 /* Shell command to filter through, instead of creating files.
   ignorable () and create () test this for non-NULL to select
   filter mode. */
53 static char const *filter_command;
55 /* Process ID of the most recently forked filter.  Set by create ();
   ofile_open () copies it into the per-file record and resets it to 0. */
56 static int filter_pid;
58 /* Array of open pipes: the write ends handed out by create () in
   filter mode.  closeout () removes an entry once its descriptor has
   been closed.  OPEN_PIPES_ALLOC is the allocated capacity and
   N_OPEN_PIPES the number of entries in use. */
59 static int *open_pipes;
60 static size_t open_pipes_alloc;
61 static size_t n_open_pipes;
63 /* Blocked signals.  OLDBLOCKED is the mask that create () restores
   in the child just before exec'ing the filter. */
64 static sigset_t oldblocked;
65 static sigset_t newblocked;
67 /* Base name of output files. */
68 static char const *outbase;
70 /* Name of output files. */
71 static char *outfile;
73 /* Pointer to the end of the prefix in OUTFILE.
74 Suffixes are inserted here. */
75 static char *outfile_mid;
77 /* Generate new suffix when suffixes are exhausted. */
78 static bool suffix_auto = true;
80 /* Length of OUTFILE's suffix. */
81 static size_t suffix_length;
83 /* Alphabet of characters to use in suffix. */
84 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
86 /* Numerical suffix start value. */
87 static const char *numeric_suffix_start;
89 /* Additional suffix to append to output file names. */
90 static char const *additional_suffix;
92 /* Name of input file. May be "-". */
93 static char *infile;
95 /* stat buf for input file. */
96 static struct stat in_stat_buf;
98 /* Descriptor on which output file is open. */
99 static int output_desc = -1;
101 /* If true, print a diagnostic on standard output just before each
   output file is opened (create () writes it with fprintf (stdout, ...)). */
103 static bool verbose;
105 /* If true, don't generate zero length output files. */
106 static bool elide_empty_files;
108 /* If true, in round robin mode, immediately copy
109 input to output, which is much slower, so disabled by default. */
110 static bool unbuffered;
112 /* The split mode to use.
   type_undef means no mode has been selected yet.  In the option
   parsing visible in main, -b selects type_bytes, -l type_lines and
   -C type_byteslines.  The three modes type_chunk_bytes,
   type_chunk_lines and type_rr are the ones for which
   set_suffix_length () derives the suffix width from the file count. */
113 enum Split_type
115 type_undef, type_bytes, type_byteslines, type_lines, type_digits,
116 type_chunk_bytes, type_chunk_lines, type_rr
119 /* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These codes appear as the value field of the matching entries in
   the longopts table. */
121 enum
123 VERBOSE_OPTION = CHAR_MAX + 1,
124 FILTER_OPTION,
125 IO_BLKSIZE_OPTION,
126 ADDITIONAL_SUFFIX_OPTION
/* Long option table passed to getopt_long in main.  The last field of
   each entry is the code reported for that option: either the
   equivalent short option letter or one of the pseudo-option codes
   defined above.  The table ends with an all-zero entry. */
129 static struct option const longopts[] =
131 {"bytes", required_argument, NULL, 'b'},
132 {"lines", required_argument, NULL, 'l'},
133 {"line-bytes", required_argument, NULL, 'C'},
134 {"number", required_argument, NULL, 'n'},
135 {"elide-empty-files", no_argument, NULL, 'e'},
136 {"unbuffered", no_argument, NULL, 'u'},
137 {"suffix-length", required_argument, NULL, 'a'},
138 {"additional-suffix", required_argument, NULL,
139 ADDITIONAL_SUFFIX_OPTION},
140 {"numeric-suffixes", optional_argument, NULL, 'd'},
141 {"filter", required_argument, NULL, FILTER_OPTION},
142 {"verbose", no_argument, NULL, VERBOSE_OPTION},
143 {"-io-blksize", required_argument, NULL,
144 IO_BLKSIZE_OPTION}, /* do not document */
145 {GETOPT_HELP_OPTION_DECL},
146 {GETOPT_VERSION_OPTION_DECL},
147 {NULL, 0, NULL, 0}
150 /* Return true if the errno value, ERR, is ignorable. */
151 static inline bool
152 ignorable (int err)
154 return filter_command && err == EPIPE;
157 static void
158 set_suffix_length (uintmax_t n_units, enum Split_type split_type)
160 #define DEFAULT_SUFFIX_LENGTH 2
162 size_t suffix_needed = 0;
164 /* The suffix auto length feature is incompatible with
165 a user specified start value as the generated suffixes
166 are not all consecutive. */
167 if (numeric_suffix_start)
168 suffix_auto = false;
170 /* Auto-calculate the suffix length if the number of files is given. */
171 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
172 || split_type == type_rr)
174 size_t alphabet_len = strlen (suffix_alphabet);
175 bool alphabet_slop = (n_units % alphabet_len) != 0;
176 while (n_units /= alphabet_len)
177 suffix_needed++;
178 suffix_needed += alphabet_slop;
179 suffix_auto = false;
182 if (suffix_length) /* set by user */
184 if (suffix_length < suffix_needed)
186 error (EXIT_FAILURE, 0,
187 _("the suffix length needs to be at least %zu"),
188 suffix_needed);
190 suffix_auto = false;
191 return;
193 else
194 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
/* Emit help and terminate the program with exit status STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the full option summary is written to stdout.  In both
   cases the function ends by calling exit (STATUS). */
197 void
198 usage (int status)
200 if (status != EXIT_SUCCESS)
201 emit_try_help ();
202 else
204 printf (_("\
205 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
207 program_name);
208 fputs (_("\
209 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
210 size is 1000 lines, and default PREFIX is 'x'. With no INPUT, or when INPUT\n\
211 is -, read standard input.\n\
212 "), stdout);
214 emit_mandatory_arg_note ();
/* The single %d below is filled in with DEFAULT_SUFFIX_LENGTH. */
216 fprintf (stdout, _("\
217 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
218 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
219 -b, --bytes=SIZE put SIZE bytes per output file\n\
220 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
221 -d, --numeric-suffixes[=FROM] use numeric suffixes instead of alphabetic;\n\
222 FROM changes the start value (default 0)\n\
223 -e, --elide-empty-files do not generate empty output files with '-n'\n\
224 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
225 -l, --lines=NUMBER put NUMBER lines per output file\n\
226 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
227 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
228 "), DEFAULT_SUFFIX_LENGTH);
229 fputs (_("\
230 --verbose print a diagnostic just before each\n\
231 output file is opened\n\
232 "), stdout);
233 fputs (HELP_OPTION_DESCRIPTION, stdout);
234 fputs (VERSION_OPTION_DESCRIPTION, stdout);
235 emit_size_note ();
236 fputs (_("\n\
237 CHUNKS may be:\n\
238 N split into N files based on size of input\n\
239 K/N output Kth of N to stdout\n\
240 l/N split into N files without splitting lines\n\
241 l/K/N output Kth of N to stdout without splitting lines\n\
242 r/N like 'l' but use round robin distribution\n\
243 r/K/N likewise but only output Kth of N to stdout\n\
244 "), stdout);
245 emit_ancillary_info ();
247 exit (status);
250 /* Compute the next sequential output file name and store it into the
   string 'outfile'.

   The first call (outfile == NULL) builds OUTBASE + suffix +
   ADDITIONAL_SUFFIX, with the suffix filled with the first alphabet
   character, or ending in NUMERIC_SUFFIX_START when that is set.
   Later calls increment the suffix in place like an odometer.  When
   SUFFIX_AUTO is set and the leading suffix position reaches the last
   alphabet character, the name is rebuilt one suffix character wider
   via the new_name label.  If the suffix cannot be incremented any
   further the program exits with "output file suffixes exhausted". */
253 static void
254 next_file_name (void)
256 /* Index in suffix_alphabet of each character in the suffix. */
257 static size_t *sufindex;
/* Cached lengths; being static they persist across calls.
   OUTFILE_LENGTH is also the "already initialized" marker tested
   after the new_name label. */
258 static size_t outbase_length;
259 static size_t outfile_length;
260 static size_t addsuf_length;
262 if (! outfile)
264 bool widen;
266 new_name:
/* Nonzero OUTFILE_LENGTH means a name already exists, i.e. we got
   here through the goto below and must widen the suffix. */
267 widen = !! outfile_length;
269 if (! widen)
271 /* Allocate and initialize the first file name. */
273 outbase_length = strlen (outbase);
274 addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
275 outfile_length = outbase_length + suffix_length + addsuf_length;
277 else
279 /* Reallocate and initialize a new wider file name.
280 We do this by subsuming the unchanging part of
281 the generated suffix into the prefix (base), and
282 reinitializing the now one longer suffix. */
/* +2: one character joins the prefix (see below) and the suffix
   itself grows by one. */
284 outfile_length += 2;
285 suffix_length++;
/* Guard against the length arithmetic above having wrapped around. */
288 if (outfile_length + 1 < outbase_length)
289 xalloc_die ();
290 outfile = xrealloc (outfile, outfile_length + 1);
292 if (! widen)
293 memcpy (outfile, outbase, outbase_length);
294 else
296 /* Append the last alphabet character to the file name prefix. */
297 outfile[outbase_length] = suffix_alphabet[sufindex[0]];
298 outbase_length++;
/* Lay out: prefix | suffix (all first alphabet char) | additional
   suffix | NUL. */
301 outfile_mid = outfile + outbase_length;
302 memset (outfile_mid, suffix_alphabet[0], suffix_length);
303 if (additional_suffix)
304 memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
305 outfile[outfile_length] = 0;
/* Start a fresh, zeroed index array sized for the current suffix. */
307 free (sufindex);
308 sufindex = xcalloc (suffix_length, sizeof *sufindex);
310 if (numeric_suffix_start)
312 assert (! widen);
314 /* Update the output file name. */
/* NOTE(review): this right-aligns the start value inside the suffix
   and so assumes strlen (numeric_suffix_start) <= suffix_length;
   no such check is visible in this function -- confirm the caller
   enforces it. */
315 size_t i = strlen (numeric_suffix_start);
316 memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
318 /* Update the suffix index. */
319 size_t *sufindex_end = sufindex + suffix_length;
320 while (i-- != 0)
321 *--sufindex_end = numeric_suffix_start[i] - '0';
324 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
325 /* POSIX requires that if the output file name is too long for
326 its directory, 'split' must fail without creating any files.
327 This must be checked for explicitly on operating systems that
328 silently truncate file names. */
330 char *dir = dir_name (outfile);
331 long name_max = pathconf (dir, _PC_NAME_MAX);
332 if (0 <= name_max && name_max < base_len (last_component (outfile)))
333 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
334 free (dir);
336 #endif
338 else
340 /* Increment the suffix in place, if possible. */
/* Work from the rightmost suffix position towards the left,
   carrying while a position runs off the end of the alphabet. */
342 size_t i = suffix_length;
343 while (i-- != 0)
345 sufindex[i]++;
/* Leading position has just reached the last alphabet character
   (the one before the terminating NUL): widen instead of using it. */
346 if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1])
347 goto new_name;
348 outfile_mid[i] = suffix_alphabet[sufindex[i]];
/* A non-NUL character means this position was still in range. */
349 if (outfile_mid[i])
350 return;
/* Ran past the alphabet: reset this position and carry left. */
351 sufindex[i] = 0;
352 outfile_mid[i] = suffix_alphabet[sufindex[i]];
354 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
358 /* Create or truncate a file. */
360 static int
361 create (const char *name)
363 if (!filter_command)
365 if (verbose)
366 fprintf (stdout, _("creating file %s\n"), quote (name));
368 int fd = open (name, O_WRONLY | O_CREAT | O_BINARY, MODE_RW_UGO);
369 if (fd < 0)
370 return fd;
371 struct stat out_stat_buf;
372 if (fstat (fd, &out_stat_buf) != 0)
373 error (EXIT_FAILURE, errno, _("failed to stat %s"), quote (name));
374 if (SAME_INODE (in_stat_buf, out_stat_buf))
375 error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
376 quote (name));
377 if (ftruncate (fd, 0) != 0)
378 error (EXIT_FAILURE, errno, _("%s: error truncating"), quote (name));
380 return fd;
382 else
384 int fd_pair[2];
385 pid_t child_pid;
386 char const *shell_prog = getenv ("SHELL");
387 if (shell_prog == NULL)
388 shell_prog = "/bin/sh";
389 if (setenv ("FILE", name, 1) != 0)
390 error (EXIT_FAILURE, errno,
391 _("failed to set FILE environment variable"));
392 if (verbose)
393 fprintf (stdout, _("executing with FILE=%s\n"), quote (name));
394 if (pipe (fd_pair) != 0)
395 error (EXIT_FAILURE, errno, _("failed to create pipe"));
396 child_pid = fork ();
397 if (child_pid == 0)
399 /* This is the child process. If an error occurs here, the
400 parent will eventually learn about it after doing a wait,
401 at which time it will emit its own error message. */
402 int j;
403 /* We have to close any pipes that were opened during an
404 earlier call, otherwise this process will be holding a
405 write-pipe that will prevent the earlier process from
406 reading an EOF on the corresponding read-pipe. */
407 for (j = 0; j < n_open_pipes; ++j)
408 if (close (open_pipes[j]) != 0)
409 error (EXIT_FAILURE, errno, _("closing prior pipe"));
410 if (close (fd_pair[1]))
411 error (EXIT_FAILURE, errno, _("closing output pipe"));
412 if (fd_pair[0] != STDIN_FILENO)
414 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
415 error (EXIT_FAILURE, errno, _("moving input pipe"));
416 if (close (fd_pair[0]) != 0)
417 error (EXIT_FAILURE, errno, _("closing input pipe"));
419 sigprocmask (SIG_SETMASK, &oldblocked, NULL);
420 execl (shell_prog, last_component (shell_prog), "-c",
421 filter_command, (char *) NULL);
422 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
423 shell_prog, filter_command);
425 if (child_pid == -1)
426 error (EXIT_FAILURE, errno, _("fork system call failed"));
427 if (close (fd_pair[0]) != 0)
428 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
429 filter_pid = child_pid;
430 if (n_open_pipes == open_pipes_alloc)
431 open_pipes = x2nrealloc (open_pipes, &open_pipes_alloc,
432 sizeof *open_pipes);
433 open_pipes[n_open_pipes++] = fd_pair[1];
434 return fd_pair[1];
438 /* Close the output file, and do any associated cleanup.
439 If FP and FD are both specified, they refer to the same open file;
440 in this case FP is closed, but FD is still used in cleanup. */
441 static void
442 closeout (FILE *fp, int fd, pid_t pid, char const *name)
444 if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
445 error (EXIT_FAILURE, errno, "%s", name);
446 if (fd >= 0)
448 if (fp == NULL && close (fd) < 0)
449 error (EXIT_FAILURE, errno, "%s", name);
450 int j;
451 for (j = 0; j < n_open_pipes; ++j)
453 if (open_pipes[j] == fd)
455 open_pipes[j] = open_pipes[--n_open_pipes];
456 break;
460 if (pid > 0)
462 int wstatus = 0;
463 if (waitpid (pid, &wstatus, 0) == -1 && errno != ECHILD)
464 error (EXIT_FAILURE, errno, _("waiting for child process"));
465 if (WIFSIGNALED (wstatus))
467 int sig = WTERMSIG (wstatus);
468 if (sig != SIGPIPE)
470 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
471 if (sig2str (sig, signame) != 0)
472 sprintf (signame, "%d", sig);
473 error (sig + 128, 0,
474 _("with FILE=%s, signal %s from command: %s"),
475 name, signame, filter_command);
478 else if (WIFEXITED (wstatus))
480 int ex = WEXITSTATUS (wstatus);
481 if (ex != 0)
482 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
483 name, ex, filter_command);
485 else
487 /* shouldn't happen. */
488 error (EXIT_FAILURE, 0,
489 _("unknown status from command (0x%X)"), wstatus);
494 /* Write BYTES bytes at BP to an output file.
495 If NEW_FILE_FLAG is true, open the next output file.
496 Otherwise add to the same output file already in use. */
498 static void
499 cwrite (bool new_file_flag, const char *bp, size_t bytes)
501 if (new_file_flag)
503 if (!bp && bytes == 0 && elide_empty_files)
504 return;
505 closeout (NULL, output_desc, filter_pid, outfile);
506 next_file_name ();
507 if ((output_desc = create (outfile)) < 0)
508 error (EXIT_FAILURE, errno, "%s", outfile);
510 if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno))
511 error (EXIT_FAILURE, errno, "%s", outfile);
514 /* Split into pieces of exactly N_BYTES bytes.
   Use buffer BUF, whose size is BUFSIZE.
   If MAX_FILES is nonzero, no new output is started once that many
   have been opened, so all remaining input goes to the last one;
   afterwards empty outputs are requested until MAX_FILES have been
   opened.  Input is read from standard input. */
517 static void
518 bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
520 size_t n_read;
521 bool new_file_flag = true;
/* TO_READ: bytes of the current buffer not yet written out.
   TO_WRITE: bytes still missing from the current output piece. */
522 size_t to_read;
523 uintmax_t to_write = n_bytes;
524 char *bp_out;
/* Number of outputs started so far. */
525 uintmax_t opened = 0;
529 n_read = full_read (STDIN_FILENO, buf, bufsize);
/* A short read with errno set is an error; a short read with errno
   clear is treated as end of input. */
530 if (n_read < bufsize && errno)
531 error (EXIT_FAILURE, errno, "%s", infile);
532 bp_out = buf;
533 to_read = n_read;
534 while (true)
/* Buffer holds less than the piece still needs: write it all and
   go read more. */
536 if (to_read < to_write)
538 if (to_read) /* do not write 0 bytes! */
540 cwrite (new_file_flag, bp_out, to_read);
541 opened += new_file_flag;
542 to_write -= to_read;
543 new_file_flag = false;
545 break;
547 else
/* Buffer can complete the current piece: write exactly what is
   missing, then decide whether the next write starts a new file. */
549 size_t w = to_write;
550 cwrite (new_file_flag, bp_out, w);
551 opened += new_file_flag;
552 new_file_flag = !max_files || (opened < max_files);
553 if (!new_file_flag && ignorable (errno))
555 /* If filter no longer accepting input, stop reading. */
556 n_read = 0;
557 break;
559 bp_out += w;
560 to_read -= w;
561 to_write = n_bytes;
/* Keep going while reads fill the whole buffer. */
565 while (n_read == bufsize);
567 /* Ensure NUMBER files are created, which truncates
568 any existing files or notifies any consumers on fifos.
569 FIXME: Should we do this before EXIT_FAILURE? */
570 while (opened++ < max_files)
571 cwrite (true, NULL, 0);
574 /* Split into pieces of exactly N_LINES lines.
   Use buffer BUF, whose size is BUFSIZE.
   Input is read from standard input.  A newline sentinel is stored
   at BUF[n_read], which can be BUF[BUFSIZE], so BUF must have room
   for one byte beyond BUFSIZE -- NOTE(review): the allocation is not
   visible here; confirm the caller provides it. */
577 static void
578 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
580 size_t n_read;
/* BP scans for newlines, BP_OUT marks the start of data not yet
   written, EOB is the end of the data read into BUF. */
581 char *bp, *bp_out, *eob;
582 bool new_file_flag = true;
/* Lines counted so far towards the current output piece. */
583 uintmax_t n = 0;
587 n_read = full_read (STDIN_FILENO, buf, bufsize);
588 if (n_read < bufsize && errno)
589 error (EXIT_FAILURE, errno, "%s", infile);
590 bp = bp_out = buf;
591 eob = bp + n_read;
/* Sentinel: guarantees the memchr below always finds a newline. */
592 *eob = '\n';
593 while (true)
595 bp = memchr (bp, '\n', eob - bp + 1);
/* Hit the sentinel: no more real newlines in this buffer.  Write
   the remaining partial data to the current piece and refill. */
596 if (bp == eob)
598 if (eob != bp_out) /* do not write 0 bytes! */
600 size_t len = eob - bp_out;
601 cwrite (new_file_flag, bp_out, len);
602 new_file_flag = false;
604 break;
/* Step past the newline; once N_LINES lines are gathered, write
   them and make the next write start a new output. */
607 ++bp;
608 if (++n >= n_lines)
610 cwrite (new_file_flag, bp_out, bp - bp_out);
611 bp_out = bp;
612 new_file_flag = true;
613 n = 0;
617 while (n_read == bufsize);
620 /* Split into pieces that are as large as possible while still not more
621 than N_BYTES bytes, and are split on line boundaries except
   where lines longer than N_BYTES bytes occur.
   Input is read from standard input into BUF, whose size is BUFSIZE.
   Data that cannot yet be assigned to an output is kept in a
   separately allocated hold buffer, which is freed before returning. */
624 static void
625 line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
627 size_t n_read;
628 uintmax_t n_out = 0; /* for each split. */
/* Number of bytes currently stored in HOLD. */
629 size_t n_hold = 0;
630 char *hold = NULL; /* for lines > bufsize. */
/* Allocated size of HOLD. */
631 size_t hold_size = 0;
632 bool split_line = false; /* Whether a \n was output in a split. */
636 n_read = full_read (STDIN_FILENO, buf, bufsize);
637 if (n_read < bufsize && errno)
638 error (EXIT_FAILURE, errno, "%s", infile);
/* N_LEFT: unprocessed bytes in BUF; SOB: start of those bytes. */
639 size_t n_left = n_read;
640 char *sob = buf;
641 while (n_left)
/* SPLIT_REST: bytes of BUF that still belong to the current split.
   EOC is non-NULL only when the current split ends inside BUF. */
643 size_t split_rest = 0;
644 char *eoc = NULL;
645 char *eol;
647 /* Determine End Of Chunk and/or End of Line,
648 which are used below to select what to write or buffer. */
649 if (n_bytes - n_out - n_hold <= n_left)
651 /* Have enough for split. */
652 split_rest = n_bytes - n_out - n_hold;
653 eoc = sob + split_rest - 1;
/* Last newline within the part that fits in this split. */
654 eol = memrchr (sob, '\n', split_rest);
656 else
657 eol = memrchr (sob, '\n', n_left);
659 /* Output hold space if possible. */
/* Held data is written unless this split already has output and no
   newline is in view; in that case it stays held. */
660 if (n_hold && !(!eol && n_out))
662 cwrite (n_out == 0, hold, n_hold);
663 n_out += n_hold;
/* Shrink an oversized hold buffer back to BUFSIZE. */
664 if (n_hold > bufsize)
665 hold = xrealloc (hold, bufsize);
666 n_hold = 0;
667 hold_size = bufsize;
670 /* Output to eol if present. */
671 if (eol)
673 split_line = true;
674 size_t n_write = eol - sob + 1;
/* The first write of a split (N_OUT == 0) opens a new output. */
675 cwrite (n_out == 0, sob, n_write);
676 n_out += n_write;
677 n_left -= n_write;
678 sob += n_write;
679 if (eoc)
680 split_rest -= n_write;
683 /* Output to eoc or eob if possible. */
/* Only when no complete line was output in this split: the line is
   longer than the split and has to be broken. */
684 if (n_left && !split_line)
686 size_t n_write = eoc ? split_rest : n_left;
687 cwrite (n_out == 0, sob, n_write);
688 n_out += n_write;
689 n_left -= n_write;
690 sob += n_write;
691 if (eoc)
692 split_rest -= n_write;
695 /* Update hold if needed. */
696 if ((eoc && split_rest) || (!eoc && n_left))
698 size_t n_buf = eoc ? split_rest : n_left;
/* Grow HOLD by one BUFSIZE step when the data does not fit. */
699 if (hold_size - n_hold < n_buf)
701 if (hold_size <= SIZE_MAX - bufsize)
702 hold_size += bufsize;
703 else
704 xalloc_die ();
705 hold = xrealloc (hold, hold_size);
707 memcpy (hold + n_hold, sob, n_buf);
708 n_hold += n_buf;
709 n_left -= n_buf;
710 sob += n_buf;
713 /* Reset for new split. */
714 if (eoc)
716 n_out = 0;
717 split_line = false;
721 while (n_read == bufsize);
723 /* Handle no eol at end of file. */
724 if (n_hold)
725 cwrite (n_out == 0, hold, n_hold);
727 free (hold);
730 /* -n l/[K/]N: Write lines to files of approximately file size / N.
731 The file is partitioned into file size / N sized portions, with the
732 last assigned any excess. If a line _starts_ within a partition
733 it is written completely to the corresponding file. Since lines
734 are not split even if they overlap a partition, the files written
735 can be larger or smaller than the partition size, and even empty
   if a line is so long as to completely overlap the partition.

   When K is zero every chunk is written through cwrite (); when K is
   nonzero only chunk K is written, to standard output, and the
   function returns as soon as a later chunk is reached.  Input is
   read from standard input into BUF (size BUFSIZE); FILE_SIZE is the
   number of input bytes to consider. */
738 static void
739 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
740 off_t file_size)
742 assert (n && k <= n && n <= file_size);
744 const off_t chunk_size = file_size / n;
745 uintmax_t chunk_no = 1;
/* Offset of the last byte of the current chunk. */
746 off_t chunk_end = chunk_size - 1;
/* Input offset processed so far (written or skipped). */
747 off_t n_written = 0;
748 bool new_file_flag = true;
/* Set when a buffer ran out exactly at a chunk end without a
   newline, i.e. the chunk switch is still pending. */
749 bool chunk_truncated = false;
751 if (k > 1)
753 /* Start reading 1 byte before kth chunk of file. */
754 off_t start = (k - 1) * chunk_size - 1;
755 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
756 error (EXIT_FAILURE, errno, "%s", infile);
757 n_written = start;
/* Treat that byte as the tail of the preceding position so the
   loop below advances to chunk K only after passing it. */
758 chunk_no = k - 1;
759 chunk_end = chunk_no * chunk_size - 1;
762 while (n_written < file_size)
764 char *bp = buf, *eob;
765 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
766 if (n_read < bufsize && errno)
767 error (EXIT_FAILURE, errno, "%s", infile);
768 else if (n_read == 0)
769 break; /* eof. */
/* Never process more than FILE_SIZE bytes in total. */
770 n_read = MIN (n_read, file_size - n_written);
771 chunk_truncated = false;
772 eob = buf + n_read;
774 while (bp != eob)
776 size_t to_write;
/* Whether a newline was found, i.e. the next data starts a new
   line that may belong to the next chunk. */
777 bool next = false;
779 /* Begin looking for '\n' at last byte of chunk. */
780 off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
781 char *bp_out = memchr (bp + skip, '\n', n_read - skip);
/* Found: include the newline.  Not found: take the whole buffer. */
782 if (bp_out++)
783 next = true;
784 else
785 bp_out = eob;
786 to_write = bp_out - bp;
788 if (k == chunk_no)
790 /* We don't use the stdout buffer here since we're writing
791 large chunks from an existing file, so it's more efficient
792 to write out directly. */
793 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
794 error (EXIT_FAILURE, errno, "%s", _("write error"));
796 else if (! k)
797 cwrite (new_file_flag, bp, to_write);
798 n_written += to_write;
799 bp += to_write;
800 n_read -= to_write;
801 new_file_flag = next;
803 /* A line could have been so long that it skipped
804 entire chunks. So create empty files in that case. */
805 while (next || chunk_end <= n_written - 1)
807 if (!next && bp == eob)
809 /* replenish buf, before going to next chunk. */
810 chunk_truncated = true;
811 break;
813 chunk_no++;
/* In extraction mode, stop once past the requested chunk. */
814 if (k && chunk_no > k)
815 return;
816 if (chunk_no == n)
817 chunk_end = file_size - 1; /* >= chunk_size. */
818 else
819 chunk_end += chunk_size;
/* The new chunk is already entirely behind us: emit it empty. */
820 if (chunk_end <= n_written - 1)
822 if (! k)
823 cwrite (true, NULL, 0);
825 else
826 next = false;
/* Account for a chunk switch that was still pending at end of input. */
831 if (chunk_truncated)
832 chunk_no++;
834 /* Ensure NUMBER files are created, which truncates
835 any existing files or notifies any consumers on fifos.
836 FIXME: Should we do this before EXIT_FAILURE? */
837 while (!k && chunk_no++ <= n)
838 cwrite (true, NULL, 0);
841 /* -n K/N: Extract Kth of N chunks. */
843 static void
844 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
845 off_t file_size)
847 off_t start;
848 off_t end;
850 assert (k && n && k <= n && n <= file_size);
852 start = (k - 1) * (file_size / n);
853 end = (k == n) ? file_size : k * (file_size / n);
855 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
856 error (EXIT_FAILURE, errno, "%s", infile);
858 while (start < end)
860 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
861 if (n_read < bufsize && errno)
862 error (EXIT_FAILURE, errno, "%s", infile);
863 else if (n_read == 0)
864 break; /* eof. */
865 n_read = MIN (n_read, end - start);
866 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
867 && ! ignorable (errno))
868 error (EXIT_FAILURE, errno, "%s", quote ("-"));
869 start += n_read;
/* Per-output-file state used by the round robin code. */
873 typedef struct of_info
/* Output file name; lines_rr () stores an xstrdup'd copy of OUTFILE. */
875 char *of_name;
/* Open descriptor, or one of the negative OFD_* markers below. */
876 int ofd;
/* stdio stream opened on OFD by ofile_open (); NULL while closed. */
877 FILE *ofile;
/* Value of FILTER_PID recorded by ofile_open () when the file was opened. */
878 int opid;
879 } of_t;
/* Negative values stored in of_t.ofd in place of a descriptor.
   OFD_NEW: not opened yet; ofile_open () will call create ().
   OFD_APPEND: was open and has been closed again; ofile_open ()
   reopens it by name with O_APPEND instead of creating it. */
881 enum
883 OFD_NEW = -1,
884 OFD_APPEND = -2
887 /* Rotate file descriptors when we're writing to more output files than we
888 have available file descriptors.
889 Return whether we came under file resource pressure.
   If so, it's probably best to close each file when finished with it.

   FILES is an array of NFILES entries and I_CHECK the index of the
   entry that must be open on return.  Nothing is done if that entry
   already holds a descriptor.  Failures other than EMFILE/ENFILE, or
   running out of other files to close, exit the program. */
892 static bool
893 ofile_open (of_t *files, size_t i_check, size_t nfiles)
895 bool file_limit = false;
/* True for both markers: OFD_NEW (-1) and OFD_APPEND (-2). */
897 if (files[i_check].ofd <= OFD_NEW)
899 int fd;
/* Candidate to close under pressure: start with the entry just
   before I_CHECK, wrapping around. */
900 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
902 /* Another process could have opened a file in between the calls to
903 close and open, so we should keep trying until open succeeds or
904 we've closed all of our files. */
905 while (true)
907 if (files[i_check].ofd == OFD_NEW)
908 fd = create (files[i_check].of_name);
909 else /* OFD_APPEND */
911 /* Attempt to append to previously opened file.
912 We use O_NONBLOCK to support writing to fifos,
913 where the other end has closed because of our
914 previous close. In that case we'll immediately
915 get an error, rather than waiting indefinitely.
916 In specialised cases the consumer can keep reading
917 from the fifo, terminating on conditions in the data
918 itself, or perhaps never in the case of 'tail -f'.
919 I.E. for fifos it is valid to attempt this reopen.
921 We don't handle the filter_command case here, as create()
922 will exit if there are not enough files in that case.
923 I.E. we don't support restarting filters, as that would
924 put too much burden on users specifying --filter commands. */
925 fd = open (files[i_check].of_name,
926 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
929 if (-1 < fd)
930 break;
/* Only descriptor exhaustion is recoverable here. */
932 if (!(errno == EMFILE || errno == ENFILE))
933 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
935 file_limit = true;
937 /* Search backwards for an open file to close. */
938 while (files[i_reopen].ofd < 0)
940 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
941 /* No more open files to close, exit with E[NM]FILE. */
942 if (i_reopen == i_check)
943 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
/* Close the victim and mark it for reopening in append mode. */
946 if (fclose (files[i_reopen].ofile) != 0)
947 error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
948 files[i_reopen].ofile = NULL;
949 files[i_reopen].ofd = OFD_APPEND;
952 files[i_check].ofd = fd;
953 if (!(files[i_check].ofile = fdopen (fd, "a")))
954 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
/* Hand the filter pid (if any) over to this entry. */
955 files[i_check].opid = filter_pid;
956 filter_pid = 0;
959 return file_limit;
962 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
963 When K == 0, we try to keep the files open in parallel.
964 If we run out of file resources, then we revert
965 to opening and closing each file for each line. */
967 static void
968 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
970 bool wrapped = false;
971 bool wrote = false;
972 bool file_limit;
973 size_t i_file;
974 of_t *files IF_LINT (= NULL);
975 uintmax_t line_no;
977 if (k)
978 line_no = 1;
979 else
981 if (SIZE_MAX < n)
982 xalloc_die ();
983 files = xnmalloc (n, sizeof *files);
985 /* Generate output file names. */
986 for (i_file = 0; i_file < n; i_file++)
988 next_file_name ();
989 files[i_file].of_name = xstrdup (outfile);
990 files[i_file].ofd = OFD_NEW;
991 files[i_file].ofile = NULL;
992 files[i_file].opid = 0;
994 i_file = 0;
995 file_limit = false;
998 while (true)
1000 char *bp = buf, *eob;
1001 /* Use safe_read() rather than full_read() here
1002 so that we process available data immediately. */
1003 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
1004 if (n_read == SAFE_READ_ERROR)
1005 error (EXIT_FAILURE, errno, "%s", infile);
1006 else if (n_read == 0)
1007 break; /* eof. */
1008 eob = buf + n_read;
1010 while (bp != eob)
1012 size_t to_write;
1013 bool next = false;
1015 /* Find end of line. */
1016 char *bp_out = memchr (bp, '\n', eob - bp);
1017 if (bp_out)
1019 bp_out++;
1020 next = true;
1022 else
1023 bp_out = eob;
1024 to_write = bp_out - bp;
1026 if (k)
1028 if (line_no == k && unbuffered)
1030 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
1031 error (EXIT_FAILURE, errno, "%s", _("write error"));
1033 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
1035 clearerr (stdout); /* To silence close_stdout(). */
1036 error (EXIT_FAILURE, errno, "%s", _("write error"));
1038 if (next)
1039 line_no = (line_no == n) ? 1 : line_no + 1;
1041 else
1043 /* Secure file descriptor. */
1044 file_limit |= ofile_open (files, i_file, n);
1045 if (unbuffered)
1047 /* Note writing to fd, rather than flushing the FILE gives
1048 an 8% performance benefit, due to reduced data copying. */
1049 if (full_write (files[i_file].ofd, bp, to_write) != to_write
1050 && ! ignorable (errno))
1051 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1053 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
1054 && ! ignorable (errno))
1055 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1056 if (! ignorable (errno))
1057 wrote = true;
1059 if (file_limit)
1061 if (fclose (files[i_file].ofile) != 0)
1062 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
1063 files[i_file].ofile = NULL;
1064 files[i_file].ofd = OFD_APPEND;
1066 if (next && ++i_file == n)
1068 wrapped = true;
1069 /* If no filters are accepting input, stop reading. */
1070 if (! wrote)
1071 goto no_filters;
1072 wrote = false;
1073 i_file = 0;
1077 bp = bp_out;
1081 no_filters:
1082 /* Ensure all files created, so that any existing files are truncated,
1083 and to signal any waiting fifo consumers.
1084 Also, close any open file descriptors.
1085 FIXME: Should we do this before EXIT_FAILURE? */
1086 if (!k)
1088 int ceiling = (wrapped ? n : i_file);
1089 for (i_file = 0; i_file < n; i_file++)
1091 if (i_file >= ceiling && !elide_empty_files)
1092 file_limit |= ofile_open (files, i_file, n);
1093 if (files[i_file].ofd >= 0)
1094 closeout (files[i_file].ofile, files[i_file].ofd,
1095 files[i_file].opid, files[i_file].of_name);
1096 files[i_file].ofd = OFD_APPEND;
1099 IF_LINT (free (files));
/* Diagnose that a second split mode was requested and leave through
   usage (EXIT_FAILURE).  main invokes this from each mode option when
   split_type is no longer type_undef. */
1102 #define FAIL_ONLY_ONE_WAY() \
1103 do \
1105 error (0, 0, _("cannot split in more than one way")); \
1106 usage (EXIT_FAILURE); \
1108 while (0)
1110 /* Parse K/N syntax of chunk options. */
1112 static void
1113 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
1115 *slash = '\0';
1116 if (xstrtoumax (slash + 1, NULL, 10, n_units, "") != LONGINT_OK
1117 || *n_units == 0)
1118 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash + 1);
1119 if (slash != optarg /* a leading number is specified. */
1120 && (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
1121 || *k_units == 0 || *n_units < *k_units))
1122 error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
1127 main (int argc, char **argv)
1129 enum Split_type split_type = type_undef;
1130 size_t in_blk_size = 0; /* optimal block size of input file device */
1131 size_t page_size = getpagesize ();
1132 uintmax_t k_units = 0;
1133 uintmax_t n_units;
1135 static char const multipliers[] = "bEGKkMmPTYZ0";
1136 int c;
1137 int digits_optind = 0;
1138 off_t file_size IF_LINT (= 0);
1140 initialize_main (&argc, &argv);
1141 set_program_name (argv[0]);
1142 setlocale (LC_ALL, "");
1143 bindtextdomain (PACKAGE, LOCALEDIR);
1144 textdomain (PACKAGE);
1146 atexit (close_stdout);
1148 /* Parse command line options. */
1150 infile = bad_cast ("-");
1151 outbase = bad_cast ("x");
1153 while (true)
1155 /* This is the argv-index of the option we will read next. */
1156 int this_optind = optind ? optind : 1;
1157 char *slash;
1159 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u",
1160 longopts, NULL);
1161 if (c == -1)
1162 break;
1164 switch (c)
1166 case 'a':
1168 unsigned long tmp;
1169 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
1170 || SIZE_MAX / sizeof (size_t) < tmp)
1172 error (0, 0, _("%s: invalid suffix length"), optarg);
1173 usage (EXIT_FAILURE);
1175 suffix_length = tmp;
1177 break;
1179 case ADDITIONAL_SUFFIX_OPTION:
1180 if (last_component (optarg) != optarg)
1182 error (0, 0,
1183 _("invalid suffix %s, contains directory separator"),
1184 quote (optarg));
1185 usage (EXIT_FAILURE);
1187 additional_suffix = optarg;
1188 break;
1190 case 'b':
1191 if (split_type != type_undef)
1192 FAIL_ONLY_ONE_WAY ();
1193 split_type = type_bytes;
1194 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1195 || n_units == 0)
1197 error (0, 0, _("%s: invalid number of bytes"), optarg);
1198 usage (EXIT_FAILURE);
1200 /* If input is a pipe, we could get more data than is possible
1201 to write to a single file, so indicate that immediately
1202 rather than having possibly future invocations fail. */
1203 if (OFF_T_MAX < n_units)
1204 error (EXIT_FAILURE, EFBIG,
1205 _("%s: invalid number of bytes"), optarg);
1207 break;
1209 case 'l':
1210 if (split_type != type_undef)
1211 FAIL_ONLY_ONE_WAY ();
1212 split_type = type_lines;
1213 if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1214 || n_units == 0)
1216 error (0, 0, _("%s: invalid number of lines"), optarg);
1217 usage (EXIT_FAILURE);
1219 break;
1221 case 'C':
1222 if (split_type != type_undef)
1223 FAIL_ONLY_ONE_WAY ();
1224 split_type = type_byteslines;
1225 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1226 || n_units == 0 || SIZE_MAX < n_units)
1228 error (0, 0, _("%s: invalid number of bytes"), optarg);
1229 usage (EXIT_FAILURE);
1231 if (OFF_T_MAX < n_units)
1232 error (EXIT_FAILURE, EFBIG,
1233 _("%s: invalid number of bytes"), optarg);
1234 break;
1236 case 'n':
1237 if (split_type != type_undef)
1238 FAIL_ONLY_ONE_WAY ();
1239 /* skip any whitespace */
1240 while (isspace (to_uchar (*optarg)))
1241 optarg++;
1242 if (STRNCMP_LIT (optarg, "r/") == 0)
1244 split_type = type_rr;
1245 optarg += 2;
1247 else if (STRNCMP_LIT (optarg, "l/") == 0)
1249 split_type = type_chunk_lines;
1250 optarg += 2;
1252 else
1253 split_type = type_chunk_bytes;
1254 if ((slash = strchr (optarg, '/')))
1255 parse_chunk (&k_units, &n_units, slash);
1256 else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1257 || n_units == 0)
1258 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
1259 break;
1261 case 'u':
1262 unbuffered = true;
1263 break;
1265 case '0':
1266 case '1':
1267 case '2':
1268 case '3':
1269 case '4':
1270 case '5':
1271 case '6':
1272 case '7':
1273 case '8':
1274 case '9':
1275 if (split_type == type_undef)
1277 split_type = type_digits;
1278 n_units = 0;
1280 if (split_type != type_undef && split_type != type_digits)
1281 FAIL_ONLY_ONE_WAY ();
1282 if (digits_optind != 0 && digits_optind != this_optind)
1283 n_units = 0; /* More than one number given; ignore other. */
1284 digits_optind = this_optind;
1285 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
1287 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1288 error (EXIT_FAILURE, 0,
1289 _("line count option -%s%c... is too large"),
1290 umaxtostr (n_units, buffer), c);
1292 break;
1294 case 'd':
1295 suffix_alphabet = "0123456789";
1296 if (optarg)
1298 if (strlen (optarg) != strspn (optarg, suffix_alphabet))
1300 error (0, 0,
1301 _("%s: invalid start value for numerical suffix"),
1302 optarg);
1303 usage (EXIT_FAILURE);
1305 else
1307 /* Skip any leading zero. */
1308 while (*optarg == '0' && *(optarg + 1) != '\0')
1309 optarg++;
1310 numeric_suffix_start = optarg;
1313 break;
1315 case 'e':
1316 elide_empty_files = true;
1317 break;
1319 case FILTER_OPTION:
1320 filter_command = optarg;
1321 break;
1323 case IO_BLKSIZE_OPTION:
1325 uintmax_t tmp_blk_size;
1326 if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
1327 multipliers) != LONGINT_OK
1328 || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
1329 error (0, 0, _("%s: invalid IO block size"), optarg);
1330 else
1331 in_blk_size = tmp_blk_size;
1333 break;
1335 case VERBOSE_OPTION:
1336 verbose = true;
1337 break;
1339 case_GETOPT_HELP_CHAR;
1341 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1343 default:
1344 usage (EXIT_FAILURE);
1348 if (k_units != 0 && filter_command)
1350 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1351 usage (EXIT_FAILURE);
1354 /* Handle default case. */
1355 if (split_type == type_undef)
1357 split_type = type_lines;
1358 n_units = 1000;
1361 if (n_units == 0)
1363 error (0, 0, _("%s: invalid number of lines"), "0");
1364 usage (EXIT_FAILURE);
1367 set_suffix_length (n_units, split_type);
1369 /* Get out the filename arguments. */
1371 if (optind < argc)
1372 infile = argv[optind++];
1374 if (optind < argc)
1375 outbase = argv[optind++];
1377 if (optind < argc)
1379 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1380 usage (EXIT_FAILURE);
1383 /* Check that the suffix length is large enough for the numerical
1384 suffix start value. */
1385 if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
1387 error (0, 0, _("numerical suffix start value is too large "
1388 "for the suffix length"));
1389 usage (EXIT_FAILURE);
1392 /* Open the input file. */
1393 if (! STREQ (infile, "-")
1394 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1395 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1396 quote (infile));
1398 /* Binary I/O is safer when byte counts are used. */
1399 if (O_BINARY && ! isatty (STDIN_FILENO))
1400 xfreopen (NULL, "rb", stdin);
1402 /* Get the optimal block size of input device and make a buffer. */
1404 if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
1405 error (EXIT_FAILURE, errno, "%s", infile);
1406 if (in_blk_size == 0)
1407 in_blk_size = io_blksize (in_stat_buf);
1409 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1411 off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1412 if (usable_st_size (&in_stat_buf))
1413 file_size = in_stat_buf.st_size;
1414 else if (0 <= input_offset)
1416 file_size = lseek (STDIN_FILENO, 0, SEEK_END);
1417 input_offset = (file_size < 0
1418 ? file_size
1419 : lseek (STDIN_FILENO, input_offset, SEEK_SET));
1421 if (input_offset < 0)
1422 error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
1423 quote (infile));
1424 file_size -= input_offset;
1425 /* Overflow, and sanity checking. */
1426 if (OFF_T_MAX < n_units)
1428 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1429 error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
1430 umaxtostr (n_units, buffer));
1432 /* increase file_size to n_units here, so that we still process
1433 any input data, and create empty files for the rest. */
1434 file_size = MAX (file_size, n_units);
1437 void *b = xmalloc (in_blk_size + 1 + page_size - 1);
1438 char *buf = ptr_align (b, page_size);
1440 /* When filtering, closure of one pipe must not terminate the process,
1441 as there may still be other streams expecting input from us. */
1442 if (filter_command)
1444 struct sigaction act;
1445 sigemptyset (&newblocked);
1446 sigaction (SIGPIPE, NULL, &act);
1447 if (act.sa_handler != SIG_IGN)
1448 sigaddset (&newblocked, SIGPIPE);
1449 sigprocmask (SIG_BLOCK, &newblocked, &oldblocked);
1452 switch (split_type)
1454 case type_digits:
1455 case type_lines:
1456 lines_split (n_units, buf, in_blk_size);
1457 break;
1459 case type_bytes:
1460 bytes_split (n_units, buf, in_blk_size, 0);
1461 break;
1463 case type_byteslines:
1464 line_bytes_split (n_units, buf, in_blk_size);
1465 break;
1467 case type_chunk_bytes:
1468 if (k_units == 0)
1469 bytes_split (file_size / n_units, buf, in_blk_size, n_units);
1470 else
1471 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
1472 break;
1474 case type_chunk_lines:
1475 lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
1476 break;
1478 case type_rr:
1479 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1480 but the functionality is provided for symmetry. */
1481 lines_rr (k_units, n_units, buf, in_blk_size);
1482 break;
1484 default:
1485 abort ();
1488 IF_LINT (free (b));
1490 if (close (STDIN_FILENO) != 0)
1491 error (EXIT_FAILURE, errno, "%s", infile);
1492 closeout (NULL, output_desc, filter_pid, outfile);
1494 exit (EXIT_SUCCESS);