split: avoid failure due to leftover 'errno' value
[coreutils.git] / src / split.c
blob2eb343b15cfebf3948e3d68bb509efd7a46ba961
1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
19 To do:
20 * Implement -t CHAR or -t REGEX to specify break characters other
21 than newline. */
23 #include <config.h>
25 #include <assert.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <signal.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
32 #include "system.h"
33 #include "error.h"
34 #include "fd-reopen.h"
35 #include "fcntl--.h"
36 #include "full-read.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
39 #include "quote.h"
40 #include "safe-read.h"
41 #include "sig2str.h"
42 #include "xfreopen.h"
43 #include "xstrtol.h"
45 /* The official name of this program (e.g., no `g' prefix). */
46 #define PROGRAM_NAME "split"
48 #define AUTHORS \
49 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
50 proper_name ("Richard M. Stallman")
52 /* Shell command to filter through, instead of creating files.
   NULL unless --filter=COMMAND was given. */
53 static char const *filter_command;
55 /* Process ID of the filter most recently started by create ().
   ofile_open () moves it into the per-file record and resets it to 0. */
56 static int filter_pid;
58 /* Array of open pipes: the write ends of the pipes feeding filters.
   OPEN_PIPES_ALLOC is the allocated capacity of the array and
   N_OPEN_PIPES the number of entries currently in use. */
59 static int *open_pipes;
60 static size_t open_pipes_alloc;
61 static size_t n_open_pipes;
63 /* Blocked signals.  NEWBLOCKED is the set main () blocks when filtering
   (SIGPIPE, unless it is already ignored); OLDBLOCKED is the mask that was
   in effect before, which create () restores in each filter child. */
64 static sigset_t oldblocked;
65 static sigset_t newblocked;
67 /* Base name of output files. */
68 static char const *outbase;
70 /* Name of output files.  NULL until next_file_name () builds the first
   name. */
71 static char *outfile;
73 /* Pointer to the end of the prefix in OUTFILE.
   Suffixes are inserted here. */
75 static char *outfile_mid;
77 /* Length of OUTFILE's suffix.  Zero means the user gave no -a option,
   in which case set_suffix_length () chooses a value. */
78 static size_t suffix_length;
80 /* Alphabet of characters to use in suffix. */
81 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
83 /* Name of input file. May be "-". */
84 static char *infile;
86 /* Descriptor on which output file is open.  -1 while no file is open. */
87 static int output_desc = -1;
89 /* If true, print a diagnostic on standard output just before each
   output file is opened (or each filter is started). */
91 static bool verbose;
93 /* If true, don't generate zero length output files. */
94 static bool elide_empty_files;
96 /* If true, in round robin mode, immediately copy
   input to output, which is much slower, so disabled by default. */
98 static bool unbuffered;
100 /* The split mode to use. */
/* As assigned in main (): type_bytes is -b, type_byteslines is -C,
   type_lines is -l (and the default), type_digits is the obsolescent
   -NUMBER form, type_chunk_bytes is -n [K/]N, type_chunk_lines is
   -n l/[K/]N and type_rr is -n r/[K/]N.  type_undef means that no mode
   option has been seen yet. */
101 enum Split_type
103 type_undef, type_bytes, type_byteslines, type_lines, type_digits,
104 type_chunk_bytes, type_chunk_lines, type_rr
107 /* For long options that have no equivalent short option, use a
108 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
109 enum
111 VERBOSE_OPTION = CHAR_MAX + 1,
112 FILTER_OPTION,
113 IO_BLKSIZE_OPTION
/* Long option table handed to getopt_long () in main ().  Each entry maps a
   long name to the short option character (or pseudo option value above)
   that the option switch in main () handles. */
116 static struct option const longopts[] =
118 {"bytes", required_argument, NULL, 'b'},
119 {"lines", required_argument, NULL, 'l'},
120 {"line-bytes", required_argument, NULL, 'C'},
121 {"number", required_argument, NULL, 'n'},
122 {"elide-empty-files", no_argument, NULL, 'e'},
123 {"unbuffered", no_argument, NULL, 'u'},
124 {"suffix-length", required_argument, NULL, 'a'},
125 {"numeric-suffixes", no_argument, NULL, 'd'},
126 {"filter", required_argument, NULL, FILTER_OPTION},
127 {"verbose", no_argument, NULL, VERBOSE_OPTION},
128 {"-io-blksize", required_argument, NULL,
129 IO_BLKSIZE_OPTION}, /* do not document */
130 {GETOPT_HELP_OPTION_DECL},
131 {GETOPT_VERSION_OPTION_DECL},
132 {NULL, 0, NULL, 0}
135 /* Return true if the errno value, ERR, is ignorable. */
136 static inline bool
137 ignorable (int err)
139 return filter_command && err == EPIPE;
142 static void
143 set_suffix_length (uintmax_t n_units, enum Split_type split_type)
145 #define DEFAULT_SUFFIX_LENGTH 2
147 size_t suffix_needed = 0;
149 /* Auto-calculate the suffix length if the number of files is given. */
150 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
151 || split_type == type_rr)
153 size_t alphabet_len = strlen (suffix_alphabet);
154 bool alphabet_slop = (n_units % alphabet_len) != 0;
155 while (n_units /= alphabet_len)
156 suffix_needed++;
157 suffix_needed += alphabet_slop;
160 if (suffix_length) /* set by user */
162 if (suffix_length < suffix_needed)
164 error (EXIT_FAILURE, 0,
165 _("the suffix length needs to be at least %zu"),
166 suffix_needed);
168 return;
170 else
171 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
/* Print usage information and exit with STATUS.
   When STATUS is not EXIT_SUCCESS only a one-line hint pointing at --help
   is written to standard error; otherwise the full help text is written to
   standard output.  This function does not return: it always ends in
   exit (STATUS). */
174 void
175 usage (int status)
177 if (status != EXIT_SUCCESS)
178 fprintf (stderr, _("Try `%s --help' for more information.\n"),
179 program_name);
180 else
182 printf (_("\
183 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
185 program_name);
186 fputs (_("\
187 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
188 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
189 is -, read standard input.\n\
191 "), stdout);
192 fputs (_("\
193 Mandatory arguments to long options are mandatory for short options too.\n\
194 "), stdout);
/* The default suffix length shown for -a comes from the macro defined in
   set_suffix_length (). */
195 fprintf (stdout, _("\
196 -a, --suffix-length=N use suffixes of length N (default %d)\n\
197 -b, --bytes=SIZE put SIZE bytes per output file\n\
198 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
199 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
200 -e, --elide-empty-files do not generate empty output files with `-n'\n\
201 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
202 -l, --lines=NUMBER put NUMBER lines per output file\n\
203 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
204 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
205 "), DEFAULT_SUFFIX_LENGTH);
206 fputs (_("\
207 --verbose print a diagnostic just before each\n\
208 output file is opened\n\
209 "), stdout);
210 fputs (HELP_OPTION_DESCRIPTION, stdout);
211 fputs (VERSION_OPTION_DESCRIPTION, stdout);
212 emit_size_note ();
213 fputs (_("\n\
214 CHUNKS may be:\n\
215 N split into N files based on size of input\n\
216 K/N output Kth of N to stdout\n\
217 l/N split into N files without splitting lines\n\
218 l/K/N output Kth of N to stdout without splitting lines\n\
219 r/N like `l' but use round robin distribution\n\
220 r/K/N likewise but only output Kth of N to stdout\n\
221 "), stdout);
222 emit_ancillary_info ();
224 exit (status);
227 /* Compute the next sequential output file name and store it into the
228 string `outfile'. */
230 static void
231 next_file_name (void)
233 /* Index in suffix_alphabet of each character in the suffix. */
234 static size_t *sufindex;
236 if (! outfile)
238 /* Allocate and initialize the first file name. */
240 size_t outbase_length = strlen (outbase);
241 size_t outfile_length = outbase_length + suffix_length;
242 if (outfile_length + 1 < outbase_length)
243 xalloc_die ();
244 outfile = xmalloc (outfile_length + 1);
245 outfile_mid = outfile + outbase_length;
246 memcpy (outfile, outbase, outbase_length);
247 memset (outfile_mid, suffix_alphabet[0], suffix_length);
248 outfile[outfile_length] = 0;
249 sufindex = xcalloc (suffix_length, sizeof *sufindex);
251 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
252 /* POSIX requires that if the output file name is too long for
253 its directory, `split' must fail without creating any files.
254 This must be checked for explicitly on operating systems that
255 silently truncate file names. */
257 char *dir = dir_name (outfile);
258 long name_max = pathconf (dir, _PC_NAME_MAX);
259 if (0 <= name_max && name_max < base_len (last_component (outfile)))
260 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
261 free (dir);
263 #endif
265 else
267 /* Increment the suffix in place, if possible. */
269 size_t i = suffix_length;
270 while (i-- != 0)
272 sufindex[i]++;
273 outfile_mid[i] = suffix_alphabet[sufindex[i]];
274 if (outfile_mid[i])
275 return;
276 sufindex[i] = 0;
277 outfile_mid[i] = suffix_alphabet[sufindex[i]];
279 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
283 /* Create or truncate a file. */
285 static int
286 create (const char *name)
288 if (!filter_command)
290 if (verbose)
291 fprintf (stdout, _("creating file %s\n"), quote (name));
292 return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
293 (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
295 else
297 int fd_pair[2];
298 pid_t child_pid;
299 char const *shell_prog = getenv ("SHELL");
300 if (shell_prog == NULL)
301 shell_prog = "/bin/sh";
302 if (setenv ("FILE", name, 1) != 0)
303 error (EXIT_FAILURE, errno,
304 _("failed to set FILE environment variable"));
305 if (verbose)
306 fprintf (stdout, _("executing with FILE=%s\n"), quote (name));
307 if (pipe (fd_pair) != 0)
308 error (EXIT_FAILURE, errno, _("failed to create pipe"));
309 child_pid = fork ();
310 if (child_pid == 0)
312 /* This is the child process. If an error occurs here, the
313 parent will eventually learn about it after doing a wait,
314 at which time it will emit its own error message. */
315 int j;
316 /* We have to close any pipes that were opened during an
317 earlier call, otherwise this process will be holding a
318 write-pipe that will prevent the earlier process from
319 reading an EOF on the corresponding read-pipe. */
320 for (j = 0; j < n_open_pipes; ++j)
321 if (close (open_pipes[j]) != 0)
322 error (EXIT_FAILURE, errno, _("closing prior pipe"));
323 if (close (fd_pair[1]))
324 error (EXIT_FAILURE, errno, _("closing output pipe"));
325 if (fd_pair[0] != STDIN_FILENO)
327 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
328 error (EXIT_FAILURE, errno, _("moving input pipe"));
329 if (close (fd_pair[0]) != 0)
330 error (EXIT_FAILURE, errno, _("closing input pipe"));
332 sigprocmask (SIG_SETMASK, &oldblocked, NULL);
333 execl (shell_prog, last_component (shell_prog), "-c",
334 filter_command, (char *) NULL);
335 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
336 shell_prog, filter_command);
338 if (child_pid == -1)
339 error (EXIT_FAILURE, errno, _("fork system call failed"));
340 if (close (fd_pair[0]) != 0)
341 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
342 filter_pid = child_pid;
343 if (n_open_pipes == open_pipes_alloc)
344 open_pipes = x2nrealloc (open_pipes, &open_pipes_alloc,
345 sizeof *open_pipes);
346 open_pipes[n_open_pipes++] = fd_pair[1];
347 return fd_pair[1];
351 /* Close the output file, and do any associated cleanup.
352 If FP and FD are both specified, they refer to the same open file;
353 in this case FP is closed, but FD is still used in cleanup. */
354 static void
355 closeout (FILE *fp, int fd, pid_t pid, char const *name)
357 if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
358 error (EXIT_FAILURE, errno, "%s", name);
359 if (fd >= 0)
361 if (fp == NULL && close (fd) < 0)
362 error (EXIT_FAILURE, errno, "%s", name);
363 int j;
364 for (j = 0; j < n_open_pipes; ++j)
366 if (open_pipes[j] == fd)
368 open_pipes[j] = open_pipes[--n_open_pipes];
369 break;
373 if (pid > 0)
375 int wstatus = 0;
376 if (waitpid (pid, &wstatus, 0) == -1 && errno != ECHILD)
377 error (EXIT_FAILURE, errno, _("waiting for child process"));
378 if (WIFSIGNALED (wstatus))
380 int sig = WTERMSIG (wstatus);
381 if (sig != SIGPIPE)
383 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
384 if (sig2str (sig, signame) != 0)
385 sprintf (signame, "%d", sig);
386 error (sig + 128, 0,
387 _("with FILE=%s, signal %s from command: %s"),
388 name, signame, filter_command);
391 else if (WIFEXITED (wstatus))
393 int ex = WEXITSTATUS (wstatus);
394 if (ex != 0)
395 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
396 name, ex, filter_command);
398 else
400 /* shouldn't happen. */
401 error (EXIT_FAILURE, 0,
402 _("unknown status from command (0x%X)"), wstatus);
407 /* Write BYTES bytes at BP to an output file.
408 If NEW_FILE_FLAG is true, open the next output file.
409 Otherwise add to the same output file already in use. */
411 static void
412 cwrite (bool new_file_flag, const char *bp, size_t bytes)
414 if (new_file_flag)
416 if (!bp && bytes == 0 && elide_empty_files)
417 return;
418 closeout (NULL, output_desc, filter_pid, outfile);
419 next_file_name ();
420 if ((output_desc = create (outfile)) < 0)
421 error (EXIT_FAILURE, errno, "%s", outfile);
423 if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno))
424 error (EXIT_FAILURE, errno, "%s", outfile);
427 /* Split into pieces of exactly N_BYTES bytes.
428 Use buffer BUF, whose size is BUFSIZE. */
430 static void
431 bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
433 size_t n_read;
434 bool new_file_flag = true;
435 size_t to_read;
436 uintmax_t to_write = n_bytes;
437 char *bp_out;
438 uintmax_t opened = 0;
442 n_read = full_read (STDIN_FILENO, buf, bufsize);
443 if (n_read < bufsize && errno)
444 error (EXIT_FAILURE, errno, "%s", infile);
445 bp_out = buf;
446 to_read = n_read;
447 while (true)
449 if (to_read < to_write)
451 if (to_read) /* do not write 0 bytes! */
453 cwrite (new_file_flag, bp_out, to_read);
454 opened += new_file_flag;
455 to_write -= to_read;
456 new_file_flag = false;
458 break;
460 else
462 size_t w = to_write;
463 cwrite (new_file_flag, bp_out, w);
464 opened += new_file_flag;
465 new_file_flag = !max_files || (opened < max_files);
466 if (!new_file_flag && ignorable (errno))
468 /* If filter no longer accepting input, stop reading. */
469 n_read = 0;
470 break;
472 bp_out += w;
473 to_read -= w;
474 to_write = n_bytes;
478 while (n_read == bufsize);
480 /* Ensure NUMBER files are created, which truncates
481 any existing files or notifies any consumers on fifos.
482 FIXME: Should we do this before EXIT_FAILURE? */
483 while (opened++ < max_files)
484 cwrite (true, NULL, 0);
487 /* Split into pieces of exactly N_LINES lines.
488 Use buffer BUF, whose size is BUFSIZE. */
490 static void
491 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
493 size_t n_read;
494 char *bp, *bp_out, *eob;
495 bool new_file_flag = true;
496 uintmax_t n = 0;
500 n_read = full_read (STDIN_FILENO, buf, bufsize);
501 if (n_read < bufsize && errno)
502 error (EXIT_FAILURE, errno, "%s", infile);
503 bp = bp_out = buf;
504 eob = bp + n_read;
505 *eob = '\n';
506 while (true)
508 bp = memchr (bp, '\n', eob - bp + 1);
509 if (bp == eob)
511 if (eob != bp_out) /* do not write 0 bytes! */
513 size_t len = eob - bp_out;
514 cwrite (new_file_flag, bp_out, len);
515 new_file_flag = false;
517 break;
520 ++bp;
521 if (++n >= n_lines)
523 cwrite (new_file_flag, bp_out, bp - bp_out);
524 bp_out = bp;
525 new_file_flag = true;
526 n = 0;
530 while (n_read == bufsize);
533 /* Split into pieces that are as large as possible while still not more
534 than N_BYTES bytes, and are split on line boundaries except
535 where lines longer than N_BYTES bytes occur.
536 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
537 buffer of size N_BYTES, in case N_BYTES is very large. */
539 static void
540 line_bytes_split (size_t n_bytes)
542 char *bp;
543 bool eof = false;
544 size_t n_buffered = 0;
545 char *buf = xmalloc (n_bytes);
549 /* Fill up the full buffer size from the input file. */
551 size_t to_read = n_bytes - n_buffered;
552 size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read);
553 if (n_read < to_read && errno)
554 error (EXIT_FAILURE, errno, "%s", infile);
556 n_buffered += n_read;
557 if (n_buffered != n_bytes)
559 if (n_buffered == 0)
560 break;
561 eof = true;
564 /* Find where to end this chunk. */
565 bp = buf + n_buffered;
566 if (n_buffered == n_bytes)
568 while (bp > buf && bp[-1] != '\n')
569 bp--;
572 /* If chunk has no newlines, use all the chunk. */
573 if (bp == buf)
574 bp = buf + n_buffered;
576 /* Output the chars as one output file. */
577 cwrite (true, buf, bp - buf);
579 /* Discard the chars we just output; move rest of chunk
580 down to be the start of the next chunk. Source and
581 destination probably overlap. */
582 n_buffered -= bp - buf;
583 if (n_buffered > 0)
584 memmove (buf, bp, n_buffered);
586 while (!eof);
587 free (buf);
590 /* -n l/[K/]N: Write lines to files of approximately file size / N.
591 The file is partitioned into file size / N sized portions, with the
592 last assigned any excess. If a line _starts_ within a partition
593 it is written completely to the corresponding file. Since lines
594 are not split even if they overlap a partition, the files written
595 can be larger or smaller than the partition size, and even empty
596 if a line is so long as to completely overlap the partition. */
/* K selects the output: when K is zero every chunk is written to its own
   output file through cwrite (); when K is non-zero only chunk K is
   written, to standard output, and the function returns as soon as the
   chunk number passes K.
   N is the number of chunks, BUF/BUFSIZE the I/O buffer, and FILE_SIZE the
   number of input bytes to process; data read beyond FILE_SIZE is
   ignored.  The arguments must satisfy the assertion below. */
598 static void
599 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
600 off_t file_size)
602 assert (n && k <= n && n <= file_size);
604 const off_t chunk_size = file_size / n;
605 uintmax_t chunk_no = 1;
/* Offset of the last byte of the current chunk. */
606 off_t chunk_end = chunk_size - 1;
/* Input bytes accounted for so far, including bytes skipped by lseek. */
607 off_t n_written = 0;
608 bool new_file_flag = true;
609 bool chunk_truncated = false;
611 if (k > 1)
613 /* Start reading 1 byte before kth chunk of file. */
614 off_t start = (k - 1) * chunk_size - 1;
615 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
616 error (EXIT_FAILURE, errno, "%s", infile);
617 n_written = start;
618 chunk_no = k - 1;
619 chunk_end = chunk_no * chunk_size - 1;
622 while (n_written < file_size)
624 char *bp = buf, *eob;
625 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
626 if (n_read < bufsize && errno)
627 error (EXIT_FAILURE, errno, "%s", infile);
628 else if (n_read == 0)
629 break; /* eof. */
/* Never process more than FILE_SIZE bytes in total. */
630 n_read = MIN (n_read, file_size - n_written);
631 chunk_truncated = false;
632 eob = buf + n_read;
634 while (bp != eob)
636 size_t to_write;
/* NEXT is set when the data being written ends with the newline that
   terminates the current chunk. */
637 bool next = false;
639 /* Begin looking for '\n' at last byte of chunk. */
640 off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
641 char *bp_out = memchr (bp + skip, '\n', n_read - skip);
642 if (bp_out++)
643 next = true;
644 else
645 bp_out = eob;
646 to_write = bp_out - bp;
648 if (k == chunk_no)
650 /* We don't use the stdout buffer here since we're writing
651 large chunks from an existing file, so it's more efficient
652 to write out directly. */
653 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
654 error (EXIT_FAILURE, errno, "%s", _("write error"));
656 else if (! k)
657 cwrite (new_file_flag, bp, to_write);
658 n_written += to_write;
659 bp += to_write;
660 n_read -= to_write;
661 new_file_flag = next;
663 /* A line could have been so long that it skipped
664 entire chunks. So create empty files in that case. */
665 while (next || chunk_end <= n_written - 1)
667 if (!next && bp == eob)
669 /* replenish buf, before going to next chunk. */
670 chunk_truncated = true;
671 break;
673 chunk_no++;
/* When extracting a single chunk, stop once it has been passed. */
674 if (k && chunk_no > k)
675 return;
/* The last chunk absorbs the remainder of FILE_SIZE / N. */
676 if (chunk_no == n)
677 chunk_end = file_size - 1; /* >= chunk_size. */
678 else
679 chunk_end += chunk_size;
680 if (chunk_end <= n_written - 1)
682 if (! k)
683 cwrite (true, NULL, 0);
685 else
686 next = false;
/* The input ended while the buffer still had to be replenished for the
   current chunk; count that chunk before creating the remaining files. */
691 if (chunk_truncated)
692 chunk_no++;
694 /* Ensure NUMBER files are created, which truncates
695 any existing files or notifies any consumers on fifos.
696 FIXME: Should we do this before EXIT_FAILURE? */
697 while (!k && chunk_no++ <= n)
698 cwrite (true, NULL, 0);
701 /* -n K/N: Extract Kth of N chunks. */
703 static void
704 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
705 off_t file_size)
707 off_t start;
708 off_t end;
710 assert (k && n && k <= n && n <= file_size);
712 start = (k - 1) * (file_size / n);
713 end = (k == n) ? file_size : k * (file_size / n);
715 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
716 error (EXIT_FAILURE, errno, "%s", infile);
718 while (start < end)
720 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
721 if (n_read < bufsize && errno)
722 error (EXIT_FAILURE, errno, "%s", infile);
723 else if (n_read == 0)
724 break; /* eof. */
725 n_read = MIN (n_read, end - start);
726 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
727 && ! ignorable (errno))
728 error (EXIT_FAILURE, errno, "%s", quote ("-"));
729 start += n_read;
/* Per-output-file state used by the round robin mode (lines_rr). */
733 typedef struct of_info
/* Output file name (a private copy of the generated name). */
735 char *of_name;
/* Open descriptor, or one of the negative OFD_* markers below. */
736 int ofd;
/* stdio stream opened on OFD, or NULL while the file is not open. */
737 FILE *ofile;
/* Value of filter_pid captured when the file was opened; 0 if none. */
738 int opid;
739 } of_t;
/* Special values of `ofd' for a file that is not currently open:
   OFD_NEW means it has not been created yet, OFD_APPEND means it was
   opened before and closed again, so it must be reopened for appending. */
741 enum
743 OFD_NEW = -1,
744 OFD_APPEND = -2
747 /* Rotate file descriptors when we're writing to more output files than we
748 have available file descriptors.
749 Return whether we came under file resource pressure.
750 If so, it's probably best to close each file when finished with it. */
/* FILES is the array of NFILES output records and I_CHECK the index of the
   record that must be open when this function returns.  If that record
   already has a descriptor nothing is done and false is returned.
   Otherwise the file is created (OFD_NEW) or reopened for appending
   (OFD_APPEND); on EMFILE/ENFILE another open file is closed and marked
   OFD_APPEND, and the open is retried.  Any other failure is fatal. */
752 static bool
753 ofile_open (of_t *files, size_t i_check, size_t nfiles)
755 bool file_limit = false;
/* Both OFD_NEW and OFD_APPEND compare <= OFD_NEW, i.e. "not open". */
757 if (files[i_check].ofd <= OFD_NEW)
759 int fd;
/* Candidate to close: start with the entry just before I_CHECK, wrapping. */
760 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
762 /* Another process could have opened a file in between the calls to
763 close and open, so we should keep trying until open succeeds or
764 we've closed all of our files. */
765 while (true)
767 if (files[i_check].ofd == OFD_NEW)
768 fd = create (files[i_check].of_name);
769 else /* OFD_APPEND */
771 /* Attempt to append to previously opened file.
772 We use O_NONBLOCK to support writing to fifos,
773 where the other end has closed because of our
774 previous close. In that case we'll immediately
775 get an error, rather than waiting indefinitely.
776 In specialised cases the consumer can keep reading
777 from the fifo, terminating on conditions in the data
778 itself, or perhaps never in the case of `tail -f`.
779 I.E. for fifos it is valid to attempt this reopen.
781 We don't handle the filter_command case here, as create()
782 will exit if there are not enough files in that case.
783 I.E. we don't support restarting filters, as that would
784 put too much burden on users specifying --filter commands. */
785 fd = open (files[i_check].of_name,
786 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
789 if (-1 < fd)
790 break;
/* Only running out of descriptors is recoverable. */
792 if (!(errno == EMFILE || errno == ENFILE))
793 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
795 file_limit = true;
797 /* Search backwards for an open file to close. */
798 while (files[i_reopen].ofd < 0)
800 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
801 /* No more open files to close, exit with E[NM]FILE. */
802 if (i_reopen == i_check)
803 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
806 if (fclose (files[i_reopen].ofile) != 0)
807 error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
808 files[i_reopen].ofile = NULL;
809 files[i_reopen].ofd = OFD_APPEND;
812 files[i_check].ofd = fd;
813 if (!(files[i_check].ofile = fdopen (fd, "a")))
814 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
/* Remember the current filter_pid in this record and clear the global. */
815 files[i_check].opid = filter_pid;
816 filter_pid = 0;
819 return file_limit;
822 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
823 When K == 0, we try to keep the files open in parallel.
824 If we run out of file resources, then we revert
825 to opening and closing each file for each line. */
/* When K is non-zero, only the lines whose position in the 1..N cycle
   equals K are copied, to standard output.  BUF of size BUFSIZE is the
   read buffer; input is consumed until end of file, or until a whole
   round of output files produced no successful write. */
827 static void
828 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
/* WRAPPED: the file index has cycled past N at least once.
   WROTE: some write in the current cycle was counted as successful. */
830 bool wrapped = false;
831 bool wrote = false;
832 bool file_limit;
833 size_t i_file;
834 of_t *files IF_LINT (= NULL);
835 uintmax_t line_no;
837 if (k)
838 line_no = 1;
839 else
841 if (SIZE_MAX < n)
842 error (exit_failure, 0, "%s", _("memory exhausted"));
843 files = xnmalloc (n, sizeof *files);
845 /* Generate output file names. */
846 for (i_file = 0; i_file < n; i_file++)
848 next_file_name ();
849 files[i_file].of_name = xstrdup (outfile);
850 files[i_file].ofd = OFD_NEW;
851 files[i_file].ofile = NULL;
852 files[i_file].opid = 0;
854 i_file = 0;
855 file_limit = false;
858 while (true)
860 char *bp = buf, *eob;
861 /* Use safe_read() rather than full_read() here
862 so that we process available data immediately. */
863 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
864 if (n_read == SAFE_READ_ERROR)
865 error (EXIT_FAILURE, errno, "%s", infile);
866 else if (n_read == 0)
867 break; /* eof. */
868 eob = buf + n_read;
870 while (bp != eob)
872 size_t to_write;
/* NEXT: the piece being written ends with a newline, so the following
   data belongs to the next file (or the next line number). */
873 bool next = false;
875 /* Find end of line. */
876 char *bp_out = memchr (bp, '\n', eob - bp);
877 if (bp_out)
879 bp_out++;
880 next = true;
882 else
883 bp_out = eob;
884 to_write = bp_out - bp;
886 if (k)
888 if (line_no == k && unbuffered)
890 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
891 error (EXIT_FAILURE, errno, "%s", _("write error"));
893 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
895 clearerr (stdout); /* To silence close_stdout(). */
896 error (EXIT_FAILURE, errno, "%s", _("write error"));
898 if (next)
899 line_no = (line_no == n) ? 1 : line_no + 1;
901 else
903 /* Secure file descriptor. */
904 file_limit |= ofile_open (files, i_file, n);
905 if (unbuffered)
907 /* Note writing to fd, rather than flushing the FILE gives
908 an 8% performance benefit, due to reduced data copying. */
909 if (full_write (files[i_file].ofd, bp, to_write) != to_write
910 && ! ignorable (errno))
911 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
913 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
914 && ! ignorable (errno))
915 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
/* NOTE(review): errno is tested here even when the write above succeeded,
   so a value left over from an earlier call may be seen -- confirm that
   this is intended. */
916 if (! ignorable (errno))
917 wrote = true;
919 if (file_limit)
921 if (fclose (files[i_file].ofile) != 0)
922 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
923 files[i_file].ofile = NULL;
924 files[i_file].ofd = OFD_APPEND;
926 if (next && ++i_file == n)
928 wrapped = true;
929 /* If no filters are accepting input, stop reading. */
930 if (! wrote)
931 goto no_filters;
932 wrote = false;
933 i_file = 0;
937 bp = bp_out;
941 no_filters:
942 /* Ensure all files created, so that any existing files are truncated,
943 and to signal any waiting fifo consumers.
944 Also, close any open file descriptors.
945 FIXME: Should we do this before EXIT_FAILURE? */
946 if (!k)
/* Files with an index below CEILING have already been opened at least
   once.
   NOTE(review): N is a uintmax_t and I_FILE a size_t; storing either in an
   int may truncate for very large values -- confirm. */
948 int ceiling = (wrapped ? n : i_file);
949 for (i_file = 0; i_file < n; i_file++)
951 if (i_file >= ceiling && !elide_empty_files)
952 file_limit |= ofile_open (files, i_file, n);
953 if (files[i_file].ofd >= 0)
954 closeout (files[i_file].ofile, files[i_file].ofd,
955 files[i_file].opid, files[i_file].of_name);
956 files[i_file].ofd = OFD_APPEND;
/* Diagnose a second split-mode option on the command line and exit
   through usage (EXIT_FAILURE).  Written as a single statement so that it
   can be used as the body of an unbraced `if'. */
961 #define FAIL_ONLY_ONE_WAY() \
962 do \
964 error (0, 0, _("cannot split in more than one way")); \
965 usage (EXIT_FAILURE); \
967 while (0)
969 /* Parse K/N syntax of chunk options. */
971 static void
972 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
974 *slash = '\0';
975 if (xstrtoumax (slash + 1, NULL, 10, n_units, "") != LONGINT_OK
976 || *n_units == 0)
977 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash + 1);
978 if (slash != optarg /* a leading number is specified. */
979 && (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
980 || *k_units == 0 || *n_units < *k_units))
981 error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
/* Program entry point: parse the options, open the input on standard
   input, size the I/O buffer, run the splitter for the selected mode and
   finally close the input and the last output.  Every error path exits
   with a diagnostic; on success the program exits with EXIT_SUCCESS. */
986 main (int argc, char **argv)
988 struct stat stat_buf;
989 enum Split_type split_type = type_undef;
990 size_t in_blk_size = 0; /* optimal block size of input file device */
991 char *buf; /* file i/o buffer */
992 size_t page_size = getpagesize ();
/* K and N of "-n [K/]N"; K stays 0 unless a chunk number was given. */
993 uintmax_t k_units = 0;
994 uintmax_t n_units;
996 static char const multipliers[] = "bEGKkMmPTYZ0";
997 int c;
998 int digits_optind = 0;
999 off_t file_size;
1001 initialize_main (&argc, &argv);
1002 set_program_name (argv[0]);
1003 setlocale (LC_ALL, "");
1004 bindtextdomain (PACKAGE, LOCALEDIR);
1005 textdomain (PACKAGE);
1007 atexit (close_stdout);
1009 /* Parse command line options. */
1011 infile = bad_cast ("-");
1012 outbase = bad_cast ("x");
1014 while (true)
1016 /* This is the argv-index of the option we will read next. */
1017 int this_optind = optind ? optind : 1;
1018 char *slash;
1020 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u",
1021 longopts, NULL);
1022 if (c == -1)
1023 break;
1025 switch (c)
1027 case 'a':
1029 unsigned long tmp;
1030 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
1031 || SIZE_MAX / sizeof (size_t) < tmp)
1033 error (0, 0, _("%s: invalid suffix length"), optarg);
1034 usage (EXIT_FAILURE);
1036 suffix_length = tmp;
1038 break;
1040 case 'b':
1041 if (split_type != type_undef)
1042 FAIL_ONLY_ONE_WAY ();
1043 split_type = type_bytes;
1044 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1045 || n_units == 0)
1047 error (0, 0, _("%s: invalid number of bytes"), optarg);
1048 usage (EXIT_FAILURE);
1050 /* If input is a pipe, we could get more data than is possible
1051 to write to a single file, so indicate that immediately
1052 rather than having possibly future invocations fail. */
1053 if (OFF_T_MAX < n_units)
1054 error (EXIT_FAILURE, EFBIG,
1055 _("%s: invalid number of bytes"), optarg);
1057 break;
1059 case 'l':
1060 if (split_type != type_undef)
1061 FAIL_ONLY_ONE_WAY ();
1062 split_type = type_lines;
1063 if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1064 || n_units == 0)
1066 error (0, 0, _("%s: invalid number of lines"), optarg);
1067 usage (EXIT_FAILURE);
1069 break;
1071 case 'C':
1072 if (split_type != type_undef)
1073 FAIL_ONLY_ONE_WAY ();
1074 split_type = type_byteslines;
/* The value must also fit in size_t: line_bytes_split () allocates a
   buffer of exactly that many bytes. */
1075 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1076 || n_units == 0 || SIZE_MAX < n_units)
1078 error (0, 0, _("%s: invalid number of bytes"), optarg);
1079 usage (EXIT_FAILURE);
1081 if (OFF_T_MAX < n_units)
1082 error (EXIT_FAILURE, EFBIG,
1083 _("%s: invalid number of bytes"), optarg);
1084 break;
1086 case 'n':
1087 if (split_type != type_undef)
1088 FAIL_ONLY_ONE_WAY ();
1089 /* skip any whitespace */
1090 while (isspace (to_uchar (*optarg)))
1091 optarg++;
/* An "r/" or "l/" prefix selects round robin or line-preserving chunks;
   what follows is either N or K/N. */
1092 if (STRNCMP_LIT (optarg, "r/") == 0)
1094 split_type = type_rr;
1095 optarg += 2;
1097 else if (STRNCMP_LIT (optarg, "l/") == 0)
1099 split_type = type_chunk_lines;
1100 optarg += 2;
1102 else
1103 split_type = type_chunk_bytes;
1104 if ((slash = strchr (optarg, '/')))
1105 parse_chunk (&k_units, &n_units, slash);
1106 else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1107 || n_units == 0)
1108 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
1109 break;
1111 case 'u':
1112 unbuffered = true;
1113 break;
/* Obsolescent -NUMBER line count: the digits arrive one option character
   at a time and are accumulated into n_units. */
1115 case '0':
1116 case '1':
1117 case '2':
1118 case '3':
1119 case '4':
1120 case '5':
1121 case '6':
1122 case '7':
1123 case '8':
1124 case '9':
1125 if (split_type == type_undef)
1127 split_type = type_digits;
1128 n_units = 0;
1130 if (split_type != type_undef && split_type != type_digits)
1131 FAIL_ONLY_ONE_WAY ();
1132 if (digits_optind != 0 && digits_optind != this_optind)
1133 n_units = 0; /* More than one number given; ignore other. */
1134 digits_optind = this_optind;
1135 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
1137 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1138 error (EXIT_FAILURE, 0,
1139 _("line count option -%s%c... is too large"),
1140 umaxtostr (n_units, buffer), c);
1142 break;
1144 case 'd':
1145 suffix_alphabet = "0123456789";
1146 break;
1148 case 'e':
1149 elide_empty_files = true;
1150 break;
1152 case FILTER_OPTION:
1153 filter_command = optarg;
1154 break;
1156 case IO_BLKSIZE_OPTION:
/* An invalid value is only diagnosed here; parsing continues and the block
   size is later taken from the input file instead. */
1158 uintmax_t tmp_blk_size;
1159 if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
1160 multipliers) != LONGINT_OK
1161 || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
1162 error (0, 0, _("%s: invalid IO block size"), optarg);
1163 else
1164 in_blk_size = tmp_blk_size;
1166 break;
1168 case VERBOSE_OPTION:
1169 verbose = true;
1170 break;
1172 case_GETOPT_HELP_CHAR;
1174 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1176 default:
1177 usage (EXIT_FAILURE);
/* Extracting one chunk writes to standard output, not through a filter. */
1181 if (k_units != 0 && filter_command)
1183 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1184 usage (EXIT_FAILURE);
1187 /* Handle default case. */
1188 if (split_type == type_undef)
1190 split_type = type_lines;
1191 n_units = 1000;
1194 if (n_units == 0)
1196 error (0, 0, _("%s: invalid number of lines"), "0");
1197 usage (EXIT_FAILURE);
1200 set_suffix_length (n_units, split_type);
1202 /* Get out the filename arguments. */
1204 if (optind < argc)
1205 infile = argv[optind++];
1207 if (optind < argc)
1208 outbase = argv[optind++];
1210 if (optind < argc)
1212 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1213 usage (EXIT_FAILURE);
1216 /* Open the input file. */
1217 if (! STREQ (infile, "-")
1218 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1219 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1220 quote (infile));
1222 /* Binary I/O is safer when byte counts are used. */
1223 if (O_BINARY && ! isatty (STDIN_FILENO))
1224 xfreopen (NULL, "rb", stdin);
1226 /* Get the optimal block size of input device and make a buffer. */
1228 if (fstat (STDIN_FILENO, &stat_buf) != 0)
1229 error (EXIT_FAILURE, errno, "%s", infile);
1230 if (in_blk_size == 0)
1231 in_blk_size = io_blksize (stat_buf);
1232 file_size = stat_buf.st_size;
/* The byte- and line-chunk modes need to know how much input remains, so
   the current offset of the input must be obtainable. */
1234 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1236 off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1237 if (input_offset < 0)
1238 error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
1239 quote (infile));
1240 file_size -= input_offset;
1241 /* Overflow, and sanity checking. */
1242 if (OFF_T_MAX < n_units)
1244 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1245 error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
1246 umaxtostr (n_units, buffer));
1248 /* increase file_size to n_units here, so that we still process
1249 any input data, and create empty files for the rest. */
1250 file_size = MAX (file_size, n_units);
/* The extra byte leaves room for the newline sentinel that lines_split ()
   stores just past the data it has read. */
1253 buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);
1255 /* When filtering, closure of one pipe must not terminate the process,
1256 as there may still be other streams expecting input from us. */
1257 if (filter_command)
1259 struct sigaction act;
1260 sigemptyset (&newblocked);
1261 sigaction (SIGPIPE, NULL, &act);
1262 if (act.sa_handler != SIG_IGN)
1263 sigaddset (&newblocked, SIGPIPE);
1264 sigprocmask (SIG_BLOCK, &newblocked, &oldblocked);
/* Dispatch to the splitter that implements the selected mode. */
1267 switch (split_type)
1269 case type_digits:
1270 case type_lines:
1271 lines_split (n_units, buf, in_blk_size);
1272 break;
1274 case type_bytes:
1275 bytes_split (n_units, buf, in_blk_size, 0);
1276 break;
1278 case type_byteslines:
1279 line_bytes_split (n_units);
1280 break;
1282 case type_chunk_bytes:
1283 if (k_units == 0)
1284 bytes_split (file_size / n_units, buf, in_blk_size, n_units);
1285 else
1286 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
1287 break;
1289 case type_chunk_lines:
1290 lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
1291 break;
1293 case type_rr:
1294 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1295 but the functionality is provided for symmetry. */
1296 lines_rr (k_units, n_units, buf, in_blk_size);
1297 break;
1299 default:
1300 abort ();
1303 if (close (STDIN_FILENO) != 0)
1304 error (EXIT_FAILURE, errno, "%s", infile);
/* Close the last output file and reap its filter, if any. */
1305 closeout (NULL, output_desc, filter_pid, outfile);
1307 exit (EXIT_SUCCESS);