tests: more automated quote adjustment
[coreutils/ericb.git] / src / split.c
blobd0abd65fb0724b4b29add2d7802036014aea62a7
1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
19 To do:
20 * Implement -t CHAR or -t REGEX to specify break characters other
21 than newline. */
23 #include <config.h>
25 #include <assert.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <signal.h>
29 #include <sys/types.h>
30 #include <sys/wait.h>
32 #include "system.h"
33 #include "error.h"
34 #include "fd-reopen.h"
35 #include "fcntl--.h"
36 #include "full-read.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
39 #include "quote.h"
40 #include "safe-read.h"
41 #include "sig2str.h"
42 #include "xfreopen.h"
43 #include "xstrtol.h"
/* Canonical name of this utility, without any installation prefix
   such as 'g'.  It is handed to the --version handler in main.  */
#define PROGRAM_NAME "split"

/* Author list handed to the --version handler.  The first name is
   supplied both in plain ASCII and as UTF-8 octal escapes.  */
#define AUTHORS \
  proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
  proper_name ("Richard M. Stallman")
52 /* Shell command to filter through, instead of creating files. */
53 static char const *filter_command;
55 /* Process ID of the filter. */
56 static int filter_pid;
58 /* Array of open pipes. */
59 static int *open_pipes;
60 static size_t open_pipes_alloc;
61 static size_t n_open_pipes;
63 /* Blocked signals. */
64 static sigset_t oldblocked;
65 static sigset_t newblocked;
67 /* Base name of output files. */
68 static char const *outbase;
70 /* Name of output files. */
71 static char *outfile;
73 /* Pointer to the end of the prefix in OUTFILE.
74 Suffixes are inserted here. */
75 static char *outfile_mid;
77 /* Length of OUTFILE's suffix. */
78 static size_t suffix_length;
80 /* Alphabet of characters to use in suffix. */
81 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
83 /* Name of input file. May be "-". */
84 static char *infile;
86 /* Descriptor on which output file is open. */
87 static int output_desc = -1;
89 /* If true, print a diagnostic on standard error just before each
90 output file is opened. */
91 static bool verbose;
93 /* If true, don't generate zero length output files. */
94 static bool elide_empty_files;
96 /* If true, in round robin mode, immediately copy
97 input to output, which is much slower, so disabled by default. */
98 static bool unbuffered;
/* How the input is to be divided.  main selects exactly one of these
   from the command line; type_undef means "nothing chosen yet".  */
enum Split_type
{
  type_undef,
  type_bytes,
  type_byteslines,
  type_lines,
  type_digits,
  type_chunk_bytes,
  type_chunk_lines,
  type_rr
};
/* Pseudo short-option codes for long options that have no single
   character equivalent.  Numbering starts just above CHAR_MAX so the
   codes cannot collide with a real option character.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  FILTER_OPTION,
  IO_BLKSIZE_OPTION
};
116 static struct option const longopts[] =
118 {"bytes", required_argument, NULL, 'b'},
119 {"lines", required_argument, NULL, 'l'},
120 {"line-bytes", required_argument, NULL, 'C'},
121 {"number", required_argument, NULL, 'n'},
122 {"elide-empty-files", no_argument, NULL, 'e'},
123 {"unbuffered", no_argument, NULL, 'u'},
124 {"suffix-length", required_argument, NULL, 'a'},
125 {"numeric-suffixes", no_argument, NULL, 'd'},
126 {"filter", required_argument, NULL, FILTER_OPTION},
127 {"verbose", no_argument, NULL, VERBOSE_OPTION},
128 {"-io-blksize", required_argument, NULL,
129 IO_BLKSIZE_OPTION}, /* do not document */
130 {GETOPT_HELP_OPTION_DECL},
131 {GETOPT_VERSION_OPTION_DECL},
132 {NULL, 0, NULL, 0}
135 /* Return true if the errno value, ERR, is ignorable. */
136 static inline bool
137 ignorable (int err)
139 return filter_command && err == EPIPE;
142 static void
143 set_suffix_length (uintmax_t n_units, enum Split_type split_type)
145 #define DEFAULT_SUFFIX_LENGTH 2
147 size_t suffix_needed = 0;
149 /* Auto-calculate the suffix length if the number of files is given. */
150 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
151 || split_type == type_rr)
153 size_t alphabet_len = strlen (suffix_alphabet);
154 bool alphabet_slop = (n_units % alphabet_len) != 0;
155 while (n_units /= alphabet_len)
156 suffix_needed++;
157 suffix_needed += alphabet_slop;
160 if (suffix_length) /* set by user */
162 if (suffix_length < suffix_needed)
164 error (EXIT_FAILURE, 0,
165 _("the suffix length needs to be at least %zu"),
166 suffix_needed);
168 return;
170 else
171 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
/* Print usage information and terminate the process with STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help is called;
   otherwise the synopsis, the option summary, the size note and the
   description of the CHUNKS syntax are written to standard output.
   NOTE(review): the column alignment inside the string literals below
   appears to have been collapsed by text extraction -- restore it from
   the upstream source before relying on this text.  */
174 void
175 usage (int status)
177 if (status != EXIT_SUCCESS)
178 emit_try_help ();
179 else
181 printf (_("\
182 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
184 program_name);
185 fputs (_("\
186 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
187 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
188 is -, read standard input.\n\
190 "), stdout);
191 fputs (_("\
192 Mandatory arguments to long options are mandatory for short options too.\n\
193 "), stdout);
/* The default suffix length shown in the help text is substituted
   from DEFAULT_SUFFIX_LENGTH rather than being hard-coded.  */
194 fprintf (stdout, _("\
195 -a, --suffix-length=N use suffixes of length N (default %d)\n\
196 -b, --bytes=SIZE put SIZE bytes per output file\n\
197 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
198 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
199 -e, --elide-empty-files do not generate empty output files with `-n'\n\
200 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
201 -l, --lines=NUMBER put NUMBER lines per output file\n\
202 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
203 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
204 "), DEFAULT_SUFFIX_LENGTH);
205 fputs (_("\
206 --verbose print a diagnostic just before each\n\
207 output file is opened\n\
208 "), stdout);
209 fputs (HELP_OPTION_DESCRIPTION, stdout);
210 fputs (VERSION_OPTION_DESCRIPTION, stdout);
211 emit_size_note ();
212 fputs (_("\n\
213 CHUNKS may be:\n\
214 N split into N files based on size of input\n\
215 K/N output Kth of N to stdout\n\
216 l/N split into N files without splitting lines\n\
217 l/K/N output Kth of N to stdout without splitting lines\n\
218 r/N like `l' but use round robin distribution\n\
219 r/K/N likewise but only output Kth of N to stdout\n\
220 "), stdout);
221 emit_ancillary_info ();
/* Both the short hint and the full help end here: the process exits
   with the caller-supplied status.  */
223 exit (status);
226 /* Compute the next sequential output file name and store it into the
227 string `outfile'. */
229 static void
230 next_file_name (void)
232 /* Index in suffix_alphabet of each character in the suffix. */
233 static size_t *sufindex;
235 if (! outfile)
237 /* Allocate and initialize the first file name. */
239 size_t outbase_length = strlen (outbase);
240 size_t outfile_length = outbase_length + suffix_length;
241 if (outfile_length + 1 < outbase_length)
242 xalloc_die ();
243 outfile = xmalloc (outfile_length + 1);
244 outfile_mid = outfile + outbase_length;
245 memcpy (outfile, outbase, outbase_length);
246 memset (outfile_mid, suffix_alphabet[0], suffix_length);
247 outfile[outfile_length] = 0;
248 sufindex = xcalloc (suffix_length, sizeof *sufindex);
250 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
251 /* POSIX requires that if the output file name is too long for
252 its directory, `split' must fail without creating any files.
253 This must be checked for explicitly on operating systems that
254 silently truncate file names. */
256 char *dir = dir_name (outfile);
257 long name_max = pathconf (dir, _PC_NAME_MAX);
258 if (0 <= name_max && name_max < base_len (last_component (outfile)))
259 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
260 free (dir);
262 #endif
264 else
266 /* Increment the suffix in place, if possible. */
268 size_t i = suffix_length;
269 while (i-- != 0)
271 sufindex[i]++;
272 outfile_mid[i] = suffix_alphabet[sufindex[i]];
273 if (outfile_mid[i])
274 return;
275 sufindex[i] = 0;
276 outfile_mid[i] = suffix_alphabet[sufindex[i]];
278 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
282 /* Create or truncate a file. */
284 static int
285 create (const char *name)
287 if (!filter_command)
289 if (verbose)
290 fprintf (stdout, _("creating file %s\n"), quote (name));
291 return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
292 (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
294 else
296 int fd_pair[2];
297 pid_t child_pid;
298 char const *shell_prog = getenv ("SHELL");
299 if (shell_prog == NULL)
300 shell_prog = "/bin/sh";
301 if (setenv ("FILE", name, 1) != 0)
302 error (EXIT_FAILURE, errno,
303 _("failed to set FILE environment variable"));
304 if (verbose)
305 fprintf (stdout, _("executing with FILE=%s\n"), quote (name));
306 if (pipe (fd_pair) != 0)
307 error (EXIT_FAILURE, errno, _("failed to create pipe"));
308 child_pid = fork ();
309 if (child_pid == 0)
311 /* This is the child process. If an error occurs here, the
312 parent will eventually learn about it after doing a wait,
313 at which time it will emit its own error message. */
314 int j;
315 /* We have to close any pipes that were opened during an
316 earlier call, otherwise this process will be holding a
317 write-pipe that will prevent the earlier process from
318 reading an EOF on the corresponding read-pipe. */
319 for (j = 0; j < n_open_pipes; ++j)
320 if (close (open_pipes[j]) != 0)
321 error (EXIT_FAILURE, errno, _("closing prior pipe"));
322 if (close (fd_pair[1]))
323 error (EXIT_FAILURE, errno, _("closing output pipe"));
324 if (fd_pair[0] != STDIN_FILENO)
326 if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
327 error (EXIT_FAILURE, errno, _("moving input pipe"));
328 if (close (fd_pair[0]) != 0)
329 error (EXIT_FAILURE, errno, _("closing input pipe"));
331 sigprocmask (SIG_SETMASK, &oldblocked, NULL);
332 execl (shell_prog, last_component (shell_prog), "-c",
333 filter_command, (char *) NULL);
334 error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
335 shell_prog, filter_command);
337 if (child_pid == -1)
338 error (EXIT_FAILURE, errno, _("fork system call failed"));
339 if (close (fd_pair[0]) != 0)
340 error (EXIT_FAILURE, errno, _("failed to close input pipe"));
341 filter_pid = child_pid;
342 if (n_open_pipes == open_pipes_alloc)
343 open_pipes = x2nrealloc (open_pipes, &open_pipes_alloc,
344 sizeof *open_pipes);
345 open_pipes[n_open_pipes++] = fd_pair[1];
346 return fd_pair[1];
350 /* Close the output file, and do any associated cleanup.
351 If FP and FD are both specified, they refer to the same open file;
352 in this case FP is closed, but FD is still used in cleanup. */
353 static void
354 closeout (FILE *fp, int fd, pid_t pid, char const *name)
356 if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
357 error (EXIT_FAILURE, errno, "%s", name);
358 if (fd >= 0)
360 if (fp == NULL && close (fd) < 0)
361 error (EXIT_FAILURE, errno, "%s", name);
362 int j;
363 for (j = 0; j < n_open_pipes; ++j)
365 if (open_pipes[j] == fd)
367 open_pipes[j] = open_pipes[--n_open_pipes];
368 break;
372 if (pid > 0)
374 int wstatus = 0;
375 if (waitpid (pid, &wstatus, 0) == -1 && errno != ECHILD)
376 error (EXIT_FAILURE, errno, _("waiting for child process"));
377 if (WIFSIGNALED (wstatus))
379 int sig = WTERMSIG (wstatus);
380 if (sig != SIGPIPE)
382 char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
383 if (sig2str (sig, signame) != 0)
384 sprintf (signame, "%d", sig);
385 error (sig + 128, 0,
386 _("with FILE=%s, signal %s from command: %s"),
387 name, signame, filter_command);
390 else if (WIFEXITED (wstatus))
392 int ex = WEXITSTATUS (wstatus);
393 if (ex != 0)
394 error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
395 name, ex, filter_command);
397 else
399 /* shouldn't happen. */
400 error (EXIT_FAILURE, 0,
401 _("unknown status from command (0x%X)"), wstatus);
406 /* Write BYTES bytes at BP to an output file.
407 If NEW_FILE_FLAG is true, open the next output file.
408 Otherwise add to the same output file already in use. */
410 static void
411 cwrite (bool new_file_flag, const char *bp, size_t bytes)
413 if (new_file_flag)
415 if (!bp && bytes == 0 && elide_empty_files)
416 return;
417 closeout (NULL, output_desc, filter_pid, outfile);
418 next_file_name ();
419 if ((output_desc = create (outfile)) < 0)
420 error (EXIT_FAILURE, errno, "%s", outfile);
422 if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno))
423 error (EXIT_FAILURE, errno, "%s", outfile);
426 /* Split into pieces of exactly N_BYTES bytes.
427 Use buffer BUF, whose size is BUFSIZE. */
429 static void
430 bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
432 size_t n_read;
433 bool new_file_flag = true;
434 size_t to_read;
435 uintmax_t to_write = n_bytes;
436 char *bp_out;
437 uintmax_t opened = 0;
441 n_read = full_read (STDIN_FILENO, buf, bufsize);
442 if (n_read < bufsize && errno)
443 error (EXIT_FAILURE, errno, "%s", infile);
444 bp_out = buf;
445 to_read = n_read;
446 while (true)
448 if (to_read < to_write)
450 if (to_read) /* do not write 0 bytes! */
452 cwrite (new_file_flag, bp_out, to_read);
453 opened += new_file_flag;
454 to_write -= to_read;
455 new_file_flag = false;
457 break;
459 else
461 size_t w = to_write;
462 cwrite (new_file_flag, bp_out, w);
463 opened += new_file_flag;
464 new_file_flag = !max_files || (opened < max_files);
465 if (!new_file_flag && ignorable (errno))
467 /* If filter no longer accepting input, stop reading. */
468 n_read = 0;
469 break;
471 bp_out += w;
472 to_read -= w;
473 to_write = n_bytes;
477 while (n_read == bufsize);
479 /* Ensure NUMBER files are created, which truncates
480 any existing files or notifies any consumers on fifos.
481 FIXME: Should we do this before EXIT_FAILURE? */
482 while (opened++ < max_files)
483 cwrite (true, NULL, 0);
486 /* Split into pieces of exactly N_LINES lines.
487 Use buffer BUF, whose size is BUFSIZE. */
489 static void
490 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
492 size_t n_read;
493 char *bp, *bp_out, *eob;
494 bool new_file_flag = true;
495 uintmax_t n = 0;
499 n_read = full_read (STDIN_FILENO, buf, bufsize);
500 if (n_read < bufsize && errno)
501 error (EXIT_FAILURE, errno, "%s", infile);
502 bp = bp_out = buf;
503 eob = bp + n_read;
504 *eob = '\n';
505 while (true)
507 bp = memchr (bp, '\n', eob - bp + 1);
508 if (bp == eob)
510 if (eob != bp_out) /* do not write 0 bytes! */
512 size_t len = eob - bp_out;
513 cwrite (new_file_flag, bp_out, len);
514 new_file_flag = false;
516 break;
519 ++bp;
520 if (++n >= n_lines)
522 cwrite (new_file_flag, bp_out, bp - bp_out);
523 bp_out = bp;
524 new_file_flag = true;
525 n = 0;
529 while (n_read == bufsize);
532 /* Split into pieces that are as large as possible while still not more
533 than N_BYTES bytes, and are split on line boundaries except
534 where lines longer than N_BYTES bytes occur.
535 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
536 buffer of size N_BYTES, in case N_BYTES is very large. */
538 static void
539 line_bytes_split (size_t n_bytes)
541 char *bp;
542 bool eof = false;
543 size_t n_buffered = 0;
544 char *buf = xmalloc (n_bytes);
548 /* Fill up the full buffer size from the input file. */
550 size_t to_read = n_bytes - n_buffered;
551 size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read);
552 if (n_read < to_read && errno)
553 error (EXIT_FAILURE, errno, "%s", infile);
555 n_buffered += n_read;
556 if (n_buffered != n_bytes)
558 if (n_buffered == 0)
559 break;
560 eof = true;
563 /* Find where to end this chunk. */
564 bp = buf + n_buffered;
565 if (n_buffered == n_bytes)
567 while (bp > buf && bp[-1] != '\n')
568 bp--;
571 /* If chunk has no newlines, use all the chunk. */
572 if (bp == buf)
573 bp = buf + n_buffered;
575 /* Output the chars as one output file. */
576 cwrite (true, buf, bp - buf);
578 /* Discard the chars we just output; move rest of chunk
579 down to be the start of the next chunk. Source and
580 destination probably overlap. */
581 n_buffered -= bp - buf;
582 if (n_buffered > 0)
583 memmove (buf, bp, n_buffered);
585 while (!eof);
586 free (buf);
589 /* -n l/[K/]N: Write lines to files of approximately file size / N.
590 The file is partitioned into file size / N sized portions, with the
591 last assigned any excess. If a line _starts_ within a partition
592 it is written completely to the corresponding file. Since lines
593 are not split even if they overlap a partition, the files written
594 can be larger or smaller than the partition size, and even empty
595 if a line is so long as to completely overlap the partition. */
597 static void
598 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
599 off_t file_size)
601 assert (n && k <= n && n <= file_size);
603 const off_t chunk_size = file_size / n;
604 uintmax_t chunk_no = 1;
605 off_t chunk_end = chunk_size - 1;
606 off_t n_written = 0;
607 bool new_file_flag = true;
608 bool chunk_truncated = false;
610 if (k > 1)
612 /* Start reading 1 byte before kth chunk of file. */
613 off_t start = (k - 1) * chunk_size - 1;
614 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
615 error (EXIT_FAILURE, errno, "%s", infile);
616 n_written = start;
617 chunk_no = k - 1;
618 chunk_end = chunk_no * chunk_size - 1;
621 while (n_written < file_size)
623 char *bp = buf, *eob;
624 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
625 if (n_read < bufsize && errno)
626 error (EXIT_FAILURE, errno, "%s", infile);
627 else if (n_read == 0)
628 break; /* eof. */
629 n_read = MIN (n_read, file_size - n_written);
630 chunk_truncated = false;
631 eob = buf + n_read;
633 while (bp != eob)
635 size_t to_write;
636 bool next = false;
638 /* Begin looking for '\n' at last byte of chunk. */
639 off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
640 char *bp_out = memchr (bp + skip, '\n', n_read - skip);
641 if (bp_out++)
642 next = true;
643 else
644 bp_out = eob;
645 to_write = bp_out - bp;
647 if (k == chunk_no)
649 /* We don't use the stdout buffer here since we're writing
650 large chunks from an existing file, so it's more efficient
651 to write out directly. */
652 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
653 error (EXIT_FAILURE, errno, "%s", _("write error"));
655 else if (! k)
656 cwrite (new_file_flag, bp, to_write);
657 n_written += to_write;
658 bp += to_write;
659 n_read -= to_write;
660 new_file_flag = next;
662 /* A line could have been so long that it skipped
663 entire chunks. So create empty files in that case. */
664 while (next || chunk_end <= n_written - 1)
666 if (!next && bp == eob)
668 /* replenish buf, before going to next chunk. */
669 chunk_truncated = true;
670 break;
672 chunk_no++;
673 if (k && chunk_no > k)
674 return;
675 if (chunk_no == n)
676 chunk_end = file_size - 1; /* >= chunk_size. */
677 else
678 chunk_end += chunk_size;
679 if (chunk_end <= n_written - 1)
681 if (! k)
682 cwrite (true, NULL, 0);
684 else
685 next = false;
690 if (chunk_truncated)
691 chunk_no++;
693 /* Ensure NUMBER files are created, which truncates
694 any existing files or notifies any consumers on fifos.
695 FIXME: Should we do this before EXIT_FAILURE? */
696 while (!k && chunk_no++ <= n)
697 cwrite (true, NULL, 0);
700 /* -n K/N: Extract Kth of N chunks. */
702 static void
703 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
704 off_t file_size)
706 off_t start;
707 off_t end;
709 assert (k && n && k <= n && n <= file_size);
711 start = (k - 1) * (file_size / n);
712 end = (k == n) ? file_size : k * (file_size / n);
714 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
715 error (EXIT_FAILURE, errno, "%s", infile);
717 while (start < end)
719 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
720 if (n_read < bufsize && errno)
721 error (EXIT_FAILURE, errno, "%s", infile);
722 else if (n_read == 0)
723 break; /* eof. */
724 n_read = MIN (n_read, end - start);
725 if (full_write (STDOUT_FILENO, buf, n_read) != n_read
726 && ! ignorable (errno))
727 error (EXIT_FAILURE, errno, "%s", quote ("-"));
728 start += n_read;
/* State of one output file in round-robin mode.  */
typedef struct of_info
{
  /* Name of the output file.  */
  char *of_name;
  /* Open descriptor, or one of the negative OFD_* markers.  */
  int ofd;
  /* Stream opened on OFD, or NULL while the file is not open.  */
  FILE *ofile;
  /* Filter process ID recorded when the file was opened, else 0.  */
  int opid;
} of_t;
/* Negative markers stored in place of a descriptor while an output
   file is not open.  */
enum
{
  /* Never opened yet: the file must be created.  */
  OFD_NEW = -1,
  /* Opened before and since closed: reopen it for appending.  */
  OFD_APPEND = -2
};
746 /* Rotate file descriptors when we're writing to more output files than we
747 have available file descriptors.
748 Return whether we came under file resource pressure.
749 If so, it's probably best to close each file when finished with it. */
751 static bool
752 ofile_open (of_t *files, size_t i_check, size_t nfiles)
754 bool file_limit = false;
756 if (files[i_check].ofd <= OFD_NEW)
758 int fd;
759 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
761 /* Another process could have opened a file in between the calls to
762 close and open, so we should keep trying until open succeeds or
763 we've closed all of our files. */
764 while (true)
766 if (files[i_check].ofd == OFD_NEW)
767 fd = create (files[i_check].of_name);
768 else /* OFD_APPEND */
770 /* Attempt to append to previously opened file.
771 We use O_NONBLOCK to support writing to fifos,
772 where the other end has closed because of our
773 previous close. In that case we'll immediately
774 get an error, rather than waiting indefinitely.
775 In specialised cases the consumer can keep reading
776 from the fifo, terminating on conditions in the data
777 itself, or perhaps never in the case of `tail -f`.
778 I.E. for fifos it is valid to attempt this reopen.
780 We don't handle the filter_command case here, as create()
781 will exit if there are not enough files in that case.
782 I.E. we don't support restarting filters, as that would
783 put too much burden on users specifying --filter commands. */
784 fd = open (files[i_check].of_name,
785 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
788 if (-1 < fd)
789 break;
791 if (!(errno == EMFILE || errno == ENFILE))
792 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
794 file_limit = true;
796 /* Search backwards for an open file to close. */
797 while (files[i_reopen].ofd < 0)
799 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
800 /* No more open files to close, exit with E[NM]FILE. */
801 if (i_reopen == i_check)
802 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
805 if (fclose (files[i_reopen].ofile) != 0)
806 error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
807 files[i_reopen].ofile = NULL;
808 files[i_reopen].ofd = OFD_APPEND;
811 files[i_check].ofd = fd;
812 if (!(files[i_check].ofile = fdopen (fd, "a")))
813 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
814 files[i_check].opid = filter_pid;
815 filter_pid = 0;
818 return file_limit;
821 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
822 When K == 0, we try to keep the files open in parallel.
823 If we run out of file resources, then we revert
824 to opening and closing each file for each line. */
826 static void
827 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
829 bool wrapped = false;
830 bool wrote = false;
831 bool file_limit;
832 size_t i_file;
833 of_t *files IF_LINT (= NULL);
834 uintmax_t line_no;
836 if (k)
837 line_no = 1;
838 else
840 if (SIZE_MAX < n)
841 error (exit_failure, 0, "%s", _("memory exhausted"));
842 files = xnmalloc (n, sizeof *files);
844 /* Generate output file names. */
845 for (i_file = 0; i_file < n; i_file++)
847 next_file_name ();
848 files[i_file].of_name = xstrdup (outfile);
849 files[i_file].ofd = OFD_NEW;
850 files[i_file].ofile = NULL;
851 files[i_file].opid = 0;
853 i_file = 0;
854 file_limit = false;
857 while (true)
859 char *bp = buf, *eob;
860 /* Use safe_read() rather than full_read() here
861 so that we process available data immediately. */
862 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
863 if (n_read == SAFE_READ_ERROR)
864 error (EXIT_FAILURE, errno, "%s", infile);
865 else if (n_read == 0)
866 break; /* eof. */
867 eob = buf + n_read;
869 while (bp != eob)
871 size_t to_write;
872 bool next = false;
874 /* Find end of line. */
875 char *bp_out = memchr (bp, '\n', eob - bp);
876 if (bp_out)
878 bp_out++;
879 next = true;
881 else
882 bp_out = eob;
883 to_write = bp_out - bp;
885 if (k)
887 if (line_no == k && unbuffered)
889 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
890 error (EXIT_FAILURE, errno, "%s", _("write error"));
892 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
894 clearerr (stdout); /* To silence close_stdout(). */
895 error (EXIT_FAILURE, errno, "%s", _("write error"));
897 if (next)
898 line_no = (line_no == n) ? 1 : line_no + 1;
900 else
902 /* Secure file descriptor. */
903 file_limit |= ofile_open (files, i_file, n);
904 if (unbuffered)
906 /* Note writing to fd, rather than flushing the FILE gives
907 an 8% performance benefit, due to reduced data copying. */
908 if (full_write (files[i_file].ofd, bp, to_write) != to_write
909 && ! ignorable (errno))
910 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
912 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
913 && ! ignorable (errno))
914 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
915 if (! ignorable (errno))
916 wrote = true;
918 if (file_limit)
920 if (fclose (files[i_file].ofile) != 0)
921 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
922 files[i_file].ofile = NULL;
923 files[i_file].ofd = OFD_APPEND;
925 if (next && ++i_file == n)
927 wrapped = true;
928 /* If no filters are accepting input, stop reading. */
929 if (! wrote)
930 goto no_filters;
931 wrote = false;
932 i_file = 0;
936 bp = bp_out;
940 no_filters:
941 /* Ensure all files created, so that any existing files are truncated,
942 and to signal any waiting fifo consumers.
943 Also, close any open file descriptors.
944 FIXME: Should we do this before EXIT_FAILURE? */
945 if (!k)
947 int ceiling = (wrapped ? n : i_file);
948 for (i_file = 0; i_file < n; i_file++)
950 if (i_file >= ceiling && !elide_empty_files)
951 file_limit |= ofile_open (files, i_file, n);
952 if (files[i_file].ofd >= 0)
953 closeout (files[i_file].ofile, files[i_file].ofd,
954 files[i_file].opid, files[i_file].of_name);
955 files[i_file].ofd = OFD_APPEND;
/* Diagnose a second, conflicting split-mode option and leave through
   usage with a failure status.  Wrapped in do/while (0) so that the
   macro behaves as a single statement.  */
#define FAIL_ONLY_ONE_WAY() \
  do \
    { \
      error (0, 0, _("cannot split in more than one way")); \
      usage (EXIT_FAILURE); \
    } \
  while (0)
968 /* Parse K/N syntax of chunk options. */
970 static void
971 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
973 *slash = '\0';
974 if (xstrtoumax (slash + 1, NULL, 10, n_units, "") != LONGINT_OK
975 || *n_units == 0)
976 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash + 1);
977 if (slash != optarg /* a leading number is specified. */
978 && (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
979 || *k_units == 0 || *n_units < *k_units))
980 error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
985 main (int argc, char **argv)
987 struct stat stat_buf;
988 enum Split_type split_type = type_undef;
989 size_t in_blk_size = 0; /* optimal block size of input file device */
990 char *buf; /* file i/o buffer */
991 size_t page_size = getpagesize ();
992 uintmax_t k_units = 0;
993 uintmax_t n_units;
995 static char const multipliers[] = "bEGKkMmPTYZ0";
996 int c;
997 int digits_optind = 0;
998 off_t file_size;
1000 initialize_main (&argc, &argv);
1001 set_program_name (argv[0]);
1002 setlocale (LC_ALL, "");
1003 bindtextdomain (PACKAGE, LOCALEDIR);
1004 textdomain (PACKAGE);
1006 atexit (close_stdout);
1008 /* Parse command line options. */
1010 infile = bad_cast ("-");
1011 outbase = bad_cast ("x");
1013 while (true)
1015 /* This is the argv-index of the option we will read next. */
1016 int this_optind = optind ? optind : 1;
1017 char *slash;
1019 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u",
1020 longopts, NULL);
1021 if (c == -1)
1022 break;
1024 switch (c)
1026 case 'a':
1028 unsigned long tmp;
1029 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
1030 || SIZE_MAX / sizeof (size_t) < tmp)
1032 error (0, 0, _("%s: invalid suffix length"), optarg);
1033 usage (EXIT_FAILURE);
1035 suffix_length = tmp;
1037 break;
1039 case 'b':
1040 if (split_type != type_undef)
1041 FAIL_ONLY_ONE_WAY ();
1042 split_type = type_bytes;
1043 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1044 || n_units == 0)
1046 error (0, 0, _("%s: invalid number of bytes"), optarg);
1047 usage (EXIT_FAILURE);
1049 /* If input is a pipe, we could get more data than is possible
1050 to write to a single file, so indicate that immediately
1051 rather than having possibly future invocations fail. */
1052 if (OFF_T_MAX < n_units)
1053 error (EXIT_FAILURE, EFBIG,
1054 _("%s: invalid number of bytes"), optarg);
1056 break;
1058 case 'l':
1059 if (split_type != type_undef)
1060 FAIL_ONLY_ONE_WAY ();
1061 split_type = type_lines;
1062 if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1063 || n_units == 0)
1065 error (0, 0, _("%s: invalid number of lines"), optarg);
1066 usage (EXIT_FAILURE);
1068 break;
1070 case 'C':
1071 if (split_type != type_undef)
1072 FAIL_ONLY_ONE_WAY ();
1073 split_type = type_byteslines;
1074 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
1075 || n_units == 0 || SIZE_MAX < n_units)
1077 error (0, 0, _("%s: invalid number of bytes"), optarg);
1078 usage (EXIT_FAILURE);
1080 if (OFF_T_MAX < n_units)
1081 error (EXIT_FAILURE, EFBIG,
1082 _("%s: invalid number of bytes"), optarg);
1083 break;
1085 case 'n':
1086 if (split_type != type_undef)
1087 FAIL_ONLY_ONE_WAY ();
1088 /* skip any whitespace */
1089 while (isspace (to_uchar (*optarg)))
1090 optarg++;
1091 if (STRNCMP_LIT (optarg, "r/") == 0)
1093 split_type = type_rr;
1094 optarg += 2;
1096 else if (STRNCMP_LIT (optarg, "l/") == 0)
1098 split_type = type_chunk_lines;
1099 optarg += 2;
1101 else
1102 split_type = type_chunk_bytes;
1103 if ((slash = strchr (optarg, '/')))
1104 parse_chunk (&k_units, &n_units, slash);
1105 else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
1106 || n_units == 0)
1107 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
1108 break;
1110 case 'u':
1111 unbuffered = true;
1112 break;
1114 case '0':
1115 case '1':
1116 case '2':
1117 case '3':
1118 case '4':
1119 case '5':
1120 case '6':
1121 case '7':
1122 case '8':
1123 case '9':
1124 if (split_type == type_undef)
1126 split_type = type_digits;
1127 n_units = 0;
1129 if (split_type != type_undef && split_type != type_digits)
1130 FAIL_ONLY_ONE_WAY ();
1131 if (digits_optind != 0 && digits_optind != this_optind)
1132 n_units = 0; /* More than one number given; ignore other. */
1133 digits_optind = this_optind;
1134 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
1136 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1137 error (EXIT_FAILURE, 0,
1138 _("line count option -%s%c... is too large"),
1139 umaxtostr (n_units, buffer), c);
1141 break;
1143 case 'd':
1144 suffix_alphabet = "0123456789";
1145 break;
1147 case 'e':
1148 elide_empty_files = true;
1149 break;
1151 case FILTER_OPTION:
1152 filter_command = optarg;
1153 break;
1155 case IO_BLKSIZE_OPTION:
1157 uintmax_t tmp_blk_size;
1158 if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
1159 multipliers) != LONGINT_OK
1160 || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
1161 error (0, 0, _("%s: invalid IO block size"), optarg);
1162 else
1163 in_blk_size = tmp_blk_size;
1165 break;
1167 case VERBOSE_OPTION:
1168 verbose = true;
1169 break;
1171 case_GETOPT_HELP_CHAR;
1173 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1175 default:
1176 usage (EXIT_FAILURE);
1180 if (k_units != 0 && filter_command)
1182 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1183 usage (EXIT_FAILURE);
1186 /* Handle default case. */
1187 if (split_type == type_undef)
1189 split_type = type_lines;
1190 n_units = 1000;
1193 if (n_units == 0)
1195 error (0, 0, _("%s: invalid number of lines"), "0");
1196 usage (EXIT_FAILURE);
1199 set_suffix_length (n_units, split_type);
1201 /* Get out the filename arguments. */
1203 if (optind < argc)
1204 infile = argv[optind++];
1206 if (optind < argc)
1207 outbase = argv[optind++];
1209 if (optind < argc)
1211 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1212 usage (EXIT_FAILURE);
1215 /* Open the input file. */
1216 if (! STREQ (infile, "-")
1217 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1218 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1219 quote (infile));
1221 /* Binary I/O is safer when byte counts are used. */
1222 if (O_BINARY && ! isatty (STDIN_FILENO))
1223 xfreopen (NULL, "rb", stdin);
1225 /* Get the optimal block size of input device and make a buffer. */
1227 if (fstat (STDIN_FILENO, &stat_buf) != 0)
1228 error (EXIT_FAILURE, errno, "%s", infile);
1229 if (in_blk_size == 0)
1230 in_blk_size = io_blksize (stat_buf);
1231 file_size = stat_buf.st_size;
1233 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1235 off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1236 if (input_offset < 0)
1237 error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
1238 quote (infile));
1239 file_size -= input_offset;
1240 /* Overflow, and sanity checking. */
1241 if (OFF_T_MAX < n_units)
1243 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1244 error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
1245 umaxtostr (n_units, buffer));
1247 /* increase file_size to n_units here, so that we still process
1248 any input data, and create empty files for the rest. */
1249 file_size = MAX (file_size, n_units);
1252 buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);
1254 /* When filtering, closure of one pipe must not terminate the process,
1255 as there may still be other streams expecting input from us. */
1256 if (filter_command)
1258 struct sigaction act;
1259 sigemptyset (&newblocked);
1260 sigaction (SIGPIPE, NULL, &act);
1261 if (act.sa_handler != SIG_IGN)
1262 sigaddset (&newblocked, SIGPIPE);
1263 sigprocmask (SIG_BLOCK, &newblocked, &oldblocked);
1266 switch (split_type)
1268 case type_digits:
1269 case type_lines:
1270 lines_split (n_units, buf, in_blk_size);
1271 break;
1273 case type_bytes:
1274 bytes_split (n_units, buf, in_blk_size, 0);
1275 break;
1277 case type_byteslines:
1278 line_bytes_split (n_units);
1279 break;
1281 case type_chunk_bytes:
1282 if (k_units == 0)
1283 bytes_split (file_size / n_units, buf, in_blk_size, n_units);
1284 else
1285 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
1286 break;
1288 case type_chunk_lines:
1289 lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
1290 break;
1292 case type_rr:
1293 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1294 but the functionality is provided for symmetry. */
1295 lines_rr (k_units, n_units, buf, in_blk_size);
1296 break;
1298 default:
1299 abort ();
1302 if (close (STDIN_FILENO) != 0)
1303 error (EXIT_FAILURE, errno, "%s", infile);
1304 closeout (NULL, output_desc, filter_pid, outfile);
1306 exit (EXIT_SUCCESS);