split: fix a case where --elide-empty causes invalid chunking
[coreutils/ericb.git] / src / split.c
blobae98bc76a6a06b3e306ff24d9dffce80fb6cda43
1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
19 To do:
20 * Implement -t CHAR or -t REGEX to specify break characters other
21 than newline. */
23 #include <config.h>
25 #include <assert.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #include "system.h"
31 #include "error.h"
32 #include "fd-reopen.h"
33 #include "fcntl--.h"
34 #include "full-read.h"
35 #include "full-write.h"
36 #include "quote.h"
37 #include "safe-read.h"
38 #include "xfreopen.h"
39 #include "xstrtol.h"
/* Official program name, without any installation prefix such as `g'.
   It is passed, with AUTHORS, to the --version handling in main.  */
#define PROGRAM_NAME "split"

/* Author list passed to the --version handling in main.  */
#define AUTHORS \
  proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
  proper_name ("Richard M. Stallman")
/* Prefix common to all output file names.  */
static const char *outbase;

/* Buffer holding the current output file name (prefix + suffix).  */
static char *outfile;

/* Points just past the prefix inside OUTFILE; the suffix is
   written starting here.  */
static char *outfile_mid;

/* Number of characters in the suffix part of OUTFILE.  */
static size_t suffix_length;

/* Characters from which suffixes are built.  */
static const char *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";

/* Input file name; "-" is possible.  */
static char *infile;

/* File descriptor of the currently open output file, or -1.  */
static int output_desc = -1;

/* When true, announce each output file just before it is opened.  */
static bool verbose;

/* When true, zero-length output files are not generated.  */
static bool elide_empty_files;

/* When true, round robin mode copies input to output immediately.
   This is much slower, hence off by default.  */
static bool unbuffered;
/* Pseudo short-option codes for long options that have no short
   equivalent.  They start above CHAR_MAX so they cannot collide with
   a real option character.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  IO_BLKSIZE_OPTION
};
89 static struct option const longopts[] =
91 {"bytes", required_argument, NULL, 'b'},
92 {"lines", required_argument, NULL, 'l'},
93 {"line-bytes", required_argument, NULL, 'C'},
94 {"number", required_argument, NULL, 'n'},
95 {"elide-empty-files", no_argument, NULL, 'e'},
96 {"unbuffered", no_argument, NULL, 'u'},
97 {"suffix-length", required_argument, NULL, 'a'},
98 {"numeric-suffixes", no_argument, NULL, 'd'},
99 {"verbose", no_argument, NULL, VERBOSE_OPTION},
100 {"-io-blksize", required_argument, NULL,
101 IO_BLKSIZE_OPTION}, /* do not document */
102 {GETOPT_HELP_OPTION_DECL},
103 {GETOPT_VERSION_OPTION_DECL},
104 {NULL, 0, NULL, 0}
107 static void
108 set_suffix_length (uintmax_t n_units)
110 #define DEFAULT_SUFFIX_LENGTH 2
112 size_t suffix_needed = 0;
113 size_t alphabet_len = strlen (suffix_alphabet);
114 bool alphabet_slop = (n_units % alphabet_len) != 0;
115 while (n_units /= alphabet_len)
116 suffix_needed++;
117 suffix_needed += alphabet_slop;
119 if (suffix_length) /* set by user */
121 if (suffix_length < suffix_needed)
123 error (EXIT_FAILURE, 0,
124 _("the suffix length needs to be at least %zu"),
125 suffix_needed);
127 return;
129 else
130 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
133 void
134 usage (int status)
136 if (status != EXIT_SUCCESS)
137 fprintf (stderr, _("Try `%s --help' for more information.\n"),
138 program_name);
139 else
141 printf (_("\
142 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
144 program_name);
145 fputs (_("\
146 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
147 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
148 is -, read standard input.\n\
150 "), stdout);
151 fputs (_("\
152 Mandatory arguments to long options are mandatory for short options too.\n\
153 "), stdout);
154 fprintf (stdout, _("\
155 -a, --suffix-length=N use suffixes of length N (default %d)\n\
156 -b, --bytes=SIZE put SIZE bytes per output file\n\
157 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
158 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
159 -e, --elide-empty-files do not generate empty output files with `-n'\n\
160 -l, --lines=NUMBER put NUMBER lines per output file\n\
161 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
162 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
163 "), DEFAULT_SUFFIX_LENGTH);
164 fputs (_("\
165 --verbose print a diagnostic just before each\n\
166 output file is opened\n\
167 "), stdout);
168 fputs (HELP_OPTION_DESCRIPTION, stdout);
169 fputs (VERSION_OPTION_DESCRIPTION, stdout);
170 emit_size_note ();
171 fputs (_("\n\
172 CHUNKS may be:\n\
173 N split into N files based on size of input\n\
174 K/N output Kth of N to stdout\n\
175 l/N split into N files without splitting lines\n\
176 l/K/N output Kth of N to stdout without splitting lines\n\
177 r/N like `l' but use round robin distribution\n\
178 r/K/N likewise but only output Kth of N to stdout\n\
179 "), stdout);
180 emit_ancillary_info ();
182 exit (status);
185 /* Compute the next sequential output file name and store it into the
186 string `outfile'. */
188 static void
189 next_file_name (void)
191 /* Index in suffix_alphabet of each character in the suffix. */
192 static size_t *sufindex;
194 if (! outfile)
196 /* Allocate and initialize the first file name. */
198 size_t outbase_length = strlen (outbase);
199 size_t outfile_length = outbase_length + suffix_length;
200 if (outfile_length + 1 < outbase_length)
201 xalloc_die ();
202 outfile = xmalloc (outfile_length + 1);
203 outfile_mid = outfile + outbase_length;
204 memcpy (outfile, outbase, outbase_length);
205 memset (outfile_mid, suffix_alphabet[0], suffix_length);
206 outfile[outfile_length] = 0;
207 sufindex = xcalloc (suffix_length, sizeof *sufindex);
209 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
210 /* POSIX requires that if the output file name is too long for
211 its directory, `split' must fail without creating any files.
212 This must be checked for explicitly on operating systems that
213 silently truncate file names. */
215 char *dir = dir_name (outfile);
216 long name_max = pathconf (dir, _PC_NAME_MAX);
217 if (0 <= name_max && name_max < base_len (last_component (outfile)))
218 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
219 free (dir);
221 #endif
223 else
225 /* Increment the suffix in place, if possible. */
227 size_t i = suffix_length;
228 while (i-- != 0)
230 sufindex[i]++;
231 outfile_mid[i] = suffix_alphabet[sufindex[i]];
232 if (outfile_mid[i])
233 return;
234 sufindex[i] = 0;
235 outfile_mid[i] = suffix_alphabet[sufindex[i]];
237 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
241 /* Create or truncate a file. */
243 static int
244 create (const char* name)
246 if (verbose)
247 fprintf (stdout, _("creating file %s\n"), quote (name));
248 return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
249 (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
252 /* Write BYTES bytes at BP to an output file.
253 If NEW_FILE_FLAG is true, open the next output file.
254 Otherwise add to the same output file already in use. */
256 static void
257 cwrite (bool new_file_flag, const char *bp, size_t bytes)
259 if (new_file_flag)
261 if (!bp && bytes == 0 && elide_empty_files)
262 return;
263 if (output_desc >= 0 && close (output_desc) < 0)
264 error (EXIT_FAILURE, errno, "%s", outfile);
265 next_file_name ();
266 if ((output_desc = create (outfile)) < 0)
267 error (EXIT_FAILURE, errno, "%s", outfile);
269 if (full_write (output_desc, bp, bytes) != bytes)
270 error (EXIT_FAILURE, errno, "%s", outfile);
273 /* Split into pieces of exactly N_BYTES bytes.
274 Use buffer BUF, whose size is BUFSIZE. */
276 static void
277 bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
279 size_t n_read;
280 bool new_file_flag = true;
281 size_t to_read;
282 uintmax_t to_write = n_bytes;
283 char *bp_out;
284 uintmax_t opened = 0;
288 n_read = full_read (STDIN_FILENO, buf, bufsize);
289 if (n_read < bufsize && errno)
290 error (EXIT_FAILURE, errno, "%s", infile);
291 bp_out = buf;
292 to_read = n_read;
293 while (true)
295 if (to_read < to_write)
297 if (to_read) /* do not write 0 bytes! */
299 cwrite (new_file_flag, bp_out, to_read);
300 opened += new_file_flag;
301 to_write -= to_read;
302 new_file_flag = false;
304 break;
306 else
308 size_t w = to_write;
309 cwrite (new_file_flag, bp_out, w);
310 opened += new_file_flag;
311 new_file_flag = !max_files || (opened < max_files);
312 bp_out += w;
313 to_read -= w;
314 to_write = n_bytes;
318 while (n_read == bufsize);
320 /* Ensure NUMBER files are created, which truncates
321 any existing files or notifies any consumers on fifos.
322 FIXME: Should we do this before EXIT_FAILURE? */
323 while (opened++ < max_files)
324 cwrite (true, NULL, 0);
327 /* Split into pieces of exactly N_LINES lines.
328 Use buffer BUF, whose size is BUFSIZE. */
330 static void
331 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
333 size_t n_read;
334 char *bp, *bp_out, *eob;
335 bool new_file_flag = true;
336 uintmax_t n = 0;
340 n_read = full_read (STDIN_FILENO, buf, bufsize);
341 if (n_read < bufsize && errno)
342 error (EXIT_FAILURE, errno, "%s", infile);
343 bp = bp_out = buf;
344 eob = bp + n_read;
345 *eob = '\n';
346 while (true)
348 bp = memchr (bp, '\n', eob - bp + 1);
349 if (bp == eob)
351 if (eob != bp_out) /* do not write 0 bytes! */
353 size_t len = eob - bp_out;
354 cwrite (new_file_flag, bp_out, len);
355 new_file_flag = false;
357 break;
360 ++bp;
361 if (++n >= n_lines)
363 cwrite (new_file_flag, bp_out, bp - bp_out);
364 bp_out = bp;
365 new_file_flag = true;
366 n = 0;
370 while (n_read == bufsize);
373 /* Split into pieces that are as large as possible while still not more
374 than N_BYTES bytes, and are split on line boundaries except
375 where lines longer than N_BYTES bytes occur.
376 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
377 buffer of size N_BYTES, in case N_BYTES is very large. */
379 static void
380 line_bytes_split (size_t n_bytes)
382 char *bp;
383 bool eof = false;
384 size_t n_buffered = 0;
385 char *buf = xmalloc (n_bytes);
389 /* Fill up the full buffer size from the input file. */
391 size_t to_read = n_bytes - n_buffered;
392 size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read);
393 if (n_read < to_read && errno)
394 error (EXIT_FAILURE, errno, "%s", infile);
396 n_buffered += n_read;
397 if (n_buffered != n_bytes)
399 if (n_buffered == 0)
400 break;
401 eof = true;
404 /* Find where to end this chunk. */
405 bp = buf + n_buffered;
406 if (n_buffered == n_bytes)
408 while (bp > buf && bp[-1] != '\n')
409 bp--;
412 /* If chunk has no newlines, use all the chunk. */
413 if (bp == buf)
414 bp = buf + n_buffered;
416 /* Output the chars as one output file. */
417 cwrite (true, buf, bp - buf);
419 /* Discard the chars we just output; move rest of chunk
420 down to be the start of the next chunk. Source and
421 destination probably overlap. */
422 n_buffered -= bp - buf;
423 if (n_buffered > 0)
424 memmove (buf, bp, n_buffered);
426 while (!eof);
427 free (buf);
430 /* -n l/[K/]N: Write lines to files of approximately file size / N.
431 The file is partitioned into file size / N sized portions, with the
432 last assigned any excess. If a line _starts_ within a partition
433 it is written completely to the corresponding file. Since lines
434 are not split even if they overlap a partition, the files written
435 can be larger or smaller than the partition size, and even empty
436 if a line is so long as to completely overlap the partition. */
438 static void
439 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
440 off_t file_size)
442 assert (n && k <= n && n <= file_size);
444 const off_t chunk_size = file_size / n;
445 uintmax_t chunk_no = 1;
446 off_t chunk_end = chunk_size - 1;
447 off_t n_written = 0;
448 bool new_file_flag = true;
450 if (k > 1)
452 /* Start reading 1 byte before kth chunk of file. */
453 off_t start = (k - 1) * chunk_size - 1;
454 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
455 error (EXIT_FAILURE, errno, "%s", infile);
456 n_written = start;
457 chunk_no = k - 1;
458 chunk_end = chunk_no * chunk_size - 1;
461 while (n_written < file_size)
463 char *bp = buf, *eob;
464 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
465 n_read = MIN (n_read, file_size - n_written);
466 if (n_read < bufsize && errno)
467 error (EXIT_FAILURE, errno, "%s", infile);
468 else if (n_read == 0)
469 break; /* eof. */
470 eob = buf + n_read;
472 while (bp != eob)
474 size_t to_write;
475 bool next = false;
477 /* Begin looking for '\n' at last byte of chunk. */
478 off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
479 char *bp_out = memchr (bp + skip, '\n', n_read - skip);
480 if (bp_out++)
481 next = true;
482 else
483 bp_out = eob;
484 to_write = bp_out - bp;
486 if (k == chunk_no)
488 /* We don't use the stdout buffer here since we're writing
489 large chunks from an existing file, so it's more efficient
490 to write out directly. */
491 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
492 error (EXIT_FAILURE, errno, "%s", _("write error"));
494 else
495 cwrite (new_file_flag, bp, to_write);
496 n_written += to_write;
497 bp += to_write;
498 n_read -= to_write;
499 new_file_flag = next;
501 /* A line could have been so long that it skipped
502 entire chunks. So create empty files in that case. */
503 while (next || chunk_end <= n_written - 1)
505 if (!next && bp == eob)
506 break; /* replenish buf, before going to next chunk. */
507 chunk_no++;
508 if (k && chunk_no > k)
509 return;
510 if (chunk_no == n)
511 chunk_end = file_size - 1; /* >= chunk_size. */
512 else
513 chunk_end += chunk_size;
514 if (chunk_end <= n_written - 1)
515 cwrite (true, NULL, 0);
516 else
517 next = false;
522 /* Ensure NUMBER files are created, which truncates
523 any existing files or notifies any consumers on fifos.
524 FIXME: Should we do this before EXIT_FAILURE? */
525 while (!k && chunk_no++ <= n)
526 cwrite (true, NULL, 0);
529 /* -n K/N: Extract Kth of N chunks. */
531 static void
532 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
533 off_t file_size)
535 off_t start;
536 off_t end;
538 assert (k && n && k <= n && n <= file_size);
540 start = (k - 1) * (file_size / n);
541 end = (k == n) ? file_size : k * (file_size / n);
543 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
544 error (EXIT_FAILURE, errno, "%s", infile);
546 while (start < end)
548 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
549 n_read = MIN (n_read, end - start);
550 if (n_read < bufsize && errno)
551 error (EXIT_FAILURE, errno, "%s", infile);
552 else if (n_read == 0)
553 break; /* eof. */
554 if (full_write (STDOUT_FILENO, buf, n_read) != n_read)
555 error (EXIT_FAILURE, errno, "%s", quote ("-"));
556 start += n_read;
/* Bookkeeping for one output file in round robin mode.  */
typedef struct of_info
{
  char *of_name;        /* output file name */
  int ofd;              /* open descriptor, or one of the OFD_* states */
  FILE *ofile;          /* stdio stream wrapped around OFD */
} of_t;

/* Negative values stored in the `ofd' member while no descriptor
   is open.  */
enum
{
  OFD_NEW = -1,         /* not opened yet: must be created */
  OFD_APPEND = -2       /* closed earlier: must be reopened for append */
};
573 /* Rotate file descriptors when we're writing to more output files than we
574 have available file descriptors.
575 Return whether we came under file resource pressure.
576 If so, it's probably best to close each file when finished with it. */
578 static bool
579 ofile_open (of_t *files, size_t i_check, size_t nfiles)
581 bool file_limit = false;
583 if (files[i_check].ofd <= OFD_NEW)
585 int fd;
586 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
588 /* Another process could have opened a file in between the calls to
589 close and open, so we should keep trying until open succeeds or
590 we've closed all of our files. */
591 while (true)
593 if (files[i_check].ofd == OFD_NEW)
594 fd = create (files[i_check].of_name);
595 else /* OFD_APPEND */
597 /* Attempt to append to previously opened file.
598 We use O_NONBLOCK to support writing to fifos,
599 where the other end has closed because of our
600 previous close. In that case we'll immediately
601 get an error, rather than waiting indefinitely.
602 In specialised cases the consumer can keep reading
603 from the fifo, terminating on conditions in the data
604 itself, or perhaps never in the case of `tail -f`.
605 I.E. for fifos it is valid to attempt this reopen. */
606 fd = open (files[i_check].of_name,
607 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
610 if (-1 < fd)
611 break;
613 if (!(errno == EMFILE || errno == ENFILE))
614 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
616 file_limit = true;
618 /* Search backwards for an open file to close. */
619 while (files[i_reopen].ofd < 0)
621 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
622 /* No more open files to close, exit with E[NM]FILE. */
623 if (i_reopen == i_check)
624 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
627 if (fclose (files[i_reopen].ofile) != 0)
628 error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
629 files[i_reopen].ofd = OFD_APPEND;
632 files[i_check].ofd = fd;
633 if (!(files[i_check].ofile = fdopen (fd, "a")))
634 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
637 return file_limit;
640 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
641 When K == 0, we try to keep the files open in parallel.
642 If we run out of file resources, then we revert
643 to opening and closing each file for each line. */
645 static void
646 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
648 bool file_limit;
649 size_t i_file;
650 of_t *files;
651 uintmax_t line_no;
653 if (k)
654 line_no = 1;
655 else
657 if (SIZE_MAX < n)
658 error (exit_failure, 0, "%s", _("memory exhausted"));
659 files = xnmalloc (n, sizeof *files);
661 /* Generate output file names. */
662 for (i_file = 0; i_file < n; i_file++)
664 next_file_name ();
665 files[i_file].of_name = xstrdup (outfile);
666 files[i_file].ofd = OFD_NEW;
667 files[i_file].ofile = NULL;
669 i_file = 0;
670 file_limit = false;
673 while (true)
675 char *bp = buf, *eob;
676 /* Use safe_read() rather than full_read() here
677 so that we process available data immediately. */
678 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
679 if (n_read == SAFE_READ_ERROR)
680 error (EXIT_FAILURE, errno, "%s", infile);
681 else if (n_read == 0)
682 break; /* eof. */
683 eob = buf + n_read;
685 while (bp != eob)
687 size_t to_write;
688 bool next = false;
690 /* Find end of line. */
691 char *bp_out = memchr (bp, '\n', eob - bp);
692 if (bp_out)
694 bp_out++;
695 next = true;
697 else
698 bp_out = eob;
699 to_write = bp_out - bp;
701 if (k)
703 if (line_no == k && unbuffered)
705 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
706 error (EXIT_FAILURE, errno, "%s", _("write error"));
708 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
710 clearerr (stdout); /* To silence close_stdout(). */
711 error (EXIT_FAILURE, errno, "%s", _("write error"));
713 if (next)
714 line_no = (line_no == n) ? 1 : line_no + 1;
716 else
718 /* Secure file descriptor. */
719 file_limit |= ofile_open (files, i_file, n);
720 if (unbuffered)
722 /* Note writing to fd, rather than flushing the FILE gives
723 an 8% performance benefit, due to reduced data copying. */
724 if (full_write (files[i_file].ofd, bp, to_write) != to_write)
725 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
727 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1)
728 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
729 if (file_limit)
731 if (fclose (files[i_file].ofile) != 0)
732 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
733 files[i_file].ofd = OFD_APPEND;
735 if (next && ++i_file == n)
736 i_file = 0;
739 bp = bp_out;
743 /* Ensure all files created, so that any existing files are truncated,
744 and to signal any waiting fifo consumers.
745 Also, close any open file descriptors.
746 FIXME: Should we do this before EXIT_FAILURE? */
747 for (i_file = 0; !k && !elide_empty_files && i_file < n; i_file++)
749 file_limit |= ofile_open (files, i_file, n);
750 if (fclose (files[i_file].ofile) != 0)
751 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
/* Diagnose conflicting split modes on the command line, then call
   usage with EXIT_FAILURE.  */
#define FAIL_ONLY_ONE_WAY()                                     \
  do                                                            \
    {                                                           \
      error (0, 0, _("cannot split in more than one way"));     \
      usage (EXIT_FAILURE);                                     \
    }                                                           \
  while (0)
763 /* Parse K/N syntax of chunk options. */
765 static void
766 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
768 *slash = '\0';
769 if (xstrtoumax (slash+1, NULL, 10, n_units, "") != LONGINT_OK
770 || *n_units == 0)
771 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash+1);
772 if (slash != optarg /* a leading number is specified. */
773 && (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
774 || *k_units == 0 || *n_units < *k_units))
775 error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
780 main (int argc, char **argv)
782 struct stat stat_buf;
783 enum
785 type_undef, type_bytes, type_byteslines, type_lines, type_digits,
786 type_chunk_bytes, type_chunk_lines, type_rr
787 } split_type = type_undef;
788 size_t in_blk_size = 0; /* optimal block size of input file device */
789 char *buf; /* file i/o buffer */
790 size_t page_size = getpagesize ();
791 uintmax_t k_units = 0;
792 uintmax_t n_units;
794 static char const multipliers[] = "bEGKkMmPTYZ0";
795 int c;
796 int digits_optind = 0;
797 off_t file_size;
799 initialize_main (&argc, &argv);
800 set_program_name (argv[0]);
801 setlocale (LC_ALL, "");
802 bindtextdomain (PACKAGE, LOCALEDIR);
803 textdomain (PACKAGE);
805 atexit (close_stdout);
807 /* Parse command line options. */
809 infile = bad_cast ("-");
810 outbase = bad_cast ("x");
812 while (true)
814 /* This is the argv-index of the option we will read next. */
815 int this_optind = optind ? optind : 1;
816 char *slash;
818 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u", longopts, NULL);
819 if (c == -1)
820 break;
822 switch (c)
824 case 'a':
826 unsigned long tmp;
827 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
828 || SIZE_MAX / sizeof (size_t) < tmp)
830 error (0, 0, _("%s: invalid suffix length"), optarg);
831 usage (EXIT_FAILURE);
833 suffix_length = tmp;
835 break;
837 case 'b':
838 if (split_type != type_undef)
839 FAIL_ONLY_ONE_WAY ();
840 split_type = type_bytes;
841 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
842 || n_units == 0)
844 error (0, 0, _("%s: invalid number of bytes"), optarg);
845 usage (EXIT_FAILURE);
847 /* If input is a pipe, we could get more data than is possible
848 to write to a single file, so indicate that immediately
849 rather than having possibly future invocations fail. */
850 if (OFF_T_MAX < n_units)
851 error (EXIT_FAILURE, EFBIG,
852 _("%s: invalid number of bytes"), optarg);
854 break;
856 case 'l':
857 if (split_type != type_undef)
858 FAIL_ONLY_ONE_WAY ();
859 split_type = type_lines;
860 if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
861 || n_units == 0)
863 error (0, 0, _("%s: invalid number of lines"), optarg);
864 usage (EXIT_FAILURE);
866 break;
868 case 'C':
869 if (split_type != type_undef)
870 FAIL_ONLY_ONE_WAY ();
871 split_type = type_byteslines;
872 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
873 || n_units == 0 || SIZE_MAX < n_units)
875 error (0, 0, _("%s: invalid number of bytes"), optarg);
876 usage (EXIT_FAILURE);
878 if (OFF_T_MAX < n_units)
879 error (EXIT_FAILURE, EFBIG,
880 _("%s: invalid number of bytes"), optarg);
881 break;
883 case 'n':
884 if (split_type != type_undef)
885 FAIL_ONLY_ONE_WAY ();
886 /* skip any whitespace */
887 while (isspace (to_uchar (*optarg)))
888 optarg++;
889 if (strncmp (optarg, "r/", 2) == 0)
891 split_type = type_rr;
892 optarg += 2;
894 else if (strncmp (optarg, "l/", 2) == 0)
896 split_type = type_chunk_lines;
897 optarg += 2;
899 else
900 split_type = type_chunk_bytes;
901 if ((slash = strchr (optarg, '/')))
902 parse_chunk (&k_units, &n_units, slash);
903 else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
904 || n_units == 0)
905 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
906 break;
908 case 'u':
909 unbuffered = true;
910 break;
912 case '0':
913 case '1':
914 case '2':
915 case '3':
916 case '4':
917 case '5':
918 case '6':
919 case '7':
920 case '8':
921 case '9':
922 if (split_type == type_undef)
924 split_type = type_digits;
925 n_units = 0;
927 if (split_type != type_undef && split_type != type_digits)
928 FAIL_ONLY_ONE_WAY ();
929 if (digits_optind != 0 && digits_optind != this_optind)
930 n_units = 0; /* More than one number given; ignore other. */
931 digits_optind = this_optind;
932 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
934 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
935 error (EXIT_FAILURE, 0,
936 _("line count option -%s%c... is too large"),
937 umaxtostr (n_units, buffer), c);
939 break;
941 case 'd':
942 suffix_alphabet = "0123456789";
943 break;
945 case 'e':
946 elide_empty_files = true;
947 break;
949 case IO_BLKSIZE_OPTION:
951 uintmax_t tmp_blk_size;
952 if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
953 multipliers) != LONGINT_OK
954 || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
955 error (0, 0, _("%s: invalid IO block size"), optarg);
956 else
957 in_blk_size = tmp_blk_size;
959 break;
961 case VERBOSE_OPTION:
962 verbose = true;
963 break;
965 case_GETOPT_HELP_CHAR;
967 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
969 default:
970 usage (EXIT_FAILURE);
974 /* Handle default case. */
975 if (split_type == type_undef)
977 split_type = type_lines;
978 n_units = 1000;
981 if (n_units == 0)
983 error (0, 0, _("%s: invalid number of lines"), "0");
984 usage (EXIT_FAILURE);
987 set_suffix_length (n_units);
989 /* Get out the filename arguments. */
991 if (optind < argc)
992 infile = argv[optind++];
994 if (optind < argc)
995 outbase = argv[optind++];
997 if (optind < argc)
999 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1000 usage (EXIT_FAILURE);
1003 /* Open the input file. */
1004 if (! STREQ (infile, "-")
1005 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1006 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1007 quote (infile));
1009 /* Binary I/O is safer when byte counts are used. */
1010 if (O_BINARY && ! isatty (STDIN_FILENO))
1011 xfreopen (NULL, "rb", stdin);
1013 /* Get the optimal block size of input device and make a buffer. */
1015 if (fstat (STDIN_FILENO, &stat_buf) != 0)
1016 error (EXIT_FAILURE, errno, "%s", infile);
1017 if (in_blk_size == 0)
1018 in_blk_size = io_blksize (stat_buf);
1019 file_size = stat_buf.st_size;
1021 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1023 off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1024 if (input_offset < 0)
1025 error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
1026 quote (infile));
1027 file_size -= input_offset;
1028 /* Overflow, and sanity checking. */
1029 if (OFF_T_MAX < n_units)
1031 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1032 error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
1033 umaxtostr (n_units, buffer));
1035 /* increase file_size to n_units here, so that we still process
1036 any input data, and create empty files for the rest. */
1037 file_size = MAX (file_size, n_units);
1040 buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);
1042 switch (split_type)
1044 case type_digits:
1045 case type_lines:
1046 lines_split (n_units, buf, in_blk_size);
1047 break;
1049 case type_bytes:
1050 bytes_split (n_units, buf, in_blk_size, 0);
1051 break;
1053 case type_byteslines:
1054 line_bytes_split (n_units);
1055 break;
1057 case type_chunk_bytes:
1058 if (k_units == 0)
1059 bytes_split (file_size / n_units, buf, in_blk_size, n_units);
1060 else
1061 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
1062 break;
1064 case type_chunk_lines:
1065 lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
1066 break;
1068 case type_rr:
1069 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1070 but the functionality is provided for symmetry. */
1071 lines_rr (k_units, n_units, buf, in_blk_size);
1072 break;
1074 default:
1075 abort ();
1078 if (close (STDIN_FILENO) != 0)
1079 error (EXIT_FAILURE, errno, "%s", infile);
1080 if (output_desc >= 0 && close (output_desc) < 0)
1081 error (EXIT_FAILURE, errno, "%s", outfile);
1083 exit (EXIT_SUCCESS);