dd: add a flag to discard cached data
[coreutils/ericb.git] / src / split.c
blob364576aa8baa46e2193feb6928075b43c7342f85
1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
19 To do:
20 * Implement -t CHAR or -t REGEX to specify break characters other
21 than newline. */
23 #include <config.h>
25 #include <assert.h>
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
30 #include "system.h"
31 #include "error.h"
32 #include "fd-reopen.h"
33 #include "fcntl--.h"
34 #include "full-read.h"
35 #include "full-write.h"
36 #include "quote.h"
37 #include "safe-read.h"
38 #include "xfreopen.h"
39 #include "xstrtol.h"
41 /* The official name of this program (e.g., no `g' prefix). */
42 #define PROGRAM_NAME "split"
44 #define AUTHORS \
45 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
46 proper_name ("Richard M. Stallman")
48 /* Base name of output files. */
49 static char const *outbase;
51 /* Name of output files. */
52 static char *outfile;
54 /* Pointer to the end of the prefix in OUTFILE.
55 Suffixes are inserted here. */
56 static char *outfile_mid;
58 /* Length of OUTFILE's suffix. */
59 static size_t suffix_length;
61 /* Alphabet of characters to use in suffix. */
62 static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
64 /* Name of input file. May be "-". */
65 static char *infile;
67 /* Descriptor on which output file is open. */
68 static int output_desc = -1;
70 /* If true, print a diagnostic on standard error just before each
71 output file is opened. */
72 static bool verbose;
74 /* If true, don't generate zero length output files. */
75 static bool elide_empty_files;
77 /* If true, in round robin mode, immediately copy
78 input to output, which is much slower, so disabled by default. */
79 static bool unbuffered;
/* How the input is to be divided; chosen from the command-line options.  */
enum Split_type
{
  type_undef,           /* no mode selected yet */
  type_bytes,           /* -b: fixed number of bytes per file */
  type_byteslines,      /* -C: whole lines, limited bytes per file */
  type_lines,           /* -l: fixed number of lines per file */
  type_digits,          /* -NUM: line count spelled as digit options */
  type_chunk_bytes,     /* -n [K/]N: chunks by size */
  type_chunk_lines,     /* -n l/[K/]N: chunks without splitting lines */
  type_rr               /* -n r/[K/]N: round robin distribution of lines */
};
/* Long options without a short equivalent get pseudo short-option
   codes that cannot collide with a real character: they start just
   past CHAR_MAX.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  IO_BLKSIZE_OPTION = VERBOSE_OPTION + 1
};
96 static struct option const longopts[] =
98 {"bytes", required_argument, NULL, 'b'},
99 {"lines", required_argument, NULL, 'l'},
100 {"line-bytes", required_argument, NULL, 'C'},
101 {"number", required_argument, NULL, 'n'},
102 {"elide-empty-files", no_argument, NULL, 'e'},
103 {"unbuffered", no_argument, NULL, 'u'},
104 {"suffix-length", required_argument, NULL, 'a'},
105 {"numeric-suffixes", no_argument, NULL, 'd'},
106 {"verbose", no_argument, NULL, VERBOSE_OPTION},
107 {"-io-blksize", required_argument, NULL,
108 IO_BLKSIZE_OPTION}, /* do not document */
109 {GETOPT_HELP_OPTION_DECL},
110 {GETOPT_VERSION_OPTION_DECL},
111 {NULL, 0, NULL, 0}
114 static void
115 set_suffix_length (uintmax_t n_units, enum Split_type split_type)
117 #define DEFAULT_SUFFIX_LENGTH 2
119 size_t suffix_needed = 0;
121 /* Auto-calculate the suffix length if the number of files is given. */
122 if (split_type == type_chunk_bytes || split_type == type_chunk_lines
123 || split_type == type_rr)
125 size_t alphabet_len = strlen (suffix_alphabet);
126 bool alphabet_slop = (n_units % alphabet_len) != 0;
127 while (n_units /= alphabet_len)
128 suffix_needed++;
129 suffix_needed += alphabet_slop;
132 if (suffix_length) /* set by user */
134 if (suffix_length < suffix_needed)
136 error (EXIT_FAILURE, 0,
137 _("the suffix length needs to be at least %zu"),
138 suffix_needed);
140 return;
142 else
143 suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
/* Print usage information and terminate the program with STATUS.
   For a STATUS other than EXIT_SUCCESS only a short hint pointing at
   --help is written to stderr; otherwise the complete help text is
   written to stdout.  This function does not return.  */
146 void
147 usage (int status)
149 if (status != EXIT_SUCCESS)
150 fprintf (stderr, _("Try `%s --help' for more information.\n"),
151 program_name);
152 else
154 printf (_("\
155 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
157 program_name);
158 fputs (_("\
159 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
160 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
161 is -, read standard input.\n\
163 "), stdout);
164 fputs (_("\
165 Mandatory arguments to long options are mandatory for short options too.\n\
166 "), stdout);
167 fprintf (stdout, _("\
168 -a, --suffix-length=N use suffixes of length N (default %d)\n\
169 -b, --bytes=SIZE put SIZE bytes per output file\n\
170 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
171 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
172 -e, --elide-empty-files do not generate empty output files with `-n'\n\
173 -l, --lines=NUMBER put NUMBER lines per output file\n\
174 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
175 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
176 "), DEFAULT_SUFFIX_LENGTH);
177 fputs (_("\
178 --verbose print a diagnostic just before each\n\
179 output file is opened\n\
180 "), stdout);
181 fputs (HELP_OPTION_DESCRIPTION, stdout);
182 fputs (VERSION_OPTION_DESCRIPTION, stdout);
183 emit_size_note ();
184 fputs (_("\n\
185 CHUNKS may be:\n\
186 N split into N files based on size of input\n\
187 K/N output Kth of N to stdout\n\
188 l/N split into N files without splitting lines\n\
189 l/K/N output Kth of N to stdout without splitting lines\n\
190 r/N like `l' but use round robin distribution\n\
191 r/K/N likewise but only output Kth of N to stdout\n\
192 "), stdout);
193 emit_ancillary_info ();
195 exit (status);
198 /* Compute the next sequential output file name and store it into the
199 string `outfile'. */
201 static void
202 next_file_name (void)
204 /* Index in suffix_alphabet of each character in the suffix. */
205 static size_t *sufindex;
207 if (! outfile)
209 /* Allocate and initialize the first file name. */
211 size_t outbase_length = strlen (outbase);
212 size_t outfile_length = outbase_length + suffix_length;
213 if (outfile_length + 1 < outbase_length)
214 xalloc_die ();
215 outfile = xmalloc (outfile_length + 1);
216 outfile_mid = outfile + outbase_length;
217 memcpy (outfile, outbase, outbase_length);
218 memset (outfile_mid, suffix_alphabet[0], suffix_length);
219 outfile[outfile_length] = 0;
220 sufindex = xcalloc (suffix_length, sizeof *sufindex);
222 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
223 /* POSIX requires that if the output file name is too long for
224 its directory, `split' must fail without creating any files.
225 This must be checked for explicitly on operating systems that
226 silently truncate file names. */
228 char *dir = dir_name (outfile);
229 long name_max = pathconf (dir, _PC_NAME_MAX);
230 if (0 <= name_max && name_max < base_len (last_component (outfile)))
231 error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
232 free (dir);
234 #endif
236 else
238 /* Increment the suffix in place, if possible. */
240 size_t i = suffix_length;
241 while (i-- != 0)
243 sufindex[i]++;
244 outfile_mid[i] = suffix_alphabet[sufindex[i]];
245 if (outfile_mid[i])
246 return;
247 sufindex[i] = 0;
248 outfile_mid[i] = suffix_alphabet[sufindex[i]];
250 error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
254 /* Create or truncate a file. */
256 static int
257 create (const char* name)
259 if (verbose)
260 fprintf (stdout, _("creating file %s\n"), quote (name));
261 return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
262 (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
265 /* Write BYTES bytes at BP to an output file.
266 If NEW_FILE_FLAG is true, open the next output file.
267 Otherwise add to the same output file already in use. */
269 static void
270 cwrite (bool new_file_flag, const char *bp, size_t bytes)
272 if (new_file_flag)
274 if (!bp && bytes == 0 && elide_empty_files)
275 return;
276 if (output_desc >= 0 && close (output_desc) < 0)
277 error (EXIT_FAILURE, errno, "%s", outfile);
278 next_file_name ();
279 if ((output_desc = create (outfile)) < 0)
280 error (EXIT_FAILURE, errno, "%s", outfile);
282 if (full_write (output_desc, bp, bytes) != bytes)
283 error (EXIT_FAILURE, errno, "%s", outfile);
286 /* Split into pieces of exactly N_BYTES bytes.
287 Use buffer BUF, whose size is BUFSIZE. */
289 static void
290 bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
292 size_t n_read;
293 bool new_file_flag = true;
294 size_t to_read;
295 uintmax_t to_write = n_bytes;
296 char *bp_out;
297 uintmax_t opened = 0;
301 n_read = full_read (STDIN_FILENO, buf, bufsize);
302 if (n_read < bufsize && errno)
303 error (EXIT_FAILURE, errno, "%s", infile);
304 bp_out = buf;
305 to_read = n_read;
306 while (true)
308 if (to_read < to_write)
310 if (to_read) /* do not write 0 bytes! */
312 cwrite (new_file_flag, bp_out, to_read);
313 opened += new_file_flag;
314 to_write -= to_read;
315 new_file_flag = false;
317 break;
319 else
321 size_t w = to_write;
322 cwrite (new_file_flag, bp_out, w);
323 opened += new_file_flag;
324 new_file_flag = !max_files || (opened < max_files);
325 bp_out += w;
326 to_read -= w;
327 to_write = n_bytes;
331 while (n_read == bufsize);
333 /* Ensure NUMBER files are created, which truncates
334 any existing files or notifies any consumers on fifos.
335 FIXME: Should we do this before EXIT_FAILURE? */
336 while (opened++ < max_files)
337 cwrite (true, NULL, 0);
340 /* Split into pieces of exactly N_LINES lines.
341 Use buffer BUF, whose size is BUFSIZE. */
343 static void
344 lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
346 size_t n_read;
347 char *bp, *bp_out, *eob;
348 bool new_file_flag = true;
349 uintmax_t n = 0;
353 n_read = full_read (STDIN_FILENO, buf, bufsize);
354 if (n_read < bufsize && errno)
355 error (EXIT_FAILURE, errno, "%s", infile);
356 bp = bp_out = buf;
357 eob = bp + n_read;
358 *eob = '\n';
359 while (true)
361 bp = memchr (bp, '\n', eob - bp + 1);
362 if (bp == eob)
364 if (eob != bp_out) /* do not write 0 bytes! */
366 size_t len = eob - bp_out;
367 cwrite (new_file_flag, bp_out, len);
368 new_file_flag = false;
370 break;
373 ++bp;
374 if (++n >= n_lines)
376 cwrite (new_file_flag, bp_out, bp - bp_out);
377 bp_out = bp;
378 new_file_flag = true;
379 n = 0;
383 while (n_read == bufsize);
386 /* Split into pieces that are as large as possible while still not more
387 than N_BYTES bytes, and are split on line boundaries except
388 where lines longer than N_BYTES bytes occur.
389 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
390 buffer of size N_BYTES, in case N_BYTES is very large. */
392 static void
393 line_bytes_split (size_t n_bytes)
395 char *bp;
396 bool eof = false;
397 size_t n_buffered = 0;
398 char *buf = xmalloc (n_bytes);
402 /* Fill up the full buffer size from the input file. */
404 size_t to_read = n_bytes - n_buffered;
405 size_t n_read = full_read (STDIN_FILENO, buf + n_buffered, to_read);
406 if (n_read < to_read && errno)
407 error (EXIT_FAILURE, errno, "%s", infile);
409 n_buffered += n_read;
410 if (n_buffered != n_bytes)
412 if (n_buffered == 0)
413 break;
414 eof = true;
417 /* Find where to end this chunk. */
418 bp = buf + n_buffered;
419 if (n_buffered == n_bytes)
421 while (bp > buf && bp[-1] != '\n')
422 bp--;
425 /* If chunk has no newlines, use all the chunk. */
426 if (bp == buf)
427 bp = buf + n_buffered;
429 /* Output the chars as one output file. */
430 cwrite (true, buf, bp - buf);
432 /* Discard the chars we just output; move rest of chunk
433 down to be the start of the next chunk. Source and
434 destination probably overlap. */
435 n_buffered -= bp - buf;
436 if (n_buffered > 0)
437 memmove (buf, bp, n_buffered);
439 while (!eof);
440 free (buf);
443 /* -n l/[K/]N: Write lines to files of approximately file size / N.
444 The file is partitioned into file size / N sized portions, with the
445 last assigned any excess. If a line _starts_ within a partition
446 it is written completely to the corresponding file. Since lines
447 are not split even if they overlap a partition, the files written
448 can be larger or smaller than the partition size, and even empty
449 if a line is so long as to completely overlap the partition. */
451 static void
452 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
453 off_t file_size)
455 assert (n && k <= n && n <= file_size);
457 const off_t chunk_size = file_size / n;
458 uintmax_t chunk_no = 1;
459 off_t chunk_end = chunk_size - 1;
460 off_t n_written = 0;
461 bool new_file_flag = true;
463 if (k > 1)
465 /* Start reading 1 byte before kth chunk of file. */
466 off_t start = (k - 1) * chunk_size - 1;
467 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
468 error (EXIT_FAILURE, errno, "%s", infile);
469 n_written = start;
470 chunk_no = k - 1;
471 chunk_end = chunk_no * chunk_size - 1;
474 while (n_written < file_size)
476 char *bp = buf, *eob;
477 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
478 n_read = MIN (n_read, file_size - n_written);
479 if (n_read < bufsize && errno)
480 error (EXIT_FAILURE, errno, "%s", infile);
481 else if (n_read == 0)
482 break; /* eof. */
483 eob = buf + n_read;
485 while (bp != eob)
487 size_t to_write;
488 bool next = false;
490 /* Begin looking for '\n' at last byte of chunk. */
491 off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
492 char *bp_out = memchr (bp + skip, '\n', n_read - skip);
493 if (bp_out++)
494 next = true;
495 else
496 bp_out = eob;
497 to_write = bp_out - bp;
499 if (k == chunk_no)
501 /* We don't use the stdout buffer here since we're writing
502 large chunks from an existing file, so it's more efficient
503 to write out directly. */
504 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
505 error (EXIT_FAILURE, errno, "%s", _("write error"));
507 else
508 cwrite (new_file_flag, bp, to_write);
509 n_written += to_write;
510 bp += to_write;
511 n_read -= to_write;
512 new_file_flag = next;
514 /* A line could have been so long that it skipped
515 entire chunks. So create empty files in that case. */
516 while (next || chunk_end <= n_written - 1)
518 if (!next && bp == eob)
519 break; /* replenish buf, before going to next chunk. */
520 chunk_no++;
521 if (k && chunk_no > k)
522 return;
523 if (chunk_no == n)
524 chunk_end = file_size - 1; /* >= chunk_size. */
525 else
526 chunk_end += chunk_size;
527 if (chunk_end <= n_written - 1)
528 cwrite (true, NULL, 0);
529 else
530 next = false;
535 /* Ensure NUMBER files are created, which truncates
536 any existing files or notifies any consumers on fifos.
537 FIXME: Should we do this before EXIT_FAILURE? */
538 while (!k && chunk_no++ <= n)
539 cwrite (true, NULL, 0);
542 /* -n K/N: Extract Kth of N chunks. */
544 static void
545 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
546 off_t file_size)
548 off_t start;
549 off_t end;
551 assert (k && n && k <= n && n <= file_size);
553 start = (k - 1) * (file_size / n);
554 end = (k == n) ? file_size : k * (file_size / n);
556 if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
557 error (EXIT_FAILURE, errno, "%s", infile);
559 while (start < end)
561 size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
562 n_read = MIN (n_read, end - start);
563 if (n_read < bufsize && errno)
564 error (EXIT_FAILURE, errno, "%s", infile);
565 else if (n_read == 0)
566 break; /* eof. */
567 if (full_write (STDOUT_FILENO, buf, n_read) != n_read)
568 error (EXIT_FAILURE, errno, "%s", quote ("-"));
569 start += n_read;
/* Book-keeping for one output file of the round robin mode.  */
typedef struct of_info
{
  char *of_name;        /* file name */
  int ofd;              /* open descriptor, or OFD_NEW / OFD_APPEND */
  FILE *ofile;          /* stdio stream on OFD while the file is open */
} of_t;
/* Negative values stored in the `ofd' member of an of_t in place of
   a descriptor while the file is not open.  */
enum
{
  OFD_NEW = -1,         /* never opened: must be created */
  OFD_APPEND = -2       /* opened before and closed: reopen to append */
};
586 /* Rotate file descriptors when we're writing to more output files than we
587 have available file descriptors.
588 Return whether we came under file resource pressure.
589 If so, it's probably best to close each file when finished with it. */
591 static bool
592 ofile_open (of_t *files, size_t i_check, size_t nfiles)
594 bool file_limit = false;
596 if (files[i_check].ofd <= OFD_NEW)
598 int fd;
599 size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
601 /* Another process could have opened a file in between the calls to
602 close and open, so we should keep trying until open succeeds or
603 we've closed all of our files. */
604 while (true)
606 if (files[i_check].ofd == OFD_NEW)
607 fd = create (files[i_check].of_name);
608 else /* OFD_APPEND */
610 /* Attempt to append to previously opened file.
611 We use O_NONBLOCK to support writing to fifos,
612 where the other end has closed because of our
613 previous close. In that case we'll immediately
614 get an error, rather than waiting indefinitely.
615 In specialised cases the consumer can keep reading
616 from the fifo, terminating on conditions in the data
617 itself, or perhaps never in the case of `tail -f`.
618 I.E. for fifos it is valid to attempt this reopen. */
619 fd = open (files[i_check].of_name,
620 O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
623 if (-1 < fd)
624 break;
626 if (!(errno == EMFILE || errno == ENFILE))
627 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
629 file_limit = true;
631 /* Search backwards for an open file to close. */
632 while (files[i_reopen].ofd < 0)
634 i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
635 /* No more open files to close, exit with E[NM]FILE. */
636 if (i_reopen == i_check)
637 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
640 if (fclose (files[i_reopen].ofile) != 0)
641 error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
642 files[i_reopen].ofd = OFD_APPEND;
645 files[i_check].ofd = fd;
646 if (!(files[i_check].ofile = fdopen (fd, "a")))
647 error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
650 return file_limit;
653 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
654 When K == 0, we try to keep the files open in parallel.
655 If we run out of file resources, then we revert
656 to opening and closing each file for each line. */
658 static void
659 lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
661 bool file_limit;
662 size_t i_file;
663 of_t *files IF_LINT (= NULL);
664 uintmax_t line_no;
666 if (k)
667 line_no = 1;
668 else
670 if (SIZE_MAX < n)
671 error (exit_failure, 0, "%s", _("memory exhausted"));
672 files = xnmalloc (n, sizeof *files);
674 /* Generate output file names. */
675 for (i_file = 0; i_file < n; i_file++)
677 next_file_name ();
678 files[i_file].of_name = xstrdup (outfile);
679 files[i_file].ofd = OFD_NEW;
680 files[i_file].ofile = NULL;
682 i_file = 0;
683 file_limit = false;
686 while (true)
688 char *bp = buf, *eob;
689 /* Use safe_read() rather than full_read() here
690 so that we process available data immediately. */
691 size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
692 if (n_read == SAFE_READ_ERROR)
693 error (EXIT_FAILURE, errno, "%s", infile);
694 else if (n_read == 0)
695 break; /* eof. */
696 eob = buf + n_read;
698 while (bp != eob)
700 size_t to_write;
701 bool next = false;
703 /* Find end of line. */
704 char *bp_out = memchr (bp, '\n', eob - bp);
705 if (bp_out)
707 bp_out++;
708 next = true;
710 else
711 bp_out = eob;
712 to_write = bp_out - bp;
714 if (k)
716 if (line_no == k && unbuffered)
718 if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
719 error (EXIT_FAILURE, errno, "%s", _("write error"));
721 else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
723 clearerr (stdout); /* To silence close_stdout(). */
724 error (EXIT_FAILURE, errno, "%s", _("write error"));
726 if (next)
727 line_no = (line_no == n) ? 1 : line_no + 1;
729 else
731 /* Secure file descriptor. */
732 file_limit |= ofile_open (files, i_file, n);
733 if (unbuffered)
735 /* Note writing to fd, rather than flushing the FILE gives
736 an 8% performance benefit, due to reduced data copying. */
737 if (full_write (files[i_file].ofd, bp, to_write) != to_write)
738 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
740 else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1)
741 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
742 if (file_limit)
744 if (fclose (files[i_file].ofile) != 0)
745 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
746 files[i_file].ofd = OFD_APPEND;
748 if (next && ++i_file == n)
749 i_file = 0;
752 bp = bp_out;
756 /* Ensure all files created, so that any existing files are truncated,
757 and to signal any waiting fifo consumers.
758 Also, close any open file descriptors.
759 FIXME: Should we do this before EXIT_FAILURE? */
760 for (i_file = 0; !k && !elide_empty_files && i_file < n; i_file++)
762 file_limit |= ofile_open (files, i_file, n);
763 if (fclose (files[i_file].ofile) != 0)
764 error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
/* Diagnose the use of more than one splitting option and exit through
   usage.  Wrapped in do ... while (0) so that the macro behaves as a
   single statement.  */
#define FAIL_ONLY_ONE_WAY()                                     \
  do                                                            \
    {                                                           \
      error (0, 0, _("cannot split in more than one way"));     \
      usage (EXIT_FAILURE);                                     \
    }                                                           \
  while (0)
776 /* Parse K/N syntax of chunk options. */
778 static void
779 parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
781 *slash = '\0';
782 if (xstrtoumax (slash+1, NULL, 10, n_units, "") != LONGINT_OK
783 || *n_units == 0)
784 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash+1);
785 if (slash != optarg /* a leading number is specified. */
786 && (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
787 || *k_units == 0 || *n_units < *k_units))
788 error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
/* Program entry point.  Parse the command-line options into a split
   mode and its numeric argument, settle the suffix length, take the
   optional INPUT and PREFIX operands, reopen INPUT on standard input,
   allocate a page-aligned I/O buffer and hand over to the splitting
   routine for the selected mode.  Terminates with exit (EXIT_SUCCESS)
   after closing the input and the last output file; every failure
   exits through error or usage.  */
793 main (int argc, char **argv)
795 struct stat stat_buf;
796 enum Split_type split_type = type_undef;
797 size_t in_blk_size = 0; /* optimal block size of input file device */
798 char *buf; /* file i/o buffer */
799 size_t page_size = getpagesize ();
800 uintmax_t k_units = 0;
801 uintmax_t n_units;
803 static char const multipliers[] = "bEGKkMmPTYZ0";
804 int c;
805 int digits_optind = 0;
806 off_t file_size;
808 initialize_main (&argc, &argv);
809 set_program_name (argv[0]);
810 setlocale (LC_ALL, "");
811 bindtextdomain (PACKAGE, LOCALEDIR);
812 textdomain (PACKAGE);
814 atexit (close_stdout);
816 /* Parse command line options. */
818 infile = bad_cast ("-");
819 outbase = bad_cast ("x");
821 while (true)
823 /* This is the argv-index of the option we will read next. */
824 int this_optind = optind ? optind : 1;
825 char *slash;
827 c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u", longopts, NULL);
828 if (c == -1)
829 break;
831 switch (c)
833 case 'a':
835 unsigned long tmp;
836 if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
837 || SIZE_MAX / sizeof (size_t) < tmp)
839 error (0, 0, _("%s: invalid suffix length"), optarg);
840 usage (EXIT_FAILURE);
842 suffix_length = tmp;
844 break;
846 case 'b':
847 if (split_type != type_undef)
848 FAIL_ONLY_ONE_WAY ();
849 split_type = type_bytes;
850 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
851 || n_units == 0)
853 error (0, 0, _("%s: invalid number of bytes"), optarg);
854 usage (EXIT_FAILURE);
856 /* If input is a pipe, we could get more data than is possible
857 to write to a single file, so indicate that immediately
858 rather than having possibly future invocations fail. */
859 if (OFF_T_MAX < n_units)
860 error (EXIT_FAILURE, EFBIG,
861 _("%s: invalid number of bytes"), optarg);
863 break;
865 case 'l':
866 if (split_type != type_undef)
867 FAIL_ONLY_ONE_WAY ();
868 split_type = type_lines;
869 if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
870 || n_units == 0)
872 error (0, 0, _("%s: invalid number of lines"), optarg);
873 usage (EXIT_FAILURE);
875 break;
877 case 'C':
878 if (split_type != type_undef)
879 FAIL_ONLY_ONE_WAY ();
880 split_type = type_byteslines;
881 if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
882 || n_units == 0 || SIZE_MAX < n_units)
884 error (0, 0, _("%s: invalid number of bytes"), optarg);
885 usage (EXIT_FAILURE);
887 if (OFF_T_MAX < n_units)
888 error (EXIT_FAILURE, EFBIG,
889 _("%s: invalid number of bytes"), optarg);
890 break;
892 case 'n':
893 if (split_type != type_undef)
894 FAIL_ONLY_ONE_WAY ();
895 /* skip any whitespace */
896 while (isspace (to_uchar (*optarg)))
897 optarg++;
898 if (strncmp (optarg, "r/", 2) == 0)
900 split_type = type_rr;
901 optarg += 2;
903 else if (strncmp (optarg, "l/", 2) == 0)
905 split_type = type_chunk_lines;
906 optarg += 2;
908 else
909 split_type = type_chunk_bytes;
910 if ((slash = strchr (optarg, '/')))
911 parse_chunk (&k_units, &n_units, slash);
912 else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
913 || n_units == 0)
914 error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
915 break;
917 case 'u':
918 unbuffered = true;
919 break;
921 case '0':
922 case '1':
923 case '2':
924 case '3':
925 case '4':
926 case '5':
927 case '6':
928 case '7':
929 case '8':
930 case '9':
931 if (split_type == type_undef)
933 split_type = type_digits;
934 n_units = 0;
936 if (split_type != type_undef && split_type != type_digits)
937 FAIL_ONLY_ONE_WAY ();
938 if (digits_optind != 0 && digits_optind != this_optind)
939 n_units = 0; /* More than one number given; ignore other. */
940 digits_optind = this_optind;
941 if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
943 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
944 error (EXIT_FAILURE, 0,
945 _("line count option -%s%c... is too large"),
946 umaxtostr (n_units, buffer), c);
948 break;
950 case 'd':
951 suffix_alphabet = "0123456789";
952 break;
954 case 'e':
955 elide_empty_files = true;
956 break;
958 case IO_BLKSIZE_OPTION:
960 uintmax_t tmp_blk_size;
961 if (xstrtoumax (optarg, NULL, 10, &tmp_blk_size,
962 multipliers) != LONGINT_OK
963 || tmp_blk_size == 0 || SIZE_MAX - page_size < tmp_blk_size)
964 error (0, 0, _("%s: invalid IO block size"), optarg);
965 else
966 in_blk_size = tmp_blk_size;
968 break;
970 case VERBOSE_OPTION:
971 verbose = true;
972 break;
974 case_GETOPT_HELP_CHAR;
976 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
978 default:
979 usage (EXIT_FAILURE);
983 /* Handle default case. */
984 if (split_type == type_undef)
986 split_type = type_lines;
987 n_units = 1000;
990 if (n_units == 0)
992 error (0, 0, _("%s: invalid number of lines"), "0");
993 usage (EXIT_FAILURE);
996 set_suffix_length (n_units, split_type);
998 /* Get out the filename arguments. */
1000 if (optind < argc)
1001 infile = argv[optind++];
1003 if (optind < argc)
1004 outbase = argv[optind++];
1006 if (optind < argc)
1008 error (0, 0, _("extra operand %s"), quote (argv[optind]));
1009 usage (EXIT_FAILURE);
1012 /* Open the input file. */
1013 if (! STREQ (infile, "-")
1014 && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
1015 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
1016 quote (infile));
1018 /* Binary I/O is safer when byte counts are used. */
1019 if (O_BINARY && ! isatty (STDIN_FILENO))
1020 xfreopen (NULL, "rb", stdin);
1022 /* Get the optimal block size of input device and make a buffer. */
1024 if (fstat (STDIN_FILENO, &stat_buf) != 0)
1025 error (EXIT_FAILURE, errno, "%s", infile);
1026 if (in_blk_size == 0)
1027 in_blk_size = io_blksize (stat_buf);
1028 file_size = stat_buf.st_size;
1030 if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
1032 off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
1033 if (input_offset < 0)
1034 error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
1035 quote (infile));
1036 file_size -= input_offset;
1037 /* Overflow, and sanity checking. */
1038 if (OFF_T_MAX < n_units)
1040 char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
1041 error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
1042 umaxtostr (n_units, buffer));
1044 /* increase file_size to n_units here, so that we still process
1045 any input data, and create empty files for the rest. */
1046 file_size = MAX (file_size, n_units);
/* The buffer holds one byte more than in_blk_size: lines_split stores
   a sentinel newline just past the data it has read.  The remaining
   page_size - 1 bytes leave room for aligning the buffer to a page.  */
1049 buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);
1051 switch (split_type)
1053 case type_digits:
1054 case type_lines:
1055 lines_split (n_units, buf, in_blk_size);
1056 break;
1058 case type_bytes:
1059 bytes_split (n_units, buf, in_blk_size, 0);
1060 break;
1062 case type_byteslines:
1063 line_bytes_split (n_units);
1064 break;
1066 case type_chunk_bytes:
1067 if (k_units == 0)
1068 bytes_split (file_size / n_units, buf, in_blk_size, n_units);
1069 else
1070 bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
1071 break;
1073 case type_chunk_lines:
1074 lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
1075 break;
1077 case type_rr:
1078 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1079 but the functionality is provided for symmetry. */
1080 lines_rr (k_units, n_units, buf, in_blk_size);
1081 break;
1083 default:
1084 abort ();
1087 if (close (STDIN_FILENO) != 0)
1088 error (EXIT_FAILURE, errno, "%s", infile);
1089 if (output_desc >= 0 && close (output_desc) < 0)
1090 error (EXIT_FAILURE, errno, "%s", outfile);
1092 exit (EXIT_SUCCESS);