1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
20 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
32 #include "fd-reopen.h"
34 #include "full-read.h"
35 #include "full-write.h"
37 #include "safe-read.h"
41 /* The official name of this program (e.g., no `g' prefix). */
42 #define PROGRAM_NAME "split"
45 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
46 proper_name ("Richard M. Stallman")
48 /* Base name of output files. */
49 static char const *outbase
;
51 /* Name of output files. */
54 /* Pointer to the end of the prefix in OUTFILE.
55 Suffixes are inserted here. */
56 static char *outfile_mid
;
58 /* Length of OUTFILE's suffix. */
59 static size_t suffix_length
;
61 /* Alphabet of characters to use in suffix. */
62 static char const *suffix_alphabet
= "abcdefghijklmnopqrstuvwxyz";
64 /* Name of input file. May be "-". */
67 /* Descriptor on which output file is open. */
68 static int output_desc
= -1;
70 /* If true, print a diagnostic on standard output just before each
71 output file is opened. */
74 /* If true, don't generate zero length output files. */
75 static bool elide_empty_files
;
77 /* If true, in round robin mode, immediately copy
78 input to output, which is much slower, so disabled by default. */
79 static bool unbuffered
;
81 /* The split mode to use. */
84 type_undef
, type_bytes
, type_byteslines
, type_lines
, type_digits
,
85 type_chunk_bytes
, type_chunk_lines
, type_rr
88 /* For long options that have no equivalent short option, use a
89 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
92 VERBOSE_OPTION
= CHAR_MAX
+ 1,
96 static struct option
const longopts
[] =
98 {"bytes", required_argument
, NULL
, 'b'},
99 {"lines", required_argument
, NULL
, 'l'},
100 {"line-bytes", required_argument
, NULL
, 'C'},
101 {"number", required_argument
, NULL
, 'n'},
102 {"elide-empty-files", no_argument
, NULL
, 'e'},
103 {"unbuffered", no_argument
, NULL
, 'u'},
104 {"suffix-length", required_argument
, NULL
, 'a'},
105 {"numeric-suffixes", no_argument
, NULL
, 'd'},
106 {"verbose", no_argument
, NULL
, VERBOSE_OPTION
},
107 {"-io-blksize", required_argument
, NULL
,
108 IO_BLKSIZE_OPTION
}, /* do not document */
109 {GETOPT_HELP_OPTION_DECL
},
110 {GETOPT_VERSION_OPTION_DECL
},
115 set_suffix_length (uintmax_t n_units
, enum Split_type split_type
)
117 #define DEFAULT_SUFFIX_LENGTH 2
119 size_t suffix_needed
= 0;
121 /* Auto-calculate the suffix length if the number of files is given. */
122 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
123 || split_type
== type_rr
)
125 size_t alphabet_len
= strlen (suffix_alphabet
);
126 bool alphabet_slop
= (n_units
% alphabet_len
) != 0;
127 while (n_units
/= alphabet_len
)
129 suffix_needed
+= alphabet_slop
;
132 if (suffix_length
) /* set by user */
134 if (suffix_length
< suffix_needed
)
136 error (EXIT_FAILURE
, 0,
137 _("the suffix length needs to be at least %zu"),
143 suffix_length
= MAX (DEFAULT_SUFFIX_LENGTH
, suffix_needed
);
149 if (status
!= EXIT_SUCCESS
)
150 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
155 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
159 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
160 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
161 is -, read standard input.\n\
165 Mandatory arguments to long options are mandatory for short options too.\n\
167 fprintf (stdout
, _("\
168 -a, --suffix-length=N use suffixes of length N (default %d)\n\
169 -b, --bytes=SIZE put SIZE bytes per output file\n\
170 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
171 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
172 -e, --elide-empty-files do not generate empty output files with `-n'\n\
173 -l, --lines=NUMBER put NUMBER lines per output file\n\
174 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
175 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
176 "), DEFAULT_SUFFIX_LENGTH
);
178 --verbose print a diagnostic just before each\n\
179 output file is opened\n\
181 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
182 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
186 N split into N files based on size of input\n\
187 K/N output Kth of N to stdout\n\
188 l/N split into N files without splitting lines\n\
189 l/K/N output Kth of N to stdout without splitting lines\n\
190 r/N like `l' but use round robin distribution\n\
191 r/K/N likewise but only output Kth of N to stdout\n\
193 emit_ancillary_info ();
198 /* Compute the next sequential output file name and store it into the
202 next_file_name (void)
204 /* Index in suffix_alphabet of each character in the suffix. */
205 static size_t *sufindex
;
209 /* Allocate and initialize the first file name. */
211 size_t outbase_length
= strlen (outbase
);
212 size_t outfile_length
= outbase_length
+ suffix_length
;
213 if (outfile_length
+ 1 < outbase_length
)
215 outfile
= xmalloc (outfile_length
+ 1);
216 outfile_mid
= outfile
+ outbase_length
;
217 memcpy (outfile
, outbase
, outbase_length
);
218 memset (outfile_mid
, suffix_alphabet
[0], suffix_length
);
219 outfile
[outfile_length
] = 0;
220 sufindex
= xcalloc (suffix_length
, sizeof *sufindex
);
222 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
223 /* POSIX requires that if the output file name is too long for
224 its directory, `split' must fail without creating any files.
225 This must be checked for explicitly on operating systems that
226 silently truncate file names. */
228 char *dir
= dir_name (outfile
);
229 long name_max
= pathconf (dir
, _PC_NAME_MAX
);
230 if (0 <= name_max
&& name_max
< base_len (last_component (outfile
)))
231 error (EXIT_FAILURE
, ENAMETOOLONG
, "%s", outfile
);
238 /* Increment the suffix in place, if possible. */
240 size_t i
= suffix_length
;
244 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
248 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
250 error (EXIT_FAILURE
, 0, _("output file suffixes exhausted"));
254 /* Create or truncate a file. */
257 create (const char* name
)
260 fprintf (stdout
, _("creating file %s\n"), quote (name
));
261 return open (name
, O_WRONLY
| O_CREAT
| O_TRUNC
| O_BINARY
,
262 (S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IWGRP
| S_IROTH
| S_IWOTH
));
265 /* Write BYTES bytes at BP to an output file.
266 If NEW_FILE_FLAG is true, open the next output file.
267 Otherwise add to the same output file already in use. */
270 cwrite (bool new_file_flag
, const char *bp
, size_t bytes
)
274 if (!bp
&& bytes
== 0 && elide_empty_files
)
276 if (output_desc
>= 0 && close (output_desc
) < 0)
277 error (EXIT_FAILURE
, errno
, "%s", outfile
);
279 if ((output_desc
= create (outfile
)) < 0)
280 error (EXIT_FAILURE
, errno
, "%s", outfile
);
282 if (full_write (output_desc
, bp
, bytes
) != bytes
)
283 error (EXIT_FAILURE
, errno
, "%s", outfile
);
286 /* Split into pieces of exactly N_BYTES bytes.
287 Use buffer BUF, whose size is BUFSIZE. */
290 bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
, uintmax_t max_files
)
293 bool new_file_flag
= true;
295 uintmax_t to_write
= n_bytes
;
297 uintmax_t opened
= 0;
301 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
302 if (n_read
< bufsize
&& errno
)
303 error (EXIT_FAILURE
, errno
, "%s", infile
);
308 if (to_read
< to_write
)
310 if (to_read
) /* do not write 0 bytes! */
312 cwrite (new_file_flag
, bp_out
, to_read
);
313 opened
+= new_file_flag
;
315 new_file_flag
= false;
322 cwrite (new_file_flag
, bp_out
, w
);
323 opened
+= new_file_flag
;
324 new_file_flag
= !max_files
|| (opened
< max_files
);
331 while (n_read
== bufsize
);
333 /* Ensure NUMBER files are created, which truncates
334 any existing files or notifies any consumers on fifos.
335 FIXME: Should we do this before EXIT_FAILURE? */
336 while (opened
++ < max_files
)
337 cwrite (true, NULL
, 0);
340 /* Split into pieces of exactly N_LINES lines.
341 Use buffer BUF, whose size is BUFSIZE. */
344 lines_split (uintmax_t n_lines
, char *buf
, size_t bufsize
)
347 char *bp
, *bp_out
, *eob
;
348 bool new_file_flag
= true;
353 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
354 if (n_read
< bufsize
&& errno
)
355 error (EXIT_FAILURE
, errno
, "%s", infile
);
361 bp
= memchr (bp
, '\n', eob
- bp
+ 1);
364 if (eob
!= bp_out
) /* do not write 0 bytes! */
366 size_t len
= eob
- bp_out
;
367 cwrite (new_file_flag
, bp_out
, len
);
368 new_file_flag
= false;
376 cwrite (new_file_flag
, bp_out
, bp
- bp_out
);
378 new_file_flag
= true;
383 while (n_read
== bufsize
);
386 /* Split into pieces that are as large as possible while still not more
387 than N_BYTES bytes, and are split on line boundaries except
388 where lines longer than N_BYTES bytes occur.
389 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
390 buffer of size N_BYTES, in case N_BYTES is very large. */
393 line_bytes_split (size_t n_bytes
)
397 size_t n_buffered
= 0;
398 char *buf
= xmalloc (n_bytes
);
402 /* Fill up the full buffer size from the input file. */
404 size_t to_read
= n_bytes
- n_buffered
;
405 size_t n_read
= full_read (STDIN_FILENO
, buf
+ n_buffered
, to_read
);
406 if (n_read
< to_read
&& errno
)
407 error (EXIT_FAILURE
, errno
, "%s", infile
);
409 n_buffered
+= n_read
;
410 if (n_buffered
!= n_bytes
)
417 /* Find where to end this chunk. */
418 bp
= buf
+ n_buffered
;
419 if (n_buffered
== n_bytes
)
421 while (bp
> buf
&& bp
[-1] != '\n')
425 /* If chunk has no newlines, use all the chunk. */
427 bp
= buf
+ n_buffered
;
429 /* Output the chars as one output file. */
430 cwrite (true, buf
, bp
- buf
);
432 /* Discard the chars we just output; move rest of chunk
433 down to be the start of the next chunk. Source and
434 destination probably overlap. */
435 n_buffered
-= bp
- buf
;
437 memmove (buf
, bp
, n_buffered
);
443 /* -n l/[K/]N: Write lines to files of approximately file size / N.
444 The file is partitioned into file size / N sized portions, with the
445 last assigned any excess. If a line _starts_ within a partition
446 it is written completely to the corresponding file. Since lines
447 are not split even if they overlap a partition, the files written
448 can be larger or smaller than the partition size, and even empty
449 if a line is so long as to completely overlap the partition. */
452 lines_chunk_split (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
455 assert (n
&& k
<= n
&& n
<= file_size
);
457 const off_t chunk_size
= file_size
/ n
;
458 uintmax_t chunk_no
= 1;
459 off_t chunk_end
= chunk_size
- 1;
461 bool new_file_flag
= true;
465 /* Start reading 1 byte before kth chunk of file. */
466 off_t start
= (k
- 1) * chunk_size
- 1;
467 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
468 error (EXIT_FAILURE
, errno
, "%s", infile
);
471 chunk_end
= chunk_no
* chunk_size
- 1;
474 while (n_written
< file_size
)
476 char *bp
= buf
, *eob
;
477 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
478 n_read
= MIN (n_read
, file_size
- n_written
);
479 if (n_read
< bufsize
&& errno
)
480 error (EXIT_FAILURE
, errno
, "%s", infile
);
481 else if (n_read
== 0)
490 /* Begin looking for '\n' at last byte of chunk. */
491 off_t skip
= MIN (n_read
, MAX (0, chunk_end
- n_written
));
492 char *bp_out
= memchr (bp
+ skip
, '\n', n_read
- skip
);
497 to_write
= bp_out
- bp
;
501 /* We don't use the stdout buffer here since we're writing
502 large chunks from an existing file, so it's more efficient
503 to write out directly. */
504 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
505 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
508 cwrite (new_file_flag
, bp
, to_write
);
509 n_written
+= to_write
;
512 new_file_flag
= next
;
514 /* A line could have been so long that it skipped
515 entire chunks. So create empty files in that case. */
516 while (next
|| chunk_end
<= n_written
- 1)
518 if (!next
&& bp
== eob
)
519 break; /* replenish buf, before going to next chunk. */
521 if (k
&& chunk_no
> k
)
524 chunk_end
= file_size
- 1; /* >= chunk_size. */
526 chunk_end
+= chunk_size
;
527 if (chunk_end
<= n_written
- 1)
528 cwrite (true, NULL
, 0);
535 /* Ensure NUMBER files are created, which truncates
536 any existing files or notifies any consumers on fifos.
537 FIXME: Should we do this before EXIT_FAILURE? */
538 while (!k
&& chunk_no
++ <= n
)
539 cwrite (true, NULL
, 0);
542 /* -n K/N: Extract Kth of N chunks. */
545 bytes_chunk_extract (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
551 assert (k
&& n
&& k
<= n
&& n
<= file_size
);
553 start
= (k
- 1) * (file_size
/ n
);
554 end
= (k
== n
) ? file_size
: k
* (file_size
/ n
);
556 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
557 error (EXIT_FAILURE
, errno
, "%s", infile
);
561 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
562 n_read
= MIN (n_read
, end
- start
);
563 if (n_read
< bufsize
&& errno
)
564 error (EXIT_FAILURE
, errno
, "%s", infile
);
565 else if (n_read
== 0)
567 if (full_write (STDOUT_FILENO
, buf
, n_read
) != n_read
)
568 error (EXIT_FAILURE
, errno
, "%s", quote ("-"));
573 typedef struct of_info
586 /* Rotate file descriptors when we're writing to more output files than we
587 have available file descriptors.
588 Return whether we came under file resource pressure.
589 If so, it's probably best to close each file when finished with it. */
592 ofile_open (of_t
*files
, size_t i_check
, size_t nfiles
)
594 bool file_limit
= false;
596 if (files
[i_check
].ofd
<= OFD_NEW
)
599 size_t i_reopen
= i_check
? i_check
- 1 : nfiles
- 1;
601 /* Another process could have opened a file in between the calls to
602 close and open, so we should keep trying until open succeeds or
603 we've closed all of our files. */
606 if (files
[i_check
].ofd
== OFD_NEW
)
607 fd
= create (files
[i_check
].of_name
);
608 else /* OFD_APPEND */
610 /* Attempt to append to previously opened file.
611 We use O_NONBLOCK to support writing to fifos,
612 where the other end has closed because of our
613 previous close. In that case we'll immediately
614 get an error, rather than waiting indefinitely.
615 In specialised cases the consumer can keep reading
616 from the fifo, terminating on conditions in the data
617 itself, or perhaps never in the case of `tail -f`.
618 I.E. for fifos it is valid to attempt this reopen. */
619 fd
= open (files
[i_check
].of_name
,
620 O_WRONLY
| O_BINARY
| O_APPEND
| O_NONBLOCK
);
626 if (!(errno
== EMFILE
|| errno
== ENFILE
))
627 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
631 /* Search backwards for an open file to close. */
632 while (files
[i_reopen
].ofd
< 0)
634 i_reopen
= i_reopen
? i_reopen
- 1 : nfiles
- 1;
635 /* No more open files to close, exit with E[NM]FILE. */
636 if (i_reopen
== i_check
)
637 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
640 if (fclose (files
[i_reopen
].ofile
) != 0)
641 error (EXIT_FAILURE
, errno
, "%s", files
[i_reopen
].of_name
);
642 files
[i_reopen
].ofd
= OFD_APPEND
;
645 files
[i_check
].ofd
= fd
;
646 if (!(files
[i_check
].ofile
= fdopen (fd
, "a")))
647 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
653 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
654 When K == 0, we try to keep the files open in parallel.
655 If we run out of file resources, then we revert
656 to opening and closing each file for each line. */
659 lines_rr (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
)
663 of_t
*files
IF_LINT (= NULL
);
671 error (exit_failure
, 0, "%s", _("memory exhausted"));
672 files
= xnmalloc (n
, sizeof *files
);
674 /* Generate output file names. */
675 for (i_file
= 0; i_file
< n
; i_file
++)
678 files
[i_file
].of_name
= xstrdup (outfile
);
679 files
[i_file
].ofd
= OFD_NEW
;
680 files
[i_file
].ofile
= NULL
;
688 char *bp
= buf
, *eob
;
689 /* Use safe_read() rather than full_read() here
690 so that we process available data immediately. */
691 size_t n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
692 if (n_read
== SAFE_READ_ERROR
)
693 error (EXIT_FAILURE
, errno
, "%s", infile
);
694 else if (n_read
== 0)
703 /* Find end of line. */
704 char *bp_out
= memchr (bp
, '\n', eob
- bp
);
712 to_write
= bp_out
- bp
;
716 if (line_no
== k
&& unbuffered
)
718 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
719 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
721 else if (line_no
== k
&& fwrite (bp
, to_write
, 1, stdout
) != 1)
723 clearerr (stdout
); /* To silence close_stdout(). */
724 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
727 line_no
= (line_no
== n
) ? 1 : line_no
+ 1;
731 /* Secure file descriptor. */
732 file_limit
|= ofile_open (files
, i_file
, n
);
735 /* Note writing to fd, rather than flushing the FILE gives
736 an 8% performance benefit, due to reduced data copying. */
737 if (full_write (files
[i_file
].ofd
, bp
, to_write
) != to_write
)
738 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
740 else if (fwrite (bp
, to_write
, 1, files
[i_file
].ofile
) != 1)
741 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
744 if (fclose (files
[i_file
].ofile
) != 0)
745 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
746 files
[i_file
].ofd
= OFD_APPEND
;
748 if (next
&& ++i_file
== n
)
756 /* Ensure all files created, so that any existing files are truncated,
757 and to signal any waiting fifo consumers.
758 Also, close any open file descriptors.
759 FIXME: Should we do this before EXIT_FAILURE? */
760 for (i_file
= 0; !k
&& !elide_empty_files
&& i_file
< n
; i_file
++)
762 file_limit
|= ofile_open (files
, i_file
, n
);
763 if (fclose (files
[i_file
].ofile
) != 0)
764 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
768 #define FAIL_ONLY_ONE_WAY() \
771 error (0, 0, _("cannot split in more than one way")); \
772 usage (EXIT_FAILURE); \
776 /* Parse K/N syntax of chunk options. */
779 parse_chunk (uintmax_t *k_units
, uintmax_t *n_units
, char *slash
)
782 if (xstrtoumax (slash
+1, NULL
, 10, n_units
, "") != LONGINT_OK
784 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), slash
+1);
785 if (slash
!= optarg
/* a leading number is specified. */
786 && (xstrtoumax (optarg
, NULL
, 10, k_units
, "") != LONGINT_OK
787 || *k_units
== 0 || *n_units
< *k_units
))
788 error (EXIT_FAILURE
, 0, _("%s: invalid chunk number"), optarg
);
793 main (int argc
, char **argv
)
795 struct stat stat_buf
;
796 enum Split_type split_type
= type_undef
;
797 size_t in_blk_size
= 0; /* optimal block size of input file device */
798 char *buf
; /* file i/o buffer */
799 size_t page_size
= getpagesize ();
800 uintmax_t k_units
= 0;
803 static char const multipliers
[] = "bEGKkMmPTYZ0";
805 int digits_optind
= 0;
808 initialize_main (&argc
, &argv
);
809 set_program_name (argv
[0]);
810 setlocale (LC_ALL
, "");
811 bindtextdomain (PACKAGE
, LOCALEDIR
);
812 textdomain (PACKAGE
);
814 atexit (close_stdout
);
816 /* Parse command line options. */
818 infile
= bad_cast ("-");
819 outbase
= bad_cast ("x");
823 /* This is the argv-index of the option we will read next. */
824 int this_optind
= optind
? optind
: 1;
827 c
= getopt_long (argc
, argv
, "0123456789C:a:b:del:n:u", longopts
, NULL
);
836 if (xstrtoul (optarg
, NULL
, 10, &tmp
, "") != LONGINT_OK
837 || SIZE_MAX
/ sizeof (size_t) < tmp
)
839 error (0, 0, _("%s: invalid suffix length"), optarg
);
840 usage (EXIT_FAILURE
);
847 if (split_type
!= type_undef
)
848 FAIL_ONLY_ONE_WAY ();
849 split_type
= type_bytes
;
850 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
853 error (0, 0, _("%s: invalid number of bytes"), optarg
);
854 usage (EXIT_FAILURE
);
856 /* If input is a pipe, we could get more data than is possible
857 to write to a single file, so indicate that immediately
858 rather than having possibly future invocations fail. */
859 if (OFF_T_MAX
< n_units
)
860 error (EXIT_FAILURE
, EFBIG
,
861 _("%s: invalid number of bytes"), optarg
);
866 if (split_type
!= type_undef
)
867 FAIL_ONLY_ONE_WAY ();
868 split_type
= type_lines
;
869 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
872 error (0, 0, _("%s: invalid number of lines"), optarg
);
873 usage (EXIT_FAILURE
);
878 if (split_type
!= type_undef
)
879 FAIL_ONLY_ONE_WAY ();
880 split_type
= type_byteslines
;
881 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
882 || n_units
== 0 || SIZE_MAX
< n_units
)
884 error (0, 0, _("%s: invalid number of bytes"), optarg
);
885 usage (EXIT_FAILURE
);
887 if (OFF_T_MAX
< n_units
)
888 error (EXIT_FAILURE
, EFBIG
,
889 _("%s: invalid number of bytes"), optarg
);
893 if (split_type
!= type_undef
)
894 FAIL_ONLY_ONE_WAY ();
895 /* skip any whitespace */
896 while (isspace (to_uchar (*optarg
)))
898 if (strncmp (optarg
, "r/", 2) == 0)
900 split_type
= type_rr
;
903 else if (strncmp (optarg
, "l/", 2) == 0)
905 split_type
= type_chunk_lines
;
909 split_type
= type_chunk_bytes
;
910 if ((slash
= strchr (optarg
, '/')))
911 parse_chunk (&k_units
, &n_units
, slash
);
912 else if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
914 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), optarg
);
931 if (split_type
== type_undef
)
933 split_type
= type_digits
;
936 if (split_type
!= type_undef
&& split_type
!= type_digits
)
937 FAIL_ONLY_ONE_WAY ();
938 if (digits_optind
!= 0 && digits_optind
!= this_optind
)
939 n_units
= 0; /* More than one number given; ignore other. */
940 digits_optind
= this_optind
;
941 if (!DECIMAL_DIGIT_ACCUMULATE (n_units
, c
- '0', uintmax_t))
943 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
944 error (EXIT_FAILURE
, 0,
945 _("line count option -%s%c... is too large"),
946 umaxtostr (n_units
, buffer
), c
);
951 suffix_alphabet
= "0123456789";
955 elide_empty_files
= true;
958 case IO_BLKSIZE_OPTION
:
960 uintmax_t tmp_blk_size
;
961 if (xstrtoumax (optarg
, NULL
, 10, &tmp_blk_size
,
962 multipliers
) != LONGINT_OK
963 || tmp_blk_size
== 0 || SIZE_MAX
- page_size
< tmp_blk_size
)
964 error (0, 0, _("%s: invalid IO block size"), optarg
);
966 in_blk_size
= tmp_blk_size
;
974 case_GETOPT_HELP_CHAR
;
976 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
979 usage (EXIT_FAILURE
);
983 /* Handle default case. */
984 if (split_type
== type_undef
)
986 split_type
= type_lines
;
992 error (0, 0, _("%s: invalid number of lines"), "0");
993 usage (EXIT_FAILURE
);
996 set_suffix_length (n_units
, split_type
);
998 /* Get out the filename arguments. */
1001 infile
= argv
[optind
++];
1004 outbase
= argv
[optind
++];
1008 error (0, 0, _("extra operand %s"), quote (argv
[optind
]));
1009 usage (EXIT_FAILURE
);
1012 /* Open the input file. */
1013 if (! STREQ (infile
, "-")
1014 && fd_reopen (STDIN_FILENO
, infile
, O_RDONLY
, 0) < 0)
1015 error (EXIT_FAILURE
, errno
, _("cannot open %s for reading"),
1018 /* Binary I/O is safer when byte counts are used. */
1019 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
1020 xfreopen (NULL
, "rb", stdin
);
1022 /* Get the optimal block size of input device and make a buffer. */
1024 if (fstat (STDIN_FILENO
, &stat_buf
) != 0)
1025 error (EXIT_FAILURE
, errno
, "%s", infile
);
1026 if (in_blk_size
== 0)
1027 in_blk_size
= io_blksize (stat_buf
);
1028 file_size
= stat_buf
.st_size
;
1030 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
)
1032 off_t input_offset
= lseek (STDIN_FILENO
, 0, SEEK_CUR
);
1033 if (input_offset
< 0)
1034 error (EXIT_FAILURE
, 0, _("%s: cannot determine file size"),
1036 file_size
-= input_offset
;
1037 /* Overflow, and sanity checking. */
1038 if (OFF_T_MAX
< n_units
)
1040 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1041 error (EXIT_FAILURE
, EFBIG
, _("%s: invalid number of chunks"),
1042 umaxtostr (n_units
, buffer
));
1044 /* increase file_size to n_units here, so that we still process
1045 any input data, and create empty files for the rest. */
1046 file_size
= MAX (file_size
, n_units
);
1049 buf
= ptr_align (xmalloc (in_blk_size
+ 1 + page_size
- 1), page_size
);
1055 lines_split (n_units
, buf
, in_blk_size
);
1059 bytes_split (n_units
, buf
, in_blk_size
, 0);
1062 case type_byteslines
:
1063 line_bytes_split (n_units
);
1066 case type_chunk_bytes
:
1068 bytes_split (file_size
/ n_units
, buf
, in_blk_size
, n_units
);
1070 bytes_chunk_extract (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1073 case type_chunk_lines
:
1074 lines_chunk_split (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1078 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1079 but the functionality is provided for symmetry. */
1080 lines_rr (k_units
, n_units
, buf
, in_blk_size
);
1087 if (close (STDIN_FILENO
) != 0)
1088 error (EXIT_FAILURE
, errno
, "%s", infile
);
1089 if (output_desc
>= 0 && close (output_desc
) < 0)
1090 error (EXIT_FAILURE
, errno
, "%s", outfile
);
1092 exit (EXIT_SUCCESS
);