1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
20 * Implement -t CHAR or -t REGEX to specify break characters other
28 #include <sys/types.h>
32 #include "fd-reopen.h"
34 #include "full-read.h"
35 #include "full-write.h"
37 #include "safe-read.h"
41 /* The official name of this program (e.g., no `g' prefix). */
42 #define PROGRAM_NAME "split"
45 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
46 proper_name ("Richard M. Stallman")
/* Base name of output files.  */
static char const *outbase;

/* Name of output files.
   NOTE(review): declaration restored; the type is inferred from the
   xmalloc assignment and indexed stores in next_file_name -- confirm.  */
static char *outfile;

/* Pointer to the end of the prefix in OUTFILE.
   Suffixes are inserted here.  */
static char *outfile_mid;

/* Length of OUTFILE's suffix.  */
static size_t suffix_length;

/* Alphabet of characters to use in suffix.  */
static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";

/* Name of input file.  May be "-".
   NOTE(review): declaration restored; the type is inferred from the
   assignments in main (bad_cast ("-") and argv[optind++]) -- confirm.  */
static char *infile;

/* Descriptor on which output file is open.  */
static int output_desc = -1;

/* If true, print a diagnostic on standard output just before each
   output file is opened.
   NOTE(review): declaration restored; the name and type are inferred
   from the --verbose option -- confirm against the complete file.  */
static bool verbose;

/* If true, don't generate zero length output files.  */
static bool elide_empty_files;

/* If true, in round robin mode, immediately copy
   input to output, which is much slower, so disabled by default.  */
static bool unbuffered;
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.
   These values are what getopt_long returns for --verbose and for the
   undocumented ---io-blksize option.  */
enum
{
  VERBOSE_OPTION = CHAR_MAX + 1,
  IO_BLKSIZE_OPTION
};
89 static struct option
const longopts
[] =
91 {"bytes", required_argument
, NULL
, 'b'},
92 {"lines", required_argument
, NULL
, 'l'},
93 {"line-bytes", required_argument
, NULL
, 'C'},
94 {"number", required_argument
, NULL
, 'n'},
95 {"elide-empty-files", no_argument
, NULL
, 'e'},
96 {"unbuffered", no_argument
, NULL
, 'u'},
97 {"suffix-length", required_argument
, NULL
, 'a'},
98 {"numeric-suffixes", no_argument
, NULL
, 'd'},
99 {"verbose", no_argument
, NULL
, VERBOSE_OPTION
},
100 {"-io-blksize", required_argument
, NULL
,
101 IO_BLKSIZE_OPTION
}, /* do not document */
102 {GETOPT_HELP_OPTION_DECL
},
103 {GETOPT_VERSION_OPTION_DECL
},
108 set_suffix_length (uintmax_t n_units
)
110 #define DEFAULT_SUFFIX_LENGTH 2
112 size_t suffix_needed
= 0;
113 size_t alphabet_len
= strlen (suffix_alphabet
);
114 bool alphabet_slop
= (n_units
% alphabet_len
) != 0;
115 while (n_units
/= alphabet_len
)
117 suffix_needed
+= alphabet_slop
;
119 if (suffix_length
) /* set by user */
121 if (suffix_length
< suffix_needed
)
123 error (EXIT_FAILURE
, 0,
124 _("the suffix length needs to be at least %zu"),
130 suffix_length
= MAX (DEFAULT_SUFFIX_LENGTH
, suffix_needed
);
136 if (status
!= EXIT_SUCCESS
)
137 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
142 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
146 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
147 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
148 is -, read standard input.\n\
152 Mandatory arguments to long options are mandatory for short options too.\n\
154 fprintf (stdout
, _("\
155 -a, --suffix-length=N use suffixes of length N (default %d)\n\
156 -b, --bytes=SIZE put SIZE bytes per output file\n\
157 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
158 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
159 -e, --elide-empty-files do not generate empty output files with `-n'\n\
160 -l, --lines=NUMBER put NUMBER lines per output file\n\
161 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
162 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
163 "), DEFAULT_SUFFIX_LENGTH
);
165 --verbose print a diagnostic just before each\n\
166 output file is opened\n\
168 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
169 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
173 N split into N files based on size of input\n\
174 K/N output Kth of N to stdout\n\
175 l/N split into N files without splitting lines\n\
176 l/K/N output Kth of N to stdout without splitting lines\n\
177 r/N like `l' but use round robin distribution\n\
178 r/K/N likewise but only output Kth of N to stdout\n\
180 emit_ancillary_info ();
185 /* Compute the next sequential output file name and store it into the
189 next_file_name (void)
191 /* Index in suffix_alphabet of each character in the suffix. */
192 static size_t *sufindex
;
196 /* Allocate and initialize the first file name. */
198 size_t outbase_length
= strlen (outbase
);
199 size_t outfile_length
= outbase_length
+ suffix_length
;
200 if (outfile_length
+ 1 < outbase_length
)
202 outfile
= xmalloc (outfile_length
+ 1);
203 outfile_mid
= outfile
+ outbase_length
;
204 memcpy (outfile
, outbase
, outbase_length
);
205 memset (outfile_mid
, suffix_alphabet
[0], suffix_length
);
206 outfile
[outfile_length
] = 0;
207 sufindex
= xcalloc (suffix_length
, sizeof *sufindex
);
209 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
210 /* POSIX requires that if the output file name is too long for
211 its directory, `split' must fail without creating any files.
212 This must be checked for explicitly on operating systems that
213 silently truncate file names. */
215 char *dir
= dir_name (outfile
);
216 long name_max
= pathconf (dir
, _PC_NAME_MAX
);
217 if (0 <= name_max
&& name_max
< base_len (last_component (outfile
)))
218 error (EXIT_FAILURE
, ENAMETOOLONG
, "%s", outfile
);
225 /* Increment the suffix in place, if possible. */
227 size_t i
= suffix_length
;
231 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
235 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
237 error (EXIT_FAILURE
, 0, _("output file suffixes exhausted"));
241 /* Create or truncate a file. */
244 create (const char* name
)
247 fprintf (stdout
, _("creating file %s\n"), quote (name
));
248 return open (name
, O_WRONLY
| O_CREAT
| O_TRUNC
| O_BINARY
,
249 (S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IWGRP
| S_IROTH
| S_IWOTH
));
252 /* Write BYTES bytes at BP to an output file.
253 If NEW_FILE_FLAG is true, open the next output file.
254 Otherwise add to the same output file already in use. */
257 cwrite (bool new_file_flag
, const char *bp
, size_t bytes
)
261 if (!bp
&& bytes
== 0 && elide_empty_files
)
263 if (output_desc
>= 0 && close (output_desc
) < 0)
264 error (EXIT_FAILURE
, errno
, "%s", outfile
);
266 if ((output_desc
= create (outfile
)) < 0)
267 error (EXIT_FAILURE
, errno
, "%s", outfile
);
269 if (full_write (output_desc
, bp
, bytes
) != bytes
)
270 error (EXIT_FAILURE
, errno
, "%s", outfile
);
273 /* Split into pieces of exactly N_BYTES bytes.
274 Use buffer BUF, whose size is BUFSIZE. */
277 bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
, uintmax_t max_files
)
280 bool new_file_flag
= true;
282 uintmax_t to_write
= n_bytes
;
284 uintmax_t opened
= 0;
288 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
289 if (n_read
< bufsize
&& errno
)
290 error (EXIT_FAILURE
, errno
, "%s", infile
);
295 if (to_read
< to_write
)
297 if (to_read
) /* do not write 0 bytes! */
299 cwrite (new_file_flag
, bp_out
, to_read
);
300 opened
+= new_file_flag
;
302 new_file_flag
= false;
309 cwrite (new_file_flag
, bp_out
, w
);
310 opened
+= new_file_flag
;
311 new_file_flag
= !max_files
|| (opened
< max_files
);
318 while (n_read
== bufsize
);
320 /* Ensure NUMBER files are created, which truncates
321 any existing files or notifies any consumers on fifos.
322 FIXME: Should we do this before EXIT_FAILURE? */
323 while (opened
++ < max_files
)
324 cwrite (true, NULL
, 0);
327 /* Split into pieces of exactly N_LINES lines.
328 Use buffer BUF, whose size is BUFSIZE. */
331 lines_split (uintmax_t n_lines
, char *buf
, size_t bufsize
)
334 char *bp
, *bp_out
, *eob
;
335 bool new_file_flag
= true;
340 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
341 if (n_read
< bufsize
&& errno
)
342 error (EXIT_FAILURE
, errno
, "%s", infile
);
348 bp
= memchr (bp
, '\n', eob
- bp
+ 1);
351 if (eob
!= bp_out
) /* do not write 0 bytes! */
353 size_t len
= eob
- bp_out
;
354 cwrite (new_file_flag
, bp_out
, len
);
355 new_file_flag
= false;
363 cwrite (new_file_flag
, bp_out
, bp
- bp_out
);
365 new_file_flag
= true;
370 while (n_read
== bufsize
);
373 /* Split into pieces that are as large as possible while still not more
374 than N_BYTES bytes, and are split on line boundaries except
375 where lines longer than N_BYTES bytes occur.
376 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
377 buffer of size N_BYTES, in case N_BYTES is very large. */
380 line_bytes_split (size_t n_bytes
)
384 size_t n_buffered
= 0;
385 char *buf
= xmalloc (n_bytes
);
389 /* Fill up the full buffer size from the input file. */
391 size_t to_read
= n_bytes
- n_buffered
;
392 size_t n_read
= full_read (STDIN_FILENO
, buf
+ n_buffered
, to_read
);
393 if (n_read
< to_read
&& errno
)
394 error (EXIT_FAILURE
, errno
, "%s", infile
);
396 n_buffered
+= n_read
;
397 if (n_buffered
!= n_bytes
)
404 /* Find where to end this chunk. */
405 bp
= buf
+ n_buffered
;
406 if (n_buffered
== n_bytes
)
408 while (bp
> buf
&& bp
[-1] != '\n')
412 /* If chunk has no newlines, use all the chunk. */
414 bp
= buf
+ n_buffered
;
416 /* Output the chars as one output file. */
417 cwrite (true, buf
, bp
- buf
);
419 /* Discard the chars we just output; move rest of chunk
420 down to be the start of the next chunk. Source and
421 destination probably overlap. */
422 n_buffered
-= bp
- buf
;
424 memmove (buf
, bp
, n_buffered
);
430 /* -n l/[K/]N: Write lines to files of approximately file size / N.
431 The file is partitioned into file size / N sized portions, with the
432 last assigned any excess. If a line _starts_ within a partition
433 it is written completely to the corresponding file. Since lines
434 are not split even if they overlap a partition, the files written
435 can be larger or smaller than the partition size, and even empty
436 if a line is so long as to completely overlap the partition. */
439 lines_chunk_split (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
442 assert (n
&& k
<= n
&& n
<= file_size
);
444 const off_t chunk_size
= file_size
/ n
;
445 uintmax_t chunk_no
= 1;
446 off_t chunk_end
= chunk_size
- 1;
448 bool new_file_flag
= true;
452 /* Start reading 1 byte before kth chunk of file. */
453 off_t start
= (k
- 1) * chunk_size
- 1;
454 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
455 error (EXIT_FAILURE
, errno
, "%s", infile
);
458 chunk_end
= chunk_no
* chunk_size
- 1;
461 while (n_written
< file_size
)
463 char *bp
= buf
, *eob
;
464 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
465 n_read
= MIN (n_read
, file_size
- n_written
);
466 if (n_read
< bufsize
&& errno
)
467 error (EXIT_FAILURE
, errno
, "%s", infile
);
468 else if (n_read
== 0)
477 /* Begin looking for '\n' at last byte of chunk. */
478 off_t skip
= MIN (n_read
, MAX (0, chunk_end
- n_written
));
479 char *bp_out
= memchr (bp
+ skip
, '\n', n_read
- skip
);
484 to_write
= bp_out
- bp
;
488 /* We don't use the stdout buffer here since we're writing
489 large chunks from an existing file, so it's more efficient
490 to write out directly. */
491 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
492 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
495 cwrite (new_file_flag
, bp
, to_write
);
496 n_written
+= to_write
;
499 new_file_flag
= next
;
501 /* A line could have been so long that it skipped
502 entire chunks. So create empty files in that case. */
503 while (next
|| chunk_end
<= n_written
- 1)
505 if (!next
&& bp
== eob
)
506 break; /* replenish buf, before going to next chunk. */
508 if (k
&& chunk_no
> k
)
511 chunk_end
= file_size
- 1; /* >= chunk_size. */
513 chunk_end
+= chunk_size
;
514 if (chunk_end
<= n_written
- 1)
515 cwrite (true, NULL
, 0);
522 /* Ensure NUMBER files are created, which truncates
523 any existing files or notifies any consumers on fifos.
524 FIXME: Should we do this before EXIT_FAILURE? */
525 while (!k
&& chunk_no
++ <= n
)
526 cwrite (true, NULL
, 0);
529 /* -n K/N: Extract Kth of N chunks. */
532 bytes_chunk_extract (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
538 assert (k
&& n
&& k
<= n
&& n
<= file_size
);
540 start
= (k
- 1) * (file_size
/ n
);
541 end
= (k
== n
) ? file_size
: k
* (file_size
/ n
);
543 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
544 error (EXIT_FAILURE
, errno
, "%s", infile
);
548 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
549 n_read
= MIN (n_read
, end
- start
);
550 if (n_read
< bufsize
&& errno
)
551 error (EXIT_FAILURE
, errno
, "%s", infile
);
552 else if (n_read
== 0)
554 if (full_write (STDOUT_FILENO
, buf
, n_read
) != n_read
)
555 error (EXIT_FAILURE
, errno
, "%s", quote ("-"));
560 typedef struct of_info
573 /* Rotate file descriptors when we're writing to more output files than we
574 have available file descriptors.
575 Return whether we came under file resource pressure.
576 If so, it's probably best to close each file when finished with it. */
579 ofile_open (of_t
*files
, size_t i_check
, size_t nfiles
)
581 bool file_limit
= false;
583 if (files
[i_check
].ofd
<= OFD_NEW
)
586 size_t i_reopen
= i_check
? i_check
- 1 : nfiles
- 1;
588 /* Another process could have opened a file in between the calls to
589 close and open, so we should keep trying until open succeeds or
590 we've closed all of our files. */
593 if (files
[i_check
].ofd
== OFD_NEW
)
594 fd
= create (files
[i_check
].of_name
);
595 else /* OFD_APPEND */
597 /* Attempt to append to previously opened file.
598 We use O_NONBLOCK to support writing to fifos,
599 where the other end has closed because of our
600 previous close. In that case we'll immediately
601 get an error, rather than waiting indefinitely.
602 In specialised cases the consumer can keep reading
603 from the fifo, terminating on conditions in the data
604 itself, or perhaps never in the case of `tail -f`.
605 I.E. for fifos it is valid to attempt this reopen. */
606 fd
= open (files
[i_check
].of_name
,
607 O_WRONLY
| O_BINARY
| O_APPEND
| O_NONBLOCK
);
613 if (!(errno
== EMFILE
|| errno
== ENFILE
))
614 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
618 /* Search backwards for an open file to close. */
619 while (files
[i_reopen
].ofd
< 0)
621 i_reopen
= i_reopen
? i_reopen
- 1 : nfiles
- 1;
622 /* No more open files to close, exit with E[NM]FILE. */
623 if (i_reopen
== i_check
)
624 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
627 if (fclose (files
[i_reopen
].ofile
) != 0)
628 error (EXIT_FAILURE
, errno
, "%s", files
[i_reopen
].of_name
);
629 files
[i_reopen
].ofd
= OFD_APPEND
;
632 files
[i_check
].ofd
= fd
;
633 if (!(files
[i_check
].ofile
= fdopen (fd
, "a")))
634 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
640 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
641 When K == 0, we try to keep the files open in parallel.
642 If we run out of file resources, then we revert
643 to opening and closing each file for each line. */
646 lines_rr (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
)
658 error (exit_failure
, 0, "%s", _("memory exhausted"));
659 files
= xnmalloc (n
, sizeof *files
);
661 /* Generate output file names. */
662 for (i_file
= 0; i_file
< n
; i_file
++)
665 files
[i_file
].of_name
= xstrdup (outfile
);
666 files
[i_file
].ofd
= OFD_NEW
;
667 files
[i_file
].ofile
= NULL
;
675 char *bp
= buf
, *eob
;
676 /* Use safe_read() rather than full_read() here
677 so that we process available data immediately. */
678 size_t n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
679 if (n_read
== SAFE_READ_ERROR
)
680 error (EXIT_FAILURE
, errno
, "%s", infile
);
681 else if (n_read
== 0)
690 /* Find end of line. */
691 char *bp_out
= memchr (bp
, '\n', eob
- bp
);
699 to_write
= bp_out
- bp
;
703 if (line_no
== k
&& unbuffered
)
705 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
706 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
708 else if (line_no
== k
&& fwrite (bp
, to_write
, 1, stdout
) != 1)
710 clearerr (stdout
); /* To silence close_stdout(). */
711 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
714 line_no
= (line_no
== n
) ? 1 : line_no
+ 1;
718 /* Secure file descriptor. */
719 file_limit
|= ofile_open (files
, i_file
, n
);
722 /* Note writing to fd, rather than flushing the FILE gives
723 an 8% performance benefit, due to reduced data copying. */
724 if (full_write (files
[i_file
].ofd
, bp
, to_write
) != to_write
)
725 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
727 else if (fwrite (bp
, to_write
, 1, files
[i_file
].ofile
) != 1)
728 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
731 if (fclose (files
[i_file
].ofile
) != 0)
732 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
733 files
[i_file
].ofd
= OFD_APPEND
;
735 if (next
&& ++i_file
== n
)
743 /* Ensure all files created, so that any existing files are truncated,
744 and to signal any waiting fifo consumers.
745 Also, close any open file descriptors.
746 FIXME: Should we do this before EXIT_FAILURE? */
747 for (i_file
= 0; !k
&& !elide_empty_files
&& i_file
< n
; i_file
++)
749 file_limit
|= ofile_open (files
, i_file
, n
);
750 if (fclose (files
[i_file
].ofile
) != 0)
751 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
/* Diagnose an attempt to select more than one way of splitting, then
   call usage (EXIT_FAILURE).  The body is wrapped in do ... while (0) so
   that the expansion is a single statement: callers write
   `if (...) FAIL_ONLY_ONE_WAY ();' without braces.  */
#define FAIL_ONLY_ONE_WAY()                                     \
  do                                                            \
    {                                                           \
      error (0, 0, _("cannot split in more than one way"));     \
      usage (EXIT_FAILURE);                                     \
    }                                                           \
  while (0)
763 /* Parse K/N syntax of chunk options. */
766 parse_chunk (uintmax_t *k_units
, uintmax_t *n_units
, char *slash
)
769 if (xstrtoumax (slash
+1, NULL
, 10, n_units
, "") != LONGINT_OK
771 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), slash
+1);
772 if (slash
!= optarg
/* a leading number is specified. */
773 && (xstrtoumax (optarg
, NULL
, 10, k_units
, "") != LONGINT_OK
774 || *k_units
== 0 || *n_units
< *k_units
))
775 error (EXIT_FAILURE
, 0, _("%s: invalid chunk number"), optarg
);
780 main (int argc
, char **argv
)
782 struct stat stat_buf
;
785 type_undef
, type_bytes
, type_byteslines
, type_lines
, type_digits
,
786 type_chunk_bytes
, type_chunk_lines
, type_rr
787 } split_type
= type_undef
;
788 size_t in_blk_size
= 0; /* optimal block size of input file device */
789 char *buf
; /* file i/o buffer */
790 size_t page_size
= getpagesize ();
791 uintmax_t k_units
= 0;
794 static char const multipliers
[] = "bEGKkMmPTYZ0";
796 int digits_optind
= 0;
799 initialize_main (&argc
, &argv
);
800 set_program_name (argv
[0]);
801 setlocale (LC_ALL
, "");
802 bindtextdomain (PACKAGE
, LOCALEDIR
);
803 textdomain (PACKAGE
);
805 atexit (close_stdout
);
807 /* Parse command line options. */
809 infile
= bad_cast ("-");
810 outbase
= bad_cast ("x");
814 /* This is the argv-index of the option we will read next. */
815 int this_optind
= optind
? optind
: 1;
818 c
= getopt_long (argc
, argv
, "0123456789C:a:b:del:n:u", longopts
, NULL
);
827 if (xstrtoul (optarg
, NULL
, 10, &tmp
, "") != LONGINT_OK
828 || SIZE_MAX
/ sizeof (size_t) < tmp
)
830 error (0, 0, _("%s: invalid suffix length"), optarg
);
831 usage (EXIT_FAILURE
);
838 if (split_type
!= type_undef
)
839 FAIL_ONLY_ONE_WAY ();
840 split_type
= type_bytes
;
841 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
844 error (0, 0, _("%s: invalid number of bytes"), optarg
);
845 usage (EXIT_FAILURE
);
847 /* If input is a pipe, we could get more data than is possible
848 to write to a single file, so indicate that immediately
849 rather than having possibly future invocations fail. */
850 if (OFF_T_MAX
< n_units
)
851 error (EXIT_FAILURE
, EFBIG
,
852 _("%s: invalid number of bytes"), optarg
);
857 if (split_type
!= type_undef
)
858 FAIL_ONLY_ONE_WAY ();
859 split_type
= type_lines
;
860 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
863 error (0, 0, _("%s: invalid number of lines"), optarg
);
864 usage (EXIT_FAILURE
);
869 if (split_type
!= type_undef
)
870 FAIL_ONLY_ONE_WAY ();
871 split_type
= type_byteslines
;
872 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
873 || n_units
== 0 || SIZE_MAX
< n_units
)
875 error (0, 0, _("%s: invalid number of bytes"), optarg
);
876 usage (EXIT_FAILURE
);
878 if (OFF_T_MAX
< n_units
)
879 error (EXIT_FAILURE
, EFBIG
,
880 _("%s: invalid number of bytes"), optarg
);
884 if (split_type
!= type_undef
)
885 FAIL_ONLY_ONE_WAY ();
886 /* skip any whitespace */
887 while (isspace (to_uchar (*optarg
)))
889 if (strncmp (optarg
, "r/", 2) == 0)
891 split_type
= type_rr
;
894 else if (strncmp (optarg
, "l/", 2) == 0)
896 split_type
= type_chunk_lines
;
900 split_type
= type_chunk_bytes
;
901 if ((slash
= strchr (optarg
, '/')))
902 parse_chunk (&k_units
, &n_units
, slash
);
903 else if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
905 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), optarg
);
922 if (split_type
== type_undef
)
924 split_type
= type_digits
;
927 if (split_type
!= type_undef
&& split_type
!= type_digits
)
928 FAIL_ONLY_ONE_WAY ();
929 if (digits_optind
!= 0 && digits_optind
!= this_optind
)
930 n_units
= 0; /* More than one number given; ignore other. */
931 digits_optind
= this_optind
;
932 if (!DECIMAL_DIGIT_ACCUMULATE (n_units
, c
- '0', uintmax_t))
934 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
935 error (EXIT_FAILURE
, 0,
936 _("line count option -%s%c... is too large"),
937 umaxtostr (n_units
, buffer
), c
);
942 suffix_alphabet
= "0123456789";
946 elide_empty_files
= true;
949 case IO_BLKSIZE_OPTION
:
951 uintmax_t tmp_blk_size
;
952 if (xstrtoumax (optarg
, NULL
, 10, &tmp_blk_size
,
953 multipliers
) != LONGINT_OK
954 || tmp_blk_size
== 0 || SIZE_MAX
- page_size
< tmp_blk_size
)
955 error (0, 0, _("%s: invalid IO block size"), optarg
);
957 in_blk_size
= tmp_blk_size
;
965 case_GETOPT_HELP_CHAR
;
967 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
970 usage (EXIT_FAILURE
);
974 /* Handle default case. */
975 if (split_type
== type_undef
)
977 split_type
= type_lines
;
983 error (0, 0, _("%s: invalid number of lines"), "0");
984 usage (EXIT_FAILURE
);
987 set_suffix_length (n_units
);
989 /* Get out the filename arguments. */
992 infile
= argv
[optind
++];
995 outbase
= argv
[optind
++];
999 error (0, 0, _("extra operand %s"), quote (argv
[optind
]));
1000 usage (EXIT_FAILURE
);
1003 /* Open the input file. */
1004 if (! STREQ (infile
, "-")
1005 && fd_reopen (STDIN_FILENO
, infile
, O_RDONLY
, 0) < 0)
1006 error (EXIT_FAILURE
, errno
, _("cannot open %s for reading"),
1009 /* Binary I/O is safer when byte counts are used. */
1010 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
1011 xfreopen (NULL
, "rb", stdin
);
1013 /* Get the optimal block size of input device and make a buffer. */
1015 if (fstat (STDIN_FILENO
, &stat_buf
) != 0)
1016 error (EXIT_FAILURE
, errno
, "%s", infile
);
1017 if (in_blk_size
== 0)
1018 in_blk_size
= io_blksize (stat_buf
);
1019 file_size
= stat_buf
.st_size
;
1021 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
)
1023 off_t input_offset
= lseek (STDIN_FILENO
, 0, SEEK_CUR
);
1024 if (input_offset
< 0)
1025 error (EXIT_FAILURE
, 0, _("%s: cannot determine file size"),
1027 file_size
-= input_offset
;
1028 /* Overflow, and sanity checking. */
1029 if (OFF_T_MAX
< n_units
)
1031 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1032 error (EXIT_FAILURE
, EFBIG
, _("%s: invalid number of chunks"),
1033 umaxtostr (n_units
, buffer
));
1035 /* increase file_size to n_units here, so that we still process
1036 any input data, and create empty files for the rest. */
1037 file_size
= MAX (file_size
, n_units
);
1040 buf
= ptr_align (xmalloc (in_blk_size
+ 1 + page_size
- 1), page_size
);
1046 lines_split (n_units
, buf
, in_blk_size
);
1050 bytes_split (n_units
, buf
, in_blk_size
, 0);
1053 case type_byteslines
:
1054 line_bytes_split (n_units
);
1057 case type_chunk_bytes
:
1059 bytes_split (file_size
/ n_units
, buf
, in_blk_size
, n_units
);
1061 bytes_chunk_extract (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1064 case type_chunk_lines
:
1065 lines_chunk_split (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1069 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1070 but the functionality is provided for symmetry. */
1071 lines_rr (k_units
, n_units
, buf
, in_blk_size
);
1078 if (close (STDIN_FILENO
) != 0)
1079 error (EXIT_FAILURE
, errno
, "%s", infile
);
1080 if (output_desc
>= 0 && close (output_desc
) < 0)
1081 error (EXIT_FAILURE
, errno
, "%s", outfile
);
1083 exit (EXIT_SUCCESS
);