1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988, 1991, 1995-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
To do:
20 * Implement -t CHAR or -t REGEX to specify break characters other
than newline. */
29 #include <sys/types.h>
34 #include "fd-reopen.h"
36 #include "full-read.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
40 #include "safe-read.h"
45 /* The official name of this program (e.g., no `g' prefix). */
46 #define PROGRAM_NAME "split"
49 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
50 proper_name ("Richard M. Stallman")
/* File-scope state shared by the splitting routines below.
   NOTE(review): the embedded line numbers skip in places, so some
   declarations that belong here (the ones the orphaned comments below
   describe) are not visible in this listing.  */
52 /* Shell command to filter through, instead of creating files. */
/* Assigned from optarg in main (); tested for non-NULL in ignorable ().  */
53 static char const *filter_command
;
55 /* Process ID of the filter. */
/* Set from child_pid in create () after the fork.  */
56 static int filter_pid
;
58 /* Array of open pipes. */
/* create () appends the write end of each filter pipe and grows the
   array with x2nrealloc when n_open_pipes reaches open_pipes_alloc;
   closeout () drops an entry by overwriting it with the last one.  */
59 static int *open_pipes
;
60 static size_t open_pipes_alloc
;
61 static size_t n_open_pipes
;
63 /* Blocked signals. */
/* main () builds newblocked (SIGPIPE, unless it is already ignored) and
   saves the previous mask in oldblocked; the child in create ()
   restores oldblocked before running the filter.  */
64 static sigset_t oldblocked
;
65 static sigset_t newblocked
;
67 /* Base name of output files. */
/* Defaults to "x" in main (), or the second operand when given.  */
68 static char const *outbase
;
70 /* Name of output files. */
73 /* Pointer to the end of the prefix in OUTFILE.
74 Suffixes are inserted here. */
75 static char *outfile_mid
;
77 /* Length of OUTFILE's suffix. */
/* Zero means "not set by the user"; see set_suffix_length ().  */
78 static size_t suffix_length
;
80 /* Alphabet of characters to use in suffix. */
/* main () replaces this with "0123456789" (presumably for
   --numeric-suffixes; the case label is not visible).  */
81 static char const *suffix_alphabet
= "abcdefghijklmnopqrstuvwxyz";
83 /* Name of input file. May be "-". */
86 /* Descriptor on which output file is open. */
87 static int output_desc
= -1;
89 /* If true, print a diagnostic just before each
90 output file is opened.  NOTE(review): the visible fprintf calls in
create () send these diagnostics to stdout, not standard error. */
93 /* If true, don't generate zero length output files. */
94 static bool elide_empty_files
;
96 /* If true, in round robin mode, immediately copy
97 input to output, which is much slower, so disabled by default. */
98 static bool unbuffered
;
100 /* The split mode to use. */
/* As assigned in main ():
     type_undef        no mode chosen yet (later defaulted to type_lines)
     type_bytes        fixed byte count, dispatched to bytes_split ()
     type_byteslines   at most N bytes of whole lines, line_bytes_split ()
     type_lines        fixed line count, lines_split ()
     type_digits       line count given as bare digit options
     type_chunk_bytes  "-n N" or "-n K/N"
     type_chunk_lines  "-n l/..."
     type_rr           "-n r/..."
   NOTE(review): the enum header and closing brace are not visible in
   this listing.  */
103 type_undef
, type_bytes
, type_byteslines
, type_lines
, type_digits
,
104 type_chunk_bytes
, type_chunk_lines
, type_rr
107 /* For long options that have no equivalent short option, use a
108 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
/* NOTE(review): longopts also uses FILTER_OPTION and IO_BLKSIZE_OPTION;
   their enumerators are presumably declared after this one but are not
   visible in this listing.  */
111 VERBOSE_OPTION
= CHAR_MAX
+ 1,
/* Long-option table handed to getopt_long in main ().  Each entry maps
   a long name to the short option character (or pseudo option value)
   that the option switch in main () handles.  The "-io-blksize" entry
   has a name that itself starts with '-', and is deliberately left out
   of the help text.
   NOTE(review): the opening brace and terminating entry of the array
   are not visible in this listing.  */
116 static struct option
const longopts
[] =
118 {"bytes", required_argument
, NULL
, 'b'},
119 {"lines", required_argument
, NULL
, 'l'},
120 {"line-bytes", required_argument
, NULL
, 'C'},
121 {"number", required_argument
, NULL
, 'n'},
122 {"elide-empty-files", no_argument
, NULL
, 'e'},
123 {"unbuffered", no_argument
, NULL
, 'u'},
124 {"suffix-length", required_argument
, NULL
, 'a'},
125 {"numeric-suffixes", no_argument
, NULL
, 'd'},
126 {"filter", required_argument
, NULL
, FILTER_OPTION
},
127 {"verbose", no_argument
, NULL
, VERBOSE_OPTION
},
128 {"-io-blksize", required_argument
, NULL
,
129 IO_BLKSIZE_OPTION
}, /* do not document */
130 {GETOPT_HELP_OPTION_DECL
},
131 {GETOPT_VERSION_OPTION_DECL
},
135 /* Return true if the errno value, ERR, is ignorable. */
/* Only EPIPE qualifies, and only while a filter command is in use
   (filter_command non-NULL).  Callers use this to keep going when a
   write to, or close of, a filter pipe fails because the filter stopped
   reading.  NOTE(review): the function header (original lines 136-138)
   is not visible in this listing.  */
139 return filter_command
&& err
== EPIPE
;
143 set_suffix_length (uintmax_t n_units
, enum Split_type split_type
)
145 #define DEFAULT_SUFFIX_LENGTH 2
147 size_t suffix_needed
= 0;
149 /* Auto-calculate the suffix length if the number of files is given. */
150 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
151 || split_type
== type_rr
)
153 size_t alphabet_len
= strlen (suffix_alphabet
);
154 bool alphabet_slop
= (n_units
% alphabet_len
) != 0;
155 while (n_units
/= alphabet_len
)
157 suffix_needed
+= alphabet_slop
;
160 if (suffix_length
) /* set by user */
162 if (suffix_length
< suffix_needed
)
164 error (EXIT_FAILURE
, 0,
165 _("the suffix length needs to be at least %zu"),
171 suffix_length
= MAX (DEFAULT_SUFFIX_LENGTH
, suffix_needed
);
/* Print usage information.  When STATUS is not EXIT_SUCCESS, a short
   "Try --help" hint goes to stderr.  The remaining visible statements
   print the full help text to stdout (option list, CHUNKS syntax, the
   standard --help/--version descriptions) and call
   emit_ancillary_info (); presumably these form the else branch.
   NOTE(review): the function header, the braces/else, several printf
   calls that open the string literals below, and the final exit are
   not visible in this listing.  */
177 if (status
!= EXIT_SUCCESS
)
178 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
183 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
187 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
188 size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
189 is -, read standard input.\n\
193 Mandatory arguments to long options are mandatory for short options too.\n\
195 fprintf (stdout
, _("\
196 -a, --suffix-length=N use suffixes of length N (default %d)\n\
197 -b, --bytes=SIZE put SIZE bytes per output file\n\
198 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
199 -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
200 -e, --elide-empty-files do not generate empty output files with `-n'\n\
201 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
202 -l, --lines=NUMBER put NUMBER lines per output file\n\
203 -n, --number=CHUNKS generate CHUNKS output files. See below\n\
204 -u, --unbuffered immediately copy input to output with `-n r/...'\n\
205 "), DEFAULT_SUFFIX_LENGTH
);
207 --verbose print a diagnostic just before each\n\
208 output file is opened\n\
210 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
211 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
215 N split into N files based on size of input\n\
216 K/N output Kth of N to stdout\n\
217 l/N split into N files without splitting lines\n\
218 l/K/N output Kth of N to stdout without splitting lines\n\
219 r/N like `l' but use round robin distribution\n\
220 r/K/N likewise but only output Kth of N to stdout\n\
222 emit_ancillary_info ();
/* next_file_name: advance the global OUTFILE to the next name in the
   sequence.  The visible code has two parts: a first-call part that
   allocates OUTFILE as OUTBASE followed by SUFFIX_LENGTH copies of the
   first alphabet character (plus a zeroed SUFINDEX array), and a later
   part that rewrites suffix characters from SUFINDEX and fails with
   "output file suffixes exhausted".
   NOTE(review): the header comment below is cut short, and the
   conditions and loop that select between these parts (original lines
   235-237, 261-266, 270-278) are not visible in this listing.  */
227 /* Compute the next sequential output file name and store it into the
231 next_file_name (void)
233 /* Index in suffix_alphabet of each character in the suffix. */
234 static size_t *sufindex
;
238 /* Allocate and initialize the first file name. */
240 size_t outbase_length
= strlen (outbase
);
241 size_t outfile_length
= outbase_length
+ suffix_length
;
/* Unsigned wrap-around check: if the sum plus the terminating NUL
   overflowed size_t it compares smaller than outbase_length.  The
   action taken (original line 243) is not visible.  */
242 if (outfile_length
+ 1 < outbase_length
)
244 outfile
= xmalloc (outfile_length
+ 1);
245 outfile_mid
= outfile
+ outbase_length
;
246 memcpy (outfile
, outbase
, outbase_length
);
/* Initial suffix: every position holds the first alphabet character.  */
247 memset (outfile_mid
, suffix_alphabet
[0], suffix_length
);
248 outfile
[outfile_length
] = 0;
/* xcalloc zero-fills, so every suffix position starts at index 0.  */
249 sufindex
= xcalloc (suffix_length
, sizeof *sufindex
);
251 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
252 /* POSIX requires that if the output file name is too long for
253 its directory, `split' must fail without creating any files.
254 This must be checked for explicitly on operating systems that
255 silently truncate file names. */
257 char *dir
= dir_name (outfile
);
258 long name_max
= pathconf (dir
, _PC_NAME_MAX
);
/* A negative name_max (pathconf gave no limit) skips the check.  */
259 if (0 <= name_max
&& name_max
< base_len (last_component (outfile
)))
260 error (EXIT_FAILURE
, ENAMETOOLONG
, "%s", outfile
);
267 /* Increment the suffix in place, if possible. */
269 size_t i
= suffix_length
;
273 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
277 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
/* Reached when no suffix position could be advanced (the loop that
   decides this is not visible).  */
279 error (EXIT_FAILURE
, 0, _("output file suffixes exhausted"));
283 /* Create or truncate a file. */
/* create (NAME): the visible code has two paths.
   - Plain file: open NAME write-only with O_CREAT | O_TRUNC | O_BINARY
     and read/write permission bits for user, group and other, and
     return the descriptor.
   - Filter: export NAME in the FILE environment variable, create a
     pipe, and fork a shell ($SHELL, or /bin/sh when unset) running
     filter_command with the pipe's read end as its stdin.  The parent
     records the child pid in filter_pid and stores the pipe's write
     end in open_pipes.
   NOTE(review): the conditions choosing between the paths, the fork
   call itself and the filter path's return statement are not visible
   in this listing.  */
286 create (const char *name
)
291 fprintf (stdout
, _("creating file %s\n"), quote (name
));
292 return open (name
, O_WRONLY
| O_CREAT
| O_TRUNC
| O_BINARY
,
293 (S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IWGRP
| S_IROTH
| S_IWOTH
));
299 char const *shell_prog
= getenv ("SHELL");
300 if (shell_prog
== NULL
)
301 shell_prog
= "/bin/sh";
/* The filter learns the intended output name through $FILE.  */
302 if (setenv ("FILE", name
, 1) != 0)
303 error (EXIT_FAILURE
, errno
,
304 _("failed to set FILE environment variable"));
306 fprintf (stdout
, _("executing with FILE=%s\n"), quote (name
));
307 if (pipe (fd_pair
) != 0)
308 error (EXIT_FAILURE
, errno
, _("failed to create pipe"));
312 /* This is the child process. If an error occurs here, the
313 parent will eventually learn about it after doing a wait,
314 at which time it will emit its own error message. */
316 /* We have to close any pipes that were opened during an
317 earlier call, otherwise this process will be holding a
318 write-pipe that will prevent the earlier process from
319 reading an EOF on the corresponding read-pipe. */
320 for (j
= 0; j
< n_open_pipes
; ++j
)
321 if (close (open_pipes
[j
]) != 0)
322 error (EXIT_FAILURE
, errno
, _("closing prior pipe"));
/* The child only reads: drop its copy of the write end.  */
323 if (close (fd_pair
[1]))
324 error (EXIT_FAILURE
, errno
, _("closing output pipe"));
/* Make the read end the child's standard input.  */
325 if (fd_pair
[0] != STDIN_FILENO
)
327 if (dup2 (fd_pair
[0], STDIN_FILENO
) != STDIN_FILENO
)
328 error (EXIT_FAILURE
, errno
, _("moving input pipe"));
329 if (close (fd_pair
[0]) != 0)
330 error (EXIT_FAILURE
, errno
, _("closing input pipe"));
/* Restore the signal mask saved in main () before running the filter.  */
332 sigprocmask (SIG_SETMASK
, &oldblocked
, NULL
);
333 execl (shell_prog
, last_component (shell_prog
), "-c",
334 filter_command
, (char *) NULL
);
/* Only reached if execl failed.  */
335 error (EXIT_FAILURE
, errno
, _("failed to run command: \"%s -c %s\""),
336 shell_prog
, filter_command
);
339 error (EXIT_FAILURE
, errno
, _("fork system call failed"));
/* Parent: it only writes, so close the read end.  */
340 if (close (fd_pair
[0]) != 0)
341 error (EXIT_FAILURE
, errno
, _("failed to close input pipe"));
342 filter_pid
= child_pid
;
/* Remember the write end so later children can close it (see above).  */
343 if (n_open_pipes
== open_pipes_alloc
)
344 open_pipes
= x2nrealloc (open_pipes
, &open_pipes_alloc
,
346 open_pipes
[n_open_pipes
++] = fd_pair
[1];
351 /* Close the output file, and do any associated cleanup.
352 If FP and FD are both specified, they refer to the same open file;
353 in this case FP is closed, but FD is still used in cleanup. */
/* PID is the filter process to wait for; callers pass filter_pid or a
   pid saved per output file.  NAME is used only in diagnostics.
   NOTE(review): the guards around the close, the pipe bookkeeping and
   the wait (e.g. tests on FD and PID), and the declaration of wstatus,
   are not visible in this listing.  */
355 closeout (FILE *fp
, int fd
, pid_t pid
, char const *name
)
/* A failed fclose is fatal unless the error is ignorable.  */
357 if (fp
!= NULL
&& fclose (fp
) != 0 && ! ignorable (errno
))
358 error (EXIT_FAILURE
, errno
, "%s", name
);
/* With no stream, close the raw descriptor instead.  */
361 if (fp
== NULL
&& close (fd
) < 0)
362 error (EXIT_FAILURE
, errno
, "%s", name
);
/* Forget FD in open_pipes: the matching slot is overwritten with the
   last entry and the count is decremented.  */
364 for (j
= 0; j
< n_open_pipes
; ++j
)
366 if (open_pipes
[j
] == fd
)
368 open_pipes
[j
] = open_pipes
[--n_open_pipes
];
/* Reap the filter; a missing child (ECHILD) is tolerated.  */
376 if (waitpid (pid
, &wstatus
, 0) == -1 && errno
!= ECHILD
)
377 error (EXIT_FAILURE
, errno
, _("waiting for child process"));
378 if (WIFSIGNALED (wstatus
))
380 int sig
= WTERMSIG (wstatus
);
/* Buffer fits either a signal name or the decimal signal number,
   which is the fallback when sig2str cannot name SIG.  */
383 char signame
[MAX (SIG2STR_MAX
, INT_BUFSIZE_BOUND (int))];
384 if (sig2str (sig
, signame
) != 0)
385 sprintf (signame
, "%d", sig
);
387 _("with FILE=%s, signal %s from command: %s"),
388 name
, signame
, filter_command
);
391 else if (WIFEXITED (wstatus
))
393 int ex
= WEXITSTATUS (wstatus
);
/* The filter's exit status is passed to error () as the status
   argument.  NOTE(review): presumably guarded by a test that EX is
   non-zero; that line is not visible.  */
395 error (ex
, 0, _("with FILE=%s, exit %d from command: %s"),
396 name
, ex
, filter_command
);
400 /* shouldn't happen. */
401 error (EXIT_FAILURE
, 0,
402 _("unknown status from command (0x%X)"), wstatus
);
407 /* Write BYTES bytes at BP to an output file.
408 If NEW_FILE_FLAG is true, open the next output file.
409 Otherwise add to the same output file already in use. */
/* Callers pass BP == NULL with BYTES == 0 to request an empty output
   file.  Errors from create () and non-ignorable short writes are
   fatal.  NOTE(review): the `if (new_file_flag)' line, the call that
   advances the file name, and the body of the elide test below are not
   visible in this listing.  */
412 cwrite (bool new_file_flag
, const char *bp
, size_t bytes
)
/* Empty-file request while eliding empty files (body not visible).  */
416 if (!bp
&& bytes
== 0 && elide_empty_files
)
/* Finish the previous output before opening the next one.  */
418 closeout (NULL
, output_desc
, filter_pid
, outfile
);
420 if ((output_desc
= create (outfile
)) < 0)
421 error (EXIT_FAILURE
, errno
, "%s", outfile
);
/* A short write is fatal unless the error is ignorable.  */
423 if (full_write (output_desc
, bp
, bytes
) != bytes
&& ! ignorable (errno
))
424 error (EXIT_FAILURE
, errno
, "%s", outfile
);
427 /* Split into pieces of exactly N_BYTES bytes.
428 Use buffer BUF, whose size is BUFSIZE. */
/* MAX_FILES limits the number of output files: main () passes 0 (no
   limit) for a plain byte split and the chunk count for "-n N".  Once
   MAX_FILES files have been opened no further file is started, so the
   remaining input is appended to the last one.
   NOTE(review): several declarations and the inner loop headers are
   not visible in this listing.  */
431 bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
, uintmax_t max_files
)
434 bool new_file_flag
= true;
/* Bytes still to be written to the current output file.  */
436 uintmax_t to_write
= n_bytes
;
/* Number of output files started so far.  */
438 uintmax_t opened
= 0;
442 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
/* A short read with errno set is a read error, not end of file.  */
443 if (n_read
< bufsize
&& errno
)
444 error (EXIT_FAILURE
, errno
, "%s", infile
);
/* Less left in the buffer than the current file still needs: write it
   all and keep the file open.  */
449 if (to_read
< to_write
)
451 if (to_read
) /* do not write 0 bytes! */
453 cwrite (new_file_flag
, bp_out
, to_read
);
454 opened
+= new_file_flag
;
456 new_file_flag
= false;
/* Otherwise finish the current file from the buffer.  */
463 cwrite (new_file_flag
, bp_out
, w
);
464 opened
+= new_file_flag
;
465 new_file_flag
= !max_files
|| (opened
< max_files
);
466 if (!new_file_flag
&& ignorable (errno
))
468 /* If filter no longer accepting input, stop reading. */
/* A short read ends the outer loop.  */
478 while (n_read
== bufsize
);
480 /* Ensure NUMBER files are created, which truncates
481 any existing files or notifies any consumers on fifos.
482 FIXME: Should we do this before EXIT_FAILURE? */
483 while (opened
++ < max_files
)
484 cwrite (true, NULL
, 0);
487 /* Split into pieces of exactly N_LINES lines.
488 Use buffer BUF, whose size is BUFSIZE. */
/* Reads the input in BUFSIZE blocks, scanning each block for newlines
   with memchr and writing through cwrite ().
   NOTE(review): the line counter, the inner loop and the code that sets
   bp, bp_out and eob for each block are not visible in this listing.  */
491 lines_split (uintmax_t n_lines
, char *buf
, size_t bufsize
)
494 char *bp
, *bp_out
, *eob
;
495 bool new_file_flag
= true;
500 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
/* A short read with errno set is a read error, not end of file.  */
501 if (n_read
< bufsize
&& errno
)
502 error (EXIT_FAILURE
, errno
, "%s", infile
);
/* The search length includes the byte at EOB itself.
   NOTE(review): presumably a newline sentinel is stored at *eob so the
   search always terminates; the store is not visible.  */
508 bp
= memchr (bp
, '\n', eob
- bp
+ 1);
/* Flush the pending partial data of this block to the current file.  */
511 if (eob
!= bp_out
) /* do not write 0 bytes! */
513 size_t len
= eob
- bp_out
;
514 cwrite (new_file_flag
, bp_out
, len
);
515 new_file_flag
= false;
/* Write up to BP; the next write then starts a new output file.  */
523 cwrite (new_file_flag
, bp_out
, bp
- bp_out
);
525 new_file_flag
= true;
/* A short read ends the outer loop.  */
530 while (n_read
== bufsize
);
533 /* Split into pieces that are as large as possible while still not more
534 than N_BYTES bytes, and are split on line boundaries except
535 where lines longer than N_BYTES bytes occur.
536 FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
537 buffer of size N_BYTES, in case N_BYTES is very large. */
/* Works with a private buffer of exactly N_BYTES bytes: fill it, write
   a prefix as one output file, slide the remainder to the front, and
   repeat.  NOTE(review): the loop header, the end-of-input handling and
   the bodies of the backward scan are not visible in this listing.  */
540 line_bytes_split (size_t n_bytes
)
/* Bytes currently held in BUF.  */
544 size_t n_buffered
= 0;
545 char *buf
= xmalloc (n_bytes
);
549 /* Fill up the full buffer size from the input file. */
551 size_t to_read
= n_bytes
- n_buffered
;
552 size_t n_read
= full_read (STDIN_FILENO
, buf
+ n_buffered
, to_read
);
/* A short read with errno set is a read error, not end of file.  */
553 if (n_read
< to_read
&& errno
)
554 error (EXIT_FAILURE
, errno
, "%s", infile
);
556 n_buffered
+= n_read
;
557 if (n_buffered
!= n_bytes
)
564 /* Find where to end this chunk. */
565 bp
= buf
+ n_buffered
;
/* Only a full buffer is cut back to a line boundary: scan backwards
   from the end while the preceding byte is not a newline.  */
566 if (n_buffered
== n_bytes
)
568 while (bp
> buf
&& bp
[-1] != '\n')
572 /* If chunk has no newlines, use all the chunk. */
574 bp
= buf
+ n_buffered
;
576 /* Output the chars as one output file. */
577 cwrite (true, buf
, bp
- buf
);
579 /* Discard the chars we just output; move rest of chunk
580 down to be the start of the next chunk. Source and
581 destination probably overlap. */
582 n_buffered
-= bp
- buf
;
584 memmove (buf
, bp
, n_buffered
);
590 /* -n l/[K/]N: Write lines to files of approximately file size / N.
591 The file is partitioned into file size / N sized portions, with the
592 last assigned any excess. If a line _starts_ within a partition
593 it is written completely to the corresponding file. Since lines
594 are not split even if they overlap a partition, the files written
595 can be larger or smaller than the partition size, and even empty
596 if a line is so long as to completely overlap the partition. */
/* K == 0 writes every chunk to its own output file through cwrite ();
   K != 0 writes only chunk K, straight to standard output.
   NOTE(review): the end of the parameter list (FILE_SIZE is used below
   but its declaration is not visible), the declarations of n_written,
   to_write and next, and most branch and loop headers are missing from
   this listing, so the comments below describe individual statements
   only.  */
599 lines_chunk_split (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
602 assert (n
&& k
<= n
&& n
<= file_size
);
604 const off_t chunk_size
= file_size
/ n
;
605 uintmax_t chunk_no
= 1;
/* Offset of the last byte of the current chunk.  */
606 off_t chunk_end
= chunk_size
- 1;
608 bool new_file_flag
= true;
609 bool chunk_truncated
= false;
613 /* Start reading 1 byte before kth chunk of file. */
614 off_t start
= (k
- 1) * chunk_size
- 1;
615 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
616 error (EXIT_FAILURE
, errno
, "%s", infile
);
619 chunk_end
= chunk_no
* chunk_size
- 1;
622 while (n_written
< file_size
)
624 char *bp
= buf
, *eob
;
625 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
/* A short read with errno set is a read error, not end of file.  */
626 if (n_read
< bufsize
&& errno
)
627 error (EXIT_FAILURE
, errno
, "%s", infile
);
628 else if (n_read
== 0)
/* Never process more than FILE_SIZE bytes in total.  */
630 n_read
= MIN (n_read
, file_size
- n_written
);
631 chunk_truncated
= false;
639 /* Begin looking for '\n' at last byte of chunk. */
/* SKIP is the distance to the chunk's last byte, clamped to
   [0, n_read].  */
640 off_t skip
= MIN (n_read
, MAX (0, chunk_end
- n_written
));
641 char *bp_out
= memchr (bp
+ skip
, '\n', n_read
- skip
);
646 to_write
= bp_out
- bp
;
650 /* We don't use the stdout buffer here since we're writing
651 large chunks from an existing file, so it's more efficient
652 to write out directly. */
653 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
654 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
657 cwrite (new_file_flag
, bp
, to_write
);
658 n_written
+= to_write
;
661 new_file_flag
= next
;
663 /* A line could have been so long that it skipped
664 entire chunks. So create empty files in that case. */
665 while (next
|| chunk_end
<= n_written
- 1)
667 if (!next
&& bp
== eob
)
669 /* replenish buf, before going to next chunk. */
670 chunk_truncated
= true;
/* Extracting a single chunk and already past it.  */
674 if (k
&& chunk_no
> k
)
/* The last chunk absorbs the remainder of the file.  */
677 chunk_end
= file_size
- 1; /* >= chunk_size. */
679 chunk_end
+= chunk_size
;
/* The new chunk is already fully covered by what was written: emit an
   empty file for it.  */
680 if (chunk_end
<= n_written
- 1)
683 cwrite (true, NULL
, 0);
694 /* Ensure NUMBER files are created, which truncates
695 any existing files or notifies any consumers on fifos.
696 FIXME: Should we do this before EXIT_FAILURE? */
697 while (!k
&& chunk_no
++ <= n
)
698 cwrite (true, NULL
, 0);
701 /* -n K/N: Extract Kth of N chunks. */
/* Chunk K covers bytes [START, END) of the input, where each chunk is
   FILE_SIZE / N bytes long and the last chunk (K == N) extends to
   FILE_SIZE.  The chunk is copied to standard output.
   NOTE(review): the end of the parameter list (FILE_SIZE), the
   declarations of start and end, and the read loop header and its
   bookkeeping are not visible in this listing.  */
704 bytes_chunk_extract (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
710 assert (k
&& n
&& k
<= n
&& n
<= file_size
);
712 start
= (k
- 1) * (file_size
/ n
);
713 end
= (k
== n
) ? file_size
: k
* (file_size
/ n
);
/* Skip forward, relative to the current input position, to the start
   of the chunk.  */
715 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
716 error (EXIT_FAILURE
, errno
, "%s", infile
);
720 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
/* A short read with errno set is a read error, not end of file.  */
721 if (n_read
< bufsize
&& errno
)
722 error (EXIT_FAILURE
, errno
, "%s", infile
);
723 else if (n_read
== 0)
/* Do not write past the end of the chunk.  */
725 n_read
= MIN (n_read
, end
- start
);
726 if (full_write (STDOUT_FILENO
, buf
, n_read
) != n_read
727 && ! ignorable (errno
))
728 error (EXIT_FAILURE
, errno
, "%s", quote ("-"));
733 typedef struct of_info
747 /* Rotate file descriptors when we're writing to more output files than we
748 have available file descriptors.
749 Return whether we came under file resource pressure.
750 If so, it's probably best to close each file when finished with it. */
/* FILES is the table of NFILES output files and I_CHECK the entry that
   must be open on return.  An entry whose ofd is OFD_NEW is created
   with create (); otherwise it is reopened for appending.  When open
   fails with EMFILE or ENFILE, another entry's stream is closed and
   marked OFD_APPEND so that the open can be retried.
   NOTE(review): the retry loop header, the declaration of fd, the
   statements that set file_limit and the return are not visible in
   this listing.  */
753 ofile_open (of_t
*files
, size_t i_check
, size_t nfiles
)
755 bool file_limit
= false;
/* Nothing visible is done for an entry that already has a descriptor.  */
757 if (files
[i_check
].ofd
<= OFD_NEW
)
/* Candidate to close: the entry just before I_CHECK, wrapping.  */
760 size_t i_reopen
= i_check
? i_check
- 1 : nfiles
- 1;
762 /* Another process could have opened a file in between the calls to
763 close and open, so we should keep trying until open succeeds or
764 we've closed all of our files. */
767 if (files
[i_check
].ofd
== OFD_NEW
)
768 fd
= create (files
[i_check
].of_name
);
769 else /* OFD_APPEND */
771 /* Attempt to append to previously opened file.
772 We use O_NONBLOCK to support writing to fifos,
773 where the other end has closed because of our
774 previous close. In that case we'll immediately
775 get an error, rather than waiting indefinitely.
776 In specialised cases the consumer can keep reading
777 from the fifo, terminating on conditions in the data
778 itself, or perhaps never in the case of `tail -f`.
779 I.E. for fifos it is valid to attempt this reopen.
781 We don't handle the filter_command case here, as create()
782 will exit if there are not enough files in that case.
783 I.E. we don't support restarting filters, as that would
784 put too much burden on users specifying --filter commands. */
785 fd
= open (files
[i_check
].of_name
,
786 O_WRONLY
| O_BINARY
| O_APPEND
| O_NONBLOCK
);
/* Any failure other than running out of descriptors is fatal.  */
792 if (!(errno
== EMFILE
|| errno
== ENFILE
))
793 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
797 /* Search backwards for an open file to close. */
798 while (files
[i_reopen
].ofd
< 0)
800 i_reopen
= i_reopen
? i_reopen
- 1 : nfiles
- 1;
801 /* No more open files to close, exit with E[NM]FILE. */
802 if (i_reopen
== i_check
)
803 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
/* Close the victim and mark it for reopening in append mode.  */
806 if (fclose (files
[i_reopen
].ofile
) != 0)
807 error (EXIT_FAILURE
, errno
, "%s", files
[i_reopen
].of_name
);
808 files
[i_reopen
].ofile
= NULL
;
809 files
[i_reopen
].ofd
= OFD_APPEND
;
/* Record the new descriptor, wrap it in a stream, and remember which
   filter process (if any) belongs to this output.  */
812 files
[i_check
].ofd
= fd
;
813 if (!(files
[i_check
].ofile
= fdopen (fd
, "a")))
814 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
815 files
[i_check
].opid
= filter_pid
;
822 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
823 When K == 0, we try to keep the files open in parallel.
824 If we run out of file resources, then we revert
825 to opening and closing each file for each line. */
/* With K != 0 only the lines belonging to chunk K are written, to
   standard output; with K == 0 each line goes to the next of N output
   files, opened on demand through ofile_open ().
   NOTE(review): many declarations (i_file, line_no, file_limit,
   to_write, next), the loop headers and several branch conditions are
   not visible in this listing, so the comments below describe
   individual statements only.  */
828 lines_rr (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
)
/* Read when sizing the cleanup pass at the end of the function.  */
830 bool wrapped
= false;
834 of_t
*files
IF_LINT (= NULL
);
842 error (exit_failure
, 0, "%s", _("memory exhausted"));
843 files
= xnmalloc (n
, sizeof *files
);
845 /* Generate output file names. */
/* Every entry starts unopened: OFD_NEW, no stream, no filter pid.  */
846 for (i_file
= 0; i_file
< n
; i_file
++)
849 files
[i_file
].of_name
= xstrdup (outfile
);
850 files
[i_file
].ofd
= OFD_NEW
;
851 files
[i_file
].ofile
= NULL
;
852 files
[i_file
].opid
= 0;
860 char *bp
= buf
, *eob
;
861 /* Use safe_read() rather than full_read() here
862 so that we process available data immediately. */
863 size_t n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
864 if (n_read
== SAFE_READ_ERROR
)
865 error (EXIT_FAILURE
, errno
, "%s", infile
);
866 else if (n_read
== 0)
875 /* Find end of line. */
876 char *bp_out
= memchr (bp
, '\n', eob
- bp
);
884 to_write
= bp_out
- bp
;
/* Single-chunk extraction: unbuffered mode writes straight to the
   descriptor, otherwise the stdout stream is used.  */
888 if (line_no
== k
&& unbuffered
)
890 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
891 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
893 else if (line_no
== k
&& fwrite (bp
, to_write
, 1, stdout
) != 1)
895 clearerr (stdout
); /* To silence close_stdout(). */
896 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
/* line_no cycles through 1..N.  */
899 line_no
= (line_no
== n
) ? 1 : line_no
+ 1;
903 /* Secure file descriptor. */
904 file_limit
|= ofile_open (files
, i_file
, n
);
907 /* Note writing to fd, rather than flushing the FILE gives
908 an 8% performance benefit, due to reduced data copying. */
909 if (full_write (files
[i_file
].ofd
, bp
, to_write
) != to_write
910 && ! ignorable (errno
))
911 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
913 else if (fwrite (bp
, to_write
, 1, files
[i_file
].ofile
) != 1
914 && ! ignorable (errno
))
915 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
916 if (! ignorable (errno
))
/* Close the stream and mark the entry for reopening in append mode
   (presumably only under descriptor pressure; the guarding condition
   is not visible).  */
921 if (fclose (files
[i_file
].ofile
) != 0)
922 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
923 files
[i_file
].ofile
= NULL
;
924 files
[i_file
].ofd
= OFD_APPEND
;
/* Advance to the next output file after a complete line, and detect
   when the index runs past the last file.  */
926 if (next
&& ++i_file
== n
)
929 /* If no filters are accepting input, stop reading. */
942 /* Ensure all files created, so that any existing files are truncated,
943 and to signal any waiting fifo consumers.
944 Also, close any open file descriptors.
945 FIXME: Should we do this before EXIT_FAILURE? */
/* Entries at or beyond CEILING were never written to.
   NOTE(review): CEILING is an int but receives N (uintmax_t) or i_file;
   a wider unsigned type would avoid truncation and the signed/unsigned
   comparison below.  */
948 int ceiling
= (wrapped
? n
: i_file
);
949 for (i_file
= 0; i_file
< n
; i_file
++)
/* Create the never-used outputs too, unless empty files are elided.  */
951 if (i_file
>= ceiling
&& !elide_empty_files
)
952 file_limit
|= ofile_open (files
, i_file
, n
);
953 if (files
[i_file
].ofd
>= 0)
954 closeout (files
[i_file
].ofile
, files
[i_file
].ofd
,
955 files
[i_file
].opid
, files
[i_file
].of_name
);
956 files
[i_file
].ofd
= OFD_APPEND
;
/* Report that more than one split mode was requested and exit through
   usage (EXIT_FAILURE).  Used in main () whenever a mode option is seen
   after split_type has already been set.
   NOTE(review): the wrapper lines of the macro body (original lines
   962-963 and 966-967) are not visible in this listing.  */
961 #define FAIL_ONLY_ONE_WAY() \
964 error (0, 0, _("cannot split in more than one way")); \
965 usage (EXIT_FAILURE); \
969 /* Parse K/N syntax of chunk options. */
/* SLASH points at the '/' inside the global optarg.  The text after it
   is parsed as decimal into *N_UNITS.  When the slash is not the first
   character, the text before it is parsed as decimal into *K_UNITS,
   which must be at least 1 and at most *N_UNITS.  Any failure is fatal.
   NOTE(review): the end of the first condition (original line 976) and
   the statement that precedes it are not visible in this listing.  */
972 parse_chunk (uintmax_t *k_units
, uintmax_t *n_units
, char *slash
)
975 if (xstrtoumax (slash
+ 1, NULL
, 10, n_units
, "") != LONGINT_OK
977 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), slash
+ 1);
978 if (slash
!= optarg
/* a leading number is specified. */
979 && (xstrtoumax (optarg
, NULL
, 10, k_units
, "") != LONGINT_OK
980 || *k_units
== 0 || *n_units
< *k_units
))
981 error (EXIT_FAILURE
, 0, _("%s: invalid chunk number"), optarg
);
/* Program entry point: parse the options, pick the split mode and the
   suffix length, open the input on standard input, allocate the I/O
   buffer, block SIGPIPE, dispatch to the routine for the chosen mode,
   and finally close the input and the last output file.
   NOTE(review): the option loop header, the `case' labels of most
   options, several declarations (n_units, file_size, c, slash, tmp) and
   many closing conditions are not visible in this listing; the
   per-option comments below are inferred from the visible statements
   and diagnostics.  */
986 main (int argc
, char **argv
)
988 struct stat stat_buf
;
989 enum Split_type split_type
= type_undef
;
990 size_t in_blk_size
= 0; /* optimal block size of input file device */
991 char *buf
; /* file i/o buffer */
992 size_t page_size
= getpagesize ();
/* Chunk number K of "-n K/N"; 0 means "write all chunks".  */
993 uintmax_t k_units
= 0;
/* Size suffixes accepted for the byte-count and block-size arguments.  */
996 static char const multipliers
[] = "bEGKkMmPTYZ0";
/* argv index of the option that supplied the digits of a bare
   line-count option; 0 while none has been seen.  */
998 int digits_optind
= 0;
1001 initialize_main (&argc
, &argv
);
1002 set_program_name (argv
[0]);
1003 setlocale (LC_ALL
, "");
1004 bindtextdomain (PACKAGE
, LOCALEDIR
);
1005 textdomain (PACKAGE
);
1007 atexit (close_stdout
);
1009 /* Parse command line options. */
/* Defaults: read standard input, name outputs "x" plus suffix.  */
1011 infile
= bad_cast ("-");
1012 outbase
= bad_cast ("x");
1016 /* This is the argv-index of the option we will read next. */
1017 int this_optind
= optind
? optind
: 1;
1020 c
= getopt_long (argc
, argv
, "0123456789C:a:b:del:n:u",
/* Suffix length: decimal, rejected if it fails to parse or exceeds
   SIZE_MAX / sizeof (size_t).  */
1030 if (xstrtoul (optarg
, NULL
, 10, &tmp
, "") != LONGINT_OK
1031 || SIZE_MAX
/ sizeof (size_t) < tmp
)
1033 error (0, 0, _("%s: invalid suffix length"), optarg
);
1034 usage (EXIT_FAILURE
);
1036 suffix_length
= tmp
;
/* Byte-count mode (type_bytes); size suffixes are accepted.  */
1041 if (split_type
!= type_undef
)
1042 FAIL_ONLY_ONE_WAY ();
1043 split_type
= type_bytes
;
1044 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
1047 error (0, 0, _("%s: invalid number of bytes"), optarg
);
1048 usage (EXIT_FAILURE
);
1050 /* If input is a pipe, we could get more data than is possible
1051 to write to a single file, so indicate that immediately
1052 rather than having possibly future invocations fail. */
1053 if (OFF_T_MAX
< n_units
)
1054 error (EXIT_FAILURE
, EFBIG
,
1055 _("%s: invalid number of bytes"), optarg
);
/* Line-count mode (type_lines); plain decimal only.  */
1060 if (split_type
!= type_undef
)
1061 FAIL_ONLY_ONE_WAY ();
1062 split_type
= type_lines
;
1063 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
1066 error (0, 0, _("%s: invalid number of lines"), optarg
);
1067 usage (EXIT_FAILURE
);
/* Line-bytes mode (type_byteslines).  The count must be non-zero and
   fit in size_t, since line_bytes_split () allocates a buffer of that
   size.  */
1072 if (split_type
!= type_undef
)
1073 FAIL_ONLY_ONE_WAY ();
1074 split_type
= type_byteslines
;
1075 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
1076 || n_units
== 0 || SIZE_MAX
< n_units
)
1078 error (0, 0, _("%s: invalid number of bytes"), optarg
);
1079 usage (EXIT_FAILURE
);
1081 if (OFF_T_MAX
< n_units
)
1082 error (EXIT_FAILURE
, EFBIG
,
1083 _("%s: invalid number of bytes"), optarg
);
/* Chunk modes: an "r/" prefix selects round robin, "l/" selects
   line-preserving chunks, anything else byte chunks.  What remains is
   either K/N (parse_chunk) or a bare N.  */
1087 if (split_type
!= type_undef
)
1088 FAIL_ONLY_ONE_WAY ();
1089 /* skip any whitespace */
1090 while (isspace (to_uchar (*optarg
)))
1092 if (STRNCMP_LIT (optarg
, "r/") == 0)
1094 split_type
= type_rr
;
1097 else if (STRNCMP_LIT (optarg
, "l/") == 0)
1099 split_type
= type_chunk_lines
;
1103 split_type
= type_chunk_bytes
;
1104 if ((slash
= strchr (optarg
, '/')))
1105 parse_chunk (&k_units
, &n_units
, slash
);
1106 else if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
1108 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), optarg
);
/* Bare digit options: the digits are accumulated one option character
   at a time into n_units as a line count.  */
1125 if (split_type
== type_undef
)
1127 split_type
= type_digits
;
1130 if (split_type
!= type_undef
&& split_type
!= type_digits
)
1131 FAIL_ONLY_ONE_WAY ();
1132 if (digits_optind
!= 0 && digits_optind
!= this_optind
)
1133 n_units
= 0; /* More than one number given; ignore other. */
1134 digits_optind
= this_optind
;
1135 if (!DECIMAL_DIGIT_ACCUMULATE (n_units
, c
- '0', uintmax_t))
1137 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1138 error (EXIT_FAILURE
, 0,
1139 _("line count option -%s%c... is too large"),
1140 umaxtostr (n_units
, buffer
), c
);
/* Switch the suffix alphabet to decimal digits.  */
1145 suffix_alphabet
= "0123456789";
1149 elide_empty_files
= true;
1153 filter_command
= optarg
;
1156 case IO_BLKSIZE_OPTION
:
/* Undocumented override of the I/O block size.  It must be non-zero
   and leave room for the page_size added to the allocation below.  */
1158 uintmax_t tmp_blk_size
;
1159 if (xstrtoumax (optarg
, NULL
, 10, &tmp_blk_size
,
1160 multipliers
) != LONGINT_OK
1161 || tmp_blk_size
== 0 || SIZE_MAX
- page_size
< tmp_blk_size
)
1162 error (0, 0, _("%s: invalid IO block size"), optarg
);
1164 in_blk_size
= tmp_blk_size
;
1168 case VERBOSE_OPTION
:
1172 case_GETOPT_HELP_CHAR
;
1174 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1177 usage (EXIT_FAILURE
);
/* Extracting one chunk to stdout cannot be combined with a filter.  */
1181 if (k_units
!= 0 && filter_command
)
1183 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1184 usage (EXIT_FAILURE
);
1187 /* Handle default case. */
1188 if (split_type
== type_undef
)
1190 split_type
= type_lines
;
1196 error (0, 0, _("%s: invalid number of lines"), "0");
1197 usage (EXIT_FAILURE
);
1200 set_suffix_length (n_units
, split_type
);
1202 /* Get out the filename arguments. */
/* First operand: input file.  Second: output prefix.  Any further
   operand is an error.  */
1205 infile
= argv
[optind
++];
1208 outbase
= argv
[optind
++];
1212 error (0, 0, _("extra operand %s"), quote (argv
[optind
]));
1213 usage (EXIT_FAILURE
);
1216 /* Open the input file. */
/* A named input is reopened onto descriptor 0, so every routine reads
   from STDIN_FILENO.  */
1217 if (! STREQ (infile
, "-")
1218 && fd_reopen (STDIN_FILENO
, infile
, O_RDONLY
, 0) < 0)
1219 error (EXIT_FAILURE
, errno
, _("cannot open %s for reading"),
1222 /* Binary I/O is safer when byte counts are used. */
1223 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
1224 xfreopen (NULL
, "rb", stdin
);
1226 /* Get the optimal block size of input device and make a buffer. */
1228 if (fstat (STDIN_FILENO
, &stat_buf
) != 0)
1229 error (EXIT_FAILURE
, errno
, "%s", infile
);
1230 if (in_blk_size
== 0)
1231 in_blk_size
= io_blksize (stat_buf
);
1232 file_size
= stat_buf
.st_size
;
/* The size-based chunk modes need the number of bytes that remain
   after the current input offset.  */
1234 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
)
1236 off_t input_offset
= lseek (STDIN_FILENO
, 0, SEEK_CUR
);
1237 if (input_offset
< 0)
1238 error (EXIT_FAILURE
, 0, _("%s: cannot determine file size"),
1240 file_size
-= input_offset
;
1241 /* Overflow, and sanity checking. */
1242 if (OFF_T_MAX
< n_units
)
1244 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1245 error (EXIT_FAILURE
, EFBIG
, _("%s: invalid number of chunks"),
1246 umaxtostr (n_units
, buffer
));
1248 /* increase file_size to n_units here, so that we still process
1249 any input data, and create empty files for the rest. */
1250 file_size
= MAX (file_size
, n_units
);
/* in_blk_size + 1 usable bytes, over-allocated by page_size - 1 so
   that ptr_align can return a page-aligned pointer.  */
1253 buf
= ptr_align (xmalloc (in_blk_size
+ 1 + page_size
- 1), page_size
);
1255 /* When filtering, closure of one pipe must not terminate the process,
1256 as there may still be other streams expecting input from us. */
/* SIGPIPE is added to the blocked set unless it is already ignored;
   the previous mask is kept in oldblocked for the filter children.  */
1259 struct sigaction act
;
1260 sigemptyset (&newblocked
);
1261 sigaction (SIGPIPE
, NULL
, &act
);
1262 if (act
.sa_handler
!= SIG_IGN
)
1263 sigaddset (&newblocked
, SIGPIPE
);
1264 sigprocmask (SIG_BLOCK
, &newblocked
, &oldblocked
);
/* Dispatch on the split mode.  */
1271 lines_split (n_units
, buf
, in_blk_size
);
1275 bytes_split (n_units
, buf
, in_blk_size
, 0);
1278 case type_byteslines
:
1279 line_bytes_split (n_units
);
1282 case type_chunk_bytes
:
/* All chunks: equal byte counts limited to n_units files.  One chunk:
   extract it to stdout.  (The test on k_units is not visible.)  */
1284 bytes_split (file_size
/ n_units
, buf
, in_blk_size
, n_units
);
1286 bytes_chunk_extract (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1289 case type_chunk_lines
:
1290 lines_chunk_split (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1294 /* Note, this is like `sed -n ${k}~${n}p` when k > 0,
1295 but the functionality is provided for symmetry. */
1296 lines_rr (k_units
, n_units
, buf
, in_blk_size
);
/* Final cleanup: close the input, then the last output file (and reap
   its filter, if any).  */
1303 if (close (STDIN_FILENO
) != 0)
1304 error (EXIT_FAILURE
, errno
, "%s", infile
);
1305 closeout (NULL
, output_desc
, filter_pid
, outfile
);
1307 exit (EXIT_SUCCESS
);