1 /* split.c -- split a file into pieces.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* By tege@sics.se, with rms.
20 * Implement -t CHAR or -t REGEX to specify break characters other
29 #include <sys/types.h>
34 #include "fd-reopen.h"
36 #include "full-read.h"
37 #include "full-write.h"
38 #include "ioblksize.h"
40 #include "safe-read.h"
45 /* The official name of this program (e.g., no 'g' prefix). */
46 #define PROGRAM_NAME "split"
49 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
50 proper_name ("Richard M. Stallman")
/* Shell command that receives each piece on its standard input instead of
   the piece being written to a file; NULL when no filter was requested.
   create() runs it as "$SHELL -c COMMAND" (falling back to /bin/sh when
   SHELL is unset) with FILE set in the environment to the piece's name. */
static char const *filter_command;

/* Process ID of the most recently started filter, recorded by create()
   once fork has succeeded.  This is a pid_t rather than an int because the
   value originates from fork and is handed on to closeout(), whose PID
   parameter is a pid_t. */
static pid_t filter_pid;

/* Write ends of the pipes feeding the filters started so far.  create()
   appends each new pipe's write end (growing the array on demand), a
   freshly forked filter child closes every entry so that it does not keep
   an earlier filter's pipe open, and closeout() drops an entry once its
   descriptor has been closed.  OPEN_PIPES_ALLOC is the allocated capacity
   and N_OPEN_PIPES the number of entries currently in use. */
static int *open_pipes;
static size_t open_pipes_alloc;
static size_t n_open_pipes;
63 /* Signal masks used around filter execution.  NEWBLOCKED is the set that
   main() adds to the process signal mask: it starts empty and gains
   SIGPIPE unless SIGPIPE is already being ignored.  OLDBLOCKED receives
   the mask that was in effect before that change; the filter child in
   create() reinstates it with SIG_SETMASK just before exec'ing the
   shell. */
64 static sigset_t oldblocked
;
65 static sigset_t newblocked
;
67 /* Base name (prefix) of the output files.  main() defaults it to "x" and
   replaces it with the PREFIX operand when one is given;
   next_file_name() copies it to the front of the generated name. */
68 static char const *outbase
;
70 /* Name of output files. */
73 /* Pointer to the end of the prefix in OUTFILE, i.e. OUTFILE plus the
   prefix length.  next_file_name() fills the SUFFIX_LENGTH bytes starting
   here with the generated suffix and copies any additional suffix
   immediately after them. */
75 static char *outfile_mid
;
/* Generate a new, longer suffix when the suffixes of the current length
   are about to be exhausted.  Consulted by next_file_name() while it
   increments the suffix. */
static bool suffix_auto = true;

/* Length of OUTFILE's suffix.  Zero until set: set_suffix_length() treats
   a nonzero value as chosen by the user (via -a) and otherwise picks the
   larger of the default length and the length the requested number of
   files needs. */
static size_t suffix_length;

/* Alphabet of characters to use in the suffix.  Lower-case letters by
   default; main() switches it to the decimal digits when numeric suffixes
   are requested. */
static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";

/* Numerical suffix start value as given on the command line (leading
   zeros skipped), or NULL when none was given.  Spelled "char const *"
   like the other string pointers in this file. */
static char const *numeric_suffix_start;

/* Additional suffix to append to output file names after the generated
   suffix, or NULL.  main() rejects a value containing a directory
   separator. */
static char const *additional_suffix;
92 /* Name of input file. May be "-". */
95 /* stat buf for input file.  Filled in by main() with fstat on standard
   input (which has been reopened on the input file when one is named).
   create() compares it against each newly opened output file to refuse
   overwriting the input, and main() uses it to choose the I/O block size
   and, for the chunk modes, the input size. */
96 static struct stat in_stat_buf
;
98 /* Descriptor on which output file is open.  Holds -1 until cwrite()
   opens the first output through create(); cwrite() and main() pass it
   to closeout() when a piece is finished. */
99 static int output_desc
= -1;
101 /* If true, print a diagnostic on standard error just before each
102 output file is opened. */
105 /* If true, don't generate zero length output files.  Set by the
   -e/--elide-empty-files option.  cwrite() checks it when asked to start
   a new file with a null buffer and zero bytes, and lines_rr() checks it
   before opening the files that received no data. */
106 static bool elide_empty_files
;
108 /* If true, in round robin mode, immediately copy
   input to output, which is much slower, so disabled by default.
   Set by the -u/--unbuffered option; lines_rr() then writes with
   full_write on the descriptor instead of fwrite on the stream. */
110 static bool unbuffered
;
112 /* The split mode to use. */
115 type_undef
, type_bytes
, type_byteslines
, type_lines
, type_digits
,
116 type_chunk_bytes
, type_chunk_lines
, type_rr
119 /* For long options that have no equivalent short option, use a
120 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
123 VERBOSE_OPTION
= CHAR_MAX
+ 1,
126 ADDITIONAL_SUFFIX_OPTION
129 static struct option
const longopts
[] =
131 {"bytes", required_argument
, NULL
, 'b'},
132 {"lines", required_argument
, NULL
, 'l'},
133 {"line-bytes", required_argument
, NULL
, 'C'},
134 {"number", required_argument
, NULL
, 'n'},
135 {"elide-empty-files", no_argument
, NULL
, 'e'},
136 {"unbuffered", no_argument
, NULL
, 'u'},
137 {"suffix-length", required_argument
, NULL
, 'a'},
138 {"additional-suffix", required_argument
, NULL
,
139 ADDITIONAL_SUFFIX_OPTION
},
140 {"numeric-suffixes", optional_argument
, NULL
, 'd'},
141 {"filter", required_argument
, NULL
, FILTER_OPTION
},
142 {"verbose", no_argument
, NULL
, VERBOSE_OPTION
},
143 {"-io-blksize", required_argument
, NULL
,
144 IO_BLKSIZE_OPTION
}, /* do not document */
145 {GETOPT_HELP_OPTION_DECL
},
146 {GETOPT_VERSION_OPTION_DECL
},
150 /* Return true if the errno value, ERR, is ignorable. */
154 return filter_command
&& err
== EPIPE
;
158 set_suffix_length (uintmax_t n_units
, enum Split_type split_type
)
160 #define DEFAULT_SUFFIX_LENGTH 2
162 size_t suffix_needed
= 0;
164 /* The suffix auto length feature is incompatible with
165 a user specified start value as the generated suffixes
166 are not all consecutive. */
167 if (numeric_suffix_start
)
170 /* Auto-calculate the suffix length if the number of files is given. */
171 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
172 || split_type
== type_rr
)
174 size_t alphabet_len
= strlen (suffix_alphabet
);
175 bool alphabet_slop
= (n_units
% alphabet_len
) != 0;
176 while (n_units
/= alphabet_len
)
178 suffix_needed
+= alphabet_slop
;
182 if (suffix_length
) /* set by user */
184 if (suffix_length
< suffix_needed
)
186 error (EXIT_FAILURE
, 0,
187 _("the suffix length needs to be at least %zu"),
194 suffix_length
= MAX (DEFAULT_SUFFIX_LENGTH
, suffix_needed
);
200 if (status
!= EXIT_SUCCESS
)
205 Usage: %s [OPTION]... [INPUT [PREFIX]]\n\
209 Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
210 size is 1000 lines, and default PREFIX is 'x'. With no INPUT, or when INPUT\n\
211 is -, read standard input.\n\
214 emit_mandatory_arg_note ();
216 fprintf (stdout
, _("\
217 -a, --suffix-length=N generate suffixes of length N (default %d)\n\
218 --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
219 -b, --bytes=SIZE put SIZE bytes per output file\n\
220 -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
221 -d, --numeric-suffixes[=FROM] use numeric suffixes instead of alphabetic;\n\
222 FROM changes the start value (default 0)\n\
223 -e, --elide-empty-files do not generate empty output files with '-n'\n\
224 --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
225 -l, --lines=NUMBER put NUMBER lines per output file\n\
226 -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
227 -u, --unbuffered immediately copy input to output with '-n r/...'\n\
228 "), DEFAULT_SUFFIX_LENGTH
);
230 --verbose print a diagnostic just before each\n\
231 output file is opened\n\
233 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
234 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
238 N split into N files based on size of input\n\
239 K/N output Kth of N to stdout\n\
240 l/N split into N files without splitting lines\n\
241 l/K/N output Kth of N to stdout without splitting lines\n\
242 r/N like 'l' but use round robin distribution\n\
243 r/K/N likewise but only output Kth of N to stdout\n\
245 emit_ancillary_info ();
250 /* Compute the next sequential output file name and store it into the
254 next_file_name (void)
256 /* Index in suffix_alphabet of each character in the suffix. */
257 static size_t *sufindex
;
258 static size_t outbase_length
;
259 static size_t outfile_length
;
260 static size_t addsuf_length
;
267 widen
= !! outfile_length
;
271 /* Allocate and initialize the first file name. */
273 outbase_length
= strlen (outbase
);
274 addsuf_length
= additional_suffix
? strlen (additional_suffix
) : 0;
275 outfile_length
= outbase_length
+ suffix_length
+ addsuf_length
;
279 /* Reallocate and initialize a new wider file name.
280 We do this by subsuming the unchanging part of
281 the generated suffix into the prefix (base), and
282 reinitializing the now one longer suffix. */
288 if (outfile_length
+ 1 < outbase_length
)
290 outfile
= xrealloc (outfile
, outfile_length
+ 1);
293 memcpy (outfile
, outbase
, outbase_length
);
296 /* Append the last alphabet character to the file name prefix. */
297 outfile
[outbase_length
] = suffix_alphabet
[sufindex
[0]];
301 outfile_mid
= outfile
+ outbase_length
;
302 memset (outfile_mid
, suffix_alphabet
[0], suffix_length
);
303 if (additional_suffix
)
304 memcpy (outfile_mid
+ suffix_length
, additional_suffix
, addsuf_length
);
305 outfile
[outfile_length
] = 0;
308 sufindex
= xcalloc (suffix_length
, sizeof *sufindex
);
310 if (numeric_suffix_start
)
314 /* Update the output file name. */
315 size_t i
= strlen (numeric_suffix_start
);
316 memcpy (outfile_mid
+ suffix_length
- i
, numeric_suffix_start
, i
);
318 /* Update the suffix index. */
319 size_t *sufindex_end
= sufindex
+ suffix_length
;
321 *--sufindex_end
= numeric_suffix_start
[i
] - '0';
324 #if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
325 /* POSIX requires that if the output file name is too long for
326 its directory, 'split' must fail without creating any files.
327 This must be checked for explicitly on operating systems that
328 silently truncate file names. */
330 char *dir
= dir_name (outfile
);
331 long name_max
= pathconf (dir
, _PC_NAME_MAX
);
332 if (0 <= name_max
&& name_max
< base_len (last_component (outfile
)))
333 error (EXIT_FAILURE
, ENAMETOOLONG
, "%s", outfile
);
340 /* Increment the suffix in place, if possible. */
342 size_t i
= suffix_length
;
346 if (suffix_auto
&& i
== 0 && ! suffix_alphabet
[sufindex
[0] + 1])
348 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
352 outfile_mid
[i
] = suffix_alphabet
[sufindex
[i
]];
354 error (EXIT_FAILURE
, 0, _("output file suffixes exhausted"));
358 /* Create or truncate a file. */
361 create (const char *name
)
366 fprintf (stdout
, _("creating file %s\n"), quote (name
));
368 int fd
= open (name
, O_WRONLY
| O_CREAT
| O_BINARY
, MODE_RW_UGO
);
371 struct stat out_stat_buf
;
372 if (fstat (fd
, &out_stat_buf
) != 0)
373 error (EXIT_FAILURE
, errno
, _("failed to stat %s"), quote (name
));
374 if (SAME_INODE (in_stat_buf
, out_stat_buf
))
375 error (EXIT_FAILURE
, 0, _("%s would overwrite input; aborting"),
377 if (ftruncate (fd
, 0) != 0)
378 error (EXIT_FAILURE
, errno
, _("%s: error truncating"), quote (name
));
386 char const *shell_prog
= getenv ("SHELL");
387 if (shell_prog
== NULL
)
388 shell_prog
= "/bin/sh";
389 if (setenv ("FILE", name
, 1) != 0)
390 error (EXIT_FAILURE
, errno
,
391 _("failed to set FILE environment variable"));
393 fprintf (stdout
, _("executing with FILE=%s\n"), quote (name
));
394 if (pipe (fd_pair
) != 0)
395 error (EXIT_FAILURE
, errno
, _("failed to create pipe"));
399 /* This is the child process. If an error occurs here, the
400 parent will eventually learn about it after doing a wait,
401 at which time it will emit its own error message. */
403 /* We have to close any pipes that were opened during an
404 earlier call, otherwise this process will be holding a
405 write-pipe that will prevent the earlier process from
406 reading an EOF on the corresponding read-pipe. */
407 for (j
= 0; j
< n_open_pipes
; ++j
)
408 if (close (open_pipes
[j
]) != 0)
409 error (EXIT_FAILURE
, errno
, _("closing prior pipe"));
410 if (close (fd_pair
[1]))
411 error (EXIT_FAILURE
, errno
, _("closing output pipe"));
412 if (fd_pair
[0] != STDIN_FILENO
)
414 if (dup2 (fd_pair
[0], STDIN_FILENO
) != STDIN_FILENO
)
415 error (EXIT_FAILURE
, errno
, _("moving input pipe"));
416 if (close (fd_pair
[0]) != 0)
417 error (EXIT_FAILURE
, errno
, _("closing input pipe"));
419 sigprocmask (SIG_SETMASK
, &oldblocked
, NULL
);
420 execl (shell_prog
, last_component (shell_prog
), "-c",
421 filter_command
, (char *) NULL
);
422 error (EXIT_FAILURE
, errno
, _("failed to run command: \"%s -c %s\""),
423 shell_prog
, filter_command
);
426 error (EXIT_FAILURE
, errno
, _("fork system call failed"));
427 if (close (fd_pair
[0]) != 0)
428 error (EXIT_FAILURE
, errno
, _("failed to close input pipe"));
429 filter_pid
= child_pid
;
430 if (n_open_pipes
== open_pipes_alloc
)
431 open_pipes
= x2nrealloc (open_pipes
, &open_pipes_alloc
,
433 open_pipes
[n_open_pipes
++] = fd_pair
[1];
438 /* Close the output file, and do any associated cleanup.
439 If FP and FD are both specified, they refer to the same open file;
440 in this case FP is closed, but FD is still used in cleanup. */
442 closeout (FILE *fp
, int fd
, pid_t pid
, char const *name
)
444 if (fp
!= NULL
&& fclose (fp
) != 0 && ! ignorable (errno
))
445 error (EXIT_FAILURE
, errno
, "%s", name
);
448 if (fp
== NULL
&& close (fd
) < 0)
449 error (EXIT_FAILURE
, errno
, "%s", name
);
451 for (j
= 0; j
< n_open_pipes
; ++j
)
453 if (open_pipes
[j
] == fd
)
455 open_pipes
[j
] = open_pipes
[--n_open_pipes
];
463 if (waitpid (pid
, &wstatus
, 0) == -1 && errno
!= ECHILD
)
464 error (EXIT_FAILURE
, errno
, _("waiting for child process"));
465 if (WIFSIGNALED (wstatus
))
467 int sig
= WTERMSIG (wstatus
);
470 char signame
[MAX (SIG2STR_MAX
, INT_BUFSIZE_BOUND (int))];
471 if (sig2str (sig
, signame
) != 0)
472 sprintf (signame
, "%d", sig
);
474 _("with FILE=%s, signal %s from command: %s"),
475 name
, signame
, filter_command
);
478 else if (WIFEXITED (wstatus
))
480 int ex
= WEXITSTATUS (wstatus
);
482 error (ex
, 0, _("with FILE=%s, exit %d from command: %s"),
483 name
, ex
, filter_command
);
487 /* shouldn't happen. */
488 error (EXIT_FAILURE
, 0,
489 _("unknown status from command (0x%X)"), wstatus
);
494 /* Write BYTES bytes at BP to an output file.
495 If NEW_FILE_FLAG is true, open the next output file.
496 Otherwise add to the same output file already in use. */
499 cwrite (bool new_file_flag
, const char *bp
, size_t bytes
)
503 if (!bp
&& bytes
== 0 && elide_empty_files
)
505 closeout (NULL
, output_desc
, filter_pid
, outfile
);
507 if ((output_desc
= create (outfile
)) < 0)
508 error (EXIT_FAILURE
, errno
, "%s", outfile
);
510 if (full_write (output_desc
, bp
, bytes
) != bytes
&& ! ignorable (errno
))
511 error (EXIT_FAILURE
, errno
, "%s", outfile
);
514 /* Split into pieces of exactly N_BYTES bytes.
515 Use buffer BUF, whose size is BUFSIZE. */
518 bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
, uintmax_t max_files
)
521 bool new_file_flag
= true;
523 uintmax_t to_write
= n_bytes
;
525 uintmax_t opened
= 0;
529 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
530 if (n_read
< bufsize
&& errno
)
531 error (EXIT_FAILURE
, errno
, "%s", infile
);
536 if (to_read
< to_write
)
538 if (to_read
) /* do not write 0 bytes! */
540 cwrite (new_file_flag
, bp_out
, to_read
);
541 opened
+= new_file_flag
;
543 new_file_flag
= false;
550 cwrite (new_file_flag
, bp_out
, w
);
551 opened
+= new_file_flag
;
552 new_file_flag
= !max_files
|| (opened
< max_files
);
553 if (!new_file_flag
&& ignorable (errno
))
555 /* If filter no longer accepting input, stop reading. */
565 while (n_read
== bufsize
);
567 /* Ensure NUMBER files are created, which truncates
568 any existing files or notifies any consumers on fifos.
569 FIXME: Should we do this before EXIT_FAILURE? */
570 while (opened
++ < max_files
)
571 cwrite (true, NULL
, 0);
574 /* Split into pieces of exactly N_LINES lines.
575 Use buffer BUF, whose size is BUFSIZE. */
578 lines_split (uintmax_t n_lines
, char *buf
, size_t bufsize
)
581 char *bp
, *bp_out
, *eob
;
582 bool new_file_flag
= true;
587 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
588 if (n_read
< bufsize
&& errno
)
589 error (EXIT_FAILURE
, errno
, "%s", infile
);
595 bp
= memchr (bp
, '\n', eob
- bp
+ 1);
598 if (eob
!= bp_out
) /* do not write 0 bytes! */
600 size_t len
= eob
- bp_out
;
601 cwrite (new_file_flag
, bp_out
, len
);
602 new_file_flag
= false;
610 cwrite (new_file_flag
, bp_out
, bp
- bp_out
);
612 new_file_flag
= true;
617 while (n_read
== bufsize
);
620 /* Split into pieces that are as large as possible while still not more
621 than N_BYTES bytes, and are split on line boundaries except
622 where lines longer than N_BYTES bytes occur. */
625 line_bytes_split (uintmax_t n_bytes
, char *buf
, size_t bufsize
)
628 uintmax_t n_out
= 0; /* for each split. */
630 char *hold
= NULL
; /* for lines > bufsize. */
631 size_t hold_size
= 0;
632 bool split_line
= false; /* Whether a \n was output in a split. */
636 n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
637 if (n_read
< bufsize
&& errno
)
638 error (EXIT_FAILURE
, errno
, "%s", infile
);
639 size_t n_left
= n_read
;
643 size_t split_rest
= 0;
647 /* Determine End Of Chunk and/or End of Line,
648 which are used below to select what to write or buffer. */
649 if (n_bytes
- n_out
- n_hold
<= n_left
)
651 /* Have enough for split. */
652 split_rest
= n_bytes
- n_out
- n_hold
;
653 eoc
= sob
+ split_rest
- 1;
654 eol
= memrchr (sob
, '\n', split_rest
);
657 eol
= memrchr (sob
, '\n', n_left
);
659 /* Output hold space if possible. */
660 if (n_hold
&& !(!eol
&& n_out
))
662 cwrite (n_out
== 0, hold
, n_hold
);
664 if (n_hold
> bufsize
)
665 hold
= xrealloc (hold
, bufsize
);
670 /* Output to eol if present. */
674 size_t n_write
= eol
- sob
+ 1;
675 cwrite (n_out
== 0, sob
, n_write
);
680 split_rest
-= n_write
;
683 /* Output to eoc or eob if possible. */
684 if (n_left
&& !split_line
)
686 size_t n_write
= eoc
? split_rest
: n_left
;
687 cwrite (n_out
== 0, sob
, n_write
);
692 split_rest
-= n_write
;
695 /* Update hold if needed. */
696 if ((eoc
&& split_rest
) || (!eoc
&& n_left
))
698 size_t n_buf
= eoc
? split_rest
: n_left
;
699 if (hold_size
- n_hold
< n_buf
)
701 if (hold_size
<= SIZE_MAX
- bufsize
)
702 hold_size
+= bufsize
;
705 hold
= xrealloc (hold
, hold_size
);
707 memcpy (hold
+ n_hold
, sob
, n_buf
);
713 /* Reset for new split. */
721 while (n_read
== bufsize
);
723 /* Handle no eol at end of file. */
725 cwrite (n_out
== 0, hold
, n_hold
);
730 /* -n l/[K/]N: Write lines to files of approximately file size / N.
731 The file is partitioned into file size / N sized portions, with the
732 last assigned any excess. If a line _starts_ within a partition
733 it is written completely to the corresponding file. Since lines
734 are not split even if they overlap a partition, the files written
735 can be larger or smaller than the partition size, and even empty
736 if a line is so long as to completely overlap the partition. */
739 lines_chunk_split (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
742 assert (n
&& k
<= n
&& n
<= file_size
);
744 const off_t chunk_size
= file_size
/ n
;
745 uintmax_t chunk_no
= 1;
746 off_t chunk_end
= chunk_size
- 1;
748 bool new_file_flag
= true;
749 bool chunk_truncated
= false;
753 /* Start reading 1 byte before kth chunk of file. */
754 off_t start
= (k
- 1) * chunk_size
- 1;
755 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
756 error (EXIT_FAILURE
, errno
, "%s", infile
);
759 chunk_end
= chunk_no
* chunk_size
- 1;
762 while (n_written
< file_size
)
764 char *bp
= buf
, *eob
;
765 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
766 if (n_read
< bufsize
&& errno
)
767 error (EXIT_FAILURE
, errno
, "%s", infile
);
768 else if (n_read
== 0)
770 n_read
= MIN (n_read
, file_size
- n_written
);
771 chunk_truncated
= false;
779 /* Begin looking for '\n' at last byte of chunk. */
780 off_t skip
= MIN (n_read
, MAX (0, chunk_end
- n_written
));
781 char *bp_out
= memchr (bp
+ skip
, '\n', n_read
- skip
);
786 to_write
= bp_out
- bp
;
790 /* We don't use the stdout buffer here since we're writing
791 large chunks from an existing file, so it's more efficient
792 to write out directly. */
793 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
794 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
797 cwrite (new_file_flag
, bp
, to_write
);
798 n_written
+= to_write
;
801 new_file_flag
= next
;
803 /* A line could have been so long that it skipped
804 entire chunks. So create empty files in that case. */
805 while (next
|| chunk_end
<= n_written
- 1)
807 if (!next
&& bp
== eob
)
809 /* replenish buf, before going to next chunk. */
810 chunk_truncated
= true;
814 if (k
&& chunk_no
> k
)
817 chunk_end
= file_size
- 1; /* >= chunk_size. */
819 chunk_end
+= chunk_size
;
820 if (chunk_end
<= n_written
- 1)
823 cwrite (true, NULL
, 0);
834 /* Ensure NUMBER files are created, which truncates
835 any existing files or notifies any consumers on fifos.
836 FIXME: Should we do this before EXIT_FAILURE? */
837 while (!k
&& chunk_no
++ <= n
)
838 cwrite (true, NULL
, 0);
841 /* -n K/N: Extract Kth of N chunks. */
844 bytes_chunk_extract (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
,
850 assert (k
&& n
&& k
<= n
&& n
<= file_size
);
852 start
= (k
- 1) * (file_size
/ n
);
853 end
= (k
== n
) ? file_size
: k
* (file_size
/ n
);
855 if (lseek (STDIN_FILENO
, start
, SEEK_CUR
) < 0)
856 error (EXIT_FAILURE
, errno
, "%s", infile
);
860 size_t n_read
= full_read (STDIN_FILENO
, buf
, bufsize
);
861 if (n_read
< bufsize
&& errno
)
862 error (EXIT_FAILURE
, errno
, "%s", infile
);
863 else if (n_read
== 0)
865 n_read
= MIN (n_read
, end
- start
);
866 if (full_write (STDOUT_FILENO
, buf
, n_read
) != n_read
867 && ! ignorable (errno
))
868 error (EXIT_FAILURE
, errno
, "%s", quote ("-"));
873 typedef struct of_info
887 /* Rotate file descriptors when we're writing to more output files than we
888 have available file descriptors.
889 Return whether we came under file resource pressure.
890 If so, it's probably best to close each file when finished with it. */
893 ofile_open (of_t
*files
, size_t i_check
, size_t nfiles
)
895 bool file_limit
= false;
897 if (files
[i_check
].ofd
<= OFD_NEW
)
900 size_t i_reopen
= i_check
? i_check
- 1 : nfiles
- 1;
902 /* Another process could have opened a file in between the calls to
903 close and open, so we should keep trying until open succeeds or
904 we've closed all of our files. */
907 if (files
[i_check
].ofd
== OFD_NEW
)
908 fd
= create (files
[i_check
].of_name
);
909 else /* OFD_APPEND */
911 /* Attempt to append to previously opened file.
912 We use O_NONBLOCK to support writing to fifos,
913 where the other end has closed because of our
914 previous close. In that case we'll immediately
915 get an error, rather than waiting indefinitely.
916 In specialised cases the consumer can keep reading
917 from the fifo, terminating on conditions in the data
918 itself, or perhaps never in the case of 'tail -f'.
919 I.E. for fifos it is valid to attempt this reopen.
921 We don't handle the filter_command case here, as create()
922 will exit if there are not enough files in that case.
923 I.E. we don't support restarting filters, as that would
924 put too much burden on users specifying --filter commands. */
925 fd
= open (files
[i_check
].of_name
,
926 O_WRONLY
| O_BINARY
| O_APPEND
| O_NONBLOCK
);
932 if (!(errno
== EMFILE
|| errno
== ENFILE
))
933 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
937 /* Search backwards for an open file to close. */
938 while (files
[i_reopen
].ofd
< 0)
940 i_reopen
= i_reopen
? i_reopen
- 1 : nfiles
- 1;
941 /* No more open files to close, exit with E[NM]FILE. */
942 if (i_reopen
== i_check
)
943 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
946 if (fclose (files
[i_reopen
].ofile
) != 0)
947 error (EXIT_FAILURE
, errno
, "%s", files
[i_reopen
].of_name
);
948 files
[i_reopen
].ofile
= NULL
;
949 files
[i_reopen
].ofd
= OFD_APPEND
;
952 files
[i_check
].ofd
= fd
;
953 if (!(files
[i_check
].ofile
= fdopen (fd
, "a")))
954 error (EXIT_FAILURE
, errno
, "%s", files
[i_check
].of_name
);
955 files
[i_check
].opid
= filter_pid
;
962 /* -n r/[K/]N: Divide file into N chunks in round robin fashion.
963 When K == 0, we try to keep the files open in parallel.
964 If we run out of file resources, then we revert
965 to opening and closing each file for each line. */
968 lines_rr (uintmax_t k
, uintmax_t n
, char *buf
, size_t bufsize
)
970 bool wrapped
= false;
974 of_t
*files
IF_LINT (= NULL
);
983 files
= xnmalloc (n
, sizeof *files
);
985 /* Generate output file names. */
986 for (i_file
= 0; i_file
< n
; i_file
++)
989 files
[i_file
].of_name
= xstrdup (outfile
);
990 files
[i_file
].ofd
= OFD_NEW
;
991 files
[i_file
].ofile
= NULL
;
992 files
[i_file
].opid
= 0;
1000 char *bp
= buf
, *eob
;
1001 /* Use safe_read() rather than full_read() here
1002 so that we process available data immediately. */
1003 size_t n_read
= safe_read (STDIN_FILENO
, buf
, bufsize
);
1004 if (n_read
== SAFE_READ_ERROR
)
1005 error (EXIT_FAILURE
, errno
, "%s", infile
);
1006 else if (n_read
== 0)
1015 /* Find end of line. */
1016 char *bp_out
= memchr (bp
, '\n', eob
- bp
);
1024 to_write
= bp_out
- bp
;
1028 if (line_no
== k
&& unbuffered
)
1030 if (full_write (STDOUT_FILENO
, bp
, to_write
) != to_write
)
1031 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
1033 else if (line_no
== k
&& fwrite (bp
, to_write
, 1, stdout
) != 1)
1035 clearerr (stdout
); /* To silence close_stdout(). */
1036 error (EXIT_FAILURE
, errno
, "%s", _("write error"));
1039 line_no
= (line_no
== n
) ? 1 : line_no
+ 1;
1043 /* Secure file descriptor. */
1044 file_limit
|= ofile_open (files
, i_file
, n
);
1047 /* Note writing to fd, rather than flushing the FILE gives
1048 an 8% performance benefit, due to reduced data copying. */
1049 if (full_write (files
[i_file
].ofd
, bp
, to_write
) != to_write
1050 && ! ignorable (errno
))
1051 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
1053 else if (fwrite (bp
, to_write
, 1, files
[i_file
].ofile
) != 1
1054 && ! ignorable (errno
))
1055 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
1056 if (! ignorable (errno
))
1061 if (fclose (files
[i_file
].ofile
) != 0)
1062 error (EXIT_FAILURE
, errno
, "%s", files
[i_file
].of_name
);
1063 files
[i_file
].ofile
= NULL
;
1064 files
[i_file
].ofd
= OFD_APPEND
;
1066 if (next
&& ++i_file
== n
)
1069 /* If no filters are accepting input, stop reading. */
1082 /* Ensure all files created, so that any existing files are truncated,
1083 and to signal any waiting fifo consumers.
1084 Also, close any open file descriptors.
1085 FIXME: Should we do this before EXIT_FAILURE? */
1088 int ceiling
= (wrapped
? n
: i_file
);
1089 for (i_file
= 0; i_file
< n
; i_file
++)
1091 if (i_file
>= ceiling
&& !elide_empty_files
)
1092 file_limit
|= ofile_open (files
, i_file
, n
);
1093 if (files
[i_file
].ofd
>= 0)
1094 closeout (files
[i_file
].ofile
, files
[i_file
].ofd
,
1095 files
[i_file
].opid
, files
[i_file
].of_name
);
1096 files
[i_file
].ofd
= OFD_APPEND
;
1099 IF_LINT (free (files
));
1102 #define FAIL_ONLY_ONE_WAY() \
1105 error (0, 0, _("cannot split in more than one way")); \
1106 usage (EXIT_FAILURE); \
1110 /* Parse K/N syntax of chunk options. */
1113 parse_chunk (uintmax_t *k_units
, uintmax_t *n_units
, char *slash
)
1116 if (xstrtoumax (slash
+ 1, NULL
, 10, n_units
, "") != LONGINT_OK
1118 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), slash
+ 1);
1119 if (slash
!= optarg
/* a leading number is specified. */
1120 && (xstrtoumax (optarg
, NULL
, 10, k_units
, "") != LONGINT_OK
1121 || *k_units
== 0 || *n_units
< *k_units
))
1122 error (EXIT_FAILURE
, 0, _("%s: invalid chunk number"), optarg
);
1127 main (int argc
, char **argv
)
1129 enum Split_type split_type
= type_undef
;
1130 size_t in_blk_size
= 0; /* optimal block size of input file device */
1131 size_t page_size
= getpagesize ();
1132 uintmax_t k_units
= 0;
1135 static char const multipliers
[] = "bEGKkMmPTYZ0";
1137 int digits_optind
= 0;
1138 off_t file_size
IF_LINT (= 0);
1140 initialize_main (&argc
, &argv
);
1141 set_program_name (argv
[0]);
1142 setlocale (LC_ALL
, "");
1143 bindtextdomain (PACKAGE
, LOCALEDIR
);
1144 textdomain (PACKAGE
);
1146 atexit (close_stdout
);
1148 /* Parse command line options. */
1150 infile
= bad_cast ("-");
1151 outbase
= bad_cast ("x");
1155 /* This is the argv-index of the option we will read next. */
1156 int this_optind
= optind
? optind
: 1;
1159 c
= getopt_long (argc
, argv
, "0123456789C:a:b:del:n:u",
1169 if (xstrtoul (optarg
, NULL
, 10, &tmp
, "") != LONGINT_OK
1170 || SIZE_MAX
/ sizeof (size_t) < tmp
)
1172 error (0, 0, _("%s: invalid suffix length"), optarg
);
1173 usage (EXIT_FAILURE
);
1175 suffix_length
= tmp
;
1179 case ADDITIONAL_SUFFIX_OPTION
:
1180 if (last_component (optarg
) != optarg
)
1183 _("invalid suffix %s, contains directory separator"),
1185 usage (EXIT_FAILURE
);
1187 additional_suffix
= optarg
;
1191 if (split_type
!= type_undef
)
1192 FAIL_ONLY_ONE_WAY ();
1193 split_type
= type_bytes
;
1194 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
1197 error (0, 0, _("%s: invalid number of bytes"), optarg
);
1198 usage (EXIT_FAILURE
);
1200 /* If input is a pipe, we could get more data than is possible
1201 to write to a single file, so indicate that immediately
1202 rather than having possibly future invocations fail. */
1203 if (OFF_T_MAX
< n_units
)
1204 error (EXIT_FAILURE
, EFBIG
,
1205 _("%s: invalid number of bytes"), optarg
);
1210 if (split_type
!= type_undef
)
1211 FAIL_ONLY_ONE_WAY ();
1212 split_type
= type_lines
;
1213 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
1216 error (0, 0, _("%s: invalid number of lines"), optarg
);
1217 usage (EXIT_FAILURE
);
1222 if (split_type
!= type_undef
)
1223 FAIL_ONLY_ONE_WAY ();
1224 split_type
= type_byteslines
;
1225 if (xstrtoumax (optarg
, NULL
, 10, &n_units
, multipliers
) != LONGINT_OK
1226 || n_units
== 0 || SIZE_MAX
< n_units
)
1228 error (0, 0, _("%s: invalid number of bytes"), optarg
);
1229 usage (EXIT_FAILURE
);
1231 if (OFF_T_MAX
< n_units
)
1232 error (EXIT_FAILURE
, EFBIG
,
1233 _("%s: invalid number of bytes"), optarg
);
1237 if (split_type
!= type_undef
)
1238 FAIL_ONLY_ONE_WAY ();
1239 /* skip any whitespace */
1240 while (isspace (to_uchar (*optarg
)))
1242 if (STRNCMP_LIT (optarg
, "r/") == 0)
1244 split_type
= type_rr
;
1247 else if (STRNCMP_LIT (optarg
, "l/") == 0)
1249 split_type
= type_chunk_lines
;
1253 split_type
= type_chunk_bytes
;
1254 if ((slash
= strchr (optarg
, '/')))
1255 parse_chunk (&k_units
, &n_units
, slash
);
1256 else if (xstrtoumax (optarg
, NULL
, 10, &n_units
, "") != LONGINT_OK
1258 error (EXIT_FAILURE
, 0, _("%s: invalid number of chunks"), optarg
);
1275 if (split_type
== type_undef
)
1277 split_type
= type_digits
;
1280 if (split_type
!= type_undef
&& split_type
!= type_digits
)
1281 FAIL_ONLY_ONE_WAY ();
1282 if (digits_optind
!= 0 && digits_optind
!= this_optind
)
1283 n_units
= 0; /* More than one number given; ignore other. */
1284 digits_optind
= this_optind
;
1285 if (!DECIMAL_DIGIT_ACCUMULATE (n_units
, c
- '0', uintmax_t))
1287 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1288 error (EXIT_FAILURE
, 0,
1289 _("line count option -%s%c... is too large"),
1290 umaxtostr (n_units
, buffer
), c
);
1295 suffix_alphabet
= "0123456789";
1298 if (strlen (optarg
) != strspn (optarg
, suffix_alphabet
))
1301 _("%s: invalid start value for numerical suffix"),
1303 usage (EXIT_FAILURE
);
1307 /* Skip any leading zero. */
1308 while (*optarg
== '0' && *(optarg
+ 1) != '\0')
1310 numeric_suffix_start
= optarg
;
1316 elide_empty_files
= true;
1320 filter_command
= optarg
;
1323 case IO_BLKSIZE_OPTION
:
1325 uintmax_t tmp_blk_size
;
1326 if (xstrtoumax (optarg
, NULL
, 10, &tmp_blk_size
,
1327 multipliers
) != LONGINT_OK
1328 || tmp_blk_size
== 0 || SIZE_MAX
- page_size
< tmp_blk_size
)
1329 error (0, 0, _("%s: invalid IO block size"), optarg
);
1331 in_blk_size
= tmp_blk_size
;
1335 case VERBOSE_OPTION
:
1339 case_GETOPT_HELP_CHAR
;
1341 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
1344 usage (EXIT_FAILURE
);
1348 if (k_units
!= 0 && filter_command
)
1350 error (0, 0, _("--filter does not process a chunk extracted to stdout"));
1351 usage (EXIT_FAILURE
);
1354 /* Handle default case. */
1355 if (split_type
== type_undef
)
1357 split_type
= type_lines
;
1363 error (0, 0, _("%s: invalid number of lines"), "0");
1364 usage (EXIT_FAILURE
);
1367 set_suffix_length (n_units
, split_type
);
1369 /* Get out the filename arguments. */
1372 infile
= argv
[optind
++];
1375 outbase
= argv
[optind
++];
1379 error (0, 0, _("extra operand %s"), quote (argv
[optind
]));
1380 usage (EXIT_FAILURE
);
1383 /* Check that the suffix length is large enough for the numerical
1384 suffix start value. */
1385 if (numeric_suffix_start
&& strlen (numeric_suffix_start
) > suffix_length
)
1387 error (0, 0, _("numerical suffix start value is too large "
1388 "for the suffix length"));
1389 usage (EXIT_FAILURE
);
1392 /* Open the input file. */
1393 if (! STREQ (infile
, "-")
1394 && fd_reopen (STDIN_FILENO
, infile
, O_RDONLY
, 0) < 0)
1395 error (EXIT_FAILURE
, errno
, _("cannot open %s for reading"),
1398 /* Binary I/O is safer when byte counts are used. */
1399 if (O_BINARY
&& ! isatty (STDIN_FILENO
))
1400 xfreopen (NULL
, "rb", stdin
);
1402 /* Get the optimal block size of input device and make a buffer. */
1404 if (fstat (STDIN_FILENO
, &in_stat_buf
) != 0)
1405 error (EXIT_FAILURE
, errno
, "%s", infile
);
1406 if (in_blk_size
== 0)
1407 in_blk_size
= io_blksize (in_stat_buf
);
1409 if (split_type
== type_chunk_bytes
|| split_type
== type_chunk_lines
)
1411 off_t input_offset
= lseek (STDIN_FILENO
, 0, SEEK_CUR
);
1412 if (usable_st_size (&in_stat_buf
))
1413 file_size
= in_stat_buf
.st_size
;
1414 else if (0 <= input_offset
)
1416 file_size
= lseek (STDIN_FILENO
, 0, SEEK_END
);
1417 input_offset
= (file_size
< 0
1419 : lseek (STDIN_FILENO
, input_offset
, SEEK_SET
));
1421 if (input_offset
< 0)
1422 error (EXIT_FAILURE
, 0, _("%s: cannot determine file size"),
1424 file_size
-= input_offset
;
1425 /* Overflow, and sanity checking. */
1426 if (OFF_T_MAX
< n_units
)
1428 char buffer
[INT_BUFSIZE_BOUND (uintmax_t)];
1429 error (EXIT_FAILURE
, EFBIG
, _("%s: invalid number of chunks"),
1430 umaxtostr (n_units
, buffer
));
1432 /* increase file_size to n_units here, so that we still process
1433 any input data, and create empty files for the rest. */
1434 file_size
= MAX (file_size
, n_units
);
1437 void *b
= xmalloc (in_blk_size
+ 1 + page_size
- 1);
1438 char *buf
= ptr_align (b
, page_size
);
1440 /* When filtering, closure of one pipe must not terminate the process,
1441 as there may still be other streams expecting input from us. */
1444 struct sigaction act
;
1445 sigemptyset (&newblocked
);
1446 sigaction (SIGPIPE
, NULL
, &act
);
1447 if (act
.sa_handler
!= SIG_IGN
)
1448 sigaddset (&newblocked
, SIGPIPE
);
1449 sigprocmask (SIG_BLOCK
, &newblocked
, &oldblocked
);
1456 lines_split (n_units
, buf
, in_blk_size
);
1460 bytes_split (n_units
, buf
, in_blk_size
, 0);
1463 case type_byteslines
:
1464 line_bytes_split (n_units
, buf
, in_blk_size
);
1467 case type_chunk_bytes
:
1469 bytes_split (file_size
/ n_units
, buf
, in_blk_size
, n_units
);
1471 bytes_chunk_extract (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1474 case type_chunk_lines
:
1475 lines_chunk_split (k_units
, n_units
, buf
, in_blk_size
, file_size
);
1479 /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
1480 but the functionality is provided for symmetry. */
1481 lines_rr (k_units
, n_units
, buf
, in_blk_size
);
1490 if (close (STDIN_FILENO
) != 0)
1491 error (EXIT_FAILURE
, errno
, "%s", infile
);
1492 closeout (NULL
, output_desc
, filter_pid
, outfile
);
1494 exit (EXIT_SUCCESS
);