du: --apparent counts only symlinks and regular
[coreutils.git] / src / paste.c
blob5c194d8fe213284cf38aa7f59809a81863ec99e3
1 /* paste - merge lines of files
2 Copyright (C) 1997-2023 Free Software Foundation, Inc.
3 Copyright (C) 1984 David M. Ihnat
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by David Ihnat. */
20 /* The list of valid escape sequences has been expanded over the Unix
21 version, to include \b, \f, \r, and \v.
23 POSIX changes, bug fixes, long-named options, and cleanup
24 by David MacKenzie <djm@gnu.ai.mit.edu>.
26 Options:
27 --serial
28 -s Paste one file at a time rather than
29 one line from each file.
30 --delimiters=delim-list
31 -d delim-list Consecutively use the characters in
32 DELIM-LIST instead of tab to separate
33 merged lines. When DELIM-LIST is exhausted,
34 start again at its beginning.
35 A FILE of '-' means standard input.
36 If no FILEs are given, standard input is used. */
38 #include <config.h>
40 #include <stdio.h>
41 #include <getopt.h>
42 #include <sys/types.h>
43 #include "system.h"
44 #include "die.h"
45 #include "error.h"
46 #include "fadvise.h"
/* Canonical name of this utility, i.e. the name without any
   installation prefix such as 'g'.  */
#define PROGRAM_NAME "paste"

/* Author list handed to the --version case in main.  */
#define AUTHORS \
  proper_name ("David M. Ihnat"), \
  proper_name ("David MacKenzie")

/* Delimiter-list entry meaning "emit no delimiter at this position".  */
#define EMPTY_DELIM '\0'
/* Set to true as soon as a "-" operand selects standard input as one
   of the inputs; main uses it to decide whether stdin must be closed.  */
static bool have_read_stdin;

/* True when -s/--serial was given: each file is pasted onto its own
   output line instead of merging corresponding lines in parallel.  */
static bool serial_merge;

/* Start of the expanded delimiter list, which is consumed cyclically.
   Allocated by collapse_escapes and released in main.  */
static char *delims;

/* One past the last usable byte of 'delims'.  */
static char const *delim_end;

/* Byte that ends each input and output line: newline by default,
   NUL when -z/--zero-terminated is given.  */
static unsigned char line_delim = '\n';
73 static struct option const longopts[] =
75 {"serial", no_argument, NULL, 's'},
76 {"delimiters", required_argument, NULL, 'd'},
77 {"zero-terminated", no_argument, NULL, 'z'},
78 {GETOPT_HELP_OPTION_DECL},
79 {GETOPT_VERSION_OPTION_DECL},
80 {NULL, 0, NULL, 0}
83 /* Set globals delims and delim_end. Copy STRPTR to DELIMS, converting
84 backslash representations of special characters in STRPTR to their actual
85 values. The set of possible backslash characters has been expanded beyond
86 that recognized by the Unix version.
87 Return 0 upon success.
88 If the string ends in an odd number of backslashes, ignore the
89 final backslash and return nonzero. */
91 static int
92 collapse_escapes (char const *strptr)
94 char *strout = xstrdup (strptr);
95 bool backslash_at_end = false;
97 delims = strout;
99 while (*strptr)
101 if (*strptr != '\\') /* Is it an escape character? */
102 *strout++ = *strptr++; /* No, just transfer it. */
103 else
105 switch (*++strptr)
107 case '0':
108 *strout++ = EMPTY_DELIM;
109 break;
111 case 'b':
112 *strout++ = '\b';
113 break;
115 case 'f':
116 *strout++ = '\f';
117 break;
119 case 'n':
120 *strout++ = '\n';
121 break;
123 case 'r':
124 *strout++ = '\r';
125 break;
127 case 't':
128 *strout++ = '\t';
129 break;
131 case 'v':
132 *strout++ = '\v';
133 break;
135 case '\\':
136 *strout++ = '\\';
137 break;
139 case '\0':
140 backslash_at_end = true;
141 goto done;
143 default:
144 *strout++ = *strptr;
145 break;
147 strptr++;
151 done:
153 delim_end = strout;
154 return backslash_at_end ? 1 : 0;
/* Diagnose a failed write to standard output, using the current
   errno, and terminate with EXIT_FAILURE.  */

static void
write_error (void)
{
  /* Capture errno before anything else has a chance to disturb it.  */
  int const write_errno = errno;
  die (EXIT_FAILURE, write_errno, _("write error"));
}
/* Write the single byte C to standard output; on failure hand over
   to write_error.  */

static inline void
xputchar (char c)
{
  int const result = putchar (c);
  if (result < 0)
    write_error ();
}
174 /* Perform column paste on the NFILES files named in FNAMPTR.
175 Return true if successful, false if one or more files could not be
176 opened or read. */
178 static bool
179 paste_parallel (size_t nfiles, char **fnamptr)
181 bool ok = true;
182 /* If all files are just ready to be closed, or will be on this
183 round, the string of delimiters must be preserved.
184 delbuf[0] through delbuf[nfiles]
185 store the delimiters for closed files. */
186 char *delbuf = xmalloc (nfiles + 2);
188 /* Streams open to the files to process; NULL if the corresponding
189 stream is closed. */
190 FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr);
192 /* Number of files still open to process. */
193 size_t files_open;
195 /* True if any fopen got fd == STDIN_FILENO. */
196 bool opened_stdin = false;
198 /* Attempt to open all files. This could be expanded to an infinite
199 number of files, but at the (considerable) expense of remembering
200 each file and its current offset, then opening/reading/closing. */
202 for (files_open = 0; files_open < nfiles; ++files_open)
204 if (STREQ (fnamptr[files_open], "-"))
206 have_read_stdin = true;
207 fileptr[files_open] = stdin;
209 else
211 fileptr[files_open] = fopen (fnamptr[files_open], "r");
212 if (fileptr[files_open] == NULL)
213 die (EXIT_FAILURE, errno, "%s", quotef (fnamptr[files_open]));
214 else if (fileno (fileptr[files_open]) == STDIN_FILENO)
215 opened_stdin = true;
216 fadvise (fileptr[files_open], FADVISE_SEQUENTIAL);
220 if (opened_stdin && have_read_stdin)
221 die (EXIT_FAILURE, 0, _("standard input is closed"));
223 /* Read a line from each file and output it to stdout separated by a
224 delimiter, until we go through the loop without successfully
225 reading from any of the files. */
227 while (files_open)
229 /* Set up for the next line. */
230 bool somedone = false;
231 char const *delimptr = delims;
232 size_t delims_saved = 0; /* Number of delims saved in 'delbuf'. */
234 for (size_t i = 0; i < nfiles && files_open; i++)
236 int chr; /* Input character. */
237 int err; /* Input errno value. */
238 bool sometodo = false; /* Input chars to process. */
240 if (fileptr[i])
242 chr = getc (fileptr[i]);
243 err = errno;
244 if (chr != EOF && delims_saved)
246 if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved)
247 write_error ();
248 delims_saved = 0;
251 while (chr != EOF)
253 sometodo = true;
254 if (chr == line_delim)
255 break;
256 xputchar (chr);
257 chr = getc (fileptr[i]);
258 err = errno;
262 if (! sometodo)
264 /* EOF, read error, or closed file.
265 If an EOF or error, close the file. */
266 if (fileptr[i])
268 if (!ferror (fileptr[i]))
269 err = 0;
270 if (fileptr[i] == stdin)
271 clearerr (fileptr[i]); /* Also clear EOF. */
272 else if (fclose (fileptr[i]) == EOF && !err)
273 err = errno;
274 if (err)
276 error (0, err, "%s", quotef (fnamptr[i]));
277 ok = false;
280 fileptr[i] = NULL;
281 files_open--;
284 if (i + 1 == nfiles)
286 /* End of this output line.
287 Is this the end of the whole thing? */
288 if (somedone)
290 /* No. Some files were not closed for this line. */
291 if (delims_saved)
293 if (fwrite (delbuf, 1, delims_saved, stdout)
294 != delims_saved)
295 write_error ();
296 delims_saved = 0;
298 xputchar (line_delim);
300 continue; /* Next read of files, or exit. */
302 else
304 /* Closed file; add delimiter to 'delbuf'. */
305 if (*delimptr != EMPTY_DELIM)
306 delbuf[delims_saved++] = *delimptr;
307 if (++delimptr == delim_end)
308 delimptr = delims;
311 else
313 /* Some data read. */
314 somedone = true;
316 /* Except for last file, replace last newline with delim. */
317 if (i + 1 != nfiles)
319 if (chr != line_delim && chr != EOF)
320 xputchar (chr);
321 if (*delimptr != EMPTY_DELIM)
322 xputchar (*delimptr);
323 if (++delimptr == delim_end)
324 delimptr = delims;
326 else
328 /* If the last line of the last file lacks a newline,
329 print one anyhow. POSIX requires this. */
330 char c = (chr == EOF ? line_delim : chr);
331 xputchar (c);
336 free (fileptr);
337 free (delbuf);
338 return ok;
341 /* Perform serial paste on the NFILES files named in FNAMPTR.
342 Return true if no errors, false if one or more files could not be
343 opened or read. */
345 static bool
346 paste_serial (size_t nfiles, char **fnamptr)
348 bool ok = true; /* false if open or read errors occur. */
349 int charnew, charold; /* Current and previous char read. */
350 char const *delimptr; /* Current delimiter char. */
351 FILE *fileptr; /* Open for reading current file. */
353 for (; nfiles; nfiles--, fnamptr++)
355 int saved_errno;
356 bool is_stdin = STREQ (*fnamptr, "-");
357 if (is_stdin)
359 have_read_stdin = true;
360 fileptr = stdin;
362 else
364 fileptr = fopen (*fnamptr, "r");
365 if (fileptr == NULL)
367 error (0, errno, "%s", quotef (*fnamptr));
368 ok = false;
369 continue;
371 fadvise (fileptr, FADVISE_SEQUENTIAL);
374 delimptr = delims; /* Set up for delimiter string. */
376 charold = getc (fileptr);
377 saved_errno = errno;
378 if (charold != EOF)
380 /* 'charold' is set up. Hit it!
381 Keep reading characters, stashing them in 'charnew';
382 output 'charold', converting to the appropriate delimiter
383 character if needed. After the EOF, output 'charold'
384 if it's a newline; otherwise, output it and then a newline. */
386 while ((charnew = getc (fileptr)) != EOF)
388 /* Process the old character. */
389 if (charold == line_delim)
391 if (*delimptr != EMPTY_DELIM)
392 xputchar (*delimptr);
394 if (++delimptr == delim_end)
395 delimptr = delims;
397 else
398 xputchar (charold);
400 charold = charnew;
402 saved_errno = errno;
404 /* Hit EOF. Process that last character. */
405 xputchar (charold);
408 if (charold != line_delim)
409 xputchar (line_delim);
411 if (!ferror (fileptr))
412 saved_errno = 0;
413 if (is_stdin)
414 clearerr (fileptr); /* Also clear EOF. */
415 else if (fclose (fileptr) != 0 && !saved_errno)
416 saved_errno = errno;
417 if (saved_errno)
419 error (0, saved_errno, "%s", quotef (*fnamptr));
420 ok = false;
423 return ok;
/* Tell the user how to run this program and then call exit (STATUS).
   When STATUS is not EXIT_SUCCESS only emit_try_help is invoked;
   when it is EXIT_SUCCESS the synopsis, a short description and the
   description of each option (-d, -s, -z, plus the help and version
   options) are written to standard output, followed by the text
   produced by emit_ancillary_info.  */
426 void
427 usage (int status)
/* Failure: leave the wording to emit_try_help rather than printing
   the full help text.  */
429 if (status != EXIT_SUCCESS)
430 emit_try_help ();
431 else
433 printf (_("\
434 Usage: %s [OPTION]... [FILE]...\n\
436 program_name);
437 fputs (_("\
438 Write lines consisting of the sequentially corresponding lines from\n\
439 each FILE, separated by TABs, to standard output.\n\
440 "), stdout);
442 emit_stdin_note ();
443 emit_mandatory_arg_note ();
445 fputs (_("\
446 -d, --delimiters=LIST reuse characters from LIST instead of TABs\n\
447 -s, --serial paste one file at a time instead of in parallel\n\
448 "), stdout);
449 fputs (_("\
450 -z, --zero-terminated line delimiter is NUL, not newline\n\
451 "), stdout);
452 fputs (HELP_OPTION_DESCRIPTION, stdout);
453 fputs (VERSION_OPTION_DESCRIPTION, stdout);
454 /* FIXME: add a couple of examples. */
455 emit_ancillary_info (PROGRAM_NAME);
457 exit (status);
461 main (int argc, char **argv)
463 int optc;
464 char const *delim_arg = "\t";
466 initialize_main (&argc, &argv);
467 set_program_name (argv[0]);
468 setlocale (LC_ALL, "");
469 bindtextdomain (PACKAGE, LOCALEDIR);
470 textdomain (PACKAGE);
472 atexit (close_stdout);
474 have_read_stdin = false;
475 serial_merge = false;
477 while ((optc = getopt_long (argc, argv, "d:sz", longopts, NULL)) != -1)
479 switch (optc)
481 case 'd':
482 /* Delimiter character(s). */
483 delim_arg = (optarg[0] == '\0' ? "\\0" : optarg);
484 break;
486 case 's':
487 serial_merge = true;
488 break;
490 case 'z':
491 line_delim = '\0';
492 break;
494 case_GETOPT_HELP_CHAR;
496 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
498 default:
499 usage (EXIT_FAILURE);
503 int nfiles = argc - optind;
504 if (nfiles == 0)
506 argv[optind] = bad_cast ("-");
507 nfiles++;
510 if (collapse_escapes (delim_arg))
512 /* Don't use the quote() quoting style, because that would double the
513 number of displayed backslashes, making the diagnostic look bogus. */
514 die (EXIT_FAILURE, 0,
515 _("delimiter list ends with an unescaped backslash: %s"),
516 quotearg_n_style_colon (0, c_maybe_quoting_style, delim_arg));
519 bool ok = ((serial_merge ? paste_serial : paste_parallel)
520 (nfiles, &argv[optind]));
522 free (delims);
524 if (have_read_stdin && fclose (stdin) == EOF)
525 die (EXIT_FAILURE, errno, "-");
526 return ok ? EXIT_SUCCESS : EXIT_FAILURE;