split: port ‘split -n N /dev/null’ better to macOS
[coreutils.git] / src / join.c
blob7965a699d6ccaabdad397c4cf0d665a5eb3c0cfb
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "die.h"
27 #include "error.h"
28 #include "fadvise.h"
29 #include "hard-locale.h"
30 #include "linebuffer.h"
31 #include "memcasecmp.h"
32 #include "quote.h"
33 #include "stdio--.h"
34 #include "xmemcoll.h"
35 #include "xstrtol.h"
36 #include "argmatch.h"
38 /* The official name of this program (e.g., no 'g' prefix). */
39 #define PROGRAM_NAME "join"
41 #define AUTHORS proper_name ("Mike Haertel")
43 #define join system_join
/* Exchange the two 'struct line *' lvalues A and B.
   A and B are each evaluated twice, so they must be free of side effects.
   The expansion is a single statement with no trailing semicolon, so
   that 'SWAPLINES (x, y);' can be used as the unbraced body of an
   if/else without breaking the else branch.  */
#define SWAPLINES(a, b) do { \
  struct line *swaplines_tmp_ = (a); \
  (a) = (b); \
  (b) = swaplines_tmp_; \
} while (0)
51 /* An element of the list identifying which fields to print for each
52 output line. */
/* Elements are created by add_field and appended in order after the
   dummy head 'outlist_head'; prjoin walks the list from
   outlist_head.next.  */
53 struct outlist
55 /* File number: 0, 1, or 2. 0 means use the join field.
56 1 means use the first file argument, 2 the second. */
57 int file;
59 /* Field index (zero-based), specified only when FILE is 1 or 2. */
60 size_t field;
/* Next element of the list, or NULL for the last element.  */
62 struct outlist *next;
65 /* A field of a line. */
/* BEG is set by extract_field to a position inside the owning line's
   text buffer; freeline releases the buffer and the field table but
   never frees BEG itself.  */
66 struct field
68 char *beg; /* First character in field. */
69 size_t len; /* The length of the field. */
72 /* A line read from an input file. */
/* The 'fields' table is grown on demand by extract_field and reused
   across reads: reset_line only sets nfields back to 0.  Both 'fields'
   and the text buffer are released by freeline.  */
73 struct line
75 struct linebuffer buf; /* The line itself. */
76 size_t nfields; /* Number of elements in 'fields'. */
77 size_t nfields_allocated; /* Number of elements allocated for 'fields'. */
78 struct field *fields;
81 /* One or more consecutive lines read from a file that all have the
82 same join field value. */
/* getseq sets newly allocated slots of 'lines' to NULL and get_line
   allocates a line object for a slot on first use; delseq frees every
   slot up to 'alloc', not just the first 'count'.  */
83 struct seq
85 size_t count; /* Elements used in 'lines'. */
86 size_t alloc; /* Elements allocated in 'lines'. */
87 struct line **lines;
/* Arrays of size 2 below are indexed by file number minus one
   (callers pass 1 or 2 and index with 'which - 1').  */
90 /* The previous line read from each file. */
91 static struct line *prevline[2] = {NULL, NULL};
93 /* The number of lines read from each file. */
94 static uintmax_t line_no[2] = {0, 0};
96 /* The input file names. */
97 static char *g_names[2];
99 /* This provides an extra line buffer for each file. We need these if we
100 try to read two consecutive lines into the same buffer, since we don't
101 want to overwrite the previous buffer before we check order. */
102 static struct line *spareline[2] = {NULL, NULL};
104 /* True if the LC_COLLATE locale is hard. */
/* When true (and case is not being ignored), keycmp compares with
   xmemcoll instead of memcmp.  */
105 static bool hard_LC_COLLATE;
107 /* If nonzero, print unpairable lines in file 1 or 2. */
108 static bool print_unpairables_1, print_unpairables_2;
110 /* If nonzero, print pairable lines. */
111 static bool print_pairables;
113 /* If nonzero, we have seen at least one unpairable line. */
114 static bool seen_unpairable;
116 /* If nonzero, we have warned about disorder in that file. */
117 static bool issued_disorder_warning[2];
119 /* Empty output field filler. */
/* NULL unless set from the -e option; prfield prints nothing for a
   missing or empty field when this is NULL.  */
120 static char const *empty_filler;
122 /* Whether to ensure the same number of fields are output from each line. */
123 static bool autoformat;
124 /* The number of fields to output for each line.
125 Only significant when autoformat is true. */
126 static size_t autocount_1;
127 static size_t autocount_2;
129 /* Field to join on; SIZE_MAX means they haven't been determined yet. */
130 static size_t join_field_1 = SIZE_MAX;
131 static size_t join_field_2 = SIZE_MAX;
133 /* List of fields to print. */
/* This is a dummy head: the first real element is outlist_head.next.  */
134 static struct outlist outlist_head;
136 /* Last element in 'outlist', where a new element can be added. */
137 static struct outlist *outlist_end = &outlist_head;
139 /* Tab character separating fields. If negative, fields are separated
140 by any nonempty string of blanks, otherwise by exactly one
141 tab character whose value (when cast to unsigned char) equals TAB. */
/* The value '\n' (set for an empty -t argument) makes xfields treat
   the whole line as a single field.  */
142 static int tab = -1;
144 /* If nonzero, check that the input is correctly ordered. */
/* This is a tri-state rather than a flag (see check_order):
   CHECK_ORDER_DEFAULT reports disorder as a warning, and only once an
   unpairable line has been seen; CHECK_ORDER_ENABLED always checks and
   makes disorder fatal; CHECK_ORDER_DISABLED skips the check.  */
145 static enum
147 CHECK_ORDER_DEFAULT,
148 CHECK_ORDER_ENABLED,
149 CHECK_ORDER_DISABLED
150 } check_input_order;
/* getopt_long return codes for the long options that have no
   single-character equivalent; they start above CHAR_MAX.  */
152 enum
154 CHECK_ORDER_OPTION = CHAR_MAX + 1,
155 NOCHECK_ORDER_OPTION,
156 HEADER_LINE_OPTION
/* Long option table passed to getopt_long in main.  */
160 static struct option const longopts[] =
162 {"ignore-case", no_argument, NULL, 'i'},
163 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
164 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
165 {"zero-terminated", no_argument, NULL, 'z'},
166 {"header", no_argument, NULL, HEADER_LINE_OPTION},
167 {GETOPT_HELP_OPTION_DECL},
168 {GETOPT_VERSION_OPTION_DECL},
169 {NULL, 0, NULL, 0}
172 /* Used to print non-joining lines */
/* A line with no fields, passed to prjoin in place of the missing
   side; prjoin recognizes it by address (line1 == &uni_blank).  */
173 static struct line uni_blank;
175 /* If nonzero, ignore case when comparing join fields. */
176 static bool ignore_case;
178 /* If nonzero, treat the first line of each file as column headers --
179 join them without checking for ordering */
180 static bool join_header_lines;
182 /* The character marking end of line. Default to \n. */
/* Set to 0 (NUL) by the -z option.  */
183 static char eolchar = '\n';
/* Print usage information and exit with STATUS.
   When STATUS is not EXIT_SUCCESS, only emit_try_help () is called.
   Otherwise the synopsis and the option descriptions are written to
   standard output, followed by emit_ancillary_info (PROGRAM_NAME).
   This function does not return: it ends by calling exit (STATUS).  */
185 void
186 usage (int status)
188 if (status != EXIT_SUCCESS)
189 emit_try_help ();
190 else
192 printf (_("\
193 Usage: %s [OPTION]... FILE1 FILE2\n\
195 program_name);
196 fputs (_("\
197 For each pair of input lines with identical join fields, write a line to\n\
198 standard output. The default join field is the first, delimited by blanks.\
200 "), stdout);
201 fputs (_("\
203 When FILE1 or FILE2 (not both) is -, read standard input.\n\
204 "), stdout);
205 fputs (_("\
207 -a FILENUM also print unpairable lines from file FILENUM, where\n\
208 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
209 "), stdout);
210 fputs (_("\
211 -e STRING replace missing (empty) input fields with STRING;\n\
212 I.e., missing fields specified with '-12jo' options\
214 "), stdout);
215 fputs (_("\
216 -i, --ignore-case ignore differences in case when comparing fields\n\
217 -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
218 -o FORMAT obey FORMAT while constructing output line\n\
219 -t CHAR use CHAR as input and output field separator\n\
220 "), stdout);
221 fputs (_("\
222 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
223 -1 FIELD join on this FIELD of file 1\n\
224 -2 FIELD join on this FIELD of file 2\n\
225 --check-order check that the input is correctly sorted, even\n\
226 if all input lines are pairable\n\
227 --nocheck-order do not check that the input is correctly sorted\n\
228 --header treat the first line in each file as field headers,\n\
229 print them without trying to pair them\n\
230 "), stdout);
231 fputs (_("\
232 -z, --zero-terminated line delimiter is NUL, not newline\n\
233 "), stdout);
234 fputs (HELP_OPTION_DESCRIPTION, stdout);
235 fputs (VERSION_OPTION_DESCRIPTION, stdout);
236 fputs (_("\
238 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
239 else fields are separated by CHAR. Any FIELD is a field number counted\n\
240 from 1. FORMAT is one or more comma or blank separated specifications,\n\
241 each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
242 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
243 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
244 line of each file determines the number of fields output for each line.\n\
246 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
247 E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
248 or use \"join -t ''\" if 'sort' has no options.\n\
249 Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\
250 If the input is not sorted and some lines cannot be joined, a\n\
251 warning message will be given.\n\
252 "), stdout);
253 emit_ancillary_info (PROGRAM_NAME);
255 exit (status);
258 /* Record a field in LINE, with location FIELD and size LEN. */
260 static void
261 extract_field (struct line *line, char *field, size_t len)
263 if (line->nfields >= line->nfields_allocated)
265 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
267 line->fields[line->nfields].beg = field;
268 line->fields[line->nfields].len = len;
269 ++(line->nfields);
272 /* Fill in the 'fields' structure in LINE. */
274 static void
275 xfields (struct line *line)
277 char *ptr = line->buf.buffer;
278 char const *lim = ptr + line->buf.length - 1;
280 if (ptr == lim)
281 return;
283 if (0 <= tab && tab != '\n')
285 char *sep;
286 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
287 extract_field (line, ptr, sep - ptr);
289 else if (tab < 0)
291 /* Skip leading blanks before the first field. */
292 while (field_sep (*ptr))
293 if (++ptr == lim)
294 return;
298 char *sep;
299 for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++)
300 continue;
301 extract_field (line, ptr, sep - ptr);
302 if (sep == lim)
303 return;
304 for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++)
305 continue;
307 while (ptr != lim);
310 extract_field (line, ptr, lim - ptr);
313 static void
314 freeline (struct line *line)
316 if (line == NULL)
317 return;
318 free (line->fields);
319 line->fields = NULL;
320 free (line->buf.buffer);
321 line->buf.buffer = NULL;
324 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
325 >0 if it compares greater; 0 if it compares equal.
326 Report an error and exit if the comparison fails.
327 Use join fields JF_1 and JF_2 respectively. */
329 static int
330 keycmp (struct line const *line1, struct line const *line2,
331 size_t jf_1, size_t jf_2)
333 /* Start of field to compare in each file. */
334 char *beg1;
335 char *beg2;
337 size_t len1;
338 size_t len2; /* Length of fields to compare. */
339 int diff;
341 if (jf_1 < line1->nfields)
343 beg1 = line1->fields[jf_1].beg;
344 len1 = line1->fields[jf_1].len;
346 else
348 beg1 = NULL;
349 len1 = 0;
352 if (jf_2 < line2->nfields)
354 beg2 = line2->fields[jf_2].beg;
355 len2 = line2->fields[jf_2].len;
357 else
359 beg2 = NULL;
360 len2 = 0;
363 if (len1 == 0)
364 return len2 == 0 ? 0 : -1;
365 if (len2 == 0)
366 return 1;
368 if (ignore_case)
370 /* FIXME: ignore_case does not work with NLS (in particular,
371 with multibyte chars). */
372 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
374 else
376 if (hard_LC_COLLATE)
377 return xmemcoll (beg1, len1, beg2, len2);
378 diff = memcmp (beg1, beg2, MIN (len1, len2));
381 if (diff)
382 return diff;
383 return (len1 > len2) - (len1 < len2);
386 /* Check that successive input lines PREV and CURRENT from input file
387 WHATFILE are presented in order, unless the user may be relying on
388 the GNU extension that input lines may be out of order if no input
389 lines are unpairable.
391 If the user specified --nocheck-order, the check is not made.
392 If the user specified --check-order, the problem is fatal.
393 Otherwise (the default), the message is simply a warning.
395 A message is printed at most once per input file. */
397 static void
398 check_order (const struct line *prev,
399 const struct line *current,
400 int whatfile)
402 if (check_input_order != CHECK_ORDER_DISABLED
403 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
405 if (!issued_disorder_warning[whatfile - 1])
407 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
408 if (keycmp (prev, current, join_field, join_field) > 0)
410 /* Exclude any trailing newline. */
411 size_t len = current->buf.length;
412 if (0 < len && current->buf.buffer[len - 1] == '\n')
413 --len;
415 /* If the offending line is longer than INT_MAX, output
416 only the first INT_MAX bytes in this diagnostic. */
417 len = MIN (INT_MAX, len);
419 error ((check_input_order == CHECK_ORDER_ENABLED
420 ? EXIT_FAILURE : 0),
421 0, _("%s:%"PRIuMAX": is not sorted: %.*s"),
422 g_names[whatfile - 1], line_no[whatfile - 1],
423 (int) len, current->buf.buffer);
425 /* If we get to here, the message was merely a warning.
426 Arrange to issue it only once per file. */
427 issued_disorder_warning[whatfile - 1] = true;
433 static inline void
434 reset_line (struct line *line)
436 line->nfields = 0;
439 static struct line *
440 init_linep (struct line **linep)
442 struct line *line = xcalloc (1, sizeof *line);
443 *linep = line;
444 return line;
447 /* Read a line from FP into LINE and split it into fields.
448 Return true if successful. */
450 static bool
451 get_line (FILE *fp, struct line **linep, int which)
453 struct line *line = *linep;
455 if (line == prevline[which - 1])
457 SWAPLINES (line, spareline[which - 1]);
458 *linep = line;
461 if (line)
462 reset_line (line);
463 else
464 line = init_linep (linep);
466 if (! readlinebuffer_delim (&line->buf, fp, eolchar))
468 if (ferror (fp))
469 die (EXIT_FAILURE, errno, _("read error"));
470 freeline (line);
471 return false;
473 ++line_no[which - 1];
475 xfields (line);
477 if (prevline[which - 1])
478 check_order (prevline[which - 1], line, which);
480 prevline[which - 1] = line;
481 return true;
484 static void
485 free_spareline (void)
487 for (size_t i = 0; i < ARRAY_CARDINALITY (spareline); i++)
489 if (spareline[i])
491 freeline (spareline[i]);
492 free (spareline[i]);
497 static void
498 initseq (struct seq *seq)
500 seq->count = 0;
501 seq->alloc = 0;
502 seq->lines = NULL;
505 /* Read a line from FP and add it to SEQ. Return true if successful. */
507 static bool
508 getseq (FILE *fp, struct seq *seq, int whichfile)
510 if (seq->count == seq->alloc)
512 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
513 for (size_t i = seq->count; i < seq->alloc; i++)
514 seq->lines[i] = NULL;
517 if (get_line (fp, &seq->lines[seq->count], whichfile))
519 ++seq->count;
520 return true;
522 return false;
525 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
526 true, else as the next. */
527 static bool
528 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
530 if (first)
531 seq->count = 0;
533 return getseq (fp, seq, whichfile);
536 static void
537 delseq (struct seq *seq)
539 for (size_t i = 0; i < seq->alloc; i++)
541 freeline (seq->lines[i]);
542 free (seq->lines[i]);
544 free (seq->lines);
548 /* Print field N of LINE if it exists and is nonempty, otherwise
549 'empty_filler' if it is nonempty. */
551 static void
552 prfield (size_t n, struct line const *line)
554 size_t len;
556 if (n < line->nfields)
558 len = line->fields[n].len;
559 if (len)
560 fwrite (line->fields[n].beg, 1, len, stdout);
561 else if (empty_filler)
562 fputs (empty_filler, stdout);
564 else if (empty_filler)
565 fputs (empty_filler, stdout);
568 /* Output all the fields in line, other than the join field. */
570 static void
571 prfields (struct line const *line, size_t join_field, size_t autocount)
573 size_t i;
574 size_t nfields = autoformat ? autocount : line->nfields;
575 char output_separator = tab < 0 ? ' ' : tab;
577 for (i = 0; i < join_field && i < nfields; ++i)
579 putchar (output_separator);
580 prfield (i, line);
582 for (i = join_field + 1; i < nfields; ++i)
584 putchar (output_separator);
585 prfield (i, line);
589 /* Print the join of LINE1 and LINE2. */
591 static void
592 prjoin (struct line const *line1, struct line const *line2)
594 const struct outlist *outlist;
595 char output_separator = tab < 0 ? ' ' : tab;
596 size_t field;
597 struct line const *line;
599 outlist = outlist_head.next;
600 if (outlist)
602 const struct outlist *o;
604 o = outlist;
605 while (true)
607 if (o->file == 0)
609 if (line1 == &uni_blank)
611 line = line2;
612 field = join_field_2;
614 else
616 line = line1;
617 field = join_field_1;
620 else
622 line = (o->file == 1 ? line1 : line2);
623 field = o->field;
625 prfield (field, line);
626 o = o->next;
627 if (o == NULL)
628 break;
629 putchar (output_separator);
631 putchar (eolchar);
633 else
635 if (line1 == &uni_blank)
637 line = line2;
638 field = join_field_2;
640 else
642 line = line1;
643 field = join_field_1;
646 /* Output the join field. */
647 prfield (field, line);
649 /* Output other fields. */
650 prfields (line1, join_field_1, autocount_1);
651 prfields (line2, join_field_2, autocount_2);
653 putchar (eolchar);
657 /* Print the join of the files in FP1 and FP2. */
/* Overview: SEQ1 and SEQ2 each hold a run of consecutive lines from
   one input.  The main loop compares the first line of each run with
   keycmp.  When one key is smaller, that line is unpairable: it is
   printed if requested and its file advances.  When the keys are
   equal, both runs are extended until the key changes or input ends,
   and then every pairing of the two runs is printed.  After the loop,
   the remaining lines of each file are read so that they can be
   printed as unpairable and/or checked for ordering.  */
659 static void
660 join (FILE *fp1, FILE *fp2)
662 struct seq seq1, seq2;
663 int diff;
664 bool eof1, eof2;
666 fadvise (fp1, FADVISE_SEQUENTIAL);
667 fadvise (fp2, FADVISE_SEQUENTIAL);
669 /* Read the first line of each file. */
670 initseq (&seq1);
671 getseq (fp1, &seq1, 1);
672 initseq (&seq2);
673 getseq (fp2, &seq2, 2);
675 if (autoformat)
677 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
678 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
681 if (join_header_lines && (seq1.count || seq2.count))
683 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
684 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
685 prjoin (hline1, hline2);
/* Forget the header lines: get_line only calls check_order when
   prevline is set, so the first data line is not compared with the
   header.  */
686 prevline[0] = NULL;
687 prevline[1] = NULL;
688 if (seq1.count)
689 advance_seq (fp1, &seq1, true, 1);
690 if (seq2.count)
691 advance_seq (fp2, &seq2, true, 2);
694 while (seq1.count && seq2.count)
696 diff = keycmp (seq1.lines[0], seq2.lines[0],
697 join_field_1, join_field_2);
698 if (diff < 0)
700 if (print_unpairables_1)
701 prjoin (seq1.lines[0], &uni_blank);
702 advance_seq (fp1, &seq1, true, 1);
703 seen_unpairable = true;
704 continue;
706 if (diff > 0)
708 if (print_unpairables_2)
709 prjoin (&uni_blank, seq2.lines[0]);
710 advance_seq (fp2, &seq2, true, 2);
711 seen_unpairable = true;
712 continue;
715 /* Keep reading lines from file1 as long as they continue to
716 match the current line from file2. */
717 eof1 = false;
719 if (!advance_seq (fp1, &seq1, false, 1))
721 eof1 = true;
/* At end of input no new line was stored.  The count is bumped anyway
   so that the 'count - 1' bounds used below are right in both the EOF
   and the non-EOF case (in the latter the last slot holds the first
   line that did not match).  */
722 ++seq1.count;
723 break;
725 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
726 join_field_1, join_field_2));
728 /* Keep reading lines from file2 as long as they continue to
729 match the current line from file1. */
730 eof2 = false;
732 if (!advance_seq (fp2, &seq2, false, 2))
734 eof2 = true;
735 ++seq2.count;
736 break;
738 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
739 join_field_1, join_field_2));
741 if (print_pairables)
743 for (size_t i = 0; i < seq1.count - 1; ++i)
745 size_t j;
746 for (j = 0; j < seq2.count - 1; ++j)
747 prjoin (seq1.lines[i], seq2.lines[j]);
/* Unless input ended, the last slot of each run holds the first line
   of the next group: move it to slot 0 and restart the run with just
   that line.  At end of input the run becomes empty, which ends the
   main loop.  */
751 if (!eof1)
753 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
754 seq1.count = 1;
756 else
757 seq1.count = 0;
759 if (!eof2)
761 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
762 seq2.count = 1;
764 else
765 seq2.count = 0;
768 /* If the user did not specify --nocheck-order, then we read the
769 tail ends of both inputs to verify that they are in order. We
770 skip the rest of the tail once we have issued a warning for that
771 file, unless we actually need to print the unpairable lines. */
772 struct line *line = NULL;
773 bool checktail = false;
775 if (check_input_order != CHECK_ORDER_DISABLED
776 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
777 checktail = true;
779 if ((print_unpairables_1 || checktail) && seq1.count)
781 if (print_unpairables_1)
782 prjoin (seq1.lines[0], &uni_blank);
783 if (seq2.count)
784 seen_unpairable = true;
785 while (get_line (fp1, &line, 1))
787 if (print_unpairables_1)
788 prjoin (line, &uni_blank);
789 if (issued_disorder_warning[0] && !print_unpairables_1)
790 break;
794 if ((print_unpairables_2 || checktail) && seq2.count)
796 if (print_unpairables_2)
797 prjoin (&uni_blank, seq2.lines[0]);
798 if (seq1.count)
799 seen_unpairable = true;
800 while (get_line (fp2, &line, 2))
802 if (print_unpairables_2)
803 prjoin (&uni_blank, line);
804 if (issued_disorder_warning[1] && !print_unpairables_2)
805 break;
/* LINE is the scratch line object used by the two tail loops above;
   it is still NULL if neither loop ran, which freeline and free both
   accept.  */
809 freeline (line);
810 free (line);
812 delseq (&seq1);
813 delseq (&seq2);
816 /* Add a field spec for field FIELD of file FILE to 'outlist'. */
818 static void
819 add_field (int file, size_t field)
821 struct outlist *o;
823 assert (file == 0 || file == 1 || file == 2);
824 assert (file != 0 || field == 0);
826 o = xmalloc (sizeof *o);
827 o->file = file;
828 o->field = field;
829 o->next = NULL;
831 /* Add to the end of the list so the fields are in the right order. */
832 outlist_end->next = o;
833 outlist_end = o;
836 /* Convert a string of decimal digits, STR (the 1-based join field number),
837 to an integral value. Upon successful conversion, return one less
838 (the zero-based field number). Silently convert too-large values
839 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
840 diagnostic and exit. */
842 static size_t
843 string_to_join_field (char const *str)
845 size_t result;
846 uintmax_t val;
848 strtol_error s_err = xstrtoumax (str, NULL, 10, &val, "");
849 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
850 val = SIZE_MAX;
851 else if (s_err != LONGINT_OK || val == 0)
852 die (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
854 result = val - 1;
856 return result;
/* Decode the single field specifier S, which is either "0" (the join
   field) or "FILENUM.FIELD" with FILENUM 1 or 2, into *FILE_INDEX and
   *FIELD_INDEX.  FIELD is 1-based in S; *FIELD_INDEX is zero-based.
   If S is invalid, give a diagnostic and exit.  */

static void
decode_field_spec (char const *s, int *file_index, size_t *field_index)
{
  char filenum = s[0];

  if (filenum == '0')
    {
      /* '0' must be all alone -- no '.FIELD'.  */
      if (s[1])
        die (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (filenum == '1' || filenum == '2')
    {
      if (s[1] != '.')
        die (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = filenum - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      die (EXIT_FAILURE, 0,
           _("invalid file number in field spec: %s"), quote (s));

      /* Not reached.  This tells gcc -W -Wall that control cannot get
         past here, avoiding a warning that the caller's copies of
         *file_index and *field_index might be used uninitialized.  */
      abort ();
    }
}
/* Split STR at commas, spaces, and tabs, and add each resulting field
   specifier to the output list.  STR is modified in place: each
   separator found is overwritten with a NUL byte.  */

static void
add_field_list (char *str)
{
  char *rest = str;

  for (;;)
    {
      int file_index;
      size_t field_index;
      char const *spec_item = rest;

      /* Terminate this item and step to the start of the next.  */
      rest = strpbrk (rest, ", \t");
      if (rest)
        *rest++ = '\0';

      decode_field_spec (spec_item, &file_index, &field_index);
      add_field (file_index, field_index);

      if (!rest)
        break;
    }
}
/* Set the join field *VAR to VAL, but report an error and exit if *VAR
   has already been set to a different value.  SIZE_MAX in *VAR means
   "not set yet".

   The diagnostic prints the 1-based field numbers.  They are widened
   to uintmax_t rather than narrowed to unsigned long, so the values
   are not truncated on platforms where size_t is wider than
   unsigned long.  */

static void
set_join_field (size_t *var, size_t val)
{
  if (*var != SIZE_MAX && *var != val)
    {
      uintmax_t var1 = (uintmax_t) *var + 1;
      uintmax_t val1 = (uintmax_t) val + 1;
      die (EXIT_FAILURE, 0,
           _("incompatible join fields %"PRIuMAX", %"PRIuMAX), var1, val1);
    }
  *var = val;
}
938 /* Status of command-line arguments. */
/* These values record how a non-option argument should be treated,
   given the option that immediately preceded it.
   MIGHT_BE_J2_ARG must directly follow MIGHT_BE_J1_ARG, because main
   computes the status as 'MIGHT_BE_J1_ARG + is_j2'.  */
940 enum operand_status
942 /* This argument must be an operand, i.e., one of the files to be
943 joined. */
944 MUST_BE_OPERAND,
946 /* This might be the argument of the preceding -j1 or -j2 option,
947 or it might be an operand. */
948 MIGHT_BE_J1_ARG,
949 MIGHT_BE_J2_ARG,
951 /* This might be the argument of the preceding -o option, or it might be
952 an operand. */
953 MIGHT_BE_O_ARG
956 /* Add NAME to the array of input file NAMES with operand statuses
957 OPERAND_STATUS; currently there are NFILES names in the list. */
/* At most two names are kept.  When a third arrives, one of the two
   recorded names must really have been the argument of a preceding
   -j1, -j2, or -o option: that name is reinterpreted as the option
   argument and dropped from the list, making room for NAME.  If both
   recorded names are definite operands, NAME is an extra operand and
   the program exits via usage.
   JOPTION_COUNT[i] is decremented when a pending "-j1"/"-j2" turns out
   to have had an argument.  *PREV_OPTC_STATUS is the status implied by
   the previous option; *OPTC_STATUS is set so that a pending -o status
   carries over to the next argument.  */
959 static void
960 add_file_name (char *name, char *names[2],
961 int operand_status[2], int joption_count[2], int *nfiles,
962 int *prev_optc_status, int *optc_status)
964 int n = *nfiles;
966 if (n == 2)
/* op0 is true when names[0] is definitely an operand; it is then used
   as an index, so names[op0] is the entry to be reinterpreted:
   names[1] if names[0] is a definite operand, else names[0].  */
968 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
969 char *arg = names[op0];
970 switch (operand_status[op0])
972 case MUST_BE_OPERAND:
973 error (0, 0, _("extra operand %s"), quoteaf (name));
/* usage ends by calling exit, so control does not fall into the
   next case.  */
974 usage (EXIT_FAILURE);
976 case MIGHT_BE_J1_ARG:
977 joption_count[0]--;
978 set_join_field (&join_field_1, string_to_join_field (arg));
979 break;
981 case MIGHT_BE_J2_ARG:
982 joption_count[1]--;
983 set_join_field (&join_field_2, string_to_join_field (arg));
984 break;
986 case MIGHT_BE_O_ARG:
987 add_field_list (arg);
988 break;
/* If names[0] was the entry consumed, move names[1] down to slot 0.
   Either way one name remains, and NAME is stored in slot 1 below.  */
990 if (!op0)
992 operand_status[0] = operand_status[1];
993 names[0] = names[1];
995 n = 1;
998 operand_status[n] = *prev_optc_status;
999 names[n] = name;
1000 *nfiles = n + 1;
1001 if (*prev_optc_status == MIGHT_BE_O_ARG)
1002 *optc_status = MIGHT_BE_O_ARG;
1006 main (int argc, char **argv)
1008 int optc_status;
1009 int prev_optc_status = MUST_BE_OPERAND;
1010 int operand_status[2];
1011 int joption_count[2] = { 0, 0 };
1012 FILE *fp1, *fp2;
1013 int optc;
1014 int nfiles = 0;
1015 int i;
1017 initialize_main (&argc, &argv);
1018 set_program_name (argv[0]);
1019 setlocale (LC_ALL, "");
1020 bindtextdomain (PACKAGE, LOCALEDIR);
1021 textdomain (PACKAGE);
1022 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1024 atexit (close_stdout);
1025 atexit (free_spareline);
1027 print_pairables = true;
1028 seen_unpairable = false;
1029 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1030 check_input_order = CHECK_ORDER_DEFAULT;
1032 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z",
1033 longopts, NULL))
1034 != -1)
1036 optc_status = MUST_BE_OPERAND;
1038 switch (optc)
1040 case 'v':
1041 print_pairables = false;
1042 FALLTHROUGH;
1044 case 'a':
1046 unsigned long int val;
1047 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1048 || (val != 1 && val != 2))
1049 die (EXIT_FAILURE, 0,
1050 _("invalid field number: %s"), quote (optarg));
1051 if (val == 1)
1052 print_unpairables_1 = true;
1053 else
1054 print_unpairables_2 = true;
1056 break;
1058 case 'e':
1059 if (empty_filler && ! STREQ (empty_filler, optarg))
1060 die (EXIT_FAILURE, 0,
1061 _("conflicting empty-field replacement strings"));
1062 empty_filler = optarg;
1063 break;
1065 case 'i':
1066 ignore_case = true;
1067 break;
1069 case '1':
1070 set_join_field (&join_field_1, string_to_join_field (optarg));
1071 break;
1073 case '2':
1074 set_join_field (&join_field_2, string_to_join_field (optarg));
1075 break;
1077 case 'j':
1078 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1079 && optarg == argv[optind - 1] + 2)
1081 /* The argument was either "-j1" or "-j2". */
1082 bool is_j2 = (optarg[0] == '2');
1083 joption_count[is_j2]++;
1084 optc_status = MIGHT_BE_J1_ARG + is_j2;
1086 else
1088 set_join_field (&join_field_1, string_to_join_field (optarg));
1089 set_join_field (&join_field_2, join_field_1);
1091 break;
1093 case 'o':
1094 if (STREQ (optarg, "auto"))
1095 autoformat = true;
1096 else
1098 add_field_list (optarg);
1099 optc_status = MIGHT_BE_O_ARG;
1101 break;
1103 case 't':
1105 unsigned char newtab = optarg[0];
1106 if (! newtab)
1107 newtab = '\n'; /* '' => process the whole line. */
1108 else if (optarg[1])
1110 if (STREQ (optarg, "\\0"))
1111 newtab = '\0';
1112 else
1113 die (EXIT_FAILURE, 0, _("multi-character tab %s"),
1114 quote (optarg));
1116 if (0 <= tab && tab != newtab)
1117 die (EXIT_FAILURE, 0, _("incompatible tabs"));
1118 tab = newtab;
1120 break;
1122 case 'z':
1123 eolchar = 0;
1124 break;
1126 case NOCHECK_ORDER_OPTION:
1127 check_input_order = CHECK_ORDER_DISABLED;
1128 break;
1130 case CHECK_ORDER_OPTION:
1131 check_input_order = CHECK_ORDER_ENABLED;
1132 break;
1134 case 1: /* Non-option argument. */
1135 add_file_name (optarg, g_names, operand_status, joption_count,
1136 &nfiles, &prev_optc_status, &optc_status);
1137 break;
1139 case HEADER_LINE_OPTION:
1140 join_header_lines = true;
1141 break;
1143 case_GETOPT_HELP_CHAR;
1145 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1147 default:
1148 usage (EXIT_FAILURE);
1151 prev_optc_status = optc_status;
1154 /* Process any operands after "--". */
1155 prev_optc_status = MUST_BE_OPERAND;
1156 while (optind < argc)
1157 add_file_name (argv[optind++], g_names, operand_status, joption_count,
1158 &nfiles, &prev_optc_status, &optc_status);
1160 if (nfiles != 2)
1162 if (nfiles == 0)
1163 error (0, 0, _("missing operand"));
1164 else
1165 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1166 usage (EXIT_FAILURE);
1169 /* If "-j1" was specified and it turns out not to have had an argument,
1170 treat it as "-j 1". Likewise for -j2. */
1171 for (i = 0; i < 2; i++)
1172 if (joption_count[i] != 0)
1174 set_join_field (&join_field_1, i);
1175 set_join_field (&join_field_2, i);
1178 if (join_field_1 == SIZE_MAX)
1179 join_field_1 = 0;
1180 if (join_field_2 == SIZE_MAX)
1181 join_field_2 = 0;
1183 fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
1184 if (!fp1)
1185 die (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
1186 fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
1187 if (!fp2)
1188 die (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
1189 if (fp1 == fp2)
1190 die (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1191 join (fp1, fp2);
1193 if (fclose (fp1) != 0)
1194 die (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
1195 if (fclose (fp2) != 0)
1196 die (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
1198 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1199 die (EXIT_FAILURE, 0, _("input is not in sorted order"));
1200 else
1201 return EXIT_SUCCESS;