maint: include ctype.h selectively
[coreutils.git] / src / join.c
blobb95cf2b9be9198766abf0c86a0fd21a203ad95e0
/* join - join lines of two files on a common field
   Copyright (C) 1991-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.

   Written by Mike Haertel, mike@gnu.ai.mit.edu.  */
19 #include <config.h>
21 #include <sys/types.h>
22 #include <getopt.h>
24 #include "system.h"
25 #include "assure.h"
26 #include "cu-ctype.h"
27 #include "fadvise.h"
28 #include "hard-locale.h"
29 #include "linebuffer.h"
30 #include "memcasecmp.h"
31 #include "quote.h"
32 #include "stdio--.h"
33 #include "xmemcoll.h"
34 #include "xstrtol.h"
35 #include "argmatch.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "join"

#define AUTHORS proper_name ("Mike Haertel")

/* From here on every use of the identifier 'join' in this file is
   spelled 'system_join' after preprocessing.  */
#define join system_join
/* Exchange the two 'struct line *' lvalues A and B.
   The expansion is a single statement without a trailing semicolon, so
   'SWAPLINES (x, y);' can be used as the body of an if/else.  Each
   argument is evaluated more than once, so it must be free of side
   effects.  */
#define SWAPLINES(a, b) do { \
  struct line *swaplines_tmp_ = (a); \
  (a) = (b); \
  (b) = swaplines_tmp_; \
} while (0)
50 /* An element of the list identifying which fields to print for each
51 output line. */
52 struct outlist
54 /* File number: 0, 1, or 2. 0 means use the join field.
55 1 means use the first file argument, 2 the second. */
56 int file;
58 /* Field index (zero-based), specified only when FILE is 1 or 2. */
59 idx_t field;
61 struct outlist *next;
64 /* A field of a line. */
65 struct field
67 char *beg; /* First character in field. */
68 idx_t len; /* The length of the field. */
71 /* A line read from an input file. */
72 struct line
74 struct linebuffer buf; /* The line itself. */
75 idx_t nfields; /* Number of elements in 'fields'. */
76 idx_t nfields_allocated; /* Number of elements allocated for 'fields'. */
77 struct field *fields;
80 /* One or more consecutive lines read from a file that all have the
81 same join field value. */
82 struct seq
84 idx_t count; /* Elements used in 'lines'. */
85 idx_t alloc; /* Elements allocated in 'lines'. */
86 struct line **lines;
89 /* The previous line read from each file. */
90 static struct line *prevline[2] = {nullptr, nullptr};
92 /* The number of lines read from each file. */
93 static uintmax_t line_no[2] = {0, 0};
95 /* The input file names. */
96 static char *g_names[2];
98 /* This provides an extra line buffer for each file. We need these if we
99 try to read two consecutive lines into the same buffer, since we don't
100 want to overwrite the previous buffer before we check order. */
101 static struct line *spareline[2] = {nullptr, nullptr};
103 /* True if the LC_COLLATE locale is hard. */
104 static bool hard_LC_COLLATE;
106 /* If nonzero, print unpairable lines in file 1 or 2. */
107 static bool print_unpairables_1, print_unpairables_2;
109 /* If nonzero, print pairable lines. */
110 static bool print_pairables;
112 /* If nonzero, we have seen at least one unpairable line. */
113 static bool seen_unpairable;
115 /* If nonzero, we have warned about disorder in that file. */
116 static bool issued_disorder_warning[2];
118 /* Empty output field filler. */
119 static char const *empty_filler;
121 /* Whether to ensure the same number of fields are output from each line. */
122 static bool autoformat;
123 /* The number of fields to output for each line.
124 Only significant when autoformat is true. */
125 static idx_t autocount_1;
126 static idx_t autocount_2;
128 /* Field to join on; -1 means they haven't been determined yet. */
129 static ptrdiff_t join_field_1 = -1;
130 static ptrdiff_t join_field_2 = -1;
132 /* List of fields to print. */
133 static struct outlist outlist_head;
135 /* Last element in 'outlist', where a new element can be added. */
136 static struct outlist *outlist_end = &outlist_head;
138 /* Tab character separating fields. If negative, fields are separated
139 by any nonempty string of blanks, otherwise by exactly one
140 tab character whose value (when cast to unsigned char) equals TAB. */
141 static int tab = -1;
/* Whether to check that the input is correctly ordered.
   CHECK_ORDER_DEFAULT is the zero value, so it is the state before any
   option is seen.  */
static enum
  {
    CHECK_ORDER_DEFAULT,
    CHECK_ORDER_ENABLED,
    CHECK_ORDER_DISABLED
  } check_input_order;
/* Values returned by getopt_long for the long-only options.  They start
   above CHAR_MAX so they cannot collide with a short option character.  */
enum
{
  CHECK_ORDER_OPTION = CHAR_MAX + 1,
  NOCHECK_ORDER_OPTION,
  HEADER_LINE_OPTION
};
159 static struct option const longopts[] =
161 {"ignore-case", no_argument, nullptr, 'i'},
162 {"check-order", no_argument, nullptr, CHECK_ORDER_OPTION},
163 {"nocheck-order", no_argument, nullptr, NOCHECK_ORDER_OPTION},
164 {"zero-terminated", no_argument, nullptr, 'z'},
165 {"header", no_argument, nullptr, HEADER_LINE_OPTION},
166 {GETOPT_HELP_OPTION_DECL},
167 {GETOPT_VERSION_OPTION_DECL},
168 {nullptr, 0, nullptr, 0}
171 /* Used to print non-joining lines */
172 static struct line uni_blank;
174 /* If nonzero, ignore case when comparing join fields. */
175 static bool ignore_case;
177 /* If nonzero, treat the first line of each file as column headers --
178 join them without checking for ordering */
179 static bool join_header_lines;
181 /* The character marking end of line. Default to \n. */
182 static char eolchar = '\n';
/* Print a usage message and exit with STATUS.  When STATUS is not
   EXIT_SUCCESS only emit_try_help () is called; otherwise the full
   help text is written to standard output, followed by
   emit_ancillary_info ().  In both cases the function ends in
   exit (STATUS) and so does not return.
   NOTE(review): in this copy the help-text literals appear to have lost
   their column alignment and their continuation-only lines; restore
   them from a pristine copy of the file rather than by hand.  */
184 void
185 usage (int status)
187 if (status != EXIT_SUCCESS)
188 emit_try_help ();
189 else
191 printf (_("\
192 Usage: %s [OPTION]... FILE1 FILE2\n\
194 program_name);
195 fputs (_("\
196 For each pair of input lines with identical join fields, write a line to\n\
197 standard output. The default join field is the first, delimited by blanks.\
199 "), stdout);
200 fputs (_("\
202 When FILE1 or FILE2 (not both) is -, read standard input.\n\
203 "), stdout);
204 fputs (_("\
206 -a FILENUM also print unpairable lines from file FILENUM, where\n\
207 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
208 "), stdout);
209 fputs (_("\
210 -e STRING replace missing (empty) input fields with STRING;\n\
211 I.e., missing fields specified with '-12jo' options\
213 "), stdout);
214 fputs (_("\
215 -i, --ignore-case ignore differences in case when comparing fields\n\
216 -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
217 -o FORMAT obey FORMAT while constructing output line\n\
218 -t CHAR use CHAR as input and output field separator\n\
219 "), stdout);
220 fputs (_("\
221 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
222 -1 FIELD join on this FIELD of file 1\n\
223 -2 FIELD join on this FIELD of file 2\n\
224 --check-order check that the input is correctly sorted, even\n\
225 if all input lines are pairable\n\
226 --nocheck-order do not check that the input is correctly sorted\n\
227 --header treat the first line in each file as field headers,\n\
228 print them without trying to pair them\n\
229 "), stdout);
230 fputs (_("\
231 -z, --zero-terminated line delimiter is NUL, not newline\n\
232 "), stdout);
233 fputs (HELP_OPTION_DESCRIPTION, stdout);
234 fputs (VERSION_OPTION_DESCRIPTION, stdout);
235 fputs (_("\
237 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
238 else fields are separated by CHAR. Any FIELD is a field number counted\n\
239 from 1. FORMAT is one or more comma or blank separated specifications,\n\
240 each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
241 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
242 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
243 line of each file determines the number of fields output for each line.\n\
245 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
246 E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
247 or use \"join -t ''\" if 'sort' has no options.\n\
248 Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\
249 If the input is not sorted and some lines cannot be joined, a\n\
250 warning message will be given.\n\
251 "), stdout);
252 emit_ancillary_info (PROGRAM_NAME);
254 exit (status);
257 /* Record a field in LINE, with location FIELD and size LEN. */
259 static void
260 extract_field (struct line *line, char *field, idx_t len)
262 if (line->nfields >= line->nfields_allocated)
263 line->fields = xpalloc (line->fields, &line->nfields_allocated, 1,
264 -1, sizeof *line->fields);
265 line->fields[line->nfields].beg = field;
266 line->fields[line->nfields].len = len;
267 ++(line->nfields);
270 /* Fill in the 'fields' structure in LINE. */
272 static void
273 xfields (struct line *line)
275 char *ptr = line->buf.buffer;
276 char const *lim = ptr + line->buf.length - 1;
278 if (ptr == lim)
279 return;
281 if (0 <= tab && tab != '\n')
283 char *sep;
284 for (; (sep = memchr (ptr, tab, lim - ptr)) != nullptr; ptr = sep + 1)
285 extract_field (line, ptr, sep - ptr);
287 else if (tab < 0)
289 /* Skip leading blanks before the first field. */
290 while (field_sep (*ptr))
291 if (++ptr == lim)
292 return;
296 char *sep;
297 for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++)
298 continue;
299 extract_field (line, ptr, sep - ptr);
300 if (sep == lim)
301 return;
302 for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++)
303 continue;
305 while (ptr != lim);
308 extract_field (line, ptr, lim - ptr);
311 static void
312 freeline (struct line *line)
314 if (line == nullptr)
315 return;
316 free (line->fields);
317 line->fields = nullptr;
318 free (line->buf.buffer);
319 line->buf.buffer = nullptr;
322 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
323 >0 if it compares greater; 0 if it compares equal.
324 Report an error and exit if the comparison fails.
325 Use join fields JF_1 and JF_2 respectively. */
327 static int
328 keycmp (struct line const *line1, struct line const *line2,
329 idx_t jf_1, idx_t jf_2)
331 /* Start of field to compare in each file. */
332 char *beg1;
333 char *beg2;
335 idx_t len1;
336 idx_t len2; /* Length of fields to compare. */
337 int diff;
339 if (jf_1 < line1->nfields)
341 beg1 = line1->fields[jf_1].beg;
342 len1 = line1->fields[jf_1].len;
344 else
346 beg1 = nullptr;
347 len1 = 0;
350 if (jf_2 < line2->nfields)
352 beg2 = line2->fields[jf_2].beg;
353 len2 = line2->fields[jf_2].len;
355 else
357 beg2 = nullptr;
358 len2 = 0;
361 if (len1 == 0)
362 return len2 == 0 ? 0 : -1;
363 if (len2 == 0)
364 return 1;
366 if (ignore_case)
368 /* FIXME: ignore_case does not work with NLS (in particular,
369 with multibyte chars). */
370 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
372 else
374 if (hard_LC_COLLATE)
375 return xmemcoll (beg1, len1, beg2, len2);
376 diff = memcmp (beg1, beg2, MIN (len1, len2));
379 if (diff)
380 return diff;
381 return (len1 > len2) - (len1 < len2);
384 /* Check that successive input lines PREV and CURRENT from input file
385 WHATFILE are presented in order, unless the user may be relying on
386 the GNU extension that input lines may be out of order if no input
387 lines are unpairable.
389 If the user specified --nocheck-order, the check is not made.
390 If the user specified --check-order, the problem is fatal.
391 Otherwise (the default), the message is simply a warning.
393 A message is printed at most once per input file. */
395 static void
396 check_order (const struct line *prev,
397 const struct line *current,
398 int whatfile)
400 if (check_input_order != CHECK_ORDER_DISABLED
401 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
403 if (!issued_disorder_warning[whatfile - 1])
405 idx_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
406 if (keycmp (prev, current, join_field, join_field) > 0)
408 /* Exclude any trailing newline. */
409 idx_t len = current->buf.length;
410 if (0 < len && current->buf.buffer[len - 1] == '\n')
411 --len;
413 /* If the offending line is longer than INT_MAX, output
414 only the first INT_MAX bytes in this diagnostic. */
415 len = MIN (INT_MAX, len);
417 error ((check_input_order == CHECK_ORDER_ENABLED
418 ? EXIT_FAILURE : 0),
419 0, _("%s:%ju: is not sorted: %.*s"),
420 g_names[whatfile - 1], line_no[whatfile - 1],
421 (int) len, current->buf.buffer);
423 /* If we get to here, the message was merely a warning.
424 Arrange to issue it only once per file. */
425 issued_disorder_warning[whatfile - 1] = true;
431 static inline void
432 reset_line (struct line *line)
434 line->nfields = 0;
437 static struct line *
438 init_linep (struct line **linep)
440 struct line *line = xzalloc (sizeof *line);
441 *linep = line;
442 return line;
445 /* Read a line from FP into LINE and split it into fields.
446 Return true if successful. */
448 static bool
449 get_line (FILE *fp, struct line **linep, int which)
451 struct line *line = *linep;
453 if (line == prevline[which - 1])
455 SWAPLINES (line, spareline[which - 1]);
456 *linep = line;
459 if (line)
460 reset_line (line);
461 else
462 line = init_linep (linep);
464 if (! readlinebuffer_delim (&line->buf, fp, eolchar))
466 if (ferror (fp))
467 error (EXIT_FAILURE, errno, _("read error"));
468 freeline (line);
469 return false;
471 ++line_no[which - 1];
473 xfields (line);
475 if (prevline[which - 1])
476 check_order (prevline[which - 1], line, which);
478 prevline[which - 1] = line;
479 return true;
482 static void
483 free_spareline (void)
485 for (idx_t i = 0; i < ARRAY_CARDINALITY (spareline); i++)
487 if (spareline[i])
489 freeline (spareline[i]);
490 free (spareline[i]);
495 static void
496 initseq (struct seq *seq)
498 seq->count = 0;
499 seq->alloc = 0;
500 seq->lines = nullptr;
503 /* Read a line from FP and add it to SEQ. Return true if successful. */
505 static bool
506 getseq (FILE *fp, struct seq *seq, int whichfile)
508 if (seq->count == seq->alloc)
510 seq->lines = xpalloc (seq->lines, &seq->alloc, 1, -1, sizeof *seq->lines);
511 for (idx_t i = seq->count; i < seq->alloc; i++)
512 seq->lines[i] = nullptr;
515 if (get_line (fp, &seq->lines[seq->count], whichfile))
517 ++seq->count;
518 return true;
520 return false;
523 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
524 true, else as the next. */
525 static bool
526 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
528 if (first)
529 seq->count = 0;
531 return getseq (fp, seq, whichfile);
534 static void
535 delseq (struct seq *seq)
537 for (idx_t i = 0; i < seq->alloc; i++)
539 freeline (seq->lines[i]);
540 free (seq->lines[i]);
542 free (seq->lines);
546 /* Print field N of LINE if it exists and is nonempty, otherwise
547 'empty_filler' if it is nonempty. */
549 static void
550 prfield (idx_t n, struct line const *line)
552 if (n < line->nfields)
554 idx_t len = line->fields[n].len;
555 if (len)
556 fwrite (line->fields[n].beg, 1, len, stdout);
557 else if (empty_filler)
558 fputs (empty_filler, stdout);
560 else if (empty_filler)
561 fputs (empty_filler, stdout);
564 /* Output all the fields in line, other than the join field. */
566 static void
567 prfields (struct line const *line, idx_t join_field, idx_t autocount)
569 idx_t i;
570 idx_t nfields = autoformat ? autocount : line->nfields;
571 char output_separator = tab < 0 ? ' ' : tab;
573 for (i = 0; i < join_field && i < nfields; ++i)
575 putchar (output_separator);
576 prfield (i, line);
578 for (i = join_field + 1; i < nfields; ++i)
580 putchar (output_separator);
581 prfield (i, line);
585 /* Print the join of LINE1 and LINE2. */
587 static void
588 prjoin (struct line const *line1, struct line const *line2)
590 const struct outlist *outlist;
591 char output_separator = tab < 0 ? ' ' : tab;
592 idx_t field;
593 struct line const *line;
595 outlist = outlist_head.next;
596 if (outlist)
598 const struct outlist *o;
600 o = outlist;
601 while (true)
603 if (o->file == 0)
605 if (line1 == &uni_blank)
607 line = line2;
608 field = join_field_2;
610 else
612 line = line1;
613 field = join_field_1;
616 else
618 line = (o->file == 1 ? line1 : line2);
619 field = o->field;
621 prfield (field, line);
622 o = o->next;
623 if (o == nullptr)
624 break;
625 putchar (output_separator);
627 putchar (eolchar);
629 else
631 if (line1 == &uni_blank)
633 line = line2;
634 field = join_field_2;
636 else
638 line = line1;
639 field = join_field_1;
642 /* Output the join field. */
643 prfield (field, line);
645 /* Output other fields. */
646 prfields (line1, join_field_1, autocount_1);
647 prfields (line2, join_field_2, autocount_2);
649 putchar (eolchar);
652 if (ferror (stdout))
653 write_error ();
656 /* Print the join of the files in FP1 and FP2. */
658 static void
659 join (FILE *fp1, FILE *fp2)
661 struct seq seq1, seq2;
662 int diff;
663 bool eof1, eof2;
665 fadvise (fp1, FADVISE_SEQUENTIAL);
666 fadvise (fp2, FADVISE_SEQUENTIAL);
668 /* Read the first line of each file. */
669 initseq (&seq1);
670 getseq (fp1, &seq1, 1);
671 initseq (&seq2);
672 getseq (fp2, &seq2, 2);
674 if (autoformat)
676 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
677 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
680 if (join_header_lines && (seq1.count || seq2.count))
682 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
683 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
684 prjoin (hline1, hline2);
685 prevline[0] = nullptr;
686 prevline[1] = nullptr;
687 if (seq1.count)
688 advance_seq (fp1, &seq1, true, 1);
689 if (seq2.count)
690 advance_seq (fp2, &seq2, true, 2);
693 while (seq1.count && seq2.count)
695 diff = keycmp (seq1.lines[0], seq2.lines[0],
696 join_field_1, join_field_2);
697 if (diff < 0)
699 if (print_unpairables_1)
700 prjoin (seq1.lines[0], &uni_blank);
701 advance_seq (fp1, &seq1, true, 1);
702 seen_unpairable = true;
703 continue;
705 if (diff > 0)
707 if (print_unpairables_2)
708 prjoin (&uni_blank, seq2.lines[0]);
709 advance_seq (fp2, &seq2, true, 2);
710 seen_unpairable = true;
711 continue;
714 /* Keep reading lines from file1 as long as they continue to
715 match the current line from file2. */
716 eof1 = false;
718 if (!advance_seq (fp1, &seq1, false, 1))
720 eof1 = true;
721 ++seq1.count;
722 break;
724 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
725 join_field_1, join_field_2));
727 /* Keep reading lines from file2 as long as they continue to
728 match the current line from file1. */
729 eof2 = false;
731 if (!advance_seq (fp2, &seq2, false, 2))
733 eof2 = true;
734 ++seq2.count;
735 break;
737 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
738 join_field_1, join_field_2));
740 if (print_pairables)
742 for (idx_t i = 0; i < seq1.count - 1; ++i)
744 idx_t j;
745 for (j = 0; j < seq2.count - 1; ++j)
746 prjoin (seq1.lines[i], seq2.lines[j]);
750 if (!eof1)
752 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
753 seq1.count = 1;
755 else
756 seq1.count = 0;
758 if (!eof2)
760 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
761 seq2.count = 1;
763 else
764 seq2.count = 0;
767 /* If the user did not specify --nocheck-order, then we read the
768 tail ends of both inputs to verify that they are in order. We
769 skip the rest of the tail once we have issued a warning for that
770 file, unless we actually need to print the unpairable lines. */
771 struct line *line = nullptr;
772 bool checktail = false;
774 if (check_input_order != CHECK_ORDER_DISABLED
775 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
776 checktail = true;
778 if ((print_unpairables_1 || checktail) && seq1.count)
780 if (print_unpairables_1)
781 prjoin (seq1.lines[0], &uni_blank);
782 if (seq2.count)
783 seen_unpairable = true;
784 while (get_line (fp1, &line, 1))
786 if (print_unpairables_1)
787 prjoin (line, &uni_blank);
788 if (issued_disorder_warning[0] && !print_unpairables_1)
789 break;
793 if ((print_unpairables_2 || checktail) && seq2.count)
795 if (print_unpairables_2)
796 prjoin (&uni_blank, seq2.lines[0]);
797 if (seq1.count)
798 seen_unpairable = true;
799 while (get_line (fp2, &line, 2))
801 if (print_unpairables_2)
802 prjoin (&uni_blank, line);
803 if (issued_disorder_warning[1] && !print_unpairables_2)
804 break;
808 freeline (line);
809 free (line);
811 delseq (&seq1);
812 delseq (&seq2);
815 /* Add a field spec for field FIELD of file FILE to 'outlist'. */
817 static void
818 add_field (int file, idx_t field)
820 struct outlist *o;
822 affirm (file == 0 || file == 1 || file == 2);
823 affirm (file != 0 || field == 0);
825 o = xmalloc (sizeof *o);
826 o->file = file;
827 o->field = field;
828 o->next = nullptr;
830 /* Add to the end of the list so the fields are in the right order. */
831 outlist_end->next = o;
832 outlist_end = o;
835 /* Convert a string of decimal digits, STR (the 1-based join field number),
836 to an integral value. Upon successful conversion, return one less
837 (the zero-based field number). Silently convert too-large values
838 to PTRDIFF_MAX. Otherwise, if a value cannot be converted, give a
839 diagnostic and exit. */
841 static idx_t
842 string_to_join_field (char const *str)
844 intmax_t val;
846 strtol_error s_err = xstrtoimax (str, nullptr, 10, &val, "");
847 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && PTRDIFF_MAX < val))
848 val = PTRDIFF_MAX;
849 else if (s_err != LONGINT_OK || val <= 0)
850 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
852 return val - 1;
855 /* Convert a single field specifier string, S, to a *FILE_INDEX, *FIELD_INDEX
856 pair. In S, the field index string is 1-based; *FIELD_INDEX is zero-based.
857 If S is valid, return true. Otherwise, give a diagnostic and exit. */
859 static void
860 decode_field_spec (char const *s, int *file_index, idx_t *field_index)
862 /* The first character must be 0, 1, or 2. */
863 switch (s[0])
865 case '0':
866 if (s[1])
868 /* '0' must be all alone -- no '.FIELD'. */
869 error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
871 *file_index = 0;
872 *field_index = 0;
873 break;
875 case '1':
876 case '2':
877 if (s[1] != '.')
878 error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
879 *file_index = s[0] - '0';
880 *field_index = string_to_join_field (s + 2);
881 break;
883 default:
884 error (EXIT_FAILURE, 0,
885 _("invalid file number in field spec: %s"), quote (s));
889 /* Add the comma or blank separated field spec(s) in STR to 'outlist'. */
891 static void
892 add_field_list (char *str)
894 char *p = str;
898 int file_index;
899 idx_t field_index;
900 char const *spec_item = p;
902 p = strpbrk (p, ", \t");
903 if (p)
904 *p++ = '\0';
905 decode_field_spec (spec_item, &file_index, &field_index);
906 add_field (file_index, field_index);
908 while (p);
911 /* Set the join field *VAR to VAL, but report an error if *VAR is set
912 more than once to incompatible values. */
914 static void
915 set_join_field (ptrdiff_t *var, idx_t val)
917 if (0 <= *var && *var != val)
918 error (EXIT_FAILURE, 0,
919 _("incompatible join fields %td, %td"), *var, val);
920 *var = val;
/* Status of command-line arguments.  */

enum operand_status
  {
    /* This argument must be an operand, i.e., one of the files to be
       joined.  */
    MUST_BE_OPERAND,

    /* This might be the argument of the preceding -j1 or -j2 option,
       or it might be an operand.  MIGHT_BE_J2_ARG must immediately
       follow MIGHT_BE_J1_ARG: main computes 'MIGHT_BE_J1_ARG + is_j2'.  */
    MIGHT_BE_J1_ARG,
    MIGHT_BE_J2_ARG,

    /* This might be the argument of the preceding -o option, or it might be
       an operand.  */
    MIGHT_BE_O_ARG
  };
941 /* Add NAME to the array of input file NAMES with operand statuses
942 OPERAND_STATUS; currently there are NFILES names in the list. */
944 static void
945 add_file_name (char *name, char *names[2],
946 int operand_status[2], int joption_count[2], int *nfiles,
947 int *prev_optc_status, int *optc_status)
949 int n = *nfiles;
951 if (n == 2)
953 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
954 char *arg = names[op0];
955 switch (operand_status[op0])
957 case MUST_BE_OPERAND:
958 error (0, 0, _("extra operand %s"), quoteaf (name));
959 usage (EXIT_FAILURE);
961 case MIGHT_BE_J1_ARG:
962 joption_count[0]--;
963 set_join_field (&join_field_1, string_to_join_field (arg));
964 break;
966 case MIGHT_BE_J2_ARG:
967 joption_count[1]--;
968 set_join_field (&join_field_2, string_to_join_field (arg));
969 break;
971 case MIGHT_BE_O_ARG:
972 add_field_list (arg);
973 break;
975 if (!op0)
977 operand_status[0] = operand_status[1];
978 names[0] = names[1];
980 n = 1;
983 operand_status[n] = *prev_optc_status;
984 names[n] = name;
985 *nfiles = n + 1;
986 if (*prev_optc_status == MIGHT_BE_O_ARG)
987 *optc_status = MIGHT_BE_O_ARG;
991 main (int argc, char **argv)
993 int optc_status;
994 int prev_optc_status = MUST_BE_OPERAND;
995 int operand_status[2];
996 int joption_count[2] = { 0, 0 };
997 FILE *fp1, *fp2;
998 int optc;
999 int nfiles = 0;
1000 int i;
1002 initialize_main (&argc, &argv);
1003 set_program_name (argv[0]);
1004 setlocale (LC_ALL, "");
1005 bindtextdomain (PACKAGE, LOCALEDIR);
1006 textdomain (PACKAGE);
1007 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1009 atexit (close_stdout);
1010 atexit (free_spareline);
1012 print_pairables = true;
1013 seen_unpairable = false;
1014 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1015 check_input_order = CHECK_ORDER_DEFAULT;
1017 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z",
1018 longopts, nullptr))
1019 != -1)
1021 optc_status = MUST_BE_OPERAND;
1023 switch (optc)
1025 case 'v':
1026 print_pairables = false;
1027 FALLTHROUGH;
1029 case 'a':
1031 long int val;
1032 if (xstrtol (optarg, nullptr, 10, &val, "") != LONGINT_OK
1033 || (val != 1 && val != 2))
1034 error (EXIT_FAILURE, 0,
1035 _("invalid field number: %s"), quote (optarg));
1036 if (val == 1)
1037 print_unpairables_1 = true;
1038 else
1039 print_unpairables_2 = true;
1041 break;
1043 case 'e':
1044 if (empty_filler && ! STREQ (empty_filler, optarg))
1045 error (EXIT_FAILURE, 0,
1046 _("conflicting empty-field replacement strings"));
1047 empty_filler = optarg;
1048 break;
1050 case 'i':
1051 ignore_case = true;
1052 break;
1054 case '1':
1055 set_join_field (&join_field_1, string_to_join_field (optarg));
1056 break;
1058 case '2':
1059 set_join_field (&join_field_2, string_to_join_field (optarg));
1060 break;
1062 case 'j':
1063 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1064 && optarg == argv[optind - 1] + 2)
1066 /* The argument was either "-j1" or "-j2". */
1067 bool is_j2 = (optarg[0] == '2');
1068 joption_count[is_j2]++;
1069 optc_status = MIGHT_BE_J1_ARG + is_j2;
1071 else
1073 set_join_field (&join_field_1, string_to_join_field (optarg));
1074 set_join_field (&join_field_2, join_field_1);
1076 break;
1078 case 'o':
1079 if (STREQ (optarg, "auto"))
1080 autoformat = true;
1081 else
1083 add_field_list (optarg);
1084 optc_status = MIGHT_BE_O_ARG;
1086 break;
1088 case 't':
1090 unsigned char newtab = optarg[0];
1091 if (! newtab)
1092 newtab = '\n'; /* '' => process the whole line. */
1093 else if (optarg[1])
1095 if (STREQ (optarg, "\\0"))
1096 newtab = '\0';
1097 else
1098 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1099 quote (optarg));
1101 if (0 <= tab && tab != newtab)
1102 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1103 tab = newtab;
1105 break;
1107 case 'z':
1108 eolchar = 0;
1109 break;
1111 case NOCHECK_ORDER_OPTION:
1112 check_input_order = CHECK_ORDER_DISABLED;
1113 break;
1115 case CHECK_ORDER_OPTION:
1116 check_input_order = CHECK_ORDER_ENABLED;
1117 break;
1119 case 1: /* Non-option argument. */
1120 add_file_name (optarg, g_names, operand_status, joption_count,
1121 &nfiles, &prev_optc_status, &optc_status);
1122 break;
1124 case HEADER_LINE_OPTION:
1125 join_header_lines = true;
1126 break;
1128 case_GETOPT_HELP_CHAR;
1130 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1132 default:
1133 usage (EXIT_FAILURE);
1136 prev_optc_status = optc_status;
1139 /* Process any operands after "--". */
1140 prev_optc_status = MUST_BE_OPERAND;
1141 while (optind < argc)
1142 add_file_name (argv[optind++], g_names, operand_status, joption_count,
1143 &nfiles, &prev_optc_status, &optc_status);
1145 if (nfiles != 2)
1147 if (nfiles == 0)
1148 error (0, 0, _("missing operand"));
1149 else
1150 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1151 usage (EXIT_FAILURE);
1154 /* If "-j1" was specified and it turns out not to have had an argument,
1155 treat it as "-j 1". Likewise for -j2. */
1156 for (i = 0; i < 2; i++)
1157 if (joption_count[i] != 0)
1159 set_join_field (&join_field_1, i);
1160 set_join_field (&join_field_2, i);
1163 if (join_field_1 < 0)
1164 join_field_1 = 0;
1165 if (join_field_2 < 0)
1166 join_field_2 = 0;
1168 fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
1169 if (!fp1)
1170 error (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
1171 fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
1172 if (!fp2)
1173 error (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
1174 if (fp1 == fp2)
1175 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1176 join (fp1, fp2);
1178 if (fclose (fp1) != 0)
1179 error (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
1180 if (fclose (fp2) != 0)
1181 error (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
1183 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1184 error (EXIT_FAILURE, 0, _("input is not in sorted order"));
1185 else
1186 return EXIT_SUCCESS;