maint: revert "build: update gnulib submodule to latest"
[coreutils/ericb.git] / src / join.c
blob809eead0f8df8667c897cb2588c71d130ca678f4
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991, 1995-2006, 2008-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "error.h"
27 #include "fadvise.h"
28 #include "hard-locale.h"
29 #include "linebuffer.h"
30 #include "memcasecmp.h"
31 #include "quote.h"
32 #include "stdio--.h"
33 #include "xmemcoll.h"
34 #include "xstrtol.h"
35 #include "argmatch.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "join"
/* Author list; main passes it to case_GETOPT_VERSION_CHAR. */
40 #define AUTHORS proper_name ("Mike Haertel")
/* From here on the preprocessor rewrites the identifier `join' to
   `system_join', which includes the name of the function defined later
   in this file.  NOTE(review): presumably this sidesteps a clash with a
   `join' declared by some system header -- confirm. */
42 #define join system_join
/* Exchange the two `struct line *' lvalues A and B.

   The expansion intentionally carries no trailing semicolon: the caller
   supplies it, so `SWAPLINES (x, y);' is exactly one statement and stays
   valid as the unbraced body of an `if' that is followed by `else'.

   A and B are each evaluated twice, so do not pass expressions with
   side effects.  The temporary has a macro-specific name so that it
   cannot capture a caller's variable.  */
#define SWAPLINES(a, b) do { \
  struct line *swaplines_tmp = (a); \
  (a) = (b); \
  (b) = swaplines_tmp; \
} while (0)
/* One node of the singly linked list that says which fields to print
   on every output line.  */
struct outlist
{
  /* Which input supplies the field: 0 selects the join field, 1 the
     first file argument and 2 the second.  */
  int file;

  /* Zero-based index of the field; meaningful only when FILE is 1 or 2.  */
  size_t field;

  /* Following node, or NULL at the end of the list.  */
  struct outlist *next;
};
/* One field of a line, described as a (start, length) pair.  */
struct field
{
  /* Address of the first character of the field.  */
  char *beg;

  /* Number of bytes in the field.  */
  size_t len;
};
71 /* A line read from an input file. */
72 struct line
74 struct linebuffer buf; /* The line itself. */
75 size_t nfields; /* Number of elements in `fields'. */
76 size_t nfields_allocated; /* Number of elements allocated for `fields'. */
77 struct field *fields;
/* A run of consecutive lines from one file that all carry the same
   join field value.  */
struct seq
{
  /* Number of entries of `lines' currently in use.  */
  size_t count;

  /* Number of entries `lines' has room for.  */
  size_t alloc;

  /* The lines of the run.  */
  struct line **lines;
};
89 /* The previous line read from each file. */
90 static struct line *prevline[2] = {NULL, NULL};
92 /* The number of lines read from each file. */
93 static uintmax_t line_no[2] = {0, 0};
95 /* The input file names. */
96 static char *g_names[2];
98 /* This provides an extra line buffer for each file. We need these if we
99 try to read two consecutive lines into the same buffer, since we don't
100 want to overwrite the previous buffer before we check order. */
101 static struct line *spareline[2] = {NULL, NULL};
103 /* True if the LC_COLLATE locale is hard. */
104 static bool hard_LC_COLLATE;
106 /* If nonzero, print unpairable lines in file 1 or 2. */
107 static bool print_unpairables_1, print_unpairables_2;
109 /* If nonzero, print pairable lines. */
110 static bool print_pairables;
112 /* If nonzero, we have seen at least one unpairable line. */
113 static bool seen_unpairable;
115 /* If nonzero, we have warned about disorder in that file. */
116 static bool issued_disorder_warning[2];
118 /* Empty output field filler. */
119 static char const *empty_filler;
121 /* Whether to ensure the same number of fields are output from each line. */
122 static bool autoformat;
123 /* The number of fields to output for each line.
124 Only significant when autoformat is true. */
125 static size_t autocount_1;
126 static size_t autocount_2;
128 /* Field to join on; SIZE_MAX means they haven't been determined yet. */
129 static size_t join_field_1 = SIZE_MAX;
130 static size_t join_field_2 = SIZE_MAX;
132 /* List of fields to print. */
133 static struct outlist outlist_head;
135 /* Last element in `outlist', where a new element can be added. */
136 static struct outlist *outlist_end = &outlist_head;
138 /* Tab character separating fields. If negative, fields are separated
139 by any nonempty string of blanks, otherwise by exactly one
140 tab character whose value (when cast to unsigned char) equals TAB. */
141 static int tab = -1;
/* Policy for verifying that the input is correctly ordered.  The
   zero-valued CHECK_ORDER_DEFAULT is what a static object starts as.  */
static enum
{
  CHECK_ORDER_DEFAULT,
  CHECK_ORDER_ENABLED,
  CHECK_ORDER_DISABLED
}
check_input_order;
151 enum
153 CHECK_ORDER_OPTION = CHAR_MAX + 1,
154 NOCHECK_ORDER_OPTION,
155 HEADER_LINE_OPTION
159 static struct option const longopts[] =
161 {"ignore-case", no_argument, NULL, 'i'},
162 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
163 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
164 {"header", no_argument, NULL, HEADER_LINE_OPTION},
165 {GETOPT_HELP_OPTION_DECL},
166 {GETOPT_VERSION_OPTION_DECL},
167 {NULL, 0, NULL, 0}
170 /* Used to print non-joining lines */
171 static struct line uni_blank;
173 /* If nonzero, ignore case when comparing join fields. */
174 static bool ignore_case;
176 /* If nonzero, treat the first line of each file as column headers -
177 join them without checking for ordering */
178 static bool join_header_lines;
/* Print usage information and terminate the program with exit status
   STATUS; this function does not return.
   When STATUS is not EXIT_SUCCESS, only a one-line hint pointing at
   --help is written to stderr.  Otherwise the complete help text is
   written to stdout, followed by emit_ancillary_info ().  */
180 void
181 usage (int status)
183 if (status != EXIT_SUCCESS)
184 fprintf (stderr, _("Try `%s --help' for more information.\n"),
185 program_name);
186 else
188 printf (_("\
189 Usage: %s [OPTION]... FILE1 FILE2\n\
191 program_name);
192 fputs (_("\
193 For each pair of input lines with identical join fields, write a line to\n\
194 standard output. The default join field is the first, delimited\n\
195 by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
197 -a FILENUM also print unpairable lines from file FILENUM, where\n\
198 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
199 -e EMPTY replace missing input fields with EMPTY\n\
200 "), stdout);
201 fputs (_("\
202 -i, --ignore-case ignore differences in case when comparing fields\n\
203 -j FIELD equivalent to `-1 FIELD -2 FIELD'\n\
204 -o FORMAT obey FORMAT while constructing output line\n\
205 -t CHAR use CHAR as input and output field separator\n\
206 "), stdout);
207 fputs (_("\
208 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
209 -1 FIELD join on this FIELD of file 1\n\
210 -2 FIELD join on this FIELD of file 2\n\
211 --check-order check that the input is correctly sorted, even\n\
212 if all input lines are pairable\n\
213 --nocheck-order do not check that the input is correctly sorted\n\
214 --header treat the first line in each file as field headers,\n\
215 print them without trying to pair them\n\
216 "), stdout);
217 fputs (HELP_OPTION_DESCRIPTION, stdout);
218 fputs (VERSION_OPTION_DESCRIPTION, stdout);
219 fputs (_("\
221 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
222 else fields are separated by CHAR. Any FIELD is a field number counted\n\
223 from 1. FORMAT is one or more comma or blank separated specifications,\n\
224 each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\
225 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
226 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
227 line of each file determines the number of fields output for each line.\n\
229 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
230 E.g., use ` sort -k 1b,1 ' if `join' has no options,\n\
231 or use ` join -t '' ' if `sort' has no options.\n\
232 Note, comparisons honor the rules specified by `LC_COLLATE'.\n\
233 If the input is not sorted and some lines cannot be joined, a\n\
234 warning message will be given.\n\
235 "), stdout);
236 emit_ancillary_info ();
/* Reached on both paths: terminate with the caller's status. */
238 exit (status);
241 /* Record a field in LINE, with location FIELD and size LEN. */
243 static void
244 extract_field (struct line *line, char *field, size_t len)
246 if (line->nfields >= line->nfields_allocated)
248 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
250 line->fields[line->nfields].beg = field;
251 line->fields[line->nfields].len = len;
252 ++(line->nfields);
255 /* Fill in the `fields' structure in LINE. */
257 static void
258 xfields (struct line *line)
260 char *ptr = line->buf.buffer;
261 char const *lim = ptr + line->buf.length - 1;
263 if (ptr == lim)
264 return;
266 if (0 <= tab && tab != '\n')
268 char *sep;
269 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
270 extract_field (line, ptr, sep - ptr);
272 else if (tab < 0)
274 /* Skip leading blanks before the first field. */
275 while (isblank (to_uchar (*ptr)))
276 if (++ptr == lim)
277 return;
281 char *sep;
282 for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
283 continue;
284 extract_field (line, ptr, sep - ptr);
285 if (sep == lim)
286 return;
287 for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
288 continue;
290 while (ptr != lim);
293 extract_field (line, ptr, lim - ptr);
296 static void
297 freeline (struct line *line)
299 if (line == NULL)
300 return;
301 free (line->fields);
302 line->fields = NULL;
303 free (line->buf.buffer);
304 line->buf.buffer = NULL;
307 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
308 >0 if it compares greater; 0 if it compares equal.
309 Report an error and exit if the comparison fails.
310 Use join fields JF_1 and JF_2 respectively. */
312 static int
313 keycmp (struct line const *line1, struct line const *line2,
314 size_t jf_1, size_t jf_2)
316 /* Start of field to compare in each file. */
317 char *beg1;
318 char *beg2;
320 size_t len1;
321 size_t len2; /* Length of fields to compare. */
322 int diff;
324 if (jf_1 < line1->nfields)
326 beg1 = line1->fields[jf_1].beg;
327 len1 = line1->fields[jf_1].len;
329 else
331 beg1 = NULL;
332 len1 = 0;
335 if (jf_2 < line2->nfields)
337 beg2 = line2->fields[jf_2].beg;
338 len2 = line2->fields[jf_2].len;
340 else
342 beg2 = NULL;
343 len2 = 0;
346 if (len1 == 0)
347 return len2 == 0 ? 0 : -1;
348 if (len2 == 0)
349 return 1;
351 if (ignore_case)
353 /* FIXME: ignore_case does not work with NLS (in particular,
354 with multibyte chars). */
355 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
357 else
359 if (hard_LC_COLLATE)
360 return xmemcoll (beg1, len1, beg2, len2);
361 diff = memcmp (beg1, beg2, MIN (len1, len2));
364 if (diff)
365 return diff;
366 return len1 < len2 ? -1 : len1 != len2;
369 /* Check that successive input lines PREV and CURRENT from input file
370 WHATFILE are presented in order, unless the user may be relying on
371 the GNU extension that input lines may be out of order if no input
372 lines are unpairable.
374 If the user specified --nocheck-order, the check is not made.
375 If the user specified --check-order, the problem is fatal.
376 Otherwise (the default), the message is simply a warning.
378 A message is printed at most once per input file. */
380 static void
381 check_order (const struct line *prev,
382 const struct line *current,
383 int whatfile)
385 if (check_input_order != CHECK_ORDER_DISABLED
386 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
388 if (!issued_disorder_warning[whatfile-1])
390 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
391 if (keycmp (prev, current, join_field, join_field) > 0)
393 /* Exclude any trailing newline. */
394 size_t len = current->buf.length;
395 if (0 < len && current->buf.buffer[len - 1] == '\n')
396 --len;
398 /* If the offending line is longer than INT_MAX, output
399 only the first INT_MAX bytes in this diagnostic. */
400 len = MIN (INT_MAX, len);
402 error ((check_input_order == CHECK_ORDER_ENABLED
403 ? EXIT_FAILURE : 0),
404 0, _("%s:%ju: is not sorted: %.*s"),
405 g_names[whatfile - 1], line_no[whatfile - 1],
406 (int) len, current->buf.buffer);
408 /* If we get to here, the message was merely a warning.
409 Arrange to issue it only once per file. */
410 issued_disorder_warning[whatfile-1] = true;
416 static inline void
417 reset_line (struct line *line)
419 line->nfields = 0;
422 static struct line *
423 init_linep (struct line **linep)
425 struct line *line = xcalloc (1, sizeof *line);
426 *linep = line;
427 return line;
430 /* Read a line from FP into LINE and split it into fields.
431 Return true if successful. */
433 static bool
434 get_line (FILE *fp, struct line **linep, int which)
436 struct line *line = *linep;
438 if (line == prevline[which - 1])
440 SWAPLINES (line, spareline[which - 1]);
441 *linep = line;
444 if (line)
445 reset_line (line);
446 else
447 line = init_linep (linep);
449 if (! readlinebuffer (&line->buf, fp))
451 if (ferror (fp))
452 error (EXIT_FAILURE, errno, _("read error"));
453 freeline (line);
454 return false;
456 ++line_no[which - 1];
458 xfields (line);
460 if (prevline[which - 1])
461 check_order (prevline[which - 1], line, which);
463 prevline[which - 1] = line;
464 return true;
467 static void
468 free_spareline (void)
470 size_t i;
472 for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
474 if (spareline[i])
476 freeline (spareline[i]);
477 free (spareline[i]);
482 static void
483 initseq (struct seq *seq)
485 seq->count = 0;
486 seq->alloc = 0;
487 seq->lines = NULL;
490 /* Read a line from FP and add it to SEQ. Return true if successful. */
492 static bool
493 getseq (FILE *fp, struct seq *seq, int whichfile)
495 if (seq->count == seq->alloc)
497 size_t i;
498 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
499 for (i = seq->count; i < seq->alloc; i++)
500 seq->lines[i] = NULL;
503 if (get_line (fp, &seq->lines[seq->count], whichfile))
505 ++seq->count;
506 return true;
508 return false;
511 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
512 true, else as the next. */
513 static bool
514 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
516 if (first)
517 seq->count = 0;
519 return getseq (fp, seq, whichfile);
522 static void
523 delseq (struct seq *seq)
525 size_t i;
526 for (i = 0; i < seq->alloc; i++)
528 freeline (seq->lines[i]);
529 free (seq->lines[i]);
531 free (seq->lines);
535 /* Print field N of LINE if it exists and is nonempty, otherwise
536 `empty_filler' if it is nonempty. */
538 static void
539 prfield (size_t n, struct line const *line)
541 size_t len;
543 if (n < line->nfields)
545 len = line->fields[n].len;
546 if (len)
547 fwrite (line->fields[n].beg, 1, len, stdout);
548 else if (empty_filler)
549 fputs (empty_filler, stdout);
551 else if (empty_filler)
552 fputs (empty_filler, stdout);
555 /* Output all the fields in line, other than the join field. */
557 static void
558 prfields (struct line const *line, size_t join_field, size_t autocount)
560 size_t i;
561 size_t nfields = autoformat ? autocount : line->nfields;
562 char output_separator = tab < 0 ? ' ' : tab;
564 for (i = 0; i < join_field && i < nfields; ++i)
566 putchar (output_separator);
567 prfield (i, line);
569 for (i = join_field + 1; i < nfields; ++i)
571 putchar (output_separator);
572 prfield (i, line);
576 /* Print the join of LINE1 and LINE2. */
578 static void
579 prjoin (struct line const *line1, struct line const *line2)
581 const struct outlist *outlist;
582 char output_separator = tab < 0 ? ' ' : tab;
583 size_t field;
584 struct line const *line;
586 outlist = outlist_head.next;
587 if (outlist)
589 const struct outlist *o;
591 o = outlist;
592 while (1)
594 if (o->file == 0)
596 if (line1 == &uni_blank)
598 line = line2;
599 field = join_field_2;
601 else
603 line = line1;
604 field = join_field_1;
607 else
609 line = (o->file == 1 ? line1 : line2);
610 field = o->field;
612 prfield (field, line);
613 o = o->next;
614 if (o == NULL)
615 break;
616 putchar (output_separator);
618 putchar ('\n');
620 else
622 if (line1 == &uni_blank)
624 line = line2;
625 field = join_field_2;
627 else
629 line = line1;
630 field = join_field_1;
633 /* Output the join field. */
634 prfield (field, line);
636 /* Output other fields. */
637 prfields (line1, join_field_1, autocount_1);
638 prfields (line2, join_field_2, autocount_2);
640 putchar ('\n');
644 /* Print the join of the files in FP1 and FP2. */
646 static void
647 join (FILE *fp1, FILE *fp2)
649 struct seq seq1, seq2;
650 int diff;
651 bool eof1, eof2;
653 fadvise (fp1, FADVISE_SEQUENTIAL);
654 fadvise (fp2, FADVISE_SEQUENTIAL);
656 /* Read the first line of each file. */
657 initseq (&seq1);
658 getseq (fp1, &seq1, 1);
659 initseq (&seq2);
660 getseq (fp2, &seq2, 2);
662 if (autoformat)
664 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
665 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
668 if (join_header_lines && (seq1.count || seq2.count))
670 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
671 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
672 prjoin (hline1, hline2);
673 prevline[0] = NULL;
674 prevline[1] = NULL;
675 if (seq1.count)
676 advance_seq (fp1, &seq1, true, 1);
677 if (seq2.count)
678 advance_seq (fp2, &seq2, true, 2);
681 while (seq1.count && seq2.count)
683 size_t i;
684 diff = keycmp (seq1.lines[0], seq2.lines[0],
685 join_field_1, join_field_2);
686 if (diff < 0)
688 if (print_unpairables_1)
689 prjoin (seq1.lines[0], &uni_blank);
690 advance_seq (fp1, &seq1, true, 1);
691 seen_unpairable = true;
692 continue;
694 if (diff > 0)
696 if (print_unpairables_2)
697 prjoin (&uni_blank, seq2.lines[0]);
698 advance_seq (fp2, &seq2, true, 2);
699 seen_unpairable = true;
700 continue;
703 /* Keep reading lines from file1 as long as they continue to
704 match the current line from file2. */
705 eof1 = false;
707 if (!advance_seq (fp1, &seq1, false, 1))
709 eof1 = true;
710 ++seq1.count;
711 break;
713 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
714 join_field_1, join_field_2));
716 /* Keep reading lines from file2 as long as they continue to
717 match the current line from file1. */
718 eof2 = false;
720 if (!advance_seq (fp2, &seq2, false, 2))
722 eof2 = true;
723 ++seq2.count;
724 break;
726 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
727 join_field_1, join_field_2));
729 if (print_pairables)
731 for (i = 0; i < seq1.count - 1; ++i)
733 size_t j;
734 for (j = 0; j < seq2.count - 1; ++j)
735 prjoin (seq1.lines[i], seq2.lines[j]);
739 if (!eof1)
741 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
742 seq1.count = 1;
744 else
745 seq1.count = 0;
747 if (!eof2)
749 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
750 seq2.count = 1;
752 else
753 seq2.count = 0;
756 /* If the user did not specify --nocheck-order, then we read the
757 tail ends of both inputs to verify that they are in order. We
758 skip the rest of the tail once we have issued a warning for that
759 file, unless we actually need to print the unpairable lines. */
760 struct line *line = NULL;
761 bool checktail = false;
763 if (check_input_order != CHECK_ORDER_DISABLED
764 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
765 checktail = true;
767 if ((print_unpairables_1 || checktail) && seq1.count)
769 if (print_unpairables_1)
770 prjoin (seq1.lines[0], &uni_blank);
771 if (seq2.count)
772 seen_unpairable = true;
773 while (get_line (fp1, &line, 1))
775 if (print_unpairables_1)
776 prjoin (line, &uni_blank);
777 if (issued_disorder_warning[0] && !print_unpairables_1)
778 break;
782 if ((print_unpairables_2 || checktail) && seq2.count)
784 if (print_unpairables_2)
785 prjoin (&uni_blank, seq2.lines[0]);
786 if (seq1.count)
787 seen_unpairable = true;
788 while (get_line (fp2, &line, 2))
790 if (print_unpairables_2)
791 prjoin (&uni_blank, line);
792 if (issued_disorder_warning[1] && !print_unpairables_2)
793 break;
797 freeline (line);
798 free (line);
800 delseq (&seq1);
801 delseq (&seq2);
804 /* Add a field spec for field FIELD of file FILE to `outlist'. */
806 static void
807 add_field (int file, size_t field)
809 struct outlist *o;
811 assert (file == 0 || file == 1 || file == 2);
812 assert (file != 0 || field == 0);
814 o = xmalloc (sizeof *o);
815 o->file = file;
816 o->field = field;
817 o->next = NULL;
819 /* Add to the end of the list so the fields are in the right order. */
820 outlist_end->next = o;
821 outlist_end = o;
824 /* Convert a string of decimal digits, STR (the 1-based join field number),
825 to an integral value. Upon successful conversion, return one less
826 (the zero-based field number). Silently convert too-large values
827 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
828 diagnostic and exit. */
830 static size_t
831 string_to_join_field (char const *str)
833 size_t result;
834 unsigned long int val;
835 verify (SIZE_MAX <= ULONG_MAX);
837 strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
838 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
839 val = SIZE_MAX;
840 else if (s_err != LONGINT_OK || val == 0)
841 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
843 result = val - 1;
845 return result;
/* Decode the single field specifier S, either `0' or `FILENUM.FIELD',
   into *FILE_INDEX and *FIELD_INDEX.  The field number in S counts
   from 1, while *FIELD_INDEX counts from 0.  The function returns
   nothing: an invalid S is diagnosed and the program exits.  */

static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  /* The first character must be 0, 1, or 2.  */
  char const which = s[0];

  if (which == '0')
    {
      /* `0' must be all alone -- no `.FIELD'.  */
      if (s[1])
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (which == '1' || which == '2')
    {
      if (s[1] != '.')
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = which - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      error (EXIT_FAILURE, 0,
             _("invalid file number in field spec: %s"), quote (s));

      /* Tell gcc -W -Wall that we can't get beyond this point.
         This avoids a warning (otherwise legit) that the caller's copies
         of *file_index and *field_index might be used uninitialized.  */
      abort ();
    }
}
/* Split STR at commas, spaces and tabs and add each resulting field
   spec to `outlist'.  STR is modified: every separator found is
   overwritten with a NUL byte.  */

static void
add_field_list (char *str)
{
  char *rest = str;

  while (rest != NULL)
    {
      char const *spec_item = rest;
      int file_index;
      size_t field_index;

      /* Cut the current item off at the next separator, if there is one.  */
      rest = strpbrk (rest, ", \t");
      if (rest != NULL)
        *rest++ = '\0';

      decode_field_spec (spec_item, &file_index, &field_index);
      add_field (file_index, field_index);
    }
}
/* Store VAL in the join field *VAR.  If *VAR already holds a different
   value (anything but SIZE_MAX, which means "unset"), report the
   conflict and exit.  */

static void
set_join_field (size_t *var, size_t val)
{
  bool const already_set = *var != SIZE_MAX;

  if (already_set && *var != val)
    {
      /* Both numbers are shown 1-based, the way the user wrote them.  */
      unsigned long int var1 = *var + 1;
      unsigned long int val1 = val + 1;
      error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
             var1, val1);
    }

  *var = val;
}
/* What a non-option command-line argument may turn out to be.  */

enum operand_status
  {
    /* Definitely an operand, i.e., one of the files to be joined.  */
    MUST_BE_OPERAND,

    /* Either the argument of the preceding -j1 (respectively -j2)
       option, or an operand.  main computes the -j2 value as
       MIGHT_BE_J1_ARG + 1, so these two must stay adjacent and in
       this order.  */
    MIGHT_BE_J1_ARG,
    MIGHT_BE_J2_ARG,

    /* Either the argument of the preceding -o option, or an operand.  */
    MIGHT_BE_O_ARG
  };
945 /* Add NAME to the array of input file NAMES with operand statuses
946 OPERAND_STATUS; currently there are NFILES names in the list. */
948 static void
949 add_file_name (char *name, char *names[2],
950 int operand_status[2], int joption_count[2], int *nfiles,
951 int *prev_optc_status, int *optc_status)
953 int n = *nfiles;
955 if (n == 2)
957 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
958 char *arg = names[op0];
959 switch (operand_status[op0])
961 case MUST_BE_OPERAND:
962 error (0, 0, _("extra operand %s"), quote (name));
963 usage (EXIT_FAILURE);
965 case MIGHT_BE_J1_ARG:
966 joption_count[0]--;
967 set_join_field (&join_field_1, string_to_join_field (arg));
968 break;
970 case MIGHT_BE_J2_ARG:
971 joption_count[1]--;
972 set_join_field (&join_field_2, string_to_join_field (arg));
973 break;
975 case MIGHT_BE_O_ARG:
976 add_field_list (arg);
977 break;
979 if (!op0)
981 operand_status[0] = operand_status[1];
982 names[0] = names[1];
984 n = 1;
987 operand_status[n] = *prev_optc_status;
988 names[n] = name;
989 *nfiles = n + 1;
990 if (*prev_optc_status == MIGHT_BE_O_ARG)
991 *optc_status = MIGHT_BE_O_ARG;
995 main (int argc, char **argv)
997 int optc_status;
998 int prev_optc_status = MUST_BE_OPERAND;
999 int operand_status[2];
1000 int joption_count[2] = { 0, 0 };
1001 FILE *fp1, *fp2;
1002 int optc;
1003 int nfiles = 0;
1004 int i;
1006 initialize_main (&argc, &argv);
1007 set_program_name (argv[0]);
1008 setlocale (LC_ALL, "");
1009 bindtextdomain (PACKAGE, LOCALEDIR);
1010 textdomain (PACKAGE);
1011 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1013 atexit (close_stdout);
1014 atexit (free_spareline);
1016 print_pairables = true;
1017 seen_unpairable = false;
1018 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1019 check_input_order = CHECK_ORDER_DEFAULT;
1021 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
1022 longopts, NULL))
1023 != -1)
1025 optc_status = MUST_BE_OPERAND;
1027 switch (optc)
1029 case 'v':
1030 print_pairables = false;
1031 /* Fall through. */
1033 case 'a':
1035 unsigned long int val;
1036 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1037 || (val != 1 && val != 2))
1038 error (EXIT_FAILURE, 0,
1039 _("invalid field number: %s"), quote (optarg));
1040 if (val == 1)
1041 print_unpairables_1 = true;
1042 else
1043 print_unpairables_2 = true;
1045 break;
1047 case 'e':
1048 if (empty_filler && ! STREQ (empty_filler, optarg))
1049 error (EXIT_FAILURE, 0,
1050 _("conflicting empty-field replacement strings"));
1051 empty_filler = optarg;
1052 break;
1054 case 'i':
1055 ignore_case = true;
1056 break;
1058 case '1':
1059 set_join_field (&join_field_1, string_to_join_field (optarg));
1060 break;
1062 case '2':
1063 set_join_field (&join_field_2, string_to_join_field (optarg));
1064 break;
1066 case 'j':
1067 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1068 && optarg == argv[optind - 1] + 2)
1070 /* The argument was either "-j1" or "-j2". */
1071 bool is_j2 = (optarg[0] == '2');
1072 joption_count[is_j2]++;
1073 optc_status = MIGHT_BE_J1_ARG + is_j2;
1075 else
1077 set_join_field (&join_field_1, string_to_join_field (optarg));
1078 set_join_field (&join_field_2, join_field_1);
1080 break;
1082 case 'o':
1083 if (STREQ (optarg, "auto"))
1084 autoformat = true;
1085 else
1087 add_field_list (optarg);
1088 optc_status = MIGHT_BE_O_ARG;
1090 break;
1092 case 't':
1094 unsigned char newtab = optarg[0];
1095 if (! newtab)
1096 newtab = '\n'; /* '' => process the whole line. */
1097 else if (optarg[1])
1099 if (STREQ (optarg, "\\0"))
1100 newtab = '\0';
1101 else
1102 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1103 quote (optarg));
1105 if (0 <= tab && tab != newtab)
1106 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1107 tab = newtab;
1109 break;
1111 case NOCHECK_ORDER_OPTION:
1112 check_input_order = CHECK_ORDER_DISABLED;
1113 break;
1115 case CHECK_ORDER_OPTION:
1116 check_input_order = CHECK_ORDER_ENABLED;
1117 break;
1119 case 1: /* Non-option argument. */
1120 add_file_name (optarg, g_names, operand_status, joption_count,
1121 &nfiles, &prev_optc_status, &optc_status);
1122 break;
1124 case HEADER_LINE_OPTION:
1125 join_header_lines = true;
1126 break;
1128 case_GETOPT_HELP_CHAR;
1130 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1132 default:
1133 usage (EXIT_FAILURE);
1136 prev_optc_status = optc_status;
1139 /* Process any operands after "--". */
1140 prev_optc_status = MUST_BE_OPERAND;
1141 while (optind < argc)
1142 add_file_name (argv[optind++], g_names, operand_status, joption_count,
1143 &nfiles, &prev_optc_status, &optc_status);
1145 if (nfiles != 2)
1147 if (nfiles == 0)
1148 error (0, 0, _("missing operand"));
1149 else
1150 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1151 usage (EXIT_FAILURE);
1154 /* If "-j1" was specified and it turns out not to have had an argument,
1155 treat it as "-j 1". Likewise for -j2. */
1156 for (i = 0; i < 2; i++)
1157 if (joption_count[i] != 0)
1159 set_join_field (&join_field_1, i);
1160 set_join_field (&join_field_2, i);
1163 if (join_field_1 == SIZE_MAX)
1164 join_field_1 = 0;
1165 if (join_field_2 == SIZE_MAX)
1166 join_field_2 = 0;
1168 fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
1169 if (!fp1)
1170 error (EXIT_FAILURE, errno, "%s", g_names[0]);
1171 fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
1172 if (!fp2)
1173 error (EXIT_FAILURE, errno, "%s", g_names[1]);
1174 if (fp1 == fp2)
1175 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1176 join (fp1, fp2);
1178 if (fclose (fp1) != 0)
1179 error (EXIT_FAILURE, errno, "%s", g_names[0]);
1180 if (fclose (fp2) != 0)
1181 error (EXIT_FAILURE, errno, "%s", g_names[1]);
1183 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1184 exit (EXIT_FAILURE);
1185 else
1186 exit (EXIT_SUCCESS);