join: improve performance when operating on whole lines
[coreutils.git] / src / join.c
blob6eaad65759ab52c5eaafd8d1d23d1ce1bbf41798
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991, 1995-2006, 2008-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "error.h"
27 #include "fadvise.h"
28 #include "hard-locale.h"
29 #include "linebuffer.h"
30 #include "memcasecmp.h"
31 #include "quote.h"
32 #include "stdio--.h"
33 #include "xmemcoll.h"
34 #include "xstrtol.h"
35 #include "argmatch.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "join"
40 #define AUTHORS proper_name ("Mike Haertel")
42 #define join system_join
/* Exchange the two `struct line *' lvalues A and B.

   The expansion is a `do { ... } while (0)' with NO trailing semicolon,
   so that `SWAPLINES (x, y);' is exactly one statement and may be used
   as the body of an unbraced `if' that has an `else'.  Both arguments
   are evaluated more than once, so they must not have side effects.  */
#define SWAPLINES(a, b) do { \
  struct line *swaplines_tmp = (a); \
  (a) = (b); \
  (b) = swaplines_tmp; \
} while (0)
/* One node of the singly linked list that records, in order, which
   fields make up each output line.  */
struct outlist
  {
    /* Source of the field: 1 is the first file argument, 2 is the
       second, and 0 selects the join field.  */
    int file;

    /* Zero-based field index; only meaningful when FILE is 1 or 2.  */
    size_t field;

    /* The following list node.  */
    struct outlist *next;
  };
/* One field of a line, described as a byte range.  */
struct field
  {
    char *beg;			/* Address of the field's first character.  */
    size_t len;			/* Number of bytes in the field.  */
  };
71 /* A line read from an input file. */
72 struct line
74 struct linebuffer buf; /* The line itself. */
75 size_t nfields; /* Number of elements in `fields'. */
76 size_t nfields_allocated; /* Number of elements allocated for `fields'. */
77 struct field *fields;
/* A run of consecutive lines from one file whose join fields all
   compare equal.  */
struct seq
  {
    size_t count;		/* Entries of `lines' currently in use.  */
    size_t alloc;		/* Entries of `lines' that are allocated.  */
    struct line **lines;	/* Array of pointers to the lines.  */
  };
89 /* The previous line read from each file. */
90 static struct line *prevline[2] = {NULL, NULL};
92 /* This provides an extra line buffer for each file. We need these if we
93 try to read two consecutive lines into the same buffer, since we don't
94 want to overwrite the previous buffer before we check order. */
95 static struct line *spareline[2] = {NULL, NULL};
97 /* True if the LC_COLLATE locale is hard. */
98 static bool hard_LC_COLLATE;
100 /* If nonzero, print unpairable lines in file 1 or 2. */
101 static bool print_unpairables_1, print_unpairables_2;
103 /* If nonzero, print pairable lines. */
104 static bool print_pairables;
106 /* If nonzero, we have seen at least one unpairable line. */
107 static bool seen_unpairable;
109 /* If nonzero, we have warned about disorder in that file. */
110 static bool issued_disorder_warning[2];
112 /* Empty output field filler. */
113 static char const *empty_filler;
115 /* Field to join on; SIZE_MAX means they haven't been determined yet. */
116 static size_t join_field_1 = SIZE_MAX;
117 static size_t join_field_2 = SIZE_MAX;
119 /* List of fields to print. */
120 static struct outlist outlist_head;
122 /* Last element in `outlist', where a new element can be added. */
123 static struct outlist *outlist_end = &outlist_head;
125 /* Tab character separating fields. If negative, fields are separated
126 by any nonempty string of blanks, otherwise by exactly one
127 tab character whose value (when cast to unsigned char) equals TAB. */
128 static int tab = -1;
/* How input ordering is verified.  DEFAULT: diagnose disorder as a
   warning, and only once an unpairable line has been seen.  ENABLED:
   always check, and treat disorder as fatal.  DISABLED: never check.  */
static enum
  {
    CHECK_ORDER_DEFAULT,
    CHECK_ORDER_ENABLED,
    CHECK_ORDER_DISABLED
  } check_input_order;
/* getopt_long return values for the long-only options.  They start
   above CHAR_MAX so they cannot collide with any short option
   character.  */
enum
  {
    CHECK_ORDER_OPTION = CHAR_MAX + 1,
    NOCHECK_ORDER_OPTION,
    HEADER_LINE_OPTION
  };
146 static struct option const longopts[] =
148 {"ignore-case", no_argument, NULL, 'i'},
149 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
150 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
151 {"header", no_argument, NULL, HEADER_LINE_OPTION},
152 {GETOPT_HELP_OPTION_DECL},
153 {GETOPT_VERSION_OPTION_DECL},
154 {NULL, 0, NULL, 0}
157 /* Used to print non-joining lines */
158 static struct line uni_blank;
160 /* If nonzero, ignore case when comparing join fields. */
161 static bool ignore_case;
163 /* If nonzero, treat the first line of each file as column headers -
164 join them without checking for ordering */
165 static bool join_header_lines;
/* Print usage information and terminate the program with STATUS.
   When STATUS is not EXIT_SUCCESS, only a one-line hint pointing at
   --help is written to stderr; otherwise the full help text is
   written to stdout.  This function does not return: it ends by
   calling exit (STATUS).  */
167 void
168 usage (int status)
170 if (status != EXIT_SUCCESS)
171 fprintf (stderr, _("Try `%s --help' for more information.\n"),
172 program_name);
173 else
175 printf (_("\
176 Usage: %s [OPTION]... FILE1 FILE2\n\
178 program_name);
179 fputs (_("\
180 For each pair of input lines with identical join fields, write a line to\n\
181 standard output. The default join field is the first, delimited\n\
182 by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
184 -a FILENUM print unpairable lines coming from file FILENUM, where\n\
185 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
186 -e EMPTY replace missing input fields with EMPTY\n\
187 "), stdout);
188 fputs (_("\
189 -i, --ignore-case ignore differences in case when comparing fields\n\
190 -j FIELD equivalent to `-1 FIELD -2 FIELD'\n\
191 -o FORMAT obey FORMAT while constructing output line\n\
192 -t CHAR use CHAR as input and output field separator\n\
193 "), stdout);
194 fputs (_("\
195 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
196 -1 FIELD join on this FIELD of file 1\n\
197 -2 FIELD join on this FIELD of file 2\n\
198 --check-order check that the input is correctly sorted, even\n\
199 if all input lines are pairable\n\
200 --nocheck-order do not check that the input is correctly sorted\n\
201 --header treat the first line in each file as field headers,\n\
202 print them without trying to pair them\n\
203 "), stdout);
204 fputs (HELP_OPTION_DESCRIPTION, stdout);
205 fputs (VERSION_OPTION_DESCRIPTION, stdout);
206 fputs (_("\
208 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
209 else fields are separated by CHAR. Any FIELD is a field number counted\n\
210 from 1. FORMAT is one or more comma or blank separated specifications,\n\
211 each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\
212 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
213 separated by CHAR.\n\
215 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
216 E.g., use ` sort -k 1b,1 ' if `join' has no options,\n\
217 or use ` join -t '' ' if `sort' has no options.\n\
218 Note, comparisons honor the rules specified by `LC_COLLATE'.\n\
219 If the input is not sorted and some lines cannot be joined, a\n\
220 warning message will be given.\n\
221 "), stdout);
222 emit_ancillary_info ();
/* Reached on both paths: the process always ends here.  */
224 exit (status);
227 /* Record a field in LINE, with location FIELD and size LEN. */
229 static void
230 extract_field (struct line *line, char *field, size_t len)
232 if (line->nfields >= line->nfields_allocated)
234 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
236 line->fields[line->nfields].beg = field;
237 line->fields[line->nfields].len = len;
238 ++(line->nfields);
241 /* Fill in the `fields' structure in LINE. */
243 static void
244 xfields (struct line *line)
246 char *ptr = line->buf.buffer;
247 char const *lim = ptr + line->buf.length - 1;
249 if (ptr == lim)
250 return;
252 if (0 <= tab && tab != '\n')
254 char *sep;
255 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
256 extract_field (line, ptr, sep - ptr);
258 else if (tab < 0)
260 /* Skip leading blanks before the first field. */
261 while (isblank (to_uchar (*ptr)))
262 if (++ptr == lim)
263 return;
267 char *sep;
268 for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
269 continue;
270 extract_field (line, ptr, sep - ptr);
271 if (sep == lim)
272 return;
273 for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
274 continue;
276 while (ptr != lim);
279 extract_field (line, ptr, lim - ptr);
282 static void
283 freeline (struct line *line)
285 if (line == NULL)
286 return;
287 free (line->fields);
288 line->fields = NULL;
289 free (line->buf.buffer);
290 line->buf.buffer = NULL;
293 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
294 >0 if it compares greater; 0 if it compares equal.
295 Report an error and exit if the comparison fails.
296 Use join fields JF_1 and JF_2 respectively. */
298 static int
299 keycmp (struct line const *line1, struct line const *line2,
300 size_t jf_1, size_t jf_2)
302 /* Start of field to compare in each file. */
303 char *beg1;
304 char *beg2;
306 size_t len1;
307 size_t len2; /* Length of fields to compare. */
308 int diff;
310 if (jf_1 < line1->nfields)
312 beg1 = line1->fields[jf_1].beg;
313 len1 = line1->fields[jf_1].len;
315 else
317 beg1 = NULL;
318 len1 = 0;
321 if (jf_2 < line2->nfields)
323 beg2 = line2->fields[jf_2].beg;
324 len2 = line2->fields[jf_2].len;
326 else
328 beg2 = NULL;
329 len2 = 0;
332 if (len1 == 0)
333 return len2 == 0 ? 0 : -1;
334 if (len2 == 0)
335 return 1;
337 if (ignore_case)
339 /* FIXME: ignore_case does not work with NLS (in particular,
340 with multibyte chars). */
341 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
343 else
345 if (hard_LC_COLLATE)
346 return xmemcoll (beg1, len1, beg2, len2);
347 diff = memcmp (beg1, beg2, MIN (len1, len2));
350 if (diff)
351 return diff;
352 return len1 < len2 ? -1 : len1 != len2;
355 /* Check that successive input lines PREV and CURRENT from input file
356 WHATFILE are presented in order, unless the user may be relying on
357 the GNU extension that input lines may be out of order if no input
358 lines are unpairable.
360 If the user specified --nocheck-order, the check is not made.
361 If the user specified --check-order, the problem is fatal.
362 Otherwise (the default), the message is simply a warning.
364 A message is printed at most once per input file. */
366 static void
367 check_order (const struct line *prev,
368 const struct line *current,
369 int whatfile)
371 if (check_input_order != CHECK_ORDER_DISABLED
372 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
374 if (!issued_disorder_warning[whatfile-1])
376 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
377 if (keycmp (prev, current, join_field, join_field) > 0)
379 error ((check_input_order == CHECK_ORDER_ENABLED
380 ? EXIT_FAILURE : 0),
381 0, _("file %d is not in sorted order"), whatfile);
383 /* If we get to here, the message was just a warning, but we
384 want only to issue it once. */
385 issued_disorder_warning[whatfile-1] = true;
391 static inline void
392 reset_line (struct line *line)
394 line->nfields = 0;
397 static struct line *
398 init_linep (struct line **linep)
400 struct line *line = xmalloc (sizeof *line);
401 memset (line, '\0', sizeof *line);
402 *linep = line;
403 return line;
406 /* Read a line from FP into LINE and split it into fields.
407 Return true if successful. */
409 static bool
410 get_line (FILE *fp, struct line **linep, int which)
412 struct line *line = *linep;
414 if (line == prevline[which - 1])
416 SWAPLINES (line, spareline[which - 1]);
417 *linep = line;
420 if (line)
421 reset_line (line);
422 else
423 line = init_linep (linep);
425 if (! readlinebuffer (&line->buf, fp))
427 if (ferror (fp))
428 error (EXIT_FAILURE, errno, _("read error"));
429 freeline (line);
430 return false;
433 xfields (line);
435 if (prevline[which - 1])
436 check_order (prevline[which - 1], line, which);
438 prevline[which - 1] = line;
439 return true;
442 static void
443 free_spareline (void)
445 size_t i;
447 for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
449 if (spareline[i])
451 freeline (spareline[i]);
452 free (spareline[i]);
457 static void
458 initseq (struct seq *seq)
460 seq->count = 0;
461 seq->alloc = 0;
462 seq->lines = NULL;
465 /* Read a line from FP and add it to SEQ. Return true if successful. */
467 static bool
468 getseq (FILE *fp, struct seq *seq, int whichfile)
470 if (seq->count == seq->alloc)
472 size_t i;
473 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
474 for (i = seq->count; i < seq->alloc; i++)
475 seq->lines[i] = NULL;
478 if (get_line (fp, &seq->lines[seq->count], whichfile))
480 ++seq->count;
481 return true;
483 return false;
486 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
487 true, else as the next. */
488 static bool
489 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
491 if (first)
492 seq->count = 0;
494 return getseq (fp, seq, whichfile);
497 static void
498 delseq (struct seq *seq)
500 size_t i;
501 for (i = 0; i < seq->alloc; i++)
503 freeline (seq->lines[i]);
504 free (seq->lines[i]);
506 free (seq->lines);
510 /* Print field N of LINE if it exists and is nonempty, otherwise
511 `empty_filler' if it is nonempty. */
513 static void
514 prfield (size_t n, struct line const *line)
516 size_t len;
518 if (n < line->nfields)
520 len = line->fields[n].len;
521 if (len)
522 fwrite (line->fields[n].beg, 1, len, stdout);
523 else if (empty_filler)
524 fputs (empty_filler, stdout);
526 else if (empty_filler)
527 fputs (empty_filler, stdout);
530 /* Print the join of LINE1 and LINE2. */
532 static void
533 prjoin (struct line const *line1, struct line const *line2)
535 const struct outlist *outlist;
536 char output_separator = tab < 0 ? ' ' : tab;
538 outlist = outlist_head.next;
539 if (outlist)
541 const struct outlist *o;
543 o = outlist;
544 while (1)
546 size_t field;
547 struct line const *line;
549 if (o->file == 0)
551 if (line1 == &uni_blank)
553 line = line2;
554 field = join_field_2;
556 else
558 line = line1;
559 field = join_field_1;
562 else
564 line = (o->file == 1 ? line1 : line2);
565 field = o->field;
567 prfield (field, line);
568 o = o->next;
569 if (o == NULL)
570 break;
571 putchar (output_separator);
573 putchar ('\n');
575 else
577 size_t i;
579 if (line1 == &uni_blank)
581 struct line const *t;
582 t = line1;
583 line1 = line2;
584 line2 = t;
586 prfield (join_field_1, line1);
587 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
589 putchar (output_separator);
590 prfield (i, line1);
592 for (i = join_field_1 + 1; i < line1->nfields; ++i)
594 putchar (output_separator);
595 prfield (i, line1);
598 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
600 putchar (output_separator);
601 prfield (i, line2);
603 for (i = join_field_2 + 1; i < line2->nfields; ++i)
605 putchar (output_separator);
606 prfield (i, line2);
608 putchar ('\n');
612 /* Print the join of the files in FP1 and FP2. */
614 static void
615 join (FILE *fp1, FILE *fp2)
617 struct seq seq1, seq2;
618 int diff;
619 bool eof1, eof2;
621 fadvise (fp1, FADVISE_SEQUENTIAL);
622 fadvise (fp2, FADVISE_SEQUENTIAL);
624 /* Read the first line of each file. */
625 initseq (&seq1);
626 getseq (fp1, &seq1, 1);
627 initseq (&seq2);
628 getseq (fp2, &seq2, 2);
630 if (join_header_lines && seq1.count && seq2.count)
632 prjoin (seq1.lines[0], seq2.lines[0]);
633 prevline[0] = NULL;
634 prevline[1] = NULL;
635 advance_seq (fp1, &seq1, true, 1);
636 advance_seq (fp2, &seq2, true, 2);
639 while (seq1.count && seq2.count)
641 size_t i;
642 diff = keycmp (seq1.lines[0], seq2.lines[0],
643 join_field_1, join_field_2);
644 if (diff < 0)
646 if (print_unpairables_1)
647 prjoin (seq1.lines[0], &uni_blank);
648 advance_seq (fp1, &seq1, true, 1);
649 seen_unpairable = true;
650 continue;
652 if (diff > 0)
654 if (print_unpairables_2)
655 prjoin (&uni_blank, seq2.lines[0]);
656 advance_seq (fp2, &seq2, true, 2);
657 seen_unpairable = true;
658 continue;
661 /* Keep reading lines from file1 as long as they continue to
662 match the current line from file2. */
663 eof1 = false;
665 if (!advance_seq (fp1, &seq1, false, 1))
667 eof1 = true;
668 ++seq1.count;
669 break;
671 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
672 join_field_1, join_field_2));
674 /* Keep reading lines from file2 as long as they continue to
675 match the current line from file1. */
676 eof2 = false;
678 if (!advance_seq (fp2, &seq2, false, 2))
680 eof2 = true;
681 ++seq2.count;
682 break;
684 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
685 join_field_1, join_field_2));
687 if (print_pairables)
689 for (i = 0; i < seq1.count - 1; ++i)
691 size_t j;
692 for (j = 0; j < seq2.count - 1; ++j)
693 prjoin (seq1.lines[i], seq2.lines[j]);
697 if (!eof1)
699 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
700 seq1.count = 1;
702 else
703 seq1.count = 0;
705 if (!eof2)
707 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
708 seq2.count = 1;
710 else
711 seq2.count = 0;
714 /* If the user did not specify --check-order, then we read the
715 tail ends of both inputs to verify that they are in order. We
716 skip the rest of the tail once we have issued a warning for that
717 file, unless we actually need to print the unpairable lines. */
718 struct line *line = NULL;
719 bool checktail = false;
721 if (check_input_order != CHECK_ORDER_DISABLED
722 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
723 checktail = true;
725 if ((print_unpairables_1 || checktail) && seq1.count)
727 if (print_unpairables_1)
728 prjoin (seq1.lines[0], &uni_blank);
729 seen_unpairable = true;
730 while (get_line (fp1, &line, 1))
732 if (print_unpairables_1)
733 prjoin (line, &uni_blank);
734 if (issued_disorder_warning[0] && !print_unpairables_1)
735 break;
739 if ((print_unpairables_2 || checktail) && seq2.count)
741 if (print_unpairables_2)
742 prjoin (&uni_blank, seq2.lines[0]);
743 seen_unpairable = true;
744 while (get_line (fp2, &line, 2))
746 if (print_unpairables_2)
747 prjoin (&uni_blank, line);
748 if (issued_disorder_warning[1] && !print_unpairables_2)
749 break;
753 freeline (line);
754 free (line);
756 delseq (&seq1);
757 delseq (&seq2);
760 /* Add a field spec for field FIELD of file FILE to `outlist'. */
762 static void
763 add_field (int file, size_t field)
765 struct outlist *o;
767 assert (file == 0 || file == 1 || file == 2);
768 assert (file != 0 || field == 0);
770 o = xmalloc (sizeof *o);
771 o->file = file;
772 o->field = field;
773 o->next = NULL;
775 /* Add to the end of the list so the fields are in the right order. */
776 outlist_end->next = o;
777 outlist_end = o;
780 /* Convert a string of decimal digits, STR (the 1-based join field number),
781 to an integral value. Upon successful conversion, return one less
782 (the zero-based field number). Silently convert too-large values
783 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
784 diagnostic and exit. */
786 static size_t
787 string_to_join_field (char const *str)
789 size_t result;
790 unsigned long int val;
791 verify (SIZE_MAX <= ULONG_MAX);
793 strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
794 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
795 val = SIZE_MAX;
796 else if (s_err != LONGINT_OK || val == 0)
797 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
799 result = val - 1;
801 return result;
/* Decode the single field specifier S into *FILE_INDEX and
   *FIELD_INDEX.  S is either `0', which selects the join field and
   yields 0 for both outputs, or `FILENUM.FIELD' with FILENUM 1 or 2.
   The FIELD in S is 1-based; *FIELD_INDEX is zero-based.
   If S is invalid, give a diagnostic and exit.  */

static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  char first = s[0];

  if (first == '0')
    {
      /* `0' must be all alone -- no `.FIELD'.  */
      if (s[1])
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (first == '1' || first == '2')
    {
      if (s[1] != '.')
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = first - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      /* The first character must be 0, 1, or 2.  */
      error (EXIT_FAILURE, 0,
             _("invalid file number in field spec: %s"), quote (s));

      /* Tell gcc -W -Wall that we can't get beyond this point.
         This avoids a warning (otherwise legit) that the caller's copies
         of *file_index and *field_index might be used uninitialized.  */
      abort ();
    }
}
/* Split STR at commas, spaces and tabs, and add each resulting field
   spec to the output list.  STR is modified: each separator that is
   found is overwritten with a NUL byte.  */

static void
add_field_list (char *str)
{
  char *item = str;

  for (;;)
    {
      int file_index;
      size_t field_index;
      char *sep = strpbrk (item, ", \t");

      if (sep != NULL)
        *sep++ = '\0';
      decode_field_spec (item, &file_index, &field_index);
      add_field (file_index, field_index);

      if (sep == NULL)
        break;
      item = sep;
    }
}
/* Store VAL in the join field *VAR.  If *VAR was already set to a
   different value, report the two incompatible (1-based) field
   numbers and exit.  */

static void
set_join_field (size_t *var, size_t val)
{
  bool already_set = *var != SIZE_MAX;

  if (already_set && *var != val)
    {
      unsigned long int var1 = *var + 1;
      unsigned long int val1 = val + 1;
      error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
             var1, val1);
    }

  *var = val;
}
/* How a non-option command-line argument may be interpreted.  */

enum operand_status
  {
    /* The argument can only be an operand, i.e., one of the files to
       be joined.  */
    MUST_BE_OPERAND,

    /* The argument might belong to the -j1 or -j2 option just before
       it, or it might be an operand.  These two must stay adjacent
       and in this order: main computes MIGHT_BE_J1_ARG + is_j2.  */
    MIGHT_BE_J1_ARG,
    MIGHT_BE_J2_ARG,

    /* The argument might belong to the -o option just before it, or
       it might be an operand.  */
    MIGHT_BE_O_ARG
  };
901 /* Add NAME to the array of input file NAMES with operand statuses
902 OPERAND_STATUS; currently there are NFILES names in the list. */
904 static void
905 add_file_name (char *name, char *names[2],
906 int operand_status[2], int joption_count[2], int *nfiles,
907 int *prev_optc_status, int *optc_status)
909 int n = *nfiles;
911 if (n == 2)
913 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
914 char *arg = names[op0];
915 switch (operand_status[op0])
917 case MUST_BE_OPERAND:
918 error (0, 0, _("extra operand %s"), quote (name));
919 usage (EXIT_FAILURE);
921 case MIGHT_BE_J1_ARG:
922 joption_count[0]--;
923 set_join_field (&join_field_1, string_to_join_field (arg));
924 break;
926 case MIGHT_BE_J2_ARG:
927 joption_count[1]--;
928 set_join_field (&join_field_2, string_to_join_field (arg));
929 break;
931 case MIGHT_BE_O_ARG:
932 add_field_list (arg);
933 break;
935 if (!op0)
937 operand_status[0] = operand_status[1];
938 names[0] = names[1];
940 n = 1;
943 operand_status[n] = *prev_optc_status;
944 names[n] = name;
945 *nfiles = n + 1;
946 if (*prev_optc_status == MIGHT_BE_O_ARG)
947 *optc_status = MIGHT_BE_O_ARG;
951 main (int argc, char **argv)
953 int optc_status;
954 int prev_optc_status = MUST_BE_OPERAND;
955 int operand_status[2];
956 int joption_count[2] = { 0, 0 };
957 char *names[2];
958 FILE *fp1, *fp2;
959 int optc;
960 int nfiles = 0;
961 int i;
963 initialize_main (&argc, &argv);
964 set_program_name (argv[0]);
965 setlocale (LC_ALL, "");
966 bindtextdomain (PACKAGE, LOCALEDIR);
967 textdomain (PACKAGE);
968 hard_LC_COLLATE = hard_locale (LC_COLLATE);
970 atexit (close_stdout);
971 atexit (free_spareline);
973 print_pairables = true;
974 seen_unpairable = false;
975 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
976 check_input_order = CHECK_ORDER_DEFAULT;
978 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
979 longopts, NULL))
980 != -1)
982 optc_status = MUST_BE_OPERAND;
984 switch (optc)
986 case 'v':
987 print_pairables = false;
988 /* Fall through. */
990 case 'a':
992 unsigned long int val;
993 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
994 || (val != 1 && val != 2))
995 error (EXIT_FAILURE, 0,
996 _("invalid field number: %s"), quote (optarg));
997 if (val == 1)
998 print_unpairables_1 = true;
999 else
1000 print_unpairables_2 = true;
1002 break;
1004 case 'e':
1005 if (empty_filler && ! STREQ (empty_filler, optarg))
1006 error (EXIT_FAILURE, 0,
1007 _("conflicting empty-field replacement strings"));
1008 empty_filler = optarg;
1009 break;
1011 case 'i':
1012 ignore_case = true;
1013 break;
1015 case '1':
1016 set_join_field (&join_field_1, string_to_join_field (optarg));
1017 break;
1019 case '2':
1020 set_join_field (&join_field_2, string_to_join_field (optarg));
1021 break;
1023 case 'j':
1024 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1025 && optarg == argv[optind - 1] + 2)
1027 /* The argument was either "-j1" or "-j2". */
1028 bool is_j2 = (optarg[0] == '2');
1029 joption_count[is_j2]++;
1030 optc_status = MIGHT_BE_J1_ARG + is_j2;
1032 else
1034 set_join_field (&join_field_1, string_to_join_field (optarg));
1035 set_join_field (&join_field_2, join_field_1);
1037 break;
1039 case 'o':
1040 add_field_list (optarg);
1041 optc_status = MIGHT_BE_O_ARG;
1042 break;
1044 case 't':
1046 unsigned char newtab = optarg[0];
1047 if (! newtab)
1048 newtab = '\n'; /* '' => process the whole line. */
1049 else if (optarg[1])
1051 if (STREQ (optarg, "\\0"))
1052 newtab = '\0';
1053 else
1054 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1055 quote (optarg));
1057 if (0 <= tab && tab != newtab)
1058 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1059 tab = newtab;
1061 break;
1063 case NOCHECK_ORDER_OPTION:
1064 check_input_order = CHECK_ORDER_DISABLED;
1065 break;
1067 case CHECK_ORDER_OPTION:
1068 check_input_order = CHECK_ORDER_ENABLED;
1069 break;
1071 case 1: /* Non-option argument. */
1072 add_file_name (optarg, names, operand_status, joption_count,
1073 &nfiles, &prev_optc_status, &optc_status);
1074 break;
1076 case HEADER_LINE_OPTION:
1077 join_header_lines = true;
1078 break;
1080 case_GETOPT_HELP_CHAR;
1082 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1084 default:
1085 usage (EXIT_FAILURE);
1088 prev_optc_status = optc_status;
1091 /* Process any operands after "--". */
1092 prev_optc_status = MUST_BE_OPERAND;
1093 while (optind < argc)
1094 add_file_name (argv[optind++], names, operand_status, joption_count,
1095 &nfiles, &prev_optc_status, &optc_status);
1097 if (nfiles != 2)
1099 if (nfiles == 0)
1100 error (0, 0, _("missing operand"));
1101 else
1102 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1103 usage (EXIT_FAILURE);
1106 /* If "-j1" was specified and it turns out not to have had an argument,
1107 treat it as "-j 1". Likewise for -j2. */
1108 for (i = 0; i < 2; i++)
1109 if (joption_count[i] != 0)
1111 set_join_field (&join_field_1, i);
1112 set_join_field (&join_field_2, i);
1115 if (join_field_1 == SIZE_MAX)
1116 join_field_1 = 0;
1117 if (join_field_2 == SIZE_MAX)
1118 join_field_2 = 0;
1120 fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r");
1121 if (!fp1)
1122 error (EXIT_FAILURE, errno, "%s", names[0]);
1123 fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r");
1124 if (!fp2)
1125 error (EXIT_FAILURE, errno, "%s", names[1]);
1126 if (fp1 == fp2)
1127 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1128 join (fp1, fp2);
1130 if (fclose (fp1) != 0)
1131 error (EXIT_FAILURE, errno, "%s", names[0]);
1132 if (fclose (fp2) != 0)
1133 error (EXIT_FAILURE, errno, "%s", names[1]);
1135 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1136 exit (EXIT_FAILURE);
1137 else
1138 exit (EXIT_SUCCESS);