dd: add a flag to discard cached data
[coreutils/ericb.git] / src / join.c
blob941185c8a132becfbb9980b7329299c57ba2e8c1
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991, 1995-2006, 2008-2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "error.h"
27 #include "fadvise.h"
28 #include "hard-locale.h"
29 #include "linebuffer.h"
30 #include "memcasecmp.h"
31 #include "quote.h"
32 #include "stdio--.h"
33 #include "xmemcoll.h"
34 #include "xstrtol.h"
35 #include "argmatch.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "join"
/* Author credit; both AUTHORS and PROGRAM_NAME are handed to
   case_GETOPT_VERSION_CHAR in main.  */
40 #define AUTHORS proper_name ("Mike Haertel")
/* From here on every use of the identifier `join' in this file,
   including the function written below as `join (FILE *fp1, FILE *fp2)',
   is renamed to system_join.
   NOTE(review): presumably this avoids a clash with a `join' declared
   by some system header -- confirm.  */
42 #define join system_join
/* Exchange the two `struct line *' lvalues A and B.
   Each argument is evaluated twice, so it must have no side effects.
   The expansion intentionally carries no trailing semicolon: the caller
   writes `SWAPLINES (x, y);', which is then exactly one statement and
   may be used as the unbraced body of an `if' that has an `else'.  */
#define SWAPLINES(a, b) do { \
  struct line *tmp = (a); \
  (a) = (b); \
  (b) = tmp; \
} while (0)
/* One node of the singly linked list that says which fields to print
   on each output line.  */
struct outlist
{
  /* Source of the field: 1 selects the first file argument, 2 the
     second, and 0 selects the join field itself.  */
  int file;

  /* Zero-based field index; only meaningful when FILE is 1 or 2.  */
  size_t field;

  /* Following node, or NULL at the end of the list.  */
  struct outlist *next;
};
/* One field of an input line, described as a (start, length) pair.  */
struct field
{
  /* Address of the field's first character.  */
  char *beg;

  /* Number of characters in the field.  */
  size_t len;
};
71 /* A line read from an input file. */
72 struct line
74 struct linebuffer buf; /* The line itself. */
75 size_t nfields; /* Number of elements in `fields'. */
76 size_t nfields_allocated; /* Number of elements allocated for `fields'. */
77 struct field *fields;
/* A run of consecutive lines from one file that all share the same
   join field value.  */
struct seq
{
  /* Number of entries of `lines' currently in use.  */
  size_t count;

  /* Number of entries `lines' has room for.  */
  size_t alloc;

  /* Array of pointers to the lines of the run.  */
  struct line **lines;
};
89 /* The previous line read from each file. */
90 static struct line *prevline[2] = {NULL, NULL};
92 /* This provides an extra line buffer for each file. We need these if we
93 try to read two consecutive lines into the same buffer, since we don't
94 want to overwrite the previous buffer before we check order. */
95 static struct line *spareline[2] = {NULL, NULL};
97 /* True if the LC_COLLATE locale is hard. */
98 static bool hard_LC_COLLATE;
100 /* If nonzero, print unpairable lines in file 1 or 2. */
101 static bool print_unpairables_1, print_unpairables_2;
103 /* If nonzero, print pairable lines. */
104 static bool print_pairables;
106 /* If nonzero, we have seen at least one unpairable line. */
107 static bool seen_unpairable;
109 /* If nonzero, we have warned about disorder in that file. */
110 static bool issued_disorder_warning[2];
112 /* Empty output field filler. */
113 static char const *empty_filler;
115 /* Whether to ensure the same number of fields are output from each line. */
116 static bool autoformat;
117 /* The number of fields to output for each line.
118 Only significant when autoformat is true. */
119 static size_t autocount_1;
120 static size_t autocount_2;
122 /* Field to join on; SIZE_MAX means they haven't been determined yet. */
123 static size_t join_field_1 = SIZE_MAX;
124 static size_t join_field_2 = SIZE_MAX;
126 /* List of fields to print. */
127 static struct outlist outlist_head;
129 /* Last element in `outlist', where a new element can be added. */
130 static struct outlist *outlist_end = &outlist_head;
132 /* Tab character separating fields. If negative, fields are separated
133 by any nonempty string of blanks, otherwise by exactly one
134 tab character whose value (when cast to unsigned char) equals TAB. */
135 static int tab = -1;
/* How strictly the sort order of the input is checked.  */
static enum
  {
    /* Neither option given: disorder is only a warning.  */
    CHECK_ORDER_DEFAULT = 0,

    /* --check-order: disorder is a fatal error.  */
    CHECK_ORDER_ENABLED = 1,

    /* --nocheck-order: the order is not checked at all.  */
    CHECK_ORDER_DISABLED = 2
  } check_input_order;
/* getopt_long return codes for the options that have no short form.
   They start just above CHAR_MAX so that they cannot collide with any
   single-character option.  */
enum
{
  CHECK_ORDER_OPTION = CHAR_MAX + 1,
  NOCHECK_ORDER_OPTION = CHAR_MAX + 2,
  HEADER_LINE_OPTION = CHAR_MAX + 3
};
153 static struct option const longopts[] =
155 {"ignore-case", no_argument, NULL, 'i'},
156 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
157 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
158 {"header", no_argument, NULL, HEADER_LINE_OPTION},
159 {GETOPT_HELP_OPTION_DECL},
160 {GETOPT_VERSION_OPTION_DECL},
161 {NULL, 0, NULL, 0}
164 /* Used to print non-joining lines */
165 static struct line uni_blank;
167 /* If nonzero, ignore case when comparing join fields. */
168 static bool ignore_case;
170 /* If nonzero, treat the first line of each file as column headers -
171 join them without checking for ordering */
172 static bool join_header_lines;
/* Print usage information and terminate the program with STATUS.
   When STATUS is not EXIT_SUCCESS only a one-line hint pointing at
   --help is written to stderr; otherwise the full help text is written
   to stdout.  This function does not return: it ends in exit.  */
174 void
175 usage (int status)
/* Failure path: short hint only.  */
177 if (status != EXIT_SUCCESS)
178 fprintf (stderr, _("Try `%s --help' for more information.\n"),
179 program_name);
/* Success path: synopsis, option list and notes, all on stdout.  */
180 else
182 printf (_("\
183 Usage: %s [OPTION]... FILE1 FILE2\n\
185 program_name);
186 fputs (_("\
187 For each pair of input lines with identical join fields, write a line to\n\
188 standard output. The default join field is the first, delimited\n\
189 by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
191 -a FILENUM print unpairable lines coming from file FILENUM, where\n\
192 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
193 -e EMPTY replace missing input fields with EMPTY\n\
194 "), stdout);
195 fputs (_("\
196 -i, --ignore-case ignore differences in case when comparing fields\n\
197 -j FIELD equivalent to `-1 FIELD -2 FIELD'\n\
198 -o FORMAT obey FORMAT while constructing output line\n\
199 -t CHAR use CHAR as input and output field separator\n\
200 "), stdout);
201 fputs (_("\
202 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
203 -1 FIELD join on this FIELD of file 1\n\
204 -2 FIELD join on this FIELD of file 2\n\
205 --check-order check that the input is correctly sorted, even\n\
206 if all input lines are pairable\n\
207 --nocheck-order do not check that the input is correctly sorted\n\
208 --header treat the first line in each file as field headers,\n\
209 print them without trying to pair them\n\
210 "), stdout);
211 fputs (HELP_OPTION_DESCRIPTION, stdout);
212 fputs (VERSION_OPTION_DESCRIPTION, stdout);
213 fputs (_("\
215 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
216 else fields are separated by CHAR. Any FIELD is a field number counted\n\
217 from 1. FORMAT is one or more comma or blank separated specifications,\n\
218 each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\
219 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
220 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
221 line of each file determines the number of fields output for each line.\n\
223 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
224 E.g., use ` sort -k 1b,1 ' if `join' has no options,\n\
225 or use ` join -t '' ' if `sort' has no options.\n\
226 Note, comparisons honor the rules specified by `LC_COLLATE'.\n\
227 If the input is not sorted and some lines cannot be joined, a\n\
228 warning message will be given.\n\
229 "), stdout);
230 emit_ancillary_info ();
/* Reached on both paths; STATUS becomes the process exit status.  */
232 exit (status);
235 /* Record a field in LINE, with location FIELD and size LEN. */
237 static void
238 extract_field (struct line *line, char *field, size_t len)
240 if (line->nfields >= line->nfields_allocated)
242 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
244 line->fields[line->nfields].beg = field;
245 line->fields[line->nfields].len = len;
246 ++(line->nfields);
249 /* Fill in the `fields' structure in LINE. */
251 static void
252 xfields (struct line *line)
254 char *ptr = line->buf.buffer;
255 char const *lim = ptr + line->buf.length - 1;
257 if (ptr == lim)
258 return;
260 if (0 <= tab && tab != '\n')
262 char *sep;
263 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
264 extract_field (line, ptr, sep - ptr);
266 else if (tab < 0)
268 /* Skip leading blanks before the first field. */
269 while (isblank (to_uchar (*ptr)))
270 if (++ptr == lim)
271 return;
275 char *sep;
276 for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
277 continue;
278 extract_field (line, ptr, sep - ptr);
279 if (sep == lim)
280 return;
281 for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
282 continue;
284 while (ptr != lim);
287 extract_field (line, ptr, lim - ptr);
290 static void
291 freeline (struct line *line)
293 if (line == NULL)
294 return;
295 free (line->fields);
296 line->fields = NULL;
297 free (line->buf.buffer);
298 line->buf.buffer = NULL;
301 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
302 >0 if it compares greater; 0 if it compares equal.
303 Report an error and exit if the comparison fails.
304 Use join fields JF_1 and JF_2 respectively. */
306 static int
307 keycmp (struct line const *line1, struct line const *line2,
308 size_t jf_1, size_t jf_2)
310 /* Start of field to compare in each file. */
311 char *beg1;
312 char *beg2;
314 size_t len1;
315 size_t len2; /* Length of fields to compare. */
316 int diff;
318 if (jf_1 < line1->nfields)
320 beg1 = line1->fields[jf_1].beg;
321 len1 = line1->fields[jf_1].len;
323 else
325 beg1 = NULL;
326 len1 = 0;
329 if (jf_2 < line2->nfields)
331 beg2 = line2->fields[jf_2].beg;
332 len2 = line2->fields[jf_2].len;
334 else
336 beg2 = NULL;
337 len2 = 0;
340 if (len1 == 0)
341 return len2 == 0 ? 0 : -1;
342 if (len2 == 0)
343 return 1;
345 if (ignore_case)
347 /* FIXME: ignore_case does not work with NLS (in particular,
348 with multibyte chars). */
349 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
351 else
353 if (hard_LC_COLLATE)
354 return xmemcoll (beg1, len1, beg2, len2);
355 diff = memcmp (beg1, beg2, MIN (len1, len2));
358 if (diff)
359 return diff;
360 return len1 < len2 ? -1 : len1 != len2;
363 /* Check that successive input lines PREV and CURRENT from input file
364 WHATFILE are presented in order, unless the user may be relying on
365 the GNU extension that input lines may be out of order if no input
366 lines are unpairable.
368 If the user specified --nocheck-order, the check is not made.
369 If the user specified --check-order, the problem is fatal.
370 Otherwise (the default), the message is simply a warning.
372 A message is printed at most once per input file. */
374 static void
375 check_order (const struct line *prev,
376 const struct line *current,
377 int whatfile)
379 if (check_input_order != CHECK_ORDER_DISABLED
380 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
382 if (!issued_disorder_warning[whatfile-1])
384 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
385 if (keycmp (prev, current, join_field, join_field) > 0)
387 error ((check_input_order == CHECK_ORDER_ENABLED
388 ? EXIT_FAILURE : 0),
389 0, _("file %d is not in sorted order"), whatfile);
391 /* If we get to here, the message was just a warning, but we
392 want only to issue it once. */
393 issued_disorder_warning[whatfile-1] = true;
399 static inline void
400 reset_line (struct line *line)
402 line->nfields = 0;
405 static struct line *
406 init_linep (struct line **linep)
408 struct line *line = xmalloc (sizeof *line);
409 memset (line, '\0', sizeof *line);
410 *linep = line;
411 return line;
414 /* Read a line from FP into LINE and split it into fields.
415 Return true if successful. */
417 static bool
418 get_line (FILE *fp, struct line **linep, int which)
420 struct line *line = *linep;
422 if (line == prevline[which - 1])
424 SWAPLINES (line, spareline[which - 1]);
425 *linep = line;
428 if (line)
429 reset_line (line);
430 else
431 line = init_linep (linep);
433 if (! readlinebuffer (&line->buf, fp))
435 if (ferror (fp))
436 error (EXIT_FAILURE, errno, _("read error"));
437 freeline (line);
438 return false;
441 xfields (line);
443 if (prevline[which - 1])
444 check_order (prevline[which - 1], line, which);
446 prevline[which - 1] = line;
447 return true;
450 static void
451 free_spareline (void)
453 size_t i;
455 for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
457 if (spareline[i])
459 freeline (spareline[i]);
460 free (spareline[i]);
465 static void
466 initseq (struct seq *seq)
468 seq->count = 0;
469 seq->alloc = 0;
470 seq->lines = NULL;
473 /* Read a line from FP and add it to SEQ. Return true if successful. */
475 static bool
476 getseq (FILE *fp, struct seq *seq, int whichfile)
478 if (seq->count == seq->alloc)
480 size_t i;
481 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
482 for (i = seq->count; i < seq->alloc; i++)
483 seq->lines[i] = NULL;
486 if (get_line (fp, &seq->lines[seq->count], whichfile))
488 ++seq->count;
489 return true;
491 return false;
494 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
495 true, else as the next. */
496 static bool
497 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
499 if (first)
500 seq->count = 0;
502 return getseq (fp, seq, whichfile);
505 static void
506 delseq (struct seq *seq)
508 size_t i;
509 for (i = 0; i < seq->alloc; i++)
511 freeline (seq->lines[i]);
512 free (seq->lines[i]);
514 free (seq->lines);
518 /* Print field N of LINE if it exists and is nonempty, otherwise
519 `empty_filler' if it is nonempty. */
521 static void
522 prfield (size_t n, struct line const *line)
524 size_t len;
526 if (n < line->nfields)
528 len = line->fields[n].len;
529 if (len)
530 fwrite (line->fields[n].beg, 1, len, stdout);
531 else if (empty_filler)
532 fputs (empty_filler, stdout);
534 else if (empty_filler)
535 fputs (empty_filler, stdout);
538 /* Output all the fields in line, other than the join field. */
540 static void
541 prfields (struct line const *line, size_t join_field, size_t autocount)
543 size_t i;
544 size_t nfields = autoformat ? autocount : line->nfields;
545 char output_separator = tab < 0 ? ' ' : tab;
547 for (i = 0; i < join_field && i < nfields; ++i)
549 putchar (output_separator);
550 prfield (i, line);
552 for (i = join_field + 1; i < nfields; ++i)
554 putchar (output_separator);
555 prfield (i, line);
559 /* Print the join of LINE1 and LINE2. */
561 static void
562 prjoin (struct line const *line1, struct line const *line2)
564 const struct outlist *outlist;
565 char output_separator = tab < 0 ? ' ' : tab;
566 size_t field;
567 struct line const *line;
569 outlist = outlist_head.next;
570 if (outlist)
572 const struct outlist *o;
574 o = outlist;
575 while (1)
577 if (o->file == 0)
579 if (line1 == &uni_blank)
581 line = line2;
582 field = join_field_2;
584 else
586 line = line1;
587 field = join_field_1;
590 else
592 line = (o->file == 1 ? line1 : line2);
593 field = o->field;
595 prfield (field, line);
596 o = o->next;
597 if (o == NULL)
598 break;
599 putchar (output_separator);
601 putchar ('\n');
603 else
605 if (line1 == &uni_blank)
607 line = line2;
608 field = join_field_2;
610 else
612 line = line1;
613 field = join_field_1;
616 /* Output the join field. */
617 prfield (field, line);
619 /* Output other fields. */
620 prfields (line1, join_field_1, autocount_1);
621 prfields (line2, join_field_2, autocount_2);
623 putchar ('\n');
627 /* Print the join of the files in FP1 and FP2.
   The two inputs are consumed in step: at each turn the current line of
   each file is compared on its join field.  A smaller line is
   unpairable and is skipped (and printed if requested); on equality all
   lines of both files that carry the same key are collected and every
   combination is printed.  What is left of either file once the other
   is exhausted is handled at the end.  */
629 static void
630 join (FILE *fp1, FILE *fp2)
632 struct seq seq1, seq2;
633 int diff;
634 bool eof1, eof2;
636 fadvise (fp1, FADVISE_SEQUENTIAL);
637 fadvise (fp2, FADVISE_SEQUENTIAL);
639 /* Read the first line of each file. */
640 initseq (&seq1);
641 getseq (fp1, &seq1, 1);
642 initseq (&seq2);
643 getseq (fp2, &seq2, 2);
/* With autoformat the field counts of the first lines fix the number
   of fields printed for every later line; an empty file counts as 0.  */
645 if (autoformat)
647 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
648 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
/* Header mode: the first lines are joined unconditionally (no key
   comparison), with uni_blank standing in for an empty file.  prevline
   is cleared so that get_line does not order-check the first data line
   against the header, and each nonempty file moves on to its next
   line.  */
651 if (join_header_lines && (seq1.count || seq2.count))
653 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
654 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
655 prjoin (hline1, hline2);
656 prevline[0] = NULL;
657 prevline[1] = NULL;
658 if (seq1.count)
659 advance_seq (fp1, &seq1, true, 1);
660 if (seq2.count)
661 advance_seq (fp2, &seq2, true, 2);
/* Main merge loop; runs while both files still have a current line.  */
664 while (seq1.count && seq2.count)
666 size_t i;
667 diff = keycmp (seq1.lines[0], seq2.lines[0],
668 join_field_1, join_field_2);
/* File 1's line sorts first, so it has no partner in file 2.  */
669 if (diff < 0)
671 if (print_unpairables_1)
672 prjoin (seq1.lines[0], &uni_blank);
673 advance_seq (fp1, &seq1, true, 1);
674 seen_unpairable = true;
675 continue;
/* File 2's line sorts first, so it has no partner in file 1.  */
677 if (diff > 0)
679 if (print_unpairables_2)
680 prjoin (&uni_blank, seq2.lines[0]);
681 advance_seq (fp2, &seq2, true, 2);
682 seen_unpairable = true;
683 continue;
686 /* Keep reading lines from file1 as long as they continue to
687 match the current line from file2.
   When the loop ends normally the last line of seq1 is the first one
   that does NOT match.  At end of file there is no such line, so the
   count is bumped by one to keep "count - 1 matching lines" true.  */
688 eof1 = false;
690 if (!advance_seq (fp1, &seq1, false, 1))
692 eof1 = true;
693 ++seq1.count;
694 break;
696 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
697 join_field_1, join_field_2));
699 /* Keep reading lines from file2 as long as they continue to
700 match the current line from file1.
   The same "count - 1" convention as for file 1 applies.  */
701 eof2 = false;
703 if (!advance_seq (fp2, &seq2, false, 2))
705 eof2 = true;
706 ++seq2.count;
707 break;
709 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
710 join_field_1, join_field_2));
/* Print every combination of the matching lines; the final slot of
   each sequence is excluded (see above).  */
712 if (print_pairables)
714 for (i = 0; i < seq1.count - 1; ++i)
716 size_t j;
717 for (j = 0; j < seq2.count - 1; ++j)
718 prjoin (seq1.lines[i], seq2.lines[j]);
/* Make the first non-matching line the new current line of file 1, or
   mark the sequence empty at end of file.  */
722 if (!eof1)
724 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
725 seq1.count = 1;
727 else
728 seq1.count = 0;
/* Likewise for file 2.  */
730 if (!eof2)
732 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
733 seq2.count = 1;
735 else
736 seq2.count = 0;
739 /* If the user did not specify --nocheck-order, then we read the
740 tail ends of both inputs to verify that they are in order. We
741 skip the rest of the tail once we have issued a warning for that
742 file, unless we actually need to print the unpairable lines. */
743 struct line *line = NULL;
744 bool checktail = false;
746 if (check_input_order != CHECK_ORDER_DISABLED
747 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
748 checktail = true;
/* Tail of file 1.  The order check itself happens inside get_line.  */
750 if ((print_unpairables_1 || checktail) && seq1.count)
752 if (print_unpairables_1)
753 prjoin (seq1.lines[0], &uni_blank);
754 if (seq2.count)
755 seen_unpairable = true;
756 while (get_line (fp1, &line, 1))
758 if (print_unpairables_1)
759 prjoin (line, &uni_blank);
760 if (issued_disorder_warning[0] && !print_unpairables_1)
761 break;
/* Tail of file 2, reusing the same scratch line.  */
765 if ((print_unpairables_2 || checktail) && seq2.count)
767 if (print_unpairables_2)
768 prjoin (&uni_blank, seq2.lines[0]);
769 if (seq1.count)
770 seen_unpairable = true;
771 while (get_line (fp2, &line, 2))
773 if (print_unpairables_2)
774 prjoin (&uni_blank, line);
775 if (issued_disorder_warning[1] && !print_unpairables_2)
776 break;
/* Release the scratch line (freeline accepts NULL) and both
   sequences.  */
780 freeline (line);
781 free (line);
783 delseq (&seq1);
784 delseq (&seq2);
787 /* Add a field spec for field FIELD of file FILE to `outlist'. */
789 static void
790 add_field (int file, size_t field)
792 struct outlist *o;
794 assert (file == 0 || file == 1 || file == 2);
795 assert (file != 0 || field == 0);
797 o = xmalloc (sizeof *o);
798 o->file = file;
799 o->field = field;
800 o->next = NULL;
802 /* Add to the end of the list so the fields are in the right order. */
803 outlist_end->next = o;
804 outlist_end = o;
807 /* Convert a string of decimal digits, STR (the 1-based join field number),
808 to an integral value. Upon successful conversion, return one less
809 (the zero-based field number). Silently convert too-large values
810 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
811 diagnostic and exit. */
813 static size_t
814 string_to_join_field (char const *str)
816 size_t result;
817 unsigned long int val;
818 verify (SIZE_MAX <= ULONG_MAX);
820 strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
821 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
822 val = SIZE_MAX;
823 else if (s_err != LONGINT_OK || val == 0)
824 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
826 result = val - 1;
828 return result;
/* Decode the single field specifier S into *FILE_INDEX and
   *FIELD_INDEX.  S is either "0" (the join field) or "N.FIELD" with N
   equal to 1 or 2; FIELD is 1-based in S and zero-based in
   *FIELD_INDEX.  This function returns nothing: an invalid S produces
   a diagnostic and exits.  */

static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  char const which = s[0];

  if (which == '0')
    {
      /* `0' must be all alone -- no `.FIELD'.  */
      if (s[1])
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (which == '1' || which == '2')
    {
      if (s[1] != '.')
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = which - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      error (EXIT_FAILURE, 0,
             _("invalid file number in field spec: %s"), quote (s));

      /* Tell gcc -W -Wall that execution cannot continue past this
         point; otherwise it warns that the caller's *file_index and
         *field_index might be used uninitialized.  */
      abort ();
    }
}
/* Add to `outlist' each of the field specs in STR, which are separated
   by a comma, a space or a tab.  STR is modified: each separator that
   is found is overwritten with a NUL byte.  */

static void
add_field_list (char *str)
{
  char *item = str;

  /* STR always yields at least one item, even when it is empty.  */
  while (item != NULL)
    {
      int file_index;
      size_t field_index;
      char *rest = strpbrk (item, ", \t");

      if (rest != NULL)
        *rest++ = '\0';

      decode_field_spec (item, &file_index, &field_index);
      add_field (file_index, field_index);

      item = rest;
    }
}
/* Store VAL in the join field *VAR.  If *VAR was already set (it is
   not SIZE_MAX) to a different value, report the two incompatible
   1-based field numbers and exit instead.  */

static void
set_join_field (size_t *var, size_t val)
{
  bool already_set = (*var != SIZE_MAX);

  if (already_set && *var != val)
    {
      unsigned long int var1 = *var + 1;
      unsigned long int val1 = val + 1;
      error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
             var1, val1);
    }

  *var = val;
}
/* How a non-option command-line argument is to be interpreted.  */

enum operand_status
  {
    /* The argument can only be an operand, i.e., one of the files to
       be joined.  */
    MUST_BE_OPERAND = 0,

    /* The argument follows -j1 (respectively -j2): it is either that
       option's argument or an operand.  main computes the second value
       as MIGHT_BE_J1_ARG + 1, so the two must stay consecutive.  */
    MIGHT_BE_J1_ARG = 1,
    MIGHT_BE_J2_ARG = 2,

    /* The argument follows -o: it is either a continuation of that
       option's argument or an operand.  */
    MIGHT_BE_O_ARG = 3
  };
928 /* Add NAME to the array of input file NAMES with operand statuses
929 OPERAND_STATUS; currently there are NFILES names in the list. */
931 static void
932 add_file_name (char *name, char *names[2],
933 int operand_status[2], int joption_count[2], int *nfiles,
934 int *prev_optc_status, int *optc_status)
936 int n = *nfiles;
938 if (n == 2)
940 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
941 char *arg = names[op0];
942 switch (operand_status[op0])
944 case MUST_BE_OPERAND:
945 error (0, 0, _("extra operand %s"), quote (name));
946 usage (EXIT_FAILURE);
948 case MIGHT_BE_J1_ARG:
949 joption_count[0]--;
950 set_join_field (&join_field_1, string_to_join_field (arg));
951 break;
953 case MIGHT_BE_J2_ARG:
954 joption_count[1]--;
955 set_join_field (&join_field_2, string_to_join_field (arg));
956 break;
958 case MIGHT_BE_O_ARG:
959 add_field_list (arg);
960 break;
962 if (!op0)
964 operand_status[0] = operand_status[1];
965 names[0] = names[1];
967 n = 1;
970 operand_status[n] = *prev_optc_status;
971 names[n] = name;
972 *nfiles = n + 1;
973 if (*prev_optc_status == MIGHT_BE_O_ARG)
974 *optc_status = MIGHT_BE_O_ARG;
978 main (int argc, char **argv)
980 int optc_status;
981 int prev_optc_status = MUST_BE_OPERAND;
982 int operand_status[2];
983 int joption_count[2] = { 0, 0 };
984 char *names[2];
985 FILE *fp1, *fp2;
986 int optc;
987 int nfiles = 0;
988 int i;
990 initialize_main (&argc, &argv);
991 set_program_name (argv[0]);
992 setlocale (LC_ALL, "");
993 bindtextdomain (PACKAGE, LOCALEDIR);
994 textdomain (PACKAGE);
995 hard_LC_COLLATE = hard_locale (LC_COLLATE);
997 atexit (close_stdout);
998 atexit (free_spareline);
1000 print_pairables = true;
1001 seen_unpairable = false;
1002 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1003 check_input_order = CHECK_ORDER_DEFAULT;
1005 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
1006 longopts, NULL))
1007 != -1)
1009 optc_status = MUST_BE_OPERAND;
1011 switch (optc)
1013 case 'v':
1014 print_pairables = false;
1015 /* Fall through. */
1017 case 'a':
1019 unsigned long int val;
1020 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1021 || (val != 1 && val != 2))
1022 error (EXIT_FAILURE, 0,
1023 _("invalid field number: %s"), quote (optarg));
1024 if (val == 1)
1025 print_unpairables_1 = true;
1026 else
1027 print_unpairables_2 = true;
1029 break;
1031 case 'e':
1032 if (empty_filler && ! STREQ (empty_filler, optarg))
1033 error (EXIT_FAILURE, 0,
1034 _("conflicting empty-field replacement strings"));
1035 empty_filler = optarg;
1036 break;
1038 case 'i':
1039 ignore_case = true;
1040 break;
1042 case '1':
1043 set_join_field (&join_field_1, string_to_join_field (optarg));
1044 break;
1046 case '2':
1047 set_join_field (&join_field_2, string_to_join_field (optarg));
1048 break;
1050 case 'j':
1051 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1052 && optarg == argv[optind - 1] + 2)
1054 /* The argument was either "-j1" or "-j2". */
1055 bool is_j2 = (optarg[0] == '2');
1056 joption_count[is_j2]++;
1057 optc_status = MIGHT_BE_J1_ARG + is_j2;
1059 else
1061 set_join_field (&join_field_1, string_to_join_field (optarg));
1062 set_join_field (&join_field_2, join_field_1);
1064 break;
1066 case 'o':
1067 if (STREQ (optarg, "auto"))
1068 autoformat = true;
1069 else
1071 add_field_list (optarg);
1072 optc_status = MIGHT_BE_O_ARG;
1074 break;
1076 case 't':
1078 unsigned char newtab = optarg[0];
1079 if (! newtab)
1080 newtab = '\n'; /* '' => process the whole line. */
1081 else if (optarg[1])
1083 if (STREQ (optarg, "\\0"))
1084 newtab = '\0';
1085 else
1086 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1087 quote (optarg));
1089 if (0 <= tab && tab != newtab)
1090 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1091 tab = newtab;
1093 break;
1095 case NOCHECK_ORDER_OPTION:
1096 check_input_order = CHECK_ORDER_DISABLED;
1097 break;
1099 case CHECK_ORDER_OPTION:
1100 check_input_order = CHECK_ORDER_ENABLED;
1101 break;
1103 case 1: /* Non-option argument. */
1104 add_file_name (optarg, names, operand_status, joption_count,
1105 &nfiles, &prev_optc_status, &optc_status);
1106 break;
1108 case HEADER_LINE_OPTION:
1109 join_header_lines = true;
1110 break;
1112 case_GETOPT_HELP_CHAR;
1114 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1116 default:
1117 usage (EXIT_FAILURE);
1120 prev_optc_status = optc_status;
1123 /* Process any operands after "--". */
1124 prev_optc_status = MUST_BE_OPERAND;
1125 while (optind < argc)
1126 add_file_name (argv[optind++], names, operand_status, joption_count,
1127 &nfiles, &prev_optc_status, &optc_status);
1129 if (nfiles != 2)
1131 if (nfiles == 0)
1132 error (0, 0, _("missing operand"));
1133 else
1134 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1135 usage (EXIT_FAILURE);
1138 /* If "-j1" was specified and it turns out not to have had an argument,
1139 treat it as "-j 1". Likewise for -j2. */
1140 for (i = 0; i < 2; i++)
1141 if (joption_count[i] != 0)
1143 set_join_field (&join_field_1, i);
1144 set_join_field (&join_field_2, i);
1147 if (join_field_1 == SIZE_MAX)
1148 join_field_1 = 0;
1149 if (join_field_2 == SIZE_MAX)
1150 join_field_2 = 0;
1152 fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r");
1153 if (!fp1)
1154 error (EXIT_FAILURE, errno, "%s", names[0]);
1155 fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r");
1156 if (!fp2)
1157 error (EXIT_FAILURE, errno, "%s", names[1]);
1158 if (fp1 == fp2)
1159 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1160 join (fp1, fp2);
1162 if (fclose (fp1) != 0)
1163 error (EXIT_FAILURE, errno, "%s", names[0]);
1164 if (fclose (fp2) != 0)
1165 error (EXIT_FAILURE, errno, "%s", names[1]);
1167 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1168 exit (EXIT_FAILURE);
1169 else
1170 exit (EXIT_SUCCESS);