doc: clarify the operation of wc -L
[coreutils.git] / src / join.c
blob52e4b18b39d4116a0121abf3c08510a19534f018
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "error.h"
27 #include "fadvise.h"
28 #include "hard-locale.h"
29 #include "linebuffer.h"
30 #include "memcasecmp.h"
31 #include "quote.h"
32 #include "stdio--.h"
33 #include "xmemcoll.h"
34 #include "xstrtol.h"
35 #include "argmatch.h"
/* Canonical program name used in diagnostics and --version output
   (i.e., without any installation prefix such as 'g').  */
#define PROGRAM_NAME "join"

/* Author list handed to the --version machinery.  */
#define AUTHORS proper_name ("Mike Haertel")

/* From here on the preprocessor rewrites the identifier 'join' to
   'system_join', so the static function spelled 'join' further down is
   really compiled under that name.
   NOTE(review): presumably this sidesteps a clash with a 'join' symbol
   declared by some system header -- confirm before removing.  */
#define join system_join
/* Exchange the two 'struct line *' lvalues A and B.

   A and B are each expanded twice, so they must be free of side
   effects.  The expansion is a single 'do { ... } while (0)' statement
   WITHOUT a trailing semicolon: the caller supplies it.  That keeps
   'if (c) SWAPLINES (x, y); else ...' well-formed, which a stray ';'
   inside the macro would break.  */
#define SWAPLINES(a, b) do { \
  struct line *tmp = (a); \
  (a) = (b); \
  (b) = tmp; \
} while (0)
/* One node of the singly linked list that says which fields to print
   on each output line.  */
struct outlist
  {
    /* File number: 0, 1, or 2.  0 means use the join field.
       1 means use the first file argument, 2 the second.  */
    int file;

    /* Field index (zero-based), specified only when FILE is 1 or 2.  */
    size_t field;

    /* Next specification in output order, or NULL at the end.  */
    struct outlist *next;
  };
/* A field of a line: a (pointer, length) view into the line's buffer.
   The bytes are not NUL-terminated; LEN delimits them.  */
struct field
  {
    char *beg;			/* First character in field.  */
    size_t len;			/* The length of the field.  */
  };
71 /* A line read from an input file. */
72 struct line
74 struct linebuffer buf; /* The line itself. */
75 size_t nfields; /* Number of elements in 'fields'. */
76 size_t nfields_allocated; /* Number of elements allocated for 'fields'. */
77 struct field *fields;
/* One or more consecutive lines read from a file that all have the
   same join field value.  */
struct seq
  {
    size_t count;		/* Elements used in 'lines'.  */
    size_t alloc;		/* Elements allocated in 'lines'.  */
    struct line **lines;	/* Array of ALLOC line pointers.  */
  };
/* Per-file state below is indexed by file number minus one:
   element 0 belongs to FILE1 and element 1 to FILE2.  */

/* The line most recently read from each file.  */
static struct line *prevline[2] = { NULL, NULL };

/* How many lines have been read from each file so far.  */
static uintmax_t line_no[2] = { 0, 0 };

/* The names of the two input files.  */
static char *g_names[2];

/* An extra line buffer for each file.  It is needed when two
   consecutive lines would otherwise be read into the same buffer:
   the previous line must survive until ordering has been checked.  */
static struct line *spareline[2] = { NULL, NULL };

/* True if the LC_COLLATE locale is hard.  */
static bool hard_LC_COLLATE;

/* Whether to print unpairable lines from file 1, respectively file 2.  */
static bool print_unpairables_1;
static bool print_unpairables_2;

/* Whether to print pairable (joined) lines.  */
static bool print_pairables;

/* Set once at least one unpairable line has been seen.  */
static bool seen_unpairable;

/* Set, per file, once a disorder warning has been issued for it.  */
static bool issued_disorder_warning[2];

/* Replacement text for empty or missing output fields; NULL if none.  */
static char const *empty_filler;

/* Whether every output line must carry the same number of fields.  */
static bool autoformat;

/* Number of fields to output for lines of file 1 and file 2.
   Only significant when 'autoformat' is true.  */
static size_t autocount_1;
static size_t autocount_2;

/* Zero-based index of the join field in each file; SIZE_MAX means it
   has not been determined yet.  */
static size_t join_field_1 = SIZE_MAX;
static size_t join_field_2 = SIZE_MAX;
132 /* List of fields to print. */
133 static struct outlist outlist_head;
135 /* Last element in 'outlist', where a new element can be added. */
136 static struct outlist *outlist_end = &outlist_head;
138 /* Tab character separating fields. If negative, fields are separated
139 by any nonempty string of blanks, otherwise by exactly one
140 tab character whose value (when cast to unsigned char) equals TAB. */
141 static int tab = -1;
/* How strictly input ordering is checked.  This is a tri-state, not a
   boolean:
     CHECK_ORDER_DEFAULT   - check only once an unpairable line has been
                             seen; disorder is reported as a warning.
     CHECK_ORDER_ENABLED   - always check; disorder is a fatal error.
     CHECK_ORDER_DISABLED  - never check.  */
static enum
  {
    CHECK_ORDER_DEFAULT,
    CHECK_ORDER_ENABLED,
    CHECK_ORDER_DISABLED
  } check_input_order;
/* Codes returned by getopt_long for long options that have no
   single-character equivalent.  They start above CHAR_MAX so that
   they can never collide with a short option character.  */
enum
  {
    CHECK_ORDER_OPTION = CHAR_MAX + 1,
    NOCHECK_ORDER_OPTION,
    HEADER_LINE_OPTION
  };
159 static struct option const longopts[] =
161 {"ignore-case", no_argument, NULL, 'i'},
162 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
163 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
164 {"zero-terminated", no_argument, NULL, 'z'},
165 {"header", no_argument, NULL, HEADER_LINE_OPTION},
166 {GETOPT_HELP_OPTION_DECL},
167 {GETOPT_VERSION_OPTION_DECL},
168 {NULL, 0, NULL, 0}
171 /* Used to print non-joining lines */
172 static struct line uni_blank;
174 /* If nonzero, ignore case when comparing join fields. */
175 static bool ignore_case;
177 /* If nonzero, treat the first line of each file as column headers --
178 join them without checking for ordering */
179 static bool join_header_lines;
181 /* The character marking end of line. Default to \n. */
182 static char eolchar = '\n';
/* Print usage information and exit with STATUS.

   When STATUS is not EXIT_SUCCESS only the short hint produced by
   emit_try_help is emitted; otherwise the complete help text is
   written to stdout.  Callers rely on this function not returning.

   NOTE(review): this listing appears to have lost every source line
   shorter than four characters (lone braces, the `\n\` continuation
   lines, the `"),` closing the first format string) and runs of spaces
   inside the help strings look collapsed.  Restore the text from the
   upstream file rather than editing the literals here.  */
184 void
185 usage (int status)
187 if (status != EXIT_SUCCESS)
188 emit_try_help ();
189 else
/* Synopsis line; %s is filled in with program_name.  */
191 printf (_("\
192 Usage: %s [OPTION]... FILE1 FILE2\n\
194 program_name);
195 fputs (_("\
196 For each pair of input lines with identical join fields, write a line to\n\
197 standard output. The default join field is the first, delimited by whitespace.\
199 "), stdout);
200 fputs (_("\
202 When FILE1 or FILE2 (not both) is -, read standard input.\n\
203 "), stdout);
/* Option descriptions, split over several translatable strings.  */
204 fputs (_("\
206 -a FILENUM also print unpairable lines from file FILENUM, where\n\
207 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
208 -e EMPTY replace missing input fields with EMPTY\n\
209 "), stdout);
210 fputs (_("\
211 -i, --ignore-case ignore differences in case when comparing fields\n\
212 -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
213 -o FORMAT obey FORMAT while constructing output line\n\
214 -t CHAR use CHAR as input and output field separator\n\
215 "), stdout);
216 fputs (_("\
217 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
218 -1 FIELD join on this FIELD of file 1\n\
219 -2 FIELD join on this FIELD of file 2\n\
220 --check-order check that the input is correctly sorted, even\n\
221 if all input lines are pairable\n\
222 --nocheck-order do not check that the input is correctly sorted\n\
223 --header treat the first line in each file as field headers,\n\
224 print them without trying to pair them\n\
225 "), stdout);
226 fputs (_("\
227 -z, --zero-terminated line delimiter is NUL, not newline\n\
228 "), stdout);
/* Shared descriptions of --help and --version.  */
229 fputs (HELP_OPTION_DESCRIPTION, stdout);
230 fputs (VERSION_OPTION_DESCRIPTION, stdout);
/* Notes on field numbering, the FORMAT syntax and the sorting
   requirement.  */
231 fputs (_("\
233 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
234 else fields are separated by CHAR. Any FIELD is a field number counted\n\
235 from 1. FORMAT is one or more comma or blank separated specifications,\n\
236 each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
237 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
238 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
239 line of each file determines the number of fields output for each line.\n\
241 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
242 E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
243 or use \"join -t ''\" if 'sort' has no options.\n\
244 Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\
245 If the input is not sorted and some lines cannot be joined, a\n\
246 warning message will be given.\n\
247 "), stdout);
248 emit_ancillary_info (PROGRAM_NAME);
/* Terminate the program with the caller-supplied status.  */
250 exit (status);
253 /* Record a field in LINE, with location FIELD and size LEN. */
255 static void
256 extract_field (struct line *line, char *field, size_t len)
258 if (line->nfields >= line->nfields_allocated)
260 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
262 line->fields[line->nfields].beg = field;
263 line->fields[line->nfields].len = len;
264 ++(line->nfields);
267 /* Fill in the 'fields' structure in LINE. */
269 static void
270 xfields (struct line *line)
272 char *ptr = line->buf.buffer;
273 char const *lim = ptr + line->buf.length - 1;
275 if (ptr == lim)
276 return;
278 if (0 <= tab && tab != '\n')
280 char *sep;
281 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
282 extract_field (line, ptr, sep - ptr);
284 else if (tab < 0)
286 /* Skip leading blanks before the first field. */
287 while (isblank (to_uchar (*ptr)))
288 if (++ptr == lim)
289 return;
293 char *sep;
294 for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
295 continue;
296 extract_field (line, ptr, sep - ptr);
297 if (sep == lim)
298 return;
299 for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
300 continue;
302 while (ptr != lim);
305 extract_field (line, ptr, lim - ptr);
308 static void
309 freeline (struct line *line)
311 if (line == NULL)
312 return;
313 free (line->fields);
314 line->fields = NULL;
315 free (line->buf.buffer);
316 line->buf.buffer = NULL;
319 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
320 >0 if it compares greater; 0 if it compares equal.
321 Report an error and exit if the comparison fails.
322 Use join fields JF_1 and JF_2 respectively. */
324 static int
325 keycmp (struct line const *line1, struct line const *line2,
326 size_t jf_1, size_t jf_2)
328 /* Start of field to compare in each file. */
329 char *beg1;
330 char *beg2;
332 size_t len1;
333 size_t len2; /* Length of fields to compare. */
334 int diff;
336 if (jf_1 < line1->nfields)
338 beg1 = line1->fields[jf_1].beg;
339 len1 = line1->fields[jf_1].len;
341 else
343 beg1 = NULL;
344 len1 = 0;
347 if (jf_2 < line2->nfields)
349 beg2 = line2->fields[jf_2].beg;
350 len2 = line2->fields[jf_2].len;
352 else
354 beg2 = NULL;
355 len2 = 0;
358 if (len1 == 0)
359 return len2 == 0 ? 0 : -1;
360 if (len2 == 0)
361 return 1;
363 if (ignore_case)
365 /* FIXME: ignore_case does not work with NLS (in particular,
366 with multibyte chars). */
367 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
369 else
371 if (hard_LC_COLLATE)
372 return xmemcoll (beg1, len1, beg2, len2);
373 diff = memcmp (beg1, beg2, MIN (len1, len2));
376 if (diff)
377 return diff;
378 return len1 < len2 ? -1 : len1 != len2;
381 /* Check that successive input lines PREV and CURRENT from input file
382 WHATFILE are presented in order, unless the user may be relying on
383 the GNU extension that input lines may be out of order if no input
384 lines are unpairable.
386 If the user specified --nocheck-order, the check is not made.
387 If the user specified --check-order, the problem is fatal.
388 Otherwise (the default), the message is simply a warning.
390 A message is printed at most once per input file. */
392 static void
393 check_order (const struct line *prev,
394 const struct line *current,
395 int whatfile)
397 if (check_input_order != CHECK_ORDER_DISABLED
398 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
400 if (!issued_disorder_warning[whatfile-1])
402 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
403 if (keycmp (prev, current, join_field, join_field) > 0)
405 /* Exclude any trailing newline. */
406 size_t len = current->buf.length;
407 if (0 < len && current->buf.buffer[len - 1] == '\n')
408 --len;
410 /* If the offending line is longer than INT_MAX, output
411 only the first INT_MAX bytes in this diagnostic. */
412 len = MIN (INT_MAX, len);
414 error ((check_input_order == CHECK_ORDER_ENABLED
415 ? EXIT_FAILURE : 0),
416 0, _("%s:%"PRIuMAX": is not sorted: %.*s"),
417 g_names[whatfile - 1], line_no[whatfile - 1],
418 (int) len, current->buf.buffer);
420 /* If we get to here, the message was merely a warning.
421 Arrange to issue it only once per file. */
422 issued_disorder_warning[whatfile-1] = true;
428 static inline void
429 reset_line (struct line *line)
431 line->nfields = 0;
434 static struct line *
435 init_linep (struct line **linep)
437 struct line *line = xcalloc (1, sizeof *line);
438 *linep = line;
439 return line;
442 /* Read a line from FP into LINE and split it into fields.
443 Return true if successful. */
445 static bool
446 get_line (FILE *fp, struct line **linep, int which)
448 struct line *line = *linep;
450 if (line == prevline[which - 1])
452 SWAPLINES (line, spareline[which - 1]);
453 *linep = line;
456 if (line)
457 reset_line (line);
458 else
459 line = init_linep (linep);
461 if (! readlinebuffer_delim (&line->buf, fp, eolchar))
463 if (ferror (fp))
464 error (EXIT_FAILURE, errno, _("read error"));
465 freeline (line);
466 return false;
468 ++line_no[which - 1];
470 xfields (line);
472 if (prevline[which - 1])
473 check_order (prevline[which - 1], line, which);
475 prevline[which - 1] = line;
476 return true;
479 static void
480 free_spareline (void)
482 size_t i;
484 for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
486 if (spareline[i])
488 freeline (spareline[i]);
489 free (spareline[i]);
494 static void
495 initseq (struct seq *seq)
497 seq->count = 0;
498 seq->alloc = 0;
499 seq->lines = NULL;
502 /* Read a line from FP and add it to SEQ. Return true if successful. */
504 static bool
505 getseq (FILE *fp, struct seq *seq, int whichfile)
507 if (seq->count == seq->alloc)
509 size_t i;
510 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
511 for (i = seq->count; i < seq->alloc; i++)
512 seq->lines[i] = NULL;
515 if (get_line (fp, &seq->lines[seq->count], whichfile))
517 ++seq->count;
518 return true;
520 return false;
523 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
524 true, else as the next. */
525 static bool
526 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
528 if (first)
529 seq->count = 0;
531 return getseq (fp, seq, whichfile);
534 static void
535 delseq (struct seq *seq)
537 size_t i;
538 for (i = 0; i < seq->alloc; i++)
540 freeline (seq->lines[i]);
541 free (seq->lines[i]);
543 free (seq->lines);
547 /* Print field N of LINE if it exists and is nonempty, otherwise
548 'empty_filler' if it is nonempty. */
550 static void
551 prfield (size_t n, struct line const *line)
553 size_t len;
555 if (n < line->nfields)
557 len = line->fields[n].len;
558 if (len)
559 fwrite (line->fields[n].beg, 1, len, stdout);
560 else if (empty_filler)
561 fputs (empty_filler, stdout);
563 else if (empty_filler)
564 fputs (empty_filler, stdout);
567 /* Output all the fields in line, other than the join field. */
569 static void
570 prfields (struct line const *line, size_t join_field, size_t autocount)
572 size_t i;
573 size_t nfields = autoformat ? autocount : line->nfields;
574 char output_separator = tab < 0 ? ' ' : tab;
576 for (i = 0; i < join_field && i < nfields; ++i)
578 putchar (output_separator);
579 prfield (i, line);
581 for (i = join_field + 1; i < nfields; ++i)
583 putchar (output_separator);
584 prfield (i, line);
588 /* Print the join of LINE1 and LINE2. */
590 static void
591 prjoin (struct line const *line1, struct line const *line2)
593 const struct outlist *outlist;
594 char output_separator = tab < 0 ? ' ' : tab;
595 size_t field;
596 struct line const *line;
598 outlist = outlist_head.next;
599 if (outlist)
601 const struct outlist *o;
603 o = outlist;
604 while (1)
606 if (o->file == 0)
608 if (line1 == &uni_blank)
610 line = line2;
611 field = join_field_2;
613 else
615 line = line1;
616 field = join_field_1;
619 else
621 line = (o->file == 1 ? line1 : line2);
622 field = o->field;
624 prfield (field, line);
625 o = o->next;
626 if (o == NULL)
627 break;
628 putchar (output_separator);
630 putchar (eolchar);
632 else
634 if (line1 == &uni_blank)
636 line = line2;
637 field = join_field_2;
639 else
641 line = line1;
642 field = join_field_1;
645 /* Output the join field. */
646 prfield (field, line);
648 /* Output other fields. */
649 prfields (line1, join_field_1, autocount_1);
650 prfields (line2, join_field_2, autocount_2);
652 putchar (eolchar);
656 /* Print the join of the files in FP1 and FP2. */
658 static void
659 join (FILE *fp1, FILE *fp2)
661 struct seq seq1, seq2;
662 int diff;
663 bool eof1, eof2;
665 fadvise (fp1, FADVISE_SEQUENTIAL);
666 fadvise (fp2, FADVISE_SEQUENTIAL);
668 /* Read the first line of each file. */
669 initseq (&seq1);
670 getseq (fp1, &seq1, 1);
671 initseq (&seq2);
672 getseq (fp2, &seq2, 2);
674 if (autoformat)
676 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
677 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
680 if (join_header_lines && (seq1.count || seq2.count))
682 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
683 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
684 prjoin (hline1, hline2);
685 prevline[0] = NULL;
686 prevline[1] = NULL;
687 if (seq1.count)
688 advance_seq (fp1, &seq1, true, 1);
689 if (seq2.count)
690 advance_seq (fp2, &seq2, true, 2);
693 while (seq1.count && seq2.count)
695 size_t i;
696 diff = keycmp (seq1.lines[0], seq2.lines[0],
697 join_field_1, join_field_2);
698 if (diff < 0)
700 if (print_unpairables_1)
701 prjoin (seq1.lines[0], &uni_blank);
702 advance_seq (fp1, &seq1, true, 1);
703 seen_unpairable = true;
704 continue;
706 if (diff > 0)
708 if (print_unpairables_2)
709 prjoin (&uni_blank, seq2.lines[0]);
710 advance_seq (fp2, &seq2, true, 2);
711 seen_unpairable = true;
712 continue;
715 /* Keep reading lines from file1 as long as they continue to
716 match the current line from file2. */
717 eof1 = false;
719 if (!advance_seq (fp1, &seq1, false, 1))
721 eof1 = true;
722 ++seq1.count;
723 break;
725 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
726 join_field_1, join_field_2));
728 /* Keep reading lines from file2 as long as they continue to
729 match the current line from file1. */
730 eof2 = false;
732 if (!advance_seq (fp2, &seq2, false, 2))
734 eof2 = true;
735 ++seq2.count;
736 break;
738 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
739 join_field_1, join_field_2));
741 if (print_pairables)
743 for (i = 0; i < seq1.count - 1; ++i)
745 size_t j;
746 for (j = 0; j < seq2.count - 1; ++j)
747 prjoin (seq1.lines[i], seq2.lines[j]);
751 if (!eof1)
753 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
754 seq1.count = 1;
756 else
757 seq1.count = 0;
759 if (!eof2)
761 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
762 seq2.count = 1;
764 else
765 seq2.count = 0;
768 /* If the user did not specify --nocheck-order, then we read the
769 tail ends of both inputs to verify that they are in order. We
770 skip the rest of the tail once we have issued a warning for that
771 file, unless we actually need to print the unpairable lines. */
772 struct line *line = NULL;
773 bool checktail = false;
775 if (check_input_order != CHECK_ORDER_DISABLED
776 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
777 checktail = true;
779 if ((print_unpairables_1 || checktail) && seq1.count)
781 if (print_unpairables_1)
782 prjoin (seq1.lines[0], &uni_blank);
783 if (seq2.count)
784 seen_unpairable = true;
785 while (get_line (fp1, &line, 1))
787 if (print_unpairables_1)
788 prjoin (line, &uni_blank);
789 if (issued_disorder_warning[0] && !print_unpairables_1)
790 break;
794 if ((print_unpairables_2 || checktail) && seq2.count)
796 if (print_unpairables_2)
797 prjoin (&uni_blank, seq2.lines[0]);
798 if (seq1.count)
799 seen_unpairable = true;
800 while (get_line (fp2, &line, 2))
802 if (print_unpairables_2)
803 prjoin (&uni_blank, line);
804 if (issued_disorder_warning[1] && !print_unpairables_2)
805 break;
809 freeline (line);
810 free (line);
812 delseq (&seq1);
813 delseq (&seq2);
816 /* Add a field spec for field FIELD of file FILE to 'outlist'. */
818 static void
819 add_field (int file, size_t field)
821 struct outlist *o;
823 assert (file == 0 || file == 1 || file == 2);
824 assert (file != 0 || field == 0);
826 o = xmalloc (sizeof *o);
827 o->file = file;
828 o->field = field;
829 o->next = NULL;
831 /* Add to the end of the list so the fields are in the right order. */
832 outlist_end->next = o;
833 outlist_end = o;
836 /* Convert a string of decimal digits, STR (the 1-based join field number),
837 to an integral value. Upon successful conversion, return one less
838 (the zero-based field number). Silently convert too-large values
839 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
840 diagnostic and exit. */
842 static size_t
843 string_to_join_field (char const *str)
845 size_t result;
846 unsigned long int val;
847 verify (SIZE_MAX <= ULONG_MAX);
849 strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
850 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
851 val = SIZE_MAX;
852 else if (s_err != LONGINT_OK || val == 0)
853 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
855 result = val - 1;
857 return result;
/* Convert a single field specifier string, S, to a *FILE_INDEX, *FIELD_INDEX
   pair.  S is either "0" (the join field) or "FILENUM.FIELD" with
   FILENUM being 1 or 2.  In S, the field index string is 1-based;
   *FIELD_INDEX is zero-based.  Nothing is returned: if S is invalid,
   give a diagnostic and exit.  */

static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  /* The first character must be 0, 1, or 2.  */
  switch (s[0])
    {
    case '0':
      if (s[1])
        {
          /* '0' must be all alone -- no '.FIELD'.  */
          error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
        }
      *file_index = 0;
      *field_index = 0;
      break;

    case '1':
    case '2':
      if (s[1] != '.')
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = s[0] - '0';
      *field_index = string_to_join_field (s + 2);
      break;

    default:
      error (EXIT_FAILURE, 0,
             _("invalid file number in field spec: %s"), quote (s));

      /* Tell gcc -W -Wall that we can't get beyond this point.
         This avoids a warning (otherwise legit) that the caller's copies
         of *file_index and *field_index might be used uninitialized.  */
      abort ();

      break;
    }
}
/* Add the comma or blank separated field spec(s) in STR to the output
   list.  STR is modified in place: each separator (',', ' ' or tab)
   is overwritten with NUL so that the individual specs can be decoded
   as strings.  At least one spec is always decoded, so an empty STR
   is diagnosed by decode_field_spec.  */

static void
add_field_list (char *str)
{
  char *p = str;

  do
    {
      int file_index;
      size_t field_index;
      char const *spec_item = p;

      /* Terminate this item and step to the start of the next one;
         P becomes NULL after the last item.  */
      p = strpbrk (p, ", \t");
      if (p)
        *p++ = '\0';
      decode_field_spec (spec_item, &file_index, &field_index);
      add_field (file_index, field_index);
    }
  while (p);
}
/* Set the join field *VAR to VAL, but report an error and exit if
   *VAR is set more than once to incompatible values.  SIZE_MAX in
   *VAR means "not set yet".  The diagnostic shows the 1-based field
   numbers the user typed.  */

static void
set_join_field (size_t *var, size_t val)
{
  if (*var != SIZE_MAX && *var != val)
    {
      unsigned long int var1 = *var + 1;
      unsigned long int val1 = val + 1;
      error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
             var1, val1);
    }
  *var = val;
}
/* Status of command-line arguments.  main computes
   'MIGHT_BE_J1_ARG + is_j2', so MIGHT_BE_J2_ARG must directly follow
   MIGHT_BE_J1_ARG.  */

enum operand_status
  {
    /* This argument must be an operand, i.e., one of the files to be
       joined.  */
    MUST_BE_OPERAND,

    /* This might be the argument of the preceding -j1 or -j2 option,
       or it might be an operand.  */
    MIGHT_BE_J1_ARG,
    MIGHT_BE_J2_ARG,

    /* This might be the argument of the preceding -o option, or it might be
       an operand.  */
    MIGHT_BE_O_ARG
  };
957 /* Add NAME to the array of input file NAMES with operand statuses
958 OPERAND_STATUS; currently there are NFILES names in the list. */
960 static void
961 add_file_name (char *name, char *names[2],
962 int operand_status[2], int joption_count[2], int *nfiles,
963 int *prev_optc_status, int *optc_status)
965 int n = *nfiles;
967 if (n == 2)
969 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
970 char *arg = names[op0];
971 switch (operand_status[op0])
973 case MUST_BE_OPERAND:
974 error (0, 0, _("extra operand %s"), quote (name));
975 usage (EXIT_FAILURE);
977 case MIGHT_BE_J1_ARG:
978 joption_count[0]--;
979 set_join_field (&join_field_1, string_to_join_field (arg));
980 break;
982 case MIGHT_BE_J2_ARG:
983 joption_count[1]--;
984 set_join_field (&join_field_2, string_to_join_field (arg));
985 break;
987 case MIGHT_BE_O_ARG:
988 add_field_list (arg);
989 break;
991 if (!op0)
993 operand_status[0] = operand_status[1];
994 names[0] = names[1];
996 n = 1;
999 operand_status[n] = *prev_optc_status;
1000 names[n] = name;
1001 *nfiles = n + 1;
1002 if (*prev_optc_status == MIGHT_BE_O_ARG)
1003 *optc_status = MIGHT_BE_O_ARG;
1007 main (int argc, char **argv)
1009 int optc_status;
1010 int prev_optc_status = MUST_BE_OPERAND;
1011 int operand_status[2];
1012 int joption_count[2] = { 0, 0 };
1013 FILE *fp1, *fp2;
1014 int optc;
1015 int nfiles = 0;
1016 int i;
1018 initialize_main (&argc, &argv);
1019 set_program_name (argv[0]);
1020 setlocale (LC_ALL, "");
1021 bindtextdomain (PACKAGE, LOCALEDIR);
1022 textdomain (PACKAGE);
1023 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1025 atexit (close_stdout);
1026 atexit (free_spareline);
1028 print_pairables = true;
1029 seen_unpairable = false;
1030 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1031 check_input_order = CHECK_ORDER_DEFAULT;
1033 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z",
1034 longopts, NULL))
1035 != -1)
1037 optc_status = MUST_BE_OPERAND;
1039 switch (optc)
1041 case 'v':
1042 print_pairables = false;
1043 /* Fall through. */
1045 case 'a':
1047 unsigned long int val;
1048 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1049 || (val != 1 && val != 2))
1050 error (EXIT_FAILURE, 0,
1051 _("invalid field number: %s"), quote (optarg));
1052 if (val == 1)
1053 print_unpairables_1 = true;
1054 else
1055 print_unpairables_2 = true;
1057 break;
1059 case 'e':
1060 if (empty_filler && ! STREQ (empty_filler, optarg))
1061 error (EXIT_FAILURE, 0,
1062 _("conflicting empty-field replacement strings"));
1063 empty_filler = optarg;
1064 break;
1066 case 'i':
1067 ignore_case = true;
1068 break;
1070 case '1':
1071 set_join_field (&join_field_1, string_to_join_field (optarg));
1072 break;
1074 case '2':
1075 set_join_field (&join_field_2, string_to_join_field (optarg));
1076 break;
1078 case 'j':
1079 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1080 && optarg == argv[optind - 1] + 2)
1082 /* The argument was either "-j1" or "-j2". */
1083 bool is_j2 = (optarg[0] == '2');
1084 joption_count[is_j2]++;
1085 optc_status = MIGHT_BE_J1_ARG + is_j2;
1087 else
1089 set_join_field (&join_field_1, string_to_join_field (optarg));
1090 set_join_field (&join_field_2, join_field_1);
1092 break;
1094 case 'o':
1095 if (STREQ (optarg, "auto"))
1096 autoformat = true;
1097 else
1099 add_field_list (optarg);
1100 optc_status = MIGHT_BE_O_ARG;
1102 break;
1104 case 't':
1106 unsigned char newtab = optarg[0];
1107 if (! newtab)
1108 newtab = '\n'; /* '' => process the whole line. */
1109 else if (optarg[1])
1111 if (STREQ (optarg, "\\0"))
1112 newtab = '\0';
1113 else
1114 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1115 quote (optarg));
1117 if (0 <= tab && tab != newtab)
1118 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1119 tab = newtab;
1121 break;
1123 case 'z':
1124 eolchar = 0;
1125 break;
1127 case NOCHECK_ORDER_OPTION:
1128 check_input_order = CHECK_ORDER_DISABLED;
1129 break;
1131 case CHECK_ORDER_OPTION:
1132 check_input_order = CHECK_ORDER_ENABLED;
1133 break;
1135 case 1: /* Non-option argument. */
1136 add_file_name (optarg, g_names, operand_status, joption_count,
1137 &nfiles, &prev_optc_status, &optc_status);
1138 break;
1140 case HEADER_LINE_OPTION:
1141 join_header_lines = true;
1142 break;
1144 case_GETOPT_HELP_CHAR;
1146 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1148 default:
1149 usage (EXIT_FAILURE);
1152 prev_optc_status = optc_status;
1155 /* Process any operands after "--". */
1156 prev_optc_status = MUST_BE_OPERAND;
1157 while (optind < argc)
1158 add_file_name (argv[optind++], g_names, operand_status, joption_count,
1159 &nfiles, &prev_optc_status, &optc_status);
1161 if (nfiles != 2)
1163 if (nfiles == 0)
1164 error (0, 0, _("missing operand"));
1165 else
1166 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1167 usage (EXIT_FAILURE);
1170 /* If "-j1" was specified and it turns out not to have had an argument,
1171 treat it as "-j 1". Likewise for -j2. */
1172 for (i = 0; i < 2; i++)
1173 if (joption_count[i] != 0)
1175 set_join_field (&join_field_1, i);
1176 set_join_field (&join_field_2, i);
1179 if (join_field_1 == SIZE_MAX)
1180 join_field_1 = 0;
1181 if (join_field_2 == SIZE_MAX)
1182 join_field_2 = 0;
1184 fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
1185 if (!fp1)
1186 error (EXIT_FAILURE, errno, "%s", g_names[0]);
1187 fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
1188 if (!fp2)
1189 error (EXIT_FAILURE, errno, "%s", g_names[1]);
1190 if (fp1 == fp2)
1191 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1192 join (fp1, fp2);
1194 if (fclose (fp1) != 0)
1195 error (EXIT_FAILURE, errno, "%s", g_names[0]);
1196 if (fclose (fp2) != 0)
1197 error (EXIT_FAILURE, errno, "%s", g_names[1]);
1199 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1200 return EXIT_FAILURE;
1201 else
1202 return EXIT_SUCCESS;