doc: refactor and update expand and unexpand --help
[coreutils.git] / src / join.c
blob082f50893f8dc87bdf71b8de7b968d71980eba27
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "die.h"
27 #include "error.h"
28 #include "fadvise.h"
29 #include "hard-locale.h"
30 #include "linebuffer.h"
31 #include "memcasecmp.h"
32 #include "quote.h"
33 #include "stdio--.h"
34 #include "xmemcoll.h"
35 #include "xstrtol.h"
36 #include "argmatch.h"
/* Official program name (never carries a 'g' prefix); handed to the
   help and version helpers.  */
#define PROGRAM_NAME "join"

/* Author list handed to the --version handler.  */
#define AUTHORS proper_name ("Mike Haertel")

/* Rewrite every later use of the identifier 'join' in this file to
   'system_join'.  NOTE(review): presumably this sidesteps a clash with
   a 'join' declared by some system header -- confirm.  */
#define join system_join
/* Exchange the two 'struct line *' lvalues A and B.
   Each argument is evaluated twice, so it must be free of side effects.
   The expansion deliberately carries no trailing semicolon: the caller
   writes 'SWAPLINES (x, y);', which is then exactly one statement and
   may be used as the unbraced body of an 'if' that has an 'else'.  */
#define SWAPLINES(a, b) do { \
  struct line *tmp = (a); \
  (a) = (b); \
  (b) = tmp; \
} while (0)
/* One node of the singly linked list that says, in order, which
   fields make up each output line.  */
struct outlist
{
  /* Source of the field: 1 is the first file argument, 2 the second,
     and 0 asks for the join field itself.  */
  int file;

  /* Zero-based field index; only meaningful when FILE is 1 or 2.  */
  size_t field;

  /* Following node in the list.  */
  struct outlist *next;
};
/* One field of an input line, described as a (start, length) pair.  */
struct field
{
  char *beg;			/* Address of the field's first byte.  */
  size_t len;			/* Number of bytes in the field.  */
};
72 /* A line read from an input file. */
73 struct line
75 struct linebuffer buf; /* The line itself. */
76 size_t nfields; /* Number of elements in 'fields'. */
77 size_t nfields_allocated; /* Number of elements allocated for 'fields'. */
78 struct field *fields;
/* A run of one or more consecutive lines from a single file that all
   carry the same join field value.  */
struct seq
{
  size_t count;			/* Entries of 'lines' currently in use.  */
  size_t alloc;			/* Entries of 'lines' that are allocated.  */
  struct line **lines;		/* The lines making up the run.  */
};
/* Most recently read line of each input file; index 0 belongs to the
   first file and index 1 to the second.  */
static struct line *prevline[2] = { NULL, NULL };

/* How many lines have been read from each input file so far.  */
static uintmax_t line_no[2] = { 0, 0 };

/* Names of the two input files.  */
static char *g_names[2];

/* One extra line buffer per input file.  Reading two consecutive lines
   into the same buffer would clobber the previous line before its
   ordering against the new one has been checked, so the reader
   alternates with this spare instead.  */
static struct line *spareline[2] = { NULL, NULL };
/* True when the LC_COLLATE locale is "hard", in which case join fields
   are compared with xmemcoll rather than memcmp.  */
static bool hard_LC_COLLATE;

/* True when unpairable lines of the first file are to be printed.  */
static bool print_unpairables_1;

/* True when unpairable lines of the second file are to be printed.  */
static bool print_unpairables_2;

/* True when pairable (joined) lines are to be printed.  */
static bool print_pairables;

/* True once at least one unpairable line has been encountered.  */
static bool seen_unpairable;

/* Per input file: true once a disorder warning was issued for it.  */
static bool issued_disorder_warning[2];

/* Text printed in place of an empty or missing output field, or NULL
   when no filler was requested.  */
static char const *empty_filler;

/* True when every output line must carry the same number of fields.  */
static bool autoformat;

/* Number of fields to print per line for the first and for the second
   file.  Only consulted while 'autoformat' is true.  */
static size_t autocount_1;
static size_t autocount_2;
129 /* Field to join on; SIZE_MAX means they haven't been determined yet. */
130 static size_t join_field_1 = SIZE_MAX;
131 static size_t join_field_2 = SIZE_MAX;
133 /* List of fields to print. */
134 static struct outlist outlist_head;
136 /* Last element in 'outlist', where a new element can be added. */
137 static struct outlist *outlist_end = &outlist_head;
139 /* Tab character separating fields. If negative, fields are separated
140 by any nonempty string of blanks, otherwise by exactly one
141 tab character whose value (when cast to unsigned char) equals TAB. */
142 static int tab = -1;
/* Policy for verifying that the input is correctly ordered: the
   default behaviour, checking explicitly enabled, or checking
   explicitly disabled.  */
static enum
{
  CHECK_ORDER_DEFAULT,
  CHECK_ORDER_ENABLED,
  CHECK_ORDER_DISABLED
} check_input_order;
/* getopt_long return codes for the options that have no single-letter
   form.  They start just above CHAR_MAX so that they cannot collide
   with any option character.  */
enum
{
  CHECK_ORDER_OPTION = CHAR_MAX + 1,
  NOCHECK_ORDER_OPTION,
  HEADER_LINE_OPTION
};
160 static struct option const longopts[] =
162 {"ignore-case", no_argument, NULL, 'i'},
163 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
164 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
165 {"zero-terminated", no_argument, NULL, 'z'},
166 {"header", no_argument, NULL, HEADER_LINE_OPTION},
167 {GETOPT_HELP_OPTION_DECL},
168 {GETOPT_VERSION_OPTION_DECL},
169 {NULL, 0, NULL, 0}
172 /* Used to print non-joining lines */
173 static struct line uni_blank;
175 /* If nonzero, ignore case when comparing join fields. */
176 static bool ignore_case;
178 /* If nonzero, treat the first line of each file as column headers --
179 join them without checking for ordering */
180 static bool join_header_lines;
182 /* The character marking end of line. Default to \n. */
183 static char eolchar = '\n';
/* Print help for the program and terminate the process with STATUS.
   A STATUS other than EXIT_SUCCESS produces only the emit_try_help ()
   hint; EXIT_SUCCESS prints the full option summary to stdout followed
   by emit_ancillary_info ().  Never returns: it ends in exit (STATUS).
   NOTE(review): this copy of the function looks like extraction residue
   (embedded line numbers, missing brace-only and string continuation
   lines, collapsed spacing inside the help strings) -- restore it from a
   pristine source rather than hand-editing the string literals.  */
185 void
186 usage (int status)
188 if (status != EXIT_SUCCESS)
189 emit_try_help ();
190 else
192 printf (_("\
193 Usage: %s [OPTION]... FILE1 FILE2\n\
195 program_name);
196 fputs (_("\
197 For each pair of input lines with identical join fields, write a line to\n\
198 standard output. The default join field is the first, delimited by blanks.\
200 "), stdout);
201 fputs (_("\
203 When FILE1 or FILE2 (not both) is -, read standard input.\n\
204 "), stdout);
205 fputs (_("\
207 -a FILENUM also print unpairable lines from file FILENUM, where\n\
208 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
209 -e EMPTY replace missing input fields with EMPTY\n\
210 "), stdout);
211 fputs (_("\
212 -i, --ignore-case ignore differences in case when comparing fields\n\
213 -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
214 -o FORMAT obey FORMAT while constructing output line\n\
215 -t CHAR use CHAR as input and output field separator\n\
216 "), stdout);
217 fputs (_("\
218 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
219 -1 FIELD join on this FIELD of file 1\n\
220 -2 FIELD join on this FIELD of file 2\n\
221 --check-order check that the input is correctly sorted, even\n\
222 if all input lines are pairable\n\
223 --nocheck-order do not check that the input is correctly sorted\n\
224 --header treat the first line in each file as field headers,\n\
225 print them without trying to pair them\n\
226 "), stdout);
227 fputs (_("\
228 -z, --zero-terminated line delimiter is NUL, not newline\n\
229 "), stdout);
230 fputs (HELP_OPTION_DESCRIPTION, stdout);
231 fputs (VERSION_OPTION_DESCRIPTION, stdout);
232 fputs (_("\
234 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
235 else fields are separated by CHAR. Any FIELD is a field number counted\n\
236 from 1. FORMAT is one or more comma or blank separated specifications,\n\
237 each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
238 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
239 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
240 line of each file determines the number of fields output for each line.\n\
242 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
243 E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
244 or use \"join -t ''\" if 'sort' has no options.\n\
245 Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\
246 If the input is not sorted and some lines cannot be joined, a\n\
247 warning message will be given.\n\
248 "), stdout);
249 emit_ancillary_info (PROGRAM_NAME);
251 exit (status);
254 /* Record a field in LINE, with location FIELD and size LEN. */
256 static void
257 extract_field (struct line *line, char *field, size_t len)
259 if (line->nfields >= line->nfields_allocated)
261 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
263 line->fields[line->nfields].beg = field;
264 line->fields[line->nfields].len = len;
265 ++(line->nfields);
268 /* Fill in the 'fields' structure in LINE. */
270 static void
271 xfields (struct line *line)
273 char *ptr = line->buf.buffer;
274 char const *lim = ptr + line->buf.length - 1;
276 if (ptr == lim)
277 return;
279 if (0 <= tab && tab != '\n')
281 char *sep;
282 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
283 extract_field (line, ptr, sep - ptr);
285 else if (tab < 0)
287 /* Skip leading blanks before the first field. */
288 while (field_sep (*ptr))
289 if (++ptr == lim)
290 return;
294 char *sep;
295 for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++)
296 continue;
297 extract_field (line, ptr, sep - ptr);
298 if (sep == lim)
299 return;
300 for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++)
301 continue;
303 while (ptr != lim);
306 extract_field (line, ptr, lim - ptr);
309 static void
310 freeline (struct line *line)
312 if (line == NULL)
313 return;
314 free (line->fields);
315 line->fields = NULL;
316 free (line->buf.buffer);
317 line->buf.buffer = NULL;
320 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
321 >0 if it compares greater; 0 if it compares equal.
322 Report an error and exit if the comparison fails.
323 Use join fields JF_1 and JF_2 respectively. */
325 static int
326 keycmp (struct line const *line1, struct line const *line2,
327 size_t jf_1, size_t jf_2)
329 /* Start of field to compare in each file. */
330 char *beg1;
331 char *beg2;
333 size_t len1;
334 size_t len2; /* Length of fields to compare. */
335 int diff;
337 if (jf_1 < line1->nfields)
339 beg1 = line1->fields[jf_1].beg;
340 len1 = line1->fields[jf_1].len;
342 else
344 beg1 = NULL;
345 len1 = 0;
348 if (jf_2 < line2->nfields)
350 beg2 = line2->fields[jf_2].beg;
351 len2 = line2->fields[jf_2].len;
353 else
355 beg2 = NULL;
356 len2 = 0;
359 if (len1 == 0)
360 return len2 == 0 ? 0 : -1;
361 if (len2 == 0)
362 return 1;
364 if (ignore_case)
366 /* FIXME: ignore_case does not work with NLS (in particular,
367 with multibyte chars). */
368 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
370 else
372 if (hard_LC_COLLATE)
373 return xmemcoll (beg1, len1, beg2, len2);
374 diff = memcmp (beg1, beg2, MIN (len1, len2));
377 if (diff)
378 return diff;
379 return len1 < len2 ? -1 : len1 != len2;
382 /* Check that successive input lines PREV and CURRENT from input file
383 WHATFILE are presented in order, unless the user may be relying on
384 the GNU extension that input lines may be out of order if no input
385 lines are unpairable.
387 If the user specified --nocheck-order, the check is not made.
388 If the user specified --check-order, the problem is fatal.
389 Otherwise (the default), the message is simply a warning.
391 A message is printed at most once per input file. */
393 static void
394 check_order (const struct line *prev,
395 const struct line *current,
396 int whatfile)
398 if (check_input_order != CHECK_ORDER_DISABLED
399 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
401 if (!issued_disorder_warning[whatfile-1])
403 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
404 if (keycmp (prev, current, join_field, join_field) > 0)
406 /* Exclude any trailing newline. */
407 size_t len = current->buf.length;
408 if (0 < len && current->buf.buffer[len - 1] == '\n')
409 --len;
411 /* If the offending line is longer than INT_MAX, output
412 only the first INT_MAX bytes in this diagnostic. */
413 len = MIN (INT_MAX, len);
415 error ((check_input_order == CHECK_ORDER_ENABLED
416 ? EXIT_FAILURE : 0),
417 0, _("%s:%"PRIuMAX": is not sorted: %.*s"),
418 g_names[whatfile - 1], line_no[whatfile - 1],
419 (int) len, current->buf.buffer);
421 /* If we get to here, the message was merely a warning.
422 Arrange to issue it only once per file. */
423 issued_disorder_warning[whatfile-1] = true;
429 static inline void
430 reset_line (struct line *line)
432 line->nfields = 0;
435 static struct line *
436 init_linep (struct line **linep)
438 struct line *line = xcalloc (1, sizeof *line);
439 *linep = line;
440 return line;
443 /* Read a line from FP into LINE and split it into fields.
444 Return true if successful. */
446 static bool
447 get_line (FILE *fp, struct line **linep, int which)
449 struct line *line = *linep;
451 if (line == prevline[which - 1])
453 SWAPLINES (line, spareline[which - 1]);
454 *linep = line;
457 if (line)
458 reset_line (line);
459 else
460 line = init_linep (linep);
462 if (! readlinebuffer_delim (&line->buf, fp, eolchar))
464 if (ferror (fp))
465 die (EXIT_FAILURE, errno, _("read error"));
466 freeline (line);
467 return false;
469 ++line_no[which - 1];
471 xfields (line);
473 if (prevline[which - 1])
474 check_order (prevline[which - 1], line, which);
476 prevline[which - 1] = line;
477 return true;
480 static void
481 free_spareline (void)
483 size_t i;
485 for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
487 if (spareline[i])
489 freeline (spareline[i]);
490 free (spareline[i]);
495 static void
496 initseq (struct seq *seq)
498 seq->count = 0;
499 seq->alloc = 0;
500 seq->lines = NULL;
503 /* Read a line from FP and add it to SEQ. Return true if successful. */
505 static bool
506 getseq (FILE *fp, struct seq *seq, int whichfile)
508 if (seq->count == seq->alloc)
510 size_t i;
511 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
512 for (i = seq->count; i < seq->alloc; i++)
513 seq->lines[i] = NULL;
516 if (get_line (fp, &seq->lines[seq->count], whichfile))
518 ++seq->count;
519 return true;
521 return false;
524 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
525 true, else as the next. */
526 static bool
527 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
529 if (first)
530 seq->count = 0;
532 return getseq (fp, seq, whichfile);
535 static void
536 delseq (struct seq *seq)
538 size_t i;
539 for (i = 0; i < seq->alloc; i++)
541 freeline (seq->lines[i]);
542 free (seq->lines[i]);
544 free (seq->lines);
548 /* Print field N of LINE if it exists and is nonempty, otherwise
549 'empty_filler' if it is nonempty. */
551 static void
552 prfield (size_t n, struct line const *line)
554 size_t len;
556 if (n < line->nfields)
558 len = line->fields[n].len;
559 if (len)
560 fwrite (line->fields[n].beg, 1, len, stdout);
561 else if (empty_filler)
562 fputs (empty_filler, stdout);
564 else if (empty_filler)
565 fputs (empty_filler, stdout);
568 /* Output all the fields in line, other than the join field. */
570 static void
571 prfields (struct line const *line, size_t join_field, size_t autocount)
573 size_t i;
574 size_t nfields = autoformat ? autocount : line->nfields;
575 char output_separator = tab < 0 ? ' ' : tab;
577 for (i = 0; i < join_field && i < nfields; ++i)
579 putchar (output_separator);
580 prfield (i, line);
582 for (i = join_field + 1; i < nfields; ++i)
584 putchar (output_separator);
585 prfield (i, line);
589 /* Print the join of LINE1 and LINE2. */
591 static void
592 prjoin (struct line const *line1, struct line const *line2)
594 const struct outlist *outlist;
595 char output_separator = tab < 0 ? ' ' : tab;
596 size_t field;
597 struct line const *line;
599 outlist = outlist_head.next;
600 if (outlist)
602 const struct outlist *o;
604 o = outlist;
605 while (1)
607 if (o->file == 0)
609 if (line1 == &uni_blank)
611 line = line2;
612 field = join_field_2;
614 else
616 line = line1;
617 field = join_field_1;
620 else
622 line = (o->file == 1 ? line1 : line2);
623 field = o->field;
625 prfield (field, line);
626 o = o->next;
627 if (o == NULL)
628 break;
629 putchar (output_separator);
631 putchar (eolchar);
633 else
635 if (line1 == &uni_blank)
637 line = line2;
638 field = join_field_2;
640 else
642 line = line1;
643 field = join_field_1;
646 /* Output the join field. */
647 prfield (field, line);
649 /* Output other fields. */
650 prfields (line1, join_field_1, autocount_1);
651 prfields (line2, join_field_2, autocount_2);
653 putchar (eolchar);
657 /* Print the join of the files in FP1 and FP2. */
659 static void
660 join (FILE *fp1, FILE *fp2)
662 struct seq seq1, seq2;
663 int diff;
664 bool eof1, eof2;
666 fadvise (fp1, FADVISE_SEQUENTIAL);
667 fadvise (fp2, FADVISE_SEQUENTIAL);
669 /* Read the first line of each file. */
670 initseq (&seq1);
671 getseq (fp1, &seq1, 1);
672 initseq (&seq2);
673 getseq (fp2, &seq2, 2);
675 if (autoformat)
677 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
678 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
681 if (join_header_lines && (seq1.count || seq2.count))
683 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
684 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
685 prjoin (hline1, hline2);
686 prevline[0] = NULL;
687 prevline[1] = NULL;
688 if (seq1.count)
689 advance_seq (fp1, &seq1, true, 1);
690 if (seq2.count)
691 advance_seq (fp2, &seq2, true, 2);
694 while (seq1.count && seq2.count)
696 size_t i;
697 diff = keycmp (seq1.lines[0], seq2.lines[0],
698 join_field_1, join_field_2);
699 if (diff < 0)
701 if (print_unpairables_1)
702 prjoin (seq1.lines[0], &uni_blank);
703 advance_seq (fp1, &seq1, true, 1);
704 seen_unpairable = true;
705 continue;
707 if (diff > 0)
709 if (print_unpairables_2)
710 prjoin (&uni_blank, seq2.lines[0]);
711 advance_seq (fp2, &seq2, true, 2);
712 seen_unpairable = true;
713 continue;
716 /* Keep reading lines from file1 as long as they continue to
717 match the current line from file2. */
718 eof1 = false;
720 if (!advance_seq (fp1, &seq1, false, 1))
722 eof1 = true;
723 ++seq1.count;
724 break;
726 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
727 join_field_1, join_field_2));
729 /* Keep reading lines from file2 as long as they continue to
730 match the current line from file1. */
731 eof2 = false;
733 if (!advance_seq (fp2, &seq2, false, 2))
735 eof2 = true;
736 ++seq2.count;
737 break;
739 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
740 join_field_1, join_field_2));
742 if (print_pairables)
744 for (i = 0; i < seq1.count - 1; ++i)
746 size_t j;
747 for (j = 0; j < seq2.count - 1; ++j)
748 prjoin (seq1.lines[i], seq2.lines[j]);
752 if (!eof1)
754 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
755 seq1.count = 1;
757 else
758 seq1.count = 0;
760 if (!eof2)
762 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
763 seq2.count = 1;
765 else
766 seq2.count = 0;
769 /* If the user did not specify --nocheck-order, then we read the
770 tail ends of both inputs to verify that they are in order. We
771 skip the rest of the tail once we have issued a warning for that
772 file, unless we actually need to print the unpairable lines. */
773 struct line *line = NULL;
774 bool checktail = false;
776 if (check_input_order != CHECK_ORDER_DISABLED
777 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
778 checktail = true;
780 if ((print_unpairables_1 || checktail) && seq1.count)
782 if (print_unpairables_1)
783 prjoin (seq1.lines[0], &uni_blank);
784 if (seq2.count)
785 seen_unpairable = true;
786 while (get_line (fp1, &line, 1))
788 if (print_unpairables_1)
789 prjoin (line, &uni_blank);
790 if (issued_disorder_warning[0] && !print_unpairables_1)
791 break;
795 if ((print_unpairables_2 || checktail) && seq2.count)
797 if (print_unpairables_2)
798 prjoin (&uni_blank, seq2.lines[0]);
799 if (seq1.count)
800 seen_unpairable = true;
801 while (get_line (fp2, &line, 2))
803 if (print_unpairables_2)
804 prjoin (&uni_blank, line);
805 if (issued_disorder_warning[1] && !print_unpairables_2)
806 break;
810 freeline (line);
811 free (line);
813 delseq (&seq1);
814 delseq (&seq2);
817 /* Add a field spec for field FIELD of file FILE to 'outlist'. */
819 static void
820 add_field (int file, size_t field)
822 struct outlist *o;
824 assert (file == 0 || file == 1 || file == 2);
825 assert (file != 0 || field == 0);
827 o = xmalloc (sizeof *o);
828 o->file = file;
829 o->field = field;
830 o->next = NULL;
832 /* Add to the end of the list so the fields are in the right order. */
833 outlist_end->next = o;
834 outlist_end = o;
837 /* Convert a string of decimal digits, STR (the 1-based join field number),
838 to an integral value. Upon successful conversion, return one less
839 (the zero-based field number). Silently convert too-large values
840 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
841 diagnostic and exit. */
843 static size_t
844 string_to_join_field (char const *str)
846 size_t result;
847 unsigned long int val;
848 verify (SIZE_MAX <= ULONG_MAX);
850 strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
851 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
852 val = SIZE_MAX;
853 else if (s_err != LONGINT_OK || val == 0)
854 die (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
856 result = val - 1;
858 return result;
/* Decode the single field specifier S into *FILE_INDEX and
   *FIELD_INDEX.  S is either "0" (the join field) or "N.FIELD" with N
   being 1 or 2 and FIELD a 1-based field number; *FIELD_INDEX receives
   the zero-based equivalent.  Nothing is returned: an invalid S is
   diagnosed and the program exits.  */

static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  char lead = s[0];

  if (lead == '0')
    {
      /* '0' must stand all alone -- no '.FIELD' may follow.  */
      if (s[1] != '\0')
        die (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (lead == '1' || lead == '2')
    {
      if (s[1] != '.')
        die (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = lead - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      die (EXIT_FAILURE, 0,
           _("invalid file number in field spec: %s"), quote (s));

      /* Tell the compiler that control cannot get past this point, so
         it does not warn that the caller's *FILE_INDEX and
         *FIELD_INDEX might be used uninitialized.  */
      abort ();
    }
}
/* Add to the output field list every field specifier found in STR.
   Specifiers are separated by a comma, a space or a tab.  STR is
   modified in place: each separator is overwritten with a NUL.  */

static void
add_field_list (char *str)
{
  char *cursor = str;

  for (;;)
    {
      char const *spec_item = cursor;
      int file_index;
      size_t field_index;

      /* Cut the current item off at the next separator, if any.  */
      cursor = strpbrk (cursor, ", \t");
      if (cursor != NULL)
        *cursor++ = '\0';

      decode_field_spec (spec_item, &file_index, &field_index);
      add_field (file_index, field_index);

      if (cursor == NULL)
        break;
    }
}
/* Store VAL in the join field *VAR.  If *VAR already holds a different
   value (anything other than the SIZE_MAX "unset" marker), the two
   settings conflict: report both as 1-based numbers and exit.  */

static void
set_join_field (size_t *var, size_t val)
{
  bool already_set = (*var != SIZE_MAX);

  if (already_set && *var != val)
    {
      unsigned long int old1 = *var + 1;
      unsigned long int new1 = val + 1;
      die (EXIT_FAILURE, 0,
           _("incompatible join fields %lu, %lu"), old1, new1);
    }

  *var = val;
}
/* How a non-option command-line argument is to be interpreted.  */

enum operand_status
{
  /* The argument is definitely an operand, i.e. one of the two files
     to be joined.  */
  MUST_BE_OPERAND,

  /* The argument may be the argument of a preceding -j1 or -j2
     option, or it may be an operand.  These two values must stay
     adjacent and in this order: the option parser computes the second
     as MIGHT_BE_J1_ARG + 1.  */
  MIGHT_BE_J1_ARG,
  MIGHT_BE_J2_ARG,

  /* The argument may be the argument of a preceding -o option, or it
     may be an operand.  */
  MIGHT_BE_O_ARG
};
958 /* Add NAME to the array of input file NAMES with operand statuses
959 OPERAND_STATUS; currently there are NFILES names in the list. */
961 static void
962 add_file_name (char *name, char *names[2],
963 int operand_status[2], int joption_count[2], int *nfiles,
964 int *prev_optc_status, int *optc_status)
966 int n = *nfiles;
968 if (n == 2)
970 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
971 char *arg = names[op0];
972 switch (operand_status[op0])
974 case MUST_BE_OPERAND:
975 error (0, 0, _("extra operand %s"), quoteaf (name));
976 usage (EXIT_FAILURE);
978 case MIGHT_BE_J1_ARG:
979 joption_count[0]--;
980 set_join_field (&join_field_1, string_to_join_field (arg));
981 break;
983 case MIGHT_BE_J2_ARG:
984 joption_count[1]--;
985 set_join_field (&join_field_2, string_to_join_field (arg));
986 break;
988 case MIGHT_BE_O_ARG:
989 add_field_list (arg);
990 break;
992 if (!op0)
994 operand_status[0] = operand_status[1];
995 names[0] = names[1];
997 n = 1;
1000 operand_status[n] = *prev_optc_status;
1001 names[n] = name;
1002 *nfiles = n + 1;
1003 if (*prev_optc_status == MIGHT_BE_O_ARG)
1004 *optc_status = MIGHT_BE_O_ARG;
1008 main (int argc, char **argv)
1010 int optc_status;
1011 int prev_optc_status = MUST_BE_OPERAND;
1012 int operand_status[2];
1013 int joption_count[2] = { 0, 0 };
1014 FILE *fp1, *fp2;
1015 int optc;
1016 int nfiles = 0;
1017 int i;
1019 initialize_main (&argc, &argv);
1020 set_program_name (argv[0]);
1021 setlocale (LC_ALL, "");
1022 bindtextdomain (PACKAGE, LOCALEDIR);
1023 textdomain (PACKAGE);
1024 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1026 atexit (close_stdout);
1027 atexit (free_spareline);
1029 print_pairables = true;
1030 seen_unpairable = false;
1031 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1032 check_input_order = CHECK_ORDER_DEFAULT;
1034 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z",
1035 longopts, NULL))
1036 != -1)
1038 optc_status = MUST_BE_OPERAND;
1040 switch (optc)
1042 case 'v':
1043 print_pairables = false;
1044 /* Fall through. */
1046 case 'a':
1048 unsigned long int val;
1049 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1050 || (val != 1 && val != 2))
1051 die (EXIT_FAILURE, 0,
1052 _("invalid field number: %s"), quote (optarg));
1053 if (val == 1)
1054 print_unpairables_1 = true;
1055 else
1056 print_unpairables_2 = true;
1058 break;
1060 case 'e':
1061 if (empty_filler && ! STREQ (empty_filler, optarg))
1062 die (EXIT_FAILURE, 0,
1063 _("conflicting empty-field replacement strings"));
1064 empty_filler = optarg;
1065 break;
1067 case 'i':
1068 ignore_case = true;
1069 break;
1071 case '1':
1072 set_join_field (&join_field_1, string_to_join_field (optarg));
1073 break;
1075 case '2':
1076 set_join_field (&join_field_2, string_to_join_field (optarg));
1077 break;
1079 case 'j':
1080 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1081 && optarg == argv[optind - 1] + 2)
1083 /* The argument was either "-j1" or "-j2". */
1084 bool is_j2 = (optarg[0] == '2');
1085 joption_count[is_j2]++;
1086 optc_status = MIGHT_BE_J1_ARG + is_j2;
1088 else
1090 set_join_field (&join_field_1, string_to_join_field (optarg));
1091 set_join_field (&join_field_2, join_field_1);
1093 break;
1095 case 'o':
1096 if (STREQ (optarg, "auto"))
1097 autoformat = true;
1098 else
1100 add_field_list (optarg);
1101 optc_status = MIGHT_BE_O_ARG;
1103 break;
1105 case 't':
1107 unsigned char newtab = optarg[0];
1108 if (! newtab)
1109 newtab = '\n'; /* '' => process the whole line. */
1110 else if (optarg[1])
1112 if (STREQ (optarg, "\\0"))
1113 newtab = '\0';
1114 else
1115 die (EXIT_FAILURE, 0, _("multi-character tab %s"),
1116 quote (optarg));
1118 if (0 <= tab && tab != newtab)
1119 die (EXIT_FAILURE, 0, _("incompatible tabs"));
1120 tab = newtab;
1122 break;
1124 case 'z':
1125 eolchar = 0;
1126 break;
1128 case NOCHECK_ORDER_OPTION:
1129 check_input_order = CHECK_ORDER_DISABLED;
1130 break;
1132 case CHECK_ORDER_OPTION:
1133 check_input_order = CHECK_ORDER_ENABLED;
1134 break;
1136 case 1: /* Non-option argument. */
1137 add_file_name (optarg, g_names, operand_status, joption_count,
1138 &nfiles, &prev_optc_status, &optc_status);
1139 break;
1141 case HEADER_LINE_OPTION:
1142 join_header_lines = true;
1143 break;
1145 case_GETOPT_HELP_CHAR;
1147 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1149 default:
1150 usage (EXIT_FAILURE);
1153 prev_optc_status = optc_status;
1156 /* Process any operands after "--". */
1157 prev_optc_status = MUST_BE_OPERAND;
1158 while (optind < argc)
1159 add_file_name (argv[optind++], g_names, operand_status, joption_count,
1160 &nfiles, &prev_optc_status, &optc_status);
1162 if (nfiles != 2)
1164 if (nfiles == 0)
1165 error (0, 0, _("missing operand"));
1166 else
1167 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1168 usage (EXIT_FAILURE);
1171 /* If "-j1" was specified and it turns out not to have had an argument,
1172 treat it as "-j 1". Likewise for -j2. */
1173 for (i = 0; i < 2; i++)
1174 if (joption_count[i] != 0)
1176 set_join_field (&join_field_1, i);
1177 set_join_field (&join_field_2, i);
1180 if (join_field_1 == SIZE_MAX)
1181 join_field_1 = 0;
1182 if (join_field_2 == SIZE_MAX)
1183 join_field_2 = 0;
1185 fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
1186 if (!fp1)
1187 die (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
1188 fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
1189 if (!fp2)
1190 die (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
1191 if (fp1 == fp2)
1192 die (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1193 join (fp1, fp2);
1195 if (fclose (fp1) != 0)
1196 die (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
1197 if (fclose (fp2) != 0)
1198 die (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
1200 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1201 return EXIT_FAILURE;
1202 else
1203 return EXIT_SUCCESS;