tests: remove crufty test=test_name code from old tests
[coreutils/ericb.git] / src / join.c
blob9404aa30369899d60c20669078df4451f171b3fb
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991, 1995-2006, 2008-2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "error.h"
27 #include "fadvise.h"
28 #include "hard-locale.h"
29 #include "linebuffer.h"
30 #include "memcasecmp.h"
31 #include "quote.h"
32 #include "stdio--.h"
33 #include "xmemcoll.h"
34 #include "xstrtol.h"
35 #include "argmatch.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "join"

/* Author list; expands to a call of proper_name.  */
#define AUTHORS proper_name ("Mike Haertel")

/* From here on the preprocessor spells every 'join' identifier,
   including the function defined later in this file, as 'system_join'.
   NOTE(review): presumably this sidesteps a clash with some other
   'join' symbol -- confirm.  */
#define join system_join
/* Exchange the two 'struct line *' lvalues A and B.
   Each argument is expanded more than once, so it must be free of
   side effects.  The expansion deliberately carries no trailing
   semicolon: the caller's own ';' completes the statement, which keeps
   the macro usable as the unbraced body of an 'if' that has an
   'else'.  */
#define SWAPLINES(a, b) do { \
  struct line *tmp = (a); \
  (a) = (b); \
  (b) = tmp; \
} while (0)
/* One node of the list that says which fields to print on each
   output line.  */
struct outlist
{
  /* Which input supplies the field: 1 is the first file argument,
     2 is the second, and 0 stands for the join field itself.  */
  int file;

  /* Zero-based field index; specified only when FILE is 1 or 2.  */
  size_t field;

  /* The node that follows this one in the list.  */
  struct outlist *next;
};
/* Where one field of a line lives, and how long it is.  */
struct field
{
  char *beg;	/* Address of the field's first character.  */
  size_t len;	/* Length of the field.  */
};
71 /* A line read from an input file. */
72 struct line
74 struct linebuffer buf; /* The line itself. */
75 size_t nfields; /* Number of elements in 'fields'. */
76 size_t nfields_allocated; /* Number of elements allocated for 'fields'. */
77 struct field *fields;
/* A run of one or more consecutive lines from a single file, all of
   which carry the same join field value.  */
struct seq
{
  size_t count;		/* Entries of 'lines' currently in use.  */
  size_t alloc;		/* Entries of 'lines' that have been allocated.  */
  struct line **lines;	/* The line pointers.  */
};
89 /* The previous line read from each file. */
90 static struct line *prevline[2] = {NULL, NULL};
92 /* The number of lines read from each file. */
93 static uintmax_t line_no[2] = {0, 0};
95 /* The input file names. */
96 static char *g_names[2];
98 /* This provides an extra line buffer for each file. We need these if we
99 try to read two consecutive lines into the same buffer, since we don't
100 want to overwrite the previous buffer before we check order. */
101 static struct line *spareline[2] = {NULL, NULL};
103 /* True if the LC_COLLATE locale is hard. */
104 static bool hard_LC_COLLATE;
106 /* If nonzero, print unpairable lines in file 1 or 2. */
107 static bool print_unpairables_1, print_unpairables_2;
109 /* If nonzero, print pairable lines. */
110 static bool print_pairables;
112 /* If nonzero, we have seen at least one unpairable line. */
113 static bool seen_unpairable;
115 /* If nonzero, we have warned about disorder in that file. */
116 static bool issued_disorder_warning[2];
118 /* Empty output field filler. */
119 static char const *empty_filler;
121 /* Whether to ensure the same number of fields are output from each line. */
122 static bool autoformat;
123 /* The number of fields to output for each line.
124 Only significant when autoformat is true. */
125 static size_t autocount_1;
126 static size_t autocount_2;
128 /* Field to join on; SIZE_MAX means they haven't been determined yet. */
129 static size_t join_field_1 = SIZE_MAX;
130 static size_t join_field_2 = SIZE_MAX;
132 /* List of fields to print. */
133 static struct outlist outlist_head;
135 /* Last element in 'outlist', where a new element can be added. */
136 static struct outlist *outlist_end = &outlist_head;
138 /* Tab character separating fields. If negative, fields are separated
139 by any nonempty string of blanks, otherwise by exactly one
140 tab character whose value (when cast to unsigned char) equals TAB. */
141 static int tab = -1;
/* Whether, and how strictly, to check that the input is correctly
   ordered.  */
static enum
  {
    CHECK_ORDER_DEFAULT,
    CHECK_ORDER_ENABLED,
    CHECK_ORDER_DISABLED
  } check_input_order;
/* Codes for the options that have only a long form.  They start just
   above CHAR_MAX, out of the range of any single-character option.  */
enum
  {
    CHECK_ORDER_OPTION = CHAR_MAX + 1,
    NOCHECK_ORDER_OPTION,
    HEADER_LINE_OPTION
  };
159 static struct option const longopts[] =
161 {"ignore-case", no_argument, NULL, 'i'},
162 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
163 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
164 {"header", no_argument, NULL, HEADER_LINE_OPTION},
165 {GETOPT_HELP_OPTION_DECL},
166 {GETOPT_VERSION_OPTION_DECL},
167 {NULL, 0, NULL, 0}
170 /* Used to print non-joining lines */
171 static struct line uni_blank;
173 /* If nonzero, ignore case when comparing join fields. */
174 static bool ignore_case;
176 /* If nonzero, treat the first line of each file as column headers -
177 join them without checking for ordering */
178 static bool join_header_lines;
180 void
181 usage (int status)
183 if (status != EXIT_SUCCESS)
184 emit_try_help ();
185 else
187 printf (_("\
188 Usage: %s [OPTION]... FILE1 FILE2\n\
190 program_name);
191 fputs (_("\
192 For each pair of input lines with identical join fields, write a line to\n\
193 standard output. The default join field is the first, delimited\n\
194 by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
196 -a FILENUM also print unpairable lines from file FILENUM, where\n\
197 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
198 -e EMPTY replace missing input fields with EMPTY\n\
199 "), stdout);
200 fputs (_("\
201 -i, --ignore-case ignore differences in case when comparing fields\n\
202 -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
203 -o FORMAT obey FORMAT while constructing output line\n\
204 -t CHAR use CHAR as input and output field separator\n\
205 "), stdout);
206 fputs (_("\
207 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
208 -1 FIELD join on this FIELD of file 1\n\
209 -2 FIELD join on this FIELD of file 2\n\
210 --check-order check that the input is correctly sorted, even\n\
211 if all input lines are pairable\n\
212 --nocheck-order do not check that the input is correctly sorted\n\
213 --header treat the first line in each file as field headers,\n\
214 print them without trying to pair them\n\
215 "), stdout);
216 fputs (HELP_OPTION_DESCRIPTION, stdout);
217 fputs (VERSION_OPTION_DESCRIPTION, stdout);
218 fputs (_("\
220 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
221 else fields are separated by CHAR. Any FIELD is a field number counted\n\
222 from 1. FORMAT is one or more comma or blank separated specifications,\n\
223 each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
224 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
225 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
226 line of each file determines the number of fields output for each line.\n\
228 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
229 E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
230 or use \"join -t ''\" if 'sort' has no options.\n\
231 Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\
232 If the input is not sorted and some lines cannot be joined, a\n\
233 warning message will be given.\n\
234 "), stdout);
235 emit_ancillary_info ();
237 exit (status);
240 /* Record a field in LINE, with location FIELD and size LEN. */
242 static void
243 extract_field (struct line *line, char *field, size_t len)
245 if (line->nfields >= line->nfields_allocated)
247 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
249 line->fields[line->nfields].beg = field;
250 line->fields[line->nfields].len = len;
251 ++(line->nfields);
254 /* Fill in the 'fields' structure in LINE. */
256 static void
257 xfields (struct line *line)
259 char *ptr = line->buf.buffer;
260 char const *lim = ptr + line->buf.length - 1;
262 if (ptr == lim)
263 return;
265 if (0 <= tab && tab != '\n')
267 char *sep;
268 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
269 extract_field (line, ptr, sep - ptr);
271 else if (tab < 0)
273 /* Skip leading blanks before the first field. */
274 while (isblank (to_uchar (*ptr)))
275 if (++ptr == lim)
276 return;
280 char *sep;
281 for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
282 continue;
283 extract_field (line, ptr, sep - ptr);
284 if (sep == lim)
285 return;
286 for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
287 continue;
289 while (ptr != lim);
292 extract_field (line, ptr, lim - ptr);
295 static void
296 freeline (struct line *line)
298 if (line == NULL)
299 return;
300 free (line->fields);
301 line->fields = NULL;
302 free (line->buf.buffer);
303 line->buf.buffer = NULL;
306 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
307 >0 if it compares greater; 0 if it compares equal.
308 Report an error and exit if the comparison fails.
309 Use join fields JF_1 and JF_2 respectively. */
311 static int
312 keycmp (struct line const *line1, struct line const *line2,
313 size_t jf_1, size_t jf_2)
315 /* Start of field to compare in each file. */
316 char *beg1;
317 char *beg2;
319 size_t len1;
320 size_t len2; /* Length of fields to compare. */
321 int diff;
323 if (jf_1 < line1->nfields)
325 beg1 = line1->fields[jf_1].beg;
326 len1 = line1->fields[jf_1].len;
328 else
330 beg1 = NULL;
331 len1 = 0;
334 if (jf_2 < line2->nfields)
336 beg2 = line2->fields[jf_2].beg;
337 len2 = line2->fields[jf_2].len;
339 else
341 beg2 = NULL;
342 len2 = 0;
345 if (len1 == 0)
346 return len2 == 0 ? 0 : -1;
347 if (len2 == 0)
348 return 1;
350 if (ignore_case)
352 /* FIXME: ignore_case does not work with NLS (in particular,
353 with multibyte chars). */
354 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
356 else
358 if (hard_LC_COLLATE)
359 return xmemcoll (beg1, len1, beg2, len2);
360 diff = memcmp (beg1, beg2, MIN (len1, len2));
363 if (diff)
364 return diff;
365 return len1 < len2 ? -1 : len1 != len2;
368 /* Check that successive input lines PREV and CURRENT from input file
369 WHATFILE are presented in order, unless the user may be relying on
370 the GNU extension that input lines may be out of order if no input
371 lines are unpairable.
373 If the user specified --nocheck-order, the check is not made.
374 If the user specified --check-order, the problem is fatal.
375 Otherwise (the default), the message is simply a warning.
377 A message is printed at most once per input file. */
379 static void
380 check_order (const struct line *prev,
381 const struct line *current,
382 int whatfile)
384 if (check_input_order != CHECK_ORDER_DISABLED
385 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
387 if (!issued_disorder_warning[whatfile-1])
389 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
390 if (keycmp (prev, current, join_field, join_field) > 0)
392 /* Exclude any trailing newline. */
393 size_t len = current->buf.length;
394 if (0 < len && current->buf.buffer[len - 1] == '\n')
395 --len;
397 /* If the offending line is longer than INT_MAX, output
398 only the first INT_MAX bytes in this diagnostic. */
399 len = MIN (INT_MAX, len);
401 error ((check_input_order == CHECK_ORDER_ENABLED
402 ? EXIT_FAILURE : 0),
403 0, _("%s:%ju: is not sorted: %.*s"),
404 g_names[whatfile - 1], line_no[whatfile - 1],
405 (int) len, current->buf.buffer);
407 /* If we get to here, the message was merely a warning.
408 Arrange to issue it only once per file. */
409 issued_disorder_warning[whatfile-1] = true;
415 static inline void
416 reset_line (struct line *line)
418 line->nfields = 0;
421 static struct line *
422 init_linep (struct line **linep)
424 struct line *line = xcalloc (1, sizeof *line);
425 *linep = line;
426 return line;
429 /* Read a line from FP into LINE and split it into fields.
430 Return true if successful. */
432 static bool
433 get_line (FILE *fp, struct line **linep, int which)
435 struct line *line = *linep;
437 if (line == prevline[which - 1])
439 SWAPLINES (line, spareline[which - 1]);
440 *linep = line;
443 if (line)
444 reset_line (line);
445 else
446 line = init_linep (linep);
448 if (! readlinebuffer (&line->buf, fp))
450 if (ferror (fp))
451 error (EXIT_FAILURE, errno, _("read error"));
452 freeline (line);
453 return false;
455 ++line_no[which - 1];
457 xfields (line);
459 if (prevline[which - 1])
460 check_order (prevline[which - 1], line, which);
462 prevline[which - 1] = line;
463 return true;
466 static void
467 free_spareline (void)
469 size_t i;
471 for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
473 if (spareline[i])
475 freeline (spareline[i]);
476 free (spareline[i]);
481 static void
482 initseq (struct seq *seq)
484 seq->count = 0;
485 seq->alloc = 0;
486 seq->lines = NULL;
489 /* Read a line from FP and add it to SEQ. Return true if successful. */
491 static bool
492 getseq (FILE *fp, struct seq *seq, int whichfile)
494 if (seq->count == seq->alloc)
496 size_t i;
497 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
498 for (i = seq->count; i < seq->alloc; i++)
499 seq->lines[i] = NULL;
502 if (get_line (fp, &seq->lines[seq->count], whichfile))
504 ++seq->count;
505 return true;
507 return false;
510 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
511 true, else as the next. */
512 static bool
513 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
515 if (first)
516 seq->count = 0;
518 return getseq (fp, seq, whichfile);
521 static void
522 delseq (struct seq *seq)
524 size_t i;
525 for (i = 0; i < seq->alloc; i++)
527 freeline (seq->lines[i]);
528 free (seq->lines[i]);
530 free (seq->lines);
534 /* Print field N of LINE if it exists and is nonempty, otherwise
535 'empty_filler' if it is nonempty. */
537 static void
538 prfield (size_t n, struct line const *line)
540 size_t len;
542 if (n < line->nfields)
544 len = line->fields[n].len;
545 if (len)
546 fwrite (line->fields[n].beg, 1, len, stdout);
547 else if (empty_filler)
548 fputs (empty_filler, stdout);
550 else if (empty_filler)
551 fputs (empty_filler, stdout);
554 /* Output all the fields in line, other than the join field. */
556 static void
557 prfields (struct line const *line, size_t join_field, size_t autocount)
559 size_t i;
560 size_t nfields = autoformat ? autocount : line->nfields;
561 char output_separator = tab < 0 ? ' ' : tab;
563 for (i = 0; i < join_field && i < nfields; ++i)
565 putchar (output_separator);
566 prfield (i, line);
568 for (i = join_field + 1; i < nfields; ++i)
570 putchar (output_separator);
571 prfield (i, line);
575 /* Print the join of LINE1 and LINE2. */
577 static void
578 prjoin (struct line const *line1, struct line const *line2)
580 const struct outlist *outlist;
581 char output_separator = tab < 0 ? ' ' : tab;
582 size_t field;
583 struct line const *line;
585 outlist = outlist_head.next;
586 if (outlist)
588 const struct outlist *o;
590 o = outlist;
591 while (1)
593 if (o->file == 0)
595 if (line1 == &uni_blank)
597 line = line2;
598 field = join_field_2;
600 else
602 line = line1;
603 field = join_field_1;
606 else
608 line = (o->file == 1 ? line1 : line2);
609 field = o->field;
611 prfield (field, line);
612 o = o->next;
613 if (o == NULL)
614 break;
615 putchar (output_separator);
617 putchar ('\n');
619 else
621 if (line1 == &uni_blank)
623 line = line2;
624 field = join_field_2;
626 else
628 line = line1;
629 field = join_field_1;
632 /* Output the join field. */
633 prfield (field, line);
635 /* Output other fields. */
636 prfields (line1, join_field_1, autocount_1);
637 prfields (line2, join_field_2, autocount_2);
639 putchar ('\n');
643 /* Print the join of the files in FP1 and FP2. */
645 static void
646 join (FILE *fp1, FILE *fp2)
648 struct seq seq1, seq2;
649 int diff;
650 bool eof1, eof2;
652 fadvise (fp1, FADVISE_SEQUENTIAL);
653 fadvise (fp2, FADVISE_SEQUENTIAL);
655 /* Read the first line of each file. */
656 initseq (&seq1);
657 getseq (fp1, &seq1, 1);
658 initseq (&seq2);
659 getseq (fp2, &seq2, 2);
661 if (autoformat)
663 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
664 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
667 if (join_header_lines && (seq1.count || seq2.count))
669 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
670 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
671 prjoin (hline1, hline2);
672 prevline[0] = NULL;
673 prevline[1] = NULL;
674 if (seq1.count)
675 advance_seq (fp1, &seq1, true, 1);
676 if (seq2.count)
677 advance_seq (fp2, &seq2, true, 2);
680 while (seq1.count && seq2.count)
682 size_t i;
683 diff = keycmp (seq1.lines[0], seq2.lines[0],
684 join_field_1, join_field_2);
685 if (diff < 0)
687 if (print_unpairables_1)
688 prjoin (seq1.lines[0], &uni_blank);
689 advance_seq (fp1, &seq1, true, 1);
690 seen_unpairable = true;
691 continue;
693 if (diff > 0)
695 if (print_unpairables_2)
696 prjoin (&uni_blank, seq2.lines[0]);
697 advance_seq (fp2, &seq2, true, 2);
698 seen_unpairable = true;
699 continue;
702 /* Keep reading lines from file1 as long as they continue to
703 match the current line from file2. */
704 eof1 = false;
706 if (!advance_seq (fp1, &seq1, false, 1))
708 eof1 = true;
709 ++seq1.count;
710 break;
712 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
713 join_field_1, join_field_2));
715 /* Keep reading lines from file2 as long as they continue to
716 match the current line from file1. */
717 eof2 = false;
719 if (!advance_seq (fp2, &seq2, false, 2))
721 eof2 = true;
722 ++seq2.count;
723 break;
725 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
726 join_field_1, join_field_2));
728 if (print_pairables)
730 for (i = 0; i < seq1.count - 1; ++i)
732 size_t j;
733 for (j = 0; j < seq2.count - 1; ++j)
734 prjoin (seq1.lines[i], seq2.lines[j]);
738 if (!eof1)
740 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
741 seq1.count = 1;
743 else
744 seq1.count = 0;
746 if (!eof2)
748 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
749 seq2.count = 1;
751 else
752 seq2.count = 0;
755 /* If the user did not specify --nocheck-order, then we read the
756 tail ends of both inputs to verify that they are in order. We
757 skip the rest of the tail once we have issued a warning for that
758 file, unless we actually need to print the unpairable lines. */
759 struct line *line = NULL;
760 bool checktail = false;
762 if (check_input_order != CHECK_ORDER_DISABLED
763 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
764 checktail = true;
766 if ((print_unpairables_1 || checktail) && seq1.count)
768 if (print_unpairables_1)
769 prjoin (seq1.lines[0], &uni_blank);
770 if (seq2.count)
771 seen_unpairable = true;
772 while (get_line (fp1, &line, 1))
774 if (print_unpairables_1)
775 prjoin (line, &uni_blank);
776 if (issued_disorder_warning[0] && !print_unpairables_1)
777 break;
781 if ((print_unpairables_2 || checktail) && seq2.count)
783 if (print_unpairables_2)
784 prjoin (&uni_blank, seq2.lines[0]);
785 if (seq1.count)
786 seen_unpairable = true;
787 while (get_line (fp2, &line, 2))
789 if (print_unpairables_2)
790 prjoin (&uni_blank, line);
791 if (issued_disorder_warning[1] && !print_unpairables_2)
792 break;
796 freeline (line);
797 free (line);
799 delseq (&seq1);
800 delseq (&seq2);
803 /* Add a field spec for field FIELD of file FILE to 'outlist'. */
805 static void
806 add_field (int file, size_t field)
808 struct outlist *o;
810 assert (file == 0 || file == 1 || file == 2);
811 assert (file != 0 || field == 0);
813 o = xmalloc (sizeof *o);
814 o->file = file;
815 o->field = field;
816 o->next = NULL;
818 /* Add to the end of the list so the fields are in the right order. */
819 outlist_end->next = o;
820 outlist_end = o;
823 /* Convert a string of decimal digits, STR (the 1-based join field number),
824 to an integral value. Upon successful conversion, return one less
825 (the zero-based field number). Silently convert too-large values
826 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
827 diagnostic and exit. */
829 static size_t
830 string_to_join_field (char const *str)
832 size_t result;
833 unsigned long int val;
834 verify (SIZE_MAX <= ULONG_MAX);
836 strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
837 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
838 val = SIZE_MAX;
839 else if (s_err != LONGINT_OK || val == 0)
840 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
842 result = val - 1;
844 return result;
/* Decode the single field specifier S into *FILE_INDEX and
   *FIELD_INDEX.  The field number written in S is 1-based, while
   *FIELD_INDEX is zero-based.  S must be either "0" or a file number
   1 or 2 followed by '.' and a field number.  If S is not valid,
   give a diagnostic and exit.  */

static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  /* The first character must be 0, 1, or 2.  */
  char which = s[0];

  if (which == '0')
    {
      /* '0' must be all alone -- no '.FIELD'.  */
      if (s[1])
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (which == '1' || which == '2')
    {
      if (s[1] != '.')
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = which - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      error (EXIT_FAILURE, 0,
             _("invalid file number in field spec: %s"), quote (s));

      /* Tell gcc -W -Wall that we can't get beyond this point.
         This avoids a warning (otherwise legit) that the caller's copies
         of *file_index and *field_index might be used uninitialized.  */
      abort ();
    }
}
/* Add to 'outlist' each field spec found in STR, where specs are
   separated by a comma, a space or a tab.  STR is modified: every
   separator found is overwritten with a null byte.  */

static void
add_field_list (char *str)
{
  char *item = str;

  do
    {
      int file_index;
      size_t field_index;
      char *delim = strpbrk (item, ", \t");
      char *next = NULL;

      /* Terminate this spec where its separator was.  */
      if (delim)
        {
          *delim = '\0';
          next = delim + 1;
        }

      decode_field_spec (item, &file_index, &field_index);
      add_field (file_index, field_index);
      item = next;
    }
  while (item);
}
/* Store VAL in the join field *VAR.  If *VAR was already set to a
   different value, report an error and exit.  */

static void
set_join_field (size_t *var, size_t val)
{
  bool already_set = *var != SIZE_MAX;

  if (already_set && *var != val)
    {
      /* Report the numbers 1-based, the way the user wrote them.  */
      unsigned long int var1 = *var + 1;
      unsigned long int val1 = val + 1;
      error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
             var1, val1);
    }

  *var = val;
}
/* What a non-option command-line argument may turn out to be.  */

enum operand_status
  {
    /* The argument can only be an operand, i.e., one of the files to
       be joined.  */
    MUST_BE_OPERAND,

    /* The argument is either the argument of the preceding -j1 or -j2
       option, or an operand.  These two must stay adjacent and in this
       order, because option parsing computes the second one as
       MIGHT_BE_J1_ARG + 1.  */
    MIGHT_BE_J1_ARG,
    MIGHT_BE_J2_ARG,

    /* The argument is either the argument of the preceding -o option,
       or an operand.  */
    MIGHT_BE_O_ARG
  };
944 /* Add NAME to the array of input file NAMES with operand statuses
945 OPERAND_STATUS; currently there are NFILES names in the list. */
947 static void
948 add_file_name (char *name, char *names[2],
949 int operand_status[2], int joption_count[2], int *nfiles,
950 int *prev_optc_status, int *optc_status)
952 int n = *nfiles;
954 if (n == 2)
956 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
957 char *arg = names[op0];
958 switch (operand_status[op0])
960 case MUST_BE_OPERAND:
961 error (0, 0, _("extra operand %s"), quote (name));
962 usage (EXIT_FAILURE);
964 case MIGHT_BE_J1_ARG:
965 joption_count[0]--;
966 set_join_field (&join_field_1, string_to_join_field (arg));
967 break;
969 case MIGHT_BE_J2_ARG:
970 joption_count[1]--;
971 set_join_field (&join_field_2, string_to_join_field (arg));
972 break;
974 case MIGHT_BE_O_ARG:
975 add_field_list (arg);
976 break;
978 if (!op0)
980 operand_status[0] = operand_status[1];
981 names[0] = names[1];
983 n = 1;
986 operand_status[n] = *prev_optc_status;
987 names[n] = name;
988 *nfiles = n + 1;
989 if (*prev_optc_status == MIGHT_BE_O_ARG)
990 *optc_status = MIGHT_BE_O_ARG;
994 main (int argc, char **argv)
996 int optc_status;
997 int prev_optc_status = MUST_BE_OPERAND;
998 int operand_status[2];
999 int joption_count[2] = { 0, 0 };
1000 FILE *fp1, *fp2;
1001 int optc;
1002 int nfiles = 0;
1003 int i;
1005 initialize_main (&argc, &argv);
1006 set_program_name (argv[0]);
1007 setlocale (LC_ALL, "");
1008 bindtextdomain (PACKAGE, LOCALEDIR);
1009 textdomain (PACKAGE);
1010 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1012 atexit (close_stdout);
1013 atexit (free_spareline);
1015 print_pairables = true;
1016 seen_unpairable = false;
1017 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1018 check_input_order = CHECK_ORDER_DEFAULT;
1020 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
1021 longopts, NULL))
1022 != -1)
1024 optc_status = MUST_BE_OPERAND;
1026 switch (optc)
1028 case 'v':
1029 print_pairables = false;
1030 /* Fall through. */
1032 case 'a':
1034 unsigned long int val;
1035 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1036 || (val != 1 && val != 2))
1037 error (EXIT_FAILURE, 0,
1038 _("invalid field number: %s"), quote (optarg));
1039 if (val == 1)
1040 print_unpairables_1 = true;
1041 else
1042 print_unpairables_2 = true;
1044 break;
1046 case 'e':
1047 if (empty_filler && ! STREQ (empty_filler, optarg))
1048 error (EXIT_FAILURE, 0,
1049 _("conflicting empty-field replacement strings"));
1050 empty_filler = optarg;
1051 break;
1053 case 'i':
1054 ignore_case = true;
1055 break;
1057 case '1':
1058 set_join_field (&join_field_1, string_to_join_field (optarg));
1059 break;
1061 case '2':
1062 set_join_field (&join_field_2, string_to_join_field (optarg));
1063 break;
1065 case 'j':
1066 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1067 && optarg == argv[optind - 1] + 2)
1069 /* The argument was either "-j1" or "-j2". */
1070 bool is_j2 = (optarg[0] == '2');
1071 joption_count[is_j2]++;
1072 optc_status = MIGHT_BE_J1_ARG + is_j2;
1074 else
1076 set_join_field (&join_field_1, string_to_join_field (optarg));
1077 set_join_field (&join_field_2, join_field_1);
1079 break;
1081 case 'o':
1082 if (STREQ (optarg, "auto"))
1083 autoformat = true;
1084 else
1086 add_field_list (optarg);
1087 optc_status = MIGHT_BE_O_ARG;
1089 break;
1091 case 't':
1093 unsigned char newtab = optarg[0];
1094 if (! newtab)
1095 newtab = '\n'; /* '' => process the whole line. */
1096 else if (optarg[1])
1098 if (STREQ (optarg, "\\0"))
1099 newtab = '\0';
1100 else
1101 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1102 quote (optarg));
1104 if (0 <= tab && tab != newtab)
1105 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1106 tab = newtab;
1108 break;
1110 case NOCHECK_ORDER_OPTION:
1111 check_input_order = CHECK_ORDER_DISABLED;
1112 break;
1114 case CHECK_ORDER_OPTION:
1115 check_input_order = CHECK_ORDER_ENABLED;
1116 break;
1118 case 1: /* Non-option argument. */
1119 add_file_name (optarg, g_names, operand_status, joption_count,
1120 &nfiles, &prev_optc_status, &optc_status);
1121 break;
1123 case HEADER_LINE_OPTION:
1124 join_header_lines = true;
1125 break;
1127 case_GETOPT_HELP_CHAR;
1129 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1131 default:
1132 usage (EXIT_FAILURE);
1135 prev_optc_status = optc_status;
1138 /* Process any operands after "--". */
1139 prev_optc_status = MUST_BE_OPERAND;
1140 while (optind < argc)
1141 add_file_name (argv[optind++], g_names, operand_status, joption_count,
1142 &nfiles, &prev_optc_status, &optc_status);
1144 if (nfiles != 2)
1146 if (nfiles == 0)
1147 error (0, 0, _("missing operand"));
1148 else
1149 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1150 usage (EXIT_FAILURE);
1153 /* If "-j1" was specified and it turns out not to have had an argument,
1154 treat it as "-j 1". Likewise for -j2. */
1155 for (i = 0; i < 2; i++)
1156 if (joption_count[i] != 0)
1158 set_join_field (&join_field_1, i);
1159 set_join_field (&join_field_2, i);
1162 if (join_field_1 == SIZE_MAX)
1163 join_field_1 = 0;
1164 if (join_field_2 == SIZE_MAX)
1165 join_field_2 = 0;
1167 fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
1168 if (!fp1)
1169 error (EXIT_FAILURE, errno, "%s", g_names[0]);
1170 fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
1171 if (!fp2)
1172 error (EXIT_FAILURE, errno, "%s", g_names[1]);
1173 if (fp1 == fp2)
1174 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1175 join (fp1, fp2);
1177 if (fclose (fp1) != 0)
1178 error (EXIT_FAILURE, errno, "%s", g_names[0]);
1179 if (fclose (fp2) != 0)
1180 error (EXIT_FAILURE, errno, "%s", g_names[1]);
1182 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1183 exit (EXIT_FAILURE);
1184 else
1185 exit (EXIT_SUCCESS);