shred: enable direct I/O when possible
[coreutils.git] / src / join.c
blob1da618dc1d5dea7ad2f835f1b22272fa50a8c34f
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <assert.h>
22 #include <sys/types.h>
23 #include <getopt.h>
25 #include "system.h"
26 #include "error.h"
27 #include "fadvise.h"
28 #include "hard-locale.h"
29 #include "linebuffer.h"
30 #include "memcasecmp.h"
31 #include "quote.h"
32 #include "stdio--.h"
33 #include "xmemcoll.h"
34 #include "xstrtol.h"
35 #include "argmatch.h"
/* Official program name, without any installation prefix such as 'g'.  */
#define PROGRAM_NAME "join"

/* Author list handed to the --version machinery.  */
#define AUTHORS proper_name ("Mike Haertel")

/* Every later use of the identifier 'join' in this file is rewritten to
   'system_join'.  NOTE(review): presumably this avoids a clash with a
   'join' symbol declared by some system header -- confirm.  */
#define join system_join
/* Exchange the two 'struct line *' lvalues A and B.

   The do/while (0) wrapper intentionally has no trailing semicolon, so
   that 'SWAPLINES (x, y);' expands to exactly one statement and can be
   used as the body of an unbraced if/else.  Each argument is evaluated
   twice; pass only side-effect-free lvalues.  */
#define SWAPLINES(a, b) do { \
  struct line *tmp = (a); \
  (a) = (b); \
  (b) = tmp; \
} while (0)
/* One node of the singly linked list that describes which fields are
   printed, and in what order, on each output line.  */
struct outlist
  {
    /* Which input supplies the field: 0 selects the join field,
       1 the first file operand, 2 the second.  */
    int file;

    /* Zero-based field index; meaningful only when FILE is 1 or 2.  */
    size_t field;

    /* Following node, or NULL for the last node of the list.  */
    struct outlist *next;
  };
/* One field of an input line: a (pointer, length) view; the bytes are
   not NUL-terminated per field.  */
struct field
  {
    char *beg;			/* First byte of the field.  */
    size_t len;			/* Number of bytes in the field.  */
  };
71 /* A line read from an input file. */
72 struct line
74 struct linebuffer buf; /* The line itself. */
75 size_t nfields; /* Number of elements in 'fields'. */
76 size_t nfields_allocated; /* Number of elements allocated for 'fields'. */
77 struct field *fields;
/* A run of consecutive lines from one file that all carry the same
   join field value.  */
struct seq
  {
    size_t count;		/* Slots of 'lines' currently in use.  */
    size_t alloc;		/* Slots of 'lines' that are allocated.  */
    struct line **lines;	/* The lines; unused slots hold NULL.  */
  };
89 /* The previous line read from each file. */
90 static struct line *prevline[2] = {NULL, NULL};
92 /* The number of lines read from each file. */
93 static uintmax_t line_no[2] = {0, 0};
95 /* The input file names. */
96 static char *g_names[2];
98 /* This provides an extra line buffer for each file. We need these if we
99 try to read two consecutive lines into the same buffer, since we don't
100 want to overwrite the previous buffer before we check order. */
101 static struct line *spareline[2] = {NULL, NULL};
103 /* True if the LC_COLLATE locale is hard. */
104 static bool hard_LC_COLLATE;
106 /* If nonzero, print unpairable lines in file 1 or 2. */
107 static bool print_unpairables_1, print_unpairables_2;
109 /* If nonzero, print pairable lines. */
110 static bool print_pairables;
112 /* If nonzero, we have seen at least one unpairable line. */
113 static bool seen_unpairable;
115 /* If nonzero, we have warned about disorder in that file. */
116 static bool issued_disorder_warning[2];
118 /* Empty output field filler. */
119 static char const *empty_filler;
121 /* Whether to ensure the same number of fields are output from each line. */
122 static bool autoformat;
123 /* The number of fields to output for each line.
124 Only significant when autoformat is true. */
125 static size_t autocount_1;
126 static size_t autocount_2;
128 /* Field to join on; SIZE_MAX means they haven't been determined yet. */
129 static size_t join_field_1 = SIZE_MAX;
130 static size_t join_field_2 = SIZE_MAX;
132 /* List of fields to print. */
133 static struct outlist outlist_head;
135 /* Last element in 'outlist', where a new element can be added. */
136 static struct outlist *outlist_end = &outlist_head;
138 /* Tab character separating fields. If negative, fields are separated
139 by any nonempty string of blanks, otherwise by exactly one
140 tab character whose value (when cast to unsigned char) equals TAB. */
141 static int tab = -1;
/* How strictly input ordering is verified.  This is a three-way
   setting, not a boolean:
     CHECK_ORDER_DEFAULT  - complain about disorder only once an
                            unpairable line has been seen (a warning);
     CHECK_ORDER_ENABLED  - always check; disorder is a fatal error;
     CHECK_ORDER_DISABLED - never check.  */
static enum
  {
    CHECK_ORDER_DEFAULT,
    CHECK_ORDER_ENABLED,
    CHECK_ORDER_DISABLED
  } check_input_order;
/* getopt_long return codes for the options that have no short form.
   They start just above CHAR_MAX so that they cannot collide with any
   single-character option.  */
enum
  {
    CHECK_ORDER_OPTION = CHAR_MAX + 1,
    NOCHECK_ORDER_OPTION,
    HEADER_LINE_OPTION
  };
159 static struct option const longopts[] =
161 {"ignore-case", no_argument, NULL, 'i'},
162 {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
163 {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
164 {"zero-terminated", no_argument, NULL, 'z'},
165 {"header", no_argument, NULL, HEADER_LINE_OPTION},
166 {GETOPT_HELP_OPTION_DECL},
167 {GETOPT_VERSION_OPTION_DECL},
168 {NULL, 0, NULL, 0}
171 /* Used to print non-joining lines */
172 static struct line uni_blank;
174 /* If nonzero, ignore case when comparing join fields. */
175 static bool ignore_case;
177 /* If nonzero, treat the first line of each file as column headers --
178 join them without checking for ordering */
179 static bool join_header_lines;
181 /* The character marking end of line. Default to \n. */
182 static char eolchar = '\n';
/* Tell the user how to invoke the program, then terminate the process
   with exit (STATUS).

   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the synopsis and the description of every option are
   written to stdout, followed by emit_ancillary_info ().  Both helpers
   are defined outside this file.

   NOTE(review): in this view of the file the help strings have lost
   their column alignment and some continuation lines; consult the
   pristine source before editing the message text.  */
184 void
185 usage (int status)
187 if (status != EXIT_SUCCESS)
188 emit_try_help ();
189 else
191 printf (_("\
192 Usage: %s [OPTION]... FILE1 FILE2\n\
194 program_name);
195 fputs (_("\
196 For each pair of input lines with identical join fields, write a line to\n\
197 standard output. The default join field is the first, delimited\n\
198 by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
200 -a FILENUM also print unpairable lines from file FILENUM, where\n\
201 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
202 -e EMPTY replace missing input fields with EMPTY\n\
203 "), stdout);
204 fputs (_("\
205 -i, --ignore-case ignore differences in case when comparing fields\n\
206 -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
207 -o FORMAT obey FORMAT while constructing output line\n\
208 -t CHAR use CHAR as input and output field separator\n\
209 "), stdout);
210 fputs (_("\
211 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
212 -1 FIELD join on this FIELD of file 1\n\
213 -2 FIELD join on this FIELD of file 2\n\
214 --check-order check that the input is correctly sorted, even\n\
215 if all input lines are pairable\n\
216 --nocheck-order do not check that the input is correctly sorted\n\
217 --header treat the first line in each file as field headers,\n\
218 print them without trying to pair them\n\
219 "), stdout);
220 fputs (_("\
221 -z, --zero-terminated end lines with 0 byte, not newline\n\
222 "), stdout);
223 fputs (HELP_OPTION_DESCRIPTION, stdout);
224 fputs (VERSION_OPTION_DESCRIPTION, stdout);
225 fputs (_("\
227 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
228 else fields are separated by CHAR. Any FIELD is a field number counted\n\
229 from 1. FORMAT is one or more comma or blank separated specifications,\n\
230 each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
231 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
232 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
233 line of each file determines the number of fields output for each line.\n\
235 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
236 E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
237 or use \"join -t ''\" if 'sort' has no options.\n\
238 Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\
239 If the input is not sorted and some lines cannot be joined, a\n\
240 warning message will be given.\n\
241 "), stdout);
242 emit_ancillary_info ();
244 exit (status);
247 /* Record a field in LINE, with location FIELD and size LEN. */
249 static void
250 extract_field (struct line *line, char *field, size_t len)
252 if (line->nfields >= line->nfields_allocated)
254 line->fields = X2NREALLOC (line->fields, &line->nfields_allocated);
256 line->fields[line->nfields].beg = field;
257 line->fields[line->nfields].len = len;
258 ++(line->nfields);
261 /* Fill in the 'fields' structure in LINE. */
263 static void
264 xfields (struct line *line)
266 char *ptr = line->buf.buffer;
267 char const *lim = ptr + line->buf.length - 1;
269 if (ptr == lim)
270 return;
272 if (0 <= tab && tab != '\n')
274 char *sep;
275 for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
276 extract_field (line, ptr, sep - ptr);
278 else if (tab < 0)
280 /* Skip leading blanks before the first field. */
281 while (isblank (to_uchar (*ptr)))
282 if (++ptr == lim)
283 return;
287 char *sep;
288 for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
289 continue;
290 extract_field (line, ptr, sep - ptr);
291 if (sep == lim)
292 return;
293 for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
294 continue;
296 while (ptr != lim);
299 extract_field (line, ptr, lim - ptr);
302 static void
303 freeline (struct line *line)
305 if (line == NULL)
306 return;
307 free (line->fields);
308 line->fields = NULL;
309 free (line->buf.buffer);
310 line->buf.buffer = NULL;
313 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
314 >0 if it compares greater; 0 if it compares equal.
315 Report an error and exit if the comparison fails.
316 Use join fields JF_1 and JF_2 respectively. */
318 static int
319 keycmp (struct line const *line1, struct line const *line2,
320 size_t jf_1, size_t jf_2)
322 /* Start of field to compare in each file. */
323 char *beg1;
324 char *beg2;
326 size_t len1;
327 size_t len2; /* Length of fields to compare. */
328 int diff;
330 if (jf_1 < line1->nfields)
332 beg1 = line1->fields[jf_1].beg;
333 len1 = line1->fields[jf_1].len;
335 else
337 beg1 = NULL;
338 len1 = 0;
341 if (jf_2 < line2->nfields)
343 beg2 = line2->fields[jf_2].beg;
344 len2 = line2->fields[jf_2].len;
346 else
348 beg2 = NULL;
349 len2 = 0;
352 if (len1 == 0)
353 return len2 == 0 ? 0 : -1;
354 if (len2 == 0)
355 return 1;
357 if (ignore_case)
359 /* FIXME: ignore_case does not work with NLS (in particular,
360 with multibyte chars). */
361 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
363 else
365 if (hard_LC_COLLATE)
366 return xmemcoll (beg1, len1, beg2, len2);
367 diff = memcmp (beg1, beg2, MIN (len1, len2));
370 if (diff)
371 return diff;
372 return len1 < len2 ? -1 : len1 != len2;
375 /* Check that successive input lines PREV and CURRENT from input file
376 WHATFILE are presented in order, unless the user may be relying on
377 the GNU extension that input lines may be out of order if no input
378 lines are unpairable.
380 If the user specified --nocheck-order, the check is not made.
381 If the user specified --check-order, the problem is fatal.
382 Otherwise (the default), the message is simply a warning.
384 A message is printed at most once per input file. */
386 static void
387 check_order (const struct line *prev,
388 const struct line *current,
389 int whatfile)
391 if (check_input_order != CHECK_ORDER_DISABLED
392 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
394 if (!issued_disorder_warning[whatfile-1])
396 size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
397 if (keycmp (prev, current, join_field, join_field) > 0)
399 /* Exclude any trailing newline. */
400 size_t len = current->buf.length;
401 if (0 < len && current->buf.buffer[len - 1] == '\n')
402 --len;
404 /* If the offending line is longer than INT_MAX, output
405 only the first INT_MAX bytes in this diagnostic. */
406 len = MIN (INT_MAX, len);
408 error ((check_input_order == CHECK_ORDER_ENABLED
409 ? EXIT_FAILURE : 0),
410 0, _("%s:%"PRIuMAX": is not sorted: %.*s"),
411 g_names[whatfile - 1], line_no[whatfile - 1],
412 (int) len, current->buf.buffer);
414 /* If we get to here, the message was merely a warning.
415 Arrange to issue it only once per file. */
416 issued_disorder_warning[whatfile-1] = true;
422 static inline void
423 reset_line (struct line *line)
425 line->nfields = 0;
428 static struct line *
429 init_linep (struct line **linep)
431 struct line *line = xcalloc (1, sizeof *line);
432 *linep = line;
433 return line;
436 /* Read a line from FP into LINE and split it into fields.
437 Return true if successful. */
439 static bool
440 get_line (FILE *fp, struct line **linep, int which)
442 struct line *line = *linep;
444 if (line == prevline[which - 1])
446 SWAPLINES (line, spareline[which - 1]);
447 *linep = line;
450 if (line)
451 reset_line (line);
452 else
453 line = init_linep (linep);
455 if (! readlinebuffer_delim (&line->buf, fp, eolchar))
457 if (ferror (fp))
458 error (EXIT_FAILURE, errno, _("read error"));
459 freeline (line);
460 return false;
462 ++line_no[which - 1];
464 xfields (line);
466 if (prevline[which - 1])
467 check_order (prevline[which - 1], line, which);
469 prevline[which - 1] = line;
470 return true;
473 static void
474 free_spareline (void)
476 size_t i;
478 for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
480 if (spareline[i])
482 freeline (spareline[i]);
483 free (spareline[i]);
488 static void
489 initseq (struct seq *seq)
491 seq->count = 0;
492 seq->alloc = 0;
493 seq->lines = NULL;
496 /* Read a line from FP and add it to SEQ. Return true if successful. */
498 static bool
499 getseq (FILE *fp, struct seq *seq, int whichfile)
501 if (seq->count == seq->alloc)
503 size_t i;
504 seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
505 for (i = seq->count; i < seq->alloc; i++)
506 seq->lines[i] = NULL;
509 if (get_line (fp, &seq->lines[seq->count], whichfile))
511 ++seq->count;
512 return true;
514 return false;
517 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
518 true, else as the next. */
519 static bool
520 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
522 if (first)
523 seq->count = 0;
525 return getseq (fp, seq, whichfile);
528 static void
529 delseq (struct seq *seq)
531 size_t i;
532 for (i = 0; i < seq->alloc; i++)
534 freeline (seq->lines[i]);
535 free (seq->lines[i]);
537 free (seq->lines);
541 /* Print field N of LINE if it exists and is nonempty, otherwise
542 'empty_filler' if it is nonempty. */
544 static void
545 prfield (size_t n, struct line const *line)
547 size_t len;
549 if (n < line->nfields)
551 len = line->fields[n].len;
552 if (len)
553 fwrite (line->fields[n].beg, 1, len, stdout);
554 else if (empty_filler)
555 fputs (empty_filler, stdout);
557 else if (empty_filler)
558 fputs (empty_filler, stdout);
561 /* Output all the fields in line, other than the join field. */
563 static void
564 prfields (struct line const *line, size_t join_field, size_t autocount)
566 size_t i;
567 size_t nfields = autoformat ? autocount : line->nfields;
568 char output_separator = tab < 0 ? ' ' : tab;
570 for (i = 0; i < join_field && i < nfields; ++i)
572 putchar (output_separator);
573 prfield (i, line);
575 for (i = join_field + 1; i < nfields; ++i)
577 putchar (output_separator);
578 prfield (i, line);
582 /* Print the join of LINE1 and LINE2. */
584 static void
585 prjoin (struct line const *line1, struct line const *line2)
587 const struct outlist *outlist;
588 char output_separator = tab < 0 ? ' ' : tab;
589 size_t field;
590 struct line const *line;
592 outlist = outlist_head.next;
593 if (outlist)
595 const struct outlist *o;
597 o = outlist;
598 while (1)
600 if (o->file == 0)
602 if (line1 == &uni_blank)
604 line = line2;
605 field = join_field_2;
607 else
609 line = line1;
610 field = join_field_1;
613 else
615 line = (o->file == 1 ? line1 : line2);
616 field = o->field;
618 prfield (field, line);
619 o = o->next;
620 if (o == NULL)
621 break;
622 putchar (output_separator);
624 putchar (eolchar);
626 else
628 if (line1 == &uni_blank)
630 line = line2;
631 field = join_field_2;
633 else
635 line = line1;
636 field = join_field_1;
639 /* Output the join field. */
640 prfield (field, line);
642 /* Output other fields. */
643 prfields (line1, join_field_1, autocount_1);
644 prfields (line2, join_field_2, autocount_2);
646 putchar (eolchar);
650 /* Print the join of the files in FP1 and FP2. */
652 static void
653 join (FILE *fp1, FILE *fp2)
655 struct seq seq1, seq2;
656 int diff;
657 bool eof1, eof2;
659 fadvise (fp1, FADVISE_SEQUENTIAL);
660 fadvise (fp2, FADVISE_SEQUENTIAL);
662 /* Read the first line of each file. */
663 initseq (&seq1);
664 getseq (fp1, &seq1, 1);
665 initseq (&seq2);
666 getseq (fp2, &seq2, 2);
668 if (autoformat)
670 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
671 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
674 if (join_header_lines && (seq1.count || seq2.count))
676 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
677 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
678 prjoin (hline1, hline2);
679 prevline[0] = NULL;
680 prevline[1] = NULL;
681 if (seq1.count)
682 advance_seq (fp1, &seq1, true, 1);
683 if (seq2.count)
684 advance_seq (fp2, &seq2, true, 2);
687 while (seq1.count && seq2.count)
689 size_t i;
690 diff = keycmp (seq1.lines[0], seq2.lines[0],
691 join_field_1, join_field_2);
692 if (diff < 0)
694 if (print_unpairables_1)
695 prjoin (seq1.lines[0], &uni_blank);
696 advance_seq (fp1, &seq1, true, 1);
697 seen_unpairable = true;
698 continue;
700 if (diff > 0)
702 if (print_unpairables_2)
703 prjoin (&uni_blank, seq2.lines[0]);
704 advance_seq (fp2, &seq2, true, 2);
705 seen_unpairable = true;
706 continue;
709 /* Keep reading lines from file1 as long as they continue to
710 match the current line from file2. */
711 eof1 = false;
713 if (!advance_seq (fp1, &seq1, false, 1))
715 eof1 = true;
716 ++seq1.count;
717 break;
719 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
720 join_field_1, join_field_2));
722 /* Keep reading lines from file2 as long as they continue to
723 match the current line from file1. */
724 eof2 = false;
726 if (!advance_seq (fp2, &seq2, false, 2))
728 eof2 = true;
729 ++seq2.count;
730 break;
732 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
733 join_field_1, join_field_2));
735 if (print_pairables)
737 for (i = 0; i < seq1.count - 1; ++i)
739 size_t j;
740 for (j = 0; j < seq2.count - 1; ++j)
741 prjoin (seq1.lines[i], seq2.lines[j]);
745 if (!eof1)
747 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
748 seq1.count = 1;
750 else
751 seq1.count = 0;
753 if (!eof2)
755 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
756 seq2.count = 1;
758 else
759 seq2.count = 0;
762 /* If the user did not specify --nocheck-order, then we read the
763 tail ends of both inputs to verify that they are in order. We
764 skip the rest of the tail once we have issued a warning for that
765 file, unless we actually need to print the unpairable lines. */
766 struct line *line = NULL;
767 bool checktail = false;
769 if (check_input_order != CHECK_ORDER_DISABLED
770 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
771 checktail = true;
773 if ((print_unpairables_1 || checktail) && seq1.count)
775 if (print_unpairables_1)
776 prjoin (seq1.lines[0], &uni_blank);
777 if (seq2.count)
778 seen_unpairable = true;
779 while (get_line (fp1, &line, 1))
781 if (print_unpairables_1)
782 prjoin (line, &uni_blank);
783 if (issued_disorder_warning[0] && !print_unpairables_1)
784 break;
788 if ((print_unpairables_2 || checktail) && seq2.count)
790 if (print_unpairables_2)
791 prjoin (&uni_blank, seq2.lines[0]);
792 if (seq1.count)
793 seen_unpairable = true;
794 while (get_line (fp2, &line, 2))
796 if (print_unpairables_2)
797 prjoin (&uni_blank, line);
798 if (issued_disorder_warning[1] && !print_unpairables_2)
799 break;
803 freeline (line);
804 free (line);
806 delseq (&seq1);
807 delseq (&seq2);
810 /* Add a field spec for field FIELD of file FILE to 'outlist'. */
812 static void
813 add_field (int file, size_t field)
815 struct outlist *o;
817 assert (file == 0 || file == 1 || file == 2);
818 assert (file != 0 || field == 0);
820 o = xmalloc (sizeof *o);
821 o->file = file;
822 o->field = field;
823 o->next = NULL;
825 /* Add to the end of the list so the fields are in the right order. */
826 outlist_end->next = o;
827 outlist_end = o;
830 /* Convert a string of decimal digits, STR (the 1-based join field number),
831 to an integral value. Upon successful conversion, return one less
832 (the zero-based field number). Silently convert too-large values
833 to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
834 diagnostic and exit. */
836 static size_t
837 string_to_join_field (char const *str)
839 size_t result;
840 unsigned long int val;
841 verify (SIZE_MAX <= ULONG_MAX);
843 strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
844 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
845 val = SIZE_MAX;
846 else if (s_err != LONGINT_OK || val == 0)
847 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
849 result = val - 1;
851 return result;
/* Decode the single field specifier S into *FILE_INDEX and
   *FIELD_INDEX.  S is either "0" (the join field) or "N.FIELD" with N
   being 1 or 2; FIELD is 1-based in S while *FIELD_INDEX is
   zero-based.  The function returns nothing: an invalid S is
   diagnosed and ends the program.  */

static void
decode_field_spec (const char *s, int *file_index, size_t *field_index)
{
  char file_char = s[0];

  /* The first character must be 0, 1, or 2.  */
  if (file_char == '0')
    {
      /* '0' must be all alone -- no '.FIELD'.  */
      if (s[1])
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = 0;
      *field_index = 0;
    }
  else if (file_char == '1' || file_char == '2')
    {
      if (s[1] != '.')
        error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
      *file_index = file_char - '0';
      *field_index = string_to_join_field (s + 2);
    }
  else
    {
      error (EXIT_FAILURE, 0,
             _("invalid file number in field spec: %s"), quote (s));

      /* Tell gcc -W -Wall that we can't get beyond this point.
         This avoids a warning (otherwise legit) that the caller's copies
         of *file_index and *field_index might be used uninitialized.  */
      abort ();
    }
}
/* Append to the output list every field spec found in STR, where the
   specs are separated by a comma, a space or a tab.  STR is modified:
   each separator is overwritten with a NUL byte.  */

static void
add_field_list (char *str)
{
  char *rest = str;

  for (;;)
    {
      int file_index;
      size_t field_index;
      char const *spec_item = rest;

      /* Cut the current item off at the next separator, if any.  */
      rest = strpbrk (rest, ", \t");
      if (rest)
        *rest++ = '\0';

      decode_field_spec (spec_item, &file_index, &field_index);
      add_field (file_index, field_index);

      if (rest == NULL)
        break;
    }
}
/* Store VAL in the join field variable *VAR.  If *VAR was already set
   (that is, differs from SIZE_MAX) to a different value, report the
   two 1-based field numbers as incompatible and end the program.  */

static void
set_join_field (size_t *var, size_t val)
{
  size_t previous = *var;
  bool already_set = previous != SIZE_MAX;

  if (already_set && previous != val)
    {
      unsigned long int var1 = previous + 1;
      unsigned long int val1 = val + 1;
      error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
             var1, val1);
    }

  *var = val;
}
/* How a non-option command-line argument may have to be interpreted.
   MIGHT_BE_J2_ARG must directly follow MIGHT_BE_J1_ARG: main computes
   the status as MIGHT_BE_J1_ARG + is_j2.  */

enum operand_status
  {
    /* The argument can only be an operand, i.e., one of the files to
       be joined.  */
    MUST_BE_OPERAND,

    /* The argument follows -j1 or -j2: it is either that option's
       argument or an operand.  */
    MIGHT_BE_J1_ARG,
    MIGHT_BE_J2_ARG,

    /* The argument follows -o: it is either a further field list for
       that option or an operand.  */
    MIGHT_BE_O_ARG
  };
951 /* Add NAME to the array of input file NAMES with operand statuses
952 OPERAND_STATUS; currently there are NFILES names in the list. */
954 static void
955 add_file_name (char *name, char *names[2],
956 int operand_status[2], int joption_count[2], int *nfiles,
957 int *prev_optc_status, int *optc_status)
959 int n = *nfiles;
961 if (n == 2)
963 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
964 char *arg = names[op0];
965 switch (operand_status[op0])
967 case MUST_BE_OPERAND:
968 error (0, 0, _("extra operand %s"), quote (name));
969 usage (EXIT_FAILURE);
971 case MIGHT_BE_J1_ARG:
972 joption_count[0]--;
973 set_join_field (&join_field_1, string_to_join_field (arg));
974 break;
976 case MIGHT_BE_J2_ARG:
977 joption_count[1]--;
978 set_join_field (&join_field_2, string_to_join_field (arg));
979 break;
981 case MIGHT_BE_O_ARG:
982 add_field_list (arg);
983 break;
985 if (!op0)
987 operand_status[0] = operand_status[1];
988 names[0] = names[1];
990 n = 1;
993 operand_status[n] = *prev_optc_status;
994 names[n] = name;
995 *nfiles = n + 1;
996 if (*prev_optc_status == MIGHT_BE_O_ARG)
997 *optc_status = MIGHT_BE_O_ARG;
1001 main (int argc, char **argv)
1003 int optc_status;
1004 int prev_optc_status = MUST_BE_OPERAND;
1005 int operand_status[2];
1006 int joption_count[2] = { 0, 0 };
1007 FILE *fp1, *fp2;
1008 int optc;
1009 int nfiles = 0;
1010 int i;
1012 initialize_main (&argc, &argv);
1013 set_program_name (argv[0]);
1014 setlocale (LC_ALL, "");
1015 bindtextdomain (PACKAGE, LOCALEDIR);
1016 textdomain (PACKAGE);
1017 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1019 atexit (close_stdout);
1020 atexit (free_spareline);
1022 print_pairables = true;
1023 seen_unpairable = false;
1024 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1025 check_input_order = CHECK_ORDER_DEFAULT;
1027 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z",
1028 longopts, NULL))
1029 != -1)
1031 optc_status = MUST_BE_OPERAND;
1033 switch (optc)
1035 case 'v':
1036 print_pairables = false;
1037 /* Fall through. */
1039 case 'a':
1041 unsigned long int val;
1042 if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
1043 || (val != 1 && val != 2))
1044 error (EXIT_FAILURE, 0,
1045 _("invalid field number: %s"), quote (optarg));
1046 if (val == 1)
1047 print_unpairables_1 = true;
1048 else
1049 print_unpairables_2 = true;
1051 break;
1053 case 'e':
1054 if (empty_filler && ! STREQ (empty_filler, optarg))
1055 error (EXIT_FAILURE, 0,
1056 _("conflicting empty-field replacement strings"));
1057 empty_filler = optarg;
1058 break;
1060 case 'i':
1061 ignore_case = true;
1062 break;
1064 case '1':
1065 set_join_field (&join_field_1, string_to_join_field (optarg));
1066 break;
1068 case '2':
1069 set_join_field (&join_field_2, string_to_join_field (optarg));
1070 break;
1072 case 'j':
1073 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1074 && optarg == argv[optind - 1] + 2)
1076 /* The argument was either "-j1" or "-j2". */
1077 bool is_j2 = (optarg[0] == '2');
1078 joption_count[is_j2]++;
1079 optc_status = MIGHT_BE_J1_ARG + is_j2;
1081 else
1083 set_join_field (&join_field_1, string_to_join_field (optarg));
1084 set_join_field (&join_field_2, join_field_1);
1086 break;
1088 case 'o':
1089 if (STREQ (optarg, "auto"))
1090 autoformat = true;
1091 else
1093 add_field_list (optarg);
1094 optc_status = MIGHT_BE_O_ARG;
1096 break;
1098 case 't':
1100 unsigned char newtab = optarg[0];
1101 if (! newtab)
1102 newtab = '\n'; /* '' => process the whole line. */
1103 else if (optarg[1])
1105 if (STREQ (optarg, "\\0"))
1106 newtab = '\0';
1107 else
1108 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1109 quote (optarg));
1111 if (0 <= tab && tab != newtab)
1112 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1113 tab = newtab;
1115 break;
1117 case 'z':
1118 eolchar = 0;
1119 break;
1121 case NOCHECK_ORDER_OPTION:
1122 check_input_order = CHECK_ORDER_DISABLED;
1123 break;
1125 case CHECK_ORDER_OPTION:
1126 check_input_order = CHECK_ORDER_ENABLED;
1127 break;
1129 case 1: /* Non-option argument. */
1130 add_file_name (optarg, g_names, operand_status, joption_count,
1131 &nfiles, &prev_optc_status, &optc_status);
1132 break;
1134 case HEADER_LINE_OPTION:
1135 join_header_lines = true;
1136 break;
1138 case_GETOPT_HELP_CHAR;
1140 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1142 default:
1143 usage (EXIT_FAILURE);
1146 prev_optc_status = optc_status;
1149 /* Process any operands after "--". */
1150 prev_optc_status = MUST_BE_OPERAND;
1151 while (optind < argc)
1152 add_file_name (argv[optind++], g_names, operand_status, joption_count,
1153 &nfiles, &prev_optc_status, &optc_status);
1155 if (nfiles != 2)
1157 if (nfiles == 0)
1158 error (0, 0, _("missing operand"));
1159 else
1160 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1161 usage (EXIT_FAILURE);
1164 /* If "-j1" was specified and it turns out not to have had an argument,
1165 treat it as "-j 1". Likewise for -j2. */
1166 for (i = 0; i < 2; i++)
1167 if (joption_count[i] != 0)
1169 set_join_field (&join_field_1, i);
1170 set_join_field (&join_field_2, i);
1173 if (join_field_1 == SIZE_MAX)
1174 join_field_1 = 0;
1175 if (join_field_2 == SIZE_MAX)
1176 join_field_2 = 0;
1178 fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
1179 if (!fp1)
1180 error (EXIT_FAILURE, errno, "%s", g_names[0]);
1181 fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
1182 if (!fp2)
1183 error (EXIT_FAILURE, errno, "%s", g_names[1]);
1184 if (fp1 == fp2)
1185 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1186 join (fp1, fp2);
1188 if (fclose (fp1) != 0)
1189 error (EXIT_FAILURE, errno, "%s", g_names[0]);
1190 if (fclose (fp2) != 0)
1191 error (EXIT_FAILURE, errno, "%s", g_names[1]);
1193 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1194 exit (EXIT_FAILURE);
1195 else
1196 exit (EXIT_SUCCESS);