tail: allow multiple PIDs
[coreutils.git] / src / join.c
blob7eef58c0bf940818d5c1ac13bb1f900705e34e4b
1 /* join - join lines of two files on a common field
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
17 Written by Mike Haertel, mike@gnu.ai.mit.edu. */
19 #include <config.h>
21 #include <sys/types.h>
22 #include <getopt.h>
24 #include "system.h"
25 #include "assure.h"
26 #include "fadvise.h"
27 #include "hard-locale.h"
28 #include "linebuffer.h"
29 #include "memcasecmp.h"
30 #include "quote.h"
31 #include "stdio--.h"
32 #include "xmemcoll.h"
33 #include "xstrtol.h"
34 #include "argmatch.h"
/* Canonical program name, without any installation prefix such as 'g'.  */
#define PROGRAM_NAME "join"

/* Author list handed to the --version machinery.  */
#define AUTHORS proper_name ("Mike Haertel")

/* Rename this file's join () to system_join.
   NOTE(review): presumably this avoids a clash with a 'join' declared
   by some system header -- confirm.  */
#define join system_join
/* Exchange the two 'struct line *' lvalues A and B.

   Each argument is evaluated more than once, so neither may have side
   effects.  The expansion is a single statement that deliberately does
   NOT end in a semicolon: the caller writes it, which keeps

     if (cond) SWAPLINES (x, y); else ...

   well formed.  */
#define SWAPLINES(a, b) do { \
  struct line *swaplines_tmp = (a); \
  (a) = (b); \
  (b) = swaplines_tmp; \
} while (0)
49 /* An element of the list identifying which fields to print for each
50 output line. */
51 struct outlist
53 /* File number: 0, 1, or 2. 0 means use the join field.
54 1 means use the first file argument, 2 the second. */
55 int file;
57 /* Field index (zero-based), specified only when FILE is 1 or 2. */
58 idx_t field;
60 struct outlist *next;
63 /* A field of a line. */
64 struct field
66 char *beg; /* First character in field. */
67 idx_t len; /* The length of the field. */
70 /* A line read from an input file. */
71 struct line
73 struct linebuffer buf; /* The line itself. */
74 idx_t nfields; /* Number of elements in 'fields'. */
75 idx_t nfields_allocated; /* Number of elements allocated for 'fields'. */
76 struct field *fields;
79 /* One or more consecutive lines read from a file that all have the
80 same join field value. */
81 struct seq
83 idx_t count; /* Elements used in 'lines'. */
84 idx_t alloc; /* Elements allocated in 'lines'. */
85 struct line **lines;
88 /* The previous line read from each file. */
89 static struct line *prevline[2] = {nullptr, nullptr};
91 /* The number of lines read from each file. */
92 static uintmax_t line_no[2] = {0, 0};
94 /* The input file names. */
95 static char *g_names[2];
97 /* This provides an extra line buffer for each file. We need these if we
98 try to read two consecutive lines into the same buffer, since we don't
99 want to overwrite the previous buffer before we check order. */
100 static struct line *spareline[2] = {nullptr, nullptr};
102 /* True if the LC_COLLATE locale is hard. */
103 static bool hard_LC_COLLATE;
105 /* If nonzero, print unpairable lines in file 1 or 2. */
106 static bool print_unpairables_1, print_unpairables_2;
108 /* If nonzero, print pairable lines. */
109 static bool print_pairables;
111 /* If nonzero, we have seen at least one unpairable line. */
112 static bool seen_unpairable;
114 /* If nonzero, we have warned about disorder in that file. */
115 static bool issued_disorder_warning[2];
117 /* Empty output field filler. */
118 static char const *empty_filler;
120 /* Whether to ensure the same number of fields are output from each line. */
121 static bool autoformat;
122 /* The number of fields to output for each line.
123 Only significant when autoformat is true. */
124 static idx_t autocount_1;
125 static idx_t autocount_2;
127 /* Field to join on; -1 means they haven't been determined yet. */
128 static ptrdiff_t join_field_1 = -1;
129 static ptrdiff_t join_field_2 = -1;
131 /* List of fields to print. */
132 static struct outlist outlist_head;
134 /* Last element in 'outlist', where a new element can be added. */
135 static struct outlist *outlist_end = &outlist_head;
137 /* Tab character separating fields. If negative, fields are separated
138 by any nonempty string of blanks, otherwise by exactly one
139 tab character whose value (when cast to unsigned char) equals TAB. */
140 static int tab = -1;
/* How strictly to verify that the input is correctly ordered.  */
static enum
  {
    CHECK_ORDER_DEFAULT,
    CHECK_ORDER_ENABLED,
    CHECK_ORDER_DISABLED
  } check_input_order;
/* getopt_long return values for the long options that have no
   single-character form; they start above CHAR_MAX so they cannot
   collide with any option letter.  */
enum
{
  CHECK_ORDER_OPTION = CHAR_MAX + 1,
  NOCHECK_ORDER_OPTION,
  HEADER_LINE_OPTION
};
158 static struct option const longopts[] =
160 {"ignore-case", no_argument, nullptr, 'i'},
161 {"check-order", no_argument, nullptr, CHECK_ORDER_OPTION},
162 {"nocheck-order", no_argument, nullptr, NOCHECK_ORDER_OPTION},
163 {"zero-terminated", no_argument, nullptr, 'z'},
164 {"header", no_argument, nullptr, HEADER_LINE_OPTION},
165 {GETOPT_HELP_OPTION_DECL},
166 {GETOPT_VERSION_OPTION_DECL},
167 {nullptr, 0, nullptr, 0}
170 /* Used to print non-joining lines */
171 static struct line uni_blank;
173 /* If nonzero, ignore case when comparing join fields. */
174 static bool ignore_case;
176 /* If nonzero, treat the first line of each file as column headers --
177 join them without checking for ordering */
178 static bool join_header_lines;
180 /* The character marking end of line. Default to \n. */
181 static char eolchar = '\n';
/* Print help for this program and terminate with exit status STATUS.
   When STATUS is not EXIT_SUCCESS only emit_try_help () is called;
   otherwise the full synopsis and option summary are written to
   standard output.  Either way the function ends in exit (STATUS)
   and does not return.  */
183 void
184 usage (int status)
186 if (status != EXIT_SUCCESS)
187 emit_try_help ();
188 else
190 printf (_("\
191 Usage: %s [OPTION]... FILE1 FILE2\n\
193 program_name);
194 fputs (_("\
195 For each pair of input lines with identical join fields, write a line to\n\
196 standard output. The default join field is the first, delimited by blanks.\
198 "), stdout);
199 fputs (_("\
201 When FILE1 or FILE2 (not both) is -, read standard input.\n\
202 "), stdout);
203 fputs (_("\
205 -a FILENUM also print unpairable lines from file FILENUM, where\n\
206 FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
207 "), stdout);
208 fputs (_("\
209 -e STRING replace missing (empty) input fields with STRING;\n\
210 I.e., missing fields specified with '-12jo' options\
212 "), stdout);
213 fputs (_("\
214 -i, --ignore-case ignore differences in case when comparing fields\n\
215 -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
216 -o FORMAT obey FORMAT while constructing output line\n\
217 -t CHAR use CHAR as input and output field separator\n\
218 "), stdout);
219 fputs (_("\
220 -v FILENUM like -a FILENUM, but suppress joined output lines\n\
221 -1 FIELD join on this FIELD of file 1\n\
222 -2 FIELD join on this FIELD of file 2\n\
223 --check-order check that the input is correctly sorted, even\n\
224 if all input lines are pairable\n\
225 --nocheck-order do not check that the input is correctly sorted\n\
226 --header treat the first line in each file as field headers,\n\
227 print them without trying to pair them\n\
228 "), stdout);
229 fputs (_("\
230 -z, --zero-terminated line delimiter is NUL, not newline\n\
231 "), stdout);
232 fputs (HELP_OPTION_DESCRIPTION, stdout);
233 fputs (VERSION_OPTION_DESCRIPTION, stdout);
234 fputs (_("\
236 Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
237 else fields are separated by CHAR. Any FIELD is a field number counted\n\
238 from 1. FORMAT is one or more comma or blank separated specifications,\n\
239 each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
240 the remaining fields from FILE1, the remaining fields from FILE2, all\n\
241 separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
242 line of each file determines the number of fields output for each line.\n\
244 Important: FILE1 and FILE2 must be sorted on the join fields.\n\
245 E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
246 or use \"join -t ''\" if 'sort' has no options.\n\
247 Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\
248 If the input is not sorted and some lines cannot be joined, a\n\
249 warning message will be given.\n\
250 "), stdout);
251 emit_ancillary_info (PROGRAM_NAME);
253 exit (status);
256 /* Record a field in LINE, with location FIELD and size LEN. */
258 static void
259 extract_field (struct line *line, char *field, idx_t len)
261 if (line->nfields >= line->nfields_allocated)
262 line->fields = xpalloc (line->fields, &line->nfields_allocated, 1,
263 -1, sizeof *line->fields);
264 line->fields[line->nfields].beg = field;
265 line->fields[line->nfields].len = len;
266 ++(line->nfields);
269 /* Fill in the 'fields' structure in LINE. */
271 static void
272 xfields (struct line *line)
274 char *ptr = line->buf.buffer;
275 char const *lim = ptr + line->buf.length - 1;
277 if (ptr == lim)
278 return;
280 if (0 <= tab && tab != '\n')
282 char *sep;
283 for (; (sep = memchr (ptr, tab, lim - ptr)) != nullptr; ptr = sep + 1)
284 extract_field (line, ptr, sep - ptr);
286 else if (tab < 0)
288 /* Skip leading blanks before the first field. */
289 while (field_sep (*ptr))
290 if (++ptr == lim)
291 return;
295 char *sep;
296 for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++)
297 continue;
298 extract_field (line, ptr, sep - ptr);
299 if (sep == lim)
300 return;
301 for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++)
302 continue;
304 while (ptr != lim);
307 extract_field (line, ptr, lim - ptr);
310 static void
311 freeline (struct line *line)
313 if (line == nullptr)
314 return;
315 free (line->fields);
316 line->fields = nullptr;
317 free (line->buf.buffer);
318 line->buf.buffer = nullptr;
321 /* Return <0 if the join field in LINE1 compares less than the one in LINE2;
322 >0 if it compares greater; 0 if it compares equal.
323 Report an error and exit if the comparison fails.
324 Use join fields JF_1 and JF_2 respectively. */
326 static int
327 keycmp (struct line const *line1, struct line const *line2,
328 idx_t jf_1, idx_t jf_2)
330 /* Start of field to compare in each file. */
331 char *beg1;
332 char *beg2;
334 idx_t len1;
335 idx_t len2; /* Length of fields to compare. */
336 int diff;
338 if (jf_1 < line1->nfields)
340 beg1 = line1->fields[jf_1].beg;
341 len1 = line1->fields[jf_1].len;
343 else
345 beg1 = nullptr;
346 len1 = 0;
349 if (jf_2 < line2->nfields)
351 beg2 = line2->fields[jf_2].beg;
352 len2 = line2->fields[jf_2].len;
354 else
356 beg2 = nullptr;
357 len2 = 0;
360 if (len1 == 0)
361 return len2 == 0 ? 0 : -1;
362 if (len2 == 0)
363 return 1;
365 if (ignore_case)
367 /* FIXME: ignore_case does not work with NLS (in particular,
368 with multibyte chars). */
369 diff = memcasecmp (beg1, beg2, MIN (len1, len2));
371 else
373 if (hard_LC_COLLATE)
374 return xmemcoll (beg1, len1, beg2, len2);
375 diff = memcmp (beg1, beg2, MIN (len1, len2));
378 if (diff)
379 return diff;
380 return (len1 > len2) - (len1 < len2);
383 /* Check that successive input lines PREV and CURRENT from input file
384 WHATFILE are presented in order, unless the user may be relying on
385 the GNU extension that input lines may be out of order if no input
386 lines are unpairable.
388 If the user specified --nocheck-order, the check is not made.
389 If the user specified --check-order, the problem is fatal.
390 Otherwise (the default), the message is simply a warning.
392 A message is printed at most once per input file. */
394 static void
395 check_order (const struct line *prev,
396 const struct line *current,
397 int whatfile)
399 if (check_input_order != CHECK_ORDER_DISABLED
400 && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
402 if (!issued_disorder_warning[whatfile - 1])
404 idx_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
405 if (keycmp (prev, current, join_field, join_field) > 0)
407 /* Exclude any trailing newline. */
408 idx_t len = current->buf.length;
409 if (0 < len && current->buf.buffer[len - 1] == '\n')
410 --len;
412 /* If the offending line is longer than INT_MAX, output
413 only the first INT_MAX bytes in this diagnostic. */
414 len = MIN (INT_MAX, len);
416 error ((check_input_order == CHECK_ORDER_ENABLED
417 ? EXIT_FAILURE : 0),
418 0, _("%s:%ju: is not sorted: %.*s"),
419 g_names[whatfile - 1], line_no[whatfile - 1],
420 (int) len, current->buf.buffer);
422 /* If we get to here, the message was merely a warning.
423 Arrange to issue it only once per file. */
424 issued_disorder_warning[whatfile - 1] = true;
430 static inline void
431 reset_line (struct line *line)
433 line->nfields = 0;
436 static struct line *
437 init_linep (struct line **linep)
439 struct line *line = xzalloc (sizeof *line);
440 *linep = line;
441 return line;
444 /* Read a line from FP into LINE and split it into fields.
445 Return true if successful. */
447 static bool
448 get_line (FILE *fp, struct line **linep, int which)
450 struct line *line = *linep;
452 if (line == prevline[which - 1])
454 SWAPLINES (line, spareline[which - 1]);
455 *linep = line;
458 if (line)
459 reset_line (line);
460 else
461 line = init_linep (linep);
463 if (! readlinebuffer_delim (&line->buf, fp, eolchar))
465 if (ferror (fp))
466 error (EXIT_FAILURE, errno, _("read error"));
467 freeline (line);
468 return false;
470 ++line_no[which - 1];
472 xfields (line);
474 if (prevline[which - 1])
475 check_order (prevline[which - 1], line, which);
477 prevline[which - 1] = line;
478 return true;
481 static void
482 free_spareline (void)
484 for (idx_t i = 0; i < ARRAY_CARDINALITY (spareline); i++)
486 if (spareline[i])
488 freeline (spareline[i]);
489 free (spareline[i]);
494 static void
495 initseq (struct seq *seq)
497 seq->count = 0;
498 seq->alloc = 0;
499 seq->lines = nullptr;
502 /* Read a line from FP and add it to SEQ. Return true if successful. */
504 static bool
505 getseq (FILE *fp, struct seq *seq, int whichfile)
507 if (seq->count == seq->alloc)
509 seq->lines = xpalloc (seq->lines, &seq->alloc, 1, -1, sizeof *seq->lines);
510 for (idx_t i = seq->count; i < seq->alloc; i++)
511 seq->lines[i] = nullptr;
514 if (get_line (fp, &seq->lines[seq->count], whichfile))
516 ++seq->count;
517 return true;
519 return false;
522 /* Read a line from FP and add it to SEQ, as the first item if FIRST is
523 true, else as the next. */
524 static bool
525 advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
527 if (first)
528 seq->count = 0;
530 return getseq (fp, seq, whichfile);
533 static void
534 delseq (struct seq *seq)
536 for (idx_t i = 0; i < seq->alloc; i++)
538 freeline (seq->lines[i]);
539 free (seq->lines[i]);
541 free (seq->lines);
545 /* Print field N of LINE if it exists and is nonempty, otherwise
546 'empty_filler' if it is nonempty. */
548 static void
549 prfield (idx_t n, struct line const *line)
551 if (n < line->nfields)
553 idx_t len = line->fields[n].len;
554 if (len)
555 fwrite (line->fields[n].beg, 1, len, stdout);
556 else if (empty_filler)
557 fputs (empty_filler, stdout);
559 else if (empty_filler)
560 fputs (empty_filler, stdout);
563 /* Output all the fields in line, other than the join field. */
565 static void
566 prfields (struct line const *line, idx_t join_field, idx_t autocount)
568 idx_t i;
569 idx_t nfields = autoformat ? autocount : line->nfields;
570 char output_separator = tab < 0 ? ' ' : tab;
572 for (i = 0; i < join_field && i < nfields; ++i)
574 putchar (output_separator);
575 prfield (i, line);
577 for (i = join_field + 1; i < nfields; ++i)
579 putchar (output_separator);
580 prfield (i, line);
584 /* Print the join of LINE1 and LINE2. */
586 static void
587 prjoin (struct line const *line1, struct line const *line2)
589 const struct outlist *outlist;
590 char output_separator = tab < 0 ? ' ' : tab;
591 idx_t field;
592 struct line const *line;
594 outlist = outlist_head.next;
595 if (outlist)
597 const struct outlist *o;
599 o = outlist;
600 while (true)
602 if (o->file == 0)
604 if (line1 == &uni_blank)
606 line = line2;
607 field = join_field_2;
609 else
611 line = line1;
612 field = join_field_1;
615 else
617 line = (o->file == 1 ? line1 : line2);
618 field = o->field;
620 prfield (field, line);
621 o = o->next;
622 if (o == nullptr)
623 break;
624 putchar (output_separator);
626 putchar (eolchar);
628 else
630 if (line1 == &uni_blank)
632 line = line2;
633 field = join_field_2;
635 else
637 line = line1;
638 field = join_field_1;
641 /* Output the join field. */
642 prfield (field, line);
644 /* Output other fields. */
645 prfields (line1, join_field_1, autocount_1);
646 prfields (line2, join_field_2, autocount_2);
648 putchar (eolchar);
651 if (ferror (stdout))
652 write_error ();
655 /* Print the join of the files in FP1 and FP2. */
657 static void
658 join (FILE *fp1, FILE *fp2)
660 struct seq seq1, seq2;
661 int diff;
662 bool eof1, eof2;
664 fadvise (fp1, FADVISE_SEQUENTIAL);
665 fadvise (fp2, FADVISE_SEQUENTIAL);
667 /* Read the first line of each file. */
668 initseq (&seq1);
669 getseq (fp1, &seq1, 1);
670 initseq (&seq2);
671 getseq (fp2, &seq2, 2);
673 if (autoformat)
675 autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
676 autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
679 if (join_header_lines && (seq1.count || seq2.count))
681 struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
682 struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
683 prjoin (hline1, hline2);
684 prevline[0] = nullptr;
685 prevline[1] = nullptr;
686 if (seq1.count)
687 advance_seq (fp1, &seq1, true, 1);
688 if (seq2.count)
689 advance_seq (fp2, &seq2, true, 2);
692 while (seq1.count && seq2.count)
694 diff = keycmp (seq1.lines[0], seq2.lines[0],
695 join_field_1, join_field_2);
696 if (diff < 0)
698 if (print_unpairables_1)
699 prjoin (seq1.lines[0], &uni_blank);
700 advance_seq (fp1, &seq1, true, 1);
701 seen_unpairable = true;
702 continue;
704 if (diff > 0)
706 if (print_unpairables_2)
707 prjoin (&uni_blank, seq2.lines[0]);
708 advance_seq (fp2, &seq2, true, 2);
709 seen_unpairable = true;
710 continue;
713 /* Keep reading lines from file1 as long as they continue to
714 match the current line from file2. */
715 eof1 = false;
717 if (!advance_seq (fp1, &seq1, false, 1))
719 eof1 = true;
720 ++seq1.count;
721 break;
723 while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
724 join_field_1, join_field_2));
726 /* Keep reading lines from file2 as long as they continue to
727 match the current line from file1. */
728 eof2 = false;
730 if (!advance_seq (fp2, &seq2, false, 2))
732 eof2 = true;
733 ++seq2.count;
734 break;
736 while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
737 join_field_1, join_field_2));
739 if (print_pairables)
741 for (idx_t i = 0; i < seq1.count - 1; ++i)
743 idx_t j;
744 for (j = 0; j < seq2.count - 1; ++j)
745 prjoin (seq1.lines[i], seq2.lines[j]);
749 if (!eof1)
751 SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
752 seq1.count = 1;
754 else
755 seq1.count = 0;
757 if (!eof2)
759 SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
760 seq2.count = 1;
762 else
763 seq2.count = 0;
766 /* If the user did not specify --nocheck-order, then we read the
767 tail ends of both inputs to verify that they are in order. We
768 skip the rest of the tail once we have issued a warning for that
769 file, unless we actually need to print the unpairable lines. */
770 struct line *line = nullptr;
771 bool checktail = false;
773 if (check_input_order != CHECK_ORDER_DISABLED
774 && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
775 checktail = true;
777 if ((print_unpairables_1 || checktail) && seq1.count)
779 if (print_unpairables_1)
780 prjoin (seq1.lines[0], &uni_blank);
781 if (seq2.count)
782 seen_unpairable = true;
783 while (get_line (fp1, &line, 1))
785 if (print_unpairables_1)
786 prjoin (line, &uni_blank);
787 if (issued_disorder_warning[0] && !print_unpairables_1)
788 break;
792 if ((print_unpairables_2 || checktail) && seq2.count)
794 if (print_unpairables_2)
795 prjoin (&uni_blank, seq2.lines[0]);
796 if (seq1.count)
797 seen_unpairable = true;
798 while (get_line (fp2, &line, 2))
800 if (print_unpairables_2)
801 prjoin (&uni_blank, line);
802 if (issued_disorder_warning[1] && !print_unpairables_2)
803 break;
807 freeline (line);
808 free (line);
810 delseq (&seq1);
811 delseq (&seq2);
814 /* Add a field spec for field FIELD of file FILE to 'outlist'. */
816 static void
817 add_field (int file, idx_t field)
819 struct outlist *o;
821 affirm (file == 0 || file == 1 || file == 2);
822 affirm (file != 0 || field == 0);
824 o = xmalloc (sizeof *o);
825 o->file = file;
826 o->field = field;
827 o->next = nullptr;
829 /* Add to the end of the list so the fields are in the right order. */
830 outlist_end->next = o;
831 outlist_end = o;
834 /* Convert a string of decimal digits, STR (the 1-based join field number),
835 to an integral value. Upon successful conversion, return one less
836 (the zero-based field number). Silently convert too-large values
837 to PTRDIFF_MAX. Otherwise, if a value cannot be converted, give a
838 diagnostic and exit. */
840 static idx_t
841 string_to_join_field (char const *str)
843 intmax_t val;
845 strtol_error s_err = xstrtoimax (str, nullptr, 10, &val, "");
846 if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && PTRDIFF_MAX < val))
847 val = PTRDIFF_MAX;
848 else if (s_err != LONGINT_OK || val <= 0)
849 error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
851 return val - 1;
854 /* Convert a single field specifier string, S, to a *FILE_INDEX, *FIELD_INDEX
855 pair. In S, the field index string is 1-based; *FIELD_INDEX is zero-based.
856 If S is valid, return true. Otherwise, give a diagnostic and exit. */
858 static void
859 decode_field_spec (char const *s, int *file_index, idx_t *field_index)
861 /* The first character must be 0, 1, or 2. */
862 switch (s[0])
864 case '0':
865 if (s[1])
867 /* '0' must be all alone -- no '.FIELD'. */
868 error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
870 *file_index = 0;
871 *field_index = 0;
872 break;
874 case '1':
875 case '2':
876 if (s[1] != '.')
877 error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
878 *file_index = s[0] - '0';
879 *field_index = string_to_join_field (s + 2);
880 break;
882 default:
883 error (EXIT_FAILURE, 0,
884 _("invalid file number in field spec: %s"), quote (s));
888 /* Add the comma or blank separated field spec(s) in STR to 'outlist'. */
890 static void
891 add_field_list (char *str)
893 char *p = str;
897 int file_index;
898 idx_t field_index;
899 char const *spec_item = p;
901 p = strpbrk (p, ", \t");
902 if (p)
903 *p++ = '\0';
904 decode_field_spec (spec_item, &file_index, &field_index);
905 add_field (file_index, field_index);
907 while (p);
910 /* Set the join field *VAR to VAL, but report an error if *VAR is set
911 more than once to incompatible values. */
913 static void
914 set_join_field (ptrdiff_t *var, idx_t val)
916 if (0 <= *var && *var != val)
917 error (EXIT_FAILURE, 0,
918 _("incompatible join fields %td, %td"), *var, val);
919 *var = val;
/* How a non-option command-line argument is to be interpreted.  */

enum operand_status
  {
    /* Definitely an operand, i.e., one of the files to be joined.  */
    MUST_BE_OPERAND,

    /* Either the argument of the -j1 or -j2 option just before it, or
       an operand.  MIGHT_BE_J2_ARG must directly follow
       MIGHT_BE_J1_ARG: option parsing computes the former as
       MIGHT_BE_J1_ARG + 1.  */
    MIGHT_BE_J1_ARG,
    MIGHT_BE_J2_ARG,

    /* Either the argument of the -o option just before it, or an
       operand.  */
    MIGHT_BE_O_ARG
  };
940 /* Add NAME to the array of input file NAMES with operand statuses
941 OPERAND_STATUS; currently there are NFILES names in the list. */
943 static void
944 add_file_name (char *name, char *names[2],
945 int operand_status[2], int joption_count[2], int *nfiles,
946 int *prev_optc_status, int *optc_status)
948 int n = *nfiles;
950 if (n == 2)
952 bool op0 = (operand_status[0] == MUST_BE_OPERAND);
953 char *arg = names[op0];
954 switch (operand_status[op0])
956 case MUST_BE_OPERAND:
957 error (0, 0, _("extra operand %s"), quoteaf (name));
958 usage (EXIT_FAILURE);
960 case MIGHT_BE_J1_ARG:
961 joption_count[0]--;
962 set_join_field (&join_field_1, string_to_join_field (arg));
963 break;
965 case MIGHT_BE_J2_ARG:
966 joption_count[1]--;
967 set_join_field (&join_field_2, string_to_join_field (arg));
968 break;
970 case MIGHT_BE_O_ARG:
971 add_field_list (arg);
972 break;
974 if (!op0)
976 operand_status[0] = operand_status[1];
977 names[0] = names[1];
979 n = 1;
982 operand_status[n] = *prev_optc_status;
983 names[n] = name;
984 *nfiles = n + 1;
985 if (*prev_optc_status == MIGHT_BE_O_ARG)
986 *optc_status = MIGHT_BE_O_ARG;
990 main (int argc, char **argv)
992 int optc_status;
993 int prev_optc_status = MUST_BE_OPERAND;
994 int operand_status[2];
995 int joption_count[2] = { 0, 0 };
996 FILE *fp1, *fp2;
997 int optc;
998 int nfiles = 0;
999 int i;
1001 initialize_main (&argc, &argv);
1002 set_program_name (argv[0]);
1003 setlocale (LC_ALL, "");
1004 bindtextdomain (PACKAGE, LOCALEDIR);
1005 textdomain (PACKAGE);
1006 hard_LC_COLLATE = hard_locale (LC_COLLATE);
1008 atexit (close_stdout);
1009 atexit (free_spareline);
1011 print_pairables = true;
1012 seen_unpairable = false;
1013 issued_disorder_warning[0] = issued_disorder_warning[1] = false;
1014 check_input_order = CHECK_ORDER_DEFAULT;
1016 while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z",
1017 longopts, nullptr))
1018 != -1)
1020 optc_status = MUST_BE_OPERAND;
1022 switch (optc)
1024 case 'v':
1025 print_pairables = false;
1026 FALLTHROUGH;
1028 case 'a':
1030 long int val;
1031 if (xstrtol (optarg, nullptr, 10, &val, "") != LONGINT_OK
1032 || (val != 1 && val != 2))
1033 error (EXIT_FAILURE, 0,
1034 _("invalid field number: %s"), quote (optarg));
1035 if (val == 1)
1036 print_unpairables_1 = true;
1037 else
1038 print_unpairables_2 = true;
1040 break;
1042 case 'e':
1043 if (empty_filler && ! STREQ (empty_filler, optarg))
1044 error (EXIT_FAILURE, 0,
1045 _("conflicting empty-field replacement strings"));
1046 empty_filler = optarg;
1047 break;
1049 case 'i':
1050 ignore_case = true;
1051 break;
1053 case '1':
1054 set_join_field (&join_field_1, string_to_join_field (optarg));
1055 break;
1057 case '2':
1058 set_join_field (&join_field_2, string_to_join_field (optarg));
1059 break;
1061 case 'j':
1062 if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
1063 && optarg == argv[optind - 1] + 2)
1065 /* The argument was either "-j1" or "-j2". */
1066 bool is_j2 = (optarg[0] == '2');
1067 joption_count[is_j2]++;
1068 optc_status = MIGHT_BE_J1_ARG + is_j2;
1070 else
1072 set_join_field (&join_field_1, string_to_join_field (optarg));
1073 set_join_field (&join_field_2, join_field_1);
1075 break;
1077 case 'o':
1078 if (STREQ (optarg, "auto"))
1079 autoformat = true;
1080 else
1082 add_field_list (optarg);
1083 optc_status = MIGHT_BE_O_ARG;
1085 break;
1087 case 't':
1089 unsigned char newtab = optarg[0];
1090 if (! newtab)
1091 newtab = '\n'; /* '' => process the whole line. */
1092 else if (optarg[1])
1094 if (STREQ (optarg, "\\0"))
1095 newtab = '\0';
1096 else
1097 error (EXIT_FAILURE, 0, _("multi-character tab %s"),
1098 quote (optarg));
1100 if (0 <= tab && tab != newtab)
1101 error (EXIT_FAILURE, 0, _("incompatible tabs"));
1102 tab = newtab;
1104 break;
1106 case 'z':
1107 eolchar = 0;
1108 break;
1110 case NOCHECK_ORDER_OPTION:
1111 check_input_order = CHECK_ORDER_DISABLED;
1112 break;
1114 case CHECK_ORDER_OPTION:
1115 check_input_order = CHECK_ORDER_ENABLED;
1116 break;
1118 case 1: /* Non-option argument. */
1119 add_file_name (optarg, g_names, operand_status, joption_count,
1120 &nfiles, &prev_optc_status, &optc_status);
1121 break;
1123 case HEADER_LINE_OPTION:
1124 join_header_lines = true;
1125 break;
1127 case_GETOPT_HELP_CHAR;
1129 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1131 default:
1132 usage (EXIT_FAILURE);
1135 prev_optc_status = optc_status;
1138 /* Process any operands after "--". */
1139 prev_optc_status = MUST_BE_OPERAND;
1140 while (optind < argc)
1141 add_file_name (argv[optind++], g_names, operand_status, joption_count,
1142 &nfiles, &prev_optc_status, &optc_status);
1144 if (nfiles != 2)
1146 if (nfiles == 0)
1147 error (0, 0, _("missing operand"));
1148 else
1149 error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
1150 usage (EXIT_FAILURE);
1153 /* If "-j1" was specified and it turns out not to have had an argument,
1154 treat it as "-j 1". Likewise for -j2. */
1155 for (i = 0; i < 2; i++)
1156 if (joption_count[i] != 0)
1158 set_join_field (&join_field_1, i);
1159 set_join_field (&join_field_2, i);
1162 if (join_field_1 < 0)
1163 join_field_1 = 0;
1164 if (join_field_2 < 0)
1165 join_field_2 = 0;
1167 fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
1168 if (!fp1)
1169 error (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
1170 fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
1171 if (!fp2)
1172 error (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
1173 if (fp1 == fp2)
1174 error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
1175 join (fp1, fp2);
1177 if (fclose (fp1) != 0)
1178 error (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
1179 if (fclose (fp2) != 0)
1180 error (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
1182 if (issued_disorder_warning[0] || issued_disorder_warning[1])
1183 error (EXIT_FAILURE, 0, _("input is not in sorted order"));
1184 else
1185 return EXIT_SUCCESS;