sort - Don't live-loop threads
[dragonfly.git] / contrib / diffutils / src / diff.c
blob8be50be9b1b389ba709f6fc2eb01a59496a87b5e
1 /* diff - compare files line by line
3 Copyright (C) 1988-1989, 1992-1994, 1996, 1998, 2001-2002, 2004, 2006-2007,
4 2009-2013 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #define GDIFF_MAIN
22 #include "diff.h"
23 #include <assert.h>
24 #include "paths.h"
25 #include <c-stack.h>
26 #include <dirname.h>
27 #include <error.h>
28 #include <exclude.h>
29 #include <exitfail.h>
30 #include <filenamecat.h>
31 #include <file-type.h>
32 #include <fnmatch.h>
33 #include <getopt.h>
34 #include <hard-locale.h>
35 #include <prepargs.h>
36 #include <progname.h>
37 #include <sh-quote.h>
38 #include <stat-time.h>
39 #include <timespec.h>
40 #include <version-etc.h>
41 #include <xalloc.h>
42 #include <xreadlink.h>
43 #include <binary-io.h>
45 /* The official name of this program (e.g., no 'g' prefix). */
46 #define PROGRAM_NAME "diff"
48 #define AUTHORS \
49 proper_name ("Paul Eggert"), \
50 proper_name ("Mike Haertel"), \
51 proper_name ("David Hayes"), \
52 proper_name ("Richard Stallman"), \
53 proper_name ("Len Tower")
55 #ifndef GUTTER_WIDTH_MINIMUM
56 # define GUTTER_WIDTH_MINIMUM 3
57 #endif
/* Accumulates the regular expressions given by one repeatable option.
   The patterns are stored as a single string so that they can later be
   compiled together as one disjunction.  */
struct regexp_list
{
  /* Text of the patterns gathered so far.  */
  char *regexps;

  /* Number of chars of 'regexps' in use.  */
  size_t len;

  /* Allocated size of 'regexps'; 0 if it was not malloc'ed.  */
  size_t size;

  /* True if 'regexps' holds more than one pattern.  */
  bool multiple_regexps;

  /* Pattern buffer that the compiled form is written to.  */
  struct re_pattern_buffer *buf;
};
/* Forward declarations of the helpers defined later in this file.  */
static int compare_files (struct comparison const *parent,
                          char const *name0, char const *name1);
static void add_regexp (struct regexp_list *reglist, char const *pattern);
static void summarize_regexp_list (struct regexp_list *reglist);
static void specify_style (enum output_style style);
static void specify_value (char const **var, char const *value,
                           char const *option);
static void try_help (char const *reason_msgid, char const *operand)
  __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
77 /* If comparing directories, compare their common subdirectories
78 recursively. */
79 static bool recursive;
81 /* In context diffs, show previous lines that match these regexps. */
82 static struct regexp_list function_regexp_list;
84 /* Ignore changes affecting only lines that match these regexps. */
85 static struct regexp_list ignore_regexp_list;
87 #if O_BINARY
88 /* Use binary I/O when reading and writing data (--binary).
89 On POSIX hosts, this has no effect. */
90 static bool binary;
91 #else
92 enum { binary = true };
93 #endif
95 /* If one file is missing, treat it as present but empty (-N). */
96 static bool new_file;
98 /* If the first file is missing, treat it as present but empty
99 (--unidirectional-new-file). */
100 static bool unidirectional_new_file;
102 /* Report files compared that are the same (-s).
103 Normally nothing is output when that happens. */
104 static bool report_identical_files;
/* Short option letters accepted by getopt_long; a trailing ':' marks
   an option that takes an argument.  The digits come first, then the
   letters.  */
static char const shortopts[] =
  "0123456789"
  "abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";
/* getopt_long return values for the long options that have no
   single-letter form.  They start just above CHAR_MAX so that they
   cannot collide with any option character.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_DEREFERENCE_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_BLANK_EMPTY_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* Keep these three consecutive and in this order: main subtracts
     UNCHANGED_LINE_FORMAT_OPTION to obtain an array index.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* Keep these four consecutive and in this order: main subtracts
     UNCHANGED_GROUP_FORMAT_OPTION to obtain an array index.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};
/* Names of the group-format options, indexed by the option value minus
   UNCHANGED_GROUP_FORMAT_OPTION.  Each row is sized to hold the longest
   name.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
{
  "--unchanged-group-format",
  "--old-group-format",
  "--new-group-format",
  "--changed-group-format"
};
/* Names of the line-format options, indexed by the option value minus
   UNCHANGED_LINE_FORMAT_OPTION.  Each row is sized to hold the longest
   name.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
{
  "--unchanged-line-format",
  "--old-line-format",
  "--new-line-format"
};
157 static struct option const longopts[] =
159 {"binary", 0, 0, BINARY_OPTION},
160 {"brief", 0, 0, 'q'},
161 {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
162 {"context", 2, 0, 'C'},
163 {"ed", 0, 0, 'e'},
164 {"exclude", 1, 0, 'x'},
165 {"exclude-from", 1, 0, 'X'},
166 {"expand-tabs", 0, 0, 't'},
167 {"forward-ed", 0, 0, 'f'},
168 {"from-file", 1, 0, FROM_FILE_OPTION},
169 {"help", 0, 0, HELP_OPTION},
170 {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
171 {"ifdef", 1, 0, 'D'},
172 {"ignore-all-space", 0, 0, 'w'},
173 {"ignore-blank-lines", 0, 0, 'B'},
174 {"ignore-case", 0, 0, 'i'},
175 {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
176 {"ignore-matching-lines", 1, 0, 'I'},
177 {"ignore-space-change", 0, 0, 'b'},
178 {"ignore-tab-expansion", 0, 0, 'E'},
179 {"ignore-trailing-space", 0, 0, 'Z'},
180 {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
181 {"initial-tab", 0, 0, 'T'},
182 {"label", 1, 0, 'L'},
183 {"left-column", 0, 0, LEFT_COLUMN_OPTION},
184 {"line-format", 1, 0, LINE_FORMAT_OPTION},
185 {"minimal", 0, 0, 'd'},
186 {"new-file", 0, 0, 'N'},
187 {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
188 {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
189 {"no-dereference", 0, 0, NO_DEREFERENCE_OPTION},
190 {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
191 {"normal", 0, 0, NORMAL_OPTION},
192 {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
193 {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
194 {"paginate", 0, 0, 'l'},
195 {"rcs", 0, 0, 'n'},
196 {"recursive", 0, 0, 'r'},
197 {"report-identical-files", 0, 0, 's'},
198 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
199 {"show-c-function", 0, 0, 'p'},
200 {"show-function-line", 1, 0, 'F'},
201 {"side-by-side", 0, 0, 'y'},
202 {"speed-large-files", 0, 0, 'H'},
203 {"starting-file", 1, 0, 'S'},
204 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
205 {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION},
206 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
207 {"tabsize", 1, 0, TABSIZE_OPTION},
208 {"text", 0, 0, 'a'},
209 {"to-file", 1, 0, TO_FILE_OPTION},
210 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
211 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
212 {"unidirectional-new-file", 0, 0, 'P'},
213 {"unified", 2, 0, 'U'},
214 {"version", 0, 0, 'v'},
215 {"width", 1, 0, 'W'},
216 {0, 0, 0, 0}
/* Build one string holding the first COUNT elements of OPTIONVEC, each
   shell-quoted and preceded by a single space.  The result therefore
   starts with a space, has none at the end, and is the empty string
   when COUNT is not positive.

   The string is obtained from xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;		/* room for the terminating NUL */
  char *buf;
  char *out;
  int k;

  /* First pass: one separator plus the quoted length per element.  */
  for (k = 0; k < count; k++)
    needed += 1 + shell_quote_length (optionvec[k]);

  buf = xmalloc (needed);
  out = buf;

  /* Second pass: emit " <quoted element>" for each element.  */
  for (k = 0; k < count; k++)
    {
      *out++ = ' ';
      out = shell_quote_copy (out, optionvec[k]);
    }

  *out = '\0';
  return buf;
}
251 /* Return an option value suitable for add_exclude. */
253 static int
254 exclude_options (void)
256 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
260 main (int argc, char **argv)
262 int exit_status = EXIT_SUCCESS;
263 int c;
264 int i;
265 int prev = -1;
266 lin ocontext = -1;
267 bool explicit_context = false;
268 size_t width = 0;
269 bool show_c_function = false;
270 char const *from_file = NULL;
271 char const *to_file = NULL;
272 uintmax_t numval;
273 char *numend;
275 /* Do our initializations. */
276 exit_failure = EXIT_TROUBLE;
277 initialize_main (&argc, &argv);
278 set_program_name (argv[0]);
279 setlocale (LC_ALL, "");
280 textdomain (PACKAGE);
281 c_stack_action (0);
282 function_regexp_list.buf = &function_regexp;
283 ignore_regexp_list.buf = &ignore_regexp;
284 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
285 excluded = new_exclude ();
287 /* Decode the options. */
289 while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
291 switch (c)
293 case 0:
294 break;
296 case '0':
297 case '1':
298 case '2':
299 case '3':
300 case '4':
301 case '5':
302 case '6':
303 case '7':
304 case '8':
305 case '9':
306 if (! ISDIGIT (prev))
307 ocontext = c - '0';
308 else if (LIN_MAX / 10 < ocontext
309 || ((ocontext = 10 * ocontext + c - '0') < 0))
310 ocontext = LIN_MAX;
311 break;
313 case 'a':
314 text = true;
315 break;
317 case 'b':
318 if (ignore_white_space < IGNORE_SPACE_CHANGE)
319 ignore_white_space = IGNORE_SPACE_CHANGE;
320 break;
322 case 'Z':
323 if (ignore_white_space < IGNORE_SPACE_CHANGE)
324 ignore_white_space |= IGNORE_TRAILING_SPACE;
325 break;
327 case 'B':
328 ignore_blank_lines = true;
329 break;
331 case 'C':
332 case 'U':
334 if (optarg)
336 numval = strtoumax (optarg, &numend, 10);
337 if (*numend)
338 try_help ("invalid context length '%s'", optarg);
339 if (LIN_MAX < numval)
340 numval = LIN_MAX;
342 else
343 numval = 3;
345 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
346 if (context < numval)
347 context = numval;
348 explicit_context = true;
350 break;
352 case 'c':
353 specify_style (OUTPUT_CONTEXT);
354 if (context < 3)
355 context = 3;
356 break;
358 case 'd':
359 minimal = true;
360 break;
362 case 'D':
363 specify_style (OUTPUT_IFDEF);
365 static char const C_ifdef_group_formats[] =
366 "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
367 char *b = xmalloc (sizeof C_ifdef_group_formats
368 + 7 * strlen (optarg) - 14 /* 7*"%s" */
369 - 8 /* 5*"%%" + 3*"%c" */);
370 sprintf (b, C_ifdef_group_formats,
372 optarg, optarg, 0,
373 optarg, optarg, 0,
374 optarg, optarg, optarg);
375 for (i = 0; i < sizeof group_format / sizeof group_format[0]; i++)
377 specify_value (&group_format[i], b, "-D");
378 b += strlen (b) + 1;
381 break;
383 case 'e':
384 specify_style (OUTPUT_ED);
385 break;
387 case 'E':
388 if (ignore_white_space < IGNORE_SPACE_CHANGE)
389 ignore_white_space |= IGNORE_TAB_EXPANSION;
390 break;
392 case 'f':
393 specify_style (OUTPUT_FORWARD_ED);
394 break;
396 case 'F':
397 add_regexp (&function_regexp_list, optarg);
398 break;
400 case 'h':
401 /* Split the files into chunks for faster processing.
402 Usually does not change the result.
404 This currently has no effect. */
405 break;
407 case 'H':
408 speed_large_files = true;
409 break;
411 case 'i':
412 ignore_case = true;
413 break;
415 case 'I':
416 add_regexp (&ignore_regexp_list, optarg);
417 break;
419 case 'l':
420 if (!pr_program[0])
421 try_help ("pagination not supported on this host", NULL);
422 paginate = true;
423 #ifdef SIGCHLD
424 /* Pagination requires forking and waiting, and
425 System V fork+wait does not work if SIGCHLD is ignored. */
426 signal (SIGCHLD, SIG_DFL);
427 #endif
428 break;
430 case 'L':
431 if (!file_label[0])
432 file_label[0] = optarg;
433 else if (!file_label[1])
434 file_label[1] = optarg;
435 else
436 fatal ("too many file label options");
437 break;
439 case 'n':
440 specify_style (OUTPUT_RCS);
441 break;
443 case 'N':
444 new_file = true;
445 break;
447 case 'p':
448 show_c_function = true;
449 add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
450 break;
452 case 'P':
453 unidirectional_new_file = true;
454 break;
456 case 'q':
457 brief = true;
458 break;
460 case 'r':
461 recursive = true;
462 break;
464 case 's':
465 report_identical_files = true;
466 break;
468 case 'S':
469 specify_value (&starting_file, optarg, "-S");
470 break;
472 case 't':
473 expand_tabs = true;
474 break;
476 case 'T':
477 initial_tab = true;
478 break;
480 case 'u':
481 specify_style (OUTPUT_UNIFIED);
482 if (context < 3)
483 context = 3;
484 break;
486 case 'v':
487 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version,
488 AUTHORS, (char *) NULL);
489 check_stdout ();
490 return EXIT_SUCCESS;
492 case 'w':
493 ignore_white_space = IGNORE_ALL_SPACE;
494 break;
496 case 'x':
497 add_exclude (excluded, optarg, exclude_options ());
498 break;
500 case 'X':
501 if (add_exclude_file (add_exclude, excluded, optarg,
502 exclude_options (), '\n'))
503 pfatal_with_name (optarg);
504 break;
506 case 'y':
507 specify_style (OUTPUT_SDIFF);
508 break;
510 case 'W':
511 numval = strtoumax (optarg, &numend, 10);
512 if (! (0 < numval && numval <= SIZE_MAX) || *numend)
513 try_help ("invalid width '%s'", optarg);
514 if (width != numval)
516 if (width)
517 fatal ("conflicting width options");
518 width = numval;
520 break;
522 case BINARY_OPTION:
523 #if O_BINARY
524 binary = true;
525 if (! isatty (STDOUT_FILENO))
526 set_binary_mode (STDOUT_FILENO, O_BINARY);
527 #endif
528 break;
530 case FROM_FILE_OPTION:
531 specify_value (&from_file, optarg, "--from-file");
532 break;
534 case HELP_OPTION:
535 usage ();
536 check_stdout ();
537 return EXIT_SUCCESS;
539 case HORIZON_LINES_OPTION:
540 numval = strtoumax (optarg, &numend, 10);
541 if (*numend)
542 try_help ("invalid horizon length '%s'", optarg);
543 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
544 break;
546 case IGNORE_FILE_NAME_CASE_OPTION:
547 ignore_file_name_case = true;
548 break;
550 case INHIBIT_HUNK_MERGE_OPTION:
551 /* This option is obsolete, but accept it for backward
552 compatibility. */
553 break;
555 case LEFT_COLUMN_OPTION:
556 left_column = true;
557 break;
559 case LINE_FORMAT_OPTION:
560 specify_style (OUTPUT_IFDEF);
561 for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
562 specify_value (&line_format[i], optarg, "--line-format");
563 break;
565 case NO_DEREFERENCE_OPTION:
566 no_dereference_symlinks = true;
567 break;
569 case NO_IGNORE_FILE_NAME_CASE_OPTION:
570 ignore_file_name_case = false;
571 break;
573 case NORMAL_OPTION:
574 specify_style (OUTPUT_NORMAL);
575 break;
577 case SDIFF_MERGE_ASSIST_OPTION:
578 specify_style (OUTPUT_SDIFF);
579 sdiff_merge_assist = true;
580 break;
582 case STRIP_TRAILING_CR_OPTION:
583 strip_trailing_cr = true;
584 break;
586 case SUPPRESS_BLANK_EMPTY_OPTION:
587 suppress_blank_empty = true;
588 break;
590 case SUPPRESS_COMMON_LINES_OPTION:
591 suppress_common_lines = true;
592 break;
594 case TABSIZE_OPTION:
595 numval = strtoumax (optarg, &numend, 10);
596 if (! (0 < numval && numval <= SIZE_MAX) || *numend)
597 try_help ("invalid tabsize '%s'", optarg);
598 if (tabsize != numval)
600 if (tabsize)
601 fatal ("conflicting tabsize options");
602 tabsize = numval;
604 break;
606 case TO_FILE_OPTION:
607 specify_value (&to_file, optarg, "--to-file");
608 break;
610 case UNCHANGED_LINE_FORMAT_OPTION:
611 case OLD_LINE_FORMAT_OPTION:
612 case NEW_LINE_FORMAT_OPTION:
613 specify_style (OUTPUT_IFDEF);
614 c -= UNCHANGED_LINE_FORMAT_OPTION;
615 specify_value (&line_format[c], optarg, line_format_option[c]);
616 break;
618 case UNCHANGED_GROUP_FORMAT_OPTION:
619 case OLD_GROUP_FORMAT_OPTION:
620 case NEW_GROUP_FORMAT_OPTION:
621 case CHANGED_GROUP_FORMAT_OPTION:
622 specify_style (OUTPUT_IFDEF);
623 c -= UNCHANGED_GROUP_FORMAT_OPTION;
624 specify_value (&group_format[c], optarg, group_format_option[c]);
625 break;
627 default:
628 try_help (NULL, NULL);
630 prev = c;
633 if (output_style == OUTPUT_UNSPECIFIED)
635 if (show_c_function)
637 specify_style (OUTPUT_CONTEXT);
638 if (ocontext < 0)
639 context = 3;
641 else
642 specify_style (OUTPUT_NORMAL);
645 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
647 #if (defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS \
648 || defined HAVE_STRUCT_STAT_ST_SPARE1)
649 time_format = "%Y-%m-%d %H:%M:%S.%N %z";
650 #else
651 time_format = "%Y-%m-%d %H:%M:%S %z";
652 #endif
654 else
656 /* See POSIX 1003.1-2001 for this format. */
657 time_format = "%a %b %e %T %Y";
660 if (0 <= ocontext
661 && (output_style == OUTPUT_CONTEXT
662 || output_style == OUTPUT_UNIFIED)
663 && (context < ocontext
664 || (ocontext < context && ! explicit_context)))
665 context = ocontext;
667 if (! tabsize)
668 tabsize = 8;
669 if (! width)
670 width = 130;
673 /* Maximize first the half line width, and then the gutter width,
674 according to the following constraints:
676 1. Two half lines plus a gutter must fit in a line.
677 2. If the half line width is nonzero:
678 a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
679 b. If tabs are not expanded to spaces,
680 a half line plus a gutter is an integral number of tabs,
681 so that tabs in the right column line up. */
683 intmax_t t = expand_tabs ? 1 : tabsize;
684 intmax_t w = width;
685 intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t;
686 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
687 sdiff_column2_offset = sdiff_half_width ? off : w;
690 /* Make the horizon at least as large as the context, so that
691 shift_boundaries has more freedom to shift the first and last hunks. */
692 if (horizon_lines < context)
693 horizon_lines = context;
695 summarize_regexp_list (&function_regexp_list);
696 summarize_regexp_list (&ignore_regexp_list);
698 if (output_style == OUTPUT_IFDEF)
700 for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++)
701 if (!line_format[i])
702 line_format[i] = "%l\n";
703 if (!group_format[OLD])
704 group_format[OLD]
705 = group_format[CHANGED] ? group_format[CHANGED] : "%<";
706 if (!group_format[NEW])
707 group_format[NEW]
708 = group_format[CHANGED] ? group_format[CHANGED] : "%>";
709 if (!group_format[UNCHANGED])
710 group_format[UNCHANGED] = "%=";
711 if (!group_format[CHANGED])
712 group_format[CHANGED] = concat (group_format[OLD],
713 group_format[NEW], "");
716 no_diff_means_no_output =
717 (output_style == OUTPUT_IFDEF ?
718 (!*group_format[UNCHANGED]
719 || (STREQ (group_format[UNCHANGED], "%=")
720 && !*line_format[UNCHANGED]))
721 : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
723 files_can_be_treated_as_binary =
724 (brief & binary
725 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
726 | (ignore_regexp_list.regexps || ignore_white_space)));
728 switch_string = option_list (argv + 1, optind - 1);
730 if (from_file)
732 if (to_file)
733 fatal ("--from-file and --to-file both specified");
734 else
735 for (; optind < argc; optind++)
737 int status = compare_files (NULL, from_file, argv[optind]);
738 if (exit_status < status)
739 exit_status = status;
742 else
744 if (to_file)
745 for (; optind < argc; optind++)
747 int status = compare_files (NULL, argv[optind], to_file);
748 if (exit_status < status)
749 exit_status = status;
751 else
753 if (argc - optind != 2)
755 if (argc - optind < 2)
756 try_help ("missing operand after '%s'", argv[argc - 1]);
757 else
758 try_help ("extra operand '%s'", argv[optind + 2]);
761 exit_status = compare_files (NULL, argv[optind], argv[optind + 1]);
765 /* Print any messages that were saved up for last. */
766 print_message_queue ();
768 check_stdout ();
769 exit (exit_status);
770 return exit_status;
773 /* Append to REGLIST the regexp PATTERN. */
775 static void
776 add_regexp (struct regexp_list *reglist, char const *pattern)
778 size_t patlen = strlen (pattern);
779 char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
781 if (m != 0)
782 error (0, 0, "%s: %s", pattern, m);
783 else
785 char *regexps = reglist->regexps;
786 size_t len = reglist->len;
787 bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
788 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
789 size_t size = reglist->size;
791 if (size <= newlen)
793 if (!size)
794 size = 1;
796 do size *= 2;
797 while (size <= newlen);
799 reglist->size = size;
800 reglist->regexps = regexps = xrealloc (regexps, size);
802 if (multiple_regexps)
804 regexps[len++] = '\\';
805 regexps[len++] = '|';
807 memcpy (regexps + len, pattern, patlen + 1);
811 /* Ensure that REGLIST represents the disjunction of its regexps.
812 This is done here, rather than earlier, to avoid O(N^2) behavior. */
814 static void
815 summarize_regexp_list (struct regexp_list *reglist)
817 if (reglist->regexps)
819 /* At least one regexp was specified. Allocate a fastmap for it. */
820 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
821 if (reglist->multiple_regexps)
823 /* Compile the disjunction of the regexps.
824 (If just one regexp was specified, it is already compiled.) */
825 char const *m = re_compile_pattern (reglist->regexps, reglist->len,
826 reglist->buf);
827 if (m)
828 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
833 static void
834 try_help (char const *reason_msgid, char const *operand)
836 if (reason_msgid)
837 error (0, 0, _(reason_msgid), operand);
838 error (EXIT_TROUBLE, 0, _("Try '%s --help' for more information."),
839 program_name);
840 abort ();
/* Check that everything written to standard output got there: report
   a pending write error on stdout, otherwise close it and report a
   failure to close.  */

static void
check_stdout (void)
{
  if (! ferror (stdout))
    {
      if (fclose (stdout) != 0)
        pfatal_with_name (_("standard output"));
    }
  else
    fatal ("write failed");
}
852 static char const * const option_help_msgid[] = {
853 N_(" --normal output a normal diff (the default)"),
854 N_("-q, --brief report only when files differ"),
855 N_("-s, --report-identical-files report when two files are the same"),
856 N_("-c, -C NUM, --context[=NUM] output NUM (default 3) lines of copied context"),
857 N_("-u, -U NUM, --unified[=NUM] output NUM (default 3) lines of unified context"),
858 N_("-e, --ed output an ed script"),
859 N_("-n, --rcs output an RCS format diff"),
860 N_("-y, --side-by-side output in two columns"),
861 N_("-W, --width=NUM output at most NUM (default 130) print columns"),
862 N_(" --left-column output only the left column of common lines"),
863 N_(" --suppress-common-lines do not output common lines"),
865 N_("-p, --show-c-function show which C function each change is in"),
866 N_("-F, --show-function-line=RE show the most recent line matching RE"),
867 N_(" --label LABEL use LABEL instead of file name\n"
868 " (can be repeated)"),
870 N_("-t, --expand-tabs expand tabs to spaces in output"),
871 N_("-T, --initial-tab make tabs line up by prepending a tab"),
872 N_(" --tabsize=NUM tab stops every NUM (default 8) print columns"),
873 N_(" --suppress-blank-empty suppress space or tab before empty output lines"),
874 N_("-l, --paginate pass output through 'pr' to paginate it"),
876 N_("-r, --recursive recursively compare any subdirectories found"),
877 N_(" --no-dereference don't follow symbolic links"),
878 N_("-N, --new-file treat absent files as empty"),
879 N_(" --unidirectional-new-file treat absent first files as empty"),
880 N_(" --ignore-file-name-case ignore case when comparing file names"),
881 N_(" --no-ignore-file-name-case consider case when comparing file names"),
882 N_("-x, --exclude=PAT exclude files that match PAT"),
883 N_("-X, --exclude-from=FILE exclude files that match any pattern in FILE"),
884 N_("-S, --starting-file=FILE start with FILE when comparing directories"),
885 N_(" --from-file=FILE1 compare FILE1 to all operands;\n"
886 " FILE1 can be a directory"),
887 N_(" --to-file=FILE2 compare all operands to FILE2;\n"
888 " FILE2 can be a directory"),
890 N_("-i, --ignore-case ignore case differences in file contents"),
891 N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"),
892 N_("-Z, --ignore-trailing-space ignore white space at line end"),
893 N_("-b, --ignore-space-change ignore changes in the amount of white space"),
894 N_("-w, --ignore-all-space ignore all white space"),
895 N_("-B, --ignore-blank-lines ignore changes where lines are all blank"),
896 N_("-I, --ignore-matching-lines=RE ignore changes where all lines match RE"),
898 N_("-a, --text treat all files as text"),
899 N_(" --strip-trailing-cr strip trailing carriage return on input"),
900 #if O_BINARY
901 N_(" --binary read and write data in binary mode"),
902 #endif
904 N_("-D, --ifdef=NAME output merged file with '#ifdef NAME' diffs"),
905 N_(" --GTYPE-group-format=GFMT format GTYPE input groups with GFMT"),
906 N_(" --line-format=LFMT format all input lines with LFMT"),
907 N_(" --LTYPE-line-format=LFMT format LTYPE input lines with LFMT"),
908 N_(" These format options provide fine-grained control over the output\n"
909 " of diff, generalizing -D/--ifdef."),
910 N_(" LTYPE is 'old', 'new', or 'unchanged'. GTYPE is LTYPE or 'changed'."),
911 N_(" GFMT (only) may contain:\n\
912 %< lines from FILE1\n\
913 %> lines from FILE2\n\
914 %= lines common to FILE1 and FILE2\n\
915 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
916 LETTERs are as follows for new group, lower case for old group:\n\
917 F first line number\n\
918 L last line number\n\
919 N number of lines = L-F+1\n\
920 E F-1\n\
921 M L+1\n\
922 %(A=B?T:E) if A equals B then T else E"),
923 N_(" LFMT (only) may contain:\n\
924 %L contents of line\n\
925 %l contents of line, excluding any trailing newline\n\
926 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"),
927 N_(" Both GFMT and LFMT may contain:\n\
928 %% %\n\
929 %c'C' the single character C\n\
930 %c'\\OOO' the character with octal code OOO\n\
931 C the character C (other characters represent themselves)"),
933 N_("-d, --minimal try hard to find a smaller set of changes"),
934 N_(" --horizon-lines=NUM keep NUM lines of the common prefix and suffix"),
935 N_(" --speed-large-files assume large files and many scattered small changes"),
937 N_(" --help display this help and exit"),
938 N_("-v, --version output version information and exit"),
940 N_("FILES are 'FILE1 FILE2' or 'DIR1 DIR2' or 'DIR FILE...' or 'FILE... DIR'."),
941 N_("If --from-file or --to-file is given, there are no restrictions on FILE(s)."),
942 N_("If a FILE is '-', read standard input."),
943 N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
947 static void
948 usage (void)
950 char const * const *p;
952 printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
953 printf ("%s\n\n", _("Compare FILES line by line."));
955 fputs (_("\
956 Mandatory arguments to long options are mandatory for short options too.\n\
957 "), stdout);
959 for (p = option_help_msgid; *p; p++)
961 if (!**p)
962 putchar ('\n');
963 else
965 char const *msg = _(*p);
966 char const *nl;
967 while ((nl = strchr (msg, '\n')))
969 int msglen = nl + 1 - msg;
970 printf (" %.*s", msglen, msg);
971 msg = nl + 1;
974 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
977 emit_bug_reporting_address ();
/* Store VALUE in *VAR.  If *VAR already holds a different string, the
   option was given twice with conflicting values: diagnose it, naming
   OPTION, and exit through try_help.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *current = *var;

  if (current != NULL && ! STREQ (current, value))
    {
      error (0, 0, _("conflicting %s option value '%s'"), option, value);
      try_help (NULL, NULL);
    }

  *var = value;
}
993 /* Set the output style to STYLE, diagnosing conflicts. */
994 static void
995 specify_style (enum output_style style)
997 if (output_style != style)
999 if (output_style != OUTPUT_UNSPECIFIED)
1000 try_help ("conflicting output style options", NULL);
1001 output_style = style;
/* Overwrite the modification time recorded in *ST with the current
   time, using whichever sub-second field this platform's struct stat
   provides.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef STAT_TIMESPEC
  /* The stat buffer holds a full timespec; fill it in directly.  */
  gettime (&STAT_TIMESPEC (st, st_mtim));
#else
  struct timespec now;

  gettime (&now);
  st->st_mtime = now.tv_sec;
# if defined STAT_TIMESPEC_NS
  STAT_TIMESPEC_NS (st, st_mtim) = now.tv_nsec;
# elif defined HAVE_STRUCT_STAT_ST_SPARE1
  /* This field receives the nanosecond count divided by 1000.  */
  st->st_spare1 = now.tv_nsec / 1000;
# endif
#endif
}
1024 /* Compare two files (or dirs) with parent comparison PARENT
1025 and names NAME0 and NAME1.
1026 (If PARENT is null, then the first name is just NAME0, etc.)
1027 This is self-contained; it opens the files and closes them.
1029 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1030 different, EXIT_TROUBLE if there is a problem opening them. */
1032 static int
1033 compare_files (struct comparison const *parent,
1034 char const *name0,
1035 char const *name1)
1037 struct comparison cmp;
1038 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1039 register int f;
1040 int status = EXIT_SUCCESS;
1041 bool same_files;
1042 char *free0;
1043 char *free1;
1045 /* If this is directory comparison, perhaps we have a file
1046 that exists only in one of the directories.
1047 If so, just print a message to that effect. */
1049 if (! ((name0 && name1)
1050 || (unidirectional_new_file && name1)
1051 || new_file))
1053 char const *name = name0 ? name0 : name1;
1054 char const *dir = parent->file[!name0].name;
1056 /* See POSIX 1003.1-2001 for this format. */
1057 message ("Only in %s: %s\n", dir, name);
1059 /* Return EXIT_FAILURE so that diff_dirs will return
1060 EXIT_FAILURE ("some files differ"). */
1061 return EXIT_FAILURE;
1064 memset (cmp.file, 0, sizeof cmp.file);
1065 cmp.parent = parent;
1067 /* cmp.file[f].desc markers */
1068 #define NONEXISTENT (-1) /* nonexistent file */
1069 #define UNOPENED (-2) /* unopened file (e.g. directory) */
1070 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1072 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1074 cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT;
1075 cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT;
1077 /* Now record the full name of each file, including nonexistent ones. */
1079 if (!name0)
1080 name0 = name1;
1081 if (!name1)
1082 name1 = name0;
1084 if (!parent)
1086 free0 = NULL;
1087 free1 = NULL;
1088 cmp.file[0].name = name0;
1089 cmp.file[1].name = name1;
1091 else
1093 cmp.file[0].name = free0
1094 = file_name_concat (parent->file[0].name, name0, NULL);
1095 cmp.file[1].name = free1
1096 = file_name_concat (parent->file[1].name, name1, NULL);
1099 /* Stat the files. */
1101 for (f = 0; f < 2; f++)
1103 if (cmp.file[f].desc != NONEXISTENT)
1105 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1107 cmp.file[f].desc = cmp.file[0].desc;
1108 cmp.file[f].stat = cmp.file[0].stat;
1110 else if (STREQ (cmp.file[f].name, "-"))
1112 cmp.file[f].desc = STDIN_FILENO;
1113 if (binary && ! isatty (STDIN_FILENO))
1114 set_binary_mode (STDIN_FILENO, O_BINARY);
1115 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1116 cmp.file[f].desc = ERRNO_ENCODE (errno);
1117 else
1119 if (S_ISREG (cmp.file[f].stat.st_mode))
1121 off_t pos = lseek (STDIN_FILENO, 0, SEEK_CUR);
1122 if (pos < 0)
1123 cmp.file[f].desc = ERRNO_ENCODE (errno);
1124 else
1125 cmp.file[f].stat.st_size =
1126 MAX (0, cmp.file[f].stat.st_size - pos);
1129 /* POSIX 1003.1-2001 requires current time for
1130 stdin. */
1131 set_mtime_to_now (&cmp.file[f].stat);
1134 else if ((no_dereference_symlinks
1135 ? lstat (cmp.file[f].name, &cmp.file[f].stat)
1136 : stat (cmp.file[f].name, &cmp.file[f].stat))
1137 != 0)
1138 cmp.file[f].desc = ERRNO_ENCODE (errno);
1142 /* Mark files as nonexistent as needed for -N and -P, if they are
1143 inaccessible empty regular files (the kind of files that 'patch'
1144 creates to indicate nonexistent backups), or if they are
1145 top-level files that do not exist but their counterparts do
1146 exist. */
1147 for (f = 0; f < 2; f++)
1148 if ((new_file || (f == 0 && unidirectional_new_file))
1149 && (cmp.file[f].desc == UNOPENED
1150 ? (S_ISREG (cmp.file[f].stat.st_mode)
1151 && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
1152 && cmp.file[f].stat.st_size == 0)
1153 : ((cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
1154 || cmp.file[f].desc == ERRNO_ENCODE (EBADF))
1155 && ! parent
1156 && (cmp.file[1 - f].desc == UNOPENED
1157 || cmp.file[1 - f].desc == STDIN_FILENO))))
1158 cmp.file[f].desc = NONEXISTENT;
1160 for (f = 0; f < 2; f++)
1161 if (cmp.file[f].desc == NONEXISTENT)
1163 memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
1164 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1167 for (f = 0; f < 2; f++)
1169 int e = ERRNO_DECODE (cmp.file[f].desc);
1170 if (0 <= e)
1172 errno = e;
1173 perror_with_name (cmp.file[f].name);
1174 status = EXIT_TROUBLE;
1178 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1180 /* If one is a directory, and it was specified in the command line,
1181 use the file in that dir with the other file's basename. */
1183 int fnm_arg = DIR_P (0);
1184 int dir_arg = 1 - fnm_arg;
1185 char const *fnm = cmp.file[fnm_arg].name;
1186 char const *dir = cmp.file[dir_arg].name;
1187 char const *filename = cmp.file[dir_arg].name = free0
1188 = find_dir_file_pathname (dir, last_component (fnm));
1190 if (STREQ (fnm, "-"))
1191 fatal ("cannot compare '-' to a directory");
1193 if ((no_dereference_symlinks
1194 ? lstat (filename, &cmp.file[dir_arg].stat)
1195 : stat (filename, &cmp.file[dir_arg].stat))
1196 != 0)
1198 perror_with_name (filename);
1199 status = EXIT_TROUBLE;
1203 if (status != EXIT_SUCCESS)
1205 /* One of the files should exist but does not. */
1207 else if (cmp.file[0].desc == NONEXISTENT
1208 && cmp.file[1].desc == NONEXISTENT)
1210 /* Neither file "exists", so there's nothing to compare. */
1212 else if ((same_files
1213 = (cmp.file[0].desc != NONEXISTENT
1214 && cmp.file[1].desc != NONEXISTENT
1215 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1216 && same_file_attributes (&cmp.file[0].stat,
1217 &cmp.file[1].stat)))
1218 && no_diff_means_no_output)
1220 /* The two named files are actually the same physical file.
1221 We know they are identical without actually reading them. */
1223 else if (DIR_P (0) & DIR_P (1))
1225 if (output_style == OUTPUT_IFDEF)
1226 fatal ("-D option not supported with directories");
1228 /* If both are directories, compare the files in them. */
1230 if (parent && !recursive)
1232 /* But don't compare dir contents one level down
1233 unless -r was specified.
1234 See POSIX 1003.1-2001 for this format. */
1235 message ("Common subdirectories: %s and %s\n",
1236 cmp.file[0].name, cmp.file[1].name);
1238 else
1239 status = diff_dirs (&cmp, compare_files);
1241 else if ((DIR_P (0) | DIR_P (1))
1242 || (parent
1243 && !((S_ISREG (cmp.file[0].stat.st_mode)
1244 || S_ISLNK (cmp.file[0].stat.st_mode))
1245 && (S_ISREG (cmp.file[1].stat.st_mode)
1246 || S_ISLNK (cmp.file[1].stat.st_mode)))))
1248 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1250 /* We have a subdirectory that exists only in one directory. */
1252 if ((DIR_P (0) | DIR_P (1))
1253 && recursive
1254 && (new_file
1255 || (unidirectional_new_file
1256 && cmp.file[0].desc == NONEXISTENT)))
1257 status = diff_dirs (&cmp, compare_files);
1258 else
1260 char const *dir;
1262 /* PARENT must be non-NULL here. */
1263 assert (parent);
1264 dir = parent->file[cmp.file[0].desc == NONEXISTENT].name;
1266 /* See POSIX 1003.1-2001 for this format. */
1267 message ("Only in %s: %s\n", dir, name0);
1269 status = EXIT_FAILURE;
1272 else
1274 /* We have two files that are not to be compared. */
1276 /* See POSIX 1003.1-2001 for this format. */
1277 message5 ("File %s is a %s while file %s is a %s\n",
1278 file_label[0] ? file_label[0] : cmp.file[0].name,
1279 file_type (&cmp.file[0].stat),
1280 file_label[1] ? file_label[1] : cmp.file[1].name,
1281 file_type (&cmp.file[1].stat));
1283 /* This is a difference. */
1284 status = EXIT_FAILURE;
1287 else if (S_ISLNK (cmp.file[0].stat.st_mode)
1288 || S_ISLNK (cmp.file[1].stat.st_mode))
1290 /* We get here only if we use lstat(), not stat(). */
1291 assert (no_dereference_symlinks);
1293 if (S_ISLNK (cmp.file[0].stat.st_mode)
1294 && S_ISLNK (cmp.file[1].stat.st_mode))
1296 /* Compare the values of the symbolic links. */
1297 char *link_value[2] = { NULL, NULL };
1299 for (f = 0; f < 2; f++)
1301 link_value[f] = xreadlink (cmp.file[f].name);
1302 if (link_value[f] == NULL)
1304 perror_with_name (cmp.file[f].name);
1305 status = EXIT_TROUBLE;
1306 break;
1309 if (status == EXIT_SUCCESS)
1311 if ( ! STREQ (link_value[0], link_value[1]))
1313 message ("Symbolic links %s and %s differ\n",
1314 cmp.file[0].name, cmp.file[1].name);
1315 /* This is a difference. */
1316 status = EXIT_FAILURE;
1319 for (f = 0; f < 2; f++)
1320 free (link_value[f]);
1322 else
1324 /* We have two files that are not to be compared, because
1325 one of them is a symbolic link and the other one is not. */
1327 message5 ("File %s is a %s while file %s is a %s\n",
1328 file_label[0] ? file_label[0] : cmp.file[0].name,
1329 file_type (&cmp.file[0].stat),
1330 file_label[1] ? file_label[1] : cmp.file[1].name,
1331 file_type (&cmp.file[1].stat));
1333 /* This is a difference. */
1334 status = EXIT_FAILURE;
1337 else if (files_can_be_treated_as_binary
1338 && S_ISREG (cmp.file[0].stat.st_mode)
1339 && S_ISREG (cmp.file[1].stat.st_mode)
1340 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size)
1342 message ("Files %s and %s differ\n",
1343 file_label[0] ? file_label[0] : cmp.file[0].name,
1344 file_label[1] ? file_label[1] : cmp.file[1].name);
1345 status = EXIT_FAILURE;
1347 else
1349 /* Both exist and neither is a directory. */
1351 /* Open the files and record their descriptors. */
1353 int oflags = O_RDONLY | (binary ? O_BINARY : 0);
1355 if (cmp.file[0].desc == UNOPENED)
1356 if ((cmp.file[0].desc = open (cmp.file[0].name, oflags, 0)) < 0)
1358 perror_with_name (cmp.file[0].name);
1359 status = EXIT_TROUBLE;
1361 if (cmp.file[1].desc == UNOPENED)
1363 if (same_files)
1364 cmp.file[1].desc = cmp.file[0].desc;
1365 else if ((cmp.file[1].desc = open (cmp.file[1].name, oflags, 0)) < 0)
1367 perror_with_name (cmp.file[1].name);
1368 status = EXIT_TROUBLE;
1372 /* Compare the files, if no error was found. */
1374 if (status == EXIT_SUCCESS)
1375 status = diff_2_files (&cmp);
1377 /* Close the file descriptors. */
1379 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1381 perror_with_name (cmp.file[0].name);
1382 status = EXIT_TROUBLE;
1384 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1385 && close (cmp.file[1].desc) != 0)
1387 perror_with_name (cmp.file[1].name);
1388 status = EXIT_TROUBLE;
1392 /* Now the comparison has been done, if no error prevented it,
1393 and STATUS is the value this function will return. */
1395 if (status == EXIT_SUCCESS)
1397 if (report_identical_files && !DIR_P (0))
1398 message ("Files %s and %s are identical\n",
1399 file_label[0] ? file_label[0] : cmp.file[0].name,
1400 file_label[1] ? file_label[1] : cmp.file[1].name);
1402 else
1404 /* Flush stdout so that the user sees differences immediately.
1405 This can hurt performance, unfortunately. */
1406 if (fflush (stdout) != 0)
1407 pfatal_with_name (_("standard output"));
1410 free (free0);
1411 free (free1);
1413 return status;