Import of diffutils 2.8.1
[dragonfly.git] / contrib / diffutils-2.8.1 / src / util.c
blob70f841284f43e339000b45e528d7e8f9f5b41098
1 /* Support routines for GNU DIFF.
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
4 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 #include "diff.h"
24 #include <dirname.h>
25 #include <error.h>
26 #include <quotesys.h>
27 #include <regex.h>
28 #include <xalloc.h>
30 char const pr_program[] = PR_PROGRAM;
/* Deferred one-line messages.  When -l is specified, messages are
   queued up and printed at the end; each one is recorded in a
   `struct msg'.  */

struct msg
{
  /* The following record in the queue, or null for the last one.  */
  struct msg *next;

  /* Format + 4 args, each '\0' terminated, concatenated.  Declared with
     a single element; message5 allocates each record large enough to
     hold all five strings.  */
  char args[1];
};

/* Head of the chain of queued messages; null while the chain is empty.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the location into which the
   next record's address is stored.  Initially this is MSG_CHAIN itself.  */

static struct msg **msg_chain_end = &msg_chain;
/* Issue a diagnostic for a system call that returned a failure status.
   NAME, which should normally be the file name, is handed to `error'
   together with the current errno and an exit status of 0.  */

void
perror_with_name (char const *name)
{
  int const saved_errno = errno;

  error (0, saved_errno, "%s", name);
}
58 /* Use when a system call returns non-zero status and that is fatal. */
60 void
61 pfatal_with_name (char const *name)
63 int e = errno;
64 print_message_queue ();
65 error (EXIT_TROUBLE, e, "%s", name);
66 abort ();
69 /* Print an error message containing MSGID, then exit. */
71 void
72 fatal (char const *msgid)
74 print_message_queue ();
75 error (EXIT_TROUBLE, 0, "%s", _(msgid));
76 abort ();
/* Like printf with two string arguments, except that when -l is in
   effect the message is saved and printed later.  This is used for
   things like "Only in ...".  It is message5 with the third and
   fourth arguments absent.  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *const absent = 0;

  message5 (format_msgid, arg1, arg2, absent, absent);
}
88 void
89 message5 (char const *format_msgid, char const *arg1, char const *arg2,
90 char const *arg3, char const *arg4)
92 if (paginate)
94 char *p;
95 char const *arg[5];
96 int i;
97 size_t size[5];
98 size_t total_size = offsetof (struct msg, args);
99 struct msg *new;
101 arg[0] = format_msgid;
102 arg[1] = arg1;
103 arg[2] = arg2;
104 arg[3] = arg3 ? arg3 : "";
105 arg[4] = arg4 ? arg4 : "";
107 for (i = 0; i < 5; i++)
108 total_size += size[i] = strlen (arg[i]) + 1;
110 new = xmalloc (total_size);
112 for (i = 0, p = new->args; i < 5; p += size[i++])
113 memcpy (p, arg[i], size[i]);
115 *msg_chain_end = new;
116 new->next = 0;
117 msg_chain_end = &new->next;
119 else
121 if (sdiff_merge_assist)
122 putchar (' ');
123 printf (_(format_msgid), arg1, arg2, arg3, arg4);
127 /* Output all the messages that were saved up by calls to `message'. */
129 void
130 print_message_queue (void)
132 char const *arg[5];
133 int i;
134 struct msg *m = msg_chain;
136 while (m)
138 struct msg *next = m->next;
139 arg[0] = m->args;
140 for (i = 0; i < 4; i++)
141 arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
142 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
143 free (m);
144 m = next;
148 /* Call before outputting the results of comparing files NAME0 and NAME1
149 to set up OUTFILE, the stdio stream for the output to go to.
151 Usually, OUTFILE is just stdout. But when -l was specified
152 we fork off a `pr' and make OUTFILE a pipe to it.
153 `pr' then outputs to our stdout. */
155 static char const *current_name0;
156 static char const *current_name1;
157 static bool currently_recursive;
159 void
160 setup_output (char const *name0, char const *name1, bool recursive)
162 current_name0 = name0;
163 current_name1 = name1;
164 currently_recursive = recursive;
165 outfile = 0;
168 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
169 static pid_t pr_pid;
170 #endif
172 void
173 begin_output (void)
175 char *name;
177 if (outfile != 0)
178 return;
180 /* Construct the header of this piece of diff. */
181 name = xmalloc (strlen (current_name0) + strlen (current_name1)
182 + strlen (switch_string) + 7);
184 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in
185 the standard: it says that we must print only the last component
186 of the pathnames, and it requires two spaces after "diff" if
187 there are no options. These requirements are silly and do not
188 match historical practice. */
189 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
191 if (paginate)
193 if (fflush (stdout) != 0)
194 pfatal_with_name (_("write failed"));
196 /* Make OUTFILE a pipe to a subsidiary `pr'. */
198 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
199 int pipes[2];
201 if (pipe (pipes) != 0)
202 pfatal_with_name ("pipe");
204 pr_pid = vfork ();
205 if (pr_pid < 0)
206 pfatal_with_name ("fork");
208 if (pr_pid == 0)
210 close (pipes[1]);
211 if (pipes[0] != STDIN_FILENO)
213 if (dup2 (pipes[0], STDIN_FILENO) < 0)
214 pfatal_with_name ("dup2");
215 close (pipes[0]);
218 execl (pr_program, pr_program, "-h", name, 0);
219 _exit (errno == ENOEXEC ? 126 : 127);
221 else
223 close (pipes[0]);
224 outfile = fdopen (pipes[1], "w");
225 if (!outfile)
226 pfatal_with_name ("fdopen");
228 #else
229 char *command = xmalloc (sizeof pr_program - 1 + 7
230 + quote_system_arg ((char *) 0, name) + 1);
231 char *p;
232 sprintf (command, "%s -f -h ", pr_program);
233 p = command + sizeof pr_program - 1 + 7;
234 p += quote_system_arg (p, name);
235 *p = 0;
236 errno = 0;
237 outfile = popen (command, "w");
238 if (!outfile)
239 pfatal_with_name (command);
240 free (command);
241 #endif
244 else
247 /* If -l was not specified, output the diff straight to `stdout'. */
249 outfile = stdout;
251 /* If handling multiple files (because scanning a directory),
252 print which files the following output is about. */
253 if (currently_recursive)
254 printf ("%s\n", name);
257 free (name);
259 /* A special header is needed at the beginning of context output. */
260 switch (output_style)
262 case OUTPUT_CONTEXT:
263 print_context_header (files, 0);
264 break;
266 case OUTPUT_UNIFIED:
267 print_context_header (files, 1);
268 break;
270 default:
271 break;
275 /* Call after the end of output of diffs for one file.
276 Close OUTFILE and get rid of the `pr' subfork. */
278 void
279 finish_output (void)
281 if (outfile != 0 && outfile != stdout)
283 int wstatus;
284 int werrno = 0;
285 if (ferror (outfile))
286 fatal ("write failed");
287 #if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
288 wstatus = pclose (outfile);
289 if (wstatus == -1)
290 werrno = errno;
291 #else
292 if (fclose (outfile) != 0)
293 pfatal_with_name (_("write failed"));
294 if (waitpid (pr_pid, &wstatus, 0) < 0)
295 pfatal_with_name ("waitpid");
296 #endif
297 if (! werrno && WIFEXITED (wstatus) && WEXITSTATUS (wstatus) == 127)
298 error (EXIT_TROUBLE, 0, _("subsidiary program `%s' not found"),
299 pr_program);
300 if (wstatus != 0)
301 error (EXIT_TROUBLE, werrno, _("subsidiary program `%s' failed"),
302 pr_program);
305 outfile = 0;
308 /* Compare two lines (typically one from each input file)
309 according to the command line options.
310 For efficiency, this is invoked only when the lines do not match exactly
311 but an option like -i might cause us to ignore the difference.
312 Return nonzero if the lines differ. */
314 bool
315 lines_differ (char const *s1, char const *s2)
317 register unsigned char const *t1 = (unsigned char const *) s1;
318 register unsigned char const *t2 = (unsigned char const *) s2;
319 size_t column = 0;
321 while (1)
323 register unsigned char c1 = *t1++;
324 register unsigned char c2 = *t2++;
326 /* Test for exact char equality first, since it's a common case. */
327 if (c1 != c2)
329 switch (ignore_white_space)
331 case IGNORE_ALL_SPACE:
332 /* For -w, just skip past any white space. */
333 while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
334 while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
335 break;
337 case IGNORE_SPACE_CHANGE:
338 /* For -b, advance past any sequence of white space in
339 line 1 and consider it just one space, or nothing at
340 all if it is at the end of the line. */
341 if (ISSPACE (c1))
343 while (c1 != '\n')
345 c1 = *t1++;
346 if (! ISSPACE (c1))
348 --t1;
349 c1 = ' ';
350 break;
355 /* Likewise for line 2. */
356 if (ISSPACE (c2))
358 while (c2 != '\n')
360 c2 = *t2++;
361 if (! ISSPACE (c2))
363 --t2;
364 c2 = ' ';
365 break;
370 if (c1 != c2)
372 /* If we went too far when doing the simple test
373 for equality, go back to the first non-white-space
374 character in both sides and try again. */
375 if (c2 == ' ' && c1 != '\n'
376 && (unsigned char const *) s1 + 1 < t1
377 && ISSPACE (t1[-2]))
379 --t1;
380 continue;
382 if (c1 == ' ' && c2 != '\n'
383 && (unsigned char const *) s2 + 1 < t2
384 && ISSPACE (t2[-2]))
386 --t2;
387 continue;
391 break;
393 case IGNORE_TAB_EXPANSION:
394 if ((c1 == ' ' && c2 == '\t')
395 || (c1 == '\t' && c2 == ' '))
397 size_t column2 = column;
398 for (;; c1 = *t1++)
400 if (c1 == ' ')
401 column++;
402 else if (c1 == '\t')
403 column += TAB_WIDTH - column % TAB_WIDTH;
404 else
405 break;
407 for (;; c2 = *t2++)
409 if (c2 == ' ')
410 column2++;
411 else if (c2 == '\t')
412 column2 += TAB_WIDTH - column2 % TAB_WIDTH;
413 else
414 break;
416 if (column != column2)
417 return 1;
419 break;
421 case IGNORE_NO_WHITE_SPACE:
422 break;
425 /* Lowercase all letters if -i is specified. */
427 if (ignore_case)
429 c1 = TOLOWER (c1);
430 c2 = TOLOWER (c2);
433 if (c1 != c2)
434 break;
436 if (c1 == '\n')
437 return 0;
439 column += c1 == '\t' ? TAB_WIDTH - column % TAB_WIDTH : 1;
442 return 1;
/* Find the consecutive changes at the start of the script START and
   return the last link that belongs with them.  This implementation
   returns START itself, so a hunk delimited by this function consists
   of exactly one change.  */

struct change *
find_change (struct change *start)
{
  struct change *last_of_hunk = start;

  return last_of_hunk;
}
/* Counterpart of find_change with the same signature; judging by the
   name it is meant for scripts in reverse order (confirm against the
   callers).  Like find_change, it returns START itself.  */

struct change *
find_reverse_change (struct change *start)
{
  struct change *last_of_hunk = start;

  return last_of_hunk;
}
460 /* Divide SCRIPT into pieces by calling HUNKFUN and
461 print each piece with PRINTFUN.
462 Both functions take one arg, an edit script.
464 HUNKFUN is called with the tail of the script
465 and returns the last link that belongs together with the start
466 of the tail.
468 PRINTFUN takes a subscript which belongs together (with a null
469 link at the end) and prints it. */
471 void
472 print_script (struct change *script,
473 struct change * (*hunkfun) (struct change *),
474 void (*printfun) (struct change *))
476 struct change *next = script;
478 while (next)
480 struct change *this, *end;
482 /* Find a set of changes that belong together. */
483 this = next;
484 end = (*hunkfun) (next);
486 /* Disconnect them from the rest of the changes,
487 making them a hunk, and remember the rest for next iteration. */
488 next = end->link;
489 end->link = 0;
490 #ifdef DEBUG
491 debug_script (this);
492 #endif
494 /* Print this hunk. */
495 (*printfun) (this);
497 /* Reconnect the script so it will all be freed properly. */
498 end->link = next;
502 /* Print the text of a single line LINE,
503 flagging it with the characters in LINE_FLAG (which say whether
504 the line is inserted, deleted, changed, etc.). */
506 void
507 print_1_line (char const *line_flag, char const *const *line)
509 char const *base = line[0], *limit = line[1]; /* Help the compiler. */
510 FILE *out = outfile; /* Help the compiler some more. */
511 char const *flag_format = 0;
513 /* If -T was specified, use a Tab between the line-flag and the text.
514 Otherwise use a Space (as Unix diff does).
515 Print neither space nor tab if line-flags are empty. */
517 if (line_flag && *line_flag)
519 flag_format = initial_tab ? "%s\t" : "%s ";
520 fprintf (out, flag_format, line_flag);
523 output_1_line (base, limit, flag_format, line_flag);
525 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
526 fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
529 /* Output a line from BASE up to LIMIT.
530 With -t, expand white space characters to spaces, and if FLAG_FORMAT
531 is nonzero, output it with argument LINE_FLAG after every
532 internal carriage return, so that tab stops continue to line up. */
534 void
535 output_1_line (char const *base, char const *limit, char const *flag_format,
536 char const *line_flag)
538 if (!expand_tabs)
539 fwrite (base, limit - base, 1, outfile);
540 else
542 register FILE *out = outfile;
543 register unsigned char c;
544 register char const *t = base;
545 register unsigned int column = 0;
547 while (t < limit)
548 switch ((c = *t++))
550 case '\t':
552 unsigned int spaces = TAB_WIDTH - column % TAB_WIDTH;
553 column += spaces;
555 putc (' ', out);
556 while (--spaces);
558 break;
560 case '\r':
561 putc (c, out);
562 if (flag_format && t < limit && *t != '\n')
563 fprintf (out, flag_format, line_flag);
564 column = 0;
565 break;
567 case '\b':
568 if (column == 0)
569 continue;
570 column--;
571 putc (c, out);
572 break;
574 default:
575 if (ISPRINT (c))
576 column++;
577 putc (c, out);
578 break;
/* Letter for each kind of change: entry 0 is '\0', then 'd', 'a' and
   'c'.  Presumably indexed by an `enum changes' value (UNCHANGED, OLD,
   NEW, CHANGED) -- confirm against the declaration of that enum.  */
const char change_letter[] = { '\0', 'd', 'a', 'c' };
585 /* Translate an internal line number (an index into diff's table of lines)
586 into an actual line number in the input file.
587 The internal line number is I. FILE points to the data on the file.
589 Internal line numbers count from 0 starting after the prefix.
590 Actual line numbers count from 1 within the entire file. */
593 translate_line_number (struct file_data const *file, lin i)
595 return i + file->prefix_lines + 1;
598 /* Translate a line number range. This is always done for printing,
599 so for convenience translate to long rather than lin, so that the
600 caller can use printf with "%ld" without casting. */
602 void
603 translate_range (struct file_data const *file,
604 lin a, lin b,
605 long *aptr, long *bptr)
607 *aptr = translate_line_number (file, a - 1) + 1;
608 *bptr = translate_line_number (file, b + 1) - 1;
611 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
612 If the two numbers are identical, print just one number.
614 Args A and B are internal line numbers.
615 We print the translated (real) line numbers. */
617 void
618 print_number_range (char sepchar, struct file_data *file, lin a, lin b)
620 long trans_a, trans_b;
621 translate_range (file, a, b, &trans_a, &trans_b);
623 /* Note: we can have B < A in the case of a range of no lines.
624 In this case, we should print the line number before the range,
625 which is B. */
626 if (trans_b > trans_a)
627 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
628 else
629 fprintf (outfile, "%ld", trans_b);
632 /* Look at a hunk of edit script and report the range of lines in each file
633 that it applies to. HUNK is the start of the hunk, which is a chain
634 of `struct change'. The first and last line numbers of file 0 are stored in
635 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
636 Note that these are internal line numbers that count from 0.
638 If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
640 Return UNCHANGED if only ignorable lines are inserted or deleted,
641 OLD if lines of file 0 are deleted,
642 NEW if lines of file 1 are inserted,
643 and CHANGED if both kinds of changes are found. */
645 enum changes
646 analyze_hunk (struct change *hunk,
647 lin *first0, lin *last0,
648 lin *first1, lin *last1)
650 struct change *next;
651 lin l0, l1;
652 lin show_from, show_to;
653 lin i;
654 bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
655 size_t trivial_length = (int) ignore_blank_lines - 1;
656 /* If 0, ignore zero-length lines;
657 if SIZE_MAX, do not ignore lines just because of their length. */
659 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
660 char const * const *linbuf1 = files[1].linbuf;
662 show_from = show_to = 0;
664 *first0 = hunk->line0;
665 *first1 = hunk->line1;
667 next = hunk;
670 l0 = next->line0 + next->deleted - 1;
671 l1 = next->line1 + next->inserted - 1;
672 show_from += next->deleted;
673 show_to += next->inserted;
675 for (i = next->line0; i <= l0 && trivial; i++)
677 char const *line = linbuf0[i];
678 size_t len = linbuf0[i + 1] - line - 1;
679 if (len != trivial_length
680 && (! ignore_regexp.fastmap
681 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
682 trivial = 0;
685 for (i = next->line1; i <= l1 && trivial; i++)
687 char const *line = linbuf1[i];
688 size_t len = linbuf1[i + 1] - line - 1;
689 if (len != trivial_length
690 && (! ignore_regexp.fastmap
691 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
692 trivial = 0;
695 while ((next = next->link) != 0);
697 *last0 = l0;
698 *last1 = l1;
700 /* If all inserted or deleted lines are ignorable,
701 tell the caller to ignore this hunk. */
703 if (trivial)
704 return UNCHANGED;
706 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
/* Concatenate three strings, returning a newly malloc'd string.
   The storage comes from xmalloc.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t const len1 = strlen (s1);
  size_t const len2 = strlen (s2);
  size_t const len3 = strlen (s3);
  char *result = xmalloc (len1 + len2 + len3 + 1);

  memcpy (result, s1, len1);
  memcpy (result + len1, s2, len2);
  /* The final copy takes S3's terminating '\0' along with it.  */
  memcpy (result + len1 + len2, s3, len3 + 1);

  return result;
}
/* Yield a new block of SIZE bytes, initialized to zero.
   The block is obtained from xmalloc.  */

void *
zalloc (size_t size)
{
  /* memset returns its first argument, i.e. the new block.  */
  return memset (xmalloc (size), 0, size);
}
/* Yield the newly malloc'd pathname
   of the file in DIR whose filename is FILE.
   A "/" is placed between the two unless base_name (DIR) is empty or
   already ends in a slash.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *last_component = base_name (dir);
  char const *separator = "/";

  if (*last_component == '\0'
      || last_component[strlen (last_component) - 1] == '/')
    separator = "";

  return concat (dir, separator, file);
}
740 void
741 debug_script (struct change *sp)
743 fflush (stdout);
745 for (; sp; sp = sp->link)
747 long line0 = sp->line0;
748 long line1 = sp->line1;
749 long deleted = sp->deleted;
750 long inserted = sp->inserted;
751 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
752 line0, line1, deleted, inserted);
755 fflush (stderr);