HAMMER: MFC to 2.0
[dragonfly.git] / contrib / diffutils-2.8 / src / analyze.c
blob7d6750a421835c364f7156a48f6f638a225faa0d
1 /* Analyze file differences for GNU DIFF.
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
4 2004 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* The basic algorithm is described in:
24 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
25 Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
26 see especially section 4.2, which describes the variation used below.
27 Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
28 heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
29 at the price of producing suboptimal output for large inputs with
30 many differences.
32 The basic algorithm was independently discovered as described in:
33 "Algorithms for Approximate String Matching", E. Ukkonen,
34 Information and Control Vol. 64, 1985, pp. 100-118. */
36 #include "diff.h"
37 #include <cmpbuf.h>
38 #include <error.h>
39 #include <file-type.h>
40 #include <xalloc.h>
42 static lin *xvec, *yvec; /* Vectors being compared. */
43 static lin *fdiag; /* Vector, indexed by diagonal, containing
44 1 + the X coordinate of the point furthest
45 along the given diagonal in the forward
46 search of the edit matrix. */
47 static lin *bdiag; /* Vector, indexed by diagonal, containing
48 the X coordinate of the point furthest
49 along the given diagonal in the backward
50 search of the edit matrix. */
51 static lin too_expensive; /* Edit scripts longer than this are too
52 expensive to compute. */
54 #define SNAKE_LIMIT 20 /* Snakes bigger than this are considered `big'. */
56 struct partition
58 lin xmid, ymid; /* Midpoints of this partition. */
59 bool lo_minimal; /* Nonzero if low half will be analyzed minimally. */
60 bool hi_minimal; /* Likewise for high half. */
63 /* Find the midpoint of the shortest edit script for a specified
64 portion of the two files.
66 Scan from the beginnings of the files, and simultaneously from the ends,
67 doing a breadth-first search through the space of edit-sequence.
68 When the two searches meet, we have found the midpoint of the shortest
69 edit sequence.
71 If FIND_MINIMAL is nonzero, find the minimal edit script regardless
72 of expense. Otherwise, if the search is too expensive, use
73 heuristics to stop the search and report a suboptimal answer.
75 Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number
76 XMID - YMID equals the number of inserted lines minus the number
77 of deleted lines (counting only lines before the midpoint).
79 Set PART->lo_minimal to true iff the minimal edit script for the
80 left half of the partition is known; similarly for PART->hi_minimal.
82 This function assumes that the first lines of the specified portions
83 of the two files do not match, and likewise that the last lines do not
84 match. The caller must trim matching lines from the beginning and end
85 of the portions it is going to specify.
87 If we return the "wrong" partitions,
88 the worst this can do is cause suboptimal diff output.
89 It cannot cause incorrect diff output. */
91 static void
92 diag (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal,
93 struct partition *part)
95 lin *const fd = fdiag; /* Give the compiler a chance. */
96 lin *const bd = bdiag; /* Additional help for the compiler. */
97 lin const *const xv = xvec; /* Still more help for the compiler. */
98 lin const *const yv = yvec; /* And more and more . . . */
99 lin const dmin = xoff - ylim; /* Minimum valid diagonal. */
100 lin const dmax = xlim - yoff; /* Maximum valid diagonal. */
101 lin const fmid = xoff - yoff; /* Center diagonal of top-down search. */
102 lin const bmid = xlim - ylim; /* Center diagonal of bottom-up search. */
103 lin fmin = fmid, fmax = fmid; /* Limits of top-down search. */
104 lin bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */
105 lin c; /* Cost. */
106 bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd
107 diagonal with respect to the northwest. */
109 fd[fmid] = xoff;
110 bd[bmid] = xlim;
112 for (c = 1;; ++c)
114 lin d; /* Active diagonal. */
115 bool big_snake = false;
117 /* Extend the top-down search by an edit step in each diagonal. */
118 fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
119 fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
120 for (d = fmax; d >= fmin; d -= 2)
122 lin x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
124 if (tlo >= thi)
125 x = tlo + 1;
126 else
127 x = thi;
128 oldx = x;
129 y = x - d;
130 while (x < xlim && y < ylim && xv[x] == yv[y])
131 ++x, ++y;
132 if (x - oldx > SNAKE_LIMIT)
133 big_snake = true;
134 fd[d] = x;
135 if (odd && bmin <= d && d <= bmax && bd[d] <= x)
137 part->xmid = x;
138 part->ymid = y;
139 part->lo_minimal = part->hi_minimal = true;
140 return;
144 /* Similarly extend the bottom-up search. */
145 bmin > dmin ? bd[--bmin - 1] = LIN_MAX : ++bmin;
146 bmax < dmax ? bd[++bmax + 1] = LIN_MAX : --bmax;
147 for (d = bmax; d >= bmin; d -= 2)
149 lin x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
151 if (tlo < thi)
152 x = tlo;
153 else
154 x = thi - 1;
155 oldx = x;
156 y = x - d;
157 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
158 --x, --y;
159 if (oldx - x > SNAKE_LIMIT)
160 big_snake = true;
161 bd[d] = x;
162 if (!odd && fmin <= d && d <= fmax && x <= fd[d])
164 part->xmid = x;
165 part->ymid = y;
166 part->lo_minimal = part->hi_minimal = true;
167 return;
171 if (find_minimal)
172 continue;
174 /* Heuristic: check occasionally for a diagonal that has made
175 lots of progress compared with the edit distance.
176 If we have any such, find the one that has made the most
177 progress and return it as if it had succeeded.
179 With this heuristic, for files with a constant small density
180 of changes, the algorithm is linear in the file size. */
182 if (200 < c && big_snake && speed_large_files)
184 lin best = 0;
186 for (d = fmax; d >= fmin; d -= 2)
188 lin dd = d - fmid;
189 lin x = fd[d];
190 lin y = x - d;
191 lin v = (x - xoff) * 2 - dd;
192 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
194 if (v > best
195 && xoff + SNAKE_LIMIT <= x && x < xlim
196 && yoff + SNAKE_LIMIT <= y && y < ylim)
198 /* We have a good enough best diagonal;
199 now insist that it end with a significant snake. */
200 int k;
202 for (k = 1; xv[x - k] == yv[y - k]; k++)
203 if (k == SNAKE_LIMIT)
205 best = v;
206 part->xmid = x;
207 part->ymid = y;
208 break;
213 if (best > 0)
215 part->lo_minimal = true;
216 part->hi_minimal = false;
217 return;
220 best = 0;
221 for (d = bmax; d >= bmin; d -= 2)
223 lin dd = d - bmid;
224 lin x = bd[d];
225 lin y = x - d;
226 lin v = (xlim - x) * 2 + dd;
227 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
229 if (v > best
230 && xoff < x && x <= xlim - SNAKE_LIMIT
231 && yoff < y && y <= ylim - SNAKE_LIMIT)
233 /* We have a good enough best diagonal;
234 now insist that it end with a significant snake. */
235 int k;
237 for (k = 0; xv[x + k] == yv[y + k]; k++)
238 if (k == SNAKE_LIMIT - 1)
240 best = v;
241 part->xmid = x;
242 part->ymid = y;
243 break;
248 if (best > 0)
250 part->lo_minimal = false;
251 part->hi_minimal = true;
252 return;
256 /* Heuristic: if we've gone well beyond the call of duty,
257 give up and report halfway between our best results so far. */
258 if (c >= too_expensive)
260 lin fxybest, fxbest;
261 lin bxybest, bxbest;
263 fxbest = bxbest = 0; /* Pacify `gcc -Wall'. */
265 /* Find forward diagonal that maximizes X + Y. */
266 fxybest = -1;
267 for (d = fmax; d >= fmin; d -= 2)
269 lin x = MIN (fd[d], xlim);
270 lin y = x - d;
271 if (ylim < y)
272 x = ylim + d, y = ylim;
273 if (fxybest < x + y)
275 fxybest = x + y;
276 fxbest = x;
280 /* Find backward diagonal that minimizes X + Y. */
281 bxybest = LIN_MAX;
282 for (d = bmax; d >= bmin; d -= 2)
284 lin x = MAX (xoff, bd[d]);
285 lin y = x - d;
286 if (y < yoff)
287 x = yoff + d, y = yoff;
288 if (x + y < bxybest)
290 bxybest = x + y;
291 bxbest = x;
295 /* Use the better of the two diagonals. */
296 if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
298 part->xmid = fxbest;
299 part->ymid = fxybest - fxbest;
300 part->lo_minimal = true;
301 part->hi_minimal = false;
303 else
305 part->xmid = bxbest;
306 part->ymid = bxybest - bxbest;
307 part->lo_minimal = false;
308 part->hi_minimal = true;
310 return;
315 /* Compare in detail contiguous subsequences of the two files
316 which are known, as a whole, to match each other.
318 The results are recorded in the vectors files[N].changed, by
319 storing 1 in the element for each line that is an insertion or deletion.
321 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
323 Note that XLIM, YLIM are exclusive bounds.
324 All line numbers are origin-0 and discarded lines are not counted.
326 If FIND_MINIMAL, find a minimal difference no matter how
327 expensive it is. */
329 static void
330 compareseq (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal)
332 lin const *xv = xvec; /* Help the compiler. */
333 lin const *yv = yvec;
335 /* Slide down the bottom initial diagonal. */
336 while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
337 ++xoff, ++yoff;
338 /* Slide up the top initial diagonal. */
339 while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
340 --xlim, --ylim;
342 /* Handle simple cases. */
343 if (xoff == xlim)
344 while (yoff < ylim)
345 files[1].changed[files[1].realindexes[yoff++]] = 1;
346 else if (yoff == ylim)
347 while (xoff < xlim)
348 files[0].changed[files[0].realindexes[xoff++]] = 1;
349 else
351 struct partition part;
353 /* Find a point of correspondence in the middle of the files. */
354 diag (xoff, xlim, yoff, ylim, find_minimal, &part);
356 /* Use the partitions to split this problem into subproblems. */
357 compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
358 compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
362 /* Discard lines from one file that have no matches in the other file.
364 A line which is discarded will not be considered by the actual
365 comparison algorithm; it will be as if that line were not in the file.
366 The file's `realindexes' table maps virtual line numbers
367 (which don't count the discarded lines) into real line numbers;
368 this is how the actual comparison algorithm produces results
369 that are comprehensible when the discarded lines are counted.
371 When we discard a line, we also mark it as a deletion or insertion
372 so that it will be printed in the output. */
374 static void
375 discard_confusing_lines (struct file_data filevec[])
377 int f;
378 lin i;
379 char *discarded[2];
380 lin *equiv_count[2];
381 lin *p;
383 /* Allocate our results. */
384 p = xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
385 * (2 * sizeof *p));
386 for (f = 0; f < 2; f++)
388 filevec[f].undiscarded = p; p += filevec[f].buffered_lines;
389 filevec[f].realindexes = p; p += filevec[f].buffered_lines;
392 /* Set up equiv_count[F][I] as the number of lines in file F
393 that fall in equivalence class I. */
395 p = zalloc (filevec[0].equiv_max * (2 * sizeof *p));
396 equiv_count[0] = p;
397 equiv_count[1] = p + filevec[0].equiv_max;
399 for (i = 0; i < filevec[0].buffered_lines; ++i)
400 ++equiv_count[0][filevec[0].equivs[i]];
401 for (i = 0; i < filevec[1].buffered_lines; ++i)
402 ++equiv_count[1][filevec[1].equivs[i]];
404 /* Set up tables of which lines are going to be discarded. */
406 discarded[0] = zalloc (filevec[0].buffered_lines
407 + filevec[1].buffered_lines);
408 discarded[1] = discarded[0] + filevec[0].buffered_lines;
410 /* Mark to be discarded each line that matches no line of the other file.
411 If a line matches many lines, mark it as provisionally discardable. */
413 for (f = 0; f < 2; f++)
415 size_t end = filevec[f].buffered_lines;
416 char *discards = discarded[f];
417 lin *counts = equiv_count[1 - f];
418 lin *equivs = filevec[f].equivs;
419 size_t many = 5;
420 size_t tem = end / 64;
422 /* Multiply MANY by approximate square root of number of lines.
423 That is the threshold for provisionally discardable lines. */
424 while ((tem = tem >> 2) > 0)
425 many *= 2;
427 for (i = 0; i < end; i++)
429 lin nmatch;
430 if (equivs[i] == 0)
431 continue;
432 nmatch = counts[equivs[i]];
433 if (nmatch == 0)
434 discards[i] = 1;
435 else if (nmatch > many)
436 discards[i] = 2;
440 /* Don't really discard the provisional lines except when they occur
441 in a run of discardables, with nonprovisionals at the beginning
442 and end. */
444 for (f = 0; f < 2; f++)
446 lin end = filevec[f].buffered_lines;
447 register char *discards = discarded[f];
449 for (i = 0; i < end; i++)
451 /* Cancel provisional discards not in middle of run of discards. */
452 if (discards[i] == 2)
453 discards[i] = 0;
454 else if (discards[i] != 0)
456 /* We have found a nonprovisional discard. */
457 register lin j;
458 lin length;
459 lin provisional = 0;
461 /* Find end of this run of discardable lines.
462 Count how many are provisionally discardable. */
463 for (j = i; j < end; j++)
465 if (discards[j] == 0)
466 break;
467 if (discards[j] == 2)
468 ++provisional;
471 /* Cancel provisional discards at end, and shrink the run. */
472 while (j > i && discards[j - 1] == 2)
473 discards[--j] = 0, --provisional;
475 /* Now we have the length of a run of discardable lines
476 whose first and last are not provisional. */
477 length = j - i;
479 /* If 1/4 of the lines in the run are provisional,
480 cancel discarding of all provisional lines in the run. */
481 if (provisional * 4 > length)
483 while (j > i)
484 if (discards[--j] == 2)
485 discards[j] = 0;
487 else
489 register lin consec;
490 lin minimum = 1;
491 lin tem = length >> 2;
493 /* MINIMUM is approximate square root of LENGTH/4.
494 A subrun of two or more provisionals can stand
495 when LENGTH is at least 16.
496 A subrun of 4 or more can stand when LENGTH >= 64. */
497 while (0 < (tem >>= 2))
498 minimum <<= 1;
499 minimum++;
501 /* Cancel any subrun of MINIMUM or more provisionals
502 within the larger run. */
503 for (j = 0, consec = 0; j < length; j++)
504 if (discards[i + j] != 2)
505 consec = 0;
506 else if (minimum == ++consec)
507 /* Back up to start of subrun, to cancel it all. */
508 j -= consec;
509 else if (minimum < consec)
510 discards[i + j] = 0;
512 /* Scan from beginning of run
513 until we find 3 or more nonprovisionals in a row
514 or until the first nonprovisional at least 8 lines in.
515 Until that point, cancel any provisionals. */
516 for (j = 0, consec = 0; j < length; j++)
518 if (j >= 8 && discards[i + j] == 1)
519 break;
520 if (discards[i + j] == 2)
521 consec = 0, discards[i + j] = 0;
522 else if (discards[i + j] == 0)
523 consec = 0;
524 else
525 consec++;
526 if (consec == 3)
527 break;
530 /* I advances to the last line of the run. */
531 i += length - 1;
533 /* Same thing, from end. */
534 for (j = 0, consec = 0; j < length; j++)
536 if (j >= 8 && discards[i - j] == 1)
537 break;
538 if (discards[i - j] == 2)
539 consec = 0, discards[i - j] = 0;
540 else if (discards[i - j] == 0)
541 consec = 0;
542 else
543 consec++;
544 if (consec == 3)
545 break;
552 /* Actually discard the lines. */
553 for (f = 0; f < 2; f++)
555 char *discards = discarded[f];
556 lin end = filevec[f].buffered_lines;
557 lin j = 0;
558 for (i = 0; i < end; ++i)
559 if (minimal || discards[i] == 0)
561 filevec[f].undiscarded[j] = filevec[f].equivs[i];
562 filevec[f].realindexes[j++] = i;
564 else
565 filevec[f].changed[i] = 1;
566 filevec[f].nondiscarded_lines = j;
569 free (discarded[0]);
570 free (equiv_count[0]);
573 /* Adjust inserts/deletes of identical lines to join changes
574 as much as possible.
576 We do something when a run of changed lines include a
577 line at one end and have an excluded, identical line at the other.
578 We are free to choose which identical line is included.
579 `compareseq' usually chooses the one at the beginning,
580 but usually it is cleaner to consider the following identical line
581 to be the "change". */
583 static void
584 shift_boundaries (struct file_data filevec[])
586 int f;
588 for (f = 0; f < 2; f++)
590 char *changed = filevec[f].changed;
591 char *other_changed = filevec[1 - f].changed;
592 lin const *equivs = filevec[f].equivs;
593 lin i = 0;
594 lin j = 0;
595 lin i_end = filevec[f].buffered_lines;
597 while (1)
599 lin runlength, start, corresponding;
601 /* Scan forwards to find beginning of another run of changes.
602 Also keep track of the corresponding point in the other file. */
604 while (i < i_end && !changed[i])
606 while (other_changed[j++])
607 continue;
608 i++;
611 if (i == i_end)
612 break;
614 start = i;
616 /* Find the end of this run of changes. */
618 while (changed[++i])
619 continue;
620 while (other_changed[j])
621 j++;
625 /* Record the length of this run of changes, so that
626 we can later determine whether the run has grown. */
627 runlength = i - start;
629 /* Move the changed region back, so long as the
630 previous unchanged line matches the last changed one.
631 This merges with previous changed regions. */
633 while (start && equivs[start - 1] == equivs[i - 1])
635 changed[--start] = 1;
636 changed[--i] = 0;
637 while (changed[start - 1])
638 start--;
639 while (other_changed[--j])
640 continue;
643 /* Set CORRESPONDING to the end of the changed run, at the last
644 point where it corresponds to a changed run in the other file.
645 CORRESPONDING == I_END means no such point has been found. */
646 corresponding = other_changed[j - 1] ? i : i_end;
648 /* Move the changed region forward, so long as the
649 first changed line matches the following unchanged one.
650 This merges with following changed regions.
651 Do this second, so that if there are no merges,
652 the changed region is moved forward as far as possible. */
654 while (i != i_end && equivs[start] == equivs[i])
656 changed[start++] = 0;
657 changed[i++] = 1;
658 while (changed[i])
659 i++;
660 while (other_changed[++j])
661 corresponding = i;
664 while (runlength != i - start);
666 /* If possible, move the fully-merged run of changes
667 back to a corresponding run in the other file. */
669 while (corresponding < i)
671 changed[--start] = 1;
672 changed[--i] = 0;
673 while (other_changed[--j])
674 continue;
680 /* Cons an additional entry onto the front of an edit script OLD.
681 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
682 DELETED is the number of lines deleted here from file 0.
683 INSERTED is the number of lines inserted here in file 1.
685 If DELETED is 0 then LINE0 is the number of the line before
686 which the insertion was done; vice versa for INSERTED and LINE1. */
688 static struct change *
689 add_change (lin line0, lin line1, lin deleted, lin inserted,
690 struct change *old)
692 struct change *new = xmalloc (sizeof *new);
694 new->line0 = line0;
695 new->line1 = line1;
696 new->inserted = inserted;
697 new->deleted = deleted;
698 new->link = old;
699 return new;
702 /* Scan the tables of which lines are inserted and deleted,
703 producing an edit script in reverse order. */
705 static struct change *
706 build_reverse_script (struct file_data const filevec[])
708 struct change *script = 0;
709 char *changed0 = filevec[0].changed;
710 char *changed1 = filevec[1].changed;
711 lin len0 = filevec[0].buffered_lines;
712 lin len1 = filevec[1].buffered_lines;
714 /* Note that changedN[len0] does exist, and is 0. */
716 lin i0 = 0, i1 = 0;
718 while (i0 < len0 || i1 < len1)
720 if (changed0[i0] | changed1[i1])
722 lin line0 = i0, line1 = i1;
724 /* Find # lines changed here in each file. */
725 while (changed0[i0]) ++i0;
726 while (changed1[i1]) ++i1;
728 /* Record this change. */
729 script = add_change (line0, line1, i0 - line0, i1 - line1, script);
732 /* We have reached lines in the two files that match each other. */
733 i0++, i1++;
736 return script;
739 /* Scan the tables of which lines are inserted and deleted,
740 producing an edit script in forward order. */
742 static struct change *
743 build_script (struct file_data const filevec[])
745 struct change *script = 0;
746 char *changed0 = filevec[0].changed;
747 char *changed1 = filevec[1].changed;
748 lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
750 /* Note that changedN[-1] does exist, and is 0. */
752 while (i0 >= 0 || i1 >= 0)
754 if (changed0[i0 - 1] | changed1[i1 - 1])
756 lin line0 = i0, line1 = i1;
758 /* Find # lines changed here in each file. */
759 while (changed0[i0 - 1]) --i0;
760 while (changed1[i1 - 1]) --i1;
762 /* Record this change. */
763 script = add_change (i0, i1, line0 - i0, line1 - i1, script);
766 /* We have reached lines in the two files that match each other. */
767 i0--, i1--;
770 return script;
773 /* If CHANGES, briefly report that two files differed.
774 Return 2 if trouble, CHANGES otherwise. */
775 static int
776 briefly_report (int changes, struct file_data const filevec[])
778 if (changes)
780 char const *label0 = file_label[0] ? file_label[0] : filevec[0].name;
781 char const *label1 = file_label[1] ? file_label[1] : filevec[1].name;
782 message ("Files %s and %s differ\n", label0, label1);
783 if (! brief)
784 changes = 2;
787 return changes;
790 /* Report the differences of two files. */
792 diff_2_files (struct comparison *cmp)
794 lin diags;
795 int f;
796 struct change *e, *p;
797 struct change *script;
798 int changes;
801 /* If we have detected that either file is binary,
802 compare the two files as binary. This can happen
803 only when the first chunk is read.
804 Also, --brief without any --ignore-* options means
805 we can speed things up by treating the files as binary. */
807 if (read_files (cmp->file, files_can_be_treated_as_binary))
809 /* Files with different lengths must be different. */
810 if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
811 && (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
812 && (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
813 changes = 1;
815 /* Standard input equals itself. */
816 else if (cmp->file[0].desc == cmp->file[1].desc)
817 changes = 0;
819 else
820 /* Scan both files, a buffer at a time, looking for a difference. */
822 /* Allocate same-sized buffers for both files. */
823 size_t lcm_max = PTRDIFF_MAX - 1;
824 size_t buffer_size =
825 buffer_lcm (sizeof (word),
826 buffer_lcm (STAT_BLOCKSIZE (cmp->file[0].stat),
827 STAT_BLOCKSIZE (cmp->file[1].stat),
828 lcm_max),
829 lcm_max);
830 for (f = 0; f < 2; f++)
831 cmp->file[f].buffer = xrealloc (cmp->file[f].buffer, buffer_size);
833 for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0)
835 /* Read a buffer's worth from both files. */
836 for (f = 0; f < 2; f++)
837 if (0 <= cmp->file[f].desc)
838 file_block_read (&cmp->file[f],
839 buffer_size - cmp->file[f].buffered);
841 /* If the buffers differ, the files differ. */
842 if (cmp->file[0].buffered != cmp->file[1].buffered
843 || memcmp (cmp->file[0].buffer,
844 cmp->file[1].buffer,
845 cmp->file[0].buffered))
847 changes = 1;
848 break;
851 /* If we reach end of file, the files are the same. */
852 if (cmp->file[0].buffered != buffer_size)
854 changes = 0;
855 break;
860 changes = briefly_report (changes, cmp->file);
862 else
864 /* Allocate vectors for the results of comparison:
865 a flag for each line of each file, saying whether that line
866 is an insertion or deletion.
867 Allocate an extra element, always 0, at each end of each vector. */
869 size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
870 char *flag_space = zalloc (s);
871 cmp->file[0].changed = flag_space + 1;
872 cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
874 /* Some lines are obviously insertions or deletions
875 because they don't match anything. Detect them now, and
876 avoid even thinking about them in the main comparison algorithm. */
878 discard_confusing_lines (cmp->file);
880 /* Now do the main comparison algorithm, considering just the
881 undiscarded lines. */
883 xvec = cmp->file[0].undiscarded;
884 yvec = cmp->file[1].undiscarded;
885 diags = (cmp->file[0].nondiscarded_lines
886 + cmp->file[1].nondiscarded_lines + 3);
887 fdiag = xmalloc (diags * (2 * sizeof *fdiag));
888 bdiag = fdiag + diags;
889 fdiag += cmp->file[1].nondiscarded_lines + 1;
890 bdiag += cmp->file[1].nondiscarded_lines + 1;
892 /* Set TOO_EXPENSIVE to be approximate square root of input size,
893 bounded below by 256. */
894 too_expensive = 1;
895 for (; diags != 0; diags >>= 2)
896 too_expensive <<= 1;
897 too_expensive = MAX (256, too_expensive);
899 files[0] = cmp->file[0];
900 files[1] = cmp->file[1];
902 compareseq (0, cmp->file[0].nondiscarded_lines,
903 0, cmp->file[1].nondiscarded_lines, minimal);
905 free (fdiag - (cmp->file[1].nondiscarded_lines + 1));
907 /* Modify the results slightly to make them prettier
908 in cases where that can validly be done. */
910 shift_boundaries (cmp->file);
912 /* Get the results of comparison in the form of a chain
913 of `struct change's -- an edit script. */
915 if (output_style == OUTPUT_ED)
916 script = build_reverse_script (cmp->file);
917 else
918 script = build_script (cmp->file);
920 /* Set CHANGES if we had any diffs.
921 If some changes are ignored, we must scan the script to decide. */
922 if (ignore_blank_lines || ignore_regexp.fastmap)
924 struct change *next = script;
925 changes = 0;
927 while (next && changes == 0)
929 struct change *this, *end;
930 lin first0, last0, first1, last1;
932 /* Find a set of changes that belong together. */
933 this = next;
934 end = find_change (next);
936 /* Disconnect them from the rest of the changes, making them
937 a hunk, and remember the rest for next iteration. */
938 next = end->link;
939 end->link = 0;
941 /* Determine whether this hunk is really a difference. */
942 if (analyze_hunk (this, &first0, &last0, &first1, &last1))
943 changes = 1;
945 /* Reconnect the script so it will all be freed properly. */
946 end->link = next;
949 else
950 changes = (script != 0);
952 if (brief)
953 changes = briefly_report (changes, cmp->file);
954 else
956 if (changes | !no_diff_means_no_output)
958 /* Record info for starting up output,
959 to be used if and when we have some output to print. */
960 setup_output (file_label[0] ? file_label[0] : cmp->file[0].name,
961 file_label[1] ? file_label[1] : cmp->file[1].name,
962 cmp->parent != 0);
964 switch (output_style)
966 case OUTPUT_CONTEXT:
967 print_context_script (script, false);
968 break;
970 case OUTPUT_UNIFIED:
971 print_context_script (script, true);
972 break;
974 case OUTPUT_ED:
975 print_ed_script (script);
976 break;
978 case OUTPUT_FORWARD_ED:
979 pr_forward_ed_script (script);
980 break;
982 case OUTPUT_RCS:
983 print_rcs_script (script);
984 break;
986 case OUTPUT_NORMAL:
987 print_normal_script (script);
988 break;
990 case OUTPUT_IFDEF:
991 print_ifdef_script (script);
992 break;
994 case OUTPUT_SDIFF:
995 print_sdiff_script (script);
996 break;
998 default:
999 abort ();
1002 finish_output ();
1006 free (cmp->file[0].undiscarded);
1008 free (flag_space);
1010 for (f = 0; f < 2; f++)
1012 free (cmp->file[f].equivs);
1013 free (cmp->file[f].linbuf + cmp->file[f].linbuf_base);
1016 for (e = script; e; e = p)
1018 p = e->link;
1019 free (e);
1022 if (! ROBUST_OUTPUT_STYLE (output_style))
1023 for (f = 0; f < 2; ++f)
1024 if (cmp->file[f].missing_newline)
1026 error (0, 0, "%s: %s\n",
1027 file_label[f] ? file_label[f] : cmp->file[f].name,
1028 _("No newline at end of file"));
1029 changes = 2;
1033 if (cmp->file[0].buffer != cmp->file[1].buffer)
1034 free (cmp->file[0].buffer);
1035 free (cmp->file[1].buffer);
1037 return changes;