1 /* Analyze file differences for GNU DIFF.
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002,
4 2004 Free Software Foundation, Inc.
6 This file is part of GNU DIFF.
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* The basic algorithm is described in:
24 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
25 Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
26 see especially section 4.2, which describes the variation used below.
27 Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
28 heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
29 at the price of producing suboptimal output for large inputs with
32 The basic algorithm was independently discovered as described in:
33 "Algorithms for Approximate String Matching", E. Ukkonen,
34 Information and Control Vol. 64, 1985, pp. 100-118. */
39 #include <file-type.h>
42 static lin
*xvec
, *yvec
; /* Vectors being compared. */
43 static lin
*fdiag
; /* Vector, indexed by diagonal, containing
44 1 + the X coordinate of the point furthest
45 along the given diagonal in the forward
46 search of the edit matrix. */
47 static lin
*bdiag
; /* Vector, indexed by diagonal, containing
48 the X coordinate of the point furthest
49 along the given diagonal in the backward
50 search of the edit matrix. */
51 static lin too_expensive
; /* Edit scripts longer than this are too
52 expensive to compute. */
54 #define SNAKE_LIMIT 20 /* Snakes bigger than this are considered `big'. */
58 lin xmid
, ymid
; /* Midpoints of this partition. */
59 bool lo_minimal
; /* Nonzero if low half will be analyzed minimally. */
60 bool hi_minimal
; /* Likewise for high half. */
63 /* Find the midpoint of the shortest edit script for a specified
64 portion of the two files.
66 Scan from the beginnings of the files, and simultaneously from the ends,
67 doing a breadth-first search through the space of edit-sequence.
68 When the two searches meet, we have found the midpoint of the shortest
71 If FIND_MINIMAL is nonzero, find the minimal edit script regardless
72 of expense. Otherwise, if the search is too expensive, use
73 heuristics to stop the search and report a suboptimal answer.
75 Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number
76 XMID - YMID equals the number of inserted lines minus the number
77 of deleted lines (counting only lines before the midpoint).
79 Set PART->lo_minimal to true iff the minimal edit script for the
80 left half of the partition is known; similarly for PART->hi_minimal.
82 This function assumes that the first lines of the specified portions
83 of the two files do not match, and likewise that the last lines do not
84 match. The caller must trim matching lines from the beginning and end
85 of the portions it is going to specify.
87 If we return the "wrong" partitions,
88 the worst this can do is cause suboptimal diff output.
89 It cannot cause incorrect diff output. */
92 diag (lin xoff
, lin xlim
, lin yoff
, lin ylim
, bool find_minimal
,
93 struct partition
*part
)
95 lin
*const fd
= fdiag
; /* Give the compiler a chance. */
96 lin
*const bd
= bdiag
; /* Additional help for the compiler. */
97 lin
const *const xv
= xvec
; /* Still more help for the compiler. */
98 lin
const *const yv
= yvec
; /* And more and more . . . */
99 lin
const dmin
= xoff
- ylim
; /* Minimum valid diagonal. */
100 lin
const dmax
= xlim
- yoff
; /* Maximum valid diagonal. */
101 lin
const fmid
= xoff
- yoff
; /* Center diagonal of top-down search. */
102 lin
const bmid
= xlim
- ylim
; /* Center diagonal of bottom-up search. */
103 lin fmin
= fmid
, fmax
= fmid
; /* Limits of top-down search. */
104 lin bmin
= bmid
, bmax
= bmid
; /* Limits of bottom-up search. */
106 bool odd
= (fmid
- bmid
) & 1; /* True if southeast corner is on an odd
107 diagonal with respect to the northwest. */
114 lin d
; /* Active diagonal. */
115 bool big_snake
= false;
117 /* Extend the top-down search by an edit step in each diagonal. */
118 fmin
> dmin
? fd
[--fmin
- 1] = -1 : ++fmin
;
119 fmax
< dmax
? fd
[++fmax
+ 1] = -1 : --fmax
;
120 for (d
= fmax
; d
>= fmin
; d
-= 2)
122 lin x
, y
, oldx
, tlo
= fd
[d
- 1], thi
= fd
[d
+ 1];
130 while (x
< xlim
&& y
< ylim
&& xv
[x
] == yv
[y
])
132 if (x
- oldx
> SNAKE_LIMIT
)
135 if (odd
&& bmin
<= d
&& d
<= bmax
&& bd
[d
] <= x
)
139 part
->lo_minimal
= part
->hi_minimal
= true;
144 /* Similarly extend the bottom-up search. */
145 bmin
> dmin
? bd
[--bmin
- 1] = LIN_MAX
: ++bmin
;
146 bmax
< dmax
? bd
[++bmax
+ 1] = LIN_MAX
: --bmax
;
147 for (d
= bmax
; d
>= bmin
; d
-= 2)
149 lin x
, y
, oldx
, tlo
= bd
[d
- 1], thi
= bd
[d
+ 1];
157 while (x
> xoff
&& y
> yoff
&& xv
[x
- 1] == yv
[y
- 1])
159 if (oldx
- x
> SNAKE_LIMIT
)
162 if (!odd
&& fmin
<= d
&& d
<= fmax
&& x
<= fd
[d
])
166 part
->lo_minimal
= part
->hi_minimal
= true;
174 /* Heuristic: check occasionally for a diagonal that has made
175 lots of progress compared with the edit distance.
176 If we have any such, find the one that has made the most
177 progress and return it as if it had succeeded.
179 With this heuristic, for files with a constant small density
180 of changes, the algorithm is linear in the file size. */
182 if (200 < c
&& big_snake
&& speed_large_files
)
186 for (d
= fmax
; d
>= fmin
; d
-= 2)
191 lin v
= (x
- xoff
) * 2 - dd
;
192 if (v
> 12 * (c
+ (dd
< 0 ? -dd
: dd
)))
195 && xoff
+ SNAKE_LIMIT
<= x
&& x
< xlim
196 && yoff
+ SNAKE_LIMIT
<= y
&& y
< ylim
)
198 /* We have a good enough best diagonal;
199 now insist that it end with a significant snake. */
202 for (k
= 1; xv
[x
- k
] == yv
[y
- k
]; k
++)
203 if (k
== SNAKE_LIMIT
)
215 part
->lo_minimal
= true;
216 part
->hi_minimal
= false;
221 for (d
= bmax
; d
>= bmin
; d
-= 2)
226 lin v
= (xlim
- x
) * 2 + dd
;
227 if (v
> 12 * (c
+ (dd
< 0 ? -dd
: dd
)))
230 && xoff
< x
&& x
<= xlim
- SNAKE_LIMIT
231 && yoff
< y
&& y
<= ylim
- SNAKE_LIMIT
)
233 /* We have a good enough best diagonal;
234 now insist that it end with a significant snake. */
237 for (k
= 0; xv
[x
+ k
] == yv
[y
+ k
]; k
++)
238 if (k
== SNAKE_LIMIT
- 1)
250 part
->lo_minimal
= false;
251 part
->hi_minimal
= true;
256 /* Heuristic: if we've gone well beyond the call of duty,
257 give up and report halfway between our best results so far. */
258 if (c
>= too_expensive
)
263 fxbest
= bxbest
= 0; /* Pacify `gcc -Wall'. */
265 /* Find forward diagonal that maximizes X + Y. */
267 for (d
= fmax
; d
>= fmin
; d
-= 2)
269 lin x
= MIN (fd
[d
], xlim
);
272 x
= ylim
+ d
, y
= ylim
;
280 /* Find backward diagonal that minimizes X + Y. */
282 for (d
= bmax
; d
>= bmin
; d
-= 2)
284 lin x
= MAX (xoff
, bd
[d
]);
287 x
= yoff
+ d
, y
= yoff
;
295 /* Use the better of the two diagonals. */
296 if ((xlim
+ ylim
) - bxybest
< fxybest
- (xoff
+ yoff
))
299 part
->ymid
= fxybest
- fxbest
;
300 part
->lo_minimal
= true;
301 part
->hi_minimal
= false;
306 part
->ymid
= bxybest
- bxbest
;
307 part
->lo_minimal
= false;
308 part
->hi_minimal
= true;
315 /* Compare in detail contiguous subsequences of the two files
316 which are known, as a whole, to match each other.
318 The results are recorded in the vectors files[N].changed, by
319 storing 1 in the element for each line that is an insertion or deletion.
321 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
323 Note that XLIM, YLIM are exclusive bounds.
324 All line numbers are origin-0 and discarded lines are not counted.
326 If FIND_MINIMAL, find a minimal difference no matter how
330 compareseq (lin xoff
, lin xlim
, lin yoff
, lin ylim
, bool find_minimal
)
332 lin
const *xv
= xvec
; /* Help the compiler. */
333 lin
const *yv
= yvec
;
335 /* Slide down the bottom initial diagonal. */
336 while (xoff
< xlim
&& yoff
< ylim
&& xv
[xoff
] == yv
[yoff
])
338 /* Slide up the top initial diagonal. */
339 while (xlim
> xoff
&& ylim
> yoff
&& xv
[xlim
- 1] == yv
[ylim
- 1])
342 /* Handle simple cases. */
345 files
[1].changed
[files
[1].realindexes
[yoff
++]] = 1;
346 else if (yoff
== ylim
)
348 files
[0].changed
[files
[0].realindexes
[xoff
++]] = 1;
351 struct partition part
;
353 /* Find a point of correspondence in the middle of the files. */
354 diag (xoff
, xlim
, yoff
, ylim
, find_minimal
, &part
);
356 /* Use the partitions to split this problem into subproblems. */
357 compareseq (xoff
, part
.xmid
, yoff
, part
.ymid
, part
.lo_minimal
);
358 compareseq (part
.xmid
, xlim
, part
.ymid
, ylim
, part
.hi_minimal
);
362 /* Discard lines from one file that have no matches in the other file.
364 A line which is discarded will not be considered by the actual
365 comparison algorithm; it will be as if that line were not in the file.
366 The file's `realindexes' table maps virtual line numbers
367 (which don't count the discarded lines) into real line numbers;
368 this is how the actual comparison algorithm produces results
369 that are comprehensible when the discarded lines are counted.
371 When we discard a line, we also mark it as a deletion or insertion
372 so that it will be printed in the output. */
375 discard_confusing_lines (struct file_data filevec
[])
383 /* Allocate our results. */
384 p
= xmalloc ((filevec
[0].buffered_lines
+ filevec
[1].buffered_lines
)
386 for (f
= 0; f
< 2; f
++)
388 filevec
[f
].undiscarded
= p
; p
+= filevec
[f
].buffered_lines
;
389 filevec
[f
].realindexes
= p
; p
+= filevec
[f
].buffered_lines
;
392 /* Set up equiv_count[F][I] as the number of lines in file F
393 that fall in equivalence class I. */
395 p
= zalloc (filevec
[0].equiv_max
* (2 * sizeof *p
));
397 equiv_count
[1] = p
+ filevec
[0].equiv_max
;
399 for (i
= 0; i
< filevec
[0].buffered_lines
; ++i
)
400 ++equiv_count
[0][filevec
[0].equivs
[i
]];
401 for (i
= 0; i
< filevec
[1].buffered_lines
; ++i
)
402 ++equiv_count
[1][filevec
[1].equivs
[i
]];
404 /* Set up tables of which lines are going to be discarded. */
406 discarded
[0] = zalloc (filevec
[0].buffered_lines
407 + filevec
[1].buffered_lines
);
408 discarded
[1] = discarded
[0] + filevec
[0].buffered_lines
;
410 /* Mark to be discarded each line that matches no line of the other file.
411 If a line matches many lines, mark it as provisionally discardable. */
413 for (f
= 0; f
< 2; f
++)
415 size_t end
= filevec
[f
].buffered_lines
;
416 char *discards
= discarded
[f
];
417 lin
*counts
= equiv_count
[1 - f
];
418 lin
*equivs
= filevec
[f
].equivs
;
420 size_t tem
= end
/ 64;
422 /* Multiply MANY by approximate square root of number of lines.
423 That is the threshold for provisionally discardable lines. */
424 while ((tem
= tem
>> 2) > 0)
427 for (i
= 0; i
< end
; i
++)
432 nmatch
= counts
[equivs
[i
]];
435 else if (nmatch
> many
)
440 /* Don't really discard the provisional lines except when they occur
441 in a run of discardables, with nonprovisionals at the beginning
444 for (f
= 0; f
< 2; f
++)
446 lin end
= filevec
[f
].buffered_lines
;
447 register char *discards
= discarded
[f
];
449 for (i
= 0; i
< end
; i
++)
451 /* Cancel provisional discards not in middle of run of discards. */
452 if (discards
[i
] == 2)
454 else if (discards
[i
] != 0)
456 /* We have found a nonprovisional discard. */
461 /* Find end of this run of discardable lines.
462 Count how many are provisionally discardable. */
463 for (j
= i
; j
< end
; j
++)
465 if (discards
[j
] == 0)
467 if (discards
[j
] == 2)
471 /* Cancel provisional discards at end, and shrink the run. */
472 while (j
> i
&& discards
[j
- 1] == 2)
473 discards
[--j
] = 0, --provisional
;
475 /* Now we have the length of a run of discardable lines
476 whose first and last are not provisional. */
479 /* If 1/4 of the lines in the run are provisional,
480 cancel discarding of all provisional lines in the run. */
481 if (provisional
* 4 > length
)
484 if (discards
[--j
] == 2)
491 lin tem
= length
>> 2;
493 /* MINIMUM is approximate square root of LENGTH/4.
494 A subrun of two or more provisionals can stand
495 when LENGTH is at least 16.
496 A subrun of 4 or more can stand when LENGTH >= 64. */
497 while (0 < (tem
>>= 2))
501 /* Cancel any subrun of MINIMUM or more provisionals
502 within the larger run. */
503 for (j
= 0, consec
= 0; j
< length
; j
++)
504 if (discards
[i
+ j
] != 2)
506 else if (minimum
== ++consec
)
507 /* Back up to start of subrun, to cancel it all. */
509 else if (minimum
< consec
)
512 /* Scan from beginning of run
513 until we find 3 or more nonprovisionals in a row
514 or until the first nonprovisional at least 8 lines in.
515 Until that point, cancel any provisionals. */
516 for (j
= 0, consec
= 0; j
< length
; j
++)
518 if (j
>= 8 && discards
[i
+ j
] == 1)
520 if (discards
[i
+ j
] == 2)
521 consec
= 0, discards
[i
+ j
] = 0;
522 else if (discards
[i
+ j
] == 0)
530 /* I advances to the last line of the run. */
533 /* Same thing, from end. */
534 for (j
= 0, consec
= 0; j
< length
; j
++)
536 if (j
>= 8 && discards
[i
- j
] == 1)
538 if (discards
[i
- j
] == 2)
539 consec
= 0, discards
[i
- j
] = 0;
540 else if (discards
[i
- j
] == 0)
552 /* Actually discard the lines. */
553 for (f
= 0; f
< 2; f
++)
555 char *discards
= discarded
[f
];
556 lin end
= filevec
[f
].buffered_lines
;
558 for (i
= 0; i
< end
; ++i
)
559 if (minimal
|| discards
[i
] == 0)
561 filevec
[f
].undiscarded
[j
] = filevec
[f
].equivs
[i
];
562 filevec
[f
].realindexes
[j
++] = i
;
565 filevec
[f
].changed
[i
] = 1;
566 filevec
[f
].nondiscarded_lines
= j
;
570 free (equiv_count
[0]);
573 /* Adjust inserts/deletes of identical lines to join changes
576 We do something when a run of changed lines include a
577 line at one end and have an excluded, identical line at the other.
578 We are free to choose which identical line is included.
579 `compareseq' usually chooses the one at the beginning,
580 but usually it is cleaner to consider the following identical line
581 to be the "change". */
584 shift_boundaries (struct file_data filevec
[])
588 for (f
= 0; f
< 2; f
++)
590 char *changed
= filevec
[f
].changed
;
591 char *other_changed
= filevec
[1 - f
].changed
;
592 lin
const *equivs
= filevec
[f
].equivs
;
595 lin i_end
= filevec
[f
].buffered_lines
;
599 lin runlength
, start
, corresponding
;
601 /* Scan forwards to find beginning of another run of changes.
602 Also keep track of the corresponding point in the other file. */
604 while (i
< i_end
&& !changed
[i
])
606 while (other_changed
[j
++])
616 /* Find the end of this run of changes. */
620 while (other_changed
[j
])
625 /* Record the length of this run of changes, so that
626 we can later determine whether the run has grown. */
627 runlength
= i
- start
;
629 /* Move the changed region back, so long as the
630 previous unchanged line matches the last changed one.
631 This merges with previous changed regions. */
633 while (start
&& equivs
[start
- 1] == equivs
[i
- 1])
635 changed
[--start
] = 1;
637 while (changed
[start
- 1])
639 while (other_changed
[--j
])
643 /* Set CORRESPONDING to the end of the changed run, at the last
644 point where it corresponds to a changed run in the other file.
645 CORRESPONDING == I_END means no such point has been found. */
646 corresponding
= other_changed
[j
- 1] ? i
: i_end
;
648 /* Move the changed region forward, so long as the
649 first changed line matches the following unchanged one.
650 This merges with following changed regions.
651 Do this second, so that if there are no merges,
652 the changed region is moved forward as far as possible. */
654 while (i
!= i_end
&& equivs
[start
] == equivs
[i
])
656 changed
[start
++] = 0;
660 while (other_changed
[++j
])
664 while (runlength
!= i
- start
);
666 /* If possible, move the fully-merged run of changes
667 back to a corresponding run in the other file. */
669 while (corresponding
< i
)
671 changed
[--start
] = 1;
673 while (other_changed
[--j
])
680 /* Cons an additional entry onto the front of an edit script OLD.
681 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
682 DELETED is the number of lines deleted here from file 0.
683 INSERTED is the number of lines inserted here in file 1.
685 If DELETED is 0 then LINE0 is the number of the line before
686 which the insertion was done; vice versa for INSERTED and LINE1. */
688 static struct change
*
689 add_change (lin line0
, lin line1
, lin deleted
, lin inserted
,
692 struct change
*new = xmalloc (sizeof *new);
696 new->inserted
= inserted
;
697 new->deleted
= deleted
;
702 /* Scan the tables of which lines are inserted and deleted,
703 producing an edit script in reverse order. */
705 static struct change
*
706 build_reverse_script (struct file_data
const filevec
[])
708 struct change
*script
= 0;
709 char *changed0
= filevec
[0].changed
;
710 char *changed1
= filevec
[1].changed
;
711 lin len0
= filevec
[0].buffered_lines
;
712 lin len1
= filevec
[1].buffered_lines
;
714 /* Note that changedN[len0] does exist, and is 0. */
718 while (i0
< len0
|| i1
< len1
)
720 if (changed0
[i0
] | changed1
[i1
])
722 lin line0
= i0
, line1
= i1
;
724 /* Find # lines changed here in each file. */
725 while (changed0
[i0
]) ++i0
;
726 while (changed1
[i1
]) ++i1
;
728 /* Record this change. */
729 script
= add_change (line0
, line1
, i0
- line0
, i1
- line1
, script
);
732 /* We have reached lines in the two files that match each other. */
739 /* Scan the tables of which lines are inserted and deleted,
740 producing an edit script in forward order. */
742 static struct change
*
743 build_script (struct file_data
const filevec
[])
745 struct change
*script
= 0;
746 char *changed0
= filevec
[0].changed
;
747 char *changed1
= filevec
[1].changed
;
748 lin i0
= filevec
[0].buffered_lines
, i1
= filevec
[1].buffered_lines
;
750 /* Note that changedN[-1] does exist, and is 0. */
752 while (i0
>= 0 || i1
>= 0)
754 if (changed0
[i0
- 1] | changed1
[i1
- 1])
756 lin line0
= i0
, line1
= i1
;
758 /* Find # lines changed here in each file. */
759 while (changed0
[i0
- 1]) --i0
;
760 while (changed1
[i1
- 1]) --i1
;
762 /* Record this change. */
763 script
= add_change (i0
, i1
, line0
- i0
, line1
- i1
, script
);
766 /* We have reached lines in the two files that match each other. */
773 /* If CHANGES, briefly report that two files differed.
774 Return 2 if trouble, CHANGES otherwise. */
776 briefly_report (int changes
, struct file_data
const filevec
[])
780 char const *label0
= file_label
[0] ? file_label
[0] : filevec
[0].name
;
781 char const *label1
= file_label
[1] ? file_label
[1] : filevec
[1].name
;
782 message ("Files %s and %s differ\n", label0
, label1
);
790 /* Report the differences of two files. */
792 diff_2_files (struct comparison
*cmp
)
796 struct change
*e
, *p
;
797 struct change
*script
;
801 /* If we have detected that either file is binary,
802 compare the two files as binary. This can happen
803 only when the first chunk is read.
804 Also, --brief without any --ignore-* options means
805 we can speed things up by treating the files as binary. */
807 if (read_files (cmp
->file
, files_can_be_treated_as_binary
))
809 /* Files with different lengths must be different. */
810 if (cmp
->file
[0].stat
.st_size
!= cmp
->file
[1].stat
.st_size
811 && (cmp
->file
[0].desc
< 0 || S_ISREG (cmp
->file
[0].stat
.st_mode
))
812 && (cmp
->file
[1].desc
< 0 || S_ISREG (cmp
->file
[1].stat
.st_mode
)))
815 /* Standard input equals itself. */
816 else if (cmp
->file
[0].desc
== cmp
->file
[1].desc
)
820 /* Scan both files, a buffer at a time, looking for a difference. */
822 /* Allocate same-sized buffers for both files. */
823 size_t lcm_max
= PTRDIFF_MAX
- 1;
825 buffer_lcm (sizeof (word
),
826 buffer_lcm (STAT_BLOCKSIZE (cmp
->file
[0].stat
),
827 STAT_BLOCKSIZE (cmp
->file
[1].stat
),
830 for (f
= 0; f
< 2; f
++)
831 cmp
->file
[f
].buffer
= xrealloc (cmp
->file
[f
].buffer
, buffer_size
);
833 for (;; cmp
->file
[0].buffered
= cmp
->file
[1].buffered
= 0)
835 /* Read a buffer's worth from both files. */
836 for (f
= 0; f
< 2; f
++)
837 if (0 <= cmp
->file
[f
].desc
)
838 file_block_read (&cmp
->file
[f
],
839 buffer_size
- cmp
->file
[f
].buffered
);
841 /* If the buffers differ, the files differ. */
842 if (cmp
->file
[0].buffered
!= cmp
->file
[1].buffered
843 || memcmp (cmp
->file
[0].buffer
,
845 cmp
->file
[0].buffered
))
851 /* If we reach end of file, the files are the same. */
852 if (cmp
->file
[0].buffered
!= buffer_size
)
860 changes
= briefly_report (changes
, cmp
->file
);
864 /* Allocate vectors for the results of comparison:
865 a flag for each line of each file, saying whether that line
866 is an insertion or deletion.
867 Allocate an extra element, always 0, at each end of each vector. */
869 size_t s
= cmp
->file
[0].buffered_lines
+ cmp
->file
[1].buffered_lines
+ 4;
870 char *flag_space
= zalloc (s
);
871 cmp
->file
[0].changed
= flag_space
+ 1;
872 cmp
->file
[1].changed
= flag_space
+ cmp
->file
[0].buffered_lines
+ 3;
874 /* Some lines are obviously insertions or deletions
875 because they don't match anything. Detect them now, and
876 avoid even thinking about them in the main comparison algorithm. */
878 discard_confusing_lines (cmp
->file
);
880 /* Now do the main comparison algorithm, considering just the
881 undiscarded lines. */
883 xvec
= cmp
->file
[0].undiscarded
;
884 yvec
= cmp
->file
[1].undiscarded
;
885 diags
= (cmp
->file
[0].nondiscarded_lines
886 + cmp
->file
[1].nondiscarded_lines
+ 3);
887 fdiag
= xmalloc (diags
* (2 * sizeof *fdiag
));
888 bdiag
= fdiag
+ diags
;
889 fdiag
+= cmp
->file
[1].nondiscarded_lines
+ 1;
890 bdiag
+= cmp
->file
[1].nondiscarded_lines
+ 1;
892 /* Set TOO_EXPENSIVE to be approximate square root of input size,
893 bounded below by 256. */
895 for (; diags
!= 0; diags
>>= 2)
897 too_expensive
= MAX (256, too_expensive
);
899 files
[0] = cmp
->file
[0];
900 files
[1] = cmp
->file
[1];
902 compareseq (0, cmp
->file
[0].nondiscarded_lines
,
903 0, cmp
->file
[1].nondiscarded_lines
, minimal
);
905 free (fdiag
- (cmp
->file
[1].nondiscarded_lines
+ 1));
907 /* Modify the results slightly to make them prettier
908 in cases where that can validly be done. */
910 shift_boundaries (cmp
->file
);
912 /* Get the results of comparison in the form of a chain
913 of `struct change's -- an edit script. */
915 if (output_style
== OUTPUT_ED
)
916 script
= build_reverse_script (cmp
->file
);
918 script
= build_script (cmp
->file
);
920 /* Set CHANGES if we had any diffs.
921 If some changes are ignored, we must scan the script to decide. */
922 if (ignore_blank_lines
|| ignore_regexp
.fastmap
)
924 struct change
*next
= script
;
927 while (next
&& changes
== 0)
929 struct change
*this, *end
;
930 lin first0
, last0
, first1
, last1
;
932 /* Find a set of changes that belong together. */
934 end
= find_change (next
);
936 /* Disconnect them from the rest of the changes, making them
937 a hunk, and remember the rest for next iteration. */
941 /* Determine whether this hunk is really a difference. */
942 if (analyze_hunk (this, &first0
, &last0
, &first1
, &last1
))
945 /* Reconnect the script so it will all be freed properly. */
950 changes
= (script
!= 0);
953 changes
= briefly_report (changes
, cmp
->file
);
956 if (changes
| !no_diff_means_no_output
)
958 /* Record info for starting up output,
959 to be used if and when we have some output to print. */
960 setup_output (file_label
[0] ? file_label
[0] : cmp
->file
[0].name
,
961 file_label
[1] ? file_label
[1] : cmp
->file
[1].name
,
964 switch (output_style
)
967 print_context_script (script
, false);
971 print_context_script (script
, true);
975 print_ed_script (script
);
978 case OUTPUT_FORWARD_ED
:
979 pr_forward_ed_script (script
);
983 print_rcs_script (script
);
987 print_normal_script (script
);
991 print_ifdef_script (script
);
995 print_sdiff_script (script
);
1006 free (cmp
->file
[0].undiscarded
);
1010 for (f
= 0; f
< 2; f
++)
1012 free (cmp
->file
[f
].equivs
);
1013 free (cmp
->file
[f
].linbuf
+ cmp
->file
[f
].linbuf_base
);
1016 for (e
= script
; e
; e
= p
)
1022 if (! ROBUST_OUTPUT_STYLE (output_style
))
1023 for (f
= 0; f
< 2; ++f
)
1024 if (cmp
->file
[f
].missing_newline
)
1026 error (0, 0, "%s: %s\n",
1027 file_label
[f
] ? file_label
[f
] : cmp
->file
[f
].name
,
1028 _("No newline at end of file"));
1033 if (cmp
->file
[0].buffer
!= cmp
->file
[1].buffer
)
1034 free (cmp
->file
[0].buffer
);
1035 free (cmp
->file
[1].buffer
);