Fix #338: re.sub() flag argument at wrong position.
[docutils.git] / prest / tbin / diffre
blob82956c3960ff4c642628e36413bed10917ef84a4
1 #!/usr/local/bin/nosuch/perl
2 #### Text above here was automatically generated during the configuration.
3 #### Modifications to this file will be lost during configuration.
6 #!/usr/local/bin/perl
8 # $Id: diffre.prl.root,v 1.1 2004/03/24 19:25:24 nodine Exp $
9 # Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
10 # Distributed under terms of the GNU General Public License (GPL).
12 # Description: This program is like the Unix diff program except that
13 # the first file can contain regular expressions that allow parts of the
14 # line to vary but still count as a match.
16 # Usage: ${main::TOOL_NAME} [options] re-file file2
18 # Options:
19 # -h Help
20 # -r regexp Elide differences involving the regexp (no regular
21 # expressions in re-file) (conflicts with -t)
22 # -t Text-only match (no regular expressions in re-file)
23 # (conflicts with -r)
24 # -D symbol Generate "#ifdef symbol" output
25 # -V Print version info
27 $0 =~ m|([^\/]+)$|;
28 $main::TOOL_NAME = $1;
30 main();
32 #use strict;
33 use integer;
34 if ($[ > 5.008) { eval "use bytes"; }
35 use Getopt::Std;
37 sub main {
38 my($options) = "dhr:tVD:C";
39 Usage() unless getopts($options);
40 if (defined $opt_r && $opt_t) {
41 print STDERR "Cannot have both -r and -t\n";
42 Usage('Options');
45 $| = 1; # Output right away (no output buffering)
47 Usage('\\$Id') if $main::opt_V;
48 Usage('Description') if $main::opt_h;
49 Usage() unless defined $ARGV[1];
51 # Open the files
52 open (FILE_RE,"<$ARGV[0]") or die "Cannot open $ARGV[0]";
53 open (FILE_2,"<$ARGV[1]") or die "Cannot open $ARGV[1]";
55 # Read the files
56 @main::FILE_RE = <FILE_RE>;
57 @main::FILE_2 = <FILE_2>;
58 # Add sentinels to the files
59 push @main::FILE_RE, "\000";
60 push @main::FILE_2, "\000";
62 # Close the files
63 close FILE_RE;
64 close FILE_2;
66 CompareFiles();
69 # This algorithm is loosely based on Dijkstra's single-source shortest-paths
70 # algorithm. See Thomas H. Cormen, Charles E. Leiserson, Ronald
71 # L. Rivest, _Introduction_to_Algorithms_, first edition, pp. 527-532
72 # for a description of the algorithm. The graph upon which the
73 # algorithm is applied is that induced by comparing every line of file
74 # 1 with every line of file 2; if they are the same, we create a node
75 # in the graph with a label that is the combination of the two line
76 # numbers. The distance between node A=(a1,a2) and node B=(b1,b2)
77 # is the minumum number of line changes that need to occur in going
78 # from line a1 of file 1 to line b1 of file 1 and line a2 of file 2
79 # to line b2 of file 2 (or infinite if b1 <= a1 or b2 <= a2). This
80 # distance is delta(A,B) = b1-a1+b2-a2-2 if there are no
81 # intervening nodes. The goal is to find the shortest path from
82 # (-1,-1) to the vertex that represents the end of both files.
84 # For efficiency's sake, we do not actually generate all the nodes,
85 # but rather produce them as Dijkstra's algorithm would need them.
86 # For further efficiency's sake, we do not even generate all the successors
87 # every time we add a node to the list of those whose distances are
88 # known. We go through the list generating successors at progressively
89 # larger distances as they become greater than the minimum distance.
90 # Since the delta function has an inverse triangle inequality, i.e.,
91 # delta(A,C) > delta(A,B) + delta(B,C), it means that any successor B
92 # of any node A means that we do not need to check edges from A to any
93 # point C for which c1 >= b1 and c2 >= b2. As a special case, if B
94 # is (a1+1, b1+1), no further checks from A are necessary, so it is
95 # removed from the active list.
97 # Unlike Dijkstra's algorithm, by the time we compute a distance for
98 # a node, we know that it is the exact minimum distance.
100 # The following global variables are used:
101 # %PRED Key=node name, value=predecessor on min. path
102 # %DIST Key=node name, value=distance from start node
103 # %SUCC Key=node name, value=hash ref. with key=successor
104 # node name and value=est min. dist.
105 # %ACTIVE Key=node name, value=1; has an entry for every
106 # node from which we actively generate successors
107 # %COORDS key=node name, value=array ref. with node's
108 # coordinates.
110 # This is the subroutine that actually compares the files.
111 # Uses globals: @main::FILE_RE, @main::FILE_2, %main::DIST, %main::PRED,
112 # %main::SUCC
113 # Sets globals: %main::DIST, %main::ACTIVE
114 #BEGIN_CORE -- Do not delete this line; expectit2 depends upon it
115 sub CompareFiles {
116 my($goalNode) = @main::FILE_2 . ',' . @main::FILE_RE;
117 if ($main::opt_r) {
118 # Preprocess RE file for regexp
119 foreach (@main::FILE_RE) {
120 my $A = $_;
121 # Quote regexp characters
122 $A =~ s/([.*?+^\$\\$@\[\]\(\)\{\}])|$opt_r/
123 $1 ne '' ? "\\$1" : $opt_r/geo;
124 push @main::OPTR_RE,$A;
127 my($startNode) = '-1,-1';
128 if ($main::opt_t) {
129 eval (q|sub Compare {
130 #print STDERR "($_[0],$_[1]): \'${\substr($main::FILE_2[$_[0]],0,-1)}\' cmp \'${\substr($main::FILE_RE[$_[1]],0,-1)}\'\n";
131 return $main::FILE_2[$_[0]] eq $main::FILE_RE[$_[1]];
132 }|);
134 elsif ($main::opt_r) {
135 eval (q|sub Compare {
136 return $main::FILE_2[$_[0]] =~ /^$main::OPTR_RE[$_[1]]$/;
137 }|);
139 else {
140 eval(q|sub Compare {
141 return $main::FILE_2[$_[0]] =~ /^$main::FILE_RE[$_[1]]$/;
142 }|);
144 $main::COORDS{$startNode} = [-1, -1];
145 $main::ACTIVE{$startNode} = 1;
146 $main::DIST{$startNode} = 0;
148 my $genDist = 0;
150 my %done;
151 # We continue until the last node we generated was the goal node
152 #print STDERR "$goalNode\n";
153 while (! defined $main::DIST{$goalNode}) {
154 $genDist = GenerateSuccessors($genDist);
157 pop @main::FILE_2;
158 pop @main::FILE_RE;
159 # Generate the report by recursively working backwards from the goal node
160 if ($main::opt_d) {
161 GenerateDot();
163 else {
164 OutputDifferences($goalNode);
165 print "$main::count comparisons\n" if $main::opt_C;
169 # This subroutine generates the successors for the nodes in
170 # %main::ACTIVE starting with the specified distance.
171 # It returns the distance of any newly generated nodes.
172 # Arguments: $genDist
173 # Returns: Updated $genDist
174 # Uses globals: @main::FILE_RE, @main::FILE_2, %main::DIST,
175 # %main::DONE_COMPARISON, %main::ACTIVE
176 # Sets globals: %main::DIST, %main::PRED, %main::SUCC, %main::DONE_COMPARISON,
177 # %main::ACTIVE, %main::COORDS
178 sub GenerateSuccessors {
179 my($genDist) = @_;
180 #print STDERR "GenerateSuccessors(",join(',',@_),")\n";
182 my $found_node;
183 my $max_line_2 = @main::FILE_2;
184 for ($found_node = 0; ! $found_node; $genDist++) {
185 my $i;
186 my @nodes = keys %main::ACTIVE;
187 die "Internal error: No active nodes!" unless @nodes;
188 for ($i=0; $i < @nodes; $i++) {
189 #print STDERR "$i: [",join(';',@nodes),"]\n";
190 my $node = $nodes[$i];
191 my $baseDist = $main::DIST{$node};
192 #print STDERR "[$node] at $genDist = ${\scalar($genDist-$baseDist)} from $baseDist\n";
193 my $node_lines = $COORDS{$node};
194 my ($min_line_2,$min_line_re) =
195 ($node_lines->[0]+1, $node_lines->[1]+1);
196 my $netDist = $genDist - $baseDist;
197 my $active = 0;
198 my $max_line_re = $min_line_re + $netDist;
199 $max_line_re = @main::FILE_RE if @main::FILE_RE < $max_line_re;
200 my $line_re = $min_line_re;
201 my $line_2 = $min_line_2 + $netDist;
202 if ($line_2 > $max_line_2) {
203 #print STDERR " Off end of file 2: ($line_2, $line_re)";
204 my $diff = $line_2 - $max_line_2;
205 $line_re += $diff;
206 $line_2 -= $diff;
207 #print STDERR ": warp to ($line_2, $line_re)\n";
209 my @succ = sort {$b <=> $a} keys %{$main::SUCC{$node}};
210 while ($line_re <= $max_line_re) {
211 #print STDERR " Check ($line_2, $line_re)\n";
212 # Check to see that none of the node's successors blocks this
213 # comparison.
214 my $succ = $succ[0];
215 if (defined $succ && $COORDS{$succ}[1] <= $line_re) {
216 # It blocks.
217 my $diff = $line_2 - $COORDS{$succ}[0] + 1;
218 $line_re += $diff;
219 $line_2 -= $diff;
220 #print STDERR " Blocked by $succ: warp to ($line_2, $line_re)\n";
221 shift @succ;
222 next;
224 my $newNode = $line_2 . ',' . $line_re;
225 my $compare = $DONE_COMPARISON{$newNode};
226 if (! defined $compare) {
227 $compare = $DONE_COMPARISON{$newNode} =
228 Compare($line_2,$line_re);
230 $main::count++;
231 $active = 1;
232 if ($compare) {
233 # The lines agree; we've found a new node
234 #print STDERR " Match ($line_2,$line_re) = $genDist\n";
235 my $oldDist = $main::DIST{$newNode};
236 if (! defined $oldDist || $oldDist > $genDist) {
237 $main::DIST{$newNode} = $genDist;
238 $main::COORDS{$newNode} = [$line_2,$line_re];
239 $main::ACTIVE{$newNode} = 1;
240 $main::PRED{$newNode} = $node;
241 push @nodes, $newNode;
242 $found_node = 1;
244 $main::SUCC{$node}{$newNode} = $genDist;
246 $line_re++, $line_2--;
248 #print STDERR " $node is no longer active\n" unless $active;
249 delete $main::ACTIVE{$node} unless $active;
252 return $genDist;
255 # This subroutine outputs a difference report. The arguments are the
256 # delta line numbers for the two files where the next match occurs.
257 # Arguments: $node
258 # Uses globals: @main::FILE_RE, @main::FILE_2, %main::PRED
259 sub OutputDifferences {
260 my($goalNode) = @_;
261 my(@path,$node);
263 push(@path, $goalNode);
264 for ($node = $goalNode; $main::PRED{$node} ne "";
265 $node = $main::PRED{$node}) {
266 push(@path, $main::PRED{$node});
269 my($i);
270 for ($i = $#path-1; $i >= 0; $i--) {
271 my($last_2,$last_re) = @{$COORDS{$path[$i+1]}};
272 my($this_2,$this_re) = @{$COORDS{$path[$i]}};
273 my($diff_2,$diff_re) = ($this_2-$last_2-1, $this_re-$last_re-1);
274 if ($diff_re != 0 || $diff_2 != 0) {
275 my($base_re) = $diff_re == 0 ? $last_re + 1 :
276 ($diff_re == 1 ? ($last_re + 2) :
277 ($last_re + 2) . ',' . ($last_re + $diff_re + 1));
278 my($base_2) = $diff_2 == 0 ? $last_2 + 1 :
279 ($diff_2 == 1 ? ($last_2 + 2) :
280 ($last_2 + 2) . ',' . ($last_2 + $diff_2 + 1));
281 my $pfx;
282 if (defined $opt_D) {
283 print "#ifndef $opt_D\n" if $diff_re > 0;
285 else {
286 print $base_re, ($diff_re == 0 ? 'a' : ($diff_2 == 0 ? 'd'
287 : 'c')),
288 "$base_2\n";
289 $pfx = "< ";
291 print map("$pfx$_",@main::FILE_RE[$last_re+1 .. ($last_re+$diff_re)])
292 if ($diff_re > 0);
293 if (defined $opt_D) {
294 print $diff_re > 0 ? "#else /* $opt_D */\n" :
295 "#ifdef $opt_D\n" if $diff_2 > 0;
297 else {
298 print "---\n" if $diff_re > 0 && $diff_2 > 0;
299 $pfx = "> ";
301 print map("$pfx$_",@main::FILE_2[$last_2+1 .. ($last_2+$diff_2)])
302 if ($diff_2 > 0);
303 if (defined $opt_D) {
304 print "#endif /* $opt_D */\n";
305 print $main::FILE_2[$this_2];
308 elsif (defined $opt_D) {
309 print $main::FILE_2[$this_2];
314 # This subroutine outputs a dot file for debugging purposes.
315 sub GenerateDot {
316 print "digraph g {\n";
317 my($node);
318 # Find out how many predecessors each node has
319 my(%preds);
320 foreach $node (keys %main::SUCC) {
321 my($nextNode);
322 foreach $nextNode (keys %{$main::SUCC{$node}}) {
323 $preds{$nextNode}++;
326 # Elide strings of nodes with the same total distance
327 foreach $node (keys %main::SUCC) {
328 my($nextNode);
329 my(@succs);
330 @succs = keys %{$main::SUCC{$node}};
331 if ($preds{$node} == 1 && @succs == 1 &&
332 $main::DIST{$main::PRED{$node}} ==
333 $main::SUCC{$node}{$succs[0]}) {
334 $dist = $main::SUCC{$node}{$succs[0]};
335 delete $main::SUCC{$main::PRED{$node}}{$node};
336 delete $main::SUCC{$node}{$succs[0]};
337 $main::SUCC{$main::PRED{$node}}{$succs[0]} = $dist;
338 $main::PRED{$succs[0]} = $main::PRED{$node};
341 # Find the shortest path
342 my(%path);
343 for ($node = $goalNode; $main::PRED{$node} ne "";
344 $node = $main::PRED{$node}) {
345 $path{$main::PRED{$node}} = $node;
347 # Output the graph
348 foreach $node (sort lex keys %main::SUCC) {
349 my($nextNode);
350 my($l1a,$l2a) = split(/,/, $node);
351 foreach $nextNode (sort lex keys %{$main::SUCC{$node}}) {
352 my($dist) = $main::SUCC{$node}{$nextNode};
353 my($atts);
354 $atts = ', weight = "10"'
355 if $path{$node} eq $nextNode;
356 my($l1b,$l2b) = split(/,/, $nextNode);
357 $atts .= ', style = "dotted"'
358 if $l1b - $l1a > 1 && $main::DIST{$node} ==
359 $main::SUCC{$node}{$nextNode};
360 print qq/ "$node" -> "$nextNode" [label = "$dist"$atts];\n/
363 print "}\n";
365 # Does lexicographic sort of node names
366 sub lex {
367 my ($a1,$a2) = split(/,/, $a);
368 my ($b1,$b2) = split(/,/, $b);
369 return $a1 <=> $b1 || $a2 <=> $b2;
372 #END_CORE -- Do not delete this line; expectit2 depends upon it
374 # This subroutine extracts and prints usage information
375 sub Usage {
376 my ($what,$end) = @_;
377 $what = "Usage" if ! $what;
378 if (open(ME,$0) == 1) {
379 while (<ME>) {
380 $print = 1 if /^\# $what/o;
381 $print = 0 if ! /^\#/o || ($end && /^\# $end/o);
382 if ($print) {
383 my $line = substr($_, 2);
384 $line =~ s/(\$\{[^\}]+\})/eval($1)/ge;
385 print $line;
388 close(ME);
390 else {
391 print STDERR "Usage not available.\n";
393 exit (1);
396 __END__
397 # $Log: diffre.prl.root,v $
398 # Revision 1.1 2004/03/24 19:25:24 nodine
399 # Rename tool from rediff to diffre.
401 # Revision 1.1 2004/03/19 21:54:50 nodine
402 # Create gen_gress, run_gress, and rediff using the safe perl from config
403 # time and just copy to the bin directory.
405 # Revision 1.1 2004/02/03 20:45:09 nodine
406 # Initial release.
408 # Revision 1.1 2003/02/07 21:28:22 nodine
409 # Initial release.
411 # Revision 1.1 2001/11/05 22:17:42 nodine
412 # Initial release.
414 # Revision 1.1 2001/07/25 15:24:35 nodine
415 # Initial release. These tools are for the regression set methodology.
417 # Revision 1.1 2001/02/10 16:22:55 nodine
418 # Initial release.
420 # Revision 1.1 2000/11/03 20:22:40 nodine
421 # Initial release.
423 # Revision 1.1 2000/08/31 18:11:48 nodine
424 # Initial release.
426 # Revision 1.1 2000/05/31 20:31:14 nodine
427 # Initial release.
429 # Revision 1.4 1997/11/06 21:42:17 nodine
430 # * Added comma before "weight" attribute in dot output.
431 # * Fixed processing that occurs when one EOF is reached before the other.
433 # Revision 1.3 1997/11/03 18:15:07 nodine
434 # * Added -t option and hidden -D and -C options.
435 # * Fixed a bunch of bugs.
436 # * Did some speed optimizations.
438 # Revision 1.2 1997/10/27 20:22:54 nodine
439 # Changed to find the minimal set of differences by using Dijkstra's shortest
440 # path algorithm.
442 # Revision 1.1 1997/10/24 21:22:43 nodine
443 # "Initial version"