prest/tbin/diffre

   1 #!/usr/local/bin/nosuch/perl
   2 #### Text above here was automatically generated during the configuration.
   3 #### Modifications to this file will be lost during configuration.
   4
   5
   6 #!/usr/local/bin/perl
   7
   8 # $Id: diffre.prl.root,v 1.1 2004/03/24 19:25:24 nodine Exp $
   9 # Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
  10 # Distributed under terms of the GNU General Public License (GPL).
  11
  12 # Description: This program is like the Unix diff program except that
  13 # the first file can contain regular expressions that allow parts of the
  14 # line to vary but still count as a match.
  15 #
  16 # Usage: ${main::TOOL_NAME} [options] re-file file2
  17 #
  18 # Options:
  19 #   -h          Help
  20 #   -r regexp   Elide differences involving the regexp (no regular
  21 #               expressions in re-file) (conflicts with -t)
  22 #   -t          Text-only match (no regular expressions in re-file)
  23 #               (conflicts with -r)
  24 #   -D symbol   Generate "#ifdef symbol" output
  25 #   -V          Print version info
  26
  27 $0 =~ m|([^\/]+)$|;
  28 $main::TOOL_NAME = $1;
  29
  30 main();
  31
  32 #use strict;
  33 use integer;
  34 if ($[ > 5.008) { eval "use bytes"; }
  35 use Getopt::Std;
  36
  37 sub main {
  38     my($options) = "dhr:tVD:C";
  39     Usage() unless getopts($options);
  40     if (defined $opt_r && $opt_t) {
  41         print STDERR "Cannot have both -r and -t\n";
  42         Usage('Options');
  43     }
  44
  45     $| = 1;                     # Output right away (no output buffering)
  46
  47     Usage('\\$Id') if $main::opt_V;
  48     Usage('Description') if $main::opt_h;
  49     Usage() unless defined $ARGV[1];
  50
  51     # Open the files
  52     open (FILE_RE,"<$ARGV[0]") or die "Cannot open $ARGV[0]";
  53     open (FILE_2,"<$ARGV[1]") or die "Cannot open $ARGV[1]";
  54
  55     # Read the files
  56     @main::FILE_RE = <FILE_RE>;
  57     @main::FILE_2 = <FILE_2>;
  58     # Add sentinels to the files
  59     push @main::FILE_RE, "\000";
  60     push @main::FILE_2, "\000";
  61
  62     # Close the files
  63     close FILE_RE;
  64     close FILE_2;
  65
  66     CompareFiles();
  67 }
  68
  69 # This algorithm is loosely based on Dijkstra's single-source shortest-paths
  70 # algorithm.  See Thomas H. Cormen, Charles E. Leiserson, Ronald
  71 # L. Rivest, _Introduction_to_Algorithms_, first edition, pp. 527-532
  72 # for a description of the algorithm.  The graph upon which the
  73 # algorithm is applied is that induced by comparing every line of file
  74 # 1 with every line of file 2; if they are the same, we create a node
  75 # in the graph with a label that is the combination of the two line
  76 # numbers.  The distance between node A=(a1,a2) and node B=(b1,b2)
  77 # is the minumum number of line changes that need to occur in going
  78 # from line a1 of file 1 to line b1 of file 1 and line a2 of file 2
  79 # to line b2 of file 2 (or infinite if b1 <= a1 or b2 <= a2).  This
  80 # distance is delta(A,B) = b1-a1+b2-a2-2 if there are no
  81 # intervening nodes.  The goal is to find the shortest path from
  82 # (-1,-1) to the vertex that represents the end of both files.
  83
  84 # For efficiency's sake, we do not actually generate all the nodes,
  85 # but rather produce them as Dijkstra's algorithm would need them.
  86 # For further efficiency's sake, we do not even generate all the successors
  87 # every time we add a node to the list of those whose distances are
  88 # known.  We go through the list generating successors at progressively
  89 # larger distances as they become greater than the minimum distance.
  90 # Since the delta function has an inverse triangle inequality, i.e.,
  91 # delta(A,C) > delta(A,B) + delta(B,C), it means that any successor B
  92 # of any node A means that we do not need to check edges from A to any
  93 # point C for which c1 >= b1 and c2 >= b2.  As a special case, if B
  94 # is (a1+1, b1+1), no further checks from A are necessary, so it is
  95 # removed from the active list.
  96
  97 # Unlike Dijkstra's algorithm, by the time we compute a distance for
  98 # a node, we know that it is the exact minimum distance.
  99
 100 # The following global variables are used:
 101 #   %PRED                 Key=node name, value=predecessor on min. path
 102 #   %DIST                 Key=node name, value=distance from start node
 103 #   %SUCC                 Key=node name, value=hash ref. with key=successor
 104 #                         node name and value=est min. dist.
 105 #   %ACTIVE               Key=node name, value=1; has an entry for every
 106 #                         node from which we actively generate successors
 107 #   %COORDS               key=node name, value=array ref. with node's
 108 #                         coordinates.
 109
 110 # This is the subroutine that actually compares the files.
 111 # Uses globals: @main::FILE_RE, @main::FILE_2, %main::DIST, %main::PRED,
 112 #               %main::SUCC
 113 # Sets globals: %main::DIST, %main::ACTIVE
 114 #BEGIN_CORE -- Do not delete this line; expectit2 depends upon it
 115 sub CompareFiles {
 116     my($goalNode) = @main::FILE_2 . ',' . @main::FILE_RE;
 117     if ($main::opt_r) {
 118         # Preprocess RE file for regexp
 119         foreach (@main::FILE_RE) {
 120             my $A = $_;
 121             # Quote regexp characters
 122             $A =~ s/([.*?+^\$\\$@\[\]\(\)\{\}])|$opt_r/
 123                 $1 ne '' ? "\\$1" : $opt_r/geo;
 124             push @main::OPTR_RE,$A;
 125         }
 126     }
 127     my($startNode) = '-1,-1';
 128     if ($main::opt_t) {
 129         eval (q|sub Compare {
 130 #print STDERR "($_[0],$_[1]): \'${\substr($main::FILE_2[$_[0]],0,-1)}\' cmp \'${\substr($main::FILE_RE[$_[1]],0,-1)}\'\n";
 131             return $main::FILE_2[$_[0]] eq $main::FILE_RE[$_[1]];
 132         }|);
 133     }
 134     elsif ($main::opt_r) {
 135         eval (q|sub Compare {
 136             return $main::FILE_2[$_[0]] =~ /^$main::OPTR_RE[$_[1]]$/;
 137         }|);
 138     }
 139     else {
 140         eval(q|sub Compare {
 141             return $main::FILE_2[$_[0]] =~ /^$main::FILE_RE[$_[1]]$/;
 142         }|);
 143     }
 144     $main::COORDS{$startNode} = [-1, -1];
 145     $main::ACTIVE{$startNode} = 1;
 146     $main::DIST{$startNode} = 0;
 147
 148     my $genDist = 0;
 149
 150     my %done;
 151     # We continue until the last node we generated was the goal node
 152 #print STDERR "$goalNode\n";
 153     while (! defined $main::DIST{$goalNode}) {
 154         $genDist = GenerateSuccessors($genDist);
 155     }
 156
 157     pop @main::FILE_2;
 158     pop @main::FILE_RE;
 159     # Generate the report by recursively working backwards from the goal node
 160     if ($main::opt_d) {
 161         GenerateDot();
 162     }
 163     else {
 164         OutputDifferences($goalNode);
 165         print "$main::count comparisons\n" if $main::opt_C;
 166     }
 167 }
 168
 169 # This subroutine generates the successors for the nodes in
 170 # %main::ACTIVE starting with the specified distance.
 171 # It returns the distance of any newly generated nodes.
 172 # Arguments: $genDist
 173 # Returns: Updated $genDist
 174 # Uses globals: @main::FILE_RE, @main::FILE_2, %main::DIST,
 175 #               %main::DONE_COMPARISON, %main::ACTIVE
 176 # Sets globals: %main::DIST, %main::PRED, %main::SUCC, %main::DONE_COMPARISON,
 177 #               %main::ACTIVE, %main::COORDS
 178 sub GenerateSuccessors {
 179     my($genDist) = @_;
 180 #print STDERR "GenerateSuccessors(",join(',',@_),")\n";
 181
 182     my $found_node;
 183     my $max_line_2 = @main::FILE_2;
 184     for ($found_node = 0; ! $found_node; $genDist++) {
 185         my $i;
 186         my @nodes = keys %main::ACTIVE;
 187         die "Internal error: No active nodes!" unless @nodes;
 188         for ($i=0; $i < @nodes; $i++) {
 189 #print STDERR "$i: [",join(';',@nodes),"]\n";
 190             my $node = $nodes[$i];
 191             my $baseDist = $main::DIST{$node};
 192 #print STDERR "[$node] at $genDist = ${\scalar($genDist-$baseDist)} from $baseDist\n";
 193             my $node_lines = $COORDS{$node};
 194             my ($min_line_2,$min_line_re) =
 195                 ($node_lines->[0]+1, $node_lines->[1]+1);
 196             my $netDist = $genDist - $baseDist;
 197             my $active = 0;
 198             my $max_line_re = $min_line_re + $netDist;
 199             $max_line_re = @main::FILE_RE if @main::FILE_RE < $max_line_re;
 200             my $line_re = $min_line_re;
 201             my $line_2 = $min_line_2 + $netDist;
 202             if ($line_2 > $max_line_2) {
 203 #print STDERR "    Off end of file 2: ($line_2, $line_re)";
 204                 my $diff = $line_2 - $max_line_2;
 205                 $line_re += $diff;
 206                 $line_2 -= $diff;
 207 #print STDERR ": warp to ($line_2, $line_re)\n";
 208             }
 209             my @succ = sort {$b <=> $a} keys %{$main::SUCC{$node}};
 210             while ($line_re <= $max_line_re) {
 211 #print STDERR "  Check ($line_2, $line_re)\n";
 212                 # Check to see that none of the node's successors blocks this
 213                 # comparison.
 214                 my $succ = $succ[0];
 215                 if (defined $succ && $COORDS{$succ}[1] <= $line_re) {
 216                     # It blocks.
 217                     my $diff = $line_2 - $COORDS{$succ}[0] + 1;
 218                     $line_re += $diff;
 219                     $line_2 -= $diff;
 220 #print STDERR "    Blocked by $succ: warp to ($line_2, $line_re)\n";
 221                     shift @succ;
 222                     next;
 223                 }
 224                 my $newNode = $line_2 . ',' . $line_re;
 225                 my $compare = $DONE_COMPARISON{$newNode};
 226                 if (! defined $compare) {
 227                     $compare = $DONE_COMPARISON{$newNode} =
 228                         Compare($line_2,$line_re);
 229                 }
 230                 $main::count++;
 231                 $active = 1;
 232                 if ($compare) {
 233                     # The lines agree; we've found a new node
 234 #print STDERR "  Match ($line_2,$line_re) = $genDist\n";
 235                     my $oldDist = $main::DIST{$newNode};
 236                     if (! defined $oldDist || $oldDist > $genDist) {
 237                         $main::DIST{$newNode} = $genDist;
 238                         $main::COORDS{$newNode} = [$line_2,$line_re];
 239                         $main::ACTIVE{$newNode} = 1;
 240                         $main::PRED{$newNode} = $node;
 241                         push @nodes, $newNode;
 242                         $found_node = 1;
 243                     }
 244                     $main::SUCC{$node}{$newNode} = $genDist;
 245                 }
 246                 $line_re++, $line_2--;
 247             }
 248 #print STDERR "  $node is no longer active\n" unless $active;
 249             delete $main::ACTIVE{$node} unless $active;
 250         }
 251     }
 252     return $genDist;
 253 }
 254
 255 # This subroutine outputs a difference report.  The arguments are the
 256 # delta line numbers for the two files where the next match occurs.
 257 # Arguments: $node
 258 # Uses globals: @main::FILE_RE, @main::FILE_2, %main::PRED
 259 sub OutputDifferences {
 260     my($goalNode) = @_;
 261     my(@path,$node);
 262
 263     push(@path, $goalNode);
 264     for ($node = $goalNode; $main::PRED{$node} ne "";
 265          $node = $main::PRED{$node}) {
 266         push(@path, $main::PRED{$node});
 267     }
 268
 269     my($i);
 270     for ($i = $#path-1; $i >= 0; $i--) {
 271         my($last_2,$last_re) = @{$COORDS{$path[$i+1]}};
 272         my($this_2,$this_re) = @{$COORDS{$path[$i]}};
 273         my($diff_2,$diff_re) = ($this_2-$last_2-1, $this_re-$last_re-1);
 274         if ($diff_re != 0 || $diff_2 != 0) {
 275             my($base_re) = $diff_re == 0 ? $last_re + 1 :
 276                 ($diff_re == 1 ? ($last_re + 2) :
 277                  ($last_re + 2) . ',' . ($last_re + $diff_re + 1));
 278             my($base_2) = $diff_2 == 0 ? $last_2 + 1 :
 279                 ($diff_2 == 1 ? ($last_2 + 2) :
 280                  ($last_2 + 2) . ',' . ($last_2 + $diff_2 + 1));
 281             my $pfx;
 282             if (defined $opt_D) {
 283                 print "#ifndef $opt_D\n" if $diff_re > 0;
 284             }
 285             else {
 286                 print $base_re, ($diff_re == 0 ? 'a' : ($diff_2 == 0 ? 'd'
 287                                                         : 'c')),
 288                     "$base_2\n";
 289                 $pfx = "< ";
 290             }
 291             print map("$pfx$_",@main::FILE_RE[$last_re+1 .. ($last_re+$diff_re)])
 292                 if ($diff_re > 0);
 293             if (defined $opt_D) {
 294                 print $diff_re > 0 ? "#else /* $opt_D */\n" :
 295                     "#ifdef $opt_D\n" if $diff_2 > 0;
 296             }
 297             else {
 298                 print "---\n" if $diff_re > 0 && $diff_2 > 0;
 299                 $pfx = "> ";
 300             }
 301             print map("$pfx$_",@main::FILE_2[$last_2+1 .. ($last_2+$diff_2)])
 302                 if ($diff_2 > 0);
 303             if (defined $opt_D) {
 304                 print "#endif /* $opt_D */\n";
 305                 print $main::FILE_2[$this_2];
 306             }
 307         }
 308         elsif (defined $opt_D) {
 309             print $main::FILE_2[$this_2];
 310         }
 311     }
 312 }
 313
 314 # This subroutine outputs a dot file for debugging purposes.
 315 sub GenerateDot {
 316     print "digraph g {\n";
 317     my($node);
 318     # Find out how many predecessors each node has
 319     my(%preds);
 320     foreach $node (keys %main::SUCC) {
 321         my($nextNode);
 322         foreach $nextNode (keys %{$main::SUCC{$node}}) {
 323             $preds{$nextNode}++;
 324         }
 325     }
 326     # Elide strings of nodes with the same total distance
 327     foreach $node (keys %main::SUCC) {
 328         my($nextNode);
 329         my(@succs);
 330         @succs = keys %{$main::SUCC{$node}};
 331         if ($preds{$node} == 1 && @succs == 1 &&
 332             $main::DIST{$main::PRED{$node}} ==
 333             $main::SUCC{$node}{$succs[0]}) {
 334             $dist = $main::SUCC{$node}{$succs[0]};
 335             delete $main::SUCC{$main::PRED{$node}}{$node};
 336             delete $main::SUCC{$node}{$succs[0]};
 337             $main::SUCC{$main::PRED{$node}}{$succs[0]} = $dist;
 338             $main::PRED{$succs[0]} = $main::PRED{$node};
 339         }
 340     }
 341     # Find the shortest path
 342     my(%path);
 343     for ($node = $goalNode; $main::PRED{$node} ne "";
 344          $node = $main::PRED{$node}) {
 345         $path{$main::PRED{$node}} = $node;
 346     }
 347     # Output the graph
 348     foreach $node (sort lex keys %main::SUCC) {
 349         my($nextNode);
 350         my($l1a,$l2a) = split(/,/, $node);
 351         foreach $nextNode (sort lex keys %{$main::SUCC{$node}}) {
 352             my($dist) = $main::SUCC{$node}{$nextNode};
 353             my($atts);
 354             $atts = ', weight = "10"'
 355                 if $path{$node} eq $nextNode;
 356             my($l1b,$l2b) = split(/,/, $nextNode);
 357             $atts .= ', style = "dotted"'
 358                 if $l1b - $l1a > 1 && $main::DIST{$node} ==
 359                 $main::SUCC{$node}{$nextNode};
 360             print qq/ "$node" -> "$nextNode" [label = "$dist"$atts];\n/
 361             }
 362     }
 363     print "}\n";
 364 }
 365 # Does lexicographic sort of node names
 366 sub lex {
 367     my ($a1,$a2) = split(/,/, $a);
 368     my ($b1,$b2) = split(/,/, $b);
 369     return $a1 <=> $b1 || $a2 <=> $b2;
 370 }
 371
 372 #END_CORE -- Do not delete this line; expectit2 depends upon it
 373
 374 # This subroutine extracts and prints usage information
 375 sub Usage {
 376     my ($what,$end) = @_;
 377     $what = "Usage" if ! $what;
 378     if (open(ME,$0) == 1) {
 379         while (<ME>) {
 380             $print = 1 if /^\# $what/o;
 381             $print = 0 if ! /^\#/o || ($end && /^\# $end/o);
 382             if ($print) {
 383                 my $line = substr($_, 2);
 384                 $line =~ s/(\$\{[^\}]+\})/eval($1)/ge;
 385                 print $line;
 386             }
 387         }
 388         close(ME);
 389     }
 390     else {
 391         print STDERR "Usage not available.\n";
 392     }
 393     exit (1);
 394 }
 395
 396 __END__
 397 # $Log: diffre.prl.root,v $
 398 # Revision 1.1  2004/03/24 19:25:24  nodine
 399 # Rename tool from rediff to diffre.
 400 #
 401 # Revision 1.1  2004/03/19 21:54:50  nodine
 402 # Create gen_gress, run_gress, and rediff using the safe perl from config
 403 # time and just copy to the bin directory.
 404 #
 405 # Revision 1.1  2004/02/03 20:45:09  nodine
 406 # Initial release.
 407 #
 408 # Revision 1.1  2003/02/07 21:28:22  nodine
 409 # Initial release.
 410 #
 411 # Revision 1.1  2001/11/05 22:17:42  nodine
 412 # Initial release.
 413 #
 414 # Revision 1.1  2001/07/25 15:24:35  nodine
 415 # Initial release.  These tools are for the regression set methodology.
 416 #
 417 # Revision 1.1  2001/02/10 16:22:55  nodine
 418 # Initial release.
 419 #
 420 # Revision 1.1  2000/11/03 20:22:40  nodine
 421 # Initial release.
 422 #
 423 # Revision 1.1  2000/08/31 18:11:48  nodine
 424 # Initial release.
 425 #
 426 # Revision 1.1  2000/05/31 20:31:14  nodine
 427 # Initial release.
 428 #
 429 # Revision 1.4  1997/11/06 21:42:17  nodine
 430 # * Added comma before "weight" attribute in dot output.
 431 # * Fixed processing that occurs when one EOF is reached before the other.
 432 #
 433 # Revision 1.3  1997/11/03  18:15:07  nodine
 434 # * Added -t option and hidden -D and -C options.
 435 # * Fixed a bunch of bugs.
 436 # * Did some speed optimizations.
 437 #
 438 # Revision 1.2  1997/10/27  20:22:54  nodine
 439 # Changed to find the minimal set of differences by using Dijkstra's shortest
 440 # path algorithm.
 441 #
 442 # Revision 1.1  1997/10/24  21:22:43  nodine
 443 # "Initial version"
 444 #