From 879889128f91acee93fad3a2e945e3a0d3b17a8f Mon Sep 17 00:00:00 2001
From: Brad Gilbert <b2gills@gmail.com>
Date: Wed, 15 Apr 2009 22:50:49 -0500
Subject: [PATCH] Added phash.pl

Modified phash.pm so that gen_perfect_hash calls verify_hash_table, before returning
---
 macros.pl         |   2 +-
 perl/lib/phash.pm | 497 +++++++++++++++++++++++++++---------------------------
 perl/phash.pl     |  56 ++++++
 perl/pptok.pl     |   3 +-
 phash.pl          |   4 +
 syncfiles.pl      |  43 ++---
 6 files changed, 336 insertions(+), 269 deletions(-)
 rewrite perl/lib/phash.pm (71%)
 create mode 100755 perl/phash.pl

diff --git a/macros.pl b/macros.pl
index 2eb5167b..53949750 100755
--- a/macros.pl
+++ b/macros.pl
@@ -37,7 +37,7 @@ sub charcify(@) {
 #
 # Generate macros.c
 #
-open(OUT,"> macros.c\0") or die "unable to open macros.c\n";
+open( OUT, '>', "macros.c\0" ) or die "unable to open macros.c\n";
 
 print OUT "/*\n";
 print OUT " * Do not edit - this file auto-generated by macros.pl from:\n";
diff --git a/perl/lib/phash.pm b/perl/lib/phash.pm
dissimilarity index 71%
index d9d8bb37..8c553c0c 100644
--- a/perl/lib/phash.pm
+++ b/perl/lib/phash.pm
@@ -1,248 +1,249 @@
-# -*- perl -*-
-#
-# Perfect Minimal Hash Generator written in Perl, which produces
-# C output.
-#
-# Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84)
-#
-package phash;
-use strict;
-use warnings;
-
-use random_sv_vectors;
-use Nasm::crc64;
-
-use base 'Exporter';
-
-our @EXPORT = qw{
-    prehash
-    walk_graph
-    gen_hash_n
-    gen_perfect_hash
-    read_input
-    verify_hash_table
-};
-
-
-sub prehash($$\@);
-sub walk_graph(\@\@$$);
-sub gen_hash_n($\@\%$);
-sub gen_perfect_hash(\%);
-sub read_input();
-sub verify_hash_table($$);
-
-#
-# Compute the prehash for a key
-#
-# prehash(key, N, sv)
-#
-sub prehash($$\@) {
-    my($key, $n, $sv) = @_;
-    my @c = crc64(@$sv, $key);
-
-    # Create a bipartite graph...
-    my $low_word  = (($c[1] & ($n-1)) << 1) + 0; # low word
-    my $high_word = (($c[0] & ($n-1)) << 1) + 1; # high word
-
-    return ($low_word, $high_word);
-}
-
-#
-# Walk the assignment graph, return true on success
-#
-sub walk_graph(\@\@$$) {
-    my($node_val,$node_neighbor,$n,$v) = @_;
-
-    # print STDERR "Vertex $n value $v\n";
-    $node_val->[$n] = $v;
-
-    for my $nx ( @{$node_neighbor->[$n]} ){
-	# $nx -> [neigh, hash]
-	my ($o, $e) = @$nx;
-
-	# print STDERR "Edge $n,$o value $e: ";
-	my $ov = $node_val->[$o];
-	if( defined($ov) ){
-	    if ($v+$ov != $e) {
-		# Cyclic graph with collision
-		# print STDERR "error, should be ", $v+$ov, "\n";
-		return 0;
-	    } else {
-		# print STDERR "ok\n";
-	    }
-	}elsif( not walk_graph( @$node_val, @$node_neighbor, $o, $e-$v )){
-	    return 0;
-	}
-    }
-    return 1;
-}
-
-#
-# Generate the function assuming a given N.
-#
-# gen_hash_n(N, sv, \%data, run)
-#
-sub gen_hash_n($\@\%$) {
-    my($n, $sv, $href, $run) = @_;
-    my @keys = keys(%{$href});
-    my $i;
-    my $gr;
-    my $gsize = 2*$n;
-    my @node_val;
-    my @node_neighbor;
-    my %edges;
-
-    for ($i = 0; $i < $gsize; $i++) {
-	$node_neighbor[$i] = [];
-    }
-
-    %edges = ();
-    for my $key( @keys ){
-	my ($pf1, $pf2) = prehash($key, $n, @$sv);
-	($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order
-
-	my $pf = "$pf1,$pf2";
-	my $e = $href->{$key};
-	my $xkey;
-
-	if (defined($xkey = $edges{$pf})) {
-	    next if ($e == $href->{$xkey}); # Duplicate hash, safe to ignore
-	    if (defined($run)) {
-		print STDERR "$run: Collision: $pf: $key with $xkey\n";
-	    }
-	    return;
-	}
-
-	# print STDERR "Edge $pf value $e from $k\n";
-
-	$edges{$pf} = $key;
-	push(@{$node_neighbor[$pf1]}, [$pf2, $e]);
-	push(@{$node_neighbor[$pf2]}, [$pf1, $e]);
-    }
-
-    # Now we need to assign values to each vertex, so that for each
-    # edge, the sum of the values for the two vertices give the value
-    # for the edge (which is our hash index.)  If we find an impossible
-    # sitation, the graph was cyclic.
-    @node_val = (undef) x $gsize;
-
-    for ($i = 0; $i < $gsize; $i++) {
-	if (scalar(@{$node_neighbor[$i]})) {
-	    # This vertex has neighbors (is used)
-	    if (!defined($node_val[$i])) {
-		# First vertex in a cluster
-		unless (walk_graph( @node_val, @node_neighbor, $i, 0)) {
-		    if (defined($run)) {
-			print STDERR "$run: Graph is cyclic\n";
-		    }
-		    return;
-		}
-	    }
-	}
-    }
-
-    # for ($i = 0; $i < $n; $i++) {
-    #	print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
-    # }
-
-    if (defined($run)) {
-	printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n",
-	$$sv[0], $$sv[1];
-    }
-
-    return ($n, $sv, \@node_val);
-}
-
-#
-# Driver for generating the function
-#
-# gen_perfect_hash(\%data)
-#
-sub gen_perfect_hash(\%) {
-    my($href) = @_;
-    my @keys = keys(%{$href});
-    my @hashinfo;
-    my( $n, $i, $j, $sv, $maxj );
-    my $run = 1;
-
-    # Minimal power of 2 value for N with enough wiggle room.
-    # The scaling constant must be larger than 0.5 in order for the
-    # algorithm to ever terminate.
-    my $room = scalar(@keys)*0.8;
-    $n = 1;
-    while ($n < $room) {
-	$n <<= 1;
-    }
-
-    # Number of times to try...
-    $maxj = scalar @random_sv_vectors;
-
-    for ($i = 0; $i < 4; $i++) {
-	printf STDERR "%d vectors, trying n = %d...\n",
-		scalar @keys, $n;
-	for ($j = 0; $j < $maxj; $j++) {
-	    $sv = $random_sv_vectors[$j];
-	    @hashinfo = gen_hash_n($n, @$sv, %$href, $run++);
-	    return @hashinfo if (@hashinfo);
-	}
-	$n <<= 1;
-    }
-
-    return;
-}
-
-#
-# Read input file
-#
-sub read_input() {
-    my( $key,$val);
-    my %out;
-    my $x = 0;
-
-    while ( my $l = <STDIN> ) {
-	chomp $l;
-	$l =~ s/\s*(\#.*|)$//;
-
-	next if ($l eq '');
-
-	if ($l =~ /^([^=]+)\=([^=]+)$/) {
-	    $out{$1} = $2;
-	    $x = $2;
-	} else {
-	    $out{$l} = $x;
-	}
-	$x++;
-    }
-
-    return %out;
-}
-
-#
-# Verify that the hash table is actually correct...
-#
-sub verify_hash_table($$)
-{
-    my ($href, $hashinfo) = @_;
-    my ($n, $sv, $g) = @{$hashinfo};
-    my $k;
-    my $err = 0;
-
-    foreach $k (keys(%$href)) {
-	my ($pf1, $pf2) = prehash($k, $n, @$sv);
-	my $g1 = ${$g}[$pf1];
-	my $g2 = ${$g}[$pf2];
-
-	if ($g1+$g2 != ${$href}{$k}) {
-	    printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
-	    $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
-	    $err = 1;
-	} else {
-	    # printf STDERR "%s: %d+%d = %d ok\n",
-	    # $k, $g1, $g2, $g1+$g2;
-	}
-    }
-
-    die "$0: hash validation error\n" if ($err);
-}
-
-1;
+# -*- perl -*-
+#
+# Perfect Minimal Hash Generator written in Perl, which produces
+# C output.
+#
+# Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84)
+#
+package phash;
+use strict;
+use warnings;
+
+use random_sv_vectors;
+use Nasm::crc64;
+
+use base 'Exporter';
+
+our @EXPORT = qw{
+    prehash
+    walk_graph
+    gen_hash_n
+    gen_perfect_hash
+    read_input
+    verify_hash_table
+};
+
+
+sub prehash($$\@);
+sub walk_graph(\@\@$$);
+sub gen_hash_n($\@\%$);
+sub gen_perfect_hash(\%);
+sub read_input();
+sub verify_hash_table(\%\@);
+
+#
+# Compute the prehash for a key
+#
+# prehash(key, N, sv)
+#
+sub prehash($$\@) {
+    my($key, $n, $sv) = @_;
+    my @c = crc64(@$sv, $key);
+
+    # Create a bipartite graph...
+    my $low_word  = (($c[1] & ($n-1)) << 1) + 0; # low word
+    my $high_word = (($c[0] & ($n-1)) << 1) + 1; # high word
+
+    return ($low_word, $high_word);
+}
+
+#
+# Walk the assignment graph, return true on success
+#
+sub walk_graph(\@\@$$) {
+  my($node_val,$node_neighbor,$n,$v) = @_;
+
+  # print STDERR "Vertex $n value $v\n";
+  $node_val->[$n] = $v;
+
+  for my $nx ( @{$node_neighbor->[$n]} ){
+    # $nx -> [neigh, hash]
+    my ($o, $e) = @$nx;
+  
+    # print STDERR "Edge $n,$o value $e: ";
+    my $ov = $node_val->[$o];
+    if( defined($ov) ){
+      if ($v+$ov != $e) {
+        # Cyclic graph with collision
+        # print STDERR "error, should be ", $v+$ov, "\n";
+        return 0;
+      } else {
+        # print STDERR "ok\n";
+      }
+    }elsif( not walk_graph( @$node_val, @$node_neighbor, $o, $e-$v )){
+      return 0;
+    }
+  }
+  return 1;
+}
+
+#
+# Generate the function assuming a given N.
+#
+# gen_hash_n(N, sv, \%data, run)
+#
+sub gen_hash_n($\@\%$) {
+    my($n, $sv, $href, $run) = @_;
+    my @keys = keys(%{$href});
+    my $i;
+    my $gr;
+    my $gsize = 2*$n;
+    my @node_val;
+    my @node_neighbor;
+    my %edges;
+
+  for ($i = 0; $i < $gsize; $i++) {
+    $node_neighbor[$i] = [];
+  }
+
+  %edges = ();
+  for my $key( @keys ){
+    my ($pf1, $pf2) = prehash($key, $n, @$sv);
+    ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order
+    
+    my $pf = "$pf1,$pf2";
+    my $e = $href->{$key};
+    my $xkey;
+    
+    if (defined($xkey = $edges{$pf})) {
+      next if ($e == $href->{$xkey}); # Duplicate hash, safe to ignore
+      if (defined($run)) {
+        print STDERR "$run: Collision: $pf: $key with $xkey\n";
+      }
+      return;
+    }
+    
+    # print STDERR "Edge $pf value $e from $k\n";
+    
+    $edges{$pf} = $key;
+    push(@{$node_neighbor[$pf1]}, [$pf2, $e]);
+    push(@{$node_neighbor[$pf2]}, [$pf1, $e]);
+  }
+
+  # Now we need to assign values to each vertex, so that for each
+  # edge, the sum of the values for the two vertices give the value
+  # for the edge (which is our hash index.)  If we find an impossible
+  # sitation, the graph was cyclic.
+  @node_val = (undef) x $gsize;
+
+  for ($i = 0; $i < $gsize; $i++) {
+    if (scalar(@{$node_neighbor[$i]})) {
+      # This vertex has neighbors (is used)
+      if (!defined($node_val[$i])) {
+        # First vertex in a cluster
+        unless (walk_graph( @node_val, @node_neighbor, $i, 0)) {
+          if (defined($run)) {
+            print STDERR "$run: Graph is cyclic\n";
+          }
+          return;
+        }
+      }
+    }
+  }
+
+  # for ($i = 0; $i < $n; $i++) {
+  # print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
+  # }
+
+  if (defined($run)) {
+    printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n",
+    $$sv[0], $$sv[1];
+  }
+
+  return ($n, $sv, \@node_val);
+}
+
+#
+# Driver for generating the function
+#
+# gen_perfect_hash(\%data)
+#
+sub gen_perfect_hash(\%) {
+  my($href) = @_;
+  my @keys = keys(%{$href});
+  #my @hashinfo;
+  my( $n, $i, $j, $sv, $maxj );
+  my $run = 1;
+  
+  # Minimal power of 2 value for N with enough wiggle room.
+  # The scaling constant must be larger than 0.5 in order for the
+  # algorithm to ever terminate.
+  my $room = scalar(@keys)*0.8;
+  $n = 1;
+  while ($n < $room) {
+    $n <<= 1;
+  }
+  
+  # Number of times to try...
+  $maxj = scalar @random_sv_vectors;
+  
+  for ($i = 0; $i < 4; $i++) {
+    printf STDERR "%d vectors, trying n = %d...\n",
+    scalar @keys, $n;
+    for ($j = 0; $j < $maxj; $j++) {
+      $sv = $random_sv_vectors[$j];
+      my @hashinfo = gen_hash_n($n, @$sv, %$href, $run++);
+      
+      if( @hashinfo ){
+        verify_hash_table(%$href,@hashinfo);
+        return @hashinfo;
+      }
+    }
+    $n <<= 1;
+  }
+  
+  die "no hash";
+  return;
+}
+
+#
+# Read input file
+#
+sub read_input() {
+  my %out;
+  my $x = 0;
+
+  while ( my $line = <STDIN> ){
+    $line =~ s/\s*(\#.*)?$//;
+    next unless $line;
+  
+    if( $line =~ /^([^=]++)=([^=]++)$/ ){
+      $out{$1} = $x = $2;
+    } else {
+      $out{$line} = $x;
+    }
+    $x++;
+  }
+
+  return  %out if wantarray;
+  return \%out;
+}
+
+#
+# Verify that the hash table is actually correct...
+#
+sub verify_hash_table(\%\@){
+  my ($href, $hashinfo) = @_;
+  my ($n, $sv, $g) = @{$hashinfo};
+  my $k;
+  my $err = 0;
+  
+  foreach $k (keys(%$href)) {
+    my ($pf1, $pf2) = prehash($k, $n, @$sv);
+    my $g1 = ${$g}[$pf1];
+    my $g2 = ${$g}[$pf2];
+    
+    if ($g1+$g2 != ${$href}{$k}) {
+      printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
+      $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
+      $err = 1;
+    } else {
+      # printf STDERR "%s: %d+%d = %d ok\n",
+      # $k, $g1, $g2, $g1+$g2;
+    }
+  }
+
+  die "$0: hash validation error\n" if ($err);
+}
+
+1;
diff --git a/perl/phash.pl b/perl/phash.pl
new file mode 100755
index 00000000..646fe7f4
--- /dev/null
+++ b/perl/phash.pl
@@ -0,0 +1,56 @@
+#! /usr/bin/env perl
+
+
+# not completed, possibly not needed
+
+use strict;
+use warnings;
+
+use lib 'lib';
+
+use phash;
+
+my %data = read_input;
+my($n, $sv, $g) = gen_perfect_hash(%data);
+
+use Data::Dump 'dump';
+use 5.010;
+say STDERR dump \%data;
+say STDERR dump $n,$sv,$g;
+
+
+  print "static int HASHNAME_fg1[$n] =\n{\n";
+  
+  for( my $i = 0; $i < $n; $i++ ){
+    no warnings 'uninitialized';
+    my $h = ${$g}[$i*2+0] || 'UNUSED';
+    say $h;
+    #print "\t", ${$g}[${$f1}[$i]], "\n";
+  }
+  
+  print "};\n\n";
+
+
+
+  print "static int HASHNAME_fg2[$n] =\n{\n";
+  
+  for( my  $i = 0; $i < $n; $i++ ){
+    no warnings 'uninitialized';
+    no warnings 'uninitialized';
+    my $h = ${$g}[$i*2+1] || 'UNUSED';
+    say $h;
+    #print "\t", ${$g}[${$f2}[$i]], "\n";
+  }
+  
+  print "};\n\n";
+
+
+  print <<END;
+struct p_hash HASHNAME =
+{
+\t$n
+\t$sv->[0],$sv->[1]
+\tHASHNAME_fg1,
+\tHASHNAME_fg2,
+};
+END
diff --git a/perl/pptok.pl b/perl/pptok.pl
index e315a146..2920e693 100755
--- a/perl/pptok.pl
+++ b/perl/pptok.pl
@@ -257,7 +257,8 @@ END
     }
 
     # Paranoia...
-    verify_hash_table(\%tokens, \@hashinfo);
+    # no longer needed, gen_perfect_hash now runs verify_hash_table
+    # verify_hash_table(\%tokens, \@hashinfo);
     
     my ($n, $sv, $g) = @hashinfo;
     my $sv2 = $sv+2;
diff --git a/phash.pl b/phash.pl
index 03f500f3..5dc385f1 100755
--- a/phash.pl
+++ b/phash.pl
@@ -4,6 +4,10 @@
 # C output.
 #
 
+# I don't think this file is even used.
+
+use lib qw'perllib';
+
 require 'phash.ph';
 
 #
diff --git a/syncfiles.pl b/syncfiles.pl
index 95969231..c64cb672 100755
--- a/syncfiles.pl
+++ b/syncfiles.pl
@@ -1,15 +1,20 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Sync the output file list between Makefiles
 # Use the mkdep.pl parameters to get the filename syntax
 #
 # The first file is the source file; the other ones target.
 #
-%def_hints = ('object-ending' => '.o',
-	      'path-separator' => '/',
-	      'continuation' => "\\");
+use strict;
+use warnings;
 
-sub do_transform($$) {
+our %def_hints = qw{
+    object-ending  .o
+    path-separator /
+    continuation   \\
+};
+
+sub do_transform($\%) {
     my($l, $h) = @_;
     my($ps) = $$h{'path-separator'};
 
@@ -27,18 +32,19 @@ sub do_transform($$) {
     return $l;
 }
 
-@file_list = ();
 
-$first = 1;
-$first_file = $ARGV[0];
+our @file_list;
+
+my $first = 1;
+my $first_file = $ARGV[0];
 die unless (defined($first_file));
 
-foreach $file (@ARGV) {
-    open(FILE, "< $file\0") or die;
+for my $filename (@ARGV) {
+    open( FILE, '<', $filename ) or die;
 
     # First, read the syntax hints
-    %hints = %def_hints;
-    while (defined($line = <FILE>)) {
+    my %hints = %def_hints;
+    while( my $line = <FILE> ){
 	if ( $line =~ /^\s*\#\s*@([a-z0-9-]+):\s*\"([^\"]*)\"/ ) {
 	    $hints{$1} = $2;
 	}
@@ -46,9 +52,9 @@ foreach $file (@ARGV) {
 
     # Read and process the file
     seek(FILE,0,0);
-    @lines = ();
-    $processing = 0;
-    while (defined($line = <FILE>)) {
+    my @lines;
+    my $processing = 0;
+    while( my $line = <FILE> ){
 	chomp $line;
 	if ($processing) {
 	    if ($line eq '#-- End File Lists --#') {
@@ -72,8 +78,8 @@ foreach $file (@ARGV) {
 		$processing = 1;
 		if (!$first) {
 		    push(@lines, "# Edit in $first_file, not here!\n");
-		    foreach $l (@file_list) {
-			push(@lines, do_transform($l, \%hints)."\n");
+		    for my $l (@file_list) {
+			push(@lines, do_transform($l, %hints)."\n");
 		    }
 		}
 	    }
@@ -83,11 +89,10 @@ foreach $file (@ARGV) {
 
     # Write the file back out
     if (!$first) {
-	open(FILE, "> $file\0") or die;
+	open( FILE, '>', $filename ) or die;
 	print FILE @lines;
 	close(FILE);
     }
 
-    undef @lines;
     $first = 0;
 }
-- 
2.11.4.GIT