From 879889128f91acee93fad3a2e945e3a0d3b17a8f Mon Sep 17 00:00:00 2001 From: Brad Gilbert Date: Wed, 15 Apr 2009 22:50:49 -0500 Subject: [PATCH] Added phash.pl Modified phash.pm so that gen_perfect_hash calls verify_hash_table, before returning --- macros.pl | 2 +- perl/lib/phash.pm | 497 +++++++++++++++++++++++++++--------------------------- perl/phash.pl | 56 ++++++ perl/pptok.pl | 3 +- phash.pl | 4 + syncfiles.pl | 43 ++--- 6 files changed, 336 insertions(+), 269 deletions(-) rewrite perl/lib/phash.pm (71%) create mode 100755 perl/phash.pl diff --git a/macros.pl b/macros.pl index 2eb5167b..53949750 100755 --- a/macros.pl +++ b/macros.pl @@ -37,7 +37,7 @@ sub charcify(@) { # # Generate macros.c # -open(OUT,"> macros.c\0") or die "unable to open macros.c\n"; +open( OUT, '>', "macros.c\0" ) or die "unable to open macros.c\n"; print OUT "/*\n"; print OUT " * Do not edit - this file auto-generated by macros.pl from:\n"; diff --git a/perl/lib/phash.pm b/perl/lib/phash.pm dissimilarity index 71% index d9d8bb37..8c553c0c 100644 --- a/perl/lib/phash.pm +++ b/perl/lib/phash.pm @@ -1,248 +1,249 @@ -# -*- perl -*- -# -# Perfect Minimal Hash Generator written in Perl, which produces -# C output. -# -# Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84) -# -package phash; -use strict; -use warnings; - -use random_sv_vectors; -use Nasm::crc64; - -use base 'Exporter'; - -our @EXPORT = qw{ - prehash - walk_graph - gen_hash_n - gen_perfect_hash - read_input - verify_hash_table -}; - - -sub prehash($$\@); -sub walk_graph(\@\@$$); -sub gen_hash_n($\@\%$); -sub gen_perfect_hash(\%); -sub read_input(); -sub verify_hash_table($$); - -# -# Compute the prehash for a key -# -# prehash(key, N, sv) -# -sub prehash($$\@) { - my($key, $n, $sv) = @_; - my @c = crc64(@$sv, $key); - - # Create a bipartite graph... - my $low_word = (($c[1] & ($n-1)) << 1) + 0; # low word - my $high_word = (($c[0] & ($n-1)) << 1) + 1; # high word - - return ($low_word, $high_word); -} - -# -# Walk the assignment graph, return true on success -# -sub walk_graph(\@\@$$) { - my($node_val,$node_neighbor,$n,$v) = @_; - - # print STDERR "Vertex $n value $v\n"; - $node_val->[$n] = $v; - - for my $nx ( @{$node_neighbor->[$n]} ){ - # $nx -> [neigh, hash] - my ($o, $e) = @$nx; - - # print STDERR "Edge $n,$o value $e: "; - my $ov = $node_val->[$o]; - if( defined($ov) ){ - if ($v+$ov != $e) { - # Cyclic graph with collision - # print STDERR "error, should be ", $v+$ov, "\n"; - return 0; - } else { - # print STDERR "ok\n"; - } - }elsif( not walk_graph( @$node_val, @$node_neighbor, $o, $e-$v )){ - return 0; - } - } - return 1; -} - -# -# Generate the function assuming a given N. -# -# gen_hash_n(N, sv, \%data, run) -# -sub gen_hash_n($\@\%$) { - my($n, $sv, $href, $run) = @_; - my @keys = keys(%{$href}); - my $i; - my $gr; - my $gsize = 2*$n; - my @node_val; - my @node_neighbor; - my %edges; - - for ($i = 0; $i < $gsize; $i++) { - $node_neighbor[$i] = []; - } - - %edges = (); - for my $key( @keys ){ - my ($pf1, $pf2) = prehash($key, $n, @$sv); - ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order - - my $pf = "$pf1,$pf2"; - my $e = $href->{$key}; - my $xkey; - - if (defined($xkey = $edges{$pf})) { - next if ($e == $href->{$xkey}); # Duplicate hash, safe to ignore - if (defined($run)) { - print STDERR "$run: Collision: $pf: $key with $xkey\n"; - } - return; - } - - # print STDERR "Edge $pf value $e from $k\n"; - - $edges{$pf} = $key; - push(@{$node_neighbor[$pf1]}, [$pf2, $e]); - push(@{$node_neighbor[$pf2]}, [$pf1, $e]); - } - - # Now we need to assign values to each vertex, so that for each - # edge, the sum of the values for the two vertices give the value - # for the edge (which is our hash index.) If we find an impossible - # sitation, the graph was cyclic. - @node_val = (undef) x $gsize; - - for ($i = 0; $i < $gsize; $i++) { - if (scalar(@{$node_neighbor[$i]})) { - # This vertex has neighbors (is used) - if (!defined($node_val[$i])) { - # First vertex in a cluster - unless (walk_graph( @node_val, @node_neighbor, $i, 0)) { - if (defined($run)) { - print STDERR "$run: Graph is cyclic\n"; - } - return; - } - } - } - } - - # for ($i = 0; $i < $n; $i++) { - # print STDERR "Vertex ", $i, ": ", $g[$i], "\n"; - # } - - if (defined($run)) { - printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n", - $$sv[0], $$sv[1]; - } - - return ($n, $sv, \@node_val); -} - -# -# Driver for generating the function -# -# gen_perfect_hash(\%data) -# -sub gen_perfect_hash(\%) { - my($href) = @_; - my @keys = keys(%{$href}); - my @hashinfo; - my( $n, $i, $j, $sv, $maxj ); - my $run = 1; - - # Minimal power of 2 value for N with enough wiggle room. - # The scaling constant must be larger than 0.5 in order for the - # algorithm to ever terminate. - my $room = scalar(@keys)*0.8; - $n = 1; - while ($n < $room) { - $n <<= 1; - } - - # Number of times to try... - $maxj = scalar @random_sv_vectors; - - for ($i = 0; $i < 4; $i++) { - printf STDERR "%d vectors, trying n = %d...\n", - scalar @keys, $n; - for ($j = 0; $j < $maxj; $j++) { - $sv = $random_sv_vectors[$j]; - @hashinfo = gen_hash_n($n, @$sv, %$href, $run++); - return @hashinfo if (@hashinfo); - } - $n <<= 1; - } - - return; -} - -# -# Read input file -# -sub read_input() { - my( $key,$val); - my %out; - my $x = 0; - - while ( my $l = ) { - chomp $l; - $l =~ s/\s*(\#.*|)$//; - - next if ($l eq ''); - - if ($l =~ /^([^=]+)\=([^=]+)$/) { - $out{$1} = $2; - $x = $2; - } else { - $out{$l} = $x; - } - $x++; - } - - return %out; -} - -# -# Verify that the hash table is actually correct... -# -sub verify_hash_table($$) -{ - my ($href, $hashinfo) = @_; - my ($n, $sv, $g) = @{$hashinfo}; - my $k; - my $err = 0; - - foreach $k (keys(%$href)) { - my ($pf1, $pf2) = prehash($k, $n, @$sv); - my $g1 = ${$g}[$pf1]; - my $g2 = ${$g}[$pf2]; - - if ($g1+$g2 != ${$href}{$k}) { - printf STDERR "%s(%d,%d): %d+%d = %d != %d\n", - $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k}; - $err = 1; - } else { - # printf STDERR "%s: %d+%d = %d ok\n", - # $k, $g1, $g2, $g1+$g2; - } - } - - die "$0: hash validation error\n" if ($err); -} - -1; +# -*- perl -*- +# +# Perfect Minimal Hash Generator written in Perl, which produces +# C output. +# +# Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84) +# +package phash; +use strict; +use warnings; + +use random_sv_vectors; +use Nasm::crc64; + +use base 'Exporter'; + +our @EXPORT = qw{ + prehash + walk_graph + gen_hash_n + gen_perfect_hash + read_input + verify_hash_table +}; + + +sub prehash($$\@); +sub walk_graph(\@\@$$); +sub gen_hash_n($\@\%$); +sub gen_perfect_hash(\%); +sub read_input(); +sub verify_hash_table(\%\@); + +# +# Compute the prehash for a key +# +# prehash(key, N, sv) +# +sub prehash($$\@) { + my($key, $n, $sv) = @_; + my @c = crc64(@$sv, $key); + + # Create a bipartite graph... + my $low_word = (($c[1] & ($n-1)) << 1) + 0; # low word + my $high_word = (($c[0] & ($n-1)) << 1) + 1; # high word + + return ($low_word, $high_word); +} + +# +# Walk the assignment graph, return true on success +# +sub walk_graph(\@\@$$) { + my($node_val,$node_neighbor,$n,$v) = @_; + + # print STDERR "Vertex $n value $v\n"; + $node_val->[$n] = $v; + + for my $nx ( @{$node_neighbor->[$n]} ){ + # $nx -> [neigh, hash] + my ($o, $e) = @$nx; + + # print STDERR "Edge $n,$o value $e: "; + my $ov = $node_val->[$o]; + if( defined($ov) ){ + if ($v+$ov != $e) { + # Cyclic graph with collision + # print STDERR "error, should be ", $v+$ov, "\n"; + return 0; + } else { + # print STDERR "ok\n"; + } + }elsif( not walk_graph( @$node_val, @$node_neighbor, $o, $e-$v )){ + return 0; + } + } + return 1; +} + +# +# Generate the function assuming a given N. +# +# gen_hash_n(N, sv, \%data, run) +# +sub gen_hash_n($\@\%$) { + my($n, $sv, $href, $run) = @_; + my @keys = keys(%{$href}); + my $i; + my $gr; + my $gsize = 2*$n; + my @node_val; + my @node_neighbor; + my %edges; + + for ($i = 0; $i < $gsize; $i++) { + $node_neighbor[$i] = []; + } + + %edges = (); + for my $key( @keys ){ + my ($pf1, $pf2) = prehash($key, $n, @$sv); + ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order + + my $pf = "$pf1,$pf2"; + my $e = $href->{$key}; + my $xkey; + + if (defined($xkey = $edges{$pf})) { + next if ($e == $href->{$xkey}); # Duplicate hash, safe to ignore + if (defined($run)) { + print STDERR "$run: Collision: $pf: $key with $xkey\n"; + } + return; + } + + # print STDERR "Edge $pf value $e from $k\n"; + + $edges{$pf} = $key; + push(@{$node_neighbor[$pf1]}, [$pf2, $e]); + push(@{$node_neighbor[$pf2]}, [$pf1, $e]); + } + + # Now we need to assign values to each vertex, so that for each + # edge, the sum of the values for the two vertices give the value + # for the edge (which is our hash index.) If we find an impossible + # sitation, the graph was cyclic. + @node_val = (undef) x $gsize; + + for ($i = 0; $i < $gsize; $i++) { + if (scalar(@{$node_neighbor[$i]})) { + # This vertex has neighbors (is used) + if (!defined($node_val[$i])) { + # First vertex in a cluster + unless (walk_graph( @node_val, @node_neighbor, $i, 0)) { + if (defined($run)) { + print STDERR "$run: Graph is cyclic\n"; + } + return; + } + } + } + } + + # for ($i = 0; $i < $n; $i++) { + # print STDERR "Vertex ", $i, ": ", $g[$i], "\n"; + # } + + if (defined($run)) { + printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n", + $$sv[0], $$sv[1]; + } + + return ($n, $sv, \@node_val); +} + +# +# Driver for generating the function +# +# gen_perfect_hash(\%data) +# +sub gen_perfect_hash(\%) { + my($href) = @_; + my @keys = keys(%{$href}); + #my @hashinfo; + my( $n, $i, $j, $sv, $maxj ); + my $run = 1; + + # Minimal power of 2 value for N with enough wiggle room. + # The scaling constant must be larger than 0.5 in order for the + # algorithm to ever terminate. + my $room = scalar(@keys)*0.8; + $n = 1; + while ($n < $room) { + $n <<= 1; + } + + # Number of times to try... + $maxj = scalar @random_sv_vectors; + + for ($i = 0; $i < 4; $i++) { + printf STDERR "%d vectors, trying n = %d...\n", + scalar @keys, $n; + for ($j = 0; $j < $maxj; $j++) { + $sv = $random_sv_vectors[$j]; + my @hashinfo = gen_hash_n($n, @$sv, %$href, $run++); + + if( @hashinfo ){ + verify_hash_table(%$href,@hashinfo); + return @hashinfo; + } + } + $n <<= 1; + } + + die "no hash"; + return; +} + +# +# Read input file +# +sub read_input() { + my %out; + my $x = 0; + + while ( my $line = ){ + $line =~ s/\s*(\#.*)?$//; + next unless $line; + + if( $line =~ /^([^=]++)=([^=]++)$/ ){ + $out{$1} = $x = $2; + } else { + $out{$line} = $x; + } + $x++; + } + + return %out if wantarray; + return \%out; +} + +# +# Verify that the hash table is actually correct... +# +sub verify_hash_table(\%\@){ + my ($href, $hashinfo) = @_; + my ($n, $sv, $g) = @{$hashinfo}; + my $k; + my $err = 0; + + foreach $k (keys(%$href)) { + my ($pf1, $pf2) = prehash($k, $n, @$sv); + my $g1 = ${$g}[$pf1]; + my $g2 = ${$g}[$pf2]; + + if ($g1+$g2 != ${$href}{$k}) { + printf STDERR "%s(%d,%d): %d+%d = %d != %d\n", + $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k}; + $err = 1; + } else { + # printf STDERR "%s: %d+%d = %d ok\n", + # $k, $g1, $g2, $g1+$g2; + } + } + + die "$0: hash validation error\n" if ($err); +} + +1; diff --git a/perl/phash.pl b/perl/phash.pl new file mode 100755 index 00000000..646fe7f4 --- /dev/null +++ b/perl/phash.pl @@ -0,0 +1,56 @@ +#! /usr/bin/env perl + + +# not completed, possibly not needed + +use strict; +use warnings; + +use lib 'lib'; + +use phash; + +my %data = read_input; +my($n, $sv, $g) = gen_perfect_hash(%data); + +use Data::Dump 'dump'; +use 5.010; +say STDERR dump \%data; +say STDERR dump $n,$sv,$g; + + + print "static int HASHNAME_fg1[$n] =\n{\n"; + + for( my $i = 0; $i < $n; $i++ ){ + no warnings 'uninitialized'; + my $h = ${$g}[$i*2+0] || 'UNUSED'; + say $h; + #print "\t", ${$g}[${$f1}[$i]], "\n"; + } + + print "};\n\n"; + + + + print "static int HASHNAME_fg2[$n] =\n{\n"; + + for( my $i = 0; $i < $n; $i++ ){ + no warnings 'uninitialized'; + no warnings 'uninitialized'; + my $h = ${$g}[$i*2+1] || 'UNUSED'; + say $h; + #print "\t", ${$g}[${$f2}[$i]], "\n"; + } + + print "};\n\n"; + + + print <[0],$sv->[1] +\tHASHNAME_fg1, +\tHASHNAME_fg2, +}; +END diff --git a/perl/pptok.pl b/perl/pptok.pl index e315a146..2920e693 100755 --- a/perl/pptok.pl +++ b/perl/pptok.pl @@ -257,7 +257,8 @@ END } # Paranoia... - verify_hash_table(\%tokens, \@hashinfo); + # no longer needed, gen_perfect_hash now runs verify_hash_table + # verify_hash_table(\%tokens, \@hashinfo); my ($n, $sv, $g) = @hashinfo; my $sv2 = $sv+2; diff --git a/phash.pl b/phash.pl index 03f500f3..5dc385f1 100755 --- a/phash.pl +++ b/phash.pl @@ -4,6 +4,10 @@ # C output. # +# I don't think this file is even used. + +use lib qw'perllib'; + require 'phash.ph'; # diff --git a/syncfiles.pl b/syncfiles.pl index 95969231..c64cb672 100755 --- a/syncfiles.pl +++ b/syncfiles.pl @@ -1,15 +1,20 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # # Sync the output file list between Makefiles # Use the mkdep.pl parameters to get the filename syntax # # The first file is the source file; the other ones target. # -%def_hints = ('object-ending' => '.o', - 'path-separator' => '/', - 'continuation' => "\\"); +use strict; +use warnings; -sub do_transform($$) { +our %def_hints = qw{ + object-ending .o + path-separator / + continuation \\ +}; + +sub do_transform($\%) { my($l, $h) = @_; my($ps) = $$h{'path-separator'}; @@ -27,18 +32,19 @@ sub do_transform($$) { return $l; } -@file_list = (); -$first = 1; -$first_file = $ARGV[0]; +our @file_list; + +my $first = 1; +my $first_file = $ARGV[0]; die unless (defined($first_file)); -foreach $file (@ARGV) { - open(FILE, "< $file\0") or die; +for my $filename (@ARGV) { + open( FILE, '<', $filename ) or die; # First, read the syntax hints - %hints = %def_hints; - while (defined($line = )) { + my %hints = %def_hints; + while( my $line = ){ if ( $line =~ /^\s*\#\s*@([a-z0-9-]+):\s*\"([^\"]*)\"/ ) { $hints{$1} = $2; } @@ -46,9 +52,9 @@ foreach $file (@ARGV) { # Read and process the file seek(FILE,0,0); - @lines = (); - $processing = 0; - while (defined($line = )) { + my @lines; + my $processing = 0; + while( my $line = ){ chomp $line; if ($processing) { if ($line eq '#-- End File Lists --#') { @@ -72,8 +78,8 @@ foreach $file (@ARGV) { $processing = 1; if (!$first) { push(@lines, "# Edit in $first_file, not here!\n"); - foreach $l (@file_list) { - push(@lines, do_transform($l, \%hints)."\n"); + for my $l (@file_list) { + push(@lines, do_transform($l, %hints)."\n"); } } } @@ -83,11 +89,10 @@ foreach $file (@ARGV) { # Write the file back out if (!$first) { - open(FILE, "> $file\0") or die; + open( FILE, '>', $filename ) or die; print FILE @lines; close(FILE); } - undef @lines; $first = 0; } -- 2.11.4.GIT