7 Perfect Minimal Hash Generator written in Perl, which produces
10 Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84)
19 use random_sv_vectors
;
36 sub walk_graph
(\@\@
$$);
37 sub gen_hash_n
($\@\
%$);
38 sub gen_perfect_hash
(\
%);
40 sub verify_hash_table
(\
%\@
);
43 =head2 prehash( $key, $N, @sv )
45 Compute the prehash for a key
51 my($key, $n, $sv) = @_;
52 my @c = crc64
(@
$sv, $key);
54 # Create a bipartite graph...
55 my $low_word = (($c[1] & ($n-1)) << 1) + 0; # low word
56 my $high_word = (($c[0] & ($n-1)) << 1) + 1; # high word
58 return ($low_word, $high_word);
61 =head2 walk_graph( @node_val, @node_neighbor, $n, $v )
63 Walk the assignment graph, return true on success
67 sub walk_graph
(\@\@
$$) {
69 my($node_val,$node_neighbor,$n,$v) = @_;
71 # print STDERR "Vertex $n value $v\n";
74 for my $nx ( @
{$node_neighbor->[$n]} ){
75 # $nx -> [neigh, hash]
78 # print STDERR "Edge $n,$o value $e: ";
79 my $ov = $node_val->[$o];
82 # Cyclic graph with collision
83 # print STDERR "error, should be ", $v+$ov, "\n";
86 # print STDERR "ok\n";
88 }elsif( not walk_graph
( @
$node_val, @
$node_neighbor, $o, $e-$v )){
95 =head2 gen_hash_n( $N, @sv, %data, $run )
97 Generate the function assuming a given N.
101 sub gen_hash_n
($\@\
%$) {
103 my($n, $sv, $href, $run) = @_;
104 my @keys = keys(%{$href});
110 for( my $i = 0; $i < $gsize; $i++ ){
111 $node_neighbor[$i] = [];
115 for my $key( @keys ){
116 my ($pf1, $pf2) = prehash
($key, $n, @
$sv);
117 ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order
119 my $pf = "$pf1,$pf2";
120 my $e = $href->{$key};
123 if (defined($xkey = $edges{$pf})) {
124 next if ($e == $href->{$xkey}); # Duplicate hash, safe to ignore
126 print STDERR
"$run: Collision: $pf: $key with $xkey\n";
131 # print STDERR "Edge $pf value $e from $k\n";
134 push(@
{$node_neighbor[$pf1]}, [$pf2, $e]);
135 push(@
{$node_neighbor[$pf2]}, [$pf1, $e]);
138 # Now we need to assign values to each vertex, so that for each
139 # edge, the sum of the values for the two vertices give the value
140 # for the edge (which is our hash index.) If we find an impossible
141 # sitation, the graph was cyclic.
142 @node_val = (undef) x
$gsize;
144 for( my $i = 0; $i < $gsize; $i++ ){
145 if (scalar(@
{$node_neighbor[$i]})) {
146 # This vertex has neighbors (is used)
147 if (!defined($node_val[$i])) {
148 # First vertex in a cluster
149 unless (walk_graph
( @node_val, @node_neighbor, $i, 0)) {
151 print STDERR
"$run: Graph is cyclic\n";
159 # for ($i = 0; $i < $n; $i++) {
160 # print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
164 printf STDERR
"$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n",
168 return ($n, $sv, \
@node_val);
171 =head2 gen_perfect_hash( %data )
173 Driver for generating the function
177 sub gen_perfect_hash
(\
%) {
180 my @keys = keys(%{$href});
182 my( $n, $i, $j, $sv, $maxj );
185 # Minimal power of 2 value for N with enough wiggle room.
186 # The scaling constant must be larger than 0.5 in order for the
187 # algorithm to ever terminate.
188 my $room = scalar(@keys)*0.8;
194 # Number of times to try...
195 $maxj = scalar @random_sv_vectors;
197 for ($i = 0; $i < 4; $i++) {
198 printf STDERR
"%d vectors, trying n = %d...\n",
200 for ($j = 0; $j < $maxj; $j++) {
201 $sv = $random_sv_vectors[$j];
202 my @hashinfo = gen_hash_n
($n, @
$sv, %$href, $run++);
205 verify_hash_table
(%$href,@hashinfo);
225 while ( my $line = <STDIN
> ){
226 $line =~ s/\s*(\#.*)?$//;
229 if( $line =~ /^([^=]++)=([^=]++)$/ ){
237 return %out if wantarray;
241 =head2 verify_hash_table( %href, @hashinfo )
243 Verify that the hash table is actually correct...
247 sub verify_hash_table
(\
%\@
){
249 my ($href, $hashinfo) = @_;
250 my ($n, $sv, $g) = @
{$hashinfo};
253 for my $k (keys(%$href)) {
254 my ($pf1, $pf2) = prehash
($k, $n, @
$sv);
255 my $g1 = ${$g}[$pf1];
256 my $g2 = ${$g}[$pf2];
258 if ($g1+$g2 != ${$href}{$k}) {
259 printf STDERR
"%s(%d,%d): %d+%d = %d != %d\n",
260 $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
263 # printf STDERR "%s: %d+%d = %d ok\n",
264 # $k, $g1, $g2, $g1+$g2;
268 die "$0: hash validation error\n" if ($err);