3 # Perfect Minimal Hash Generator written in Perl, which produces
7 require 'random_sv_vectors.ph';
11 # Compute the prehash for a key
16 my($key, $n, $sv) = @_;
17 my @c = crc64($sv, $key);
19 # Create a bipartite graph...
20 $k1 = (($c[1] & ($n-1)) << 1) + 0; # low word
21 $k2 = (($c[0] & ($n-1)) << 1) + 1; # high word
27 # Walk the assignment graph, return true on success
29 sub walk_graph($$$$) {
30 my($nodeval,$nodeneigh,$n,$v) = @_;
33 # print STDERR "Vertex $n value $v\n";
36 foreach $nx (@{$$nodeneigh[$n]}) {
37 # $nx -> [neigh, hash]
40 # print STDERR "Edge $n,$o value $e: ";
42 if (defined($ov = $$nodeval[$o])) {
44 # Cyclic graph with collision
45 # print STDERR "error, should be ", $v+$ov, "\n";
48 # print STDERR "ok\n";
51 return 0 unless (walk_graph($nodeval, $nodeneigh, $o, $e-$v));
58 # Generate the function assuming a given N.
60 # gen_hash_n(N, sv, \%data, run)
62 sub gen_hash_n($$$$) {
63 my($n, $sv, $href, $run) = @_;
64 my @keys = keys(%{$href});
73 for ($i = 0; $i < $gsize; $i++) {
79 my ($pf1, $pf2) = prehash($k, $n, $sv);
80 ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order
86 if (defined($xkey = $edges{$pf})) {
87 next if ($e == ${$href}{$xkey}); # Duplicate hash, safe to ignore
89 print STDERR "$run: Collision: $pf: $k with $xkey\n";
94 # print STDERR "Edge $pf value $e from $k\n";
97 push(@{$nodeneigh[$pf1]}, [$pf2, $e]);
98 push(@{$nodeneigh[$pf2]}, [$pf1, $e]);
101 # Now we need to assign values to each vertex, so that for each
102 # edge, the sum of the values for the two vertices give the value
103 # for the edge (which is our hash index.) If we find an impossible
104 # sitation, the graph was cyclic.
105 @nodeval = (undef) x $gsize;
107 for ($i = 0; $i < $gsize; $i++) {
108 if (scalar(@{$nodeneigh[$i]})) {
109 # This vertex has neighbors (is used)
110 if (!defined($nodeval[$i])) {
111 # First vertex in a cluster
112 unless (walk_graph(\@nodeval, \@nodeneigh, $i, 0)) {
114 print STDERR "$run: Graph is cyclic\n";
122 # for ($i = 0; $i < $n; $i++) {
123 # print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
127 printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n",
131 return ($n, $sv, \@nodeval);
135 # Driver for generating the function
137 # gen_perfect_hash(\%data)
139 sub gen_perfect_hash($) {
141 my @keys = keys(%{$href});
143 my $n, $i, $j, $sv, $maxj;
146 # Minimal power of 2 value for N with enough wiggle room.
147 # The scaling constant must be larger than 0.5 in order for the
148 # algorithm to ever terminate.
149 my $room = scalar(@keys)*0.8;
155 # Number of times to try...
156 $maxj = scalar @random_sv_vectors;
158 for ($i = 0; $i < 4; $i++) {
159 printf STDERR "%d vectors, trying n = %d...\n",
161 for ($j = 0; $j < $maxj; $j++) {
162 $sv = $random_sv_vectors[$j];
163 @hashinfo = gen_hash_n($n, $sv, $href, $run++);
164 return @hashinfo if (@hashinfo);
173 # Verify that the hash table is actually correct...
175 sub verify_hash_table($$)
177 my ($href, $hashinfo) = @_;
178 my ($n, $sv, $g) = @{$hashinfo};
182 foreach $k (keys(%$href)) {
183 my ($pf1, $pf2) = prehash($k, $n, $sv);
184 my $g1 = ${$g}[$pf1];
185 my $g2 = ${$g}[$pf2];
187 if ($g1+$g2 != ${$href}{$k}) {
188 printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
189 $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
192 # printf STDERR "%s: %d+%d = %d ok\n",
193 # $k, $g1, $g2, $g1+$g2;
197 die "$0: hash validation error\n" if ($err);