3 # Perfect Minimal Hash Generator written in Perl, which produces
6 # Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84)
9 require 'random_sv_vectors.ph';
13 # Compute the prehash for a key
18 my($key, $n, $sv) = @_;
19 my @c = crc64($sv, $key);
21 # Create a bipartite graph...
22 $k1 = (($c[1] & ($n-1)) << 1) + 0; # low word
23 $k2 = (($c[0] & ($n-1)) << 1) + 1; # high word
29 # Walk the assignment graph, return true on success
31 sub walk_graph($$$$) {
32 my($nodeval,$nodeneigh,$n,$v) = @_;
35 # print STDERR "Vertex $n value $v\n";
38 foreach $nx (@{$$nodeneigh[$n]}) {
39 # $nx -> [neigh, hash]
42 # print STDERR "Edge $n,$o value $e: ";
44 if (defined($ov = $$nodeval[$o])) {
46 # Cyclic graph with collision
47 # print STDERR "error, should be ", $v+$ov, "\n";
50 # print STDERR "ok\n";
53 return 0 unless (walk_graph($nodeval, $nodeneigh, $o, $e-$v));
60 # Generate the function assuming a given N.
62 # gen_hash_n(N, sv, \%data, run)
64 sub gen_hash_n($$$$) {
65 my($n, $sv, $href, $run) = @_;
66 my @keys = keys(%{$href});
75 for ($i = 0; $i < $gsize; $i++) {
81 my ($pf1, $pf2) = prehash($k, $n, $sv);
82 ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order
88 if (defined($xkey = $edges{$pf})) {
89 next if ($e == ${$href}{$xkey}); # Duplicate hash, safe to ignore
91 print STDERR "$run: Collision: $pf: $k with $xkey\n";
96 # print STDERR "Edge $pf value $e from $k\n";
99 push(@{$nodeneigh[$pf1]}, [$pf2, $e]);
100 push(@{$nodeneigh[$pf2]}, [$pf1, $e]);
103 # Now we need to assign values to each vertex, so that for each
104 # edge, the sum of the values for the two vertices give the value
105 # for the edge (which is our hash index.) If we find an impossible
106 # sitation, the graph was cyclic.
107 @nodeval = (undef) x $gsize;
109 for ($i = 0; $i < $gsize; $i++) {
110 if (scalar(@{$nodeneigh[$i]})) {
111 # This vertex has neighbors (is used)
112 if (!defined($nodeval[$i])) {
113 # First vertex in a cluster
114 unless (walk_graph(\@nodeval, \@nodeneigh, $i, 0)) {
116 print STDERR "$run: Graph is cyclic\n";
124 # for ($i = 0; $i < $n; $i++) {
125 # print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
129 printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n",
133 return ($n, $sv, \@nodeval);
137 # Driver for generating the function
139 # gen_perfect_hash(\%data)
141 sub gen_perfect_hash($) {
143 my @keys = keys(%{$href});
145 my $n, $i, $j, $sv, $maxj;
148 # Minimal power of 2 value for N with enough wiggle room.
149 # The scaling constant must be larger than 0.5 in order for the
150 # algorithm to ever terminate.
151 my $room = scalar(@keys)*0.8;
157 # Number of times to try...
158 $maxj = scalar @random_sv_vectors;
160 for ($i = 0; $i < 4; $i++) {
161 printf STDERR "%d vectors, trying n = %d...\n",
163 for ($j = 0; $j < $maxj; $j++) {
164 $sv = $random_sv_vectors[$j];
165 @hashinfo = gen_hash_n($n, $sv, $href, $run++);
166 return @hashinfo if (defined(@hashinfo));
182 while (defined($l = <STDIN>)) {
184 $l =~ s/\s*(\#.*|)$//;
188 if ($l =~ /^([^=]+)\=([^=]+)$/) {
201 # Verify that the hash table is actually correct...
203 sub verify_hash_table($$)
205 my ($href, $hashinfo) = @_;
206 my ($n, $sv, $g) = @{$hashinfo};
210 foreach $k (keys(%$href)) {
211 my ($pf1, $pf2) = prehash($k, $n, $sv);
212 my $g1 = ${$g}[$pf1];
213 my $g2 = ${$g}[$pf2];
215 if ($g1+$g2 != ${$href}{$k}) {
216 printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
217 $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
220 # printf STDERR "%s: %d+%d = %d ok\n",
221 # $k, $g1, $g2, $g1+$g2;
225 die "$0: hash validation error\n" if ($err);