Added POD tests and a Perl::Critic test
[nasm/perl-rewrite.git] / perl / lib / phash.pm
blobe86b89abfa18ab73e8259bfdb8cb701c79560d9e
1 =head1 NAME
3 phash
5 =head1 DESCRIPTION
7 Perfect Minimal Hash Generator written in Perl, which produces
8 C output.
10 Requires the CPAN Graph module (tested against 0.81, 0.83, 0.84)
13 =cut
15 package phash;
16 use strict;
17 use warnings;
19 use random_sv_vectors;
20 use Nasm::crc64;
22 use base 'Exporter';
24 our @EXPORT = qw{
25 prehash
26 walk_graph
27 gen_hash_n
28 gen_perfect_hash
29 read_input
30 verify_hash_table
34 ## no critic
35 sub prehash($$\@);
36 sub walk_graph(\@\@$$);
37 sub gen_hash_n($\@\%$);
38 sub gen_perfect_hash(\%);
39 sub read_input();
40 sub verify_hash_table(\%\@);
41 ## use critic
43 =head2 prehash( $key, $N, @sv )
45 Compute the prehash for a key
47 =cut
48 ## no critic
49 sub prehash($$\@) {
50 ## use critic
51 my($key, $n, $sv) = @_;
52 my @c = crc64(@$sv, $key);
54 # Create a bipartite graph...
55 my $low_word = (($c[1] & ($n-1)) << 1) + 0; # low word
56 my $high_word = (($c[0] & ($n-1)) << 1) + 1; # high word
58 return ($low_word, $high_word);
61 =head2 walk_graph( @node_val, @node_neighbor, $n, $v )
63 Walk the assignment graph, return true on success
65 =cut
66 ## no critic
67 sub walk_graph(\@\@$$) {
68 ## use critic
69 my($node_val,$node_neighbor,$n,$v) = @_;
71 # print STDERR "Vertex $n value $v\n";
72 $node_val->[$n] = $v;
74 for my $nx ( @{$node_neighbor->[$n]} ){
75 # $nx -> [neigh, hash]
76 my ($o, $e) = @$nx;
78 # print STDERR "Edge $n,$o value $e: ";
79 my $ov = $node_val->[$o];
80 if( defined($ov) ){
81 if ($v+$ov != $e) {
82 # Cyclic graph with collision
83 # print STDERR "error, should be ", $v+$ov, "\n";
84 return 0;
85 } else {
86 # print STDERR "ok\n";
88 }elsif( not walk_graph( @$node_val, @$node_neighbor, $o, $e-$v )){
89 return 0;
92 return 1;
95 =head2 gen_hash_n( $N, @sv, %data, $run )
97 Generate the function assuming a given N.
99 =cut
100 ## no critic
101 sub gen_hash_n($\@\%$) {
102 ## use critic
103 my($n, $sv, $href, $run) = @_;
104 my @keys = keys(%{$href});
105 my $gsize = 2*$n;
106 my @node_val;
107 my @node_neighbor;
108 my %edges;
110 for( my $i = 0; $i < $gsize; $i++ ){
111 $node_neighbor[$i] = [];
114 %edges = ();
115 for my $key( @keys ){
116 my ($pf1, $pf2) = prehash($key, $n, @$sv);
117 ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order
119 my $pf = "$pf1,$pf2";
120 my $e = $href->{$key};
121 my $xkey;
123 if (defined($xkey = $edges{$pf})) {
124 next if ($e == $href->{$xkey}); # Duplicate hash, safe to ignore
125 if (defined($run)) {
126 print STDERR "$run: Collision: $pf: $key with $xkey\n";
128 return;
131 # print STDERR "Edge $pf value $e from $k\n";
133 $edges{$pf} = $key;
134 push(@{$node_neighbor[$pf1]}, [$pf2, $e]);
135 push(@{$node_neighbor[$pf2]}, [$pf1, $e]);
138 # Now we need to assign values to each vertex, so that for each
139 # edge, the sum of the values for the two vertices give the value
140 # for the edge (which is our hash index.) If we find an impossible
141 # sitation, the graph was cyclic.
142 @node_val = (undef) x $gsize;
144 for( my $i = 0; $i < $gsize; $i++ ){
145 if (scalar(@{$node_neighbor[$i]})) {
146 # This vertex has neighbors (is used)
147 if (!defined($node_val[$i])) {
148 # First vertex in a cluster
149 unless (walk_graph( @node_val, @node_neighbor, $i, 0)) {
150 if (defined($run)) {
151 print STDERR "$run: Graph is cyclic\n";
153 return;
159 # for ($i = 0; $i < $n; $i++) {
160 # print STDERR "Vertex ", $i, ": ", $g[$i], "\n";
163 if (defined($run)) {
164 printf STDERR "$run: Done: n = $n, sv = [0x%08x, 0x%08x]\n",
165 $$sv[0], $$sv[1];
168 return ($n, $sv, \@node_val);
171 =head2 gen_perfect_hash( %data )
173 Driver for generating the function
175 =cut
176 ## no critic
177 sub gen_perfect_hash(\%) {
178 ## use critic
179 my($href) = @_;
180 my @keys = keys(%{$href});
181 #my @hashinfo;
182 my( $n, $i, $j, $sv, $maxj );
183 my $run = 1;
185 # Minimal power of 2 value for N with enough wiggle room.
186 # The scaling constant must be larger than 0.5 in order for the
187 # algorithm to ever terminate.
188 my $room = scalar(@keys)*0.8;
189 $n = 1;
190 while ($n < $room) {
191 $n <<= 1;
194 # Number of times to try...
195 $maxj = scalar @random_sv_vectors;
197 for ($i = 0; $i < 4; $i++) {
198 printf STDERR "%d vectors, trying n = %d...\n",
199 scalar @keys, $n;
200 for ($j = 0; $j < $maxj; $j++) {
201 $sv = $random_sv_vectors[$j];
202 my @hashinfo = gen_hash_n($n, @$sv, %$href, $run++);
204 if( @hashinfo ){
205 verify_hash_table(%$href,@hashinfo);
206 return @hashinfo;
209 $n <<= 1;
212 die "no hash";
213 return;
216 =head2 read_input
218 Read input file
220 =cut
221 sub read_input() {
222 my %out;
223 my $x = 0;
225 while ( my $line = <STDIN> ){
226 $line =~ s/\s*(\#.*)?$//;
227 next unless $line;
229 if( $line =~ /^([^=]++)=([^=]++)$/ ){
230 $out{$1} = $x = $2;
231 } else {
232 $out{$line} = $x;
234 $x++;
237 return %out if wantarray;
238 return \%out;
241 =head2 verify_hash_table( %href, @hashinfo )
243 Verify that the hash table is actually correct...
245 =cut
246 ## no critic
247 sub verify_hash_table(\%\@){
248 ## use critic
249 my ($href, $hashinfo) = @_;
250 my ($n, $sv, $g) = @{$hashinfo};
251 my $err = 0;
253 for my $k (keys(%$href)) {
254 my ($pf1, $pf2) = prehash($k, $n, @$sv);
255 my $g1 = ${$g}[$pf1];
256 my $g2 = ${$g}[$pf2];
258 if ($g1+$g2 != ${$href}{$k}) {
259 printf STDERR "%s(%d,%d): %d+%d = %d != %d\n",
260 $k, $pf1, $pf2, $g1, $g2, $g1+$g2, ${$href}{$k};
261 $err = 1;
262 } else {
263 # printf STDERR "%s: %d+%d = %d ok\n",
264 # $k, $g1, $g2, $g1+$g2;
268 die "$0: hash validation error\n" if ($err);