From 1df0b9ee2d7f87e5e194343227cfe9cdcc41ac12 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 25 May 2008 18:44:44 -0700 Subject: [PATCH] phash: canonicalize order, fix handling of ignored duplicates Canonicalize the order of the prehash entries, so we don't have to worry about looking up both orderings of the same edge. When we find a collision that we decide to ignore, there is no point in adding the same edge into the array again; instead, just skip the current edge. --- perllib/phash.ph | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/perllib/phash.ph b/perllib/phash.ph index 4efc6519..8feb6b70 100644 --- a/perllib/phash.ph +++ b/perllib/phash.ph @@ -79,11 +79,14 @@ sub gen_hash_n($$$$) { %edges = (); foreach $k (@keys) { my ($pf1, $pf2) = prehash($k, $n, $sv); + ($pf1,$pf2) = ($pf2,$pf1) if ($pf1 > $pf2); # Canonicalize order + my $pf = "$pf1,$pf2"; my $e = ${$href}{$k}; my $xkey; - if (defined($xkey = $edges{$pf}) && ${$href}{$xkey} != $e) { + if (defined($xkey = $edges{$pf})) { + next if ($e == ${$href}{$xkey}); # Duplicate hash, safe to ignore if (defined($run)) { print STDERR "$run: Collision: $pf: $k with $xkey\n"; } -- 2.11.4.GIT