egra: cosmetix, added brief comments for most interesting widget methods
[iv.d.git] / hash / test_synth_speed.d
blobf0705f299be780be0905d674067e11d3f302c4df
1 import iv.hash.fasthash;
2 import iv.hash.joaat;
3 import iv.hash.murhash;
5 import iv.pxclock;
6 import iv.vfs.io;
8 // /tmp/bigtext/dump.sql
9 // _00/zwords.txt
12 // ////////////////////////////////////////////////////////////////////////// //
13 string[] words;
15 void loadWords () {
16 foreach (string s; VFile("_00/zwords.txt").byLineCopy) words ~= s;
20 // ////////////////////////////////////////////////////////////////////////// //
21 uint sfHash (const(char)[] str) {
22 return SFhashOf(str.ptr, str.length);
26 usize SFhashOf( const (void)* buf, usize len, usize seed = 0 ) @trusted pure nothrow @nogc
29 * This is Paul Hsieh's SuperFastHash algorithm, described here:
30 * http://www.azillionmonkeys.com/qed/hash.html
31 * It is protected by the following open source license:
32 * http://www.azillionmonkeys.com/qed/weblicense.html
34 static uint get16bits( const (ubyte)* x ) pure nothrow
36 // CTFE doesn't support casting ubyte* -> ushort*, so revert to
37 // per-byte access when in CTFE.
38 version( HasUnalignedOps )
40 if (!__ctfe)
41 return *cast(ushort*) x;
44 return ((cast(uint) x[1]) << 8) + (cast(uint) x[0]);
47 // NOTE: SuperFastHash normally starts with a zero hash value. The seed
48 // value was incorporated to allow chaining.
49 auto data = cast(const (ubyte)*) buf;
50 auto hash = seed;
51 int rem;
53 if( len <= 0 || data is null )
54 return 0;
56 rem = len & 3;
57 len >>= 2;
59 for( ; len > 0; len-- )
61 hash += get16bits( data );
62 auto tmp = (get16bits( data + 2 ) << 11) ^ hash;
63 hash = (hash << 16) ^ tmp;
64 data += 2 * ushort.sizeof;
65 hash += hash >> 11;
68 switch( rem )
70 case 3: hash += get16bits( data );
71 hash ^= hash << 16;
72 hash ^= data[ushort.sizeof] << 18;
73 hash += hash >> 11;
74 break;
75 case 2: hash += get16bits( data );
76 hash ^= hash << 11;
77 hash += hash >> 17;
78 break;
79 case 1: hash += *data;
80 hash ^= hash << 10;
81 hash += hash >> 1;
82 break;
83 default:
84 break;
87 /* Force "avalanching" of final 127 bits */
88 hash ^= hash << 3;
89 hash += hash >> 5;
90 hash ^= hash << 4;
91 hash += hash >> 17;
92 hash ^= hash << 25;
93 hash += hash >> 6;
95 return hash;
99 // ////////////////////////////////////////////////////////////////////////// //
100 uint murHashX (const(char)[] str, usize seed=0) {
101 return MurHashOf(str, seed);
105 usize MurHashOf (const(char)[] data, usize seed=0) pure nothrow @trusted @nogc {
106 enum C1 = 0xcc9e2d51u;
107 enum C2 = 0x1b873593u;
109 uint hash = seed; // current value
110 uint accum; // we operate 32-bit chunks; low 2 bits of accum used as counter
111 ubyte n;
112 uint k1;
114 // process data blocks
115 if (data.length) {
116 auto bytes = cast(const(ubyte)*)data.ptr;
117 auto len = data.length;
118 // process 32-bit chunks
119 foreach (immutable _; 0..len/4) {
120 version(LittleEndian) {
121 if (__ctfe) {
122 k1 = (bytes[0])|(bytes[1]<<8)|(bytes[2]<<16)|(bytes[3]<<24);
123 } else {
124 k1 = *cast(const(uint)*)bytes;
126 } else {
127 if (__ctfe) {
128 k1 = (bytes[3])|(bytes[2]<<8)|(bytes[1]<<16)|(bytes[0]<<24);
129 } else {
130 import core.bitop : bswap;
131 k1 = bswap(*cast(const(uint)*)bytes);
134 bytes += 4;
135 k1 *= C1;
136 k1 = (k1<<15)|(k1>>(32-15));
137 k1 *= C2;
138 hash ^= k1;
139 hash = (hash<<13)|(hash>>(32-13));
140 hash = hash*5+0xe6546b64;
142 // advance over whole 32-bit chunks, possibly leaving 1..3 bytes
143 len &= 0x03;
144 n = cast(ubyte)len;
145 // append any remaining bytes into carry
146 while (len--) accum = (accum>>8)|(*bytes++<<24);
149 // finalize a hash
150 if (n) {
151 k1 = accum>>(4-n)*8;
152 k1 *= C1;
153 k1 = (k1<<15)|(k1>>(32-15));
154 k1 *= C2;
155 hash ^= k1;
157 hash ^= cast(uint)data.length;
158 // fmix
159 hash ^= hash>>16;
160 hash *= 0x85ebca6bu;
161 hash ^= hash>>13;
162 hash *= 0xc2b2ae35u;
163 hash ^= hash>>16;
164 return hash;
169 * MurmurHash3 was written by Austin Appleby, and is placed in the public domain.
171 * Params:
172 * buf = data buffer
173 * seed = the seed
175 * Returns:
176 * 32-bit hash
178 uint murHash32 (const(char)[] data, uint seed=0) pure nothrow @trusted @nogc {
179 enum C1 = 0xcc9e2d51u;
180 enum C2 = 0x1b873593u;
182 uint hash = seed; // current value
183 uint k1;
185 // process data blocks
186 if (data.length) {
187 auto bytes = cast(const(ubyte)*)data.ptr;
188 auto len = data.length;
189 // process 32-bit chunks
190 foreach (immutable _; 0..len/4) {
191 version(LittleEndian) {
192 if (__ctfe) {
193 k1 = (bytes[0])|(bytes[1]<<8)|(bytes[2]<<16)|(bytes[3]<<24);
194 } else {
195 k1 = *cast(const(uint)*)bytes;
197 } else {
198 if (__ctfe) {
199 k1 = (bytes[3])|(bytes[2]<<8)|(bytes[1]<<16)|(bytes[0]<<24);
200 } else {
201 import core.bitop : bswap;
202 k1 = bswap(*cast(const(uint)*)bytes);
205 bytes += 4;
206 k1 *= C1;
207 k1 = (k1<<15)|(k1>>(32-15));
208 k1 *= C2;
209 hash ^= k1;
210 hash = (hash<<13)|(hash>>(32-13));
211 hash = hash*5+0xe6546b64;
213 // advance over whole 32-bit chunks, possibly leaving 1..3 bytes
214 if ((len &= 0x03) != 0) {
215 immutable ubyte n = cast(ubyte)len;
216 // append any remaining bytes into carry
217 uint accum = 0;
218 while (len--) accum = (accum>>8)|(*bytes++<<24);
219 // finalize a hash
220 k1 = accum>>((4-n)*8);
221 k1 *= C1;
222 k1 = (k1<<15)|(k1>>(32-15));
223 k1 *= C2;
224 hash ^= k1;
226 hash ^= cast(uint)data.length;
229 // fmix
230 hash ^= hash>>16;
231 hash *= 0x85ebca6bu;
232 hash ^= hash>>13;
233 hash *= 0xc2b2ae35u;
234 hash ^= hash>>16;
236 return hash;
241 usize bytesHash3(const(char)[] buf, usize seed=0) @system nothrow @nogc {
242 static uint rotl32(uint n)(in uint x) pure nothrow @safe @nogc
244 return (x << n) | (x >> (32 - n));
247 //-----------------------------------------------------------------------------
248 // Block read - if your platform needs to do endian-swapping or can only
249 // handle aligned reads, do the conversion here
250 static uint get32bits(const (ubyte)* x) pure nothrow @nogc
252 //Compiler can optimize this code to simple *cast(uint*)x if it possible.
253 version(HasUnalignedOps)
255 if (!__ctfe)
256 return *cast(uint*)x; //BUG: Can't be inlined by DMD
258 version(BigEndian)
260 return ((cast(uint) x[0]) << 24) | ((cast(uint) x[1]) << 16) | ((cast(uint) x[2]) << 8) | (cast(uint) x[3]);
262 else
264 return ((cast(uint) x[3]) << 24) | ((cast(uint) x[2]) << 16) | ((cast(uint) x[1]) << 8) | (cast(uint) x[0]);
268 //-----------------------------------------------------------------------------
269 // Finalization mix - force all bits of a hash block to avalanche
270 static uint fmix32(uint h) pure nothrow @safe @nogc
272 h ^= h >> 16;
273 h *= 0x85ebca6b;
274 h ^= h >> 13;
275 h *= 0xc2b2ae35;
276 h ^= h >> 16;
278 return h;
281 auto len = buf.length;
282 auto data = cast(const(ubyte)*)buf.ptr;
283 auto nblocks = len / 4;
285 uint h1 = cast(uint)seed;
287 enum uint c1 = 0xcc9e2d51;
288 enum uint c2 = 0x1b873593;
289 enum uint c3 = 0xe6546b64;
291 //----------
292 // body
293 auto end_data = data+nblocks*uint.sizeof;
294 for(; data!=end_data; data += uint.sizeof)
296 uint k1 = get32bits(data);
297 k1 *= c1;
298 k1 = rotl32!15(k1);
299 k1 *= c2;
301 h1 ^= k1;
302 h1 = rotl32!13(h1);
303 h1 = h1*5+c3;
306 //----------
307 // tail
308 uint k1 = 0;
310 switch(len & 3)
312 case 3: k1 ^= data[2] << 16; goto case;
313 case 2: k1 ^= data[1] << 8; goto case;
314 case 1: k1 ^= data[0];
315 k1 *= c1; k1 = rotl32!15(k1); k1 *= c2; h1 ^= k1;
316 goto default;
317 default:
320 //----------
321 // finalization
322 h1 ^= len;
323 h1 = fmix32(h1);
324 return h1;
328 // ////////////////////////////////////////////////////////////////////////// //
330 * 64-bit implementation of fasthash
332 * Params:
333 * buf = data buffer
334 * seed = the seed
336 * Returns:
337 * 32-bit or 64-bit hash
339 usize FFhashOf (const(void)[] buf, usize seed=0) /*pure*/ nothrow @trusted @nogc {
340 enum Get8Bytes = q{
341 cast(ulong)data[0]|
342 (cast(ulong)data[1]<<8)|
343 (cast(ulong)data[2]<<16)|
344 (cast(ulong)data[3]<<24)|
345 (cast(ulong)data[4]<<32)|
346 (cast(ulong)data[5]<<40)|
347 (cast(ulong)data[6]<<48)|
348 (cast(ulong)data[7]<<56)
350 enum m = 0x880355f21e6d1965UL;
351 auto data = cast(const(ubyte)*)buf;
352 immutable len = buf.length;
353 ulong h = seed;
354 ulong t;
355 foreach (immutable _; 0..len/8) {
356 version(HasUnalignedOps) {
357 if (__ctfe) {
358 t = mixin(Get8Bytes);
359 } else {
360 t = *cast(ulong*)data;
362 } else {
363 t = mixin(Get8Bytes);
365 data += 8;
366 t ^= t>>23;
367 t *= 0x2127599bf4325c37UL;
368 t ^= t>>47;
369 h ^= t;
370 h *= m;
373 h ^= len*m;
374 t = 0;
375 switch (len&7) {
376 case 7: t ^= cast(ulong)data[6]<<48; goto case 6;
377 case 6: t ^= cast(ulong)data[5]<<40; goto case 5;
378 case 5: t ^= cast(ulong)data[4]<<32; goto case 4;
379 case 4: t ^= cast(ulong)data[3]<<24; goto case 3;
380 case 3: t ^= cast(ulong)data[2]<<16; goto case 2;
381 case 2: t ^= cast(ulong)data[1]<<8; goto case 1;
382 case 1: t ^= cast(ulong)data[0]; goto default;
383 default:
384 t ^= t>>23;
385 t *= 0x2127599bf4325c37UL;
386 t ^= t>>47;
387 h ^= t;
388 h *= m;
389 break;
392 h ^= h>>23;
393 h *= 0x2127599bf4325c37UL;
394 h ^= h>>47;
395 static if (usize.sizeof == 4) {
396 // 32-bit hash
397 // the following trick converts the 64-bit hashcode to Fermat
398 // residue, which shall retain information from both the higher
399 // and lower parts of hashcode.
400 return cast(usize)(h-(h>>32));
401 } else {
402 return h;
407 // ////////////////////////////////////////////////////////////////////////// //
408 void doHash(T) () if (is(T == struct)) {
409 foreach (string s; words) {
410 T hash;
411 hash.put(s);
412 hash.finish32();
417 // ////////////////////////////////////////////////////////////////////////// //
418 void doTest(T) () if (is(T == struct)) {
419 enum Tries = 1000;
420 write(T.stringof, ": ");
421 auto stt = clockMilli();
422 foreach (immutable _; 0..Tries) doHash!T();
423 auto ett = clockMilli()-stt;
424 writeln(words.length*Tries, " (", words.length, ", ", Tries, " times) took ", ett, " milliseconds");
428 // ////////////////////////////////////////////////////////////////////////// //
429 void doTestX(alias fn) () {
430 enum Tries = 1000;
431 write((&fn).stringof[2..$], ": ");
432 auto stt = clockMilli();
433 foreach (immutable _; 0..Tries) {
434 foreach (string s; words) cast(void)fn(s);
436 auto ett = clockMilli()-stt;
437 writeln(words.length*Tries, " (", words.length, ", ", Tries, " times) took ", ett, " milliseconds");
441 // ////////////////////////////////////////////////////////////////////////// //
442 static assert(murHashX("Hello, world!", 1234) == 0xfaf6cdb3U);
443 static assert(murHashX("Hello, world!", 4321) == 0xbf505788U);
444 static assert(murHashX("xxxxxxxxxxxxxxxxxxxxxxxxxxxx", 1234) == 0x8905ac28U);
445 static assert(murHashX("", 1234) == 0x0f2cc00bU);
447 static assert(bytesHash3("Hello, world!", 1234) == 0xfaf6cdb3U);
448 static assert(bytesHash3("Hello, world!", 4321) == 0xbf505788U);
449 static assert(bytesHash3("xxxxxxxxxxxxxxxxxxxxxxxxxxxx", 1234) == 0x8905ac28U);
450 static assert(bytesHash3("", 1234) == 0x0f2cc00bU);
453 pragma(msg, murHashX("Sample string"));
454 pragma(msg, murHashX("Alice & Miriel"));
455 pragma(msg, murHash32("Sample string"));
456 pragma(msg, murHash32("Alice & Miriel"));
459 // ////////////////////////////////////////////////////////////////////////// //
460 void main () {
461 assert(object.hashOf("Hello, world!", 1234) == 0xfaf6cdb3U);
462 //writeln(object.hashOf("Hello, world!", 1234) == 0xfaf6cdb3U);
463 loadWords();
464 doTest!JoaatHash();
465 doTest!FastHash();
466 doTest!MurHash();
467 doTestX!sfHash();
468 doTestX!murHashX();
469 doTestX!murHash32();
470 doTestX!bytesHash3();
471 doTestX!FFhashOf();
473 foreach (string w; words) {
474 if (murHashX(w) != murHash32(w)) assert(0, "shit!");
477 writeln("checking murhashes...");
478 char[1024*8] buf = void;
479 foreach (immutable _; 0..100000) {
480 import std.random;
481 auto len = uniform!"[]"(0, buf.length);
482 foreach (ref char c; buf[0..len]) c = cast(char)(uniform!"[]"(0, 255));
483 if (murHashX(buf[0..len]) != murHash32(buf[0..len])) assert(0, "shit!");