egra: agg will respect clip rect now
[iv.d.git] / hash / murhash.d
blob39998853a1ff7239188f6ba605ee4bb1d7fd9f59
1 /*-----------------------------------------------------------------------------
2 * MurmurHash3 was written by Austin Appleby, and is placed in the public
3 * domain.
5 * This implementation was written by Shane Day, and is also public domain.
6 * Translated to D by Ketmar // Invisible Vector
8 * This is a D implementation of MurmurHash3_x86_32 (Murmur3A) with support
9 * for progressive processing.
11 module iv.hash.murhash /*is aliced*/;
12 import iv.alice;
14 /*-----------------------------------------------------------------------------
16 If you want to understand the MurmurHash algorithm you would be much better
17 off reading the original source. Just point your browser at:
18 http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
21 What this version provides?
23 Progressive data feeding. Useful when the entire payload to be hashed
24 does not fit in memory or when the data is streamed through the application.
25 Also useful when hashing a number of strings with a common prefix. A partial
26 hash of a prefix string can be generated and reused for each suffix string.
29 How does it work?
31 We can only process entire 32 bit chunks of input, except for the very end
32 that may be shorter. So along with the partial hash we need to give back to
33 the caller a carry containing up to 3 bytes that we were unable to process.
34 This carry also needs to record the number of bytes the carry holds. I use
35 the low 2 bits as a count (0..3) and the carry bytes are shifted into the
36 high byte in stream order.
38 To handle endianess I simply use a macro that reads a uint32_t and define
39 that macro to be a direct read on little endian machines, a read and swap
40 on big endian machines, or a byte-by-byte read if the endianess is unknown.
42 -----------------------------------------------------------------------------*/
44 public struct MurHash {
45 public:
46 enum has64bit = false;
47 enum has32bit = true;
49 private:
50 enum C1 = 0xcc9e2d51u;
51 enum C2 = 0x1b873593u;
53 private:
54 uint seed; // initial seed value; MUST BE FIRST
55 uint hash; // current value
56 uint accum; // we operate 32-bit chunks; low 2 bits of accum used as counter
57 uint totallen; // to match the original Murmur3A
59 public:
60 nothrow @trusted @nogc:
61 /// construct state with seed
62 this (uint aseed) pure { hash = seed = aseed; }
64 /// reset state
65 void reset () pure { accum = totallen = 0; hash = seed; }
67 /// reset state
68 void reset (uint aseed) pure { accum = totallen = 0; hash = seed = aseed; }
70 /// process data block
71 void put(T) (scope const(T)[] data...) if (T.sizeof == 1) {
72 if (data.length == 0) return; // nothing to do
73 auto bytes = cast(const(ubyte)*)data.ptr;
74 auto len = data.length;
75 static if (len.sizeof > uint.sizeof) {
76 if (len > uint.max) assert(0, "MurHash: too much data");
78 if (uint.max-totallen < len) assert(0, "MurHash: too much data"); // overflow
79 totallen += len;
80 auto acc = accum;
81 auto hh = hash;
82 // extract carry count from low 2 bits of accum value
83 ubyte n = acc&3;
84 // consume any carry bytes
85 ubyte i = (4-n)&3;
86 if (i && i <= len) {
87 while (i--) {
88 acc = (acc>>8)|(*bytes++<<24);
89 --len;
90 if (++n == 4) {
91 n = 0;
92 acc *= C1;
93 acc = (acc<<15)|(acc>>(32-15));
94 acc *= C2;
95 hh ^= acc;
96 hh = (hh<<13)|(hh>>(32-13));
97 hh = hh*5+0xe6546b64;
101 // process 32-bit chunks
102 uint k1;
103 foreach (immutable _; 0..len/4) {
104 version(LittleEndian) {
105 if (__ctfe) {
106 k1 = (bytes[0])|(bytes[1]<<8)|(bytes[2]<<16)|(bytes[3]<<24);
107 } else {
108 k1 = *cast(const(uint)*)bytes;
110 } else {
111 if (__ctfe) {
112 k1 = (bytes[3])|(bytes[2]<<8)|(bytes[1]<<16)|(bytes[0]<<24);
113 } else {
114 import core.bitop : bswap;
115 k1 = bswap(*cast(const(uint)*)bytes);
118 bytes += 4;
119 k1 *= C1;
120 k1 = (k1<<15)|(k1>>(32-15));
121 k1 *= C2;
122 hh ^= k1;
123 hh = (hh<<13)|(hh>>(32-13));
124 hh = hh*5+0xe6546b64;
126 // advance over whole 32-bit chunks, possibly leaving 1..3 bytes
127 len -= len/4*4;
128 // append any remaining bytes into carry
129 while (len--) {
130 acc = (acc>>8)|(*bytes++<<24);
131 if (++n == 4) {
132 n = 0;
133 acc *= C1;
134 acc = (acc<<15)|(acc>>(32-15));
135 acc *= C2;
136 hh ^= acc;
137 hh = (hh<<13)|(hh>>(32-13));
138 hh = hh*5+0xe6546b64;
141 // store accum counter
142 acc = (acc&~0xff)|n;
143 // update state
144 hash = hh;
145 accum = acc;
148 /// finalize a hash (i.e. return current result).
149 /// note that you can continue putting data, as this is not destructive
150 @property uint result32 () const pure {
151 uint acc = accum;
152 uint hh = hash;
153 immutable n = acc&3;
154 if (n) {
155 uint k1 = acc>>(4-n)*8;
156 k1 *= C1;
157 k1 = (k1<<15)|(k1>>(32-15));
158 k1 *= C2;
159 hh ^= k1;
161 hh ^= totallen;
162 // fmix
163 hh ^= hh>>16;
164 hh *= 0x85ebca6bu;
165 hh ^= hh>>13;
166 hh *= 0xc2b2ae35u;
167 hh ^= hh>>16;
168 return hh;
171 uint finish32 () pure { auto res = result32; reset(); return res; } /// resets state
176 * 32-bit implementation of Murmur3
178 * Params:
179 * buf = data buffer
180 * seed = the seed
183 uint murHash32(T) (const(T)[] buf, uint seed=0) nothrow @trusted @nogc if (T.sizeof == 1) {
184 auto hh = MurHash(seed);
185 hh.put(buf);
186 return hh.result32;
192 * MurmurHash3 was written by Austin Appleby, and is placed in the public domain.
194 * Params:
195 * buf = data buffer
196 * seed = the seed
198 * Returns:
199 * 32-bit hash
201 uint murHash32(T) (const(T)[] data, uint seed=0) pure nothrow @trusted @nogc if (T.sizeof == 1) {
202 enum C1 = 0xcc9e2d51u;
203 enum C2 = 0x1b873593u;
205 uint hash = seed; // current value
206 uint k1;
208 // process data blocks
209 if (data.length) {
210 auto bytes = cast(const(ubyte)*)data.ptr;
211 auto len = data.length;
212 // process 32-bit chunks
213 foreach (immutable _; 0..len/4) {
214 version(LittleEndian) {
215 if (__ctfe) {
216 k1 = (bytes[0])|(bytes[1]<<8)|(bytes[2]<<16)|(bytes[3]<<24);
217 } else {
218 k1 = *cast(const(uint)*)bytes;
220 } else {
221 if (__ctfe) {
222 k1 = (bytes[3])|(bytes[2]<<8)|(bytes[1]<<16)|(bytes[0]<<24);
223 } else {
224 import core.bitop : bswap;
225 k1 = bswap(*cast(const(uint)*)bytes);
228 bytes += 4;
229 k1 *= C1;
230 k1 = (k1<<15)|(k1>>(32-15));
231 k1 *= C2;
232 hash ^= k1;
233 hash = (hash<<13)|(hash>>(32-13));
234 hash = hash*5+0xe6546b64;
236 // advance over whole 32-bit chunks, possibly leaving 1..3 bytes
237 if ((len &= 0x03) != 0) {
238 immutable ubyte n = cast(ubyte)len;
239 // append any remaining bytes into carry
240 uint accum = 0;
241 while (len--) accum = (accum>>8)|(*bytes++<<24);
242 // finalize a hash
243 k1 = accum>>((4-n)*8);
244 k1 *= C1;
245 k1 = (k1<<15)|(k1>>(32-15));
246 k1 *= C2;
247 hash ^= k1;
249 hash ^= cast(uint)data.length;
252 // fmix
253 hash ^= hash>>16;
254 hash *= 0x85ebca6bu;
255 hash ^= hash>>13;
256 hash *= 0xc2b2ae35u;
257 hash ^= hash>>16;
259 return hash;
264 * MurmurHash3 was written by Austin Appleby, and is placed in the public domain.
266 * Params:
267 * buf = data buffer
268 * seed = the seed
270 * Returns:
271 * 32-bit hash
273 uint murHash32(T) (const(T)[] data, uint seed=0) pure nothrow @trusted @nogc if (T.sizeof > 1) {
274 return murHash32((cast(const(ubyte)*)buf.ptr)[0..buf.length*T.sizeof]);
278 version(iv_hash_unittest) unittest {
279 // wow, we can do this in compile time!
280 static assert(murHash32("Alice & Miriel") == 0x295db5e7u);
282 static uint xmurhash32(T) (const(T)[] buf, uint seed=0) nothrow @trusted @nogc if (T.sizeof == 1) {
283 auto hh = MurHash(seed);
284 hh.put(buf);
285 return hh.result32;
287 static assert(xmurhash32("Alice & Miriel") == 0x295db5e7u);
291 import std.stdio;
292 writefln("0x%08xU", murHash32("Alice & Miriel"));
295 mixin(import("test.d"));
296 doTest!(32, "MurHash")("Alice & Miriel", 0x295db5e7u);
297 assert(murHash32("Alice & Miriel") == 0x295db5e7u);
298 assert(xmurhash32("Alice & Miriel") == 0x295db5e7u);