Another Murmur3 implementation - 128 bit output, 64 bit platform version
/// <summary> /// Taken from here - http://blog.teamleadnet.com/2012/08/murmurhash3-ultra-fast-hash-algorithm.html /// but corrected the code and made the class re-usable, the original will only work on the first call /// There's another version here: http://pastebin.com/aP8aRRHK /// Corrected the code using code from https://github.com/darrenkopp/murmurhash-net/blob/master/MurmurHash/Unmanaged/Murmur128UnmanagedX64.cs /// In ProcessBytes both h1 and h2 should be set to the seed /// 128 bit output, 64 bit platform version /// </summary> internal class Murmur3 { // 128 bit output, 64 bit platform version private static readonly ulong READ_SIZE = 16; private static readonly ulong C1 = 0x87c37b91114253d5L; private static readonly ulong C2 = 0x4cf5ad432745937fL; private ulong length; private uint seed = 0; // if want to start with a seed, create a constructor private ulong h1; private ulong h2; /// <summary> /// Murmur3 constructor /// </summary> /// <param name="seed"></param> public Murmur3(uint seed = 0) { this.seed = seed; } /// <summary> /// Compute a hash from an input byte array /// </summary> /// <param name="input"></param> /// <returns></returns> public byte[] ComputeHash(byte[] input) { ProcessBytes(input); return Hash; } /// <summary> /// Create a string hash from an input string /// </summary> /// <param name="input"></param> /// <returns></returns> public string ComputeHash(string input) { Byte[] inBytes = Encoding.UTF8.GetBytes(input); Byte[] hash = this.ComputeHash(inBytes); string output = Convert.ToBase64String(hash); return output.TrimEnd('='); // There can be up to 2 trailing '=' characters which are just for padding (Not required for a hash) } #region Private Methods /// <summary> /// /// </summary> private byte[] Hash { get { h1 ^= length; h2 ^= length; h1 += h2; h2 += h1; h1 = Murmur3.MixFinal(h1); h2 = Murmur3.MixFinal(h2); h1 += h2; h2 += h1; var hash = new byte[Murmur3.READ_SIZE]; Array.Copy(BitConverter.GetBytes(h1), 0, hash, 0, 8); Array.Copy(BitConverter.GetBytes(h2), 0, hash, 8, 8); return hash; } } private void MixBody(ulong k1, ulong k2) { h1 ^= MixKey1(k1); h1 = h1.RotateLeft(27); h1 += h2; h1 = h1*5 + 0x52dce729; h2 ^= MixKey2(k2); h2 = h2.RotateLeft(31); h2 += h1; h2 = h2*5 + 0x38495ab5; } private static ulong MixKey1(ulong k1) { k1 *= C1; k1 = k1.RotateLeft(31); k1 *= C2; return k1; } private static ulong MixKey2(ulong k2) { k2 *= C2; k2 = k2.RotateLeft(33); k2 *= C1; return k2; } private static ulong MixFinal(ulong k) { // avalanche bits k ^= k >> 33; k *= 0xff51afd7ed558ccdL; k ^= k >> 33; k *= 0xc4ceb9fe1a85ec53L; k ^= k >> 33; return k; } private void ProcessBytes(byte[] bb) { h2 = seed; h1 = seed; this.length = 0L; int pos = 0; ulong remaining = (ulong) bb.Length; // read 128 bits, 16 bytes, 2 longs in eacy cycle while (remaining >= READ_SIZE) { ulong k1 = bb.GetUInt64(pos); pos += 8; ulong k2 = bb.GetUInt64(pos); pos += 8; length += READ_SIZE; remaining -= READ_SIZE; MixBody(k1, k2); } // if the input MOD 16 != 0 if (remaining > 0) ProcessBytesRemaining(bb, remaining, pos); } private void ProcessBytesRemaining(byte[] bb, ulong remaining, int pos) { ulong k1 = 0; ulong k2 = 0; length += remaining; // little endian (x86) processing switch (remaining) { case 15: k2 ^= (ulong) bb[pos + 14] << 48; // fall through goto case 14; case 14: k2 ^= (ulong) bb[pos + 13] << 40; // fall through goto case 13; case 13: k2 ^= (ulong) bb[pos + 12] << 32; // fall through goto case 12; case 12: k2 ^= (ulong) bb[pos + 11] << 24; // fall through goto case 11; case 11: k2 ^= (ulong) bb[pos + 10] << 16; // fall through goto case 10; case 10: k2 ^= (ulong) bb[pos + 9] << 8; // fall through goto case 9; case 9: k2 ^= (ulong) bb[pos + 8]; // fall through goto case 8; case 8: k1 ^= bb.GetUInt64(pos); break; case 7: k1 ^= (ulong) bb[pos + 6] << 48; // fall through goto case 6; case 6: k1 ^= (ulong) bb[pos + 5] << 40; // fall through goto case 5; case 5: k1 ^= (ulong) bb[pos + 4] << 32; // fall through goto case 4; case 4: k1 ^= (ulong) bb[pos + 3] << 24; // fall through goto case 3; case 3: k1 ^= (ulong) bb[pos + 2] << 16; // fall through goto case 2; case 2: k1 ^= (ulong) bb[pos + 1] << 8; // fall through goto case 1; case 1: k1 ^= (ulong) bb[pos]; // fall through break; default: throw new Exception("Something went wrong with remaining bytes calculation."); } h1 ^= MixKey1(k1); h2 ^= MixKey2(k2); } #endregion Private Methods } /// <summary> /// Some helper functions for reading 64 bit integers from a byte array and rotating unsigned longs: /// </summary> public static class IntHelpers { /// <summary> /// /// </summary> /// <param name="original"></param> /// <param name="bits"></param> /// <returns></returns> public static ulong RotateLeft(this ulong original, int bits) { return (original << bits) | (original >> (64 - bits)); } /// <summary> /// /// </summary> /// <param name="original"></param> /// <param name="bits"></param> /// <returns></returns> public static ulong RotateRight(this ulong original, int bits) { return (original >> bits) | (original << (64 - bits)); } /// <summary> /// /// </summary> /// <param name="bb"></param> /// <param name="pos"></param> /// <returns></returns> public static unsafe ulong GetUInt64(this byte[] bb, int pos) { // we only read aligned longs, so a simple casting is enough fixed (byte* pbyte = &bb[pos]) { return *((ulong*) pbyte); } } }Unit tests to verify the implementation (but it also shows how to use it).
// Test vectors taken from here: https://github.com/karanlyons/murmurHash3.js/blob/master/tests.html [Test] public void Murmur3ValidityTests() { TestVector("I will not buy this tobacconist's, it is scratched.", 0, new ulong[] { 0xd30654abbd8227e3, 0x67d73523f0079673 }); TestVector("", 0, new ulong[] { 0x0000000000000000, 0x0000000000000000 }); TestVector("0", 0, new ulong[] { 0x2ac9debed546a380, 0x3a8de9e53c875e09 }); TestVector("01", 0, new ulong[] { 0x649e4eaa7fc1708e, 0xe6945110230f2ad6 }); TestVector("012", 0, new ulong[] { 0xce68f60d7c353bdb, 0x00364cd5936bf18a }); TestVector("0123", 0, new ulong[] { 0x0f95757ce7f38254, 0xb4c67c9e6f12ab4b }); TestVector("01234", 0, new ulong[] { 0x0f04e459497f3fc1, 0xeccc6223a28dd613 }); TestVector("012345", 0, new ulong[] { 0x88c0a92586be0a27, 0x81062d6137728244 }); TestVector("0123456", 0, new ulong[] { 0x13eb9fb82606f7a6, 0xb4ebef492fdef34e }); TestVector("01234567", 0, new ulong[] { 0x8236039b7387354d, 0xc3369387d8964920 }); TestVector("012345678", 0, new ulong[] { 0x4c1e87519fe738ba, 0x72a17af899d597f1 }); TestVector("0123456789", 0, new ulong[] { 0x3f9652ac3effeb24, 0x8027a17cf2990b07 }); TestVector("0123456789a", 0, new ulong[] { 0x4bc3eacd29d38629, 0x7cb2d9e797da9c92 }); TestVector("0123456789ab", 0, new ulong[] { 0x66352b8cee9e3ca7, 0xa9edf0b381a8fc58 }); TestVector("0123456789abc", 0, new ulong[] { 0x5eb2f8db4265931e, 0x801ce853e61d0ab7 }); TestVector("0123456789abcd", 0, new ulong[] { 0x07a4a014dd59f71a, 0xaaf437854cd22231 }); TestVector("0123456789abcde", 0, new ulong[] { 0xa62dd5f6c0bf2351, 0x4fccf50c7c544cf0 }); TestVector("0123456789abcdef", 0, new ulong[] { 0x4be06d94cf4ad1a7, 0x87c35b5c63a708da }); TestVector("", 1, new ulong[] { 0x4610abe56eff5cb5, 0x51622daa78f83583 }); } private void TestVector(string key, uint seed, ulong[] hash) { var hasher = new Murmur3(seed); Byte[] inBytes = Encoding.UTF8.GetBytes(key); Byte[] res = hasher.ComputeHash(inBytes); ulong check0 = BitConverter.ToUInt64(res, 0); ulong check1 = BitConverter.ToUInt64(res, 8); Assert.That(hash[0] == check0); Assert.That(hash[1] == check1); }
No comments:
Post a Comment