September 7, 2015

Another Murmur3 implementation - 128 bit output, 64 bit platform version

Another Murmur3 implementation - 128 bit output, 64 bit platform version
/// <summary>
/// Taken from here - http://blog.teamleadnet.com/2012/08/murmurhash3-ultra-fast-hash-algorithm.html
/// but corrected the code and made the class re-usable, the original will only work on the first call
/// There's another version here: http://pastebin.com/aP8aRRHK
/// Corrected the code using code from https://github.com/darrenkopp/murmurhash-net/blob/master/MurmurHash/Unmanaged/Murmur128UnmanagedX64.cs
/// In ProcessBytes both h1 and h2 should be set to the seed
/// 128 bit output, 64 bit platform version
/// </summary>
internal class Murmur3
{
 // 128 bit output, 64 bit platform version
 private static readonly ulong READ_SIZE = 16;
 private static readonly ulong C1 = 0x87c37b91114253d5L;
 private static readonly ulong C2 = 0x4cf5ad432745937fL;

 private ulong length;
 private uint seed = 0; // if want to start with a seed, create a constructor
 private ulong h1;
 private ulong h2;


 /// <summary>
 /// Murmur3 constructor
 /// </summary>
 /// <param name="seed"></param>
 public Murmur3(uint seed = 0)
 {
  this.seed = seed;
 }

 /// <summary>
 /// Compute a hash from an input byte array
 /// </summary>
 /// <param name="input"></param>
 /// <returns></returns>
 public byte[] ComputeHash(byte[] input)
 {
  ProcessBytes(input);
  return Hash;
 }

 /// <summary>
 /// Create a string hash from an input string
 /// </summary>
 /// <param name="input"></param>
 /// <returns></returns>
 public string ComputeHash(string input)
 {
  Byte[] inBytes = Encoding.UTF8.GetBytes(input);
  Byte[] hash = this.ComputeHash(inBytes);
  string output = Convert.ToBase64String(hash);
  return output.TrimEnd('='); // There can be up to 2 trailing '=' characters which are just for padding (Not required for a hash)
 }

 #region Private Methods

 
 /// <summary>
 /// 
 /// </summary>
 private byte[] Hash
 {
  get
  {
   h1 ^= length;
   h2 ^= length;

   h1 += h2;
   h2 += h1;

   h1 = Murmur3.MixFinal(h1);
   h2 = Murmur3.MixFinal(h2);

   h1 += h2;
   h2 += h1;

   var hash = new byte[Murmur3.READ_SIZE];

   Array.Copy(BitConverter.GetBytes(h1), 0, hash, 0, 8);
   Array.Copy(BitConverter.GetBytes(h2), 0, hash, 8, 8);

   return hash;
  }
 }
 
 private void MixBody(ulong k1, ulong k2)
 {
  h1 ^= MixKey1(k1);

  h1 = h1.RotateLeft(27);
  h1 += h2;
  h1 = h1*5 + 0x52dce729;

  h2 ^= MixKey2(k2);

  h2 = h2.RotateLeft(31);
  h2 += h1;
  h2 = h2*5 + 0x38495ab5;
 }

 private static ulong MixKey1(ulong k1)
 {
  k1 *= C1;
  k1 = k1.RotateLeft(31);
  k1 *= C2;
  return k1;
 }

 private static ulong MixKey2(ulong k2)
 {
  k2 *= C2;
  k2 = k2.RotateLeft(33);
  k2 *= C1;
  return k2;
 }

 private static ulong MixFinal(ulong k)
 {
  // avalanche bits

  k ^= k >> 33;
  k *= 0xff51afd7ed558ccdL;
  k ^= k >> 33;
  k *= 0xc4ceb9fe1a85ec53L;
  k ^= k >> 33;
  return k;
 }

 private void ProcessBytes(byte[] bb)
 {
  h2 = seed;
  h1 = seed;
  this.length = 0L;

  int pos = 0;
  ulong remaining = (ulong) bb.Length;

  // read 128 bits, 16 bytes, 2 longs in eacy cycle
  while (remaining >= READ_SIZE)
  {
   ulong k1 = bb.GetUInt64(pos);
   pos += 8;

   ulong k2 = bb.GetUInt64(pos);
   pos += 8;

   length += READ_SIZE;
   remaining -= READ_SIZE;

   MixBody(k1, k2);
  }

  // if the input MOD 16 != 0
  if (remaining > 0)
   ProcessBytesRemaining(bb, remaining, pos);
 }

 private void ProcessBytesRemaining(byte[] bb, ulong remaining, int pos)
 {
  ulong k1 = 0;
  ulong k2 = 0;
  length += remaining;

  // little endian (x86) processing
  switch (remaining)
  {
   case 15:
    k2 ^= (ulong) bb[pos + 14] << 48; // fall through
    goto case 14;
   case 14:
    k2 ^= (ulong) bb[pos + 13] << 40; // fall through
    goto case 13;
   case 13:
    k2 ^= (ulong) bb[pos + 12] << 32; // fall through
    goto case 12;
   case 12:
    k2 ^= (ulong) bb[pos + 11] << 24; // fall through
    goto case 11;
   case 11:
    k2 ^= (ulong) bb[pos + 10] << 16; // fall through
    goto case 10;
   case 10:
    k2 ^= (ulong) bb[pos + 9] << 8; // fall through
    goto case 9;
   case 9:
    k2 ^= (ulong) bb[pos + 8]; // fall through
    goto case 8;
   case 8:
    k1 ^= bb.GetUInt64(pos);
    break;
   case 7:
    k1 ^= (ulong) bb[pos + 6] << 48; // fall through
    goto case 6;
   case 6:
    k1 ^= (ulong) bb[pos + 5] << 40; // fall through
    goto case 5;
   case 5:
    k1 ^= (ulong) bb[pos + 4] << 32; // fall through
    goto case 4;
   case 4:
    k1 ^= (ulong) bb[pos + 3] << 24; // fall through
    goto case 3;
   case 3:
    k1 ^= (ulong) bb[pos + 2] << 16; // fall through
    goto case 2;
   case 2:
    k1 ^= (ulong) bb[pos + 1] << 8; // fall through
    goto case 1;
   case 1:
    k1 ^= (ulong) bb[pos]; // fall through
    break;
   default:
    throw new Exception("Something went wrong with remaining bytes calculation.");
  }

  h1 ^= MixKey1(k1);
  h2 ^= MixKey2(k2);
 }

 #endregion Private Methods

}

/// <summary>
/// Some helper functions for reading 64 bit integers from a byte array and rotating unsigned longs:
/// </summary>
public static class IntHelpers
{
 /// <summary>
 /// 
 /// </summary>
 /// <param name="original"></param>
 /// <param name="bits"></param>
 /// <returns></returns>
 public static ulong RotateLeft(this ulong original, int bits)
 {
  return (original << bits) | (original >> (64 - bits));
 }

 /// <summary>
 /// 
 /// </summary>
 /// <param name="original"></param>
 /// <param name="bits"></param>
 /// <returns></returns>
 public static ulong RotateRight(this ulong original, int bits)
 {
  return (original >> bits) | (original << (64 - bits));
 }

 /// <summary>
 /// 
 /// </summary>
 /// <param name="bb"></param>
 /// <param name="pos"></param>
 /// <returns></returns>
 public static unsafe ulong GetUInt64(this byte[] bb, int pos)
 {
  // we only read aligned longs, so a simple casting is enough
  fixed (byte* pbyte = &bb[pos])
  {
   return *((ulong*) pbyte);
  }
 }
}
Unit tests to verify the implementation (but it also shows how to use it).
// Test vectors taken from here: https://github.com/karanlyons/murmurHash3.js/blob/master/tests.html
[Test]
public void Murmur3ValidityTests()
{
 TestVector("I will not buy this tobacconist's, it is scratched.", 0, new ulong[] { 0xd30654abbd8227e3, 0x67d73523f0079673 });
 TestVector("", 0, new ulong[] { 0x0000000000000000, 0x0000000000000000 });
 TestVector("0", 0, new ulong[] { 0x2ac9debed546a380, 0x3a8de9e53c875e09 });
 TestVector("01", 0, new ulong[] { 0x649e4eaa7fc1708e, 0xe6945110230f2ad6 });
 TestVector("012", 0, new ulong[] { 0xce68f60d7c353bdb, 0x00364cd5936bf18a });
 TestVector("0123", 0, new ulong[] { 0x0f95757ce7f38254, 0xb4c67c9e6f12ab4b });
 TestVector("01234", 0, new ulong[] { 0x0f04e459497f3fc1, 0xeccc6223a28dd613 });
 TestVector("012345", 0, new ulong[] { 0x88c0a92586be0a27, 0x81062d6137728244 });
 TestVector("0123456", 0, new ulong[] { 0x13eb9fb82606f7a6, 0xb4ebef492fdef34e });
 TestVector("01234567", 0, new ulong[] { 0x8236039b7387354d, 0xc3369387d8964920 });
 TestVector("012345678", 0, new ulong[] { 0x4c1e87519fe738ba, 0x72a17af899d597f1 });
 TestVector("0123456789", 0, new ulong[] { 0x3f9652ac3effeb24, 0x8027a17cf2990b07 });
 TestVector("0123456789a", 0, new ulong[] { 0x4bc3eacd29d38629, 0x7cb2d9e797da9c92 });
 TestVector("0123456789ab", 0, new ulong[] { 0x66352b8cee9e3ca7, 0xa9edf0b381a8fc58 });
 TestVector("0123456789abc", 0, new ulong[] { 0x5eb2f8db4265931e, 0x801ce853e61d0ab7 });
 TestVector("0123456789abcd", 0, new ulong[] { 0x07a4a014dd59f71a, 0xaaf437854cd22231 });
 TestVector("0123456789abcde", 0, new ulong[] { 0xa62dd5f6c0bf2351, 0x4fccf50c7c544cf0 });
 TestVector("0123456789abcdef", 0, new ulong[] { 0x4be06d94cf4ad1a7, 0x87c35b5c63a708da });
 TestVector("", 1, new ulong[] { 0x4610abe56eff5cb5, 0x51622daa78f83583 });
}

private void TestVector(string key, uint seed, ulong[] hash)
{
 var hasher = new Murmur3(seed);
 Byte[] inBytes = Encoding.UTF8.GetBytes(key);
 Byte[] res = hasher.ComputeHash(inBytes);
 ulong check0 = BitConverter.ToUInt64(res, 0);
 ulong check1 = BitConverter.ToUInt64(res, 8);
 Assert.That(hash[0] == check0);
 Assert.That(hash[1] == check1);
}

No comments: