Bug 729952 - Part 1: Add a better hash function to mfbt. r=waldo
author Justin Lebar <justin.lebar@gmail.com>
Fri, 02 Mar 2012 17:18:21 -0500
changeset 88374 69e8dd5e9201e4608a0d603ae9c9287bf8687d7d
parent 88373 7fdea5401cf54212ee8777f3682814bba56c5d6b
child 88375 73831fbed59f5e31cc325cf5c1f8b49866d5f5bd
push id 157
push user Ms2ger@gmail.com
push date Wed, 07 Mar 2012 19:27:10 +0000
reviewers waldo
bugs 729952
milestone 13.0a1
Bug 729952 - Part 1: Add a better hash function to mfbt. r=waldo
mfbt/Attributes.h
mfbt/HashFunctions.h
mfbt/exported_headers.mk
--- a/mfbt/Attributes.h
+++ b/mfbt/Attributes.h
@@ -315,11 +315,30 @@
  * only as documentation.
  */
 #if defined(MOZ_HAVE_CXX11_FINAL)
 #  define MOZ_FINAL             MOZ_HAVE_CXX11_FINAL
 #else
 #  define MOZ_FINAL             /* no support */
 #endif
 
+/**
+ * MOZ_WARN_UNUSED_RESULT tells the compiler to emit a warning if a function's
+ * return value is not used by the caller.  When neither __GNUC__ nor __clang__
+ * is defined, the macro expands to nothing and has no effect.
+ *
+ * Place it at the very beginning of a function declaration or definition:
+ *
+ *   MOZ_WARN_UNUSED_RESULT int foo();
+ *
+ * or
+ *
+ *   MOZ_WARN_UNUSED_RESULT int foo() { return 42; }
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#  define MOZ_WARN_UNUSED_RESULT __attribute__ ((warn_unused_result))
+#else
+#  define MOZ_WARN_UNUSED_RESULT
+#endif
+
 #endif /* __cplusplus */
 
 #endif  /* mozilla_Attributes_h_ */
new file mode 100644
--- /dev/null
+++ b/mfbt/HashFunctions.h
@@ -0,0 +1,128 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sw=2 et tw=99 ft=cpp:
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Utilities for hashing */
+
+#ifndef mozilla_HashFunctions_h_
+#define mozilla_HashFunctions_h_
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/StandardInteger.h"
+
+#ifdef __cplusplus
+namespace mozilla {
+
+/**
+ * The golden ratio as a 32-bit fixed-point value: the fractional part of the
+ * golden ratio, (sqrt(5) - 1) / 2, scaled by 2^32.  AddToHash multiplies by it.
+ */
+static const uint32_t GoldenRatioU32 = 0x9E3779B9U;
+
+/**
+ * Rotate the 32-bit |value| left by |bits| positions; bits shifted out of the
+ * top re-enter at the bottom.  |bits| must be less than 32; this is checked
+ * with MOZ_ASSERT.
+ */
+inline uint32_t
+RotateLeft32(uint32_t value, uint8_t bits)
+{
+  MOZ_ASSERT(bits < 32);
+  /*
+   * Mask the right-shift count.  When |bits| is 0, 32 - bits is 32, and
+   * shifting a 32-bit value by its full width is undefined behavior.  With the
+   * mask a rotation by 0 returns |value| unchanged, and every other in-range
+   * count yields the same result as before.
+   */
+  return (value << bits) | (value >> ((32 - bits) & 31));
+}
+
+/**
+ * Add the given value(s) to the given hashcode and return the new hashcode.
+ *
+ * AddToHash(h, x, y) is equivalent to AddToHash(AddToHash(h, x), y).
+ */
+MOZ_WARN_UNUSED_RESULT
+inline uint32_t
+AddToHash(uint32_t hash, uint32_t value)
+{
+  /*
+   * This is not a sophisticated hash routine, but it seems to work well for our
+   * mostly plain-text inputs.  Implementation notes follow.
+   *
+   * Our use of the golden ratio here is arbitrary; we could pick almost any
+   * number which:
+   *
+   *  * is odd (because otherwise, all our hash values will be even)
+   *
+   *  * has a reasonably-even mix of 1's and 0's (consider the extreme case
+   *    where we multiply by 0x3 or 0xeffffff -- this will not produce good
+   *    mixing across all bits of the hash).
+   *
+   * The rotation length of 5 is also arbitrary, although an odd number is again
+   * preferable so our hash explores the whole universe of possible rotations.
+   *
+   * Finally, we multiply by the golden ratio *after* xor'ing, not before.
+   * Otherwise, if |hash| is 0 (as it often is for the beginning of a message),
+   * the expression
+   *
+   *   (GoldenRatioU32 * RotateLeft32(hash, 5)) ^ value
+   *
+   * evaluates to |value|.
+   *
+   * (Number-theoretic aside: Because any odd number |m| is relatively prime to
+   * our modulus (2^32), the list
+   *
+   *    [x * m (mod 2^32) for 0 <= x < 2^32]
+   *
+   * has no duplicate elements.  This means that multiplying by |m| does not
+   * cause us to skip any possible hash values.
+   *
+   * It's also nice if |m| has larger order mod 2^32 -- that is, if the smallest
+   * k such that m^k == 1 (mod 2^32) is large -- so we can safely multiply our
+   * hash value by |m| a few times without negating the multiplicative effect.
+   * Our golden ratio constant has order 2^29, which is more than enough for our
+   * purposes.)
+   */
+  return GoldenRatioU32 * (RotateLeft32(hash, 5) ^ value);
+}
+
+/*
+ * Convenience overloads: each folds v1, v2, ... into |hash| from left to
+ * right by repeated application of the two-argument AddToHash above.
+ */
+MOZ_WARN_UNUSED_RESULT
+inline uint32_t
+AddToHash(uint32_t hash, uint32_t v1, uint32_t v2)
+{
+  return AddToHash(AddToHash(hash, v1), v2);
+}
+
+MOZ_WARN_UNUSED_RESULT
+inline uint32_t
+AddToHash(uint32_t hash, uint32_t v1, uint32_t v2, uint32_t v3)
+{
+  return AddToHash(AddToHash(hash, v1, v2), v3);
+}
+
+MOZ_WARN_UNUSED_RESULT
+inline uint32_t
+AddToHash(uint32_t hash, uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4)
+{
+  return AddToHash(AddToHash(hash, v1, v2, v3), v4);
+}
+
+MOZ_WARN_UNUSED_RESULT
+inline uint32_t
+AddToHash(uint32_t hash, uint32_t v1, uint32_t v2, uint32_t v3, uint32_t v4, uint32_t v5)
+{
+  return AddToHash(AddToHash(hash, v1, v2, v3, v4), v5);
+}
+
+} /* namespace mozilla */
+#endif /* __cplusplus */
+#endif /* mozilla_HashFunctions_h_ */
--- a/mfbt/exported_headers.mk
+++ b/mfbt/exported_headers.mk
@@ -41,16 +41,17 @@
 
 EXPORTS_NAMESPACES += mozilla
 
 EXPORTS_mozilla += \
   Assertions.h \
   Attributes.h \
   BloomFilter.h \
   GuardObjects.h \
+  HashFunctions.h \
   Likely.h \
   LinkedList.h \
   MSStdInt.h \
   RangedPtr.h \
   RefPtr.h \
   StandardInteger.h \
   Types.h \
   Util.h \