Bug 1484096 - Remove use of `fnv` in bloom.rs. r=heycam
authorNicholas Nethercote <nnethercote@mozilla.com>
Fri, 17 Aug 2018 15:33:22 +1000
changeset 487183 f15fd461d2eca64077a42a6bcc04038cb741624e
parent 487182 cf257868689ac19d05c88e0b2607b180373cfcb3
child 487184 8adbf514581c39a16e5b9fd8243db0e91c626c1e
push id9719
push userffxbld-merge
push dateFri, 24 Aug 2018 17:49:46 +0000
treeherdermozilla-beta@719ec98fba77 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersheycam
bugs1484096
milestone63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1484096 - Remove use of `fnv` in bloom.rs. r=heycam To support that, this patch also does the following. - Removes the insert(), remove() and might_contain() methods, because they are specialized versions of insert_hash(), remove_hash(), and might_contain_hash(), and they are only used by tests within this file. - Moves hash() from the top level into create_and_insert_some_stuff(). - Changes create_and_insert_some_stuff() so that instead of hashing consecutive integers, it instead hashes stringified consecutive integers, which matches real usage a little better. - Raises the false_positives limit a little to account for the above changes.
Cargo.lock
servo/components/selectors/Cargo.toml
servo/components/selectors/bloom.rs
servo/components/selectors/lib.rs
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1873,17 +1873,16 @@ version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "selectors"
 version = "0.19.0"
 dependencies = [
  "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "cssparser 0.24.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "phf 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)",
  "phf_codegen 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)",
  "precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_arc 0.1.1",
  "smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
--- a/servo/components/selectors/Cargo.toml
+++ b/servo/components/selectors/Cargo.toml
@@ -19,17 +19,16 @@ path = "lib.rs"
 [features]
 bench = []
 
 [dependencies]
 bitflags = "1.0"
 matches = "0.1"
 cssparser = "0.24.0"
 log = "0.4"
-fnv = "1.0"
 fxhash = "0.2"
 phf = "0.7.18"
 precomputed-hash = "0.1"
 servo_arc = { version = "0.1", path = "../servo_arc" }
 smallvec = "0.6.2"
 thin-slice = "0.1.0"
 
 [build-dependencies]
--- a/servo/components/selectors/bloom.rs
+++ b/servo/components/selectors/bloom.rs
@@ -1,18 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Counting and non-counting Bloom filters tuned for use as ancestor filters
 //! for selector matching.
 
-use fnv::FnvHasher;
 use std::fmt::{self, Debug};
-use std::hash::{Hash, Hasher};
 
 // The top 8 bits of the 32-bit hash value are not used by the bloom filter.
 // Consumers may rely on this to pack hashes more efficiently.
 pub const BLOOM_HASH_MASK: u32 = 0x00ffffff;
 const KEY_SIZE: usize = 12;
 
 const ARRAY_SIZE: usize = 1 << KEY_SIZE;
 const KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
@@ -103,53 +101,37 @@ where
         self.storage.is_zeroed()
     }
 
     #[cfg(not(debug_assertions))]
     pub fn is_zeroed(&self) -> bool {
         unreachable!()
     }
 
+    /// Inserts an item with a particular hash into the bloom filter.
     #[inline]
     pub fn insert_hash(&mut self, hash: u32) {
         self.storage.adjust_first_slot(hash, true);
         self.storage.adjust_second_slot(hash, true);
     }
 
-    /// Inserts an item into the bloom filter.
-    #[inline]
-    pub fn insert<T: Hash>(&mut self, elem: &T) {
-        self.insert_hash(hash(elem))
-    }
-
+    /// Removes an item with a particular hash from the bloom filter.
     #[inline]
     pub fn remove_hash(&mut self, hash: u32) {
         self.storage.adjust_first_slot(hash, false);
         self.storage.adjust_second_slot(hash, false);
     }
 
-    /// Removes an item from the bloom filter.
-    #[inline]
-    pub fn remove<T: Hash>(&mut self, elem: &T) {
-        self.remove_hash(hash(elem))
-    }
-
+    /// Check whether the filter might contain an item with the given hash.
+    /// This can sometimes return true even if the item is not in the filter,
+    /// but will never return false for items that are actually in the filter.
     #[inline]
     pub fn might_contain_hash(&self, hash: u32) -> bool {
         !self.storage.first_slot_is_empty(hash) && !self.storage.second_slot_is_empty(hash)
     }
-
-    /// Check whether the filter might contain an item.  This can
-    /// sometimes return true even if the item is not in the filter,
-    /// but will never return false for items that are actually in the
-    /// filter.
-    #[inline]
-    pub fn might_contain<T: Hash>(&self, elem: &T) -> bool {
-        self.might_contain_hash(hash(elem))
-    }
 }
 
 impl<S> Debug for CountingBloomFilter<S>
 where
     S: BloomStorage,
 {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         let mut slots_used = 0;
@@ -291,76 +273,77 @@ impl Default for BloomStorageBool {
 impl Clone for BloomStorageBool {
     fn clone(&self) -> Self {
         BloomStorageBool {
             counters: self.counters,
         }
     }
 }
 
-fn hash<T: Hash>(elem: &T) -> u32 {
-    // We generally use FxHasher in Stylo because it's faster than FnvHasher,
-    // but the increased collision rate has outsized effect on the bloom
-    // filter, so we use FnvHasher instead here.
-    let mut hasher = FnvHasher::default();
-    elem.hash(&mut hasher);
-    let hash: u64 = hasher.finish();
-    (hash >> 32) as u32 ^ (hash as u32)
-}
-
 #[inline]
 fn hash1(hash: u32) -> u32 {
     hash & KEY_MASK
 }
 
 #[inline]
 fn hash2(hash: u32) -> u32 {
     (hash >> KEY_SIZE) & KEY_MASK
 }
 
 #[test]
 fn create_and_insert_some_stuff() {
+    use fxhash::FxHasher;
+    use std::hash::{Hash, Hasher};
     use std::mem::transmute;
 
+    fn hash_as_str(i: usize) -> u32 {
+        let mut hasher = FxHasher::default();
+        let s = i.to_string();
+        s.hash(&mut hasher);
+        let hash: u64 = hasher.finish();
+        (hash >> 32) as u32 ^ (hash as u32)
+    }
+
     let mut bf = BloomFilter::new();
 
     // Statically assert that ARRAY_SIZE is a multiple of 8, which
     // BloomStorageBool relies on.
     unsafe {
         transmute::<[u8; ARRAY_SIZE % 8], [u8; 0]>([]);
     }
 
     for i in 0_usize..1000 {
-        bf.insert(&i);
+        bf.insert_hash(hash_as_str(i));
     }
 
     for i in 0_usize..1000 {
-        assert!(bf.might_contain(&i));
+        assert!(bf.might_contain_hash(hash_as_str(i)));
     }
 
-    let false_positives = (1001_usize..2000).filter(|i| bf.might_contain(i)).count();
+    let false_positives =
+        (1001_usize..2000).filter(|i| bf.might_contain_hash(hash_as_str(*i))).count();
 
-    assert!(false_positives < 160, "{} is not < 160", false_positives); // 16%.
+    assert!(false_positives < 190, "{} is not < 190", false_positives); // 19%.
 
     for i in 0_usize..100 {
-        bf.remove(&i);
+        bf.remove_hash(hash_as_str(i));
     }
 
     for i in 100_usize..1000 {
-        assert!(bf.might_contain(&i));
+        assert!(bf.might_contain_hash(hash_as_str(i)));
     }
 
-    let false_positives = (0_usize..100).filter(|i| bf.might_contain(i)).count();
+    let false_positives = (0_usize..100).filter(|i| bf.might_contain_hash(hash_as_str(*i))).count();
 
     assert!(false_positives < 20, "{} is not < 20", false_positives); // 20%.
 
     bf.clear();
 
     for i in 0_usize..2000 {
-        assert!(!bf.might_contain(&i));
+        assert!(!bf.might_contain_hash(hash_as_str(i)));
     }
 }
 
 #[cfg(feature = "bench")]
 #[cfg(test)]
 mod bench {
     extern crate test;
     use super::BloomFilter;
--- a/servo/components/selectors/lib.rs
+++ b/servo/components/selectors/lib.rs
@@ -4,17 +4,16 @@
 
 // Make |cargo bench| work.
 #![cfg_attr(feature = "bench", feature(test))]
 
 #[macro_use]
 extern crate bitflags;
 #[macro_use]
 extern crate cssparser;
-extern crate fnv;
 extern crate fxhash;
 #[macro_use]
 extern crate log;
 #[macro_use]
 extern crate matches;
 extern crate phf;
 extern crate precomputed_hash;
 extern crate servo_arc;