servo: Merge #15119 - Add style performance statistics to Servo (from bholley:traversal_stats); r=emilio
author Bobby Holley <bobbyholley@gmail.com>
Thu, 19 Jan 2017 15:27:34 -0800
changeset 340613 0e346b3529c123b7b505a8c48ca0d7ebcbbc7f48
parent 340612 dac055ec3a1255b7fc23c279a7e91016c33ebd17
child 340614 1383f4886eaa98d20f3581e7dcbaa09f50ecd0d8
push id 31307
push user gszorc@mozilla.com
push date Sat, 04 Feb 2017 00:59:06 +0000
treeherder mozilla-central@94079d43835f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers emilio
servo: Merge #15119 - Add style performance statistics to Servo (from bholley:traversal_stats); r=emilio Gecko bug: https://bugzilla.mozilla.org/show_bug.cgi?id=1331856 Source-Repo: https://github.com/servo/servo Source-Revision: f8418a328435b114f6d8bb41c378a5bf0dfa4428
servo/components/style/context.rs
servo/components/style/parallel.rs
servo/components/style/scoped_tls.rs
servo/components/style/sequential.rs
servo/components/style/traversal.rs
--- a/servo/components/style/context.rs
+++ b/servo/components/style/context.rs
@@ -11,16 +11,19 @@ use bloom::StyleBloom;
 use data::ElementData;
 use dom::{OpaqueNode, TNode, TElement};
 use error_reporting::ParseErrorReporter;
 use euclid::Size2D;
 use matching::StyleSharingCandidateCache;
 use parking_lot::RwLock;
 use properties::ComputedValues;
 use std::collections::HashMap;
+use std::env;
+use std::fmt;
+use std::ops::Add;
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::Sender;
 use stylist::Stylist;
 use timer::Timer;
 
 /// This structure is used to create a local style context from a shared one.
 pub struct ThreadLocalStyleContextCreationInfo {
     new_animations_sender: Sender<Animation>,
@@ -97,40 +100,102 @@ struct CurrentElementInfo {
     /// The element being processed. Currently we use an OpaqueNode since we only
     /// use this for identity checks, but we could use SendElement if there were
     /// a good reason to.
     element: OpaqueNode,
     /// Whether the element is being styled for the first time.
     is_initial_style: bool,
 }
 
+/// Counters gathered during a style traversal. Each thread keeps its own instance,
+/// and the instances are summed after the threads join via the `Add` impl below.
+#[derive(Default)]
+pub struct TraversalStatistics {
+    /// The total number of elements traversed.
+    pub elements_traversed: u32,
+    /// The number of elements where has_styles() went from false to true.
+    pub elements_styled: u32,
+    /// The number of elements for which we performed selector matching.
+    pub elements_matched: u32,
+    /// The number of cache hits from the `StyleSharingCandidateCache`.
+    pub styles_shared: u32,
+}
+
+/// Field-wise sum of two `TraversalStatistics`, used to aggregate across threads.
+impl<'a> Add for &'a TraversalStatistics {
+    type Output = TraversalStatistics;
+    fn add(self, other: Self) -> TraversalStatistics {
+        TraversalStatistics {
+            elements_traversed: self.elements_traversed + other.elements_traversed,
+            elements_styled: self.elements_styled + other.elements_styled,
+            elements_matched: self.elements_matched + other.elements_matched,
+            styles_shared: self.styles_shared + other.styles_shared,
+        }
+    }
+}
+
+/// Emits a `[PERF]`-tagged block, one counter per line, for the performance test harness.
+/// See https://bugzilla.mozilla.org/show_bug.cgi?id=1331856#c2
+impl fmt::Display for TraversalStatistics {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        try!(writeln!(f, "[PERF] perf block start"));
+        try!(writeln!(f, "[PERF],elements_traversed,{}", self.elements_traversed));
+        try!(writeln!(f, "[PERF],elements_styled,{}", self.elements_styled));
+        try!(writeln!(f, "[PERF],elements_matched,{}", self.elements_matched));
+        try!(writeln!(f, "[PERF],styles_shared,{}", self.styles_shared));
+        writeln!(f, "[PERF] perf block end")
+    }
+}
+
+lazy_static! {
+    /// Whether to dump style statistics: true iff DUMP_STYLE_STATISTICS is readable and
+    /// non-empty. We use an environment variable so that this is easy to set for Gecko
+    /// builds, and matches the mechanism used to dump Gecko style system statistics.
+    static ref DUMP_STYLE_STATISTICS: bool = {
+        match env::var("DUMP_STYLE_STATISTICS") {
+            Ok(s) => !s.is_empty(),
+            Err(_) => false,
+        }
+    };
+}
+
+impl TraversalStatistics {
+    /// Returns whether the DUMP_STYLE_STATISTICS environment flag enables dumping.
+    pub fn should_dump() -> bool {
+        *DUMP_STYLE_STATISTICS
+    }
+}
+
 /// A thread-local style context.
 ///
 /// This context contains data that needs to be used during restyling, but is
 /// not required to be unique among worker threads, so we create one per worker
 /// thread in order to be able to mutate it without locking.
 pub struct ThreadLocalStyleContext<E: TElement> {
     /// A cache to share style among siblings.
     pub style_sharing_candidate_cache: StyleSharingCandidateCache<E>,
     /// The bloom filter used to fast-reject selector-matching.
     pub bloom_filter: StyleBloom<E>,
     /// A channel on which new animations that have been triggered by style
     /// recalculation can be sent.
     pub new_animations_sender: Sender<Animation>,
+    /// Traversal statistics accumulated through this thread-local context.
+    pub statistics: TraversalStatistics,
     /// Information related to the current element, non-None during processing.
     current_element_info: Option<CurrentElementInfo>,
 }
 
 impl<E: TElement> ThreadLocalStyleContext<E> {
     /// Creates a new `ThreadLocalStyleContext` from a shared one.
     pub fn new(shared: &SharedStyleContext) -> Self {
         ThreadLocalStyleContext {
             style_sharing_candidate_cache: StyleSharingCandidateCache::new(),
             bloom_filter: StyleBloom::new(),
             new_animations_sender: shared.local_context_creation_data.lock().unwrap().new_animations_sender.clone(),
+            statistics: TraversalStatistics::default(),
             current_element_info: None,
         }
     }
 
     /// Notes when the style system starts traversing an element.
     pub fn begin_element(&mut self, element: E, data: &ElementData) {
         debug_assert!(self.current_element_info.is_none());
         self.current_element_info = Some(CurrentElementInfo {
--- a/servo/components/style/parallel.rs
+++ b/servo/components/style/parallel.rs
@@ -17,44 +17,39 @@
 //! share them with other threads. In the parallel traversal, we explicitly
 //! invoke |unsafe { SendNode::new(n) }| to put nodes in containers that may
 //! be sent to other threads. This occurs in only a handful of places and is
 //! easy to grep for. At the time of this writing, there is no other unsafe
 //! code in the parallel traversal.
 
 #![deny(missing_docs)]
 
+use context::TraversalStatistics;
 use dom::{OpaqueNode, SendNode, TElement, TNode};
 use rayon;
 use scoped_tls::ScopedTLS;
 use servo_config::opts;
-use std::sync::atomic::Ordering;
+use std::borrow::Borrow;
 use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
-use traversal::{STYLE_SHARING_CACHE_HITS, STYLE_SHARING_CACHE_MISSES};
 
 /// The chunk size used to split the parallel traversal nodes.
 ///
 /// We send each `CHUNK_SIZE` nodes as a different work unit to the work queue.
 pub const CHUNK_SIZE: usize = 64;
 
 /// A parallel top down traversal, generic over `D`.
 #[allow(unsafe_code)]
 pub fn traverse_dom<E, D>(traversal: &D,
                           root: E,
                           known_root_dom_depth: Option<usize>,
                           token: PreTraverseToken,
                           queue: &rayon::ThreadPool)
     where E: TElement,
           D: DomTraversal<E>,
 {
-    if opts::get().style_sharing_stats {
-        STYLE_SHARING_CACHE_HITS.store(0, Ordering::SeqCst);
-        STYLE_SHARING_CACHE_MISSES.store(0, Ordering::SeqCst);
-    }
-
     // Handle Gecko's eager initial styling. We don't currently support it
     // in conjunction with bottom-up traversal. If we did, we'd need to put
     // it on the context to make it available to the bottom-up phase.
     let (nodes, depth) = if token.traverse_unstyled_children_only() {
         debug_assert!(!D::needs_postorder_traversal());
         let mut children = vec![];
         for kid in root.as_node().children() {
             if kid.as_element().map_or(false, |el| el.get_data().is_none()) {
@@ -73,23 +68,26 @@ pub fn traverse_dom<E, D>(traversal: &D,
     let root = root.as_node().opaque();
 
     queue.install(|| {
         rayon::scope(|scope| {
             traverse_nodes(nodes, root, traversal_data, scope, traversal, &tls);
         });
     });
 
-    if opts::get().style_sharing_stats {
-        let hits = STYLE_SHARING_CACHE_HITS.load(Ordering::SeqCst);
-        let misses = STYLE_SHARING_CACHE_MISSES.load(Ordering::SeqCst);
-
-        println!("Style sharing stats:");
-        println!(" * Hits: {}", hits);
-        println!(" * Misses: {}", misses);
+    // Dump statistics to stdout if requested.
+    if TraversalStatistics::should_dump() || opts::get().style_sharing_stats {
+        let slots = unsafe { tls.unsafe_get() };
+        let aggregate = slots.iter().fold(TraversalStatistics::default(), |acc, t| {
+            match *t.borrow() {
+                None => acc,
+                Some(ref cx) => &cx.borrow().statistics + &acc,
+            }
+        });
+        println!("{}", aggregate);
     }
 }
 
 /// A parallel top-down DOM traversal.
 #[inline(always)]
 #[allow(unsafe_code)]
 fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
                                   root: OpaqueNode,
--- a/servo/components/style/scoped_tls.rs
+++ b/servo/components/style/scoped_tls.rs
@@ -56,9 +56,15 @@ impl<'scope, T: Send> ScopedTLS<'scope, 
     pub fn ensure<F: FnOnce() -> T>(&self, f: F) -> RefMut<T> {
         let mut opt = self.borrow_mut();
         if opt.is_none() {
             *opt = Some(f());
         }
 
         RefMut::map(opt, |x| x.as_mut().unwrap())
     }
+
+    /// Returns a shared view of all the slots. Safety: the caller must know
+    /// that the pool does not have access to the TLS while using this.
+    pub unsafe fn unsafe_get(&self) -> &[RefCell<Option<T>>] {
+        &self.slots
+    }
 }
--- a/servo/components/style/sequential.rs
+++ b/servo/components/style/sequential.rs
@@ -1,17 +1,19 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Implements sequential traversal over the DOM tree.
 
 #![deny(missing_docs)]
 
+use context::TraversalStatistics;
 use dom::{TElement, TNode};
+use std::borrow::Borrow;
 use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
 
 /// Do a sequential DOM traversal for layout or styling, generic over `D`.
 pub fn traverse_dom<E, D>(traversal: &D,
                           root: E,
                           token: PreTraverseToken)
     where E: TElement,
           D: DomTraversal<E>,
@@ -52,9 +54,15 @@ pub fn traverse_dom<E, D>(traversal: &D,
         for kid in root.as_node().children() {
             if kid.as_element().map_or(false, |el| el.get_data().is_none()) {
                 doit(traversal, &mut traversal_data, &mut tlc, kid);
             }
         }
     } else {
         doit(traversal, &mut traversal_data, &mut tlc, root.as_node());
     }
+
+    // Dump statistics to stdout if requested.
+    let tlsc = tlc.borrow();
+    if TraversalStatistics::should_dump() {
+        println!("{}", tlsc.statistics);
+    }
 }
--- a/servo/components/style/traversal.rs
+++ b/servo/components/style/traversal.rs
@@ -11,26 +11,18 @@ use context::{SharedStyleContext, StyleC
 use data::{ElementData, ElementStyles, StoredRestyleHint};
 use dom::{NodeInfo, TElement, TNode};
 use matching::{MatchMethods, StyleSharingResult};
 use restyle_hints::{RESTYLE_DESCENDANTS, RESTYLE_SELF};
 use selector_parser::RestyleDamage;
 use servo_config::opts;
 use std::borrow::BorrowMut;
 use std::mem;
-use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering};
 use stylist::Stylist;
 
-/// Style sharing candidate cache hits. These are only used when
-/// `-Z style-sharing-stats` is given.
-pub static STYLE_SHARING_CACHE_HITS: AtomicUsize = ATOMIC_USIZE_INIT;
-
-/// Style sharing candidate cache misses.
-pub static STYLE_SHARING_CACHE_MISSES: AtomicUsize = ATOMIC_USIZE_INIT;
-
 /// A per-traversal-level chunk of data. This is sent down by the traversal, and
 /// currently only holds the dom depth for the bloom filter.
 ///
 /// NB: Keep this as small as possible, please!
 #[derive(Clone, Debug)]
 pub struct PerLevelTraversalData {
     /// The current dom depth, if known, or `None` otherwise.
     ///
@@ -381,16 +373,17 @@ pub fn recalc_style_at<E, D>(traversal: 
                              traversal_data: &mut PerLevelTraversalData,
                              context: &mut StyleContext<E>,
                              element: E,
                              mut data: &mut AtomicRefMut<ElementData>)
     where E: TElement,
           D: DomTraversal<E>
 {
     context.thread_local.begin_element(element, &data);
+    context.thread_local.statistics.elements_traversed += 1;
     debug_assert!(data.get_restyle().map_or(true, |r| r.snapshot.is_none()),
                   "Snapshots should be expanded by the caller");
 
     let compute_self = !data.has_current_styles();
     let mut inherited_style_changed = false;
 
     debug!("recalc_style_at: {:?} (compute_self={:?}, dirty_descendants={:?}, data={:?})",
            element, compute_self, element.has_dirty_descendants(), data);
@@ -441,16 +434,17 @@ pub fn recalc_style_at<E, D>(traversal: 
 fn compute_style<E, D>(_traversal: &D,
                        traversal_data: &mut PerLevelTraversalData,
                        context: &mut StyleContext<E>,
                        element: E,
                        mut data: &mut AtomicRefMut<ElementData>) -> bool
     where E: TElement,
           D: DomTraversal<E>,
 {
+    context.thread_local.statistics.elements_styled += 1;
     let shared_context = context.shared;
     // Ensure the bloom filter is up to date.
     let dom_depth = context.thread_local.bloom_filter
                            .insert_parents_recovering(element, traversal_data.current_dom_depth);
 
     // Update the dom depth with the up-to-date dom depth.
     //
     // Note that this is always the same than the pre-existing depth, but it can
@@ -467,21 +461,18 @@ fn compute_style<E, D>(_traversal: &D,
                                                  shared_context, &mut data) }
     };
 
     // Otherwise, match and cascade selectors.
     match sharing_result {
         StyleSharingResult::CannotShare => {
             let match_results;
             let shareable_element = {
-                if opts::get().style_sharing_stats {
-                    STYLE_SHARING_CACHE_MISSES.fetch_add(1, Ordering::Relaxed);
-                }
-
                 // Perform the CSS selector matching.
+                context.thread_local.statistics.elements_matched += 1;
                 let filter = context.thread_local.bloom_filter.filter();
                 match_results = element.match_element(context, Some(filter));
                 if match_results.primary_is_shareable() {
                     Some(element)
                 } else {
                     None
                 }
             };
@@ -500,19 +491,17 @@ fn compute_style<E, D>(_traversal: &D,
             // Add ourselves to the LRU cache.
             if let Some(element) = shareable_element {
                 context.thread_local
                        .style_sharing_candidate_cache
                        .insert_if_possible(&element, &data.styles().primary.values, relations);
             }
         }
         StyleSharingResult::StyleWasShared(index) => {
-            if opts::get().style_sharing_stats {
-                STYLE_SHARING_CACHE_HITS.fetch_add(1, Ordering::Relaxed);
-            }
+            context.thread_local.statistics.styles_shared += 1;
             context.thread_local.style_sharing_candidate_cache.touch(index);
         }
     }
 
     // If we're restyling this element to display:none, throw away all style data
     // in the subtree, notify the caller to early-return.
     let display_none = data.styles().is_display_none();
     if display_none {