Bug 1455032: Pack the shadow cascade order in ApplicableDeclarationBlock. r=heycam
authorEmilio Cobos Álvarez <emilio@crisal.io>
Wed, 18 Apr 2018 19:48:24 +0200
changeset 471348 6538479958ef5993565268cf6b6ee0974b699044
parent 471347 1513e4d8668757d4be6bb4b81ed4e167c028a54d
child 471349 4b851036974da2dacf2985a00d93ddb2e46a19fd
push id1728
push userjlund@mozilla.com
push dateMon, 18 Jun 2018 21:12:27 +0000
treeherdermozilla-release@c296fde26f5f [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersheycam
bugs1455032
milestone61.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1455032: Pack the shadow cascade order in ApplicableDeclarationBlock. r=heycam I didn't bother not shifting there. We need to load the whole thing and shift for at least one of cascade level / shadow cascade order. Callers of level() other than for_rule_tree are non-existent in release builds, so we'd be doing the shift anyway. I can implement the same thing for shadow_cascade_order too, but I don't think that optimization is measurable in any way, either, the compiler should make the decision. And just in case, the simpler version actually generated less instructions in: https://play.rust-lang.org/?gist=ceadb0d3cbce4eeca76e4d9ab9a1c744&version=nightly with the simple thing. MozReview-Commit-ID: 8xPBJmlcyKh
servo/components/style/applicable_declarations.rs
servo/components/style/rule_tree/mod.rs
servo/ports/geckolib/tests/size_of.rs
--- a/servo/components/style/applicable_declarations.rs
+++ b/servo/components/style/applicable_declarations.rs
@@ -5,138 +5,157 @@
 //! Applicable declarations management.
 
 use properties::PropertyDeclarationBlock;
 use rule_tree::{CascadeLevel, ShadowCascadeOrder, StyleSource};
 use servo_arc::Arc;
 use shared_lock::Locked;
 use smallvec::SmallVec;
 use std::fmt::{self, Debug};
-use std::mem;
 
 /// List of applicable declarations. This is a transient structure that shuttles
 /// declarations between selector matching and inserting into the rule tree, and
 /// therefore we want to avoid heap-allocation where possible.
 ///
 /// In measurements on wikipedia, we pretty much never have more than 8 applicable
 /// declarations, so we could consider making this 8 entries instead of 16.
 /// However, it may depend a lot on workload, and stack space is cheap.
 pub type ApplicableDeclarationList = SmallVec<[ApplicableDeclarationBlock; 16]>;
 
 /// Blink uses 18 bits to store source order, and does not check overflow [1].
 /// That's a limit that could be reached in realistic webpages, so we use
 /// 24 bits and enforce defined behavior in the overflow case.
 ///
-/// Note that the value of 24 is also hard-coded into the level() accessor,
-/// which does a byte-aligned load of the 4th byte. If you change this value
-/// you'll need to change that as well.
-///
 /// [1] https://cs.chromium.org/chromium/src/third_party/WebKit/Source/core/css/
 ///     RuleSet.h?l=128&rcl=90140ab80b84d0f889abc253410f44ed54ae04f3
+const SOURCE_ORDER_SHIFT: usize = 0;
 const SOURCE_ORDER_BITS: usize = 24;
-const SOURCE_ORDER_MASK: u32 = (1 << SOURCE_ORDER_BITS) - 1;
-const SOURCE_ORDER_MAX: u32 = SOURCE_ORDER_MASK;
+const SOURCE_ORDER_MAX: u32 = (1 << SOURCE_ORDER_BITS) - 1;
+const SOURCE_ORDER_MASK: u32 = SOURCE_ORDER_MAX << SOURCE_ORDER_SHIFT;
 
-/// Stores the source order of a block and the cascade level it belongs to.
+/// We store up-to-15 shadow order levels.
+///
+/// You'd need an element slotted across 16 components with ::slotted rules to
+/// trigger this as of this writing, which looks... Unlikely.
+const SHADOW_CASCADE_ORDER_SHIFT: usize = SOURCE_ORDER_BITS;
+const SHADOW_CASCADE_ORDER_BITS: usize = 4;
+const SHADOW_CASCADE_ORDER_MAX: u8 = (1 << SHADOW_CASCADE_ORDER_BITS) - 1;
+const SHADOW_CASCADE_ORDER_MASK: u32 = (SHADOW_CASCADE_ORDER_MAX as u32) << SHADOW_CASCADE_ORDER_SHIFT;
+
+const CASCADE_LEVEL_SHIFT: usize = SOURCE_ORDER_BITS + SHADOW_CASCADE_ORDER_BITS;
+const CASCADE_LEVEL_BITS: usize = 4;
+const CASCADE_LEVEL_MAX: u8 = (1 << CASCADE_LEVEL_BITS) - 1;
+const CASCADE_LEVEL_MASK: u32 = (CASCADE_LEVEL_MAX as u32) << CASCADE_LEVEL_SHIFT;
+
+/// Stores the source order of a block, the cascade level it belongs to, and the
+/// counter needed to handle Shadow DOM cascade order properly.
 #[derive(Clone, Copy, Eq, MallocSizeOf, PartialEq)]
-struct SourceOrderAndCascadeLevel(u32);
+struct ApplicableDeclarationBits(u32);
 
-impl SourceOrderAndCascadeLevel {
-    fn new(source_order: u32, cascade_level: CascadeLevel) -> SourceOrderAndCascadeLevel {
+impl ApplicableDeclarationBits {
+    fn new(
+        source_order: u32,
+        cascade_level: CascadeLevel,
+        shadow_cascade_order: ShadowCascadeOrder,
+    ) -> Self {
+        debug_assert!(
+            cascade_level as u8 <= CASCADE_LEVEL_MAX,
+            "Gotta find more bits!"
+        );
         let mut bits = ::std::cmp::min(source_order, SOURCE_ORDER_MAX);
-        bits |= (cascade_level as u8 as u32) << SOURCE_ORDER_BITS;
-        SourceOrderAndCascadeLevel(bits)
+        bits |= ((shadow_cascade_order & SHADOW_CASCADE_ORDER_MAX) as u32) << SHADOW_CASCADE_ORDER_SHIFT;
+        bits |= (cascade_level as u8 as u32) << CASCADE_LEVEL_SHIFT;
+        ApplicableDeclarationBits(bits)
     }
 
-    fn order(&self) -> u32 {
-        self.0 & SOURCE_ORDER_MASK
+    fn source_order(&self) -> u32 {
+        (self.0 & SOURCE_ORDER_MASK) >> SOURCE_ORDER_SHIFT
+    }
+
+    fn shadow_cascade_order(&self) -> ShadowCascadeOrder {
+        ((self.0 & SHADOW_CASCADE_ORDER_MASK) >> SHADOW_CASCADE_ORDER_SHIFT) as ShadowCascadeOrder
     }
 
     fn level(&self) -> CascadeLevel {
-        unsafe {
-            // Transmute rather than shifting so that we're sure the compiler
-            // emits a simple byte-aligned load.
-            let as_bytes: [u8; 4] = mem::transmute(self.0);
-            CascadeLevel::from_byte(as_bytes[3])
-        }
+        let byte = ((self.0 & CASCADE_LEVEL_MASK) >> CASCADE_LEVEL_SHIFT) as u8;
+        unsafe { CascadeLevel::from_byte(byte) }
     }
 }
 
-impl Debug for SourceOrderAndCascadeLevel {
+impl Debug for ApplicableDeclarationBits {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_struct("SourceOrderAndCascadeLevel")
-            .field("order", &self.order())
+        f.debug_struct("ApplicableDeclarationBits")
+            .field("source_order", &self.source_order())
+            .field("shadow_cascade_order", &self.shadow_cascade_order())
             .field("level", &self.level())
             .finish()
     }
 }
 
 /// A property declaration together with its precedence among rules of equal
 /// specificity so that we can sort them.
 ///
 /// This represents the declarations in a given declaration block for a given
 /// importance.
 #[derive(Clone, Debug, MallocSizeOf, PartialEq)]
 pub struct ApplicableDeclarationBlock {
     /// The style source, either a style rule, or a property declaration block.
     #[ignore_malloc_size_of = "Arc"]
     pub source: StyleSource,
-    /// The source order of the block, and the cascade level it belongs to.
-    order_and_level: SourceOrderAndCascadeLevel,
+    /// The bits containing the source order, cascade level, and shadow cascade
+    /// order.
+    bits: ApplicableDeclarationBits,
     /// The specificity of the selector this block is represented by.
     pub specificity: u32,
-    /// The order in the tree of trees we carry on.
-    pub shadow_cascade_order: ShadowCascadeOrder,
 }
 
 impl ApplicableDeclarationBlock {
     /// Constructs an applicable declaration block from a given property
     /// declaration block and importance.
     #[inline]
     pub fn from_declarations(
         declarations: Arc<Locked<PropertyDeclarationBlock>>,
         level: CascadeLevel,
     ) -> Self {
         ApplicableDeclarationBlock {
             source: StyleSource::Declarations(declarations),
-            order_and_level: SourceOrderAndCascadeLevel::new(0, level),
+            bits: ApplicableDeclarationBits::new(0, level, 0),
             specificity: 0,
-            shadow_cascade_order: 0,
         }
     }
 
     /// Constructs an applicable declaration block from the given components
     #[inline]
     pub fn new(
         source: StyleSource,
         order: u32,
         level: CascadeLevel,
         specificity: u32,
-        shadow_cascade_order: u32,
+        shadow_cascade_order: ShadowCascadeOrder,
     ) -> Self {
         ApplicableDeclarationBlock {
             source,
-            order_and_level: SourceOrderAndCascadeLevel::new(order, level),
+            bits: ApplicableDeclarationBits::new(order, level, shadow_cascade_order),
             specificity,
-            shadow_cascade_order,
         }
     }
 
     /// Returns the source order of the block.
     #[inline]
     pub fn source_order(&self) -> u32 {
-        self.order_and_level.order()
+        self.bits.source_order()
     }
 
     /// Returns the cascade level of the block.
     #[inline]
     pub fn level(&self) -> CascadeLevel {
-        self.order_and_level.level()
+        self.bits.level()
     }
 
     /// Convenience method to consume self and return the right thing for the
     /// rule tree to iterate over.
     #[inline]
     pub fn for_rule_tree(self) -> (StyleSource, CascadeLevel, ShadowCascadeOrder) {
         let level = self.level();
-        (self.source, level, self.shadow_cascade_order)
+        let cascade_order = self.bits.shadow_cascade_order();
+        (self.source, level, cascade_order)
     }
 }
--- a/servo/components/style/rule_tree/mod.rs
+++ b/servo/components/style/rule_tree/mod.rs
@@ -165,17 +165,17 @@ const FREE_LIST_LOCKED: *mut RuleNode = 
 /// A counter to track how many inner shadow roots rules deep we are.
 ///
 /// This is used to handle:
 ///
 /// https://drafts.csswg.org/css-scoping/#shadow-cascading
 ///
 /// In particular, it'd be `0` for the innermost shadow host, `1` for the next,
 /// and so on.
-pub type ShadowCascadeOrder = u32;
+pub type ShadowCascadeOrder = u8;
 
 impl RuleTree {
     /// Construct a new rule tree.
     pub fn new() -> Self {
         RuleTree {
             root: StrongRuleNode::new(Box::new(RuleNode::root())),
         }
     }
--- a/servo/ports/geckolib/tests/size_of.rs
+++ b/servo/ports/geckolib/tests/size_of.rs
@@ -30,17 +30,17 @@ size_of_test!(test_size_of_cv, ComputedV
 size_of_test!(test_size_of_option_arc_cv, Option<Arc<ComputedValues>>, 8);
 size_of_test!(test_size_of_option_rule_node, Option<StrongRuleNode>, 8);
 
 size_of_test!(test_size_of_element_styles, ElementStyles, 16);
 size_of_test!(test_size_of_element_data, ElementData, 24);
 
 size_of_test!(test_size_of_property_declaration, style::properties::PropertyDeclaration, 32);
 
-size_of_test!(test_size_of_application_declaration_block, ApplicableDeclarationBlock, 32);
+size_of_test!(test_size_of_application_declaration_block, ApplicableDeclarationBlock, 24);
 size_of_test!(test_size_of_rule_node, RuleNode, 80);
 
 // This is huge, but we allocate it on the stack and then never move it,
 // we only pass `&mut SourcePropertyDeclaration` references around.
 size_of_test!(test_size_of_parsed_declaration, style::properties::SourcePropertyDeclaration, 608);
 
 size_of_test!(test_size_of_computed_image, computed::image::Image, 32);
 size_of_test!(test_size_of_specified_image, specified::image::Image, 32);