servo: Merge #14642 - Use Scoped TLS in the style system and eliminate UnsafeNode usage in the StyleSharingCandidateCache (from bholley:scoped_tls); r=emilio
author Bobby Holley <bobbyholley@gmail.com>
Wed, 21 Dec 2016 11:11:12 -0800
changeset 340388 f0be16755379c0cb9feabf5c063edcea211ad9a1
parent 340387 82e11b3fc9d1fe5a8b413249206161404ce86cbe
child 340389 12250ac495234018fb9401256fa067783342c11b
push id 31307
push user gszorc@mozilla.com
push date Sat, 04 Feb 2017 00:59:06 +0000
treeherder mozilla-central@94079d43835f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers emilio
servo: Merge #14642 - Use Scoped TLS in the style system and eliminate UnsafeNode usage in the StyleSharingCandidateCache (from bholley:scoped_tls); r=emilio

See the discussion in https://bugzilla.mozilla.org/show_bug.cgi?id=1323372

@emilio Please review, but don't merge yet until we get the upstream changes into Rayon.

CC @SimonSapin @heycam @upsuper @Manishearth @pcwalton @nikomatsakis

Source-Repo: https://github.com/servo/servo
Source-Revision: 8fd8d6161426af386c0dfd3d13968a409474eb16
servo/Cargo.lock
servo/components/layout/Cargo.toml
servo/components/layout/context.rs
servo/components/layout/parallel.rs
servo/components/layout/query.rs
servo/components/layout/sequential.rs
servo/components/layout/traversal.rs
servo/components/layout_thread/Cargo.toml
servo/components/layout_thread/lib.rs
servo/components/style/Cargo.toml
servo/components/style/context.rs
servo/components/style/gecko/context.rs
servo/components/style/gecko/mod.rs
servo/components/style/gecko/traversal.rs
servo/components/style/lib.rs
servo/components/style/matching.rs
servo/components/style/parallel.rs
servo/components/style/scoped_tls.rs
servo/components/style/sequential.rs
servo/components/style/traversal.rs
servo/ports/geckolib/glue.rs
servo/servo-tidy.toml
servo/tests/unit/style/Cargo.toml
--- a/servo/Cargo.lock
+++ b/servo/Cargo.lock
@@ -1310,17 +1310,17 @@ dependencies = [
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg 0.0.1",
  "net_traits 0.0.1",
  "ordered-float 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "profile_traits 0.0.1",
  "range 0.0.1",
- "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rayon 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "script_layout_interface 0.0.1",
  "script_traits 0.0.1",
  "selectors 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 0.8.18 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.18 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_atoms 0.0.1",
  "servo_config 0.0.1",
@@ -1356,17 +1356,17 @@ dependencies = [
  "layout_traits 0.0.1",
  "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg 0.0.1",
  "net_traits 0.0.1",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "profile_traits 0.0.1",
- "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rayon 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "script 0.0.1",
  "script_layout_interface 0.0.1",
  "script_traits 0.0.1",
  "selectors 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.18 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_config 0.0.1",
  "servo_geometry 0.0.1",
@@ -2186,16 +2186,27 @@ source = "registry+https://github.com/ru
 dependencies = [
  "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
+name = "rayon"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
+ "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "ref_filter_map"
 version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "ref_slice"
 version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2739,17 +2750,17 @@ dependencies = [
  "ordered-float 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "owning_ref 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "phf 0.7.20 (registry+https://github.com/rust-lang/crates.io-index)",
  "phf_codegen 0.7.20 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "quickersort 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
- "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rayon 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
  "selectors 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 0.8.18 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.18 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_atoms 0.0.1",
  "servo_config 0.0.1",
  "servo_url 0.0.1",
@@ -2766,17 +2777,17 @@ version = "0.0.1"
 dependencies = [
  "app_units 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "cssparser 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "html5ever-atoms 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "owning_ref 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
- "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rayon 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
  "selectors 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_atoms 0.0.1",
  "servo_config 0.0.1",
  "servo_url 0.0.1",
  "style 0.0.1",
  "style_traits 0.0.1",
 ]
@@ -3476,16 +3487,17 @@ dependencies = [
 "checksum png 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "06208e2ee243e3118a55dda9318f821f206d8563fb8d4df258767f8e62bb0997"
 "checksum quasi 0.26.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ab7992920bf5bc5f1ed6fdc49090bf665cd00b3aa4b78c16ac3465286257db1"
 "checksum quasi_codegen 0.26.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d52e5e2c92ffdad67a9b86ad27ad999bf1a652723f1d4cc93b7cf6c272b5f8e0"
 "checksum quickersort 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "14ae8f367c38c78abd03114e524b55a817885446662413fbca951f42848450c5"
 "checksum quine-mc_cluskey 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "07589615d719a60c8dd8a4622e7946465dfef20d1a428f969e3443e7386d5f45"
 "checksum quote 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1e0c9bc6bfb0a60d539aab6e338207c1a5456e62f5bd5375132cee119aa4b3"
 "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
 "checksum rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3b6a6e05e0e6b703e9f2ad266eb63f3712e693a17a2702b95a23de14ce8defa9"
+"checksum rayon 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50c575b58c2b109e2fbc181820cbe177474f35610ff9e357dc75f6bac854ffbf"
 "checksum ref_filter_map 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2b5ceb840e4009da4841ed22a15eb49f64fdd00a2138945c5beacf506b2fb5ed"
 "checksum ref_slice 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "546bb4aa91c85f232732cc5b3c8097ea97ae9a77304f9ab4df8b203ff7672dad"
 "checksum regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)" = "63b49f873f36ddc838d773972511e5fed2ef7350885af07d58e2f48ce8073dcd"
 "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
 "checksum rustc-demangle 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c4c2d35b2ed94cec4fad26a36eee4d6eff394ce70a8ceea064b0b6ca42ea4cf0"
 "checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
 "checksum rustc_version 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084"
 "checksum scoped_threadpool 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "3ef399c8893e8cb7aa9696e895427fab3a6bf265977bb96e126f24ddd2cda85a"
--- a/servo/components/layout/Cargo.toml
+++ b/servo/components/layout/Cargo.toml
@@ -26,17 +26,17 @@ libc = "0.2"
 log = "0.3.5"
 msg = {path = "../msg"}
 net_traits = {path = "../net_traits"}
 ordered-float = "0.2.2"
 parking_lot = "0.3.3"
 plugins = {path = "../plugins"}
 profile_traits = {path = "../profile_traits"}
 range = {path = "../range"}
-rayon = "0.5"
+rayon = "0.6"
 script_layout_interface = {path = "../script_layout_interface"}
 script_traits = {path = "../script_traits"}
 selectors = "0.15"
 serde = "0.8"
 serde_derive = "0.8"
 serde_json = "0.8"
 servo_atoms = {path = "../atoms"}
 servo_config = {path = "../config"}
--- a/servo/components/layout/context.rs
+++ b/servo/components/layout/context.rs
@@ -14,77 +14,82 @@ use gfx::font_context::FontContext;
 use heapsize::HeapSizeOf;
 use ipc_channel::ipc;
 use net_traits::image::base::Image;
 use net_traits::image_cache_thread::{ImageCacheChan, ImageCacheThread, ImageResponse, ImageState};
 use net_traits::image_cache_thread::{ImageOrMetadataAvailable, UsePlaceholder};
 use parking_lot::RwLock;
 use servo_config::opts;
 use servo_url::ServoUrl;
-use std::borrow::Borrow;
 use std::cell::{RefCell, RefMut};
 use std::collections::HashMap;
 use std::hash::BuildHasherDefault;
 use std::rc::Rc;
 use std::sync::{Arc, Mutex};
 use style::context::{SharedStyleContext, ThreadLocalStyleContext};
+use style::dom::TElement;
 
-pub struct ThreadLocalLayoutContext {
-    pub style_context: ThreadLocalStyleContext,
+/// TLS data scoped to the traversal.
+pub struct ScopedThreadLocalLayoutContext<E: TElement> {
+    pub style_context: ThreadLocalStyleContext<E>,
+}
+
+impl<E: TElement> ScopedThreadLocalLayoutContext<E> {
+    pub fn new(shared: &SharedLayoutContext) -> Self {
+        ScopedThreadLocalLayoutContext {
+            style_context: ThreadLocalStyleContext::new(&shared.style_context),
+        }
+    }
+}
+
+/// TLS data that persists across traversals.
+pub struct PersistentThreadLocalLayoutContext {
+    // FontContext uses Rc all over the place and so isn't Send, which means we
+    // can't use ScopedTLS for it. There's also no reason to scope it to the
+    // traversal, and performance is probably better if we don't.
     pub font_context: RefCell<FontContext>,
 }
 
-impl ThreadLocalLayoutContext {
+impl PersistentThreadLocalLayoutContext {
     pub fn new(shared: &SharedLayoutContext) -> Rc<Self> {
         let font_cache_thread = shared.font_cache_thread.lock().unwrap().clone();
-        let local_style_data = shared.style_context.local_context_creation_data.lock().unwrap();
-
-        Rc::new(ThreadLocalLayoutContext {
-            style_context: ThreadLocalStyleContext::new(&local_style_data),
+        Rc::new(PersistentThreadLocalLayoutContext {
             font_context: RefCell::new(FontContext::new(font_cache_thread)),
         })
     }
 }
 
-impl Borrow<ThreadLocalStyleContext> for ThreadLocalLayoutContext {
-    fn borrow(&self) -> &ThreadLocalStyleContext {
-        &self.style_context
-    }
-}
-
-impl HeapSizeOf for ThreadLocalLayoutContext {
-    // FIXME(njn): measure other fields eventually.
+impl HeapSizeOf for PersistentThreadLocalLayoutContext {
     fn heap_size_of_children(&self) -> usize {
         self.font_context.heap_size_of_children()
     }
 }
 
-thread_local!(static LOCAL_CONTEXT_KEY: RefCell<Option<Rc<ThreadLocalLayoutContext>>> = RefCell::new(None));
+thread_local!(static LOCAL_CONTEXT_KEY: RefCell<Option<Rc<PersistentThreadLocalLayoutContext>>> = RefCell::new(None));
 
-pub fn heap_size_of_local_context() -> usize {
-    LOCAL_CONTEXT_KEY.with(|r| {
-        r.borrow().clone().map_or(0, |context| context.heap_size_of_children())
-    })
-}
-
-// Keep this implementation in sync with the one in ports/geckolib/traversal.rs.
-pub fn create_or_get_local_context(shared: &SharedLayoutContext)
-                                   -> Rc<ThreadLocalLayoutContext> {
+fn create_or_get_persistent_context(shared: &SharedLayoutContext)
+                                    -> Rc<PersistentThreadLocalLayoutContext> {
     LOCAL_CONTEXT_KEY.with(|r| {
         let mut r = r.borrow_mut();
         if let Some(context) = r.clone() {
             context
         } else {
-            let context = ThreadLocalLayoutContext::new(shared);
+            let context = PersistentThreadLocalLayoutContext::new(shared);
             *r = Some(context.clone());
             context
         }
     })
 }
 
+pub fn heap_size_of_persistent_local_context() -> usize {
+    LOCAL_CONTEXT_KEY.with(|r| {
+        r.borrow().clone().map_or(0, |context| context.heap_size_of_children())
+    })
+}
+
 /// Layout information shared among all workers. This must be thread-safe.
 pub struct SharedLayoutContext {
     /// Bits shared by the layout and style system.
     pub style_context: SharedStyleContext,
 
     /// The shared image cache thread.
     pub image_cache_thread: Mutex<ImageCacheThread>,
 
@@ -95,34 +100,27 @@ pub struct SharedLayoutContext {
     pub font_cache_thread: Mutex<FontCacheThread>,
 
     /// A cache of WebRender image info.
     pub webrender_image_cache: Arc<RwLock<HashMap<(ServoUrl, UsePlaceholder),
                                                   WebRenderImageInfo,
                                                   BuildHasherDefault<FnvHasher>>>>,
 }
 
-impl Borrow<SharedStyleContext> for SharedLayoutContext {
-    fn borrow(&self) -> &SharedStyleContext {
-        &self.style_context
-    }
-}
-
 pub struct LayoutContext<'a> {
     pub shared: &'a SharedLayoutContext,
-    pub thread_local: &'a ThreadLocalLayoutContext,
+    pub persistent: Rc<PersistentThreadLocalLayoutContext>,
 }
 
 impl<'a> LayoutContext<'a> {
-    pub fn new(shared: &'a SharedLayoutContext,
-               thread_local: &'a ThreadLocalLayoutContext) -> Self
+    pub fn new(shared: &'a SharedLayoutContext) -> Self
     {
         LayoutContext {
             shared: shared,
-            thread_local: thread_local,
+            persistent: create_or_get_persistent_context(shared),
         }
     }
 }
 
 impl<'a> LayoutContext<'a> {
     // FIXME(bholley): The following two methods are identical and should be merged.
     // shared_context() is the appropriate name, but it involves renaming a lot of
     // calls.
@@ -133,17 +131,17 @@ impl<'a> LayoutContext<'a> {
 
     #[inline(always)]
     pub fn style_context(&self) -> &SharedStyleContext {
         &self.shared.style_context
     }
 
     #[inline(always)]
     pub fn font_context(&self) -> RefMut<FontContext> {
-        self.thread_local.font_context.borrow_mut()
+        self.persistent.font_context.borrow_mut()
     }
 }
 
 impl SharedLayoutContext {
     fn get_or_request_image_synchronously(&self, url: ServoUrl, use_placeholder: UsePlaceholder)
                                           -> Option<Arc<Image>> {
         debug_assert!(opts::get().output_file.is_some() || opts::get().exit_after_load);
 
--- a/servo/components/layout/parallel.rs
+++ b/servo/components/layout/parallel.rs
@@ -3,18 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Implements parallel traversals over the DOM and flow trees.
 //!
 //! This code is highly unsafe. Keep this file small and easy to audit.
 
 #![allow(unsafe_code)]
 
-use context::{LayoutContext, SharedLayoutContext, ThreadLocalLayoutContext};
-use context::create_or_get_local_context;
+use context::{LayoutContext, SharedLayoutContext};
 use flow::{self, Flow, MutableFlowUtils, PostorderFlowTraversal, PreorderFlowTraversal};
 use flow_ref::FlowRef;
 use profile_traits::time::{self, TimerMetadata, profile};
 use rayon;
 use servo_config::opts;
 use std::mem;
 use std::sync::atomic::{AtomicIsize, Ordering};
 use style::dom::UnsafeNode;
@@ -46,17 +45,17 @@ pub fn mut_owned_flow_to_unsafe_flow(flo
 
 pub fn borrowed_flow_to_unsafe_flow(flow: &Flow) -> UnsafeFlow {
     unsafe {
         mem::transmute::<&Flow, UnsafeFlow>(flow)
     }
 }
 
 pub type ChunkedFlowTraversalFunction<'scope> =
-    extern "Rust" fn(Box<[UnsafeFlow]>, &'scope SharedLayoutContext, &rayon::Scope<'scope>);
+    extern "Rust" fn(Box<[UnsafeFlow]>, &rayon::Scope<'scope>, &'scope SharedLayoutContext);
 
 pub type FlowTraversalFunction = extern "Rust" fn(UnsafeFlow, &LayoutContext);
 
 /// Information that we need stored in each flow.
 pub struct FlowParallelInfo {
     /// The number of children that still need work done.
     pub children_count: AtomicIsize,
     /// The address of the parent flow.
@@ -128,33 +127,32 @@ trait ParallelPostorderFlowTraversal : P
         }
     }
 }
 
 /// A parallel top-down flow traversal.
 trait ParallelPreorderFlowTraversal : PreorderFlowTraversal {
     fn run_parallel<'scope>(&self,
                             unsafe_flows: &[UnsafeFlow],
-                            layout_context: &'scope SharedLayoutContext,
-                            scope: &rayon::Scope<'scope>);
+                            scope: &rayon::Scope<'scope>,
+                            shared: &'scope SharedLayoutContext);
 
     fn should_record_thread_ids(&self) -> bool;
 
     #[inline(always)]
     fn run_parallel_helper<'scope>(&self,
                                    unsafe_flows: &[UnsafeFlow],
+                                   scope: &rayon::Scope<'scope>,
                                    shared: &'scope SharedLayoutContext,
-                                   scope: &rayon::Scope<'scope>,
                                    top_down_func: ChunkedFlowTraversalFunction<'scope>,
                                    bottom_up_func: FlowTraversalFunction)
     {
-        let tlc = create_or_get_local_context(shared);
-        let context = LayoutContext::new(&shared, &*tlc);
+        let mut discovered_child_flows = vec![];
+        let context = LayoutContext::new(&shared);
 
-        let mut discovered_child_flows = vec![];
         for unsafe_flow in unsafe_flows {
             let mut had_children = false;
             unsafe {
                 // Get a real flow.
                 let flow: &mut Flow = mem::transmute(*unsafe_flow);
 
                 if self.should_record_thread_ids() {
                     // FIXME(emilio): With the switch to rayon we can no longer
@@ -182,49 +180,49 @@ trait ParallelPreorderFlowTraversal : Pr
                 bottom_up_func(*unsafe_flow, &context)
             }
         }
 
         for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
             let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice();
 
             scope.spawn(move |scope| {
-                top_down_func(nodes, shared, scope);
+                top_down_func(nodes, scope, shared);
             });
         }
     }
 }
 
 impl<'a> ParallelPreorderFlowTraversal for AssignISizes<'a> {
     fn run_parallel<'scope>(&self,
                                   unsafe_flows: &[UnsafeFlow],
-                                  layout_context: &'scope SharedLayoutContext,
-                                  scope: &rayon::Scope<'scope>)
+                                  scope: &rayon::Scope<'scope>,
+                                  shared: &'scope SharedLayoutContext)
     {
         self.run_parallel_helper(unsafe_flows,
-                                 layout_context,
                                  scope,
+                                 shared,
                                  assign_inline_sizes,
                                  assign_block_sizes_and_store_overflow)
     }
 
     fn should_record_thread_ids(&self) -> bool {
         true
     }
 }
 
 impl<'a> ParallelPostorderFlowTraversal for AssignBSizes<'a> {}
 
 fn assign_inline_sizes<'scope>(unsafe_flows: Box<[UnsafeFlow]>,
-                               shared_layout_context: &'scope SharedLayoutContext,
-                               scope: &rayon::Scope<'scope>) {
+                               scope: &rayon::Scope<'scope>,
+                               shared: &'scope SharedLayoutContext) {
     let assign_inline_sizes_traversal = AssignISizes {
-        shared_context: &shared_layout_context.style_context,
+        shared_context: &shared.style_context,
     };
-    assign_inline_sizes_traversal.run_parallel(&unsafe_flows, shared_layout_context, scope)
+    assign_inline_sizes_traversal.run_parallel(&unsafe_flows, scope, shared)
 }
 
 fn assign_block_sizes_and_store_overflow(
         unsafe_flow: UnsafeFlow,
         context: &LayoutContext) {
     let assign_block_sizes_traversal = AssignBSizes {
         layout_context: context,
     };
@@ -233,25 +231,24 @@ fn assign_block_sizes_and_store_overflow
 
 pub fn traverse_flow_tree_preorder(
         root: &mut Flow,
         profiler_metadata: Option<TimerMetadata>,
         time_profiler_chan: time::ProfilerChan,
         shared: &SharedLayoutContext,
         queue: &rayon::ThreadPool) {
     if opts::get().bubble_inline_sizes_separately {
-        let tlc = ThreadLocalLayoutContext::new(shared);
-        let context = LayoutContext::new(shared, &*tlc);
+        let context = LayoutContext::new(shared);
         let bubble_inline_sizes = BubbleISizes { layout_context: &context };
         root.traverse_postorder(&bubble_inline_sizes);
     }
 
     let nodes = vec![borrowed_flow_to_unsafe_flow(root)].into_boxed_slice();
 
     queue.install(move || {
         rayon::scope(move |scope| {
             profile(time::ProfilerCategory::LayoutParallelWarmup,
                     profiler_metadata, time_profiler_chan, move || {
-                assign_inline_sizes(nodes, &shared, scope);
+                assign_inline_sizes(nodes, scope, &shared);
             });
         });
     });
 }
--- a/servo/components/layout/query.rs
+++ b/servo/components/layout/query.rs
@@ -1,18 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Utilities for querying the layout, as needed by the layout thread.
 
 use app_units::Au;
 use construct::ConstructionResult;
-use context::SharedLayoutContext;
-use context::create_or_get_local_context;
+use context::{ScopedThreadLocalLayoutContext, SharedLayoutContext};
 use euclid::point::Point2D;
 use euclid::rect::Rect;
 use euclid::size::Size2D;
 use flow::{self, Flow};
 use fragment::{Fragment, FragmentBorderBoxIterator, SpecificFragmentInfo};
 use gfx::display_list::{DisplayItemMetadata, DisplayList, OpaqueNode, ScrollOffsetMap};
 use gfx_traits::ScrollRootId;
 use ipc_channel::ipc::IpcSender;
@@ -641,20 +640,20 @@ pub fn process_resolved_style_request<'a
     // is that the requested node is in a display:none subtree. We currently
     // maintain the invariant that elements in display:none subtrees always have
     // no ElementData, so we need to temporarily bend those invariants here, and
     // then throw them the style data away again before returning to preserve them.
     // We could optimize this later to keep the style data cached somehow, but
     // we'd need a mechanism to prevent detect when it's stale (since we don't
     // traverse display:none subtrees during restyle).
     let display_none_root = if element.get_data().is_none() {
-        let tlc = create_or_get_local_context(shared);
+        let mut tlc = ScopedThreadLocalLayoutContext::new(shared);
         let context = StyleContext {
             shared: &shared.style_context,
-            thread_local: &tlc.style_context,
+            thread_local: &mut tlc.style_context,
         };
 
         Some(style_element_in_display_none_subtree(&context, element,
                                                    &|e| e.as_node().initialize_data()))
     } else {
         None
     };
 
--- a/servo/components/layout/sequential.rs
+++ b/servo/components/layout/sequential.rs
@@ -1,16 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Implements sequential traversals over the DOM and flow trees.
 
 use app_units::Au;
-use context::{LayoutContext, SharedLayoutContext, ThreadLocalLayoutContext};
+use context::{LayoutContext, SharedLayoutContext};
 use display_list_builder::DisplayListBuildState;
 use euclid::point::Point2D;
 use floats::SpeculatedFloatPlacement;
 use flow::{self, Flow, ImmutableFlowUtils, InorderFlowTraversal, MutableFlowUtils};
 use flow::{PostorderFlowTraversal, PreorderFlowTraversal};
 use flow::IS_ABSOLUTELY_POSITIONED;
 use fragment::FragmentBorderBoxIterator;
 use generated_content::ResolveGeneratedContent;
@@ -29,18 +29,17 @@ pub fn resolve_generated_content(root: &
 
         traversal.process(flow, level);
 
         for kid in flow::mut_base(flow).children.iter_mut() {
             doit(kid, level + 1, traversal)
         }
     }
 
-    let tlc = ThreadLocalLayoutContext::new(shared);
-    let layout_context = LayoutContext::new(shared, &*tlc);
+    let layout_context = LayoutContext::new(shared);
     let mut traversal = ResolveGeneratedContent::new(&layout_context);
     doit(root, 0, &mut traversal)
 }
 
 pub fn traverse_flow_tree_preorder(root: &mut Flow,
                                    shared: &SharedLayoutContext) {
     fn doit(flow: &mut Flow,
             assign_inline_sizes: AssignISizes,
@@ -53,18 +52,17 @@ pub fn traverse_flow_tree_preorder(root:
             doit(kid, assign_inline_sizes, assign_block_sizes);
         }
 
         if assign_block_sizes.should_process(flow) {
             assign_block_sizes.process(flow);
         }
     }
 
-    let tlc = ThreadLocalLayoutContext::new(shared);
-    let layout_context = LayoutContext::new(shared, &*tlc);
+    let layout_context = LayoutContext::new(shared);
 
     if opts::get().bubble_inline_sizes_separately {
         let bubble_inline_sizes = BubbleISizes { layout_context: &layout_context };
         {
             let root: &mut Flow = root;
             root.traverse_postorder(&bubble_inline_sizes);
         }
     }
--- a/servo/components/layout/traversal.rs
+++ b/servo/components/layout/traversal.rs
@@ -1,24 +1,22 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Traversals over the DOM and flow trees, running the layout computations.
 
 use construct::FlowConstructor;
-use context::{LayoutContext, SharedLayoutContext, ThreadLocalLayoutContext};
-use context::create_or_get_local_context;
+use context::{LayoutContext, ScopedThreadLocalLayoutContext, SharedLayoutContext};
 use display_list_builder::DisplayListBuildState;
 use flow::{self, PreorderFlowTraversal};
 use flow::{CAN_BE_FRAGMENTED, Flow, ImmutableFlowUtils, PostorderFlowTraversal};
 use gfx::display_list::OpaqueNode;
 use script_layout_interface::wrapper_traits::{LayoutNode, ThreadSafeLayoutNode};
 use servo_config::opts;
-use std::rc::Rc;
 use style::atomic_refcell::AtomicRefCell;
 use style::context::{SharedStyleContext, StyleContext};
 use style::data::ElementData;
 use style::dom::{TElement, TNode};
 use style::selector_parser::RestyleDamage;
 use style::servo::restyle_damage::{BUBBLE_ISIZES, REFLOW, REFLOW_OUT_OF_FLOW, REPAINT};
 use style::traversal::{DomTraversal, recalc_style_at, remove_from_bloom_filter};
 use style::traversal::PerLevelTraversalData;
@@ -51,39 +49,38 @@ impl RecalcStyleAndConstructFlows {
     }
 }
 
 #[allow(unsafe_code)]
 impl<N> DomTraversal<N> for RecalcStyleAndConstructFlows
     where N: LayoutNode + TNode,
           N::ConcreteElement: TElement
 {
-    type ThreadLocalContext = ThreadLocalLayoutContext;
+    type ThreadLocalContext = ScopedThreadLocalLayoutContext<N::ConcreteElement>;
 
-    fn process_preorder(&self, node: N, traversal_data: &mut PerLevelTraversalData) {
+    fn process_preorder(&self, traversal_data: &mut PerLevelTraversalData,
+                        thread_local: &mut Self::ThreadLocalContext, node: N) {
         // FIXME(pcwalton): Stop allocating here. Ideally this should just be
         // done by the HTML parser.
         node.initialize_data();
 
         if !node.is_text_node() {
             let el = node.as_element().unwrap();
             let mut data = el.mutate_data().unwrap();
-            let tlc = create_or_get_local_context(&self.shared);
-            let context = StyleContext {
+            let mut context = StyleContext {
                 shared: &self.shared.style_context,
-                thread_local: &tlc.style_context,
+                thread_local: &mut thread_local.style_context,
             };
-            recalc_style_at(self, traversal_data, &context, el, &mut data);
+            recalc_style_at(self, traversal_data, &mut context, el, &mut data);
         }
     }
 
-    fn process_postorder(&self, node: N) {
-        let tlc = create_or_get_local_context(&self.shared);
-        let context = LayoutContext::new(&self.shared, &*tlc);
-        construct_flows_at(&context, self.root, node);
+    fn process_postorder(&self, thread_local: &mut Self::ThreadLocalContext, node: N) {
+        let context = LayoutContext::new(&self.shared);
+        construct_flows_at(&context, thread_local, self.root, node);
     }
 
     fn text_node_needs_traversal(node: N) -> bool {
         // Text nodes never need styling. However, there are two cases they may need
         // flow construction:
         // (1) They child doesn't yet have layout data (preorder traversal initializes it).
         // (2) The parent element has restyle damage (so the text flow also needs fixup).
         node.get_raw_data().is_none() ||
@@ -98,31 +95,33 @@ impl<N> DomTraversal<N> for RecalcStyleA
     unsafe fn clear_element_data(element: &N::ConcreteElement) {
         element.as_node().clear_data();
     }
 
     fn shared_context(&self) -> &SharedStyleContext {
         &self.shared.style_context
     }
 
-    fn create_or_get_thread_local_context(&self) -> Rc<ThreadLocalLayoutContext> {
-        create_or_get_local_context(&self.shared)
+    fn create_thread_local_context(&self) -> Self::ThreadLocalContext {
+        ScopedThreadLocalLayoutContext::new(&self.shared)
     }
 }
 
 /// A bottom-up, parallelizable traversal.
 pub trait PostorderNodeMutTraversal<ConcreteThreadSafeLayoutNode: ThreadSafeLayoutNode> {
     /// The operation to perform. Return true to continue or false to stop.
     fn process(&mut self, node: &ConcreteThreadSafeLayoutNode);
 }
 
 /// The flow construction traversal, which builds flows for styled nodes.
 #[inline]
 #[allow(unsafe_code)]
-fn construct_flows_at<'a, N>(context: &LayoutContext<'a>, root: OpaqueNode, node: N)
+fn construct_flows_at<'a, N>(context: &LayoutContext<'a>,
+                             _thread_local: &ScopedThreadLocalLayoutContext<N::ConcreteElement>,
+                             root: OpaqueNode, node: N)
     where N: LayoutNode,
 {
     debug!("construct_flows_at: {:?}", node);
 
     // Construct flows for this node.
     {
         let tnode = node.to_threadsafe();
 
--- a/servo/components/layout_thread/Cargo.toml
+++ b/servo/components/layout_thread/Cargo.toml
@@ -22,17 +22,17 @@ layout = {path = "../layout"}
 layout_traits = {path = "../layout_traits"}
 lazy_static = "0.2"
 log = "0.3.5"
 msg = {path = "../msg"}
 net_traits = {path = "../net_traits"}
 parking_lot = {version = "0.3.3", features = ["nightly"]}
 plugins = {path = "../plugins"}
 profile_traits = {path = "../profile_traits"}
-rayon = "0.5"
+rayon = "0.6"
 script = {path = "../script"}
 script_layout_interface = {path = "../script_layout_interface"}
 script_traits = {path = "../script_traits"}
 selectors = "0.15"
 serde_derive = "0.8"
 serde_json = "0.8"
 servo_config = {path = "../config"}
 servo_geometry = {path = "../geometry"}
--- a/servo/components/layout_thread/lib.rs
+++ b/servo/components/layout_thread/lib.rs
@@ -58,17 +58,18 @@ use gfx::font;
 use gfx::font_cache_thread::FontCacheThread;
 use gfx::font_context;
 use gfx_traits::{Epoch, FragmentType, ScrollRootId};
 use heapsize::HeapSizeOf;
 use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
 use ipc_channel::router::ROUTER;
 use layout::animation;
 use layout::construct::ConstructionResult;
-use layout::context::{LayoutContext, SharedLayoutContext, ThreadLocalLayoutContext, heap_size_of_local_context};
+use layout::context::{LayoutContext, SharedLayoutContext};
+use layout::context::heap_size_of_persistent_local_context;
 use layout::display_list_builder::ToGfxColor;
 use layout::flow::{self, Flow, ImmutableFlowUtils, MutableFlowUtils, MutableOwnedFlowUtils};
 use layout::flow_ref::FlowRef;
 use layout::incremental::{LayoutDamageComputation, REFLOW_ENTIRE_DOCUMENT};
 use layout::layout_debug;
 use layout::parallel;
 use layout::query::{LayoutRPCImpl, LayoutThreadData, process_content_box_request, process_content_boxes_request};
 use layout::query::{process_margin_style_query, process_node_overflow_request, process_resolved_style_request};
@@ -718,21 +719,21 @@ impl LayoutThread {
 
         let stylist = rw_data.stylist.as_ref();
         reports.push(Report {
             path: path![formatted_url, "layout-thread", "stylist"],
             kind: ReportKind::ExplicitJemallocHeapSize,
             size: stylist.heap_size_of_children(),
         });
 
-        // The LayoutThread has a context in TLS...
+        // The LayoutThread has data in Persistent TLS...
         reports.push(Report {
             path: path![formatted_url, "layout-thread", "local-context"],
             kind: ReportKind::ExplicitJemallocHeapSize,
-            size: heap_size_of_local_context(),
+            size: heap_size_of_persistent_local_context(),
         });
 
         reports_chan.send(reports);
     }
 
     fn create_layout_thread(&self, info: NewLayoutThreadInfo) {
         LayoutThread::create(info.id,
                              FrameId::installed(),
@@ -1442,18 +1443,17 @@ impl LayoutThread {
                     }
                 });
             }
 
             profile(time::ProfilerCategory::LayoutStoreOverflow,
                     self.profiler_metadata(),
                     self.time_profiler_chan.clone(),
                     || {
-                let tlc = ThreadLocalLayoutContext::new(&shared);
-                let context = LayoutContext::new(&shared, &*tlc);
+                let context = LayoutContext::new(&shared);
                 sequential::store_overflow(&context,
                                            FlowRef::deref_mut(&mut root_flow) as &mut Flow);
             });
 
             self.perform_post_main_layout_passes(data,
                                                  query_type,
                                                  document,
                                                  rw_data,
--- a/servo/components/style/Cargo.toml
+++ b/servo/components/style/Cargo.toml
@@ -39,17 +39,17 @@ nsstring_vendor = {path = "gecko_binding
 num-integer = "0.1.32"
 num-traits = "0.1.32"
 ordered-float = "0.2.2"
 owning_ref = "0.2.2"
 parking_lot = "0.3.3"
 phf = "0.7.20"
 quickersort = "2.0.0"
 rand = "0.3"
-rayon = "0.5"
+rayon = "0.6"
 rustc-serialize = "0.3"
 selectors = "0.15"
 serde = {version = "0.8", optional = true}
 serde_derive = {version = "0.8", optional = true}
 servo_atoms = {path = "../atoms", optional = true}
 servo_config = {path = "../config"}
 smallvec = "0.1"
 style_traits = {path = "../style_traits"}
--- a/servo/components/style/context.rs
+++ b/servo/components/style/context.rs
@@ -1,22 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! The context within which style is calculated.
 
 use animation::Animation;
 use app_units::Au;
-use dom::OpaqueNode;
+use dom::{OpaqueNode, TElement};
 use error_reporting::ParseErrorReporter;
 use euclid::Size2D;
 use matching::StyleSharingCandidateCache;
 use parking_lot::RwLock;
-use std::cell::RefCell;
 use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::Sender;
 use stylist::Stylist;
 use timer::Timer;
 
 /// This structure is used to create a local style context from a shared one.
 pub struct ThreadLocalStyleContextCreationInfo {
@@ -71,35 +70,35 @@ pub struct SharedStyleContext {
     /// The current timer for transitions and animations. This is needed to test
     /// them.
     pub timer: Timer,
 
     /// The QuirksMode state which the document needs to be rendered with
     pub quirks_mode: QuirksMode,
 }
 
-pub struct ThreadLocalStyleContext {
-    pub style_sharing_candidate_cache: RefCell<StyleSharingCandidateCache>,
+pub struct ThreadLocalStyleContext<E: TElement> {
+    pub style_sharing_candidate_cache: StyleSharingCandidateCache<E>,
     /// A channel on which new animations that have been triggered by style
     /// recalculation can be sent.
     pub new_animations_sender: Sender<Animation>,
 }
 
-impl ThreadLocalStyleContext {
-    pub fn new(local_context_creation_data: &ThreadLocalStyleContextCreationInfo) -> Self {
+impl<E: TElement> ThreadLocalStyleContext<E> {
+    pub fn new(shared: &SharedStyleContext) -> Self {
         ThreadLocalStyleContext {
-            style_sharing_candidate_cache: RefCell::new(StyleSharingCandidateCache::new()),
-            new_animations_sender: local_context_creation_data.new_animations_sender.clone(),
+            style_sharing_candidate_cache: StyleSharingCandidateCache::new(),
+            new_animations_sender: shared.local_context_creation_data.lock().unwrap().new_animations_sender.clone(),
         }
     }
 }
 
-pub struct StyleContext<'a> {
+pub struct StyleContext<'a, E: TElement + 'a> {
     pub shared: &'a SharedStyleContext,
-    pub thread_local: &'a ThreadLocalStyleContext,
+    pub thread_local: &'a mut ThreadLocalStyleContext<E>,
 }
 
 /// Why we're doing reflow.
 #[derive(PartialEq, Copy, Clone, Debug)]
 pub enum ReflowGoal {
     /// We're reflowing in order to send a display list to the screen.
     ForDisplay,
     /// We're reflowing in order to satisfy a script query. No display list will be created.
deleted file mode 100644
--- a/servo/components/style/gecko/context.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use context::{SharedStyleContext, ThreadLocalStyleContext};
-use std::cell::RefCell;
-use std::rc::Rc;
-
-thread_local!(static LOCAL_CONTEXT_KEY: RefCell<Option<Rc<ThreadLocalStyleContext>>> = RefCell::new(None));
-
-// Keep this implementation in sync with the one in components/layout/context.rs.
-pub fn create_or_get_local_context(shared: &SharedStyleContext) -> Rc<ThreadLocalStyleContext> {
-    LOCAL_CONTEXT_KEY.with(|r| {
-        let mut r = r.borrow_mut();
-        if let Some(context) = r.clone() {
-            context
-        } else {
-            let context = Rc::new(ThreadLocalStyleContext::new(&shared.local_context_creation_data.lock().unwrap()));
-            *r = Some(context.clone());
-            context
-        }
-    })
-}
-
-pub fn clear_local_context() {
-    LOCAL_CONTEXT_KEY.with(|r| *r.borrow_mut() = None);
-}
--- a/servo/components/style/gecko/mod.rs
+++ b/servo/components/style/gecko/mod.rs
@@ -1,14 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 
-pub mod context;
 pub mod data;
 pub mod restyle_damage;
 pub mod snapshot;
 pub mod snapshot_helpers;
 pub mod traversal;
 pub mod wrapper;
 
 pub mod conversions;
--- a/servo/components/style/gecko/traversal.rs
+++ b/servo/components/style/gecko/traversal.rs
@@ -1,49 +1,50 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use atomic_refcell::AtomicRefCell;
 use context::{SharedStyleContext, StyleContext, ThreadLocalStyleContext};
 use data::ElementData;
 use dom::{NodeInfo, TNode};
-use gecko::context::create_or_get_local_context;
 use gecko::wrapper::{GeckoElement, GeckoNode};
-use std::rc::Rc; use traversal::{DomTraversal, PerLevelTraversalData, recalc_style_at};
+use traversal::{DomTraversal, PerLevelTraversalData, recalc_style_at};
 
 pub struct RecalcStyleOnly {
     shared: SharedStyleContext,
 }
 
 impl RecalcStyleOnly {
     pub fn new(shared: SharedStyleContext) -> Self {
         RecalcStyleOnly {
             shared: shared,
         }
     }
 }
 
 impl<'ln> DomTraversal<GeckoNode<'ln>> for RecalcStyleOnly {
-    type ThreadLocalContext = ThreadLocalStyleContext;
+    type ThreadLocalContext = ThreadLocalStyleContext<GeckoElement<'ln>>;
 
-    fn process_preorder(&self, node: GeckoNode<'ln>, traversal_data: &mut PerLevelTraversalData) {
+    fn process_preorder(&self, traversal_data: &mut PerLevelTraversalData,
+                        thread_local: &mut Self::ThreadLocalContext,
+                        node: GeckoNode<'ln>)
+    {
         if node.is_element() {
             let el = node.as_element().unwrap();
             let mut data = unsafe { el.ensure_data() }.borrow_mut();
-            let tlc = self.create_or_get_thread_local_context();
-            let context = StyleContext {
+            let mut context = StyleContext {
                 shared: &self.shared,
-                thread_local: &*tlc,
+                thread_local: thread_local,
             };
-            recalc_style_at(self, traversal_data, &context, el, &mut data);
+            recalc_style_at(self, traversal_data, &mut context, el, &mut data);
         }
     }
 
-    fn process_postorder(&self, _: GeckoNode<'ln>) {
+    fn process_postorder(&self, _: &mut Self::ThreadLocalContext, _: GeckoNode<'ln>) {
         unreachable!();
     }
 
     /// We don't use the post-order traversal for anything.
     fn needs_postorder_traversal() -> bool { false }
 
     unsafe fn ensure_element_data<'a>(element: &'a GeckoElement<'ln>) -> &'a AtomicRefCell<ElementData> {
         element.ensure_data()
@@ -52,12 +53,12 @@ impl<'ln> DomTraversal<GeckoNode<'ln>> f
     unsafe fn clear_element_data<'a>(element: &'a GeckoElement<'ln>) {
         element.clear_data()
     }
 
     fn shared_context(&self) -> &SharedStyleContext {
         &self.shared
     }
 
-    fn create_or_get_thread_local_context(&self) -> Rc<ThreadLocalStyleContext> {
-        create_or_get_local_context(&self.shared)
+    fn create_thread_local_context(&self) -> Self::ThreadLocalContext {
+        ThreadLocalStyleContext::new(&self.shared)
     }
 }
--- a/servo/components/style/lib.rs
+++ b/servo/components/style/lib.rs
@@ -110,16 +110,17 @@ pub mod keyframes;
 pub mod logical_geometry;
 pub mod matching;
 pub mod media_queries;
 pub mod owning_handle;
 pub mod parallel;
 pub mod parser;
 pub mod restyle_hints;
 pub mod rule_tree;
+pub mod scoped_tls;
 pub mod selector_parser;
 pub mod stylist;
 #[cfg(feature = "servo")] #[allow(unsafe_code)] pub mod servo;
 pub mod sequential;
 pub mod sink;
 pub mod str;
 pub mod stylesheets;
 pub mod thread_state;
--- a/servo/components/style/matching.rs
+++ b/servo/components/style/matching.rs
@@ -8,17 +8,17 @@
 
 use {Atom, LocalName};
 use animation::{self, Animation, PropertyAnimation};
 use atomic_refcell::AtomicRefMut;
 use cache::LRUCache;
 use cascade_info::CascadeInfo;
 use context::{SharedStyleContext, StyleContext};
 use data::{ComputedStyle, ElementData, ElementStyles, PseudoStyles};
-use dom::{TElement, TNode, UnsafeNode};
+use dom::{TElement, TNode};
 use properties::{CascadeFlags, ComputedValues, SHAREABLE, SKIP_ROOT_AND_ITEM_BASED_DISPLAY_FIXUP, cascade};
 use properties::longhands::display::computed_value as display;
 use rule_tree::StrongRuleNode;
 use selector_parser::{PseudoElement, RestyleDamage, SelectorImpl};
 use selectors::MatchAttr;
 use selectors::bloom::BloomFilter;
 use selectors::matching::{AFFECTED_BY_PSEUDO_ELEMENTS, MatchingReason, StyleRelations};
 use servo_config::opts;
@@ -60,48 +60,50 @@ pub struct MatchResults {
 impl MatchResults {
     /// Returns true if the primary rule node is shareable with other nodes.
     pub fn primary_is_shareable(&self) -> bool {
         use traversal::relations_are_shareable;
         relations_are_shareable(&self.relations)
     }
 }
 
+// TElement isn't Send because we want to be careful and explicit about our
+// parallel traversal, but we need the candidates to be Send so that we can stick
+// them in ScopedTLS.
+#[derive(Debug, PartialEq)]
+struct SendElement<E: TElement>(pub E);
+unsafe impl<E: TElement> Send for SendElement<E> {}
+
 /// Information regarding a candidate.
 ///
 /// TODO: We can stick a lot more info here.
 #[derive(Debug)]
-struct StyleSharingCandidate {
-    /// The node, guaranteed to be an element.
-    node: UnsafeNode,
+struct StyleSharingCandidate<E: TElement> {
+    /// The element.
+    element: SendElement<E>,
     /// The cached common style affecting attribute info.
     common_style_affecting_attributes: Option<CommonStyleAffectingAttributes>,
     /// the cached class names.
     class_attributes: Option<Vec<Atom>>,
 }
 
-impl PartialEq<StyleSharingCandidate> for StyleSharingCandidate {
+impl<E: TElement> PartialEq<StyleSharingCandidate<E>> for StyleSharingCandidate<E> {
     fn eq(&self, other: &Self) -> bool {
-        self.node == other.node &&
+        self.element == other.element &&
             self.common_style_affecting_attributes == other.common_style_affecting_attributes
     }
 }
 
 /// An LRU cache of the last few nodes seen, so that we can aggressively try to
 /// reuse their styles.
 ///
 /// Note that this cache is flushed every time we steal work from the queue, so
 /// storing nodes here temporarily is safe.
-///
-/// NB: We store UnsafeNode's, but this is not unsafe. It's a shame being
-/// generic over elements is unfeasible (you can make compile style without much
-/// difficulty, but good luck with layout and all the types with assoc.
-/// lifetimes).
-pub struct StyleSharingCandidateCache {
-    cache: LRUCache<StyleSharingCandidate, ()>,
+pub struct StyleSharingCandidateCache<E: TElement> {
+    cache: LRUCache<StyleSharingCandidate<E>, ()>,
 }
 
 #[derive(Clone, Debug)]
 pub enum CacheMiss {
     Parent,
     LocalName,
     Namespace,
     Link,
@@ -112,17 +114,17 @@ pub enum CacheMiss {
     Class,
     CommonStyleAffectingAttributes,
     PresHints,
     SiblingRules,
     NonCommonAttrRules,
 }
 
 fn element_matches_candidate<E: TElement>(element: &E,
-                                          candidate: &mut StyleSharingCandidate,
+                                          candidate: &mut StyleSharingCandidate<E>,
                                           candidate_element: &E,
                                           shared_context: &SharedStyleContext)
                                           -> Result<ComputedStyle, CacheMiss> {
     macro_rules! miss {
         ($miss: ident) => {
             return Err(CacheMiss::$miss);
         }
     }
@@ -188,17 +190,17 @@ fn element_matches_candidate<E: TElement
     let data = candidate_element.borrow_data().unwrap();
     debug_assert!(data.has_current_styles());
     let current_styles = data.styles();
 
     Ok(current_styles.primary.clone())
 }
 
 fn have_same_common_style_affecting_attributes<E: TElement>(element: &E,
-                                                            candidate: &mut StyleSharingCandidate,
+                                                            candidate: &mut StyleSharingCandidate<E>,
                                                             candidate_element: &E) -> bool {
     if candidate.common_style_affecting_attributes.is_none() {
         candidate.common_style_affecting_attributes =
             Some(create_common_style_affecting_attributes_from_element(candidate_element))
     }
     create_common_style_affecting_attributes_from_element(element) ==
         candidate.common_style_affecting_attributes.unwrap()
 }
@@ -267,17 +269,17 @@ pub fn common_style_affecting_attributes
 /// Attributes that, if present, disable style sharing. All legacy HTML attributes must be in
 /// either this list or `common_style_affecting_attributes`. See the comment in
 /// `synthesize_presentational_hints_for_legacy_attributes`.
 pub fn rare_style_affecting_attributes() -> [LocalName; 3] {
     [ local_name!("bgcolor"), local_name!("border"), local_name!("colspan") ]
 }
 
 fn have_same_class<E: TElement>(element: &E,
-                                candidate: &mut StyleSharingCandidate,
+                                candidate: &mut StyleSharingCandidate<E>,
                                 candidate_element: &E) -> bool {
     // XXX Efficiency here, I'm only validating ideas.
     let mut element_class_attributes = vec![];
     element.each_class(|c| element_class_attributes.push(c.clone()));
 
     if candidate.class_attributes.is_none() {
         let mut attrs = vec![];
         candidate_element.each_class(|c| attrs.push(c.clone()));
@@ -299,31 +301,31 @@ fn match_same_not_common_style_affecting
 fn match_same_sibling_affecting_rules<E: TElement>(element: &E,
                                                    candidate: &E,
                                                    ctx: &SharedStyleContext) -> bool {
     ctx.stylist.match_same_sibling_affecting_rules(element, candidate)
 }
 
 static STYLE_SHARING_CANDIDATE_CACHE_SIZE: usize = 8;
 
-impl StyleSharingCandidateCache {
+impl<E: TElement> StyleSharingCandidateCache<E> {
     pub fn new() -> Self {
         StyleSharingCandidateCache {
             cache: LRUCache::new(STYLE_SHARING_CANDIDATE_CACHE_SIZE),
         }
     }
 
-    fn iter_mut(&mut self) -> IterMut<(StyleSharingCandidate, ())> {
+    fn iter_mut(&mut self) -> IterMut<(StyleSharingCandidate<E>, ())> {
         self.cache.iter_mut()
     }
 
-    pub fn insert_if_possible<E: TElement>(&mut self,
-                                           element: &E,
-                                           style: &Arc<ComputedValues>,
-                                           relations: StyleRelations) {
+    pub fn insert_if_possible(&mut self,
+                              element: &E,
+                              style: &Arc<ComputedValues>,
+                              relations: StyleRelations) {
         use traversal::relations_are_shareable;
 
         let parent = match element.parent_element() {
             Some(element) => element,
             None => {
                 debug!("Failing to insert to the cache: no parent element");
                 return;
             }
@@ -343,20 +345,20 @@ impl StyleSharingCandidateCache {
         }
 
         if box_style.animation_name_count() > 0 {
             debug!("Failing to insert to the cache: animations");
             return;
         }
 
         debug!("Inserting into cache: {:?} with parent {:?}",
-               element.as_node().to_unsafe(), parent.as_node().to_unsafe());
+               element, parent);
 
         self.cache.insert(StyleSharingCandidate {
-            node: element.as_node().to_unsafe(),
+            element: SendElement(*element),
             common_style_affecting_attributes: None,
             class_attributes: None,
         }, ());
     }
 
     pub fn touch(&mut self, index: usize) {
         self.cache.touch(index);
     }
@@ -387,17 +389,17 @@ struct CascadeBooleans {
 }
 
 trait PrivateMatchMethods: TElement {
     /// Actually cascades style for a node or a pseudo-element of a node.
     ///
     /// Note that animations only apply to nodes or ::before or ::after
     /// pseudo-elements.
     fn cascade_node_pseudo_element<'a>(&self,
-                                       context: &StyleContext,
+                                       context: &StyleContext<Self>,
                                        parent_style: Option<&Arc<ComputedValues>>,
                                        old_style: Option<&Arc<ComputedValues>>,
                                        rule_node: &StrongRuleNode,
                                        possibly_expired_animations: &[PropertyAnimation],
                                        booleans: CascadeBooleans)
                                        -> Arc<ComputedValues> {
         let shared_context = context.shared;
         let mut cascade_info = CascadeInfo::new();
@@ -492,40 +494,37 @@ trait PrivateMatchMethods: TElement {
                     }
                 }
             }
         }
     }
 
     fn share_style_with_candidate_if_possible(&self,
                                               shared_context: &SharedStyleContext,
-                                              candidate: &mut StyleSharingCandidate)
+                                              candidate: &mut StyleSharingCandidate<Self>)
                                               -> Result<ComputedStyle, CacheMiss> {
-        let candidate_element = unsafe {
-            Self::ConcreteNode::from_unsafe(&candidate.node).as_element().unwrap()
-        };
-
+        let candidate_element = candidate.element.0;
         element_matches_candidate(self, candidate, &candidate_element,
                                   shared_context)
     }
 }
 
-fn compute_rule_node(context: &StyleContext,
-                     applicable_declarations: &mut Vec<ApplicableDeclarationBlock>)
-                     -> StrongRuleNode
+fn compute_rule_node<E: TElement>(context: &StyleContext<E>,
+                                  applicable_declarations: &mut Vec<ApplicableDeclarationBlock>)
+                                  -> StrongRuleNode
 {
     let rules = applicable_declarations.drain(..).map(|d| (d.source, d.importance));
     let rule_node = context.shared.stylist.rule_tree.insert_ordered_rules(rules);
     rule_node
 }
 
 impl<E: TElement> PrivateMatchMethods for E {}
 
 pub trait MatchMethods : TElement {
-    fn match_element(&self, context: &StyleContext, parent_bf: Option<&BloomFilter>)
+    fn match_element(&self, context: &StyleContext<Self>, parent_bf: Option<&BloomFilter>)
                      -> MatchResults
     {
         let mut applicable_declarations: Vec<ApplicableDeclarationBlock> = Vec::with_capacity(16);
         let stylist = &context.shared.stylist;
         let style_attribute = self.style_attribute();
 
         // Compute the primary rule node.
         let mut primary_relations =
@@ -564,17 +563,17 @@ pub trait MatchMethods : TElement {
         }
     }
 
     /// Attempts to share a style with another node. This method is unsafe because it depends on
     /// the `style_sharing_candidate_cache` having only live nodes in it, and we have no way to
     /// guarantee that at the type system level yet.
     unsafe fn share_style_if_possible(&self,
                                       style_sharing_candidate_cache:
-                                        &mut StyleSharingCandidateCache,
+                                        &mut StyleSharingCandidateCache<Self>,
                                       shared_context: &SharedStyleContext,
                                       data: &mut AtomicRefMut<ElementData>)
                                       -> StyleSharingResult {
         if opts::get().disable_share_style_cache {
             return StyleSharingResult::CannotShare
         }
 
         if self.style_attribute().is_some() {
@@ -713,17 +712,17 @@ pub trait MatchMethods : TElement {
                 debug_assert!(pseudo.is_none() ||
                               new_style.get_box().clone_display() != display::T::none);
                 RestyleDamage::rebuild_and_reflow()
             }
         }
     }
 
     unsafe fn cascade_node(&self,
-                           context: &StyleContext,
+                           context: &StyleContext<Self>,
                            mut data: &mut AtomicRefMut<ElementData>,
                            parent: Option<Self>,
                            primary_rule_node: StrongRuleNode,
                            pseudo_rule_nodes: PseudoRuleNodes,
                            primary_is_shareable: bool)
     {
         // Get our parent's style.
         let parent_data = parent.as_ref().map(|x| x.borrow_data().unwrap());
@@ -790,17 +789,17 @@ pub trait MatchMethods : TElement {
     }
 
     fn compute_damage_and_cascade_pseudos(
             &self,
             old_primary: Option<&Arc<ComputedValues>>,
             mut old_pseudos: Option<&mut PseudoStyles>,
             new_primary: &Arc<ComputedValues>,
             new_pseudos: &mut PseudoStyles,
-            context: &StyleContext,
+            context: &StyleContext<Self>,
             mut pseudo_rule_nodes: PseudoRuleNodes,
             possibly_expired_animations: &mut Vec<PropertyAnimation>)
             -> RestyleDamage
     {
         // Here we optimise the case of the style changing but both the
         // previous and the new styles having display: none. In this
         // case, we can always optimize the traversal, regardless of the
         // restyle hint.
--- a/servo/components/style/parallel.rs
+++ b/servo/components/style/parallel.rs
@@ -3,18 +3,18 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Implements parallel traversal over the DOM tree.
 //!
 //! This code is highly unsafe. Keep this file small and easy to audit.
 
 use dom::{OpaqueNode, TElement, TNode, UnsafeNode};
 use rayon;
+use scoped_tls::ScopedTLS;
 use servo_config::opts;
-use std::borrow::Borrow;
 use std::sync::atomic::Ordering;
 use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
 use traversal::{STYLE_SHARING_CACHE_HITS, STYLE_SHARING_CACHE_MISSES};
 
 pub const CHUNK_SIZE: usize = 64;
 
 pub fn traverse_dom<N, D>(traversal: &D,
                           root: N::ConcreteElement,
@@ -40,24 +40,25 @@ pub fn traverse_dom<N, D>(traversal: &D,
                 children.push(kid.to_unsafe());
             }
         }
         (children, known_root_dom_depth.map(|x| x + 1))
     } else {
         (vec![root.as_node().to_unsafe()], known_root_dom_depth)
     };
 
-    let data = PerLevelTraversalData {
+    let traversal_data = PerLevelTraversalData {
         current_dom_depth: depth,
     };
+    let tls = ScopedTLS::<D::ThreadLocalContext>::new(queue);
+    let root = root.as_node().opaque();
 
-    let root = root.as_node().opaque();
     queue.install(|| {
         rayon::scope(|scope| {
-            traverse_nodes(nodes, root, data, scope, traversal);
+            traverse_nodes(nodes, root, traversal_data, scope, traversal, &tls);
         });
     });
 
     if opts::get().style_sharing_stats {
         let hits = STYLE_SHARING_CACHE_HITS.load(Ordering::SeqCst);
         let misses = STYLE_SHARING_CACHE_MISSES.load(Ordering::SeqCst);
 
         println!("Style sharing stats:");
@@ -66,116 +67,120 @@ pub fn traverse_dom<N, D>(traversal: &D,
     }
 }
 
 /// A parallel top-down DOM traversal.
 #[inline(always)]
 #[allow(unsafe_code)]
 fn top_down_dom<'a, 'scope, N, D>(unsafe_nodes: &'a [UnsafeNode],
                                   root: OpaqueNode,
-                                  mut data: PerLevelTraversalData,
+                                  mut traversal_data: PerLevelTraversalData,
                                   scope: &'a rayon::Scope<'scope>,
-                                  traversal: &'scope D)
+                                  traversal: &'scope D,
+                                  tls: &'scope ScopedTLS<'scope, D::ThreadLocalContext>)
     where N: TNode,
           D: DomTraversal<N>,
 {
     let mut discovered_child_nodes = vec![];
-    for unsafe_node in unsafe_nodes {
-        // Get a real layout node.
-        let node = unsafe { N::from_unsafe(&unsafe_node) };
+    {
+        // Scope the borrow of the TLS so that the borrow is dropped before
+        // potentially traversing a child on this thread.
+        let mut tlc = tls.ensure(|| traversal.create_thread_local_context());
+
+        for unsafe_node in unsafe_nodes {
+            // Get a real layout node.
+            let node = unsafe { N::from_unsafe(&unsafe_node) };
 
-        // Perform the appropriate traversal.
-        let mut children_to_process = 0isize;
-        traversal.process_preorder(node, &mut data);
-        if let Some(el) = node.as_element() {
-            D::traverse_children(el, |kid| {
-                children_to_process += 1;
-                discovered_child_nodes.push(kid.to_unsafe())
-            });
-        }
+            // Perform the appropriate traversal.
+            let mut children_to_process = 0isize;
+            traversal.process_preorder(&mut traversal_data, &mut *tlc, node);
+            if let Some(el) = node.as_element() {
+                D::traverse_children(el, |kid| {
+                    children_to_process += 1;
+                    discovered_child_nodes.push(kid.to_unsafe())
+                });
+            }
 
-        // Reset the count of children if we need to do a bottom-up traversal
-        // after the top up.
-        if D::needs_postorder_traversal() {
-            if children_to_process == 0 {
-                // If there were no more children, start walking back up.
-                bottom_up_dom(root, *unsafe_node, traversal)
-            } else {
-                // Otherwise record the number of children to process when the
-                // time comes.
-                node.as_element().unwrap().store_children_to_process(children_to_process);
+            // Reset the count of children if we need to do a bottom-up traversal
+            // after the top-down one.
+            if D::needs_postorder_traversal() {
+                if children_to_process == 0 {
+                    // If there were no more children, start walking back up.
+                    bottom_up_dom(traversal, &mut *tlc, root, *unsafe_node)
+                } else {
+                    // Otherwise record the number of children to process when the
+                    // time comes.
+                    node.as_element().unwrap().store_children_to_process(children_to_process);
+                }
             }
         }
     }
 
-    // NB: In parallel traversal mode we have to purge the LRU cache in order to
-    // be able to access it without races.
-    let tlc = traversal.create_or_get_thread_local_context();
-    (*tlc).borrow().style_sharing_candidate_cache.borrow_mut().clear();
-
-    if let Some(ref mut depth) = data.current_dom_depth {
+    if let Some(ref mut depth) = traversal_data.current_dom_depth {
         *depth += 1;
     }
 
-    traverse_nodes(discovered_child_nodes, root, data, scope, traversal);
+    traverse_nodes(discovered_child_nodes, root, traversal_data, scope, traversal, tls);
 }
 
 fn traverse_nodes<'a, 'scope, N, D>(nodes: Vec<UnsafeNode>, root: OpaqueNode,
-                                    data: PerLevelTraversalData,
+                                    traversal_data: PerLevelTraversalData,
                                     scope: &'a rayon::Scope<'scope>,
-                                    traversal: &'scope D)
+                                    traversal: &'scope D,
+                                    tls: &'scope ScopedTLS<'scope, D::ThreadLocalContext>)
     where N: TNode,
           D: DomTraversal<N>,
 {
     if nodes.is_empty() {
         return;
     }
 
     // Optimization: traverse directly and avoid a heap-allocating spawn() call if
     // we're only pushing one work unit.
     if nodes.len() <= CHUNK_SIZE {
         let nodes = nodes.into_boxed_slice();
-        top_down_dom(&nodes, root, data, scope, traversal);
+        top_down_dom(&nodes, root, traversal_data, scope, traversal, tls);
         return;
     }
 
     // General case.
     for chunk in nodes.chunks(CHUNK_SIZE) {
         let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice();
-        let data = data.clone();
+        let traversal_data = traversal_data.clone();
         scope.spawn(move |scope| {
             let nodes = nodes;
-            top_down_dom(&nodes, root, data, scope, traversal)
+            top_down_dom(&nodes, root, traversal_data, scope, traversal, tls)
         })
     }
 }
 
 /// Process current node and potentially traverse its ancestors.
 ///
 /// If we are the last child that finished processing, recursively process
 /// our parent. Else, stop. Also, stop at the root.
 ///
 /// Thus, if we start with all the leaves of a tree, we end up traversing
 /// the whole tree bottom-up because each parent will be processed exactly
 /// once (by the last child that finishes processing).
 ///
 /// The only communication between siblings is that they both
 /// fetch-and-subtract the parent's children count.
 #[allow(unsafe_code)]
-fn bottom_up_dom<N, D>(root: OpaqueNode,
-                       unsafe_node: UnsafeNode,
-                       traversal: &D)
+fn bottom_up_dom<N, D>(traversal: &D,
+                       thread_local: &mut D::ThreadLocalContext,
+                       root: OpaqueNode,
+                       unsafe_node: UnsafeNode)
     where N: TNode,
           D: DomTraversal<N>
 {
     // Get a real layout node.
     let mut node = unsafe { N::from_unsafe(&unsafe_node) };
     loop {
         // Perform the appropriate operation.
-        traversal.process_postorder(node);
+        traversal.process_postorder(thread_local, node);
 
         if node.opaque() == root {
             break;
         }
 
         let parent = match node.parent_element() {
             None => unreachable!("How can this happen after the break above?"),
             Some(parent) => parent,
new file mode 100644
--- /dev/null
+++ b/servo/components/style/scoped_tls.rs
@@ -0,0 +1,51 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#![allow(unsafe_code)]
+
+use rayon;
+use std::cell::{Ref, RefCell, RefMut};
+
+/// Stack-scoped thread-local storage for rayon thread pools: one lazily
+/// initialized slot per pool thread. Accessors panic if called off-pool.
+pub struct ScopedTLS<'a, T: Send> {
+    pool: &'a rayon::ThreadPool,
+    slots: Box<[RefCell<Option<T>>]>,
+}
+
+unsafe impl<'a, T: Send> Sync for ScopedTLS<'a, T> {}
+
+impl<'a, T: Send> ScopedTLS<'a, T> {
+    pub fn new(p: &'a rayon::ThreadPool) -> Self {
+        let count = p.num_threads();
+        let mut v = Vec::with_capacity(count);
+        for _ in 0..count {
+            v.push(RefCell::new(None));
+        }
+
+        ScopedTLS {
+            pool: p,
+            slots: v.into_boxed_slice(),
+        }
+    }
+
+    pub fn borrow(&self) -> Ref<Option<T>> {
+        let idx = self.pool.current_thread_index().unwrap();
+        self.slots[idx].borrow()
+    }
+
+    pub fn borrow_mut(&self) -> RefMut<Option<T>> {
+        let idx = self.pool.current_thread_index().unwrap();
+        self.slots[idx].borrow_mut()
+    }
+
+    pub fn ensure<F: FnOnce() -> T>(&self, f: F) -> RefMut<T> {
+        let mut opt = self.borrow_mut();
+        if opt.is_none() {
+            *opt = Some(f());
+        }
+
+        RefMut::map(opt, |x| x.as_mut().unwrap())
+    }
+}
--- a/servo/components/style/sequential.rs
+++ b/servo/components/style/sequential.rs
@@ -1,58 +1,55 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Implements sequential traversal over the DOM tree.
 
 use dom::{TElement, TNode};
-use std::borrow::Borrow;
 use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
 
 pub fn traverse_dom<N, D>(traversal: &D,
                           root: N::ConcreteElement,
                           token: PreTraverseToken)
     where N: TNode,
           D: DomTraversal<N>
 {
     debug_assert!(token.should_traverse());
 
-    fn doit<N, D>(traversal: &D, node: N, data: &mut PerLevelTraversalData)
+    fn doit<N, D>(traversal: &D, traversal_data: &mut PerLevelTraversalData,
+                  thread_local: &mut D::ThreadLocalContext, node: N)
         where N: TNode,
               D: DomTraversal<N>
     {
-        traversal.process_preorder(node, data);
+        traversal.process_preorder(traversal_data, thread_local, node);
         if let Some(el) = node.as_element() {
-            if let Some(ref mut depth) = data.current_dom_depth {
+            if let Some(ref mut depth) = traversal_data.current_dom_depth {
                 *depth += 1;
             }
 
-            D::traverse_children(el, |kid| doit(traversal, kid, data));
+            D::traverse_children(el, |kid| doit(traversal, traversal_data, thread_local, kid));
 
-            if let Some(ref mut depth) = data.current_dom_depth {
+            if let Some(ref mut depth) = traversal_data.current_dom_depth {
                 *depth -= 1;
             }
         }
 
         if D::needs_postorder_traversal() {
-            traversal.process_postorder(node);
+            traversal.process_postorder(thread_local, node);
         }
     }
 
-    let mut data = PerLevelTraversalData {
+    let mut traversal_data = PerLevelTraversalData {
         current_dom_depth: None,
     };
 
+    let mut tlc = traversal.create_thread_local_context();
     if token.traverse_unstyled_children_only() {
         for kid in root.as_node().children() {
             if kid.as_element().map_or(false, |el| el.get_data().is_none()) {
-                doit(traversal, kid, &mut data);
+                doit(traversal, &mut traversal_data, &mut tlc, kid);
             }
         }
     } else {
-        doit(traversal, root.as_node(), &mut data);
+        doit(traversal, &mut traversal_data, &mut tlc, root.as_node());
     }
-
-    // Clear the local LRU cache since we store stateful elements inside.
-    let tlc = traversal.create_or_get_thread_local_context();
-    (*tlc).borrow().style_sharing_candidate_cache.borrow_mut().clear();
 }
--- a/servo/components/style/traversal.rs
+++ b/servo/components/style/traversal.rs
@@ -1,29 +1,27 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Traversing the DOM tree; the bloom filter.
 
 use atomic_refcell::{AtomicRefCell, AtomicRefMut};
 use bloom::StyleBloom;
-use context::{SharedStyleContext, StyleContext, ThreadLocalStyleContext};
+use context::{SharedStyleContext, StyleContext};
 use data::{ElementData, StoredRestyleHint};
 use dom::{OpaqueNode, TElement, TNode};
 use matching::{MatchMethods, StyleSharingResult};
 use restyle_hints::{RESTYLE_DESCENDANTS, RESTYLE_SELF};
 use selector_parser::RestyleDamage;
 use selectors::Element;
 use selectors::matching::StyleRelations;
 use servo_config::opts;
-use std::borrow::Borrow;
 use std::cell::RefCell;
 use std::mem;
-use std::rc::Rc;
 use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering};
 use stylist::Stylist;
 
 /// Every time we do another layout, the old bloom filters are invalid. This is
 /// detected by ticking a generation number every layout.
 pub type Generation = u32;
 
 /// Style sharing candidate cache stats. These are only used when
@@ -123,25 +121,26 @@ pub enum LogBehavior {
     DontLog,
 }
 use self::LogBehavior::*;
 impl LogBehavior {
     fn allow(&self) -> bool { match *self { MayLog => true, DontLog => false, } }
 }
 
 pub trait DomTraversal<N: TNode> : Sync {
-    type ThreadLocalContext: Borrow<ThreadLocalStyleContext>;
+    type ThreadLocalContext: Send;
 
     /// Process `node` on the way down, before its children have been processed.
-    fn process_preorder(&self, node: N, data: &mut PerLevelTraversalData);
+    fn process_preorder(&self, data: &mut PerLevelTraversalData,
+                        thread_local: &mut Self::ThreadLocalContext, node: N);
 
     /// Process `node` on the way up, after its children have been processed.
     ///
     /// This is only executed if `needs_postorder_traversal` returns true.
-    fn process_postorder(&self, node: N);
+    fn process_postorder(&self, thread_local: &mut Self::ThreadLocalContext, node: N);
 
     /// Boolean that specifies whether a bottom up traversal should be
     /// performed.
     ///
     /// If it's false, then process_postorder has no effect at all.
     fn needs_postorder_traversal() -> bool { true }
 
     /// Must be invoked before traversing the root element to determine whether
@@ -316,33 +315,33 @@ pub trait DomTraversal<N: TNode> : Sync 
     /// Clears the ElementData attached to this element, if any.
     ///
     /// This is only safe to call in top-down traversal before processing the
     /// children of |element|.
     unsafe fn clear_element_data(element: &N::ConcreteElement);
 
     fn shared_context(&self) -> &SharedStyleContext;
 
-    fn create_or_get_thread_local_context(&self) -> Rc<Self::ThreadLocalContext>;
+    fn create_thread_local_context(&self) -> Self::ThreadLocalContext;
 }
 
 /// Determines the amount of relations where we're going to share style.
 #[inline]
 pub fn relations_are_shareable(relations: &StyleRelations) -> bool {
     use selectors::matching::*;
     !relations.intersects(AFFECTED_BY_ID_SELECTOR |
                           AFFECTED_BY_PSEUDO_ELEMENTS | AFFECTED_BY_STATE |
                           AFFECTED_BY_NON_COMMON_STYLE_AFFECTING_ATTRIBUTE_SELECTOR |
                           AFFECTED_BY_STYLE_ATTRIBUTE |
                           AFFECTED_BY_PRESENTATIONAL_HINTS)
 }
 
 /// Handles lazy resolution of style in display:none subtrees. See the comment
 /// at the callsite in query.rs.
-pub fn style_element_in_display_none_subtree<E, F>(context: &StyleContext,
+pub fn style_element_in_display_none_subtree<E, F>(context: &StyleContext<E>,
                                                    element: E, init_data: &F) -> E
     where E: TElement,
           F: Fn(E),
 {
     // Check the base case.
     if element.get_data().is_some() {
         // See the comment on `cascade_node` for why we allow this on Gecko.
         debug_assert!(cfg!(feature = "gecko") || element.borrow_data().unwrap().has_current_styles());
@@ -371,17 +370,17 @@ pub fn style_element_in_display_none_sub
     display_none_root
 }
 
 /// Calculates the style for a single node.
 #[inline]
 #[allow(unsafe_code)]
 pub fn recalc_style_at<E, D>(traversal: &D,
                              traversal_data: &mut PerLevelTraversalData,
-                             context: &StyleContext,
+                             context: &mut StyleContext<E>,
                              element: E,
                              mut data: &mut AtomicRefMut<ElementData>)
     where E: TElement,
           D: DomTraversal<E::ConcreteNode>
 {
     debug_assert!(data.as_restyle().map_or(true, |r| r.snapshot.is_none()),
                   "Snapshots should be expanded by the caller");
 
@@ -418,17 +417,17 @@ pub fn recalc_style_at<E, D>(traversal: 
 
 // Computes style, returning true if the inherited styles changed for this
 // element.
 //
 // FIXME(bholley): This should differentiate between matching and cascading,
 // since we have separate bits for each now.
 fn compute_style<E, D>(_traversal: &D,
                        traversal_data: &mut PerLevelTraversalData,
-                       context: &StyleContext,
+                       context: &mut StyleContext<E>,
                        element: E,
                        mut data: &mut AtomicRefMut<ElementData>) -> bool
     where E: TElement,
           D: DomTraversal<E::ConcreteNode>,
 {
     let shared_context = context.shared;
     let mut bf = take_thread_local_bloom_filter(shared_context);
     // Ensure the bloom filter is up to date.
@@ -440,23 +439,20 @@ fn compute_style<E, D>(_traversal: &D,
     //
     // Note that this is always the same than the pre-existing depth, but it can
     // change from unknown to known at this step.
     traversal_data.current_dom_depth = Some(dom_depth);
 
     bf.assert_complete(element);
 
     // Check to see whether we can share a style with someone.
-    let mut style_sharing_candidate_cache =
-        context.thread_local.style_sharing_candidate_cache.borrow_mut();
-
     let sharing_result = if element.parent_element().is_none() {
         StyleSharingResult::CannotShare
     } else {
-        unsafe { element.share_style_if_possible(&mut style_sharing_candidate_cache,
+        unsafe { element.share_style_if_possible(&mut context.thread_local.style_sharing_candidate_cache,
                                                  shared_context, &mut data) }
     };
 
     // Otherwise, match and cascade selectors.
     match sharing_result {
         StyleSharingResult::CannotShare => {
             let match_results;
             let shareable_element = {
@@ -481,26 +477,26 @@ fn compute_style<E, D>(_traversal: &D,
                                      element.parent_element(),
                                      match_results.primary,
                                      match_results.per_pseudo,
                                      shareable);
             }
 
             // Add ourselves to the LRU cache.
             if let Some(element) = shareable_element {
-                style_sharing_candidate_cache.insert_if_possible(&element,
-                                                                 &data.styles().primary.values,
-                                                                 relations);
+                context.thread_local
+                       .style_sharing_candidate_cache
+                       .insert_if_possible(&element, &data.styles().primary.values, relations);
             }
         }
         StyleSharingResult::StyleWasShared(index) => {
             if opts::get().style_sharing_stats {
                 STYLE_SHARING_CACHE_HITS.fetch_add(1, Ordering::Relaxed);
             }
-            style_sharing_candidate_cache.touch(index);
+            context.thread_local.style_sharing_candidate_cache.touch(index);
         }
     }
 
     // If we're restyling this element to display:none, throw away all style data
     // in the subtree, notify the caller to early-return.
     let display_none = data.styles().is_display_none();
     if display_none {
         debug!("New element style is display:none - clearing data from descendants.");
--- a/servo/ports/geckolib/glue.rs
+++ b/servo/ports/geckolib/glue.rs
@@ -12,21 +12,21 @@ use selectors::Element;
 use servo_url::ServoUrl;
 use std::borrow::Cow;
 use std::fmt::Write;
 use std::mem::transmute;
 use std::ptr;
 use std::sync::{Arc, Mutex};
 use style::arc_ptr_eq;
 use style::atomic_refcell::AtomicRefMut;
-use style::context::{ThreadLocalStyleContextCreationInfo, QuirksMode, ReflowGoal, SharedStyleContext, StyleContext};
+use style::context::{QuirksMode, ReflowGoal, SharedStyleContext, StyleContext};
+use style::context::{ThreadLocalStyleContext, ThreadLocalStyleContextCreationInfo};
 use style::data::{ElementData, RestyleData};
 use style::dom::{ShowSubtreeData, TElement, TNode};
 use style::error_reporting::StdoutErrorReporter;
-use style::gecko::context::clear_local_context;
 use style::gecko::data::{NUM_THREADS, PerDocumentStyleData, PerDocumentStyleDataImpl};
 use style::gecko::restyle_damage::GeckoRestyleDamage;
 use style::gecko::selector_parser::{SelectorImpl, PseudoElement};
 use style::gecko::traversal::RecalcStyleOnly;
 use style::gecko::wrapper::DUMMY_BASE_URL;
 use style::gecko::wrapper::GeckoElement;
 use style::gecko_bindings::bindings::{RawServoDeclarationBlockBorrowed, RawServoDeclarationBlockStrong};
 use style::gecko_bindings::bindings::{RawServoStyleRuleBorrowed, RawServoStyleRuleStrong};
@@ -83,22 +83,16 @@ pub extern "C" fn Servo_Initialize() -> 
     // Pretend that we're a Servo Layout thread, to make some assertions happy.
     thread_state::initialize(thread_state::LAYOUT);
 }
 
 #[no_mangle]
 pub extern "C" fn Servo_Shutdown() -> () {
     // Destroy our default computed values.
     unsafe { ComputedValues::shutdown(); }
-
-    // In general, ThreadLocalStyleContexts will get destroyed when the worker thread
-    // is joined and the TLS is dropped. However, under some configurations we
-    // may do sequential style computation on the main thread, so we need to be
-    // sure to clear the main thread TLS entry as well.
-    clear_local_context();
 }
 
 fn create_shared_context(mut per_doc_data: &mut AtomicRefMut<PerDocumentStyleDataImpl>) -> SharedStyleContext {
     // The stylist consumes stylesheets lazily.
     per_doc_data.flush_stylesheets();
 
     let local_context_data =
         ThreadLocalStyleContextCreationInfo::new(per_doc_data.new_animations_sender.clone());
@@ -870,31 +864,28 @@ pub extern "C" fn Servo_ResolveStyle(ele
                     error!("Attempting manual style computation with unstyled parent");
                     return Arc::new(ComputedValues::initial_values().clone()).into_strong();
                 }
             }
 
             let mut per_doc_data = PerDocumentStyleData::from_ffi(raw_data).borrow_mut();
             let shared_style_context = create_shared_context(&mut per_doc_data);
             let traversal = RecalcStyleOnly::new(shared_style_context);
-            let tlc = traversal.create_or_get_thread_local_context();
 
             let mut traversal_data = PerLevelTraversalData {
                 current_dom_depth: None,
             };
 
-            let context = StyleContext {
+            let mut tlc = ThreadLocalStyleContext::new(traversal.shared_context());
+            let mut context = StyleContext {
                 shared: traversal.shared_context(),
-                thread_local: &*tlc,
+                thread_local: &mut tlc,
             };
 
-            recalc_style_at(&traversal, &mut traversal_data, &context, element, &mut data);
-
-            // We don't want to keep any cached style around after this one-off style resolution.
-            tlc.style_sharing_candidate_cache.borrow_mut().clear();
+            recalc_style_at(&traversal, &mut traversal_data, &mut context, element, &mut data);
 
             // The element was either unstyled or needed restyle. If it was unstyled, it may have
             // additional unstyled children that subsequent traversals won't find now that the style
             // on this element is up-to-date. Mark dirty descendants in that case.
             if element.first_child_element().is_some() {
                 unsafe { element.set_dirty_descendants() };
             }
         }
--- a/servo/servo-tidy.toml
+++ b/servo/servo-tidy.toml
@@ -6,17 +6,17 @@ check-ordered-json-keys = [
   "./resources/package-prefs.json",
 ]
 lint-scripts = [
   "./python/servo/lints/wpt_lint.py",
 ]
 
 [ignore]
 # Ignored packages with duplicated versions
-packages = ["bitflags", "lazy_static", "semver"]
+packages = ["bitflags", "lazy_static", "semver", "rayon"]
 # Files that are ignored for all tidy and lint checks.
 files = [
   # Generated and upstream code combined with our own. Could use cleanup
   "./components/style/gecko_bindings/bindings.rs",
   "./components/style/gecko_bindings/structs_debug.rs",
   "./components/style/gecko_bindings/structs_release.rs",
   "./components/style/gecko_string_cache/atom_macro.rs",
   "./resources/hsts_preload.json",
--- a/servo/tests/unit/style/Cargo.toml
+++ b/servo/tests/unit/style/Cargo.toml
@@ -15,16 +15,16 @@ testing = ["style/testing"]
 [dependencies]
 app_units = "0.3"
 cssparser = {version = "0.7", features = ["heap_size"]}
 euclid = "0.10.1"
 html5ever-atoms = "0.1"
 matches = "0.1"
 owning_ref = "0.2.2"
 parking_lot = "0.3"
-rayon = "0.5"
+rayon = "0.6"
 rustc-serialize = "0.3"
 selectors = "0.15"
 servo_atoms = {path = "../../../components/atoms"}
 servo_config = {path = "../../../components/config"}
 style = {path = "../../../components/style"}
 style_traits = {path = "../../../components/style_traits"}
 servo_url = {path = "../../../components/url"}