servo: Merge #13641 - Use rayon to drive parallel layout and styling (from emilio:rayon-style); r=pcwalton
author Emilio Cobos Álvarez <ecoal95@gmail.com>
Mon, 14 Nov 2016 14:47:21 -0600
changeset 369181 23de2cbe2ec91dc8ec1a702e884c7d5a242c97c7
parent 369180 5f5394adc8ac6592687f31d4456396b1db6592f4
child 369182 529c507772a09d64315487998c156421beb67333
push id 10863
push user jlorenzo@mozilla.com
push date Mon, 06 Mar 2017 23:02:23 +0000
treeherder mozilla-aurora@0931190cd725 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers pcwalton
servo: Merge #13641 - Use rayon to drive parallel layout and styling (from emilio:rayon-style); r=pcwalton <!-- Please describe your changes on the following line: --> The current work queue had a really annoying constraint: The size of the node had to be the size of the work unit data. This makes it impractical for the new restyling model where we plan to pass down a bunch of data. Rayon by default makes you wait for the result of the work unit, which makes it impractical for the current model (it's mostly sequential). I added an API to rayon that allows us to push work to the queue without waiting (https://github.com/nikomatsakis/rayon/pull/103). This still needs some work (for example, we're losing the memory reporting functionality), but I wanted feedback on this. --- <!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: --> - [ ] `./mach build -d` does not report any errors - [ ] `./mach test-tidy` does not report any errors - [ ] These changes fix #__ (github issue number if applicable). <!-- Either: --> - [ ] There are tests for these changes OR - [ ] These changes do not require tests because _____ <!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. --> Source-Repo: https://github.com/servo/servo Source-Revision: 9467fbe26d25dfc633d5ab8973fe459d14eff9dc
servo/components/layout/Cargo.toml
servo/components/layout/lib.rs
servo/components/layout/parallel.rs
servo/components/layout_thread/Cargo.toml
servo/components/layout_thread/lib.rs
servo/components/servo/Cargo.lock
servo/components/style/Cargo.toml
servo/components/style/gecko/data.rs
servo/components/style/gecko/wrapper.rs
servo/components/style/lib.rs
servo/components/style/parallel.rs
servo/components/style/thread_state.rs
servo/components/style/workqueue.rs
servo/ports/cef/Cargo.lock
servo/ports/geckolib/Cargo.lock
--- a/servo/components/layout/Cargo.toml
+++ b/servo/components/layout/Cargo.toml
@@ -26,16 +26,17 @@ libc = "0.2"
 log = "0.3.5"
 msg = {path = "../msg"}
 net_traits = {path = "../net_traits"}
 ordered-float = "0.2.2"
 parking_lot = "0.3.3"
 plugins = {path = "../plugins"}
 profile_traits = {path = "../profile_traits"}
 range = {path = "../range"}
+rayon = "0.5"
 script_layout_interface = {path = "../script_layout_interface"}
 script_traits = {path = "../script_traits"}
 selectors = "0.14"
 serde = "0.8"
 serde_derive = "0.8"
 serde_json = "0.8"
 servo_atoms = {path = "../atoms"}
 smallvec = "0.1"
--- a/servo/components/layout/lib.rs
+++ b/servo/components/layout/lib.rs
@@ -39,16 +39,17 @@ extern crate ordered_float;
 extern crate parking_lot;
 #[macro_use]
 #[no_link]
 extern crate plugins as servo_plugins;
 #[macro_use]
 extern crate profile_traits;
 #[macro_use]
 extern crate range;
+extern crate rayon;
 extern crate script_layout_interface;
 extern crate script_traits;
 extern crate serde;
 #[macro_use]
 extern crate serde_derive;
 extern crate serde_json;
 #[macro_use] extern crate servo_atoms;
 extern crate smallvec;
--- a/servo/components/layout/parallel.rs
+++ b/servo/components/layout/parallel.rs
@@ -7,22 +7,21 @@
 //! This code is highly unsafe. Keep this file small and easy to audit.
 
 #![allow(unsafe_code)]
 
 use context::{LayoutContext, SharedLayoutContext};
 use flow::{self, Flow, MutableFlowUtils, PostorderFlowTraversal, PreorderFlowTraversal};
 use flow_ref::FlowRef;
 use profile_traits::time::{self, TimerMetadata, profile};
+use rayon;
 use std::mem;
 use std::sync::atomic::{AtomicIsize, Ordering};
 use style::dom::UnsafeNode;
-use style::parallel::{CHUNK_SIZE, WorkQueueData};
-use style::parallel::run_queue_with_custom_work_data_type;
-use style::workqueue::{WorkQueue, WorkUnit, WorkerProxy};
+use style::parallel::CHUNK_SIZE;
 use traversal::{AssignISizes, BubbleISizes};
 use traversal::AssignBSizes;
 use util::opts;
 
 pub use style::parallel::traverse_dom;
 
 #[allow(dead_code)]
 fn static_assertion(node: UnsafeNode) {
@@ -45,20 +44,18 @@ pub fn mut_owned_flow_to_unsafe_flow(flo
 }
 
 pub fn borrowed_flow_to_unsafe_flow(flow: &Flow) -> UnsafeFlow {
     unsafe {
         mem::transmute::<&Flow, UnsafeFlow>(flow)
     }
 }
 
-pub type UnsafeFlowList = (Box<Vec<UnsafeNode>>, usize);
-
-pub type ChunkedFlowTraversalFunction =
-    extern "Rust" fn(UnsafeFlowList, &mut WorkerProxy<SharedLayoutContext, UnsafeFlowList>);
+pub type ChunkedFlowTraversalFunction<'scope> =
+    extern "Rust" fn(Box<[UnsafeFlow]>, &'scope SharedLayoutContext, &rayon::Scope<'scope>);
 
 pub type FlowTraversalFunction = extern "Rust" fn(UnsafeFlow, &SharedLayoutContext);
 
 /// Information that we need stored in each flow.
 pub struct FlowParallelInfo {
     /// The number of children that still need work done.
     pub children_count: AtomicIsize,
     /// The address of the parent flow.
@@ -128,90 +125,102 @@ trait ParallelPostorderFlowTraversal : P
                 break
             }
         }
     }
 }
 
 /// A parallel top-down flow traversal.
 trait ParallelPreorderFlowTraversal : PreorderFlowTraversal {
-    fn run_parallel(&self,
-                    unsafe_flows: UnsafeFlowList,
-                    proxy: &mut WorkerProxy<SharedLayoutContext, UnsafeFlowList>);
+    fn run_parallel<'scope>(&self,
+                            unsafe_flows: &[UnsafeFlow],
+                            layout_context: &'scope SharedLayoutContext,
+                            scope: &rayon::Scope<'scope>);
 
     fn should_record_thread_ids(&self) -> bool;
 
     #[inline(always)]
-    fn run_parallel_helper(&self,
-                           unsafe_flows: UnsafeFlowList,
-                           proxy: &mut WorkerProxy<SharedLayoutContext, UnsafeFlowList>,
-                           top_down_func: ChunkedFlowTraversalFunction,
-                           bottom_up_func: FlowTraversalFunction) {
-        let mut discovered_child_flows = Vec::new();
-        for unsafe_flow in *unsafe_flows.0 {
+    fn run_parallel_helper<'scope>(&self,
+                                   unsafe_flows: &[UnsafeFlow],
+                                   layout_context: &'scope SharedLayoutContext,
+                                   scope: &rayon::Scope<'scope>,
+                                   top_down_func: ChunkedFlowTraversalFunction<'scope>,
+                                   bottom_up_func: FlowTraversalFunction)
+    {
+        let mut discovered_child_flows = vec![];
+        for unsafe_flow in unsafe_flows {
             let mut had_children = false;
             unsafe {
                 // Get a real flow.
-                let flow: &mut Flow = mem::transmute(unsafe_flow);
+                let flow: &mut Flow = mem::transmute(*unsafe_flow);
 
                 if self.should_record_thread_ids() {
-                    flow::mut_base(flow).thread_id = proxy.worker_index();
+                    // FIXME(emilio): With the switch to rayon we can no longer
+                    // access a thread id from here easily. Either instrument
+                    // rayon (the unstable feature) to get a worker thread
+                    // identifier, or remove all the layout tinting mode.
+                    //
+                    // flow::mut_base(flow).thread_id = proxy.worker_index();
                 }
 
                 if self.should_process(flow) {
                     // Perform the appropriate traversal.
                     self.process(flow);
                 }
 
                 // Possibly enqueue the children.
                 for kid in flow::child_iter_mut(flow) {
                     had_children = true;
                     discovered_child_flows.push(borrowed_flow_to_unsafe_flow(kid));
                 }
             }
 
             // If there were no more children, start assigning block-sizes.
             if !had_children {
-                bottom_up_func(unsafe_flow, proxy.user_data())
+                bottom_up_func(*unsafe_flow, layout_context)
             }
         }
 
         for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
-            proxy.push(WorkUnit {
-                fun: top_down_func,
-                data: (box chunk.iter().cloned().collect(), 0),
+            let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice();
+
+            scope.spawn(move |scope| {
+                top_down_func(nodes, layout_context, scope);
             });
         }
     }
 }
 
 impl<'a> ParallelPreorderFlowTraversal for AssignISizes<'a> {
-    fn run_parallel(&self,
-                    unsafe_flows: UnsafeFlowList,
-                    proxy: &mut WorkerProxy<SharedLayoutContext, UnsafeFlowList>) {
+    fn run_parallel<'scope>(&self,
+                                  unsafe_flows: &[UnsafeFlow],
+                                  layout_context: &'scope SharedLayoutContext,
+                                  scope: &rayon::Scope<'scope>)
+    {
         self.run_parallel_helper(unsafe_flows,
-                                 proxy,
+                                 layout_context,
+                                 scope,
                                  assign_inline_sizes,
                                  assign_block_sizes_and_store_overflow)
     }
 
     fn should_record_thread_ids(&self) -> bool {
         true
     }
 }
 
 impl<'a> ParallelPostorderFlowTraversal for AssignBSizes<'a> {}
 
-fn assign_inline_sizes(unsafe_flows: UnsafeFlowList,
-                       proxy: &mut WorkerProxy<SharedLayoutContext, UnsafeFlowList>) {
-    let shared_layout_context = proxy.user_data();
+fn assign_inline_sizes<'scope>(unsafe_flows: Box<[UnsafeFlow]>,
+                               shared_layout_context: &'scope SharedLayoutContext,
+                               scope: &rayon::Scope<'scope>) {
     let assign_inline_sizes_traversal = AssignISizes {
         shared_context: &shared_layout_context.style_context,
     };
-    assign_inline_sizes_traversal.run_parallel(unsafe_flows, proxy)
+    assign_inline_sizes_traversal.run_parallel(&unsafe_flows, shared_layout_context, scope)
 }
 
 fn assign_block_sizes_and_store_overflow(
         unsafe_flow: UnsafeFlow,
         shared_layout_context: &SharedLayoutContext) {
     let layout_context = LayoutContext::new(shared_layout_context);
     let assign_block_sizes_traversal = AssignBSizes {
         layout_context: &layout_context,
@@ -219,25 +228,26 @@ fn assign_block_sizes_and_store_overflow
     assign_block_sizes_traversal.run_parallel(unsafe_flow)
 }
 
 pub fn traverse_flow_tree_preorder(
         root: &mut Flow,
         profiler_metadata: Option<TimerMetadata>,
         time_profiler_chan: time::ProfilerChan,
         shared_layout_context: &SharedLayoutContext,
-        queue: &mut WorkQueue<SharedLayoutContext, WorkQueueData>) {
+        queue: &rayon::ThreadPool) {
     if opts::get().bubble_inline_sizes_separately {
         let layout_context = LayoutContext::new(shared_layout_context);
         let bubble_inline_sizes = BubbleISizes { layout_context: &layout_context };
         root.traverse_postorder(&bubble_inline_sizes);
     }
 
-    run_queue_with_custom_work_data_type(queue, |queue| {
-        profile(time::ProfilerCategory::LayoutParallelWarmup, profiler_metadata,
-                time_profiler_chan, || {
-            queue.push(WorkUnit {
-                fun: assign_inline_sizes,
-                data: (box vec![borrowed_flow_to_unsafe_flow(root)], 0),
-            })
+    let nodes = vec![borrowed_flow_to_unsafe_flow(root)].into_boxed_slice();
+
+    queue.install(move || {
+        rayon::scope(move |scope| {
+            profile(time::ProfilerCategory::LayoutParallelWarmup,
+                    profiler_metadata, time_profiler_chan, move || {
+                assign_inline_sizes(nodes, &shared_layout_context, scope);
+            });
         });
-    }, shared_layout_context);
+    });
 }
--- a/servo/components/layout_thread/Cargo.toml
+++ b/servo/components/layout_thread/Cargo.toml
@@ -22,16 +22,17 @@ layout = {path = "../layout"}
 layout_traits = {path = "../layout_traits"}
 lazy_static = "0.2"
 log = "0.3.5"
 msg = {path = "../msg"}
 net_traits = {path = "../net_traits"}
 parking_lot = {version = "0.3.3", features = ["nightly"]}
 plugins = {path = "../plugins"}
 profile_traits = {path = "../profile_traits"}
+rayon = "0.5"
 script = {path = "../script"}
 script_layout_interface = {path = "../script_layout_interface"}
 script_traits = {path = "../script_traits"}
 selectors = "0.14"
 serde_derive = "0.8"
 serde_json = "0.8"
 style = {path = "../style"}
 url = {version = "1.2", features = ["heap_size"]}
--- a/servo/components/layout_thread/lib.rs
+++ b/servo/components/layout_thread/lib.rs
@@ -29,16 +29,17 @@ extern crate layout_traits;
 extern crate lazy_static;
 #[macro_use]
 extern crate log;
 extern crate msg;
 extern crate net_traits;
 extern crate parking_lot;
 #[macro_use]
 extern crate profile_traits;
+extern crate rayon;
 extern crate script;
 extern crate script_layout_interface;
 extern crate script_traits;
 extern crate selectors;
 extern crate serde_json;
 extern crate style;
 extern crate url;
 extern crate util;
@@ -102,24 +103,22 @@ use std::sync::{Arc, Mutex, MutexGuard};
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::mpsc::{Receiver, Sender, channel};
 use style::animation::Animation;
 use style::context::{LocalStyleContextCreationInfo, ReflowGoal, SharedStyleContext};
 use style::dom::{TDocument, TElement, TNode};
 use style::error_reporting::{ParseErrorReporter, StdoutErrorReporter};
 use style::logical_geometry::LogicalPoint;
 use style::media_queries::{Device, MediaType};
-use style::parallel::WorkQueueData;
 use style::parser::ParserContextExtraData;
 use style::selector_matching::Stylist;
 use style::servo::restyle_damage::{REFLOW, REFLOW_OUT_OF_FLOW, REPAINT, REPOSITION, STORE_OVERFLOW};
 use style::stylesheets::{Origin, Stylesheet, UserAgentStylesheets};
 use style::thread_state;
 use style::timer::Timer;
-use style::workqueue::WorkQueue;
 use url::Url;
 use util::geometry::max_rect;
 use util::opts;
 use util::prefs::PREFS;
 use util::resource_files::read_resource_file;
 use util::thread;
 
 /// Information needed by the layout thread.
@@ -168,17 +167,17 @@ pub struct LayoutThread {
 
     /// Public interface to the font cache thread.
     font_cache_thread: FontCacheThread,
 
     /// Is this the first reflow in this LayoutThread?
     first_reflow: bool,
 
     /// The workers that we use for parallel operation.
-    parallel_traversal: Option<WorkQueue<SharedLayoutContext, WorkQueueData>>,
+    parallel_traversal: Option<rayon::ThreadPool>,
 
     /// Starts at zero, and increased by one every time a layout completes.
     /// This can be used to easily check for invalid stale data.
     generation: u32,
 
     /// A channel on which new animations that have been triggered by style recalculation can be
     /// sent.
     new_animations_sender: Sender<Animation>,
@@ -378,17 +377,19 @@ impl LayoutThread {
            mem_profiler_chan: mem::ProfilerChan,
            webrender_api_sender: webrender_traits::RenderApiSender,
            layout_threads: usize)
            -> LayoutThread {
         let device = Device::new(
             MediaType::Screen,
             opts::get().initial_window_size.to_f32() * ScaleFactor::new(1.0));
         let parallel_traversal = if layout_threads != 1 {
-            WorkQueue::new("LayoutWorker", thread_state::LAYOUT, layout_threads).ok()
+            let configuration =
+                rayon::Configuration::new().set_num_threads(layout_threads);
+            rayon::ThreadPool::new(configuration).ok()
         } else {
             None
         };
 
         // Create the channel on which new animations can be sent.
         let (new_animations_sender, new_animations_receiver) = channel();
 
         // Proxy IPC messages from the pipeline to the layout thread.
@@ -706,29 +707,16 @@ impl LayoutThread {
 
         // The LayoutThread has a context in TLS...
         reports.push(Report {
             path: path![formatted_url, "layout-thread", "local-context"],
             kind: ReportKind::ExplicitJemallocHeapSize,
             size: heap_size_of_local_context(),
         });
 
-        // ... as do each of the LayoutWorkers, if present.
-        if let Some(ref traversal) = self.parallel_traversal {
-            let sizes = traversal.heap_size_of_tls(heap_size_of_local_context);
-            for (i, size) in sizes.iter().enumerate() {
-                reports.push(Report {
-                    path: path![formatted_url,
-                                format!("layout-worker-{}-local-context", i)],
-                    kind: ReportKind::ExplicitJemallocHeapSize,
-                    size: *size,
-                });
-            }
-        }
-
         reports_chan.send(reports);
     }
 
     fn create_layout_thread(&self, info: NewLayoutThreadInfo) {
         LayoutThread::create(info.id,
                              info.url.clone(),
                              info.is_parent,
                              info.layout_pair,
@@ -768,19 +756,18 @@ impl LayoutThread {
                 }
             }
         }
     }
 
     /// Shuts down the layout thread now. If there are any DOM nodes left, layout will now (safely)
     /// crash.
     fn exit_now(&mut self) {
-        if let Some(ref mut traversal) = self.parallel_traversal {
-            traversal.shutdown()
-        }
+        // Drop the rayon threadpool if present.
+        let _ = self.parallel_traversal.take();
     }
 
     fn handle_add_stylesheet<'a, 'b>(&self,
                                      stylesheet: Arc<Stylesheet>,
                                      possibly_locked_rw_data: &mut RwData<'a, 'b>) {
         // Find all font-face rules and notify the font cache of them.
         // GWTODO: Need to handle unloading web fonts.
 
@@ -850,17 +837,17 @@ impl LayoutThread {
         sequential::traverse_flow_tree_preorder(layout_root, shared_layout_context);
     }
 
     /// Performs layout constraint solving in parallel.
     ///
     /// This corresponds to `Reflow()` in Gecko and `layout()` in WebKit/Blink and should be
     /// benchmarked against those two. It is marked `#[inline(never)]` to aid profiling.
     #[inline(never)]
-    fn solve_constraints_parallel(traversal: &mut WorkQueue<SharedLayoutContext, WorkQueueData>,
+    fn solve_constraints_parallel(traversal: &rayon::ThreadPool,
                                   layout_root: &mut Flow,
                                   profiler_metadata: Option<TimerMetadata>,
                                   time_profiler_chan: time::ProfilerChan,
                                   shared_layout_context: &SharedLayoutContext) {
         let _scope = layout_debug_scope!("solve_constraints_parallel");
 
         // NOTE: this currently computes borders, so any pruning should separate that
         // operation out.
--- a/servo/components/servo/Cargo.lock
+++ b/servo/components/servo/Cargo.lock
@@ -1260,16 +1260,17 @@ dependencies = [
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg 0.0.1",
  "net_traits 0.0.1",
  "ordered-float 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "profile_traits 0.0.1",
  "range 0.0.1",
+ "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "script_layout_interface 0.0.1",
  "script_traits 0.0.1",
  "selectors 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_atoms 0.0.1",
  "smallvec 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1305,16 +1306,17 @@ dependencies = [
  "layout_traits 0.0.1",
  "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg 0.0.1",
  "net_traits 0.0.1",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "profile_traits 0.0.1",
+ "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "script 0.0.1",
  "script_layout_interface 0.0.1",
  "script_traits 0.0.1",
  "selectors 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "style 0.0.1",
  "url 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2482,37 +2484,35 @@ source = "registry+https://github.com/ru
 [[package]]
 name = "style"
 version = "0.0.1"
 dependencies = [
  "app_units 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "cfg-if 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "cssparser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "heapsize 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "heapsize_derive 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "html5ever-atoms 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
- "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "ordered-float 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "owning_ref 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "quickersort 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
  "selectors 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_atoms 0.0.1",
  "smallvec 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "style_traits 0.0.1",
  "time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
--- a/servo/components/style/Cargo.toml
+++ b/servo/components/style/Cargo.toml
@@ -8,61 +8,60 @@ publish = false
 build = "build.rs"
 
 [lib]
 name = "style"
 path = "lib.rs"
 doctest = false
 
 [features]
-gecko = ["nsstring_vendor"]
+gecko = ["nsstring_vendor", "num_cpus", "rayon/unstable"]
 servo = ["serde/unstable", "serde", "serde_derive", "heapsize_derive",
          "style_traits/servo", "app_units/plugins", "servo_atoms", "html5ever-atoms",
          "cssparser/heap_size", "cssparser/serde-serialization",
-         "url/heap_size", "plugins"]
+         "url/heap_size", "plugins", "rayon/unstable"]
 testing = []
 
 [dependencies]
 app_units = "0.3"
 bitflags = "0.7"
 cfg-if = "0.1.0"
 cssparser = "0.7"
-deque = "0.3.1"
 encoding = "0.2"
 euclid = "0.10.1"
 fnv = "1.0"
 heapsize = "0.3.0"
 heapsize_derive = {version = "0.1", optional = true}
 html5ever-atoms = {version = "0.1", optional = true}
 lazy_static = "0.2"
 log = "0.3.5"
-libc = "0.2"
 matches = "0.1"
 nsstring_vendor = {path = "gecko_bindings/nsstring_vendor", optional = true}
 num-integer = "0.1.32"
 num-traits = "0.1.32"
-num_cpus = "1.1.0"
 ordered-float = "0.2.2"
 owning_ref = "0.2.2"
 parking_lot = "0.3.3"
 quickersort = "2.0.0"
 rand = "0.3"
+rayon = "0.5"
 rustc-serialize = "0.3"
 selectors = "0.14"
 serde = {version = "0.8", optional = true}
 serde_derive = {version = "0.8", optional = true}
 servo_atoms = {path = "../atoms", optional = true}
 smallvec = "0.1"
 style_traits = {path = "../style_traits"}
 time = "0.1"
 unicode-segmentation = "0.1.2"
 url = "1.2"
 util = {path = "../util"}
 plugins = {path = "../plugins", optional = true}
 
+[dependencies.num_cpus]
+optional = true
+version = "1.0"
+
 [target.'cfg(windows)'.dependencies]
 kernel32-sys = "0.2"
 
-[target.'cfg(not(windows))'.dependencies]
-libc = "0.2"
-
 [build-dependencies]
 walkdir = "0.1"
--- a/servo/components/style/gecko/data.rs
+++ b/servo/components/style/gecko/data.rs
@@ -1,33 +1,30 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use animation::Animation;
 use atomic_refcell::{AtomicRef, AtomicRefCell, AtomicRefMut};
-use context::SharedStyleContext;
 use dom::OpaqueNode;
 use euclid::size::TypedSize2D;
 use gecko_bindings::bindings::RawServoStyleSet;
 use gecko_bindings::sugar::ownership::{HasBoxFFI, HasFFI, HasSimpleFFI};
 use media_queries::{Device, MediaType};
 use num_cpus;
-use parallel::WorkQueueData;
 use parking_lot::RwLock;
+use rayon;
 use selector_matching::Stylist;
 use std::cmp;
 use std::collections::HashMap;
 use std::env;
 use std::sync::Arc;
 use std::sync::mpsc::{Receiver, Sender, channel};
 use style_traits::ViewportPx;
 use stylesheets::Stylesheet;
-use thread_state;
-use workqueue::WorkQueue;
 
 pub struct PerDocumentStyleDataImpl {
     /// Rule processor.
     pub stylist: Arc<Stylist>,
 
     /// List of stylesheets, mirrored from Gecko.
     pub stylesheets: Vec<Arc<Stylesheet>>,
 
@@ -36,17 +33,17 @@ pub struct PerDocumentStyleDataImpl {
 
     // FIXME(bholley): Hook these up to something.
     pub new_animations_sender: Sender<Animation>,
     pub new_animations_receiver: Receiver<Animation>,
     pub running_animations: Arc<RwLock<HashMap<OpaqueNode, Vec<Animation>>>>,
     pub expired_animations: Arc<RwLock<HashMap<OpaqueNode, Vec<Animation>>>>,
 
     // FIXME(bholley): This shouldn't be per-document.
-    pub work_queue: Option<WorkQueue<SharedStyleContext, WorkQueueData>>,
+    pub work_queue: Option<rayon::ThreadPool>,
 
     pub num_threads: usize,
 }
 
 pub struct PerDocumentStyleData(AtomicRefCell<PerDocumentStyleDataImpl>);
 
 lazy_static! {
     pub static ref NUM_THREADS: usize = {
@@ -71,17 +68,19 @@ impl PerDocumentStyleData {
             stylesheets_changed: true,
             new_animations_sender: new_anims_sender,
             new_animations_receiver: new_anims_receiver,
             running_animations: Arc::new(RwLock::new(HashMap::new())),
             expired_animations: Arc::new(RwLock::new(HashMap::new())),
             work_queue: if *NUM_THREADS <= 1 {
                 None
             } else {
-                WorkQueue::new("StyleWorker", thread_state::LAYOUT, *NUM_THREADS).ok()
+                let configuration =
+                    rayon::Configuration::new().set_num_threads(*NUM_THREADS);
+                rayon::ThreadPool::new(configuration).ok()
             },
             num_threads: *NUM_THREADS,
         }))
     }
 
     pub fn borrow(&self) -> AtomicRef<PerDocumentStyleDataImpl> {
         self.0.borrow()
     }
@@ -107,13 +106,11 @@ impl PerDocumentStyleDataImpl {
 unsafe impl HasFFI for PerDocumentStyleData {
     type FFIType = RawServoStyleSet;
 }
 unsafe impl HasSimpleFFI for PerDocumentStyleData {}
 unsafe impl HasBoxFFI for PerDocumentStyleData {}
 
 impl Drop for PerDocumentStyleDataImpl {
     fn drop(&mut self) {
-        if let Some(ref mut queue) = self.work_queue {
-            queue.shutdown();
-        }
+        let _ = self.work_queue.take();
     }
 }
--- a/servo/components/style/gecko/wrapper.rs
+++ b/servo/components/style/gecko/wrapper.rs
@@ -25,17 +25,16 @@ use gecko_bindings::bindings::{Gecko_IsU
 use gecko_bindings::bindings::{RawGeckoDocument, RawGeckoElement, RawGeckoNode};
 use gecko_bindings::bindings::Gecko_ClassOrClassList;
 use gecko_bindings::bindings::Gecko_GetStyleContext;
 use gecko_bindings::bindings::Gecko_SetNodeFlags;
 use gecko_bindings::bindings::Gecko_StoreStyleDifference;
 use gecko_bindings::structs;
 use gecko_bindings::structs::{NODE_HAS_DIRTY_DESCENDANTS_FOR_SERVO, NODE_IS_DIRTY_FOR_SERVO};
 use gecko_bindings::structs::{nsIAtom, nsIContent, nsStyleContext};
-use libc::uintptr_t;
 use parking_lot::RwLock;
 use parser::ParserContextExtraData;
 use properties::{ComputedValues, parse_style_attribute};
 use properties::PropertyDeclarationBlock;
 use selector_impl::ElementExt;
 use selector_matching::ApplicableDeclarationBlock;
 use selectors::Element;
 use selectors::parser::{AttrSelector, NamespaceConstraint};
@@ -109,17 +108,17 @@ impl<'ln> TNode for GeckoNode<'ln> {
         if let Some(iter) = maybe_iter.into_owned_opt() {
             LayoutIterator(GeckoChildrenIterator::GeckoIterator(iter))
         } else {
             LayoutIterator(GeckoChildrenIterator::Current(self.first_child()))
         }
     }
 
     fn opaque(&self) -> OpaqueNode {
-        let ptr: uintptr_t = self.0 as *const _ as uintptr_t;
+        let ptr: usize = self.0 as *const _ as usize;
         OpaqueNode(ptr)
     }
 
     fn layout_parent_element(self, reflow_root: OpaqueNode) -> Option<GeckoElement<'ln>> {
         if self.opaque() == reflow_root {
             None
         } else {
             self.parent_node().and_then(|x| x.as_element())
--- a/servo/components/style/lib.rs
+++ b/servo/components/style/lib.rs
@@ -44,42 +44,40 @@ extern crate app_units;
 #[allow(unused_extern_crates)]
 #[macro_use]
 extern crate bitflags;
 #[macro_use] #[no_link]
 extern crate cfg_if;
 extern crate core;
 #[macro_use]
 extern crate cssparser;
-extern crate deque;
 extern crate encoding;
 extern crate euclid;
 extern crate fnv;
 #[cfg(feature = "gecko")] #[macro_use] pub mod gecko_string_cache;
 extern crate heapsize;
 #[cfg(feature = "servo")] #[macro_use] extern crate heapsize_derive;
 #[cfg(feature = "servo")] #[macro_use] extern crate html5ever_atoms;
 #[allow(unused_extern_crates)]
 #[macro_use]
 extern crate lazy_static;
-#[cfg(feature = "gecko")] extern crate libc;
 #[macro_use]
 extern crate log;
 #[allow(unused_extern_crates)]
 #[macro_use]
 extern crate matches;
 #[cfg(feature = "gecko")] extern crate nsstring_vendor as nsstring;
 extern crate num_integer;
 extern crate num_traits;
 #[cfg(feature = "gecko")] extern crate num_cpus;
 extern crate ordered_float;
 extern crate owning_ref;
 extern crate parking_lot;
 extern crate quickersort;
-extern crate rand;
+extern crate rayon;
 extern crate rustc_serialize;
 extern crate selectors;
 #[cfg(feature = "servo")]
 extern crate serde;
 #[cfg(feature = "servo")] #[macro_use] extern crate serde_derive;
 #[cfg(feature = "servo")] #[macro_use] extern crate servo_atoms;
 extern crate smallvec;
 #[macro_use]
@@ -126,17 +124,16 @@ pub mod stylesheets;
 pub mod thread_state;
 mod tid;
 pub mod timer;
 pub mod traversal;
 #[macro_use]
 #[allow(non_camel_case_types)]
 pub mod values;
 pub mod viewport;
-pub mod workqueue;
 
 use std::fmt;
 use std::sync::Arc;
 use style_traits::ToCss;
 
 #[cfg(feature = "gecko")] pub use gecko_string_cache as string_cache;
 #[cfg(feature = "gecko")] pub use gecko_string_cache::Atom;
 #[cfg(feature = "gecko")] pub use gecko_string_cache::Namespace;
--- a/servo/components/style/parallel.rs
+++ b/servo/components/style/parallel.rs
@@ -1,90 +1,70 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 //! Implements parallel traversal over the DOM tree.
 //!
 //! This code is highly unsafe. Keep this file small and easy to audit.
 
-#![allow(unsafe_code)]
-
 use dom::{OpaqueNode, StylingMode, TElement, TNode, UnsafeNode};
-use std::mem;
+use rayon;
 use std::sync::atomic::Ordering;
 use traversal::{STYLE_SHARING_CACHE_HITS, STYLE_SHARING_CACHE_MISSES};
 use traversal::DomTraversalContext;
 use util::opts;
-use workqueue::{WorkQueue, WorkUnit, WorkerProxy};
-
-#[allow(dead_code)]
-fn static_assertion(node: UnsafeNode) {
-    unsafe {
-        let _: UnsafeNodeList = mem::transmute(node);
-    }
-}
-
-pub type UnsafeNodeList = (Box<Vec<UnsafeNode>>, OpaqueNode);
 
 pub const CHUNK_SIZE: usize = 64;
 
-pub struct WorkQueueData(usize, usize);
-
-pub fn run_queue_with_custom_work_data_type<To, F, SharedContext: Sync>(
-        queue: &mut WorkQueue<SharedContext, WorkQueueData>,
-        callback: F,
-        shared: &SharedContext)
-    where To: 'static + Send, F: FnOnce(&mut WorkQueue<SharedContext, To>)
-{
-    let queue: &mut WorkQueue<SharedContext, To> = unsafe {
-        mem::transmute(queue)
-    };
-    callback(queue);
-    queue.run(shared);
-}
-
 pub fn traverse_dom<N, C>(root: N,
-                          queue_data: &C::SharedContext,
-                          queue: &mut WorkQueue<C::SharedContext, WorkQueueData>)
+                          shared_context: &C::SharedContext,
+                          queue: &rayon::ThreadPool)
     where N: TNode,
           C: DomTraversalContext<N>
 {
     debug_assert!(root.as_element().unwrap().styling_mode() != StylingMode::Stop);
     if opts::get().style_sharing_stats {
         STYLE_SHARING_CACHE_HITS.store(0, Ordering::SeqCst);
         STYLE_SHARING_CACHE_MISSES.store(0, Ordering::SeqCst);
     }
-    run_queue_with_custom_work_data_type(queue, |queue| {
-        queue.push(WorkUnit {
-            fun: top_down_dom::<N, C>,
-            data: (Box::new(vec![root.to_unsafe()]), root.opaque()),
+
+    let nodes = vec![root.to_unsafe()].into_boxed_slice();
+    let root = root.opaque();
+    queue.install(|| {
+        rayon::scope(|scope| {
+            let nodes = nodes;
+            top_down_dom::<N, C>(&nodes, root, scope, shared_context);
         });
-    }, queue_data);
+    });
 
     if opts::get().style_sharing_stats {
         let hits = STYLE_SHARING_CACHE_HITS.load(Ordering::SeqCst);
         let misses = STYLE_SHARING_CACHE_MISSES.load(Ordering::SeqCst);
 
         println!("Style sharing stats:");
         println!(" * Hits: {}", hits);
         println!(" * Misses: {}", misses);
     }
 }
 
 /// A parallel top-down DOM traversal.
 #[inline(always)]
-fn top_down_dom<N, C>(unsafe_nodes: UnsafeNodeList,
-                      proxy: &mut WorkerProxy<C::SharedContext, UnsafeNodeList>)
-    where N: TNode, C: DomTraversalContext<N>
+#[allow(unsafe_code)]
+fn top_down_dom<'a, 'scope, N, C>(unsafe_nodes: &'a [UnsafeNode],
+                                  root: OpaqueNode,
+                                  scope: &'a rayon::Scope<'scope>,
+                                  shared_context: &'scope C::SharedContext)
+    where N: TNode,
+          C: DomTraversalContext<N>,
 {
-    let context = C::new(proxy.user_data(), unsafe_nodes.1);
+    let context = C::new(shared_context, root);
 
     let mut discovered_child_nodes = vec![];
-    for unsafe_node in *unsafe_nodes.0 {
+    for unsafe_node in unsafe_nodes {
         // Get a real layout node.
         let node = unsafe { N::from_unsafe(&unsafe_node) };
 
         // Perform the appropriate traversal.
         let mut children_to_process = 0isize;
         context.process_preorder(node);
         if let Some(el) = node.as_element() {
             C::traverse_children(el, |kid| {
@@ -93,55 +73,57 @@ fn top_down_dom<N, C>(unsafe_nodes: Unsa
             });
         }
 
         // Reset the count of children if we need to do a bottom-up traversal
         // after the top up.
         if context.needs_postorder_traversal() {
             if children_to_process == 0 {
                 // If there were no more children, start walking back up.
-                bottom_up_dom::<N, C>(unsafe_nodes.1, unsafe_node, proxy)
+                bottom_up_dom::<N, C>(root, *unsafe_node, shared_context)
             } else {
                 // Otherwise record the number of children to process when the
                 // time comes.
                 node.as_element().unwrap().store_children_to_process(children_to_process);
             }
         }
     }
 
     // NB: In parallel traversal mode we have to purge the LRU cache in order to
     // be able to access it without races.
     context.local_context().style_sharing_candidate_cache.borrow_mut().clear();
 
     for chunk in discovered_child_nodes.chunks(CHUNK_SIZE) {
-        proxy.push(WorkUnit {
-            fun:  top_down_dom::<N, C>,
-            data: (Box::new(chunk.iter().cloned().collect()), unsafe_nodes.1),
-        });
+        let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice();
+        scope.spawn(move |scope| {
+            let nodes = nodes;
+            top_down_dom::<N, C>(&nodes, root, scope, shared_context)
+        })
     }
 }
 
 /// Process current node and potentially traverse its ancestors.
 ///
 /// If we are the last child that finished processing, recursively process
 /// our parent. Else, stop. Also, stop at the root.
 ///
 /// Thus, if we start with all the leaves of a tree, we end up traversing
 /// the whole tree bottom-up because each parent will be processed exactly
 /// once (by the last child that finishes processing).
 ///
 /// The only communication between siblings is that they both
 /// fetch-and-subtract the parent's children count.
+#[allow(unsafe_code)]
 fn bottom_up_dom<N, C>(root: OpaqueNode,
                        unsafe_node: UnsafeNode,
-                       proxy: &mut WorkerProxy<C::SharedContext, UnsafeNodeList>)
+                       shared_context: &C::SharedContext)
     where N: TNode,
           C: DomTraversalContext<N>
 {
-    let context = C::new(proxy.user_data(), root);
+    let context = C::new(shared_context, root);
 
     // Get a real layout node.
     let mut node = unsafe { N::from_unsafe(&unsafe_node) };
     loop {
         // Perform the appropriate operation.
         context.process_postorder(node);
 
         let parent = match node.layout_parent_element(root) {
--- a/servo/components/style/thread_state.rs
+++ b/servo/components/style/thread_state.rs
@@ -67,17 +67,18 @@ mod imp {
             *k.borrow_mut() = Some(x);
         });
         get(); // check the assertion below
     }
 
     pub fn get() -> ThreadState {
         let state = STATE.with(|ref k| {
             match *k.borrow() {
-                None => panic!("Thread state not initialized"),
+                // This is one of the layout threads, that use rayon.
+                None => super::LAYOUT | super::IN_WORKER,
                 Some(s) => s,
             }
         });
 
         // Exactly one of the thread type flags should be set.
         assert_eq!(1, TYPES.iter().filter(|&&ty| state.contains(ty)).count());
         state
     }
deleted file mode 100644
--- a/servo/components/style/workqueue.rs
+++ /dev/null
@@ -1,385 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! A work queue for scheduling units of work across threads in a fork-join fashion.
-//!
-//! Data associated with queues is simply a pair of unsigned integers. It is expected that a
-//! higher-level API on top of this could allow safe fork-join parallelism.
-
-#![allow(unsafe_code)]
-
-#[cfg(windows)]
-extern crate kernel32;
-#[cfg(not(windows))]
-extern crate libc;
-
-use deque::{self, Abort, Data, Empty, Stealer, Worker};
-use rand::{Rng, XorShiftRng, weak_rng};
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::mpsc::{Receiver, Sender, channel};
-use std::thread;
-use thread_state;
-
-/// A unit of work.
-///
-/// # Type parameters
-///
-/// - `QueueData`: global custom data for the entire work queue.
-/// - `WorkData`: custom data specific to each unit of work.
-pub struct WorkUnit<QueueData, WorkData: Send> {
-    /// The function to execute.
-    pub fun: extern "Rust" fn(WorkData, &mut WorkerProxy<QueueData, WorkData>),
-    /// Arbitrary data.
-    pub data: WorkData,
-}
-
-/// Messages from the supervisor to the worker.
-enum WorkerMsg<QueueData: 'static, WorkData: 'static + Send> {
-    /// Tells the worker to start work.
-    Start(Worker<WorkUnit<QueueData, WorkData>>, *const AtomicUsize, *const QueueData),
-    /// Tells the worker to stop. It can be restarted again with a `WorkerMsg::Start`.
-    Stop,
-    /// Tells the worker to measure the heap size of its TLS using the supplied function.
-    HeapSizeOfTLS(fn() -> usize),
-    /// Tells the worker thread to terminate.
-    Exit,
-}
-
-unsafe impl<QueueData: 'static, WorkData: 'static + Send> Send for WorkerMsg<QueueData, WorkData> {}
-
-/// Messages to the supervisor.
-enum SupervisorMsg<QueueData: 'static, WorkData: 'static + Send> {
-    Finished,
-    HeapSizeOfTLS(usize),
-    ReturnDeque(usize, Worker<WorkUnit<QueueData, WorkData>>),
-}
-
-unsafe impl<QueueData: 'static, WorkData: 'static + Send> Send for SupervisorMsg<QueueData, WorkData> {}
-
-/// Information that the supervisor thread keeps about the worker threads.
-struct WorkerInfo<QueueData: 'static, WorkData: 'static + Send> {
-    /// The communication channel to the workers.
-    chan: Sender<WorkerMsg<QueueData, WorkData>>,
-    /// The worker end of the deque, if we have it.
-    deque: Option<Worker<WorkUnit<QueueData, WorkData>>>,
-    /// The thief end of the work-stealing deque.
-    thief: Stealer<WorkUnit<QueueData, WorkData>>,
-}
-
-/// Information specific to each worker thread that the thread keeps.
-struct WorkerThread<QueueData: 'static, WorkData: 'static + Send> {
-    /// The index of this worker.
-    index: usize,
-    /// The communication port from the supervisor.
-    port: Receiver<WorkerMsg<QueueData, WorkData>>,
-    /// The communication channel on which messages are sent to the supervisor.
-    chan: Sender<SupervisorMsg<QueueData, WorkData>>,
-    /// The thief end of the work-stealing deque for all other workers.
-    other_deques: Vec<Stealer<WorkUnit<QueueData, WorkData>>>,
-    /// The random number generator for this worker.
-    rng: XorShiftRng,
-}
-
-unsafe impl<QueueData: 'static, WorkData: 'static + Send> Send for WorkerThread<QueueData, WorkData> {}
-
-const SPINS_UNTIL_BACKOFF: u32 = 128;
-const BACKOFF_INCREMENT_IN_US: u32 = 5;
-const BACKOFFS_UNTIL_CONTROL_CHECK: u32 = 6;
-
-#[cfg(not(windows))]
-fn sleep_microseconds(usec: u32) {
-    unsafe {
-        libc::usleep(usec);
-    }
-}
-
-#[cfg(windows)]
-fn sleep_microseconds(_: u32) {
-    unsafe {
-        kernel32::Sleep(0);
-    }
-}
-
-impl<QueueData: Sync, WorkData: Send> WorkerThread<QueueData, WorkData> {
-    /// The main logic. This function starts up the worker and listens for
-    /// messages.
-    fn start(&mut self) {
-        let deque_index_mask = (self.other_deques.len() as u32).next_power_of_two() - 1;
-        loop {
-            // Wait for a start message.
-            let (mut deque, ref_count, queue_data) = match self.port.recv().unwrap() {
-                WorkerMsg::Start(deque, ref_count, queue_data) => (deque, ref_count, queue_data),
-                WorkerMsg::Stop => panic!("unexpected stop message"),
-                WorkerMsg::Exit => return,
-                WorkerMsg::HeapSizeOfTLS(f) => {
-                    self.chan.send(SupervisorMsg::HeapSizeOfTLS(f())).unwrap();
-                    continue;
-                }
-            };
-
-            let mut back_off_sleep = 0 as u32;
-
-            // We're off!
-            'outer: loop {
-                let work_unit;
-                match deque.pop() {
-                    Some(work) => work_unit = work,
-                    None => {
-                        // Become a thief.
-                        let mut i = 0;
-                        loop {
-                            // Don't just use `rand % len` because that's slow on ARM.
-                            let mut victim;
-                            loop {
-                                victim = self.rng.next_u32() & deque_index_mask;
-                                if (victim as usize) < self.other_deques.len() {
-                                    break
-                                }
-                            }
-
-                            match self.other_deques[victim as usize].steal() {
-                                Empty | Abort => {
-                                    // Continue.
-                                }
-                                Data(work) => {
-                                    work_unit = work;
-                                    back_off_sleep = 0 as u32;
-                                    break
-                                }
-                            }
-
-                            if i > SPINS_UNTIL_BACKOFF {
-                                if back_off_sleep >= BACKOFF_INCREMENT_IN_US *
-                                        BACKOFFS_UNTIL_CONTROL_CHECK {
-                                    match self.port.try_recv() {
-                                        Ok(WorkerMsg::Stop) => break 'outer,
-                                        Ok(WorkerMsg::Exit) => return,
-                                        Ok(_) => panic!("unexpected message"),
-                                        _ => {}
-                                    }
-                                }
-
-                                sleep_microseconds(back_off_sleep);
-
-                                back_off_sleep += BACKOFF_INCREMENT_IN_US;
-                                i = 0
-                            } else {
-                                i += 1
-                            }
-                        }
-                    }
-                }
-
-                // At this point, we have some work. Perform it.
-                let mut proxy = WorkerProxy {
-                    worker: &mut deque,
-                    ref_count: ref_count,
-                    // queue_data is kept alive in the stack frame of
-                    // WorkQueue::run until we send the
-                    // SupervisorMsg::ReturnDeque message below.
-                    queue_data: unsafe { &*queue_data },
-                    worker_index: self.index as u8,
-                };
-                (work_unit.fun)(work_unit.data, &mut proxy);
-
-                // The work is done. Now decrement the count of outstanding work items. If this was
-                // the last work unit in the queue, then send a message on the channel.
-                unsafe {
-                    if (*ref_count).fetch_sub(1, Ordering::Release) == 1 {
-                        self.chan.send(SupervisorMsg::Finished).unwrap()
-                    }
-                }
-            }
-
-            // Give the deque back to the supervisor.
-            self.chan.send(SupervisorMsg::ReturnDeque(self.index, deque)).unwrap()
-        }
-    }
-}
-
-/// A handle to the work queue that individual work units have.
-pub struct WorkerProxy<'a, QueueData: 'a, WorkData: 'a + Send> {
-    worker: &'a mut Worker<WorkUnit<QueueData, WorkData>>,
-    ref_count: *const AtomicUsize,
-    queue_data: &'a QueueData,
-    worker_index: u8,
-}
-
-impl<'a, QueueData: 'static, WorkData: Send + 'static> WorkerProxy<'a, QueueData, WorkData> {
-    /// Enqueues a block into the work queue.
-    #[inline]
-    pub fn push(&mut self, work_unit: WorkUnit<QueueData, WorkData>) {
-        unsafe {
-            drop((*self.ref_count).fetch_add(1, Ordering::Relaxed));
-        }
-        self.worker.push(work_unit);
-    }
-
-    /// Retrieves the queue user data.
-    #[inline]
-    pub fn user_data(&self) -> &'a QueueData {
-        self.queue_data
-    }
-
-    /// Retrieves the index of the worker.
-    #[inline]
-    pub fn worker_index(&self) -> u8 {
-        self.worker_index
-    }
-}
-
-/// A work queue on which units of work can be submitted.
-pub struct WorkQueue<QueueData: 'static, WorkData: 'static + Send> {
-    /// Information about each of the workers.
-    workers: Vec<WorkerInfo<QueueData, WorkData>>,
-    /// A port on which deques can be received from the workers.
-    port: Receiver<SupervisorMsg<QueueData, WorkData>>,
-    /// The amount of work that has been enqueued.
-    work_count: usize,
-}
-
-impl<QueueData: Sync, WorkData: Send> WorkQueue<QueueData, WorkData> {
-    /// Creates a new work queue and spawns all the threads associated with
-    /// it.
-    pub fn new(thread_name: &'static str,
-               state: thread_state::ThreadState,
-               thread_count: usize) -> Result<WorkQueue<QueueData, WorkData>, ()> {
-        // Set up data structures.
-        let (supervisor_chan, supervisor_port) = channel();
-        let mut infos = Vec::with_capacity(thread_count);
-        let mut threads = Vec::with_capacity(thread_count);
-        for i in 0..thread_count {
-            let (worker_chan, worker_port) = channel();
-            let (worker, thief) = deque::new();
-            infos.push(WorkerInfo {
-                chan: worker_chan,
-                deque: Some(worker),
-                thief: thief,
-            });
-            threads.push(WorkerThread {
-                index: i,
-                port: worker_port,
-                chan: supervisor_chan.clone(),
-                other_deques: vec!(),
-                rng: weak_rng(),
-            });
-        }
-
-        // Connect workers to one another.
-        for (i, mut thread) in threads.iter_mut().enumerate() {
-            for (j, info) in infos.iter().enumerate() {
-                if i != j {
-                    thread.other_deques.push(info.thief.clone())
-                }
-            }
-            assert!(thread.other_deques.len() == thread_count - 1)
-        }
-
-        // Spawn threads.
-        let mut thread_handles = vec![];
-        for (i, thread) in threads.into_iter().enumerate() {
-            let handle = thread::Builder::new()
-                .name(format!("{} worker {}/{}", thread_name, i + 1, thread_count))
-                .spawn(move || {
-                    thread_state::initialize(state | thread_state::IN_WORKER);
-                    let mut thread = thread;
-                    thread.start()
-                });
-            match handle {
-                Ok(handle) => {
-                    thread_handles.push(handle);
-                }
-                Err(err) => {
-                    warn!("Failed spawning thread: {:?}", err);
-                    break;
-                }
-            }
-        }
-
-        if thread_handles.len() != thread_count {
-            // At least one worker thread failed to be created, just close the
-            // rest of them, and return an error.
-            for (i, handle) in thread_handles.into_iter().enumerate() {
-                let _ = infos[i].chan.send(WorkerMsg::Exit);
-                let _ = handle.join();
-            }
-
-            return Err(());
-        }
-
-        Ok(WorkQueue {
-            workers: infos,
-            port: supervisor_port,
-            work_count: 0,
-        })
-    }
-
-    /// Enqueues a block into the work queue.
-    #[inline]
-    pub fn push(&mut self, work_unit: WorkUnit<QueueData, WorkData>) {
-        let deque = &mut self.workers[0].deque;
-        match *deque {
-            None => {
-                panic!("tried to push a block but we don't have the deque?!")
-            }
-            Some(ref mut deque) => deque.push(work_unit),
-        }
-        self.work_count += 1
-    }
-
-    /// Synchronously runs all the enqueued tasks and waits for them to complete.
-    pub fn run(&mut self, data: &QueueData) {
-        // Tell the workers to start.
-        let work_count = AtomicUsize::new(self.work_count);
-        for worker in &mut self.workers {
-            worker.chan.send(WorkerMsg::Start(worker.deque.take().unwrap(),
-                                              &work_count,
-                                              data)).unwrap()
-        }
-
-        // Wait for the work to finish.
-        drop(self.port.recv());
-        self.work_count = 0;
-
-        // Tell everyone to stop.
-        for worker in &self.workers {
-            worker.chan.send(WorkerMsg::Stop).unwrap()
-        }
-
-        // Get our deques back.
-        for _ in 0..self.workers.len() {
-            match self.port.recv().unwrap() {
-                SupervisorMsg::ReturnDeque(index, deque) => self.workers[index].deque = Some(deque),
-                SupervisorMsg::HeapSizeOfTLS(_) => panic!("unexpected HeapSizeOfTLS message"),
-                SupervisorMsg::Finished => panic!("unexpected finished message!"),
-            }
-        }
-    }
-
-    /// Synchronously measure memory usage of any thread-local storage.
-    pub fn heap_size_of_tls(&self, f: fn() -> usize) -> Vec<usize> {
-        // Tell the workers to measure themselves.
-        for worker in &self.workers {
-            worker.chan.send(WorkerMsg::HeapSizeOfTLS(f)).unwrap()
-        }
-
-        // Wait for the workers to finish measuring themselves.
-        let mut sizes = vec![];
-        for _ in 0..self.workers.len() {
-            match self.port.recv().unwrap() {
-                SupervisorMsg::HeapSizeOfTLS(size) => {
-                    sizes.push(size);
-                }
-                _ => panic!("unexpected message!"),
-            }
-        }
-        sizes
-    }
-
-    pub fn shutdown(&mut self) {
-        for worker in &self.workers {
-            worker.chan.send(WorkerMsg::Exit).unwrap()
-        }
-    }
-}
--- a/servo/ports/cef/Cargo.lock
+++ b/servo/ports/cef/Cargo.lock
@@ -1150,16 +1150,17 @@ dependencies = [
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg 0.0.1",
  "net_traits 0.0.1",
  "ordered-float 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "profile_traits 0.0.1",
  "range 0.0.1",
+ "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "script_layout_interface 0.0.1",
  "script_traits 0.0.1",
  "selectors 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_atoms 0.0.1",
  "smallvec 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1188,16 +1189,17 @@ dependencies = [
  "layout_traits 0.0.1",
  "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "msg 0.0.1",
  "net_traits 0.0.1",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "profile_traits 0.0.1",
+ "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "script 0.0.1",
  "script_layout_interface 0.0.1",
  "script_traits 0.0.1",
  "selectors 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "style 0.0.1",
  "url 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2330,37 +2332,35 @@ source = "registry+https://github.com/ru
 [[package]]
 name = "style"
 version = "0.0.1"
 dependencies = [
  "app_units 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "cfg-if 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "cssparser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "heapsize 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "heapsize_derive 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "html5ever-atoms 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
- "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "ordered-float 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "owning_ref 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "plugins 0.0.1",
  "quickersort 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
  "selectors 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "servo_atoms 0.0.1",
  "smallvec 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "style_traits 0.0.1",
  "time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
--- a/servo/ports/geckolib/Cargo.lock
+++ b/servo/ports/geckolib/Cargo.lock
@@ -280,16 +280,27 @@ dependencies = [
 name = "rand"
 version = "0.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
+name = "rayon"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
+ "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "regex"
 version = "0.1.76"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -330,35 +341,34 @@ source = "registry+https://github.com/ru
 [[package]]
 name = "style"
 version = "0.0.1"
 dependencies = [
  "app_units 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "cfg-if 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "cssparser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "heapsize 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "nsstring_vendor 0.1.0",
  "num-integer 0.1.32 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
  "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "ordered-float 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "owning_ref 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "quickersort 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
  "selectors 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "smallvec 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "style_traits 0.0.1",
  "time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
  "unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "url 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "util 0.0.1",
@@ -538,16 +548,17 @@ source = "registry+https://github.com/ru
 "checksum num-traits 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "8359ea48994f253fa958b5b90b013728b06f54872e5a58bce39540fcdd0f2527"
 "checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
 "checksum ordered-float 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "cc511538298611a79d5a4ddfbb75315b866d942ed26a00bdc3590795c68b7279"
 "checksum owning_ref 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8d91377085359426407a287ab16884a0111ba473aa6844ff01d4ec20ce3d75e7"
 "checksum parking_lot 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3562f3de7bdff194212be82366abf5c6565aff8a433b71c53c63d0e7c9913878"
 "checksum parking_lot_core 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "06f24c980718110494e9cfb7db7438895c3f54505101bb6170329d5e43a53f64"
 "checksum quickersort 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e952ea7699262481636004bc4ab8afaccf2bc13f91b79d1aee6617bd8fc39651"
 "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
+"checksum rayon 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3b6a6e05e0e6b703e9f2ad266eb63f3712e693a17a2702b95a23de14ce8defa9"
 "checksum regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)" = "63b49f873f36ddc838d773972511e5fed2ef7350885af07d58e2f48ce8073dcd"
 "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
 "checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
 "checksum selectors 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f8d3498861f4486e7e1d5c56eabf2b0e461f92bcbf45a3ac30cae0f3d5cdd0"
 "checksum serde 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "784e249221c84265caeb1e2fe48aeada86f67f5acb151bd3903c4585969e43f6"
 "checksum smallvec 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "fcc8d19212aacecf95e4a7a2179b26f7aeb9732a915cf01f05b0d3e044865410"
 "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
 "checksum thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "55dd963dbaeadc08aa7266bf7f91c3154a7805e32bb94b820b769d2ef3b4744d"