servo/components/layout/parallel.rs
author Emilio Cobos Álvarez <ecoal95@gmail.com>
Mon, 14 Nov 2016 14:47:21 -0600
changeset 369181 23de2cbe2ec91dc8ec1a702e884c7d5a242c97c7
parent 368692 89bfec936a22ef359e7b65a2deae11ff8f16d7c0
child 369368 0dfe7146374f72f7f5b5e2c99ff6df6984a81353
permissions -rw-r--r--
servo: Merge #13641 - Use rayon to drive parallel layout and styling (from emilio:rayon-style); r=pcwalton

The current work queue had a really annoying constraint: the size of the node had to be the size of the work unit data. This makes it impractical for the new restyling model, where we plan to pass down a bunch of data.

Rayon by default makes you wait for the result of the work unit, which makes it impractical for the current model (it's mostly sequential). I added an API to rayon that allows us to push work to the queue without waiting (https://github.com/nikomatsakis/rayon/pull/103).

This still needs some work (for example, we're losing the memory reporting functionality), but I wanted feedback on this.

Source-Repo: https://github.com/servo/servo
Source-Revision: 9467fbe26d25dfc633d5ab8973fe459d14eff9dc
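As a rough, illustrative sketch (not Servo code), the "push work without waiting" pattern described above looks like this with the rayon entry points used in this file (`ThreadPool::install`, `rayon::scope`, `Scope::spawn`); the function name and the per-item work are made up for the example:

```rust
// Illustrative only: push work units onto the pool without blocking on each
// one individually; the scope joins all spawned work before `install` returns.
fn spawn_work_units(pool: &rayon::ThreadPool, work_units: Vec<Vec<u32>>) {
    pool.install(move || {
        rayon::scope(move |scope| {
            for unit in work_units {
                // `spawn` returns immediately; a unit may itself spawn more
                // work onto the same scope (as the layout traversals below do).
                scope.spawn(move |_scope| {
                    for item in unit {
                        let _ = item * 2; // stand-in for real per-node work
                    }
                });
            }
        });
    });
}
```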

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! Implements parallel traversals over the DOM and flow trees.
//!
//! This code is highly unsafe. Keep this file small and easy to audit.

#![allow(unsafe_code)]

use context::{LayoutContext, SharedLayoutContext};
use flow::{self, Flow, MutableFlowUtils, PostorderFlowTraversal, PreorderFlowTraversal};
use flow_ref::FlowRef;
use profile_traits::time::{self, TimerMetadata, profile};
use rayon;
use std::mem;
use std::sync::atomic::{AtomicIsize, Ordering};
use style::dom::UnsafeNode;
use style::parallel::CHUNK_SIZE;
use traversal::{AssignISizes, BubbleISizes};
use traversal::AssignBSizes;
use util::opts;

pub use style::parallel::traverse_dom;

/// Asserts at compile time that `UnsafeNode` and `UnsafeFlow` have the same
/// size, so that one can be reinterpreted as the other.
#[allow(dead_code)]
fn static_assertion(node: UnsafeNode) {
    unsafe {
        let _: UnsafeFlow = ::std::intrinsics::transmute(node);
    }
}

/// Vtable + pointer representation of a Flow trait object.
pub type UnsafeFlow = (usize, usize);

/// The null `UnsafeFlow`, used as the parent of the root flow.
fn null_unsafe_flow() -> UnsafeFlow {
    (0, 0)
}

/// Converts a pointer to an owned `FlowRef` into its `UnsafeFlow` representation.
pub fn mut_owned_flow_to_unsafe_flow(flow: *mut FlowRef) -> UnsafeFlow {
    unsafe {
        mem::transmute::<&Flow, UnsafeFlow>(&**flow)
    }
}

/// Converts a borrowed flow into its `UnsafeFlow` representation.
pub fn borrowed_flow_to_unsafe_flow(flow: &Flow) -> UnsafeFlow {
    unsafe {
        mem::transmute::<&Flow, UnsafeFlow>(flow)
    }
}
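
/// A sketch of the inverse conversion, mirroring the inline `mem::transmute`s
/// that the traversals below perform on `UnsafeFlow`s; it is provided purely
/// for illustration and is not called anywhere in this file.
#[allow(dead_code)]
unsafe fn unsafe_flow_to_flow<'a>(unsafe_flow: UnsafeFlow) -> &'a mut Flow {
    mem::transmute(unsafe_flow)
}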

pub type ChunkedFlowTraversalFunction<'scope> =
    extern "Rust" fn(Box<[UnsafeFlow]>, &'scope SharedLayoutContext, &rayon::Scope<'scope>);

pub type FlowTraversalFunction = extern "Rust" fn(UnsafeFlow, &SharedLayoutContext);

/// Information needed by the parallel traversals, stored in each flow.
pub struct FlowParallelInfo {
    /// The number of children that still need work done.
    pub children_count: AtomicIsize,
    /// The address of the parent flow.
    pub parent: UnsafeFlow,
}

impl FlowParallelInfo {
    pub fn new() -> FlowParallelInfo {
        FlowParallelInfo {
            children_count: AtomicIsize::new(0),
            parent: null_unsafe_flow(),
        }
    }
}

/// A parallel bottom-up flow traversal.
trait ParallelPostorderFlowTraversal : PostorderFlowTraversal {
    /// Processes the current flow and potentially traverses its ancestors.
    ///
    /// If we are the last child to finish processing, we recursively process
    /// our parent; otherwise we stop. We also stop at the root.
    ///
    /// Thus, if we start with all the leaves of a tree, we end up traversing
    /// the whole tree bottom-up because each parent will be processed exactly
    /// once (by the last child that finishes processing).
    ///
    /// The only communication between siblings is that they both
    /// fetch-and-subtract the parent's children count.
    fn run_parallel(&self, mut unsafe_flow: UnsafeFlow) {
        loop {
            // Get a real flow.
            let flow: &mut Flow = unsafe {
                mem::transmute(unsafe_flow)
            };

            // Perform the appropriate traversal.
            if self.should_process(flow) {
                self.process(flow);
            }

            let base = flow::mut_base(flow);

            // Reset the count of children for the next layout traversal.
            base.parallel.children_count.store(base.children.len() as isize,
                                               Ordering::Relaxed);

            // Possibly continue with our parent.
            let unsafe_parent = base.parallel.parent;
            if unsafe_parent == null_unsafe_flow() {
                // We're done!
                break
            }

            // We're not at the root yet, so check whether we are the last
            // child of our parent to finish processing. If so, we continue
            // with our parent; otherwise, we stop here and the last sibling
            // to finish will pick the parent up.
            let parent: &mut Flow = unsafe {
                mem::transmute(unsafe_parent)
            };
            let parent_base = flow::mut_base(parent);
            if parent_base.parallel.children_count.fetch_sub(1, Ordering::Relaxed) == 1 {
                // We were the last child of our parent. Reflow our parent.
                unsafe_flow = unsafe_parent
            } else {
                // Stop.
                break
            }
        }
    }
}
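
// A minimal, self-contained sketch (not part of the traversal machinery) of
// the fetch-and-subtract coordination documented above: each finished child
// decrements the shared `children_count`, and only the child that observes the
// counter at 1 goes on to process the parent. The module and test names are
// made up for illustration.
#[cfg(test)]
mod children_count_sketch {
    use std::sync::Arc;
    use std::sync::atomic::{AtomicIsize, AtomicUsize, Ordering};
    use std::thread;

    #[test]
    fn only_the_last_child_processes_the_parent() {
        let children_count = Arc::new(AtomicIsize::new(3));
        let parents_processed = Arc::new(AtomicUsize::new(0));

        let handles: Vec<_> = (0..3).map(|_| {
            let children_count = children_count.clone();
            let parents_processed = parents_processed.clone();
            thread::spawn(move || {
                // The child that sees the old value 1 is the last to finish
                // and is the only one that "processes the parent".
                if children_count.fetch_sub(1, Ordering::Relaxed) == 1 {
                    parents_processed.fetch_add(1, Ordering::Relaxed);
                }
            })
        }).collect();

        for handle in handles {
            handle.join().unwrap();
        }

        assert_eq!(parents_processed.load(Ordering::Relaxed), 1);
    }
}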

/// A parallel top-down flow traversal.
trait ParallelPreorderFlowTraversal : PreorderFlowTraversal {
    fn run_parallel<'scope>(&self,
                            unsafe_flows: &[UnsafeFlow],
                            layout_context: &'scope SharedLayoutContext,
                            scope: &rayon::Scope<'scope>);

    fn should_record_thread_ids(&self) -> bool;

    #[inline(always)]
    fn run_parallel_helper<'scope>(&self,
                                   unsafe_flows: &[UnsafeFlow],
                                   layout_context: &'scope SharedLayoutContext,
                                   scope: &rayon::Scope<'scope>,
                                   top_down_func: ChunkedFlowTraversalFunction<'scope>,
                                   bottom_up_func: FlowTraversalFunction)
    {
        let mut discovered_child_flows = vec![];
        for unsafe_flow in unsafe_flows {
            let mut had_children = false;
            unsafe {
                // Get a real flow.
                let flow: &mut Flow = mem::transmute(*unsafe_flow);

                if self.should_record_thread_ids() {
                    // FIXME(emilio): With the switch to rayon we can no longer
                    // easily access a thread id from here. Either instrument
                    // rayon (the unstable feature) to get a worker thread
                    // identifier, or remove the layout tinting mode entirely.
                    //
                    // flow::mut_base(flow).thread_id = proxy.worker_index();
                }

                if self.should_process(flow) {
                    // Perform the appropriate traversal.
                    self.process(flow);
                }

                // Possibly enqueue the children.
                for kid in flow::child_iter_mut(flow) {
                    had_children = true;
                    discovered_child_flows.push(borrowed_flow_to_unsafe_flow(kid));
                }
            }

            // If there were no children, this flow is a leaf, so start the
            // bottom-up traversal (block-size assignment) from here.
            if !had_children {
                bottom_up_func(*unsafe_flow, layout_context)
            }
        }

        for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
            let nodes = chunk.to_vec().into_boxed_slice();

            scope.spawn(move |scope| {
                top_down_func(nodes, layout_context, scope);
            });
        }
    }
}

impl<'a> ParallelPreorderFlowTraversal for AssignISizes<'a> {
    fn run_parallel<'scope>(&self,
                            unsafe_flows: &[UnsafeFlow],
                            layout_context: &'scope SharedLayoutContext,
                            scope: &rayon::Scope<'scope>)
    {
        self.run_parallel_helper(unsafe_flows,
                                 layout_context,
                                 scope,
                                 assign_inline_sizes,
                                 assign_block_sizes_and_store_overflow)
    }

    fn should_record_thread_ids(&self) -> bool {
        true
    }
}

impl<'a> ParallelPostorderFlowTraversal for AssignBSizes<'a> {}

fn assign_inline_sizes<'scope>(unsafe_flows: Box<[UnsafeFlow]>,
                               shared_layout_context: &'scope SharedLayoutContext,
                               scope: &rayon::Scope<'scope>) {
    let assign_inline_sizes_traversal = AssignISizes {
        shared_context: &shared_layout_context.style_context,
    };
    assign_inline_sizes_traversal.run_parallel(&unsafe_flows, shared_layout_context, scope)
}

fn assign_block_sizes_and_store_overflow(
        unsafe_flow: UnsafeFlow,
        shared_layout_context: &SharedLayoutContext) {
    let layout_context = LayoutContext::new(shared_layout_context);
    let assign_block_sizes_traversal = AssignBSizes {
        layout_context: &layout_context,
    };
    assign_block_sizes_traversal.run_parallel(unsafe_flow)
}

/// Runs the main parallel flow-tree traversals on the given rayon thread pool:
/// inline-size assignment top-down, then block-size assignment (and overflow
/// storage) bottom-up.
pub fn traverse_flow_tree_preorder(
        root: &mut Flow,
        profiler_metadata: Option<TimerMetadata>,
        time_profiler_chan: time::ProfilerChan,
        shared_layout_context: &SharedLayoutContext,
        queue: &rayon::ThreadPool) {
    if opts::get().bubble_inline_sizes_separately {
        let layout_context = LayoutContext::new(shared_layout_context);
        let bubble_inline_sizes = BubbleISizes { layout_context: &layout_context };
        root.traverse_postorder(&bubble_inline_sizes);
    }

    let nodes = vec![borrowed_flow_to_unsafe_flow(root)].into_boxed_slice();

    queue.install(move || {
        rayon::scope(move |scope| {
            profile(time::ProfilerCategory::LayoutParallelWarmup,
                    profiler_metadata, time_profiler_chan, move || {
                assign_inline_sizes(nodes, &shared_layout_context, scope);
            });
        });
    });
}