Bug 1423203 - Update webrender to commit 22f472f0adb02bd71c472e426e47182f2b218f6d. r=jrmuizel
author Kartikaya Gupta <kgupta@mozilla.com>
Fri, 08 Dec 2017 13:43:37 -0500
changeset 395884 09af0af978a05bfad8e9d17085f920332d564294
parent 395844 27fdcafdb961e51fd792bf8563c5d1fd70b10100
child 395885 bdc16b1eab501e3e8c86d01447a727fcb268bab2
push id 98209
push user apavel@mozilla.com
push date Sat, 09 Dec 2017 20:29:15 +0000
treeherder mozilla-inbound@dcc6570639f6 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1423203
milestone 59.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1423203 - Update webrender to commit 22f472f0adb02bd71c472e426e47182f2b218f6d. r=jrmuizel MozReview-Commit-ID: JJK2le2vpeN
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/ps_hardware_composite.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/rect.glsl
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/profiler.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/scene.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/display_item.rs
gfx/webrender_bindings/Cargo.toml
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-b7714b1d4348c00682b5643ea0e3f0b15adaeda5
+22f472f0adb02bd71c472e426e47182f2b218f6d
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -44,11 +44,11 @@ servo-glutin = "0.13"     # for the exam
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.3", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
-core-foundation = "0.4"
+core-foundation = "0.4.6"
 core-graphics = "0.12.3"
 core-text = { version = "8.0", default-features = false }
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -9,17 +9,17 @@ varying vec2 vLocalPos;
 #endif
 
 varying vec3 vUv;
 flat varying int vImageKind;
 flat varying vec4 vUvBounds;
 flat varying vec4 vUvBounds_NoClamp;
 flat varying vec4 vParams;
 
-#if defined WR_FEATURE_ALPHA_TARGET
+#if defined WR_FEATURE_ALPHA_TARGET || defined WR_FEATURE_COLOR_TARGET_ALPHA_MASK
 flat varying vec4 vColor;
 #endif
 
 #define BRUSH_IMAGE_SIMPLE      0
 #define BRUSH_IMAGE_NINEPATCH   1
 #define BRUSH_IMAGE_MIRROR      2
 
 #ifdef WR_VERTEX_SHADER
@@ -36,16 +36,19 @@ void brush_vs(
     //           the normal texture cache and unify this
     //           with the normal image shader.
     BlurTask blur_task = fetch_blur_task(user_data.x);
     vUv.z = blur_task.common_data.texture_layer_index;
     vImageKind = user_data.y;
 
 #if defined WR_FEATURE_COLOR_TARGET
     vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
+#elif defined WR_FEATURE_COLOR_TARGET_ALPHA_MASK
+    vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
+    vColor = blur_task.color;
 #else
     vec2 texture_size = vec2(textureSize(sColor1, 0).xy);
     vColor = blur_task.color;
 #endif
 
     vec2 uv0 = blur_task.common_data.task_rect.p0;
     vec2 src_size = blur_task.common_data.task_rect.size * blur_task.scale_factor;
     vec2 uv1 = uv0 + blur_task.common_data.task_rect.size;
@@ -115,16 +118,18 @@ vec4 brush_fs() {
             uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
             uv = clamp(uv, vUvBounds.xy, vUvBounds.zw);
             break;
         }
     }
 
 #if defined WR_FEATURE_COLOR_TARGET
     vec4 color = texture(sColor0, vec3(uv, vUv.z));
+#elif defined WR_FEATURE_COLOR_TARGET_ALPHA_MASK
+    vec4 color = vColor * texture(sColor0, vec3(uv, vUv.z)).a;
 #else
     vec4 color = vColor * texture(sColor1, vec3(uv, vUv.z)).r;
 #endif
 
 #ifdef WR_FEATURE_ALPHA_PASS
     color *= init_transform_fs(vLocalPos);
 #endif
 
--- a/gfx/webrender/res/ps_hardware_composite.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.glsl
@@ -29,11 +29,11 @@ void main(void) {
 
     gl_Position = uTransform * vec4(local_pos, ci.z, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
-    oFragColor = texture(sCacheRGBA8, vec3(uv, vUv.z));
+    oFragColor = texture(sColor0, vec3(uv, vUv.z));
 }
 #endif
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -14,25 +14,22 @@ flat varying vec4 vUvBorder;
 #define MODE_SUBPX_CONST_COLOR  1
 #define MODE_SUBPX_PASS0        2
 #define MODE_SUBPX_PASS1        3
 #define MODE_SUBPX_BG_PASS0     4
 #define MODE_SUBPX_BG_PASS1     5
 #define MODE_SUBPX_BG_PASS2     6
 #define MODE_COLOR_BITMAP       7
 
-VertexInfo write_text_vertex(vec2 local_pos,
+VertexInfo write_text_vertex(vec2 clamped_local_pos,
                              RectWithSize local_clip_rect,
                              float z,
                              Layer layer,
                              PictureTask task,
                              RectWithSize snap_rect) {
-    // Clamp to the two local clip rects.
-    vec2 clamped_local_pos = clamp_rect(clamp_rect(local_pos, local_clip_rect), layer.local_clip_rect);
-
     // Transform the current vertex to world space.
     vec4 world_pos = layer.transform * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos -
@@ -70,29 +67,45 @@ void main(void) {
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     // Transform from local space to glyph space.
     mat2 transform = mat2(prim.layer.transform) * uDevicePixelRatio;
 
     // Compute the glyph rect in glyph space.
     RectWithSize glyph_rect = RectWithSize(res.offset + transform * (text.offset + glyph.offset),
                                            res.uv_rect.zw - res.uv_rect.xy);
 
-    // Select the corner of the glyph rect that we are processing.
-    // Transform it from glyph space into local space.
-    vec2 local_pos = inverse(transform) * (glyph_rect.p0 + glyph_rect.size * aPosition.xy);
+    // Transform the glyph rect back to local space.
+    mat2 inv = inverse(transform);
+    RectWithSize local_rect = transform_rect(glyph_rect, inv);
+
+    // Select the corner of the glyph's local space rect that we are processing.
+    vec2 local_pos = local_rect.p0 + local_rect.size * aPosition.xy;
+
+    // Calculate a combined local clip rect.
+    RectWithSize local_clip_rect = intersect_rects(prim.local_clip_rect, prim.layer.local_clip_rect);
+
+    // If the glyph's local rect would fit inside the local clip rect, then select a corner from
+    // the device space glyph rect to reduce overdraw of clipped pixels in the fragment shader.
+    // Otherwise, fall back to clamping the glyph's local rect to the local clip rect.
+    local_pos = rect_inside_rect(local_rect, local_clip_rect) ?
+                    inv * (glyph_rect.p0 + glyph_rect.size * aPosition.xy) :
+                    clamp_rect(local_pos, local_clip_rect);
 #else
     // Scale from glyph space to local space.
     float scale = res.scale / uDevicePixelRatio;
 
     // Compute the glyph rect in local space.
     RectWithSize glyph_rect = RectWithSize(scale * res.offset + text.offset + glyph.offset,
                                            scale * (res.uv_rect.zw - res.uv_rect.xy));
 
     // Select the corner of the glyph rect that we are processing.
     vec2 local_pos = glyph_rect.p0 + glyph_rect.size * aPosition.xy;
+
+    // Clamp to the two local clip rects.
+    local_pos = clamp_rect(clamp_rect(local_pos, prim.local_clip_rect), prim.layer.local_clip_rect);
 #endif
 
     VertexInfo vi = write_text_vertex(local_pos,
                                       prim.local_clip_rect,
                                       prim.z,
                                       prim.layer,
                                       prim.task,
                                       glyph_rect);
@@ -126,26 +139,26 @@ void main(void) {
     }
 #endif
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
     vUv = vec3(mix(st0, st1, f), res.layer);
-    vUvBorder = (res.uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
+    vUvBorder = (res.uv_rect + vec4(0.499, 0.499, -0.499, -0.499)) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
-    vec3 tc = vec3(clamp(vUv.xy, vUvBorder.xy, vUvBorder.zw), vUv.z);
-    vec4 mask = texture(sColor0, tc);
+    vec4 mask = texture(sColor0, vUv);
 
-    float alpha = do_clip();
+    float alpha = float(all(lessThanEqual(vec4(vUvBorder.xy, vUv.xy), vec4(vUv.xy, vUvBorder.zw))));
+    alpha *= do_clip();
 
 #ifdef WR_FEATURE_SUBPX_BG_PASS1
     mask.rgb = vec3(mask.a) - mask.rgb;
 #endif
 
     oFragColor = vColor * mask * alpha;
 }
 #endif
--- a/gfx/webrender/res/rect.glsl
+++ b/gfx/webrender/res/rect.glsl
@@ -22,8 +22,27 @@ RectWithEndpoint to_rect_with_endpoint(R
 
 RectWithSize to_rect_with_size(RectWithEndpoint rect) {
     RectWithSize result;
     result.p0 = rect.p0;
     result.size = rect.p1 - rect.p0;
 
     return result;
 }
+
+RectWithSize transform_rect(RectWithSize rect, mat2 transform) {
+    vec2 center = transform * (rect.p0 + rect.size * 0.5);
+    vec2 radius = mat2(abs(transform[0]), abs(transform[1])) * (rect.size * 0.5);
+    return RectWithSize(center - radius, radius * 2.0);
+}
+
+RectWithSize intersect_rects(RectWithSize a, RectWithSize b) {
+    RectWithSize result;
+    result.p0 = max(a.p0, b.p0);
+    result.size = min(a.p0 + a.size, b.p0 + b.size) - result.p0;
+
+    return result;
+}
+
+bool rect_inside_rect(RectWithSize little, RectWithSize big) {
+    return all(lessThanEqual(vec4(big.p0, little.p0 + little.size),
+                             vec4(little.p0, big.p0 + big.size)));
+}
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -324,16 +324,17 @@ impl ClipScrollNode {
 
         // Write the data that will be made available to the GPU for this node.
         node_data.push(data);
     }
 
     pub fn update(
         &mut self,
         state: &mut TransformUpdateState,
+        next_coordinate_system_id: &mut CoordinateSystemId,
         device_pixel_ratio: f32,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         scene_properties: &SceneProperties,
     ) {
         // If any of our parents was not rendered, we are not rendered either and can just
         // quit here.
@@ -347,17 +348,17 @@ impl ClipScrollNode {
         // produce only additional translations which should be invertible.
         if self.node_type.is_reference_frame() {
             if self.world_content_transform.determinant() == 0.0 {
                 self.update_to_empty_rect();
                 return;
             }
         }
 
-        self.update_transform(state, scene_properties);
+        self.update_transform(state, next_coordinate_system_id, scene_properties);
         self.update_clip_work_item(
             state,
             device_pixel_ratio,
             clip_store,
             resource_cache,
             gpu_cache,
         );
 
@@ -436,20 +437,25 @@ impl ClipScrollNode {
         }));
 
         state.parent_clip_chain = self.clip_chain_node.clone();
     }
 
     pub fn update_transform(
         &mut self,
         state: &mut TransformUpdateState,
+        next_coordinate_system_id: &mut CoordinateSystemId,
         scene_properties: &SceneProperties,
     ) {
         if self.node_type.is_reference_frame() {
-            self.update_transform_for_reference_frame(state, scene_properties);
+            self.update_transform_for_reference_frame(
+                state,
+                next_coordinate_system_id,
+                scene_properties
+            );
             return;
         }
 
         // We calculate this here to avoid a double-borrow later.
         let sticky_offset = self.calculate_sticky_offset(
             &state.nearest_scrolling_ancestor_offset,
             &state.nearest_scrolling_ancestor_viewport,
         );
@@ -479,16 +485,17 @@ impl ClipScrollNode {
         }
 
         self.coordinate_system_id = state.current_coordinate_system_id;
     }
 
     pub fn update_transform_for_reference_frame(
         &mut self,
         state: &mut TransformUpdateState,
+        next_coordinate_system_id: &mut CoordinateSystemId,
         scene_properties: &SceneProperties,
     ) {
         let info = match self.node_type {
             NodeType::ReferenceFrame(ref mut info) => info,
             _ => unreachable!("Called update_transform_for_reference_frame on non-ReferenceFrame"),
         };
 
         // Resolve the transform against any property bindings.
@@ -497,20 +504,20 @@ impl ClipScrollNode {
             info.origin_in_parent_reference_frame.x,
             info.origin_in_parent_reference_frame.y,
             0.0
         ).pre_mul(&source_transform)
          .pre_mul(&info.source_perspective);
 
         if !info.resolved_transform.preserves_2d_axis_alignment() ||
            info.resolved_transform.has_perspective_component() {
-            state.current_coordinate_system_id = state.next_coordinate_system_id;
-            state.next_coordinate_system_id = state.next_coordinate_system_id.next();
-            self.coordinate_system_id = state.current_coordinate_system_id;
+            state.current_coordinate_system_id = *next_coordinate_system_id;
+            next_coordinate_system_id.advance();
         }
+        self.coordinate_system_id = state.current_coordinate_system_id;
 
         // The transformation for this viewport in world coordinates is the transformation for
         // our parent reference frame, plus any accumulated scrolling offsets from nodes
         // between our reference frame and this node. Finally, we also include
         // whatever local transformation this reference frame provides. This can be combined
         // with the local_viewport_rect to get its position in world space.
         self.world_viewport_transform = state
             .parent_reference_frame_transform
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -29,16 +29,20 @@ impl CoordinateSystemId {
     pub fn root() -> CoordinateSystemId {
         CoordinateSystemId(0)
     }
 
     pub fn next(&self) -> CoordinateSystemId {
         let CoordinateSystemId(id) = *self;
         CoordinateSystemId(id + 1)
     }
+
+    pub fn advance(&mut self) {
+        self.0 += 1;
+    }
 }
 
 pub struct ClipScrollTree {
     pub nodes: FastHashMap<ClipId, ClipScrollNode>,
     pub pending_scroll_offsets: FastHashMap<ClipId, (LayerPoint, ScrollClamping)>,
 
     /// The ClipId of the currently scrolling node. Used to allow the same
     /// node to scroll even if a touch operation leaves the boundaries of that node.
@@ -73,17 +77,16 @@ pub struct TransformUpdateState {
     pub combined_outer_clip_bounds: DeviceIntRect,
     pub combined_inner_clip_bounds: DeviceIntRect,
 
     /// An id for keeping track of the axis-aligned space of this node. This is used in
     /// order to to track what kinds of clip optimizations can be done for a particular
     /// display list item, since optimizations can usually only be done among
     /// coordinate systems which are relatively axis aligned.
     pub current_coordinate_system_id: CoordinateSystemId,
-    pub next_coordinate_system_id: CoordinateSystemId,
 }
 
 impl ClipScrollTree {
     pub fn new() -> Self {
         let dummy_pipeline = PipelineId::dummy();
         ClipScrollTree {
             nodes: FastHashMap::default(),
             pending_scroll_offsets: FastHashMap::default(),
@@ -357,35 +360,37 @@ impl ClipScrollTree {
             ),
             parent_combined_viewport_rect: root_viewport,
             parent_accumulated_scroll_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_viewport: LayerRect::zero(),
             parent_clip_chain: None,
             combined_outer_clip_bounds: *screen_rect,
             combined_inner_clip_bounds: DeviceIntRect::max_rect(),
-            current_coordinate_system_id: CoordinateSystemId(0),
-            next_coordinate_system_id: CoordinateSystemId(0).next(),
+            current_coordinate_system_id: CoordinateSystemId::root(),
         };
+        let mut next_coordinate_system_id = state.current_coordinate_system_id.next();
         self.update_node(
             root_reference_frame_id,
             &mut state,
+            &mut next_coordinate_system_id,
             device_pixel_ratio,
             clip_store,
             resource_cache,
             gpu_cache,
             node_data,
             scene_properties,
         );
     }
 
     fn update_node(
         &mut self,
         layer_id: ClipId,
         state: &mut TransformUpdateState,
+        next_coordinate_system_id: &mut CoordinateSystemId,
         device_pixel_ratio: f32,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         gpu_node_data: &mut Vec<ClipScrollNodeData>,
         scene_properties: &SceneProperties,
     ) {
         // TODO(gw): This is an ugly borrow check workaround to clone these.
@@ -397,36 +402,39 @@ impl ClipScrollTree {
                 None => return,
             };
 
             // We set this early so that we can use it to populate the ClipChain.
             node.node_data_index = ClipScrollNodeIndex(gpu_node_data.len() as u32);
 
             node.update(
                 &mut state,
+                next_coordinate_system_id,
                 device_pixel_ratio,
                 clip_store,
                 resource_cache,
                 gpu_cache,
                 scene_properties,
             );
 
             node.push_gpu_node_data(&state, gpu_node_data);
 
-            if !node.children.is_empty() {
-                node.prepare_state_for_children(&mut state, gpu_node_data);
+            if node.children.is_empty() {
+                return;
             }
 
+            node.prepare_state_for_children(&mut state, gpu_node_data);
             node.children.clone()
         };
 
         for child_layer_id in node_children {
             self.update_node(
                 child_layer_id,
                 &mut state,
+                next_coordinate_system_id,
                 device_pixel_ratio,
                 clip_store,
                 resource_cache,
                 gpu_cache,
                 gpu_node_data,
                 scene_properties,
             );
         }
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -17,30 +17,30 @@ use border::ImageBorderSegment;
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
 use euclid::{SideOffsets2D, vec2};
 use frame::FrameId;
 use glyph_rasterizer::FontInstance;
 use gpu_cache::GpuCache;
 use gpu_types::ClipScrollNodeData;
-use internal_types::{FastHashMap, FastHashSet};
+use internal_types::{FastHashMap, FastHashSet, RenderPassIndex};
 use picture::{PictureCompositeMode, PictureKind, PicturePrimitive, RasterizationSpace};
 use prim_store::{BrushAntiAliasMode, BrushKind, BrushPrimitive, TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex, SpecificPrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{BrushSegmentDescriptor, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
 use resource_cache::ResourceCache;
 use scene::{ScenePipeline, SceneProperties};
 use std::{mem, usize, f32};
 use tiling::{CompositeOps, Frame};
-use tiling::{RenderPass, RenderPassKind, RenderTargetKind};
+use tiling::{RenderPass, RenderTargetKind};
 use tiling::{RenderTargetContext, ScrollbarPrimitive};
 use util::{self, MaxRect, pack_as_float, RectHelpers, recycle_vec};
 
 #[derive(Debug)]
 pub struct ScrollbarInfo(pub ClipId, pub LayerRect);
 
 /// Properties of a stacking context that are maintained
 /// during creation of the scene. These structures are
@@ -1756,48 +1756,33 @@ impl FrameBuilder {
                 main_render_task_id,
                 required_pass_count - 1,
                 &mut passes,
             );
         }
 
         let mut deferred_resolves = vec![];
 
-        for pass in &mut passes {
+        for (pass_index, pass) in passes.iter_mut().enumerate() {
             let ctx = RenderTargetContext {
                 device_pixel_ratio,
                 prim_store: &self.prim_store,
                 resource_cache,
                 node_data: &node_data,
                 clip_scroll_tree,
             };
 
             pass.build(
                 &ctx,
                 gpu_cache,
                 &mut render_tasks,
                 &mut deferred_resolves,
                 &self.clip_store,
+                RenderPassIndex(pass_index),
             );
-
-            profile_counters.passes.inc();
-
-            match pass.kind {
-                RenderPassKind::MainFramebuffer(_) => {
-                    profile_counters.color_targets.add(1);
-                }
-                RenderPassKind::OffScreen { ref color, ref alpha } => {
-                    profile_counters
-                        .color_targets
-                        .add(color.targets.len());
-                    profile_counters
-                        .alpha_targets
-                        .add(alpha.targets.len());
-                }
-            }
         }
 
         let gpu_cache_updates = gpu_cache.end_frame(gpu_cache_profile);
 
         render_tasks.build();
 
         resource_cache.end_frame();
 
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -27,29 +27,36 @@ pub type FastHashSet<K> = HashSet<K, Bui
 // to be added to an atlas). The texture cache
 // manages the allocation and freeing of these
 // IDs, and the rendering thread maintains a
 // map from cache texture ID to native texture.
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 pub struct CacheTextureId(pub usize);
 
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub struct RenderPassIndex(pub usize);
+
 // Represents the source for a texture.
 // These are passed from throughout the
 // pipeline until they reach the rendering
 // thread, where they are resolved to a
 // native texture ID.
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 pub enum SourceTexture {
     Invalid,
     TextureCache(CacheTextureId),
     External(ExternalImageData),
     CacheA8,
     CacheRGBA8,
+    // XXX Remove this once RenderTaskCacheA8 is used.
+    #[allow(dead_code)]
+    RenderTaskCacheA8(RenderPassIndex),
+    RenderTaskCacheRGBA8(RenderPassIndex),
 }
 
 pub const ORTHO_NEAR_PLANE: f32 = -1000000.0;
 pub const ORTHO_FAR_PLANE: f32 = 1000000.0;
 
 /// Optional textures that can be used as a source in the shaders.
 /// Textures that are not used by the batch are equal to TextureId::invalid().
 #[derive(Copy, Clone, Debug)]
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -62,17 +62,20 @@ pub enum PictureKind {
         radii_kind: BorderRadiusKind,
         content_rect: LayerRect,
         cache_key: BoxShadowCacheKey,
     },
     Image {
         // If a mix-blend-mode, contains the render task for
         // the readback of the framebuffer that we use to sample
         // from in the mix-blend-mode shader.
-        readback_render_task_id: Option<RenderTaskId>,
+        // For drop-shadow filter, this will store the original
+        // picture task which would be rendered on screen after
+        // blur pass.
+        secondary_render_task_id: Option<RenderTaskId>,
         /// How this picture should be composited.
         /// If None, don't composite - just draw directly on parent surface.
         composite_mode: Option<PictureCompositeMode>,
         // If true, this picture is part of a 3D context.
         is_in_3d_context: bool,
         // If requested as a frame output (for rendering
         // pages to a texture), this is the pipeline this
         // picture is the root of.
@@ -181,17 +184,17 @@ impl PicturePrimitive {
         pipeline_id: PipelineId,
         reference_frame_id: ClipId,
         frame_output_pipeline_id: Option<PipelineId>,
     ) -> PicturePrimitive {
         PicturePrimitive {
             runs: Vec::new(),
             render_task_id: None,
             kind: PictureKind::Image {
-                readback_render_task_id: None,
+                secondary_render_task_id: None,
                 composite_mode,
                 is_in_3d_context,
                 frame_output_pipeline_id,
                 reference_frame_id,
                 real_local_rect: LayerRect::zero(),
             },
             pipeline_id,
             cull_children: true,
@@ -231,16 +234,21 @@ impl PicturePrimitive {
             PictureKind::Image { composite_mode, ref mut real_local_rect, .. } => {
                 *real_local_rect = prim_run_rect.local_rect_in_original_parent_space;
 
                 match composite_mode {
                     Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
                         let inflate_size = blur_radius * BLUR_SAMPLE_SCALE;
                         local_content_rect.inflate(inflate_size, inflate_size)
                     }
+                    Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, blur_radius, _))) => {
+                        let inflate_size = blur_radius * BLUR_SAMPLE_SCALE;
+                        local_content_rect.inflate(inflate_size, inflate_size)
+                                          .translate(&offset)
+                    }
                     _ => {
                         local_content_rect
                     }
                 }
             }
             PictureKind::TextShadow { offset, blur_radius, ref mut content_rect, .. } => {
                 let blur_offset = blur_radius * BLUR_SAMPLE_SCALE;
 
@@ -299,17 +307,17 @@ impl PicturePrimitive {
         prim_context: &PrimitiveContext,
         render_tasks: &mut RenderTaskTree,
         prim_screen_rect: &DeviceIntRect,
         child_tasks: Vec<RenderTaskId>,
         parent_tasks: &mut Vec<RenderTaskId>,
     ) {
         match self.kind {
             PictureKind::Image {
-                ref mut readback_render_task_id,
+                ref mut secondary_render_task_id,
                 composite_mode,
                 ..
             } => {
                 match composite_mode {
                     Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
                         let picture_task = RenderTask::new_picture(
                             Some(prim_screen_rect.size),
                             prim_index,
@@ -336,33 +344,64 @@ impl PicturePrimitive {
                             ClearMode::Transparent,
                             PremultipliedColorF::TRANSPARENT,
                             None,
                         );
 
                         let blur_render_task_id = render_tasks.add(blur_render_task);
                         self.render_task_id = Some(blur_render_task_id);
                     }
+                    Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, blur_radius, color))) => {
+                        let picture_task = RenderTask::new_picture(
+                            Some(prim_screen_rect.size),
+                            prim_index,
+                            RenderTargetKind::Color,
+                            prim_screen_rect.origin.x as f32 - offset.x,
+                            prim_screen_rect.origin.y as f32 - offset.y,
+                            PremultipliedColorF::TRANSPARENT,
+                            ClearMode::Transparent,
+                            self.rasterization_kind,
+                            child_tasks,
+                            None,
+                        );
+
+                        let blur_std_deviation = blur_radius * prim_context.device_pixel_ratio;
+                        let picture_task_id = render_tasks.add(picture_task);
+
+                        let blur_render_task = RenderTask::new_blur(
+                            blur_std_deviation,
+                            picture_task_id,
+                            render_tasks,
+                            RenderTargetKind::Color,
+                            &[],
+                            ClearMode::Transparent,
+                            color.premultiplied(),
+                            None,
+                        );
+
+                        *secondary_render_task_id = Some(picture_task_id);
+                        self.render_task_id = Some(render_tasks.add(blur_render_task));
+                    }
                     Some(PictureCompositeMode::MixBlend(..)) => {
                         let picture_task = RenderTask::new_picture(
                             Some(prim_screen_rect.size),
                             prim_index,
                             RenderTargetKind::Color,
                             prim_screen_rect.origin.x as f32,
                             prim_screen_rect.origin.y as f32,
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
                             self.rasterization_kind,
                             child_tasks,
                             None,
                         );
 
                         let readback_task_id = render_tasks.add(RenderTask::new_readback(*prim_screen_rect));
 
-                        *readback_render_task_id = Some(readback_task_id);
+                        *secondary_render_task_id = Some(readback_task_id);
                         parent_tasks.push(readback_task_id);
 
                         self.render_task_id = Some(render_tasks.add(picture_task));
                     }
                     Some(PictureCompositeMode::Filter(filter)) => {
                         // If this filter is not currently going to affect
                         // the picture, just collapse this picture into the
                         // current render task. This most commonly occurs
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -242,17 +242,17 @@ fn new_ct_font_with_variations(cg_font: 
                         None => return ct_font,
                     }
                 }
                 None => return ct_font,
             };
 
             val = val.max(min_val).min(max_val);
             if val != def_val {
-                vals.push((name, CFNumber::from_f64(val)));
+                vals.push((name, CFNumber::from(val)));
             }
         }
         if vals.is_empty() {
             return ct_font;
         }
         let vals_dict = CFDictionary::from_CFType_pairs(&vals);
         let cg_var_font = cg_font.create_copy_from_variations(&vals_dict).unwrap();
         core_text::font::new_from_CGFont(&cg_var_font, size)
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -14,18 +14,18 @@ use clip::{ClipSource, ClipSourcesHandle
 use frame_builder::PrimitiveContext;
 use glyph_rasterizer::{FontInstance, FontTransform};
 use internal_types::{FastHashMap};
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
 use gpu_types::ClipScrollNodeData;
 use picture::{PictureKind, PicturePrimitive, RasterizationSpace};
 use profiler::FrameProfileCounters;
-use render_task::{ClipChainNode, ClipChainNodeIter, ClipWorkItem, RenderTask, RenderTaskId};
-use render_task::RenderTaskTree;
+use render_task::{ClipChain, ClipChainNode, ClipChainNodeIter, ClipWorkItem, RenderTask};
+use render_task::{RenderTaskId, RenderTaskTree};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use resource_cache::{ImageProperties, ResourceCache};
 use scene::{ScenePipeline, SceneProperties};
 use std::{mem, u16, usize};
 use std::rc::Rc;
 use util::{MatrixHelpers, calculate_screen_bounding_rect, extract_inner_rect_safe, pack_as_float};
 use util::recycle_vec;
 
@@ -204,16 +204,25 @@ pub enum BrushKind {
         kind: BrushMaskKind,
     },
     Solid {
         color: ColorF,
     },
     Clear,
 }
 
+impl BrushKind {
+    fn is_solid(&self) -> bool {
+        match *self {
+            BrushKind::Solid { .. } => true,
+            _ => false,
+        }
+    }
+}
+
 #[derive(Debug, Copy, Clone)]
 #[repr(u32)]
 pub enum BrushAntiAliasMode {
     Primitive = 0,
     Segment = 1,
 }
 
 #[allow(dead_code)]
@@ -1362,49 +1371,197 @@ impl PrimitiveStore {
                 PrimitiveKind::Brush => {
                     let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
                     brush.write_gpu_blocks(request);
                 }
             }
         }
     }
 
+    fn write_brush_nine_patch_segment_description(
+        &mut self,
+        prim_index: PrimitiveIndex,
+        prim_context: &PrimitiveContext,
+        clip_store: &mut ClipStore,
+        node_data: &[ClipScrollNodeData],
+        clips: &Vec<ClipWorkItem>,
+    ) {
+        debug_assert!(self.cpu_metadata[prim_index.0].prim_kind == PrimitiveKind::Brush);
+
+        if clips.len() != 1 {
+            return;
+        }
+
+        let clip_item = clips.first().unwrap();
+        if clip_item.coordinate_system_id != prim_context.scroll_node.coordinate_system_id {
+            return;
+        }
+
+        let metadata = &self.cpu_metadata[prim_index.0];
+        let brush = &mut self.cpu_brushes[metadata.cpu_prim_index.0];
+        if brush.segment_desc.is_some() {
+            return;
+        }
+        if !brush.kind.is_solid() {
+            return;
+        }
+        if metadata.local_rect.size.area() <= MIN_BRUSH_SPLIT_AREA {
+            return;
+        }
+
+        let local_clips = clip_store.get_opt(&clip_item.clip_sources).expect("bug");
+        let mut selected_clip = None;
+        for &(ref clip, _) in &local_clips.clips {
+            match *clip {
+                ClipSource::RoundedRectangle(rect, radii, ClipMode::Clip) => {
+                    if selected_clip.is_some() {
+                        selected_clip = None;
+                        break;
+                    }
+                    selected_clip = Some((rect, radii, clip_item.scroll_node_data_index));
+                }
+                ClipSource::Rectangle(..) => {}
+                ClipSource::RoundedRectangle(_, _, ClipMode::ClipOut) |
+                ClipSource::BorderCorner(..) |
+                ClipSource::Image(..) => {
+                    selected_clip = None;
+                    break;
+                }
+            }
+        }
+
+        if let Some((rect, radii, clip_scroll_node_data_index)) = selected_clip {
+            // If the scroll node transforms are different between the clip
+            // node and the primitive, we need to get the clip rect in the
+            // local space of the primitive, in order to generate correct
+            // local segments.
+            let local_clip_rect = if clip_scroll_node_data_index == prim_context.scroll_node.node_data_index {
+                rect
+            } else {
+                let clip_transform_data = &node_data[clip_scroll_node_data_index.0 as usize];
+                let prim_transform = &prim_context.scroll_node.world_content_transform;
+
+                let relative_transform = prim_transform
+                    .inverse()
+                    .unwrap_or(WorldToLayerTransform::identity())
+                    .pre_mul(&clip_transform_data.transform);
+
+                relative_transform.transform_rect(&rect)
+            };
+            brush.segment_desc = create_nine_patch(
+                &metadata.local_rect,
+                &local_clip_rect,
+                &radii
+            );
+        }
+    }
+
+    fn update_nine_patch_clip_task_for_brush(
+        &mut self,
+        prim_context: &PrimitiveContext,
+        prim_index: PrimitiveIndex,
+        render_tasks: &mut RenderTaskTree,
+        clip_store: &mut ClipStore,
+        tasks: &mut Vec<RenderTaskId>,
+        node_data: &[ClipScrollNodeData],
+        clips: &Vec<ClipWorkItem>,
+        combined_outer_rect: &DeviceIntRect,
+    ) -> bool {
+        if self.cpu_metadata[prim_index.0].prim_kind != PrimitiveKind::Brush {
+            return false;
+        }
+
+        self.write_brush_nine_patch_segment_description(
+            prim_index,
+            prim_context,
+            clip_store,
+            node_data,
+            clips
+        );
+
+        let metadata = &self.cpu_metadata[prim_index.0];
+        let brush = &mut self.cpu_brushes[metadata.cpu_prim_index.0];
+        let segment_desc = match brush.segment_desc {
+            Some(ref mut description) => description,
+            None => return false,
+        };
+
+        let enabled_segments = segment_desc.enabled_segments;
+        let can_optimize_clip_mask = segment_desc.can_optimize_clip_mask;
+
+        for (i, segment) in segment_desc.segments.iter_mut().enumerate() {
+            // We only build clips for the corners. The ordering of the
+            // BrushSegmentKind enum is such that corners come first, then
+            // edges, then inner.
+            let segment_enabled = ((1 << i) & enabled_segments) != 0;
+            let create_clip_task =
+               segment_enabled &&
+               (!can_optimize_clip_mask || i <= BrushSegmentKind::BottomLeft as usize);
+
+            segment.clip_task_id = if create_clip_task {
+                let segment_screen_rect = calculate_screen_bounding_rect(
+                    &prim_context.scroll_node.world_content_transform,
+                    &segment.local_rect,
+                    prim_context.device_pixel_ratio
+                );
+
+                combined_outer_rect.intersection(&segment_screen_rect).map(|bounds| {
+                    let clip_task = RenderTask::new_mask(
+                        None,
+                        bounds,
+                        clips.clone(),
+                        prim_context.scroll_node.coordinate_system_id,
+                    );
+
+                    let clip_task_id = render_tasks.add(clip_task);
+                    tasks.push(clip_task_id);
+
+                    clip_task_id
+                })
+            } else {
+                None
+            };
+        }
+
+        true
+    }
+
     fn update_clip_task(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         prim_screen_rect: &DeviceIntRect,
         screen_rect: &DeviceIntRect,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         clip_store: &mut ClipStore,
         tasks: &mut Vec<RenderTaskId>,
         node_data: &[ClipScrollNodeData],
     ) -> bool {
-        let metadata = &mut self.cpu_metadata[prim_index.0];
-        metadata.clip_task_id = None;
+        self.cpu_metadata[prim_index.0].clip_task_id = None;
 
         let prim_screen_rect = match prim_screen_rect.intersection(screen_rect) {
             Some(rect) => rect,
             None => {
-                metadata.screen_rect = None;
+                self.cpu_metadata[prim_index.0].screen_rect = None;
                 return false;
             }
         };
 
         let clip_chain = prim_context.clip_node.clip_chain_node.clone();
         let mut combined_outer_rect = match clip_chain {
             Some(ref node) => prim_screen_rect.intersection(&node.combined_outer_screen_rect),
             None => Some(prim_screen_rect),
         };
 
         let prim_coordinate_system_id = prim_context.scroll_node.coordinate_system_id;
         let transform = &prim_context.scroll_node.world_content_transform;
         let extra_clip =  {
+            let metadata = &self.cpu_metadata[prim_index.0];
             let prim_clips = clip_store.get_mut(&metadata.clip_sources);
             if prim_clips.has_clips() {
                 prim_clips.update(gpu_cache, resource_cache);
                 let (screen_inner_rect, screen_outer_rect) =
                     prim_clips.get_screen_bounds(transform, prim_context.device_pixel_ratio);
 
                 if let Some(outer) = screen_outer_rect {
                     combined_outer_rect = combined_outer_rect.and_then(|r| r.intersection(&outer));
@@ -1426,44 +1583,28 @@ impl PrimitiveStore {
                 None
             }
         };
 
         // If everything is clipped out, then we don't need to render this primitive.
         let combined_outer_rect = match combined_outer_rect {
             Some(rect) if !rect.is_empty() => rect,
             _ => {
-                metadata.screen_rect = None;
+                self.cpu_metadata[prim_index.0].screen_rect = None;
                 return false;
             }
         };
 
-        // Filter out all the clip instances that don't contribute to the result.
         let mut combined_inner_rect = *screen_rect;
-        let clips: Vec<_> = ClipChainNodeIter { current: extra_clip }
-            .chain(ClipChainNodeIter { current: clip_chain })
-            .take_while(|node| {
-                !node.combined_inner_screen_rect.contains_rect(&combined_outer_rect)
-            })
-            .filter_map(|node| {
-                combined_inner_rect = if !node.screen_inner_rect.is_empty() {
-                    // If this clip's inner area contains the area of the primitive clipped
-                    // by previous clips, then it's not going to affect rendering in any way.
-                    if node.screen_inner_rect.contains_rect(&combined_outer_rect) {
-                        return None;
-                    }
-                    combined_inner_rect.intersection(&node.screen_inner_rect)
-                        .unwrap_or_else(DeviceIntRect::zero)
-                } else {
-                    DeviceIntRect::zero()
-                };
-
-                Some(node.work_item.clone())
-            })
-            .collect();
+        let clips = convert_clip_chain_to_clip_vector(
+            clip_chain,
+            extra_clip,
+            &combined_outer_rect,
+            &mut combined_inner_rect
+        );
 
         if clips.is_empty() {
             // If this item is in the root coordinate system, then
             // we know that the local_clip_rect in the clip node
             // will take care of applying this clip, so no need
             // for a mask.
             if prim_coordinate_system_id == CoordinateSystemId::root() {
                 return true;
@@ -1478,126 +1619,40 @@ impl PrimitiveStore {
             // optimization of the empty mask.
             combined_inner_rect = DeviceIntRect::zero();
         }
 
         if combined_inner_rect.contains_rect(&prim_screen_rect) {
            return true;
         }
 
-        let mut needs_prim_clip_task = true;
-
-        if metadata.prim_kind == PrimitiveKind::Brush {
-            let brush = &mut self.cpu_brushes[metadata.cpu_prim_index.0];
-            if brush.segment_desc.is_none() && metadata.local_rect.size.area() > MIN_BRUSH_SPLIT_AREA {
-                if let BrushKind::Solid { .. } = brush.kind {
-                    if clips.len() == 1 {
-                        let clip_item = clips.first().unwrap();
-                        if clip_item.coordinate_system_id == prim_coordinate_system_id {
-                            let local_clips = clip_store.get_opt(&clip_item.clip_sources).expect("bug");
-                            let mut selected_clip = None;
-                            for &(ref clip, _) in &local_clips.clips {
-                                match *clip {
-                                    ClipSource::RoundedRectangle(rect, radii, ClipMode::Clip) => {
-                                        if selected_clip.is_some() {
-                                            selected_clip = None;
-                                            break;
-                                        }
-                                        selected_clip = Some((rect, radii, clip_item.scroll_node_data_index));
-                                    }
-                                    ClipSource::Rectangle(..) => {}
-                                    ClipSource::RoundedRectangle(_, _, ClipMode::ClipOut) |
-                                    ClipSource::BorderCorner(..) |
-                                    ClipSource::Image(..) => {
-                                        selected_clip = None;
-                                        break;
-                                    }
-                                }
-                            }
-                            if let Some((rect, radii, clip_scroll_node_data_index)) = selected_clip {
-                                // If the scroll node transforms are different between the clip
-                                // node and the primitive, we need to get the clip rect in the
-                                // local space of the primitive, in order to generate correct
-                                // local segments.
-                                let local_clip_rect = if clip_scroll_node_data_index == prim_context.scroll_node.node_data_index {
-                                    rect
-                                } else {
-                                    let clip_transform_data = &node_data[clip_scroll_node_data_index.0 as usize];
-                                    let prim_transform = &prim_context.scroll_node.world_content_transform;
-
-                                    let relative_transform = prim_transform
-                                        .inverse()
-                                        .unwrap_or(WorldToLayerTransform::identity())
-                                        .pre_mul(&clip_transform_data.transform);
-
-                                    relative_transform.transform_rect(&rect)
-                                };
-                                brush.segment_desc = create_nine_patch(
-                                    &metadata.local_rect,
-                                    &local_clip_rect,
-                                    &radii
-                                );
-                            }
-                        }
-                    }
-                }
-            }
-
-            if let Some(ref mut segment_desc) = brush.segment_desc {
-                let enabled_segments = segment_desc.enabled_segments;
-                let can_optimize_clip_mask = segment_desc.can_optimize_clip_mask;
-
-                for (i, segment) in segment_desc.segments.iter_mut().enumerate() {
-                    // We only build clips for the corners. The ordering of the
-                    // BrushSegmentKind enum is such that corners come first, then
-                    // edges, then inner.
-                    let segment_enabled = ((1 << i) & enabled_segments) != 0;
-                    let create_clip_task = segment_enabled &&
-                                           (!can_optimize_clip_mask || i <= BrushSegmentKind::BottomLeft as usize);
-                    segment.clip_task_id = if create_clip_task {
-                        let segment_screen_rect = calculate_screen_bounding_rect(
-                            &prim_context.scroll_node.world_content_transform,
-                            &segment.local_rect,
-                            prim_context.device_pixel_ratio
-                        );
-
-                        combined_outer_rect.intersection(&segment_screen_rect).map(|bounds| {
-                            let clip_task = RenderTask::new_mask(
-                                None,
-                                bounds,
-                                clips.clone(),
-                                prim_coordinate_system_id,
-                            );
-
-                            let clip_task_id = render_tasks.add(clip_task);
-                            tasks.push(clip_task_id);
-
-                            clip_task_id
-                        })
-                    } else {
-                        None
-                    };
-                }
-
-                needs_prim_clip_task = false;
-            }
+        // First try to render this primitive's mask using optimized nine-patch brush rendering.
+        if self.update_nine_patch_clip_task_for_brush(
+            prim_context,
+            prim_index,
+            render_tasks,
+            clip_store,
+            tasks,
+            node_data,
+            &clips,
+            &combined_outer_rect,
+        ) {
+            return true;
         }
 
-        if needs_prim_clip_task {
-            let clip_task = RenderTask::new_mask(
-                None,
-                combined_outer_rect,
-                clips,
-                prim_coordinate_system_id,
-            );
+        let clip_task = RenderTask::new_mask(
+            None,
+            combined_outer_rect,
+            clips,
+            prim_coordinate_system_id,
+        );
 
-            let clip_task_id = render_tasks.add(clip_task);
-            metadata.clip_task_id = Some(clip_task_id);
-            tasks.push(clip_task_id);
-        }
+        let clip_task_id = render_tasks.add(clip_task);
+        self.cpu_metadata[prim_index.0].clip_task_id = Some(clip_task_id);
+        tasks.push(clip_task_id);
 
         true
     }
 
     pub fn prepare_prim_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
@@ -1908,8 +1963,38 @@ fn create_nine_patch(
             &inner,
             None,
         );
         desc.can_optimize_clip_mask = true;
 
         Box::new(desc)
     })
 }
+
+fn convert_clip_chain_to_clip_vector(
+    clip_chain: ClipChain,
+    extra_clip: ClipChain,
+    combined_outer_rect: &DeviceIntRect,
+    combined_inner_rect: &mut DeviceIntRect,
+) -> Vec<ClipWorkItem> {
+    // Filter out all the clip instances that don't contribute to the result.
+    ClipChainNodeIter { current: extra_clip }
+        .chain(ClipChainNodeIter { current: clip_chain })
+        .take_while(|node| {
+            !node.combined_inner_screen_rect.contains_rect(&combined_outer_rect)
+        })
+        .filter_map(|node| {
+            *combined_inner_rect = if !node.screen_inner_rect.is_empty() {
+                // If this clip's inner area contains the area of the primitive clipped
+                // by previous clips, then it's not going to affect rendering in any way.
+                if node.screen_inner_rect.contains_rect(&combined_outer_rect) {
+                    return None;
+                }
+                combined_inner_rect.intersection(&node.screen_inner_rect)
+                    .unwrap_or_else(DeviceIntRect::zero)
+            } else {
+                DeviceIntRect::zero()
+            };
+
+            Some(node.work_item.clone())
+        })
+        .collect()
+}
--- a/gfx/webrender/src/profiler.rs
+++ b/gfx/webrender/src/profiler.rs
@@ -32,25 +32,16 @@ impl NamedTag for GpuProfileTag {
     }
 }
 
 trait ProfileCounter {
     fn description(&self) -> &'static str;
     fn value(&self) -> String;
 }
 
-impl<'a, T: ProfileCounter> ProfileCounter for &'a T {
-    fn description(&self) -> &'static str {
-        (*self).description()
-    }
-    fn value(&self) -> String {
-        (*self).value()
-    }
-}
-
 #[derive(Clone)]
 pub struct IntProfileCounter {
     description: &'static str,
     value: usize,
 }
 
 impl IntProfileCounter {
     fn new(description: &'static str) -> Self {
@@ -304,29 +295,23 @@ impl ProfileCounter for AverageTimeProfi
     }
 }
 
 
 #[derive(Clone)]
 pub struct FrameProfileCounters {
     pub total_primitives: IntProfileCounter,
     pub visible_primitives: IntProfileCounter,
-    pub passes: IntProfileCounter,
-    pub color_targets: IntProfileCounter,
-    pub alpha_targets: IntProfileCounter,
 }
 
 impl FrameProfileCounters {
     pub fn new() -> Self {
         FrameProfileCounters {
             total_primitives: IntProfileCounter::new("Total Primitives"),
             visible_primitives: IntProfileCounter::new("Visible Primitives"),
-            passes: IntProfileCounter::new("Passes"),
-            color_targets: IntProfileCounter::new("Color Targets"),
-            alpha_targets: IntProfileCounter::new("Alpha Targets"),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct TextureCacheProfileCounters {
     pub pages_a8_linear: ResourceProfileCounter,
     pub pages_rgb8_linear: ResourceProfileCounter,
@@ -444,38 +429,44 @@ impl BackendProfileCounters {
 }
 
 pub struct RendererProfileCounters {
     pub frame_counter: IntProfileCounter,
     pub frame_time: AverageTimeProfileCounter,
     pub draw_calls: IntProfileCounter,
     pub vertices: IntProfileCounter,
     pub vao_count_and_size: ResourceProfileCounter,
+    pub color_targets: IntProfileCounter,
+    pub alpha_targets: IntProfileCounter,
 }
 
 pub struct RendererProfileTimers {
     pub cpu_time: TimeProfileCounter,
     pub gpu_time: TimeProfileCounter,
     pub gpu_samples: Vec<GpuTimer<GpuProfileTag>>,
 }
 
 impl RendererProfileCounters {
     pub fn new() -> Self {
         RendererProfileCounters {
             frame_counter: IntProfileCounter::new("Frame"),
             frame_time: AverageTimeProfileCounter::new("FPS", true, ONE_SECOND_NS / 2),
             draw_calls: IntProfileCounter::new("Draw Calls"),
             vertices: IntProfileCounter::new("Vertices"),
             vao_count_and_size: ResourceProfileCounter::new("VAO"),
+            color_targets: IntProfileCounter::new("Color Targets"),
+            alpha_targets: IntProfileCounter::new("Alpha Targets"),
         }
     }
 
     pub fn reset(&mut self) {
         self.draw_calls.reset();
         self.vertices.reset();
+        self.color_targets.reset();
+        self.alpha_targets.reset();
     }
 }
 
 impl RendererProfileTimers {
     pub fn new() -> Self {
         RendererProfileTimers {
             cpu_time: TimeProfileCounter::new("Compositor CPU Time", false),
             gpu_samples: Vec::new(),
@@ -488,23 +479,28 @@ struct GraphStats {
     min_value: f32,
     mean_value: f32,
     max_value: f32,
 }
 
 struct ProfileGraph {
     max_samples: usize,
     values: VecDeque<f32>,
+    short_description: &'static str,
 }
 
 impl ProfileGraph {
-    fn new(max_samples: usize) -> Self {
+    fn new(
+        max_samples: usize,
+        short_description: &'static str,
+    ) -> Self {
         ProfileGraph {
             max_samples,
             values: VecDeque::new(),
+            short_description,
         }
     }
 
     fn push(&mut self, ns: u64) {
         let ms = ns as f64 / 1000000.0;
         if self.values.len() == self.max_samples {
             self.values.pop_back();
         }
@@ -615,16 +611,26 @@ impl ProfileGraph {
 
             debug_renderer.add_quad(x0, y0, x1, y1, color_top, color_bottom);
         }
 
         rect
     }
 }
 
+impl ProfileCounter for ProfileGraph {
+    fn description(&self) -> &'static str {
+        self.short_description
+    }
+
+    fn value(&self) -> String {
+        format!("{:.2}ms", self.stats().mean_value)
+    }
+}
+
 struct GpuFrame {
     total_time: u64,
     samples: Vec<GpuTimer<GpuProfileTag>>,
 }
 
 struct GpuFrameCollection {
     frames: VecDeque<GpuFrame>,
 }
@@ -732,55 +738,61 @@ impl GpuFrameCollection {
                 ColorU::new(255, 255, 0, 255),
             );
         }
 
         bounding_rect
     }
 }
 
-pub struct Profiler {
+struct DrawState {
     x_left: f32,
     y_left: f32,
     x_right: f32,
     y_right: f32,
+}
+
+pub struct Profiler {
+    draw_state: DrawState,
     backend_time: ProfileGraph,
     compositor_time: ProfileGraph,
     gpu_time: ProfileGraph,
     gpu_frames: GpuFrameCollection,
     ipc_time: ProfileGraph,
 }
 
 impl Profiler {
     pub fn new() -> Self {
         Profiler {
-            x_left: 0.0,
-            y_left: 0.0,
-            x_right: 0.0,
-            y_right: 0.0,
-            backend_time: ProfileGraph::new(600),
-            compositor_time: ProfileGraph::new(600),
-            gpu_time: ProfileGraph::new(600),
+            draw_state: DrawState {
+                x_left: 0.0,
+                y_left: 0.0,
+                x_right: 0.0,
+                y_right: 0.0,
+            },
+            backend_time: ProfileGraph::new(600, "Backend:"),
+            compositor_time: ProfileGraph::new(600, "Compositor:"),
+            gpu_time: ProfileGraph::new(600, "GPU:"),
             gpu_frames: GpuFrameCollection::new(),
-            ipc_time: ProfileGraph::new(600),
+            ipc_time: ProfileGraph::new(600, "IPC:"),
         }
     }
 
-    fn draw_counters<T: ProfileCounter>(
-        &mut self,
-        counters: &[T],
+    fn draw_counters<T: ProfileCounter + ?Sized>(
+        counters: &[&T],
         debug_renderer: &mut DebugRenderer,
         left: bool,
+        draw_state: &mut DrawState,
     ) {
         let mut label_rect = Rect::zero();
         let mut value_rect = Rect::zero();
         let (mut current_x, mut current_y) = if left {
-            (self.x_left, self.y_left)
+            (draw_state.x_left, draw_state.y_left)
         } else {
-            (self.x_right, self.y_right)
+            (draw_state.x_right, draw_state.y_right)
         };
         let mut color_index = 0;
         let line_height = debug_renderer.line_height();
 
         let colors = [
             ColorU::new(255, 255, 255, 255),
             ColorU::new(255, 255, 0, 255),
         ];
@@ -795,17 +807,17 @@ impl Profiler {
             color_index = (color_index + 1) % colors.len();
 
             label_rect = label_rect.union(&rect);
             current_y += line_height;
         }
 
         color_index = 0;
         current_x = label_rect.origin.x + label_rect.size.width + 60.0;
-        current_y = if left { self.y_left } else { self.y_right };
+        current_y = if left { draw_state.y_left } else { draw_state.y_right };
 
         for counter in counters {
             let rect = debug_renderer.add_text(
                 current_x,
                 current_y,
                 &counter.value(),
                 colors[color_index],
             );
@@ -821,39 +833,39 @@ impl Profiler {
             total_rect.origin.y,
             total_rect.origin.x + total_rect.size.width,
             total_rect.origin.y + total_rect.size.height,
             ColorF::new(0.1, 0.1, 0.1, 0.8).into(),
             ColorF::new(0.2, 0.2, 0.2, 0.8).into(),
         );
         let new_y = total_rect.origin.y + total_rect.size.height + 30.0;
         if left {
-            self.y_left = new_y;
+            draw_state.y_left = new_y;
         } else {
-            self.y_right = new_y;
+            draw_state.y_right = new_y;
         }
     }
 
     fn draw_gpu_cache_bar(
         &mut self,
         label: &str,
         label_color: ColorU,
         counters: &[(ColorU, &IntProfileCounter)],
         debug_renderer: &mut DebugRenderer,
     ) -> Rect<f32> {
         let mut rect = debug_renderer.add_text(
-            self.x_left,
-            self.y_left,
+            self.draw_state.x_left,
+            self.draw_state.y_left,
             label,
             label_color,
         );
 
         let x_base = rect.origin.x + rect.size.width + 10.0;
         let height = debug_renderer.line_height();
-        let width = (self.x_right - 30.0 - x_base).max(0.0);
+        let width = (self.draw_state.x_right - 30.0 - x_base).max(0.0);
         let total_value = counters.last().unwrap().1.value;
         let scale = width / total_value as f32;
         let mut x_current = x_base;
 
         for &(color, counter) in counters {
             let x_stop = x_base + counter.value as f32 * scale;
             debug_renderer.add_quad(
                 x_current,
@@ -861,17 +873,17 @@ impl Profiler {
                 x_stop,
                 rect.origin.y + height,
                 color,
                 color,
             );
             x_current = x_stop;
         }
 
-        self.y_left += height;
+        self.draw_state.y_left += height;
 
         rect.size.width += width + 10.0;
         rect
     }
 
     fn draw_gpu_cache_bars(
         &mut self,
         counters: &GpuCacheProfileCounters,
@@ -917,110 +929,131 @@ impl Profiler {
             total_rect.origin.x,
             total_rect.origin.y,
             total_rect.origin.x + total_rect.size.width,
             total_rect.origin.y + total_rect.size.height,
             ColorF::new(0.1, 0.1, 0.1, 0.8).into(),
             ColorF::new(0.2, 0.2, 0.2, 0.8).into(),
         );
 
-        self.y_left = total_rect.origin.y + total_rect.size.height + 30.0;
+        self.draw_state.y_left = total_rect.origin.y + total_rect.size.height + 30.0;
     }
 
-    pub fn draw_profile(
+    fn draw_compact_profile(
+        &mut self,
+        renderer_profile: &RendererProfileCounters,
+        debug_renderer: &mut DebugRenderer,
+    ) {
+        Profiler::draw_counters(
+            &[
+                &renderer_profile.frame_time as &ProfileCounter,
+                &renderer_profile.color_targets,
+                &renderer_profile.alpha_targets,
+                &renderer_profile.draw_calls,
+                &self.backend_time,
+                &self.compositor_time,
+                &self.gpu_time,
+            ],
+            debug_renderer,
+            true,
+            &mut self.draw_state,
+        );
+    }
+
+    fn draw_full_profile(
         &mut self,
         frame_profiles: &[FrameProfileCounters],
         backend_profile: &BackendProfileCounters,
         renderer_profile: &RendererProfileCounters,
         renderer_timers: &mut RendererProfileTimers,
         gpu_samplers: &[GpuSampler<GpuProfileTag>],
         screen_fraction: f32,
         debug_renderer: &mut DebugRenderer,
     ) {
-        self.x_left = 20.0;
-        self.y_left = 40.0;
-        self.x_right = 450.0;
-        self.y_right = 40.0;
-
-        let mut gpu_time = 0;
-        let gpu_timers = mem::replace(&mut renderer_timers.gpu_samples, Vec::new());
-        for sample in &gpu_timers {
-            gpu_time += sample.time_ns;
-        }
-        renderer_timers.gpu_time.set(gpu_time);
-
-        self.draw_counters(&[&renderer_profile.frame_time], debug_renderer, true);
-        self.draw_counters(&[&renderer_profile.frame_counter], debug_renderer, true);
+        Profiler::draw_counters(
+            &[
+                &renderer_profile.frame_time as &ProfileCounter,
+                &renderer_profile.frame_counter,
+                &renderer_profile.color_targets,
+                &renderer_profile.alpha_targets,
+            ],
+            debug_renderer,
+            true,
+            &mut self.draw_state
+        );
 
         self.draw_gpu_cache_bars(
             &backend_profile.resources.gpu_cache,
             debug_renderer,
         );
 
-        self.draw_counters(
+        Profiler::draw_counters(
             &[
                 &backend_profile.resources.font_templates,
                 &backend_profile.resources.image_templates,
             ],
             debug_renderer,
             true,
+            &mut self.draw_state
         );
 
-        self.draw_counters(
+        Profiler::draw_counters(
             &[
                 &backend_profile.resources.texture_cache.pages_a8_linear,
                 &backend_profile.resources.texture_cache.pages_rgb8_linear,
                 &backend_profile.resources.texture_cache.pages_rgba8_linear,
                 &backend_profile.resources.texture_cache.pages_rgba8_nearest,
                 &backend_profile.resources.texture_cache.pages_rg8_linear,
                 &backend_profile.ipc.display_lists,
             ],
             debug_renderer,
             true,
+            &mut self.draw_state
         );
 
-        self.draw_counters(
+        Profiler::draw_counters(
             &[
                 &backend_profile.ipc.build_time,
                 &backend_profile.ipc.send_time,
                 &backend_profile.ipc.consume_time,
                 &backend_profile.ipc.total_time,
             ],
             debug_renderer,
             true,
+            &mut self.draw_state
         );
 
         for frame_profile in frame_profiles {
-            self.draw_counters(
+            Profiler::draw_counters(
                 &[
                     &frame_profile.total_primitives,
                     &frame_profile.visible_primitives,
-                    &frame_profile.passes,
-                    &frame_profile.color_targets,
-                    &frame_profile.alpha_targets,
                 ],
                 debug_renderer,
                 true,
+                &mut self.draw_state
             );
         }
 
-        self.draw_counters(
+        Profiler::draw_counters(
             &[&renderer_profile.draw_calls, &renderer_profile.vertices],
             debug_renderer,
             true,
+            &mut self.draw_state
         );
 
-        self.draw_counters(
+        Profiler::draw_counters(
             &[
                 &backend_profile.total_time,
                 &renderer_timers.cpu_time,
                 &renderer_timers.gpu_time,
             ],
             debug_renderer,
             false,
+            &mut self.draw_state
         );
 
         if !gpu_samplers.is_empty() {
             let mut samplers = Vec::<FloatProfileCounter>::new();
             // Gathering unique GPU samplers. This has O(N^2) complexity,
             // but we only have a few samplers per target.
             for sampler in gpu_samplers {
                 let value = sampler.count as f32 * screen_fraction;
@@ -1029,44 +1062,92 @@ impl Profiler {
                 }) {
                     Some(pos) => samplers[pos].value += value,
                     None => samplers.push(FloatProfileCounter {
                         description: sampler.tag.label,
                         value,
                     }),
                 }
             }
-            self.draw_counters(&samplers, debug_renderer, false);
+            let samplers: Vec<&ProfileCounter> = samplers.iter().map(|sampler| {
+                sampler as &ProfileCounter
+            }).collect();
+            Profiler::draw_counters(
+                &samplers,
+                debug_renderer,
+                false,
+                &mut self.draw_state,
+            );
         }
 
+        let rect =
+            self.backend_time
+                .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "CPU (backend)", debug_renderer);
+        self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+        let rect = self.compositor_time.draw_graph(
+            self.draw_state.x_right,
+            self.draw_state.y_right,
+            "CPU (compositor)",
+            debug_renderer,
+        );
+        self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+        let rect =
+            self.ipc_time
+                .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList IPC", debug_renderer);
+        self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+        let rect = self.gpu_time
+            .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "GPU", debug_renderer);
+        self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+        let rect = self.gpu_frames
+            .draw(self.draw_state.x_left, f32::max(self.draw_state.y_left, self.draw_state.y_right), debug_renderer);
+        self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+    }
+
+    pub fn draw_profile(
+        &mut self,
+        frame_profiles: &[FrameProfileCounters],
+        backend_profile: &BackendProfileCounters,
+        renderer_profile: &RendererProfileCounters,
+        renderer_timers: &mut RendererProfileTimers,
+        gpu_samplers: &[GpuSampler<GpuProfileTag>],
+        screen_fraction: f32,
+        debug_renderer: &mut DebugRenderer,
+        compact: bool,
+    ) {
+        self.draw_state.x_left = 20.0;
+        self.draw_state.y_left = 40.0;
+        self.draw_state.x_right = 450.0;
+        self.draw_state.y_right = 40.0;
+
+        let mut gpu_time = 0;
+        let gpu_timers = mem::replace(&mut renderer_timers.gpu_samples, Vec::new());
+        for sample in &gpu_timers {
+            gpu_time += sample.time_ns;
+        }
+        renderer_timers.gpu_time.set(gpu_time);
+
         self.backend_time
             .push(backend_profile.total_time.nanoseconds);
         self.compositor_time
             .push(renderer_timers.cpu_time.nanoseconds);
         self.ipc_time
             .push(backend_profile.ipc.total_time.nanoseconds);
         self.gpu_time.push(gpu_time);
         self.gpu_frames.push(gpu_time, gpu_timers);
 
-
-        let rect =
-            self.backend_time
-                .draw_graph(self.x_right, self.y_right, "CPU (backend)", debug_renderer);
-        self.y_right += rect.size.height + PROFILE_PADDING;
-        let rect = self.compositor_time.draw_graph(
-            self.x_right,
-            self.y_right,
-            "CPU (compositor)",
-            debug_renderer,
-        );
-        self.y_right += rect.size.height + PROFILE_PADDING;
-        let rect =
-            self.ipc_time
-                .draw_graph(self.x_right, self.y_right, "DisplayList IPC", debug_renderer);
-        self.y_right += rect.size.height + PROFILE_PADDING;
-        let rect = self.gpu_time
-            .draw_graph(self.x_right, self.y_right, "GPU", debug_renderer);
-        self.y_right += rect.size.height + PROFILE_PADDING;
-        let rect = self.gpu_frames
-            .draw(self.x_left, f32::max(self.y_left, self.y_right), debug_renderer);
-        self.y_right += rect.size.height + PROFILE_PADDING;
+        if compact {
+            self.draw_compact_profile(
+                renderer_profile,
+                debug_renderer,
+            );
+        } else {
+            self.draw_full_profile(
+                frame_profiles,
+                backend_profile,
+                renderer_profile,
+                renderer_timers,
+                gpu_samplers,
+                screen_fraction,
+                debug_renderer,
+            );
+        }
     }
 }
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -4,16 +4,17 @@
 
 use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize, DevicePixel};
 use api::{LayerPoint, LayerRect, PremultipliedColorF};
 use box_shadow::BoxShadowCacheKey;
 use clip::{ClipSourcesWeakHandle};
 use clip_scroll_tree::CoordinateSystemId;
 use euclid::TypedSize2D;
 use gpu_types::{ClipScrollNodeIndex};
+use internal_types::RenderPassIndex;
 use picture::RasterizationSpace;
 use prim_store::{PrimitiveIndex};
 #[cfg(feature = "debugger")]
 use print_tree::{PrintTreePrinter};
 use std::{cmp, ops, usize, f32, i32};
 use std::rc::Rc;
 use tiling::{RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind};
@@ -236,16 +237,17 @@ pub enum ClearMode {
 
 #[derive(Debug)]
 pub struct RenderTask {
     pub cache_key: Option<RenderTaskKey>,
     pub location: RenderTaskLocation,
     pub children: Vec<RenderTaskId>,
     pub kind: RenderTaskKind,
     pub clear_mode: ClearMode,
+    pub pass_index: Option<RenderPassIndex>,
 }
 
 impl RenderTask {
     pub fn new_picture(
         size: Option<DeviceIntSize>,
         prim_index: PrimitiveIndex,
         target_kind: RenderTargetKind,
         content_origin_x: f32,
@@ -271,26 +273,28 @@ impl RenderTask {
             kind: RenderTaskKind::Picture(PictureTask {
                 prim_index,
                 target_kind,
                 content_origin: LayerPoint::new(content_origin_x, content_origin_y),
                 color,
                 rasterization_kind,
             }),
             clear_mode,
+            pass_index: None,
         }
     }
 
     pub fn new_readback(screen_rect: DeviceIntRect) -> Self {
         RenderTask {
             cache_key: None,
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, screen_rect.size),
             kind: RenderTaskKind::Readback(screen_rect),
             clear_mode: ClearMode::Transparent,
+            pass_index: None,
         }
     }
 
     pub fn new_mask(
         key: Option<ClipId>,
         outer_rect: DeviceIntRect,
         clips: Vec<ClipWorkItem>,
         prim_coordinate_system_id: CoordinateSystemId,
@@ -300,16 +304,17 @@ impl RenderTask {
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, outer_rect.size),
             kind: RenderTaskKind::CacheMask(CacheMaskTask {
                 actual_rect: outer_rect,
                 clips,
                 coordinate_system_id: prim_coordinate_system_id,
             }),
             clear_mode: ClearMode::One,
+            pass_index: None,
         }
     }
 
     // Construct a render task to apply a blur to a primitive.
     // The render task chain that is constructed looks like:
     //
     //    PrimitiveCacheTask: Draw the primitives.
     //           ^
@@ -370,16 +375,17 @@ impl RenderTask {
             kind: RenderTaskKind::VerticalBlur(BlurTask {
                 blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
                 regions: regions.to_vec(),
                 color,
                 scale_factor,
             }),
             clear_mode,
+            pass_index: None,
         };
 
         let blur_task_v_id = render_tasks.add(blur_task_v);
 
         let blur_task_h = RenderTask {
             cache_key: match box_shadow_cache_key {
                 Some(key) => Some(RenderTaskKey::CacheBlur(key, 1)),
                 None => None,
@@ -389,16 +395,17 @@ impl RenderTask {
             kind: RenderTaskKind::HorizontalBlur(BlurTask {
                 blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
                 regions: regions.to_vec(),
                 color,
                 scale_factor,
             }),
             clear_mode,
+            pass_index: None,
         };
 
         blur_task_h
     }
 
     pub fn new_scaling(
         target_kind: RenderTargetKind,
         src_task_id: RenderTaskId,
@@ -412,16 +419,17 @@ impl RenderTask {
             },
             children: vec![src_task_id],
             location: RenderTaskLocation::Dynamic(None, target_size),
             kind: RenderTaskKind::Scaling(target_kind),
             clear_mode: match target_kind {
                 RenderTargetKind::Color => ClearMode::Transparent,
                 RenderTargetKind::Alpha => ClearMode::One,
             },
+            pass_index: None,
         }
     }
 
     // Write (up to) 8 floats of data specific to the type
     // of render task that is provided to the GPU shaders
     // via a vertex texture.
     pub fn write_task_data(&self) -> RenderTaskData {
         // NOTE: The ordering and layout of these structures are
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -35,17 +35,17 @@ use device::ProgramCache;
 use euclid::{rect, ScaleFactor, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_types::PrimitiveInstance;
 use internal_types::{BatchTextures, SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
 use internal_types::{CacheTextureId, FastHashMap, RenderedDocument, ResultMsg, TextureUpdateOp};
-use internal_types::{DebugOutput, RenderTargetInfo, TextureUpdateList, TextureUpdateSource};
+use internal_types::{DebugOutput, RenderPassIndex, RenderTargetInfo, TextureUpdateList, TextureUpdateSource};
 use profiler::{BackendProfileCounters, Profiler};
 use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use query::{GpuProfiler, GpuTimer};
 use rayon::Configuration as ThreadPoolConfig;
 use rayon::ThreadPool;
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
 use render_task::{RenderTaskKind, RenderTaskTree};
@@ -60,18 +60,18 @@ use std::mem;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget};
-use tiling::{RenderPass, RenderPassKind, RenderTargetKind, RenderTargetList};
-use tiling::{BatchKey, BatchKind, BrushBatchKind, Frame, RenderTarget, ScalingInfo, TransformBatchKind};
+use tiling::{RenderPass, RenderPassKind, RenderTargetList};
+use tiling::{BatchKey, BatchKind, BrushBatchKind, BrushImageSourceKind, Frame, RenderTarget, ScalingInfo, TransformBatchKind};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
 const GPU_TAG_BRUSH_SOLID: GpuProfileTag = GpuProfileTag {
     label: "B_Solid",
     color: debug_colors::RED,
@@ -251,16 +251,17 @@ bitflags! {
         const PROFILER_DBG      = 1 << 0;
         const RENDER_TARGET_DBG = 1 << 1;
         const TEXTURE_CACHE_DBG = 1 << 2;
         const ALPHA_PRIM_DBG    = 1 << 3;
         const GPU_TIME_QUERIES  = 1 << 4;
         const GPU_SAMPLE_QUERIES= 1 << 5;
         const DISABLE_BATCHING  = 1 << 6;
         const EPOCHS            = 1 << 7;
+        const COMPACT_PROFILER  = 1 << 8;
     }
 }
 
 fn flag_changed(before: DebugFlags, after: DebugFlags, select: DebugFlags) -> Option<bool> {
     if before & select != after & select {
         Some(after.contains(select))
     } else {
         None
@@ -546,16 +547,18 @@ impl CpuProfile {
             frame_id,
             backend_time_ns,
             composite_time_ns,
             draw_calls,
         }
     }
 }
 
+struct RenderTargetPoolId(usize);
+
 struct SourceTextureResolver {
     /// A vector for fast resolves of texture cache IDs to
     /// native texture IDs. This maps to a free-list managed
     /// by the backend thread / texture cache. We free the
     /// texture memory associated with a TextureId when its
     /// texture cache ID is freed by the texture cache, but
     /// reuse the TextureId when the texture caches's free
     /// list reuses the texture cache ID. This saves having to
@@ -568,16 +571,21 @@ struct SourceTextureResolver {
     /// A special 1x1 dummy cache texture used for shaders that expect to work
     /// with the cache but are actually running in the first pass
     /// when no target is yet provided as a cache texture input.
     dummy_cache_texture: Texture,
 
     /// The current cache textures.
     cache_rgba8_texture: Option<Texture>,
     cache_a8_texture: Option<Texture>,
+
+    pass_rgba8_textures: FastHashMap<RenderPassIndex, RenderTargetPoolId>,
+    pass_a8_textures: FastHashMap<RenderPassIndex, RenderTargetPoolId>,
+
+    render_target_pool: Vec<Texture>,
 }
 
 impl SourceTextureResolver {
     fn new(device: &mut Device) -> SourceTextureResolver {
         let mut dummy_cache_texture = device.create_texture(TextureTarget::Array);
         device.init_texture(
             &mut dummy_cache_texture,
             1,
@@ -590,46 +598,66 @@ impl SourceTextureResolver {
         );
 
         SourceTextureResolver {
             cache_texture_map: Vec::new(),
             external_images: FastHashMap::default(),
             dummy_cache_texture,
             cache_a8_texture: None,
             cache_rgba8_texture: None,
+            pass_rgba8_textures: FastHashMap::default(),
+            pass_a8_textures: FastHashMap::default(),
+            render_target_pool: Vec::new(),
         }
     }
 
     fn deinit(self, device: &mut Device) {
         device.delete_texture(self.dummy_cache_texture);
 
         for texture in self.cache_texture_map {
             device.delete_texture(texture);
         }
+
+        for texture in self.render_target_pool {
+            device.delete_texture(texture);
+        }
     }
 
-    fn begin_frame(&self) {
+    fn begin_frame(&mut self) {
         assert!(self.cache_rgba8_texture.is_none());
         assert!(self.cache_a8_texture.is_none());
+
+        self.pass_rgba8_textures.clear();
+        self.pass_a8_textures.clear();
     }
 
-    fn end_frame(&mut self, pool: &mut Vec<Texture>) {
+    fn end_frame(&mut self, pass_index: RenderPassIndex) {
         // return the cached targets to the pool
-        self.end_pass(None, None, pool)
+        self.end_pass(None, None, pass_index)
     }
 
     fn end_pass(
         &mut self,
         a8_texture: Option<Texture>,
         rgba8_texture: Option<Texture>,
-        pool: &mut Vec<Texture>,
+        pass_index: RenderPassIndex,
     ) {
         // If we have cache textures from previous pass, return them to the pool.
-        pool.extend(self.cache_rgba8_texture.take());
-        pool.extend(self.cache_a8_texture.take());
+        // Also assign the pool index of those cache textures to last pass's index because this is
+        // the result of last pass.
+        if let Some(texture) = self.cache_rgba8_texture.take() {
+            self.pass_rgba8_textures.insert(
+                RenderPassIndex(pass_index.0 - 1), RenderTargetPoolId(self.render_target_pool.len()));
+            self.render_target_pool.push(texture);
+        }
+        if let Some(texture) = self.cache_a8_texture.take() {
+            self.pass_a8_textures.insert(
+                RenderPassIndex(pass_index.0 - 1), RenderTargetPoolId(self.render_target_pool.len()));
+            self.render_target_pool.push(texture);
+        }
 
         // We have another pass to process, make these textures available
         // as inputs to the next pass.
         self.cache_rgba8_texture = rgba8_texture;
         self.cache_a8_texture = a8_texture;
     }
 
     // Bind a source texture to the device.
@@ -653,16 +681,28 @@ impl SourceTextureResolver {
                     .get(&(external_image.id, external_image.channel_index))
                     .expect("BUG: External image should be resolved by now!");
                 device.bind_external_texture(sampler, texture);
             }
             SourceTexture::TextureCache(index) => {
                 let texture = &self.cache_texture_map[index.0];
                 device.bind_texture(sampler, texture);
             }
+            SourceTexture::RenderTaskCacheRGBA8(pass_index) => {
+                let pool_index = self.pass_rgba8_textures
+                    .get(&pass_index)
+                    .expect("BUG: pass_index doesn't map to pool_index");
+                device.bind_texture(sampler, &self.render_target_pool[pool_index.0])
+            }
+            SourceTexture::RenderTaskCacheA8(pass_index) => {
+                let pool_index = self.pass_a8_textures
+                    .get(&pass_index)
+                    .expect("BUG: pass_index doesn't map to pool_index");
+                device.bind_texture(sampler, &self.render_target_pool[pool_index.0])
+            }
         }
     }
 
     // Get the real (OpenGL) texture ID for a given source texture.
     // For a texture cache texture, the IDs are stored in a vector
     // map for fast access.
     fn resolve(&self, texture_id: &SourceTexture) -> Option<&Texture> {
         match *texture_id {
@@ -676,16 +716,28 @@ impl SourceTextureResolver {
                 self.cache_rgba8_texture
                     .as_ref()
                     .unwrap_or(&self.dummy_cache_texture),
             ),
             SourceTexture::External(..) => {
                 panic!("BUG: External textures cannot be resolved, they can only be bound.");
             }
             SourceTexture::TextureCache(index) => Some(&self.cache_texture_map[index.0]),
+            SourceTexture::RenderTaskCacheRGBA8(pass_index) => {
+                let pool_index = self.pass_rgba8_textures
+                    .get(&pass_index)
+                    .expect("BUG: pass_index doesn't map to pool_index");
+                Some(&self.render_target_pool[pool_index.0])
+            },
+            SourceTexture::RenderTaskCacheA8(pass_index) => {
+                let pool_index = self.pass_a8_textures
+                    .get(&pass_index)
+                    .expect("BUG: pass_index doesn't map to pool_index");
+                Some(&self.render_target_pool[pool_index.0])
+            },
         }
     }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[allow(dead_code)] // SubpixelVariableTextColor is not used at the moment.
 pub enum BlendMode {
     None,
@@ -1344,16 +1396,17 @@ pub struct Renderer {
     cs_line: LazilyCompiledShader,
     cs_blur_a8: LazilyCompiledShader,
     cs_blur_rgba8: LazilyCompiledShader,
 
     // Brush shaders
     brush_mask_corner: LazilyCompiledShader,
     brush_mask_rounded_rect: LazilyCompiledShader,
     brush_image_rgba8: BrushShader,
+    brush_image_rgba8_alpha_mask: BrushShader,
     brush_image_a8: BrushShader,
     brush_solid: BrushShader,
 
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
@@ -1389,18 +1442,16 @@ pub struct Renderer {
     enable_clear_scissor: bool,
     debug: DebugRenderer,
     debug_flags: DebugFlags,
     backend_profile_counters: BackendProfileCounters,
     profile_counters: RendererProfileCounters,
     profiler: Profiler,
     last_time: u64,
 
-    render_target_pool: Vec<Texture>,
-
     gpu_profile: GpuProfiler<GpuProfileTag>,
     prim_vao: VAO,
     blur_vao: VAO,
     clip_vao: VAO,
 
     node_data_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
     gpu_cache_texture: CacheTexture,
@@ -1569,16 +1620,23 @@ impl Renderer {
 
         let brush_image_rgba8 = try!{
             BrushShader::new("brush_image",
                              &mut device,
                              &["COLOR_TARGET"],
                              options.precache_shaders)
         };
 
+        let brush_image_rgba8_alpha_mask = try!{
+            BrushShader::new("brush_image",
+                             &mut device,
+                             &["COLOR_TARGET_ALPHA_MASK"],
+                             options.precache_shaders)
+        };
+
         let cs_blur_a8 = try!{
             LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Blur),
                                      "cs_blur",
                                       &["ALPHA_TARGET"],
                                       &mut device,
                                       options.precache_shaders)
         };
 
@@ -1993,16 +2051,17 @@ impl Renderer {
             pending_shader_updates: Vec::new(),
             cs_text_run,
             cs_line,
             cs_blur_a8,
             cs_blur_rgba8,
             brush_mask_corner,
             brush_mask_rounded_rect,
             brush_image_rgba8,
+            brush_image_rgba8_alpha_mask,
             brush_image_a8,
             brush_solid,
             cs_clip_rectangle,
             cs_clip_border,
             cs_clip_image,
             ps_text_run,
             ps_text_run_subpx_bg_pass1,
             ps_image,
@@ -2022,17 +2081,16 @@ impl Renderer {
             backend_profile_counters: BackendProfileCounters::new(),
             profile_counters: RendererProfileCounters::new(),
             profiler: Profiler::new(),
             max_texture_size: max_texture_size,
             max_recorded_profiles: options.max_recorded_profiles,
             clear_color: options.clear_color,
             enable_clear_scissor: options.enable_clear_scissor,
             last_time: 0,
-            render_target_pool: Vec::new(),
             gpu_profile,
             prim_vao,
             blur_vao,
             clip_vao,
             node_data_texture,
             render_task_texture,
             pipeline_epoch_map: FastHashMap::default(),
             dither_matrix_texture,
@@ -2544,16 +2602,17 @@ impl Renderer {
             self.profiler.draw_profile(
                 &frame_profiles,
                 &self.backend_profile_counters,
                 &self.profile_counters,
                 &mut profile_timers,
                 &profile_samplers,
                 screen_fraction,
                 &mut self.debug,
+                self.debug_flags.contains(DebugFlags::COMPACT_PROFILER),
             );
         }
 
         self.profile_counters.reset();
         self.profile_counters.frame_counter.inc();
 
         self.debug.render(&mut self.device, &framebuffer_size);
         profile_timers.cpu_time.profile(|| {
@@ -2773,18 +2832,19 @@ impl Renderer {
                             key.blend_mode,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
                     }
                     BrushBatchKind::Image(target_kind) => {
                         let shader = match target_kind {
-                            RenderTargetKind::Alpha => &mut self.brush_image_a8,
-                            RenderTargetKind::Color => &mut self.brush_image_rgba8,
+                            BrushImageSourceKind::Alpha => &mut self.brush_image_a8,
+                            BrushImageSourceKind::Color => &mut self.brush_image_rgba8,
+                            BrushImageSourceKind::ColorAlphaMask => &mut self.brush_image_rgba8_alpha_mask,
                         };
                         shader.bind(
                             &mut self.device,
                             key.blend_mode,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
@@ -2982,16 +3042,17 @@ impl Renderer {
         target_size: DeviceUintSize,
         depth_is_ready: bool,
         clear_color: Option<[f32; 4]>,
         render_tasks: &RenderTaskTree,
         projection: &Transform3D<f32>,
         frame_id: FrameId,
         stats: &mut RendererStats,
     ) {
+        self.profile_counters.color_targets.inc();
         let _gm = self.gpu_profile.start_marker("color target");
 
         // sanity check for the depth buffer
         if let Some((texture, _)) = render_target {
             assert!(texture.has_depth() >= target.needs_depth());
         }
 
         {
@@ -3403,16 +3464,17 @@ impl Renderer {
         &mut self,
         render_target: (&Texture, i32),
         target: &AlphaRenderTarget,
         target_size: DeviceUintSize,
         projection: &Transform3D<f32>,
         render_tasks: &RenderTaskTree,
         stats: &mut RendererStats,
     ) {
+        self.profile_counters.alpha_targets.inc();
         let _gm = self.gpu_profile.start_marker("alpha target");
         let alpha_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_ALPHA);
 
         {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
             self.device
                 .bind_draw_target(Some(render_target), Some(target_size));
             self.device.disable_depth();
@@ -3679,34 +3741,34 @@ impl Renderer {
         let mut texture = if perfect_only {
             debug_assert!(list.texture.is_none());
 
             let selector = TargetSelector {
                 size: list.max_size,
                 num_layers: list.targets.len() as _,
                 format: list.format,
             };
-            let index = self.render_target_pool
+            let index = self.texture_resolver.render_target_pool
                 .iter()
                 .position(|texture| {
                     selector == TargetSelector {
                         size: texture.get_dimensions(),
                         num_layers: texture.get_render_target_layer_count(),
                         format: texture.get_format(),
                     }
                 });
             match index {
-                Some(pos) => self.render_target_pool.swap_remove(pos),
+                Some(pos) => self.texture_resolver.render_target_pool.swap_remove(pos),
                 None => return,
             }
         } else {
             if list.texture.is_some() {
                 return
             }
-            match self.render_target_pool.pop() {
+            match self.texture_resolver.render_target_pool.pop() {
                 Some(texture) => texture,
                 None => self.device.create_texture(TextureTarget::Array),
             }
         };
 
         self.device.init_texture(
             &mut texture,
             list.max_size.width,
@@ -3880,32 +3942,32 @@ impl Renderer {
 
                     (alpha.texture.take(), color.texture.take())
                 }
             };
 
             self.texture_resolver.end_pass(
                 cur_alpha,
                 cur_color,
-                &mut self.render_target_pool,
+                RenderPassIndex(pass_index),
             );
 
             // After completing the first pass, make the A8 target available as an
             // input to any subsequent passes.
             if pass_index == 0 {
                 if let Some(shared_alpha_texture) =
                     self.texture_resolver.resolve(&SourceTexture::CacheA8)
                 {
                     self.device
                         .bind_texture(TextureSampler::SharedCacheA8, shared_alpha_texture);
                 }
             }
         }
 
-        self.texture_resolver.end_frame(&mut self.render_target_pool);
+        self.texture_resolver.end_frame(RenderPassIndex(frame.passes.len()));
         self.draw_render_target_debug(framebuffer_size);
         self.draw_texture_cache_debug(framebuffer_size);
         self.draw_epoch_debug();
 
         // Garbage collect any frame outputs that weren't used this frame.
         let device = &mut self.device;
         self.output_targets
             .retain(|_, target| if target.last_access != frame_id {
@@ -3962,29 +4024,29 @@ impl Renderer {
     fn draw_render_target_debug(&mut self, framebuffer_size: DeviceUintSize) {
         if !self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
             return;
         }
 
         let mut spacing = 16;
         let mut size = 512;
         let fb_width = framebuffer_size.width as i32;
-        let num_layers: i32 = self.render_target_pool
+        let num_layers: i32 = self.texture_resolver.render_target_pool
             .iter()
             .map(|texture| texture.get_render_target_layer_count() as i32)
             .sum();
 
         if num_layers * (size + spacing) > fb_width {
             let factor = fb_width as f32 / (num_layers * (size + spacing)) as f32;
             size = (size as f32 * factor) as i32;
             spacing = (spacing as f32 * factor) as i32;
         }
 
         let mut target_index = 0;
-        for texture in &self.render_target_pool {
+        for texture in &self.texture_resolver.render_target_pool {
             let dimensions = texture.get_dimensions();
             let src_rect = DeviceIntRect::new(DeviceIntPoint::zero(), dimensions.to_i32());
 
             let layer_count = texture.get_render_target_layer_count();
             for layer_index in 0 .. layer_count {
                 self.device
                     .bind_read_target(Some((texture, layer_index as i32)));
                 let x = fb_width - (spacing + size) * (target_index + 1);
@@ -4116,32 +4178,30 @@ impl Renderer {
         //Note: this is a fake frame, only needed because texture deletion is required to happen inside a frame
         self.device.begin_frame();
         self.gpu_cache_texture.deinit(&mut self.device);
         if let Some(dither_matrix_texture) = self.dither_matrix_texture {
             self.device.delete_texture(dither_matrix_texture);
         }
         self.node_data_texture.deinit(&mut self.device);
         self.render_task_texture.deinit(&mut self.device);
-        for texture in self.render_target_pool {
-            self.device.delete_texture(texture);
-        }
         self.device.delete_pbo(self.texture_cache_upload_pbo);
         self.texture_resolver.deinit(&mut self.device);
         self.device.delete_vao(self.prim_vao);
         self.device.delete_vao(self.clip_vao);
         self.device.delete_vao(self.blur_vao);
         self.debug.deinit(&mut self.device);
         self.cs_text_run.deinit(&mut self.device);
         self.cs_line.deinit(&mut self.device);
         self.cs_blur_a8.deinit(&mut self.device);
         self.cs_blur_rgba8.deinit(&mut self.device);
         self.brush_mask_rounded_rect.deinit(&mut self.device);
         self.brush_mask_corner.deinit(&mut self.device);
         self.brush_image_rgba8.deinit(&mut self.device);
+        self.brush_image_rgba8_alpha_mask.deinit(&mut self.device);
         self.brush_image_a8.deinit(&mut self.device);
         self.brush_solid.deinit(&mut self.device);
         self.cs_clip_rectangle.deinit(&mut self.device);
         self.cs_clip_image.deinit(&mut self.device);
         self.cs_clip_border.deinit(&mut self.device);
         self.ps_text_run.deinit(&mut self.device);
         self.ps_text_run_subpx_bg_pass1.deinit(&mut self.device);
         for shader in self.ps_image {
--- a/gfx/webrender/src/scene.rs
+++ b/gfx/webrender/src/scene.rs
@@ -157,17 +157,18 @@ impl FilterOpHelpers for FilterOp {
         match *self {
             FilterOp::Blur(..) |
             FilterOp::Brightness(..) |
             FilterOp::Contrast(..) |
             FilterOp::Grayscale(..) |
             FilterOp::HueRotate(..) |
             FilterOp::Invert(..) |
             FilterOp::Saturate(..) |
-            FilterOp::Sepia(..) => true,
+            FilterOp::Sepia(..) |
+            FilterOp::DropShadow(..) => true,
             FilterOp::Opacity(_, amount) => {
                 amount > OPACITY_EPSILON
             }
         }
     }
 
     fn is_noop(&self) -> bool {
         match *self {
@@ -175,16 +176,19 @@ impl FilterOpHelpers for FilterOp {
             FilterOp::Brightness(amount) => amount == 1.0,
             FilterOp::Contrast(amount) => amount == 1.0,
             FilterOp::Grayscale(amount) => amount == 0.0,
             FilterOp::HueRotate(amount) => amount == 0.0,
             FilterOp::Invert(amount) => amount == 0.0,
             FilterOp::Opacity(_, amount) => amount >= 1.0,
             FilterOp::Saturate(amount) => amount == 1.0,
             FilterOp::Sepia(amount) => amount == 0.0,
+            FilterOp::DropShadow(offset, blur, _) => {
+                offset.x == 0.0 && offset.y == 0.0 && blur == 0.0
+            }
         }
     }
 }
 
 pub trait StackingContextHelpers {
     fn mix_blend_mode_for_compositing(&self) -> Option<MixBlendMode>;
     fn filter_ops_for_compositing(
         &self,
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,31 +1,31 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadiusKind, ClipId, ColorF, DeviceIntPoint, ImageKey};
-use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
+use api::{DeviceIntRect, DeviceIntSize, device_length, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{DocumentLayer, ExternalImageType, FilterOp, FontRenderMode};
 use api::{ImageFormat, ImageRendering};
 use api::{LayerRect, MixBlendMode, PipelineId};
 use api::{TileOffset, YuvColorSpace, YuvFormat};
 use api::{LayerToWorldTransform, WorldPixel};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId};
 use device::Texture;
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BrushInstance, BrushImageKind, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, SourceTexture};
-use internal_types::{BatchTextures};
+use internal_types::{BatchTextures, RenderPassIndex};
 use picture::{PictureCompositeMode, PictureKind, PicturePrimitive, RasterizationSpace};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{BrushPrimitive, BrushMaskKind, BrushKind, BrushSegmentKind, DeferredResolve, PrimitiveRun};
 use profiler::FrameProfileCounters;
 use render_task::{ClipWorkItem};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
@@ -143,19 +143,16 @@ pub struct ScrollbarPrimitive {
     pub clip_id: ClipId,
     pub prim_index: PrimitiveIndex,
     pub frame_rect: LayerRect,
 }
 
 #[derive(Debug, Copy, Clone)]
 pub struct RenderTargetIndex(pub usize);
 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
-pub struct RenderPassIndex(isize);
-
 #[derive(Debug)]
 struct DynamicTaskInfo {
     task_id: RenderTaskId,
     rect: DeviceIntRect,
 }
 
 pub struct AlphaBatchList {
     pub batches: Vec<AlphaPrimitiveBatch>,
@@ -622,17 +619,18 @@ fn add_to_batch(
             match picture.render_task_id {
                 Some(cache_task_id) => {
                     let cache_task_address = render_tasks.get_task_address(cache_task_id);
                     let textures = BatchTextures::render_target_cache();
 
                     match picture.kind {
                         PictureKind::TextShadow { .. } => {
                             let kind = BatchKind::Brush(
-                                BrushBatchKind::Image(picture.target_kind()),
+                                BrushBatchKind::Image(
+                                    BrushImageSourceKind::from_render_target_kind(picture.target_kind())),
                             );
                             let key = BatchKey::new(kind, blend_mode, textures);
                             let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
 
                             let instance = BrushInstance {
                                 picture_address: task_address,
                                 prim_address: prim_cache_address,
                                 clip_id,
@@ -642,17 +640,18 @@ fn add_to_batch(
                                 segment_kind: 0,
                                 user_data0: cache_task_address.0 as i32,
                                 user_data1: BrushImageKind::Simple as i32,
                             };
                             batch.push(PrimitiveInstance::from(instance));
                         }
                         PictureKind::BoxShadow { radii_kind, .. } => {
                             let kind = BatchKind::Brush(
-                                BrushBatchKind::Image(picture.target_kind()),
+                                BrushBatchKind::Image(
+                                    BrushImageSourceKind::from_render_target_kind(picture.target_kind())),
                             );
                             let key = BatchKey::new(kind, blend_mode, textures);
                             let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
 
                             let image_kind = match radii_kind {
                                 BorderRadiusKind::Uniform => {
                                     BrushImageKind::Mirror
                                 }
@@ -671,17 +670,17 @@ fn add_to_batch(
                                 segment_kind: 0,
                                 user_data0: cache_task_address.0 as i32,
                                 user_data1: image_kind as i32,
                             };
                             batch.push(PrimitiveInstance::from(instance));
                         }
                         PictureKind::Image {
                             composite_mode,
-                            readback_render_task_id,
+                            secondary_render_task_id,
                             is_in_3d_context,
                             reference_frame_id,
                             real_local_rect,
                             ..
                         } => {
                             // If this picture is participating in a 3D rendering context,
                             // then don't add it to any batches here. Instead, create a polygon
                             // for it and add it to the current plane splitter.
@@ -712,32 +711,87 @@ fn add_to_batch(
                             match composite_mode.expect("bug: only composites here") {
                                 PictureCompositeMode::Filter(filter) => {
                                     match filter {
                                         FilterOp::Blur(..) => {
                                             let src_task_address = render_tasks.get_task_address(source_id);
                                             let key = BatchKey::new(
                                                 BatchKind::HardwareComposite,
                                                 BlendMode::PremultipliedAlpha,
-                                                BatchTextures::no_texture(),
+                                                BatchTextures::render_target_cache(),
                                             );
                                             let batch = batch_list.get_suitable_batch(key, &item_bounding_rect);
                                             let instance = CompositePrimitiveInstance::new(
                                                 task_address,
                                                 src_task_address,
                                                 RenderTaskAddress(0),
                                                 item_bounding_rect.origin.x,
                                                 item_bounding_rect.origin.y,
                                                 z,
                                                 item_bounding_rect.size.width,
                                                 item_bounding_rect.size.height,
                                             );
 
                                             batch.push(PrimitiveInstance::from(instance));
                                         }
+                                        FilterOp::DropShadow(offset, _, _) => {
+                                            let kind = BatchKind::Brush(
+                                                BrushBatchKind::Image(BrushImageSourceKind::ColorAlphaMask),
+                                            );
+                                            let key = BatchKey::new(kind, blend_mode, textures);
+
+                                            let instance = BrushInstance {
+                                                picture_address: task_address,
+                                                prim_address: prim_cache_address,
+                                                clip_id,
+                                                scroll_id,
+                                                clip_task_address,
+                                                z,
+                                                segment_kind: 0,
+                                                user_data0: cache_task_address.0 as i32,
+                                                user_data1: BrushImageKind::Simple as i32,
+                                            };
+
+                                            {
+                                                let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+                                                batch.push(PrimitiveInstance::from(instance));
+                                            }
+
+                                            let secondary_id = secondary_render_task_id.expect("no secondary!?");
+                                            let render_task = &render_tasks[secondary_id];
+                                            let secondary_task_address = render_tasks.get_task_address(secondary_id);
+                                            let render_pass_index = render_task.pass_index.expect("no render_pass_index!?");
+                                            let secondary_textures = BatchTextures {
+                                                colors: [
+                                                    SourceTexture::RenderTaskCacheRGBA8(render_pass_index),
+                                                    SourceTexture::Invalid,
+                                                    SourceTexture::Invalid,
+                                                ],
+                                            };
+                                            let key = BatchKey::new(
+                                                BatchKind::HardwareComposite,
+                                                BlendMode::PremultipliedAlpha,
+                                                secondary_textures,
+                                            );
+                                            let batch = batch_list.get_suitable_batch(key, &item_bounding_rect);
+                                            let device_offset_x = device_length(offset.x, ctx.device_pixel_ratio);
+                                            let device_offset_y = device_length(offset.y, ctx.device_pixel_ratio);
+                                            let instance = CompositePrimitiveInstance::new(
+                                                task_address,
+                                                secondary_task_address,
+                                                RenderTaskAddress(0),
+                                                item_bounding_rect.origin.x - device_offset_x.0,
+                                                item_bounding_rect.origin.y - device_offset_y.0,
+                                                z,
+                                                item_bounding_rect.size.width,
+                                                item_bounding_rect.size.height,
+                                            );
+
+                                            batch.push(PrimitiveInstance::from(instance));
+                                        }
                                         _ => {
                                             let key = BatchKey::new(
                                                 BatchKind::Blend,
                                                 BlendMode::PremultipliedAlpha,
                                                 BatchTextures::no_texture(),
                                             );
                                             let src_task_address = render_tasks.get_task_address(source_id);
 
@@ -746,16 +800,17 @@ fn add_to_batch(
                                                 FilterOp::Contrast(amount) => (1, amount),
                                                 FilterOp::Grayscale(amount) => (2, amount),
                                                 FilterOp::HueRotate(angle) => (3, angle),
                                                 FilterOp::Invert(amount) => (4, amount),
                                                 FilterOp::Saturate(amount) => (5, amount),
                                                 FilterOp::Sepia(amount) => (6, amount),
                                                 FilterOp::Brightness(amount) => (7, amount),
                                                 FilterOp::Opacity(_, amount) => (8, amount),
+                                                FilterOp::DropShadow(..) => unreachable!(),
                                             };
 
                                             let amount = (amount * 65535.0).round() as i32;
                                             let batch = batch_list.get_suitable_batch(key, &item_bounding_rect);
 
                                             let instance = CompositePrimitiveInstance::new(
                                                 task_address,
                                                 src_task_address,
@@ -767,17 +822,17 @@ fn add_to_batch(
                                                 0,
                                             );
 
                                             batch.push(PrimitiveInstance::from(instance));
                                         }
                                     }
                                 }
                                 PictureCompositeMode::MixBlend(mode) => {
-                                    let backdrop_id = readback_render_task_id.expect("no backdrop!?");
+                                    let backdrop_id = secondary_render_task_id.expect("no backdrop!?");
 
                                     let key = BatchKey::new(
                                         BatchKind::Composite {
                                             task_id,
                                             source_id,
                                             backdrop_id,
                                         },
                                         BlendMode::PremultipliedAlpha,
@@ -800,17 +855,17 @@ fn add_to_batch(
 
                                     batch.push(PrimitiveInstance::from(instance));
                                 }
                                 PictureCompositeMode::Blit => {
                                     let src_task_address = render_tasks.get_task_address(source_id);
                                     let key = BatchKey::new(
                                         BatchKind::HardwareComposite,
                                         BlendMode::PremultipliedAlpha,
-                                        BatchTextures::no_texture(),
+                                        BatchTextures::render_target_cache(),
                                     );
                                     let batch = batch_list.get_suitable_batch(key, &item_bounding_rect);
                                     let instance = CompositePrimitiveInstance::new(
                                         task_address,
                                         src_task_address,
                                         RenderTaskAddress(0),
                                         item_bounding_rect.origin.x,
                                         item_bounding_rect.origin.y,
@@ -1791,32 +1846,35 @@ impl RenderPass {
 
     pub fn build(
         &mut self,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
         clip_store: &ClipStore,
+        pass_index: RenderPassIndex,
     ) {
         profile_scope!("RenderPass::build");
 
         match self.kind {
             RenderPassKind::MainFramebuffer(ref mut target) => {
                 for &task_id in &self.tasks {
                     assert_eq!(render_tasks[task_id].target_kind(), RenderTargetKind::Color);
+                    render_tasks[task_id].pass_index = Some(pass_index);
                     target.add_task(task_id, ctx, gpu_cache, render_tasks, clip_store);
                 }
                 target.build(ctx, gpu_cache, render_tasks, deferred_resolves);
             }
             RenderPassKind::OffScreen { ref mut color, ref mut alpha } => {
                 // Step through each task, adding to batches as appropriate.
                 for &task_id in &self.tasks {
                     let target_kind = {
                         let task = &mut render_tasks[task_id];
+                        task.pass_index = Some(pass_index);
                         let target_kind = task.target_kind();
 
                         // Find a target to assign this task to, or create a new
                         // one if required.
                         match task.location {
                             RenderTaskLocation::Fixed => {}
                             RenderTaskLocation::Dynamic(ref mut origin, size) => {
                                 let dynamic_entry = match task.cache_key {
@@ -1876,18 +1934,34 @@ pub enum TransformBatchKind {
     AngleGradient,
     RadialGradient,
     BorderCorner,
     BorderEdge,
     Line,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum BrushImageSourceKind { // Kind of image source for a brush image batch; this is the payload of `BrushBatchKind::Image`.
+    Alpha, // Produced from `RenderTargetKind::Alpha` by `from_render_target_kind`.
+    Color, // Produced from `RenderTargetKind::Color` by `from_render_target_kind`.
+    ColorAlphaMask, // Never produced by `from_render_target_kind`; chosen explicitly when batching `FilterOp::DropShadow`.
+}
+
+impl BrushImageSourceKind {
+    pub fn from_render_target_kind(render_target_kind: RenderTargetKind) -> BrushImageSourceKind { // Maps a render target kind to the same-named source kind.
+        match render_target_kind { // No wildcard arm: a new `RenderTargetKind` variant will not compile until it is mapped here.
+            RenderTargetKind::Color => BrushImageSourceKind::Color,
+            RenderTargetKind::Alpha => BrushImageSourceKind::Alpha,
+        }
+    }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum BrushBatchKind {
-    Image(RenderTargetKind),
+    Image(BrushImageSourceKind), // Built via `BrushImageSourceKind::from_render_target_kind(..)` for shadow pictures, or `ColorAlphaMask` directly for drop shadows.
     Solid,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum BatchKind {
     Composite {
         task_id: RenderTaskId,
         source_id: RenderTaskId,
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -15,13 +15,13 @@ bitflags = "1.0"
 bincode = "0.9"
 byteorder = "1.2.1"
 euclid = "0.15"
 ipc-channel = {version = "0.9", optional = true}
 serde = { version = "1.0", features = ["rc", "derive"] }
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
-core-foundation = "0.4"
+core-foundation = "0.4.6"
 core-graphics = "0.12.3"
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4.1"
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -453,16 +453,17 @@ pub enum FilterOp {
     Brightness(f32),
     Contrast(f32),
     Grayscale(f32),
     HueRotate(f32),
     Invert(f32),
     Opacity(PropertyBinding<f32>, f32),
     Saturate(f32),
     Sepia(f32),
+    DropShadow(LayoutVector2D, f32, ColorF),
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct IframeDisplayItem {
     pub pipeline_id: PipelineId,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -16,12 +16,12 @@ log = "0.3"
 path = "../webrender"
 version = "0.55.0"
 default-features = false
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
-core-foundation = "0.4"
+core-foundation = "0.4.6"
 core-graphics = "0.12.3"
 foreign-types = "0.3.0"