Bug 1522758 - Optimize drawing axis aligned clip rectangles into clip masks. r=kvark
author: Glenn Watson <github@intuitionlibrary.com>
Sat, 26 Jan 2019 21:11:53 +0000
changeset 515574 8e87ea84e5df448cd627f0cef05418d8fb0b717c
parent 515573 40da998f860f2d5ad59250223271f3e5057246e1
child 515575 e6701b43c193ed00469b7b7b270e43f6bf834cc4
push id: 1953
push user: ffxbld-merge
push date: Mon, 11 Mar 2019 12:10:20 +0000
treeherder: mozilla-release@9c35dcbaa899 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: kvark
bugs: 1522758
milestone: 66.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1522758 - Optimize drawing axis aligned clip rectangles into clip masks. r=kvark
Differential Revision: https://phabricator.services.mozilla.com/D17606
gfx/wr/webrender/res/clip_shared.glsl
gfx/wr/webrender/res/cs_clip_box_shadow.glsl
gfx/wr/webrender/res/cs_clip_image.glsl
gfx/wr/webrender/res/cs_clip_rectangle.glsl
gfx/wr/webrender/src/batch.rs
gfx/wr/webrender/src/frame_builder.rs
gfx/wr/webrender/src/gpu_types.rs
gfx/wr/webrender/src/render_task.rs
gfx/wr/webrender/src/renderer.rs
gfx/wr/webrender/src/tiling.rs
layout/reftests/transform-3d/reftest.list
--- a/gfx/wr/webrender/res/clip_shared.glsl
+++ b/gfx/wr/webrender/res/clip_shared.glsl
@@ -1,52 +1,47 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include rect,render_task,gpu_cache,snap,transform
 
 #ifdef WR_VERTEX_SHADER
 
-#define SEGMENT_ALL         0
-#define SEGMENT_CORNER_TL   1
-#define SEGMENT_CORNER_TR   2
-#define SEGMENT_CORNER_BL   3
-#define SEGMENT_CORNER_BR   4
-
 in int aClipRenderTaskAddress;
 in int aClipTransformId;
 in int aPrimTransformId;
 in int aClipSegment;
 in ivec4 aClipDataResourceAddress;
 in vec2 aClipLocalPos;
 in vec4 aClipTileRect;
+in vec4 aClipDeviceArea;
 
 struct ClipMaskInstance {
     int render_task_address;
     int clip_transform_id;
     int prim_transform_id;
-    int segment;
     ivec2 clip_data_address;
     ivec2 resource_address;
     vec2 local_pos;
     RectWithSize tile_rect;
+    RectWithSize sub_rect;
 };
 
 ClipMaskInstance fetch_clip_item() {
     ClipMaskInstance cmi;
 
     cmi.render_task_address = aClipRenderTaskAddress;
     cmi.clip_transform_id = aClipTransformId;
     cmi.prim_transform_id = aPrimTransformId;
-    cmi.segment = aClipSegment;
     cmi.clip_data_address = aClipDataResourceAddress.xy;
     cmi.resource_address = aClipDataResourceAddress.zw;
     cmi.local_pos = aClipLocalPos;
     cmi.tile_rect = RectWithSize(aClipTileRect.xy, aClipTileRect.zw);
+    cmi.sub_rect = RectWithSize(aClipDeviceArea.xy, aClipDeviceArea.zw);
 
     return cmi;
 }
 
 struct ClipVertexInfo {
     vec3 local_pos;
     RectWithSize clipped_local_rect;
 };
@@ -56,19 +51,20 @@ RectWithSize intersect_rect(RectWithSize
     return RectWithSize(p.xy, max(vec2(0.0), p.zw - p.xy));
 }
 
 // The transformed vertex function that always covers the whole clip area,
 // which is the intersection of all clip instances of a given primitive
 ClipVertexInfo write_clip_tile_vertex(RectWithSize local_clip_rect,
                                       Transform prim_transform,
                                       Transform clip_transform,
-                                      ClipArea area) {
-    vec2 device_pos = area.screen_origin +
-                      aPosition.xy * area.common_data.task_rect.size;
+                                      ClipArea area,
+                                      RectWithSize sub_rect) {
+    vec2 device_pos = area.screen_origin + sub_rect.p0 +
+                      aPosition.xy * sub_rect.size;
 
     if (clip_transform.is_axis_aligned && prim_transform.is_axis_aligned) {
         mat4 snap_mat = clip_transform.m * prim_transform.inv_m;
         vec4 snap_positions = compute_snap_positions(
             snap_mat,
             local_clip_rect,
             area.common_data.device_pixel_scale
         );
@@ -86,17 +82,17 @@ ClipVertexInfo write_clip_tile_vertex(Re
 
     vec4 pos = prim_transform.m * vec4(world_pos, 0.0, 1.0);
     pos.xyz /= pos.w;
 
     vec4 p = get_node_pos(pos.xy, clip_transform);
     vec3 local_pos = p.xyw * pos.w;
 
     vec4 vertex_pos = vec4(
-        area.common_data.task_rect.p0 + aPosition.xy * area.common_data.task_rect.size,
+        area.common_data.task_rect.p0 + sub_rect.p0 + aPosition.xy * sub_rect.size,
         0.0,
         1.0
     );
 
     gl_Position = uTransform * vertex_pos;
 
     init_transform_vs(vec4(local_clip_rect.p0, local_clip_rect.p0 + local_clip_rect.size));
 
--- a/gfx/wr/webrender/res/cs_clip_box_shadow.glsl
+++ b/gfx/wr/webrender/res/cs_clip_box_shadow.glsl
@@ -48,17 +48,18 @@ void main(void) {
 
     RectWithSize dest_rect = bs_data.dest_rect;
     dest_rect.p0 += cmi.local_pos;
 
     ClipVertexInfo vi = write_clip_tile_vertex(
         dest_rect,
         prim_transform,
         clip_transform,
-        area
+        area,
+        cmi.sub_rect
     );
     vLocalPos = vi.local_pos;
     vLayer = res.layer;
     vClipMode = bs_data.clip_mode;
 
     vec2 uv0 = res.uv_rect.p0;
     vec2 uv1 = res.uv_rect.p1;
 
--- a/gfx/wr/webrender/res/cs_clip_image.glsl
+++ b/gfx/wr/webrender/res/cs_clip_image.glsl
@@ -30,17 +30,18 @@ void main(void) {
     ImageMaskData mask = fetch_mask_data(cmi.clip_data_address);
     RectWithSize local_rect = RectWithSize(cmi.local_pos, mask.local_mask_size);
     ImageResource res = fetch_image_resource_direct(cmi.resource_address);
 
     ClipVertexInfo vi = write_clip_tile_vertex(
         local_rect,
         prim_transform,
         clip_transform,
-        area
+        area,
+        cmi.sub_rect
     );
     vLocalPos = vi.local_pos.xy / vi.local_pos.z;
     vLayer = res.layer;
     vClipMaskImageUv = (vLocalPos - cmi.tile_rect.p0) / cmi.tile_rect.size;
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vClipMaskUvRect = vec4(res.uv_rect.p0, res.uv_rect.p1 - res.uv_rect.p0) / texture_size.xyxy;
     // applying a half-texel offset to the UV boundaries to prevent linear samples from the outside
     vec4 inner_rect = vec4(res.uv_rect.p0, res.uv_rect.p1);
--- a/gfx/wr/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/wr/webrender/res/cs_clip_rectangle.glsl
@@ -66,17 +66,18 @@ void main(void) {
 
     RectWithSize local_rect = clip.rect.rect;
     local_rect.p0 = cmi.local_pos;
 
     ClipVertexInfo vi = write_clip_tile_vertex(
         local_rect,
         prim_transform,
         clip_transform,
-        area
+        area,
+        cmi.sub_rect
     );
 
     vLocalPos = vi.local_pos;
     vClipMode = clip.rect.mode.x;
 
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_rect);
 
     vec2 r_tl = clip.top_left.outer_inner_radius.xy;
--- a/gfx/wr/webrender/src/batch.rs
+++ b/gfx/wr/webrender/src/batch.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{AlphaType, ClipMode, DeviceIntRect, DeviceIntPoint, DeviceIntSize};
-use api::{ExternalImageType, FilterOp, ImageRendering, LayoutRect};
-use api::{YuvColorSpace, YuvFormat, PictureRect, ColorDepth, LayoutPoint};
-use clip::{ClipDataStore, ClipNodeFlags, ClipNodeRange, ClipItem, ClipStore};
-use clip_scroll_tree::{ClipScrollTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex};
+use api::{AlphaType, ClipMode, DeviceIntRect, DeviceIntPoint, DeviceIntSize, WorldRect};
+use api::{ExternalImageType, FilterOp, ImageRendering, LayoutRect, DeviceRect, DevicePixelScale};
+use api::{YuvColorSpace, YuvFormat, PictureRect, ColorDepth, LayoutPoint, DevicePoint, LayoutSize};
+use clip::{ClipDataStore, ClipNodeFlags, ClipNodeRange, ClipItem, ClipStore, ClipNodeInstance};
+use clip_scroll_tree::{ClipScrollTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex, CoordinateSystemId};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheHandle, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders, ZBufferId, ZBufferIdGenerator};
 use gpu_types::{ClipMaskInstance, SplitCompositeInstance};
 use gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
 use gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
 use internal_types::{FastHashMap, SavedTargetIndex, TextureSource};
 use picture::{Picture3DContext, PictureCompositeMode, PicturePrimitive, PictureSurface};
@@ -23,25 +23,31 @@ use render_backend::DataStores;
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskTree, TileBlit};
 use renderer::{BlendMode, ImageBufferKind, ShaderColorMode};
 use renderer::BLOCKS_PER_UV_RECT;
 use resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache, ImageProperties};
 use scene::FilterOpHelpers;
 use smallvec::SmallVec;
 use std::{f32, i32, usize};
 use tiling::{RenderTargetContext};
-use util::{TransformedRectKind};
+use util::{project_rect, TransformedRectKind};
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(0x7fff);
 
 /// Used to signal there are no segments provided with this primitive.
 const INVALID_SEGMENT_INDEX: i32 = 0xffff;
 
+/// Size in device pixels for tiles that clip masks are drawn in.
+const CLIP_RECTANGLE_TILE_SIZE: i32 = 128;
+
+/// The minimum size of a clip mask before trying to draw in tiles.
+const CLIP_RECTANGLE_AREA_THRESHOLD: i32 = CLIP_RECTANGLE_TILE_SIZE * CLIP_RECTANGLE_TILE_SIZE * 4;
+
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BrushBatchKind {
     Solid,
     Image(ImageBufferKind),
     Blend,
     MixBlend {
@@ -2521,42 +2527,133 @@ impl ClipBatcher {
         }
     }
 
     pub fn add_clip_region(
         &mut self,
         task_address: RenderTaskAddress,
         clip_data_address: GpuCacheAddress,
         local_pos: LayoutPoint,
+        sub_rect: DeviceRect,
     ) {
         let instance = ClipMaskInstance {
             render_task_address: task_address,
             clip_transform_id: TransformPaletteId::IDENTITY,
             prim_transform_id: TransformPaletteId::IDENTITY,
-            segment: 0,
             clip_data_address,
             resource_address: GpuCacheAddress::invalid(),
             local_pos,
             tile_rect: LayoutRect::zero(),
+            sub_rect,
         };
 
         self.rectangles.push(instance);
     }
 
+    /// Where appropriate, draw a clip rectangle as a small series of tiles,
+    /// instead of one large rectangle, skipping tiles the clip cannot affect.
+    ///
+    /// Returns `true` if the mask was handled here, or `false` if the caller
+    /// must fall back to drawing the whole mask as a single instance.
+    fn add_tiled_clip_mask(
+        &mut self,
+        mask_screen_rect: DeviceIntRect,
+        clip_rect_size: LayoutSize,
+        clip_instance: &ClipNodeInstance,
+        clip_scroll_tree: &ClipScrollTree,
+        world_rect: &WorldRect,
+        device_pixel_scale: DevicePixelScale,
+        gpu_address: GpuCacheAddress,
+        instance: &ClipMaskInstance,
+    ) -> bool {
+        // Only try to draw in tiles if the clip mask is big enough.
+        if mask_screen_rect.area() < CLIP_RECTANGLE_AREA_THRESHOLD {
+            return false;
+        }
+
+        let clip_spatial_node = &clip_scroll_tree
+            .spatial_nodes[clip_instance.spatial_node_index.0 as usize];
+
+        // Only support clips that are axis-aligned to the root coordinate space,
+        // for now, to simplify the logic below. This handles the vast majority
+        // of real world cases, but could be expanded in future if needed.
+        if clip_spatial_node.coordinate_system_id != CoordinateSystemId::root() {
+            return false;
+        }
+
+        // Get the world rect of the clip rectangle. If we can't transform it due
+        // to the matrix, just fall back to drawing the entire clip mask.
+        let local_clip_rect = LayoutRect::new(clip_instance.local_pos, clip_rect_size);
+        let world_clip_rect = match project_rect(
+            &clip_spatial_node.world_content_transform.to_transform(),
+            &local_clip_rect,
+            world_rect,
+        ) {
+            Some(rect) => rect,
+            None => return false,
+        };
+
+        // Work out how many tiles to draw this clip mask in, stretched across the
+        // device rect of the primitive clip mask.
+        let world_device_rect = world_clip_rect * device_pixel_scale;
+        let x_tiles = (mask_screen_rect.size.width + CLIP_RECTANGLE_TILE_SIZE-1) / CLIP_RECTANGLE_TILE_SIZE;
+        let y_tiles = (mask_screen_rect.size.height + CLIP_RECTANGLE_TILE_SIZE-1) / CLIP_RECTANGLE_TILE_SIZE;
+
+        // Tile rects are built relative to the mask origin (the space the shader
+        // expects for `sub_rect`), while `world_device_rect` is in world device
+        // space. This offset moves a tile into world space for the test below.
+        let mask_origin = mask_screen_rect.origin.to_f32().to_vector();
+
+        // Because we only run this code path for axis-aligned rects (the root coord system check above),
+        // and only for rectangles (not rounded etc), the world_device_rect is not conservative - we know
+        // that there is no inner_rect, and the world_device_rect should be the real, axis-aligned clip rect.
+
+        for y in 0 .. y_tiles {
+            for x in 0 .. x_tiles {
+                let p0 = DeviceIntPoint::new(
+                    x * CLIP_RECTANGLE_TILE_SIZE,
+                    y * CLIP_RECTANGLE_TILE_SIZE,
+                );
+                let p1 = DeviceIntPoint::new(
+                    (p0.x + CLIP_RECTANGLE_TILE_SIZE).min(mask_screen_rect.size.width),
+                    (p0.y + CLIP_RECTANGLE_TILE_SIZE).min(mask_screen_rect.size.height),
+                );
+                let tile_size = DeviceIntSize::new(p1.x - p0.x, p1.y - p0.y);
+                let sub_rect = DeviceIntRect::new(p0, tile_size).to_f32();
+
+                // If the clip rect completely contains this tile rect, then drawing
+                // these pixels would be redundant - since this clip can't possibly
+                // affect the pixels in this tile, skip them!
+                if !world_device_rect.contains_rect(&sub_rect.translate(&mask_origin)) {
+                    self.rectangles.push(ClipMaskInstance {
+                        clip_data_address: gpu_address,
+                        sub_rect,
+                        ..*instance
+                    });
+                }
+            }
+        }
+
+        true
+    }
+
     pub fn add(
         &mut self,
         task_address: RenderTaskAddress,
         clip_node_range: ClipNodeRange,
         root_spatial_node_index: SpatialNodeIndex,
         resource_cache: &ResourceCache,
         gpu_cache: &GpuCache,
         clip_store: &ClipStore,
         clip_scroll_tree: &ClipScrollTree,
         transforms: &mut TransformPalette,
         clip_data_store: &ClipDataStore,
+        actual_rect: DeviceIntRect,
+        world_rect: &WorldRect,
+        device_pixel_scale: DevicePixelScale,
     ) {
         for i in 0 .. clip_node_range.count {
             let clip_instance = clip_store.get_instance_from_range(&clip_node_range, i);
             let clip_node = &clip_data_store[clip_instance.handle];
 
             let clip_transform_id = transforms.get_id(
                 clip_instance.spatial_node_index,
                 ROOT_SPATIAL_NODE_INDEX,
@@ -2568,21 +2665,24 @@ impl ClipBatcher {
                 ROOT_SPATIAL_NODE_INDEX,
                 clip_scroll_tree,
             );
 
             let instance = ClipMaskInstance {
                 render_task_address: task_address,
                 clip_transform_id,
                 prim_transform_id,
-                segment: 0,
                 clip_data_address: GpuCacheAddress::invalid(),
                 resource_address: GpuCacheAddress::invalid(),
                 local_pos: clip_instance.local_pos,
                 tile_rect: LayoutRect::zero(),
+                sub_rect: DeviceRect::new(
+                    DevicePoint::zero(),
+                    actual_rect.size.to_f32(),
+                ),
             };
 
             match clip_node.item {
                 ClipItem::Image { image, size, .. } => {
                     let request = ImageRequest {
                         key: image,
                         rendering: ImageRendering::Auto,
                         tile: None,
@@ -2643,25 +2743,43 @@ impl ClipBatcher {
                         .entry(cache_item.texture_id)
                         .or_insert(Vec::new())
                         .push(ClipMaskInstance {
                             clip_data_address: gpu_address,
                             resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
                             ..instance
                         });
                 }
-                ClipItem::Rectangle(_, mode) => {
-                    if !clip_instance.flags.contains(ClipNodeFlags::SAME_COORD_SYSTEM) ||
-                        mode == ClipMode::ClipOut {
-                        let gpu_address =
-                            gpu_cache.get_address(&clip_node.gpu_cache_handle);
-                        self.rectangles.push(ClipMaskInstance {
-                            clip_data_address: gpu_address,
-                            ..instance
-                        });
+                ClipItem::Rectangle(_, ClipMode::ClipOut) => {
+                    let gpu_address =
+                        gpu_cache.get_address(&clip_node.gpu_cache_handle);
+                    self.rectangles.push(ClipMaskInstance {
+                        clip_data_address: gpu_address,
+                        ..instance
+                    });
+                }
+                ClipItem::Rectangle(clip_rect_size, ClipMode::Clip) => {
+                    if !clip_instance.flags.contains(ClipNodeFlags::SAME_COORD_SYSTEM) {
+                        let gpu_address = gpu_cache.get_address(&clip_node.gpu_cache_handle);
+
+                        if !self.add_tiled_clip_mask(
+                            actual_rect,
+                            clip_rect_size,
+                            clip_instance,
+                            clip_scroll_tree,
+                            world_rect,
+                            device_pixel_scale,
+                            gpu_address,
+                            &instance,
+                        ) {
+                            self.rectangles.push(ClipMaskInstance {
+                                clip_data_address: gpu_address,
+                                ..instance
+                            });
+                        }
                     }
                 }
                 ClipItem::RoundedRectangle(..) => {
                     let gpu_address =
                         gpu_cache.get_address(&clip_node.gpu_cache_handle);
                     self.rectangles.push(ClipMaskInstance {
                         clip_data_address: gpu_address,
                         ..instance
--- a/gfx/wr/webrender/src/frame_builder.rs
+++ b/gfx/wr/webrender/src/frame_builder.rs
@@ -248,16 +248,17 @@ impl FrameBuilder {
         // the retained tiles passed to the next frame builder.
         retained_tiles.merge(self.pending_retained_tiles);
     }
 
     /// Compute the contribution (bounding rectangles, and resources) of layers and their
     /// primitives in screen space.
     fn build_layer_screen_rects_and_cull_layers(
         &mut self,
+        screen_world_rect: WorldRect,
         clip_scroll_tree: &ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, Arc<ScenePipeline>>,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         profile_counters: &mut FrameProfileCounters,
         device_pixel_scale: DevicePixelScale,
         scene_properties: &SceneProperties,
@@ -274,18 +275,16 @@ impl FrameBuilder {
         }
 
         scratch.begin_frame();
 
         let root_spatial_node_index = clip_scroll_tree.root_reference_frame_index();
 
         const MAX_CLIP_COORD: f32 = 1.0e9;
 
-        let screen_world_rect = (self.screen_rect.to_f32() / device_pixel_scale).round_out();
-
         let frame_context = FrameBuildingContext {
             device_pixel_scale,
             scene_properties,
             pipelines,
             screen_world_rect,
             clip_scroll_tree,
             max_local_clip: LayoutRect::new(
                 LayoutPoint::new(-MAX_CLIP_COORD, -MAX_CLIP_COORD),
@@ -471,18 +470,20 @@ impl FrameBuilder {
             Some(&mut transform_palette),
         );
         self.clip_store.clear_old_instances();
 
         let mut render_tasks = RenderTaskTree::new(stamp.frame_id());
         let mut surfaces = Vec::new();
 
         let screen_size = self.screen_rect.size.to_i32();
+        let screen_world_rect = (self.screen_rect.to_f32() / device_pixel_scale).round_out();
 
         let main_render_task_id = self.build_layer_screen_rects_and_cull_layers(
+            screen_world_rect,
             clip_scroll_tree,
             pipelines,
             resource_cache,
             gpu_cache,
             &mut render_tasks,
             &mut profile_counters,
             device_pixel_scale,
             scene_properties,
@@ -539,16 +540,17 @@ impl FrameBuilder {
                 device_pixel_scale,
                 prim_store: &self.prim_store,
                 resource_cache,
                 use_dual_source_blending,
                 clip_scroll_tree,
                 data_stores,
                 surfaces: &surfaces,
                 scratch,
+                screen_world_rect,
             };
 
             pass.build(
                 &mut ctx,
                 gpu_cache,
                 &mut render_tasks,
                 &mut deferred_resolves,
                 &self.clip_store,
--- a/gfx/wr/webrender/src/gpu_types.rs
+++ b/gfx/wr/webrender/src/gpu_types.rs
@@ -132,21 +132,21 @@ pub struct BorderInstance {
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
 pub struct ClipMaskInstance {
     pub render_task_address: RenderTaskAddress,
     pub clip_transform_id: TransformPaletteId,
     pub prim_transform_id: TransformPaletteId,
-    pub segment: i32,
     pub clip_data_address: GpuCacheAddress,
     pub resource_address: GpuCacheAddress,
     pub local_pos: LayoutPoint,
     pub tile_rect: LayoutRect,
+    pub sub_rect: DeviceRect,
 }
 
 /// A border corner dot or dash drawn into the clipping mask.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
 pub struct ClipMaskBorderCornerDotDash {
--- a/gfx/wr/webrender/src/render_task.rs
+++ b/gfx/wr/webrender/src/render_task.rs
@@ -231,17 +231,17 @@ impl RenderTaskLocation {
         }
     }
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct CacheMaskTask {
-    actual_rect: DeviceIntRect,
+    pub actual_rect: DeviceIntRect,
     pub root_spatial_node_index: SpatialNodeIndex,
     pub clip_node_range: ClipNodeRange,
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ClipRegionTask {
--- a/gfx/wr/webrender/src/renderer.rs
+++ b/gfx/wr/webrender/src/renderer.rs
@@ -516,34 +516,34 @@ pub(crate) mod desc {
                 kind: VertexAttributeKind::I32,
             },
             VertexAttribute {
                 name: "aPrimTransformId",
                 count: 1,
                 kind: VertexAttributeKind::I32,
             },
             VertexAttribute {
-                name: "aClipSegment",
-                count: 1,
-                kind: VertexAttributeKind::I32,
-            },
-            VertexAttribute {
                 name: "aClipDataResourceAddress",
                 count: 4,
                 kind: VertexAttributeKind::U16,
             },
             VertexAttribute {
                 name: "aClipLocalPos",
                 count: 2,
                 kind: VertexAttributeKind::F32,
             },
             VertexAttribute {
                 name: "aClipTileRect",
                 count: 4,
                 kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipDeviceArea",
+                count: 4,
+                kind: VertexAttributeKind::F32,
             }
         ],
     };
 
     pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
         vertex_attributes: &[
             VertexAttribute {
                 name: "aPosition",
--- a/gfx/wr/webrender/src/tiling.rs
+++ b/gfx/wr/webrender/src/tiling.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ColorF, BorderStyle, DeviceIntPoint, DeviceIntRect, DeviceIntSize, DevicePixelScale};
-use api::{DocumentLayer, FilterOp, ImageFormat};
-use api::{MixBlendMode, PipelineId, DeviceRect, LayoutSize};
+use api::{DocumentLayer, FilterOp, ImageFormat, DevicePoint};
+use api::{MixBlendMode, PipelineId, DeviceRect, LayoutSize, WorldRect};
 use batch::{AlphaBatchBuilder, AlphaBatchContainer, ClipBatcher, resolve_image};
 use clip::ClipStore;
 use clip_scroll_tree::{ClipScrollTree};
 use debug_render::DebugItem;
 use device::{Texture};
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
 use gpu_cache::{GpuCache};
@@ -52,16 +52,17 @@ pub struct RenderTargetContext<'a, 'rc> 
     pub device_pixel_scale: DevicePixelScale,
     pub prim_store: &'a PrimitiveStore,
     pub resource_cache: &'rc mut ResourceCache,
     pub use_dual_source_blending: bool,
     pub clip_scroll_tree: &'a ClipScrollTree,
     pub data_stores: &'a DataStores,
     pub surfaces: &'a [SurfaceInfo],
     pub scratch: &'a PrimitiveScratchBuffer,
+    pub screen_world_rect: WorldRect,
 }
 
 /// Represents a number of rendering operations on a surface.
 ///
 /// In graphics parlance, a "render target" usually means "a surface (texture or
 /// framebuffer) bound to the output of a shader". This trait has a slightly
 /// different meaning, in that it represents the operations on that surface
 /// _before_ it's actually bound and rendered. So a `RenderTarget` is built by
@@ -647,24 +648,32 @@ impl RenderTarget for AlphaRenderTarget 
                     task_info.clip_node_range,
                     task_info.root_spatial_node_index,
                     ctx.resource_cache,
                     gpu_cache,
                     clip_store,
                     ctx.clip_scroll_tree,
                     transforms,
                     &ctx.data_stores.clip,
+                    task_info.actual_rect,
+                    &ctx.screen_world_rect,
+                    ctx.device_pixel_scale,
                 );
             }
-            RenderTaskKind::ClipRegion(ref task) => {
+            RenderTaskKind::ClipRegion(ref region_task) => {
                 let task_address = render_tasks.get_task_address(task_id);
+                let device_rect = DeviceRect::new(
+                    DevicePoint::zero(),
+                    task.get_dynamic_size().to_f32(),
+                );
                 self.clip_batcher.add_clip_region(
                     task_address,
-                    task.clip_data_address,
-                    task.local_pos,
+                    region_task.clip_data_address,
+                    region_task.local_pos,
+                    device_rect,
                 );
             }
             RenderTaskKind::Scaling(ref info) => {
                 info.add_instances(
                     &mut self.scalings,
                     render_tasks.get_task_address(task_id),
                     render_tasks.get_task_address(task.children[0]),
                 );
--- a/layout/reftests/transform-3d/reftest.list
+++ b/layout/reftests/transform-3d/reftest.list
@@ -77,17 +77,17 @@ fuzzy-if(cocoaWidget,0-128,0-9) random-i
 == 1245450-1.html green-rect.html
 fuzzy(0-1,0-2000) == opacity-preserve3d-1.html opacity-preserve3d-1-ref.html
 fuzzy(0-1,0-15000) == opacity-preserve3d-2.html opacity-preserve3d-2-ref.html
 fuzzy(0-1,0-10000) == opacity-preserve3d-3.html opacity-preserve3d-3-ref.html
 fuzzy(0-1,0-10000) == opacity-preserve3d-4.html opacity-preserve3d-4-ref.html
 == opacity-preserve3d-5.html opacity-preserve3d-5-ref.html
 == snap-perspective-1.html snap-perspective-1-ref.html
 == mask-layer-1.html mask-layer-ref.html
-fuzzy-if(webrender&&gtkWidget,16-16,100-100) == mask-layer-2.html mask-layer-ref.html
+fuzzy-if(webrender&&gtkWidget,8-8,100-100) == mask-layer-2.html mask-layer-ref.html
 fuzzy-if(webrender,0-16,0-100) == mask-layer-3.html mask-layer-ref.html
 == split-intersect1.html split-intersect1-ref.html
 fuzzy(0-255,0-150) == split-intersect2.html split-intersect2-ref.html
 fuzzy(0-255,0-100) == split-non-ortho1.html split-non-ortho1-ref.html
 fuzzy-if(winWidget,0-150,0-120) == component-alpha-1.html component-alpha-1-ref.html
 == nested-transform-1.html nested-transform-1-ref.html
 == transform-geometry-1.html transform-geometry-1-ref.html
 == intermediate-1.html intermediate-1-ref.html