Bug 1540200 - Part 3. Reuse snapping offsets calculated on the CPU in the WebRender shaders. r=kvark
☠☠ backed out by f816de04249b ☠ ☠
authorAndrew Osmond <aosmond@mozilla.com>
Fri, 12 Apr 2019 09:34:25 -0400
changeset 530917 0259eb7967f41e2476320a899efa85e59a3fe837
parent 530916 67e8e1f2f00c55ff83a99c19f189322f6103821e
child 530918 4755a19e0826d2e3beee0559440387e84bc60444
push id11265
push userffxbld-merge
push dateMon, 13 May 2019 10:53:39 +0000
treeherdermozilla-beta@77e0fe8dbdd3 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskvark
bugs1540200
milestone68.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1540200 - Part 3. Reuse snapping offsets calculated on the CPU in the WebRender shaders. r=kvark Historically we calculated the snapping offsets in the GPU shaders. Because this information is always needed on the CPU side, we now just pass the values into the shader instead of recalculating again. This ensures we will use the same set of values consistently and makes it easier to adjust how we snap in the future. This patch should have no functional change on the output of WebRender itself. Differential Revision: https://phabricator.services.mozilla.com/D28883
gfx/wr/webrender/res/brush.glsl
gfx/wr/webrender/res/prim_shared.glsl
gfx/wr/webrender/res/snap.glsl
gfx/wr/webrender/src/batch.rs
gfx/wr/webrender/src/gpu_types.rs
gfx/wr/webrender/src/prim_store/mod.rs
--- a/gfx/wr/webrender/res/brush.glsl
+++ b/gfx/wr/webrender/res/brush.glsl
@@ -19,17 +19,16 @@ void brush_vs(
 
 #define VECS_PER_SEGMENT                    2
 
 #define BRUSH_FLAG_PERSPECTIVE_INTERPOLATION    1
 #define BRUSH_FLAG_SEGMENT_RELATIVE             2
 #define BRUSH_FLAG_SEGMENT_REPEAT_X             4
 #define BRUSH_FLAG_SEGMENT_REPEAT_Y             8
 #define BRUSH_FLAG_TEXEL_RECT                  16
-#define BRUSH_FLAG_SNAP_TO_PRIMITIVE           32
 
 #define INVALID_SEGMENT_INDEX                   0xffff
 
 void main(void) {
     // Load the brush instance from vertex attributes.
     int prim_header_address = aData.x;
     int clip_address = aData.y;
     int segment_index = aData.z & 0xffff;
@@ -60,25 +59,24 @@ void main(void) {
     // Fetch the dynamic picture that we are drawing on.
     PictureTask pic_task = fetch_picture_task(ph.render_task_index);
     ClipArea clip_area = fetch_clip_area(clip_address);
 
     Transform transform = fetch_transform(ph.transform_id);
 
     // Write the normal vertex information out.
     if (transform.is_axis_aligned) {
-        bool snap_to_primitive = (brush_flags & BRUSH_FLAG_SNAP_TO_PRIMITIVE) != 0;
         vi = write_vertex(
             segment_rect,
             ph.local_clip_rect,
             ph.z,
             transform,
             pic_task,
             ph.local_rect,
-            snap_to_primitive
+            ph.snap_offsets
         );
 
         // TODO(gw): transform bounds may be referenced by
         //           the fragment shader when running in
         //           the alpha pass, even on non-transformed
         //           items. For now, just ensure it has no
         //           effect. We can tidy this up as we move
         //           more items to be brush shaders.
--- a/gfx/wr/webrender/res/prim_shared.glsl
+++ b/gfx/wr/webrender/res/prim_shared.glsl
@@ -42,35 +42,37 @@ varying vec4 vClipMaskUv;
 #define COLOR_MODE_IMAGE              9
 
 uniform HIGHP_SAMPLER_FLOAT sampler2D sPrimitiveHeadersF;
 uniform HIGHP_SAMPLER_FLOAT isampler2D sPrimitiveHeadersI;
 
 // Instanced attributes
 in ivec4 aData;
 
-#define VECS_PER_PRIM_HEADER_F 2U
+#define VECS_PER_PRIM_HEADER_F 3U
 #define VECS_PER_PRIM_HEADER_I 2U
 
 struct PrimitiveHeader {
     RectWithSize local_rect;
     RectWithSize local_clip_rect;
+    vec4 snap_offsets;
     float z;
     int specific_prim_address;
     int render_task_index;
     int transform_id;
     ivec4 user_data;
 };
 
 PrimitiveHeader fetch_prim_header(int index) {
     PrimitiveHeader ph;
 
     ivec2 uv_f = get_fetch_uv(index, VECS_PER_PRIM_HEADER_F);
     vec4 local_rect = TEXEL_FETCH(sPrimitiveHeadersF, uv_f, 0, ivec2(0, 0));
     vec4 local_clip_rect = TEXEL_FETCH(sPrimitiveHeadersF, uv_f, 0, ivec2(1, 0));
+    ph.snap_offsets = TEXEL_FETCH(sPrimitiveHeadersF, uv_f, 0, ivec2(2, 0));
     ph.local_rect = RectWithSize(local_rect.xy, local_rect.zw);
     ph.local_clip_rect = RectWithSize(local_clip_rect.xy, local_clip_rect.zw);
 
     ivec2 uv_i = get_fetch_uv(index, VECS_PER_PRIM_HEADER_I);
     ivec4 data0 = TEXEL_FETCH(sPrimitiveHeadersI, uv_i, 0, ivec2(0, 0));
     ivec4 data1 = TEXEL_FETCH(sPrimitiveHeadersI, uv_i, 0, ivec2(1, 0));
     ph.z = float(data0.x);
     ph.render_task_index = data0.y;
@@ -88,41 +90,29 @@ struct VertexInfo {
 };
 
 VertexInfo write_vertex(RectWithSize instance_rect,
                         RectWithSize local_clip_rect,
                         float z,
                         Transform transform,
                         PictureTask task,
                         RectWithSize snap_rect,
-                        bool snap_to_primitive) {
+                        vec4 snap_offsets) {
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = instance_rect.p0 + instance_rect.size * aPosition.xy;
 
     // Clamp to the two local clip rects.
     vec2 clamped_local_pos = clamp_rect(local_pos, local_clip_rect);
 
-    RectWithSize visible_rect;
-    if (snap_to_primitive) {
-        // We are producing a picture. The snap rect has already taken into
-        // account the clipping we wish to consider for snapping purposes.
-        visible_rect = snap_rect;
-    } else {
-        // Compute the visible rect to snap against. This ensures segments along
-        // the edges are snapped consistently with other nearby primitives.
-        visible_rect = intersect_rects(local_clip_rect, snap_rect);
-    }
-
     /// Compute the snapping offset.
     vec2 snap_offset = compute_snap_offset(
         clamped_local_pos,
-        transform.m,
-        visible_rect,
-        task.device_pixel_scale
+        snap_rect,
+        snap_offsets
     );
 
     // Transform the current vertex to world space.
     vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy * task.device_pixel_scale;
 
--- a/gfx/wr/webrender/res/snap.glsl
+++ b/gfx/wr/webrender/res/snap.glsl
@@ -1,68 +1,22 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifdef WR_VERTEX_SHADER
 
-vec4 compute_snap_positions(
-    mat4 transform,
-    RectWithSize snap_rect,
-    float device_pixel_scale
-) {
-    // Ensure that the snap rect is at *least* one device pixel in size.
-    // TODO(gw): It's not clear to me that this is "correct". Specifically,
-    //           how should it interact with sub-pixel snap rects when there
-    //           is a transform with scale present? But it does fix
-    //           the test cases we have in Servo that are failing without it
-    //           and seem better than not having this at all.
-    snap_rect.size = max(snap_rect.size, vec2(1.0 / device_pixel_scale));
-
-    // Transform the snap corners to the world space.
-    vec4 world_snap_p0 = transform * vec4(snap_rect.p0, 0.0, 1.0);
-    vec4 world_snap_p1 = transform * vec4(snap_rect.p0 + snap_rect.size, 0.0, 1.0);
-    // Snap bounds in world coordinates, adjusted for pixel ratio. XY = top left, ZW = bottom right
-    vec4 world_snap = device_pixel_scale * vec4(world_snap_p0.xy, world_snap_p1.xy) /
-                                           vec4(world_snap_p0.ww, world_snap_p1.ww);
-    return world_snap;
-}
-
 /// Given a point within a local rectangle, and the device space corners
-/// of a snapped primitive, return the snap offsets. This *must* exactly
-/// match the logic in the GLSL compute_snap_offset_impl function.
-vec2 compute_snap_offset_impl(
+/// offsets for the unsnapped primitive, return the snap offsets. This *must*
+/// exactly match the logic in the Rust compute_snap_offset_impl function.
+vec2 compute_snap_offset(
     vec2 reference_pos,
     RectWithSize reference_rect,
     vec4 snap_positions
 ) {
-    /// World offsets applied to the corners of the snap rectangle.
-    vec4 snap_offsets = floor(snap_positions + 0.5) - snap_positions;
-
     /// Compute the position of this vertex inside the snap rectangle.
     vec2 normalized_snap_pos = (reference_pos - reference_rect.p0) / reference_rect.size;
 
     /// Compute the actual world offset for this vertex needed to make it snap.
-    return mix(snap_offsets.xy, snap_offsets.zw, normalized_snap_pos);
-}
-
-// Compute a snapping offset in world space (adjusted to pixel ratio),
-// given local position on the transform and a snap rectangle.
-vec2 compute_snap_offset(vec2 local_pos,
-                         mat4 transform,
-                         RectWithSize snap_rect,
-                         float device_pixel_scale) {
-    vec4 snap_positions = compute_snap_positions(
-        transform,
-        snap_rect,
-        device_pixel_scale
-    );
-
-    vec2 snap_offsets = compute_snap_offset_impl(
-        local_pos,
-        snap_rect,
-        snap_positions
-    );
-
-    return snap_offsets;
+    return mix(snap_positions.xy, snap_positions.zw, normalized_snap_pos);
 }
 
 #endif //WR_VERTEX_SHADER
--- a/gfx/wr/webrender/src/batch.rs
+++ b/gfx/wr/webrender/src/batch.rs
@@ -13,16 +13,17 @@ use gpu_types::{BrushFlags, BrushInstanc
 use gpu_types::{ClipMaskInstance, SplitCompositeInstance, SnapOffsets};
 use gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
 use gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
 use internal_types::{FastHashMap, SavedTargetIndex, TextureSource};
 use picture::{Picture3DContext, PictureCompositeMode, PicturePrimitive, PictureSurface};
 use prim_store::{DeferredResolve, EdgeAaSegmentMask, PrimitiveInstanceKind, PrimitiveVisibilityIndex};
 use prim_store::{VisibleGradientTile, PrimitiveInstance, PrimitiveOpacity, SegmentInstanceIndex};
 use prim_store::{BrushSegment, ClipMaskKind, ClipTaskIndex, VECS_PER_SEGMENT};
+use prim_store::{recompute_snap_offsets};
 use prim_store::image::ImageSource;
 use render_backend::DataStores;
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskTree, TileBlit};
 use renderer::{BlendMode, ImageBufferKind, ShaderColorMode};
 use renderer::{BLOCKS_PER_UV_RECT, MAX_VERTEX_TEXTURE_WIDTH};
 use resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache, ImageProperties};
 use scene::FilterOpHelpers;
 use smallvec::SmallVec;
@@ -623,32 +624,35 @@ impl AlphaBatchBuilder {
         let z_id = z_generator.next();
 
         let prim_common_data = &ctx.data_stores.as_common_data(&prim_instance);
         let prim_rect = LayoutRect::new(
             prim_instance.prim_origin,
             prim_common_data.prim_size,
         );
 
+        let snap_offsets = prim_info.snap_offsets;
+
         if is_chased {
             println!("\tbatch {:?} with bound {:?}", prim_rect, bounding_rect);
         }
 
         match prim_instance.kind {
             PrimitiveInstanceKind::Clear { data_handle } => {
                 let prim_data = &ctx.data_stores.prim[data_handle];
                 let prim_cache_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
 
                 // TODO(gw): We can abstract some of the common code below into
                 //           helper methods, as we port more primitives to make
                 //           use of interning.
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets,
                     task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let prim_header_index = prim_headers.push(
                     &prim_header,
                     z_id,
@@ -713,16 +717,17 @@ impl AlphaBatchBuilder {
                     specified_blend_mode
                 } else {
                     BlendMode::None
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets,
                     task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let batch_params = BrushBatchParameters::instanced(
                     BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
                     [
@@ -765,16 +770,17 @@ impl AlphaBatchBuilder {
                 let prim_data = &ctx.data_stores.text_run[data_handle];
                 let glyph_fetch_buffer = &mut self.glyph_fetch_buffer;
                 let alpha_batch_list = &mut self.batch_lists.last_mut().unwrap().alpha_batch_list;
                 let prim_cache_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets,
                     task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let clip_task_address = ctx.get_prim_clip_task_address(
                     prim_info.clip_task_index,
                     render_tasks,
@@ -932,16 +938,17 @@ impl AlphaBatchBuilder {
                     BlendMode::PremultipliedAlpha
                 } else {
                     BlendMode::None
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets,
                     task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let prim_header_index = prim_headers.push(
                     &prim_header,
                     z_id,
@@ -978,16 +985,17 @@ impl AlphaBatchBuilder {
             PrimitiveInstanceKind::Picture { pic_index, segment_instance_index, .. } => {
                 let picture = &ctx.prim_store.pictures[pic_index.0];
                 let non_segmented_blend_mode = BlendMode::PremultipliedAlpha;
                 let prim_cache_address = gpu_cache.get_address(&ctx.globals.default_image_handle);
 
                 let prim_header = PrimitiveHeader {
                     local_rect: picture.snapped_local_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets,
                     task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 match picture.context_3d {
                     // Convert all children of the 3D hierarchy root into batches.
                     Picture3DContext::In { root_data: Some(ref list), .. } => {
@@ -1016,16 +1024,17 @@ impl AlphaBatchBuilder {
                             let clip_task_address = ctx.get_prim_clip_task_address(
                                 prim_info.clip_task_index,
                                 render_tasks,
                             ).unwrap_or(OPAQUE_TASK_ADDRESS);
 
                             let prim_header = PrimitiveHeader {
                                 local_rect: pic.snapped_local_rect,
                                 local_clip_rect: prim_info.combined_local_clip_rect,
+                                snap_offsets,
                                 task_address,
                                 specific_prim_address: GpuCacheAddress::invalid(),
                                 transform_id: transforms
                                     .get_id(
                                         child.spatial_node_index,
                                         root_spatial_node_index,
                                         ctx.clip_scroll_tree,
                                     ),
@@ -1077,29 +1086,22 @@ impl AlphaBatchBuilder {
                     // hierarchy, since we process them with the root.
                     Picture3DContext::In { root_data: None, .. } => return,
                     // Proceed for non-3D pictures.
                     Picture3DContext::Out => ()
                 }
 
                 match picture.raster_config {
                     Some(ref raster_config) => {
-                        // All pictures must snap to their primitive rect instead of the
-                        // visible rect like most primitives. This is because the picture's
-                        // visible rect includes the effect of the picture's clip rect,
-                        // which was not considered by the picture's children. The primitive
-                        // rect however is simply the union of the visible rect of the
-                        // children, which they snapped to, which is precisely what we also
-                        // need to snap to in order to be consistent.
-                        let mut brush_flags = BrushFlags::SNAP_TO_PRIMITIVE;
-
                         // If the child picture was rendered in local space, we can safely
                         // interpolate the UV coordinates with perspective correction.
-                        if raster_config.establishes_raster_root {
-                            brush_flags |= BrushFlags::PERSPECTIVE_INTERPOLATION;
+                        let brush_flags = if raster_config.establishes_raster_root {
+                            BrushFlags::PERSPECTIVE_INTERPOLATION
+                        } else {
+                            BrushFlags::empty()
                         };
 
                         let clip_task_address = ctx.get_prim_clip_task_address(
                             prim_info.clip_task_index,
                             render_tasks,
                         ).unwrap_or(OPAQUE_TASK_ADDRESS);
 
                         match raster_config.composite_mode {
@@ -1144,19 +1146,27 @@ impl AlphaBatchBuilder {
                                     );
 
                                     for tile_index in &tile_cache.tiles_to_draw {
                                         let tile = &tile_cache.tiles[tile_index.0];
 
                                         // Get the local rect of the tile.
                                         let tile_rect = tile.local_rect;
 
+                                        // Adjust the snap offsets for the tile.
+                                        let snap_offsets = recompute_snap_offsets(
+                                            tile_rect,
+                                            prim_rect,
+                                            snap_offsets,
+                                        );
+
                                         let prim_header = PrimitiveHeader {
                                             local_rect: tile_rect,
                                             local_clip_rect,
+                                            snap_offsets,
                                             task_address,
                                             specific_prim_address: prim_cache_address,
                                             transform_id,
                                         };
 
                                         let prim_header_index = prim_headers.push(&prim_header, z_id, [
                                             ShaderColorMode::Image as i32 | ((AlphaType::PremultipliedAlpha as i32) << 16),
                                             RasterizationSpace::Local as i32,
@@ -1349,16 +1359,17 @@ impl AlphaBatchBuilder {
                                             get_shader_opacity(1.0),
                                             0,
                                         ]);
 
                                         let shadow_rect = prim_header.local_rect.translate(&offset);
 
                                         let shadow_prim_header = PrimitiveHeader {
                                             local_rect: shadow_rect,
+                                            snap_offsets: prim_info.shadow_snap_offsets,
                                             specific_prim_address: shadow_prim_address,
                                             ..prim_header
                                         };
 
                                         let shadow_prim_header_index = prim_headers.push(&shadow_prim_header, z_id_shadow, [
                                             ShaderColorMode::Alpha as i32 | ((AlphaType::PremultipliedAlpha as i32) << 16),
                                             RasterizationSpace::Screen as i32,
                                             get_shader_opacity(1.0),
@@ -1610,16 +1621,17 @@ impl AlphaBatchBuilder {
                                     (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
                                 } else {
                                     (prim_cache_address, None)
                                 };
 
                                 let prim_header = PrimitiveHeader {
                                     local_rect: picture.snapped_local_rect,
                                     local_clip_rect: prim_info.combined_local_clip_rect,
+                                    snap_offsets,
                                     task_address,
                                     specific_prim_address: prim_cache_address,
                                     transform_id,
                                 };
 
                                 let prim_header_index = prim_headers.push(
                                     &prim_header,
                                     z_id,
@@ -1695,16 +1707,17 @@ impl AlphaBatchBuilder {
                     specified_blend_mode
                 } else {
                     BlendMode::None
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets: snap_offsets,
                     task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let batch_params = BrushBatchParameters::shared(
                     BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
                     textures,
@@ -1768,16 +1781,17 @@ impl AlphaBatchBuilder {
                     let segment_instance = &ctx.scratch.segment_instances[segment_instance_index];
                     let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
                     (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets: snap_offsets,
                     task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let prim_header_index = prim_headers.push(
                     &prim_header,
                     z_id,
@@ -1875,16 +1889,17 @@ impl AlphaBatchBuilder {
                     let segment_instance = &ctx.scratch.segment_instances[segment_instance_index];
                     let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
                     (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets: snap_offsets,
                     task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let prim_header_index = prim_headers.push(
                     &prim_header,
                     z_id,
@@ -1979,16 +1994,17 @@ impl AlphaBatchBuilder {
                         let segment_instance = &ctx.scratch.segment_instances[image_instance.segment_instance_index];
                         let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
                         (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
                     };
 
                     let prim_header = PrimitiveHeader {
                         local_rect: prim_rect,
                         local_clip_rect: prim_info.combined_local_clip_rect,
+                        snap_offsets: snap_offsets,
                         task_address,
                         specific_prim_address: prim_cache_address,
                         transform_id,
                     };
 
                     let prim_header_index = prim_headers.push(
                         &prim_header,
                         z_id,
@@ -2031,16 +2047,17 @@ impl AlphaBatchBuilder {
                             gpu_blocks.push(tile_rect.into());
                             gpu_blocks.push(GpuBlockData::EMPTY);
                         }
 
                         let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
                         let prim_header = PrimitiveHeader {
                             local_rect: prim_rect,
                             local_clip_rect: image_instance.tight_local_clip_rect,
+                            snap_offsets,
                             task_address,
                             specific_prim_address: gpu_cache.get_address(&gpu_handle),
                             transform_id,
                         };
                         let prim_header_index = prim_headers.push(&prim_header, z_id, prim_user_data);
 
                         for (i, tile) in chunk.iter().enumerate() {
                             if let Some((batch_kind, textures, uv_rect_address)) = get_image_tile_params(
@@ -2076,16 +2093,17 @@ impl AlphaBatchBuilder {
             PrimitiveInstanceKind::LinearGradient { data_handle, gradient_index, .. } => {
                 let gradient = &ctx.prim_store.linear_gradients[gradient_index];
                 let prim_data = &ctx.data_stores.linear_grad[data_handle];
                 let specified_blend_mode = BlendMode::PremultipliedAlpha;
 
                 let mut prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets,
                     task_address,
                     specific_prim_address: GpuCacheAddress::invalid(),
                     transform_id,
                 };
 
                 let non_segmented_blend_mode = if !prim_data.opacity.is_opaque ||
                     prim_info.clip_task_index != ClipTaskIndex::INVALID ||
                     transform_kind == TransformedRectKind::Complex
@@ -2214,16 +2232,17 @@ impl AlphaBatchBuilder {
             }
             PrimitiveInstanceKind::RadialGradient { data_handle, ref visible_tiles_range, .. } => {
                 let prim_data = &ctx.data_stores.radial_grad[data_handle];
                 let specified_blend_mode = BlendMode::PremultipliedAlpha;
 
                 let mut prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
+                    snap_offsets,
                     task_address,
                     specific_prim_address: GpuCacheAddress::invalid(),
                     transform_id,
                 };
 
                 if visible_tiles_range.is_empty() {
                     let non_segmented_blend_mode = if !prim_data.opacity.is_opaque ||
                         prim_info.clip_task_index != ClipTaskIndex::INVALID ||
@@ -2483,20 +2502,28 @@ fn add_gradient_tiles(
         },
         bounding_rect,
         z_id,
     );
 
     let user_data = [stops_handle.as_int(gpu_cache), 0, 0, 0];
 
     for tile in visible_tiles {
+        // Adjust the snap offsets for the tile.
+        let snap_offsets = recompute_snap_offsets(
+            tile.local_rect,
+            base_prim_header.local_rect,
+            base_prim_header.snap_offsets,
+        );
+
         let prim_header = PrimitiveHeader {
             specific_prim_address: gpu_cache.get_address(&tile.handle),
             local_rect: tile.local_rect,
             local_clip_rect: tile.local_clip_rect,
+            snap_offsets,
             ..*base_prim_header
         };
         let prim_header_index = prim_headers.push(&prim_header, z_id, user_data);
 
         batch.push(PrimitiveInstanceData::from(
             BrushInstance {
                 prim_header_index,
                 clip_task_address,
--- a/gfx/wr/webrender/src/gpu_types.rs
+++ b/gfx/wr/webrender/src/gpu_types.rs
@@ -230,16 +230,17 @@ impl PrimitiveHeaders {
         user_data: [i32; 4],
     ) -> PrimitiveHeaderIndex {
         debug_assert_eq!(self.headers_int.len(), self.headers_float.len());
         let id = self.headers_float.len();
 
         self.headers_float.push(PrimitiveHeaderF {
             local_rect: prim_header.local_rect,
             local_clip_rect: prim_header.local_clip_rect,
+            snap_offsets: prim_header.snap_offsets,
         });
 
         self.headers_int.push(PrimitiveHeaderI {
             z,
             task_address: prim_header.task_address,
             specific_prim_address: prim_header.specific_prim_address.as_int(),
             transform_id: prim_header.transform_id,
             user_data,
@@ -250,29 +251,31 @@ impl PrimitiveHeaders {
 }
 
 // This is a convenience type used to make it easier to pass
 // the common parts around during batching.
 #[derive(Debug)]
 pub struct PrimitiveHeader {
     pub local_rect: LayoutRect,
     pub local_clip_rect: LayoutRect,
+    pub snap_offsets: SnapOffsets,
     pub task_address: RenderTaskAddress,
     pub specific_prim_address: GpuCacheAddress,
     pub transform_id: TransformPaletteId,
 }
 
 // f32 parts of a primitive header
 #[derive(Debug)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PrimitiveHeaderF {
     pub local_rect: LayoutRect,
     pub local_clip_rect: LayoutRect,
+    pub snap_offsets: SnapOffsets,
 }
 
 // i32 parts of a primitive header
 // TODO(gw): Compress parts of these down to u16
 #[derive(Debug)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -358,18 +361,16 @@ bitflags! {
         /// rather than primitive rect.
         const SEGMENT_RELATIVE = 0x2;
         /// Repeat UVs horizontally.
         const SEGMENT_REPEAT_X = 0x4;
         /// Repeat UVs vertically.
         const SEGMENT_REPEAT_Y = 0x8;
         /// The extra segment data is a texel rect.
         const SEGMENT_TEXEL_RECT = 0x10;
-        /// Snap to the primitive rect instead of the visible rect.
-        const SNAP_TO_PRIMITIVE = 0x20;
     }
 }
 
 // TODO(gw): Some of these fields can be moved to the primitive
 //           header since they are constant, and some can be
 //           compressed to a smaller size.
 #[repr(C)]
 pub struct BrushInstance {
@@ -623,16 +624,21 @@ pub struct SnapOffsets {
 
 impl SnapOffsets {
     pub fn empty() -> Self {
         SnapOffsets {
             top_left: DeviceVector2D::zero(),
             bottom_right: DeviceVector2D::zero(),
         }
     }
+
+    pub fn is_empty(&self) -> bool {
+        let zero = DeviceVector2D::zero();
+        self.top_left == zero && self.bottom_right == zero
+    }
 }
 
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ImageSource {
     pub p0: DevicePoint,
     pub p1: DevicePoint,
--- a/gfx/wr/webrender/src/prim_store/mod.rs
+++ b/gfx/wr/webrender/src/prim_store/mod.rs
@@ -3454,17 +3454,16 @@ impl PrimitiveInstance {
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
         prim_store: &PrimitiveStore,
         data_stores: &mut DataStores,
         segments_store: &mut SegmentStorage,
         segment_instances_store: &mut SegmentInstanceStorage,
         clip_mask_instances: &mut Vec<ClipMaskKind>,
         unclipped: &DeviceRect,
-        prim_snap_offsets: SnapOffsets,
         device_pixel_scale: DevicePixelScale,
     ) -> bool {
         let segments = match self.kind {
             PrimitiveInstanceKind::TextRun { .. } |
             PrimitiveInstanceKind::Clear { .. } |
             PrimitiveInstanceKind::LineDecoration { .. } => {
                 return false;
             }
@@ -3564,17 +3563,17 @@ impl PrimitiveInstance {
                 prim_info.clipped_world_rect,
                 root_spatial_node_index,
                 pic_context.surface_index,
                 pic_state,
                 frame_context,
                 frame_state,
                 &mut data_stores.clip,
                 unclipped,
-                prim_snap_offsets,
+                prim_info.snap_offsets,
                 device_pixel_scale,
             );
             clip_mask_instances.push(clip_mask_kind);
         } else {
             let dirty_world_rect = frame_state.current_dirty_region().combined.world_rect;
 
             for segment in segments {
                 // Build a clip chain for the smaller segment rect. This will
@@ -3605,17 +3604,17 @@ impl PrimitiveInstance {
                     prim_info.clipped_world_rect,
                     root_spatial_node_index,
                     pic_context.surface_index,
                     pic_state,
                     frame_context,
                     frame_state,
                     &mut data_stores.clip,
                     unclipped,
-                    prim_snap_offsets,
+                    prim_info.snap_offsets,
                     device_pixel_scale,
                 );
                 clip_mask_instances.push(clip_mask_kind);
             }
         }
 
         true
     }
@@ -3633,28 +3632,23 @@ impl PrimitiveInstance {
     ) {
         let prim_info = &mut scratch.prim_info[self.visibility_info.0 as usize];
         let device_pixel_scale = frame_state.surfaces[pic_context.surface_index.0].device_pixel_scale;
 
         if self.is_chased() {
             println!("\tupdating clip task with pic rect {:?}", prim_info.clip_chain.pic_clip_rect);
         }
 
-        // Get the device space rect for the primitive if it was unclipped,
-        // including any snap offsets applied to the corners.
-        let (unclipped, prim_snap_offsets) = match get_unclipped_device_rect(
-            self.spatial_node_index,
-            root_spatial_node_index,
+        // Get the device space rect for the primitive if it was unclipped.
+        let unclipped = match get_unclipped_device_rect(
             prim_info.clip_chain.pic_clip_rect,
             &pic_state.map_pic_to_raster,
             device_pixel_scale,
-            frame_context,
-            frame_state,
         ) {
-            Some(info) => info,
+            Some(rect) => rect,
             None => return,
         };
 
         self.build_segments_if_needed(
             &prim_info.clip_chain,
             frame_state,
             prim_store,
             data_stores,
@@ -3671,32 +3665,31 @@ impl PrimitiveInstance {
             frame_context,
             frame_state,
             prim_store,
             data_stores,
             &mut scratch.segments,
             &mut scratch.segment_instances,
             &mut scratch.clip_mask_instances,
             &unclipped,
-            prim_snap_offsets,
             device_pixel_scale,
         ) {
             if self.is_chased() {
                 println!("\tsegment tasks have been created for clipping");
             }
             return;
         }
 
         if prim_info.clip_chain.needs_mask {
             // Get a minimal device space rect, clipped to the screen that we
             // need to allocate for the clip mask, as well as interpolated
             // snap offsets.
             if let Some((device_rect, snap_offsets)) = get_clipped_device_rect(
                 &unclipped,
-                prim_snap_offsets,
+                prim_info.snap_offsets,
                 &pic_state.map_raster_to_world,
                 prim_info.clipped_world_rect,
                 device_pixel_scale,
             ) {
                 let clip_task = RenderTask::new_mask(
                     device_rect,
                     prim_info.clip_chain.clips_range,
                     root_spatial_node_index,
@@ -3726,18 +3719,17 @@ impl PrimitiveInstance {
 }
 
 /// Mimics the GLSL mix() function.
 fn mix(x: f32, y: f32, a: f32) -> f32 {
     x * (1.0 - a) + y * a
 }
 
 /// Given a point within a local rectangle, and the device space corners
-/// of a snapped primitive, return the snap offsets. This *must* exactly
-/// match the logic in the GLSL compute_snap_offset_impl function.
+/// of a snapped primitive, return the snap offsets.
 fn compute_snap_offset_impl<PixelSpace>(
     reference_pos: TypedPoint2D<f32, PixelSpace>,
     reference_rect: TypedRect<f32, PixelSpace>,
     prim_top_left: DevicePoint,
     prim_bottom_right: DevicePoint,
 ) -> DeviceVector2D {
     let normalized_snap_pos = TypedPoint2D::<f32, PixelSpace>::new(
         (reference_pos.x - reference_rect.origin.x) / reference_rect.size.width,
@@ -3755,69 +3747,65 @@ fn compute_snap_offset_impl<PixelSpace>(
     );
 
     DeviceVector2D::new(
         mix(top_left.x, bottom_right.x, normalized_snap_pos.x),
         mix(top_left.y, bottom_right.y, normalized_snap_pos.y),
     )
 }
 
+/// Given the snapping offsets for a primitive rectangle, recompute
+/// the snapping offsets to be relative to given local rectangle.
+/// This *must* exactly match the logic in the GLSL
+/// compute_snap_offset function.
+pub fn recompute_snap_offsets<PixelSpace>(
+    local_rect: TypedRect<f32, PixelSpace>,
+    prim_rect: TypedRect<f32, PixelSpace>,
+    snap_offsets: SnapOffsets,
+) -> SnapOffsets
+{
+    if prim_rect.is_empty() || snap_offsets.is_empty() {
+        return SnapOffsets::empty();
+    }
+
+    let normalized_top_left = TypedPoint2D::<f32, PixelSpace>::new(
+        (local_rect.origin.x - prim_rect.origin.x) / prim_rect.size.width,
+        (local_rect.origin.y - prim_rect.origin.y) / prim_rect.size.height,
+    );
+
+    let normalized_bottom_right = TypedPoint2D::<f32, PixelSpace>::new(
+        (local_rect.origin.x + local_rect.size.width - prim_rect.origin.x) / prim_rect.size.width,
+        (local_rect.origin.y + local_rect.size.height - prim_rect.origin.y) / prim_rect.size.height,
+    );
+
+    let top_left = DeviceVector2D::new(
+        mix(snap_offsets.top_left.x, snap_offsets.bottom_right.x, normalized_top_left.x),
+        mix(snap_offsets.top_left.y, snap_offsets.bottom_right.y, normalized_top_left.y),
+    );
+
+    let bottom_right = DeviceVector2D::new(
+        mix(snap_offsets.top_left.x, snap_offsets.bottom_right.x, normalized_bottom_right.x),
+        mix(snap_offsets.top_left.y, snap_offsets.bottom_right.y, normalized_bottom_right.y),
+    );
+
+    SnapOffsets {
+        top_left,
+        bottom_right,
+    }
+}
+
 /// Retrieve the exact unsnapped device space rectangle for a primitive.
-/// If the transform is axis-aligned, compute the snapping offsets that
-/// the shaders will apply.
 fn get_unclipped_device_rect(
-    prim_spatial_node_index: SpatialNodeIndex,
-    root_spatial_node_index: SpatialNodeIndex,
     prim_rect: PictureRect,
     map_to_raster: &SpaceMapper<PicturePixel, RasterPixel>,
     device_pixel_scale: DevicePixelScale,
-    frame_context: &FrameBuildingContext,
-    frame_state: &mut FrameBuildingState,
-) -> Option<(DeviceRect, SnapOffsets)> {
-    let unclipped_raster_rect = map_to_raster.map(&prim_rect)?;
-
-    let unclipped_device_rect = {
-        let world_rect = unclipped_raster_rect * TypedScale::new(1.0);
-        let device_rect = world_rect * device_pixel_scale;
-        device_rect
-    };
-
-    let transform_id = frame_state.transforms.get_id(
-        prim_spatial_node_index,
-        root_spatial_node_index,
-        frame_context.clip_scroll_tree,
-    );
-
-    match transform_id.transform_kind() {
-        TransformedRectKind::AxisAligned => {
-            let top_left = compute_snap_offset_impl(
-                prim_rect.origin,
-                prim_rect,
-                unclipped_device_rect.origin,
-                unclipped_device_rect.bottom_right(),
-            );
-
-            let bottom_right = compute_snap_offset_impl(
-                prim_rect.bottom_right(),
-                prim_rect,
-                unclipped_device_rect.origin,
-                unclipped_device_rect.bottom_right(),
-            );
-
-            let snap_offsets = SnapOffsets {
-                top_left,
-                bottom_right,
-            };
-
-            Some((unclipped_device_rect, snap_offsets))
-        }
-        TransformedRectKind::Complex => {
-            Some((unclipped_device_rect, SnapOffsets::empty()))
-        }
-    }
+) -> Option<DeviceRect> {
+    let raster_rect = map_to_raster.map(&prim_rect)?;
+    let world_rect = raster_rect * TypedScale::new(1.0);
+    Some(world_rect * device_pixel_scale)
 }
 
 /// Given an unclipped device rect, try to find a minimal device space
 /// rect to allocate a clip mask for, by clipping to the screen. This
 /// function is very similar to get_raster_rects below. It is far from
 /// ideal, and should be refactored as part of the support for setting
 /// scale per-raster-root.
 fn get_clipped_device_rect(