Bug 1457241 - Update webrender to commit 4b65822a2f7e1fed246a492f9fe193ede2f37d74. r=jrmuizel
author Kartikaya Gupta <kgupta@mozilla.com>
Mon, 30 Apr 2018 10:12:52 -0400
changeset 472434 2b40c075c18aa05e944a1343619cb9d2d986b990
parent 472433 16a647445e5cd89ffbefb3f3a490acaaa4f8868e
child 472435 25c5ae00eb5388349d20231d360c6b7710576afd
push id 1728
push user jlund@mozilla.com
push date Mon, 18 Jun 2018 21:12:27 +0000
treeherder mozilla-release@c296fde26f5f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1457241
milestone 61.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1457241 - Update webrender to commit 4b65822a2f7e1fed246a492f9fe193ede2f37d74. r=jrmuizel

MozReview-Commit-ID: EIE8tuyH8Ai
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_radial_gradient.glsl
gfx/webrender/res/cs_clip_border.glsl
gfx/webrender/res/resource_cache.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/display_list_flattener.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/glyph_rasterizer.rs
gfx/webrender/src/gpu_cache.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/scene_builder.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender_api/src/api.rs
gfx/webrender_bindings/revision.txt
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -35,34 +35,16 @@ ImageBrushData fetch_image_data(int addr
     vec4[2] raw_data = fetch_from_resource_cache_2(address);
     ImageBrushData data = ImageBrushData(
         raw_data[0],
         raw_data[1]
     );
     return data;
 }
 
-struct ImageBrushExtraData {
-    RectWithSize rendered_task_rect;
-    vec2 offset;
-};
-
-ImageBrushExtraData fetch_image_extra_data(int address) {
-    vec4[2] raw_data = fetch_from_resource_cache_2(address);
-    RectWithSize rendered_task_rect = RectWithSize(
-        raw_data[0].xy,
-        raw_data[0].zw
-    );
-    ImageBrushExtraData data = ImageBrushExtraData(
-        rendered_task_rect,
-        raw_data[1].xy
-    );
-    return data;
-}
-
 #ifdef WR_FEATURE_ALPHA_PASS
 vec2 transform_point_snapped(
     vec2 local_pos,
     RectWithSize local_rect,
     mat4 transform
 ) {
     vec2 snap_offset = compute_snap_offset(local_pos, transform, local_rect);
     vec4 world_pos = transform * vec4(local_pos, 0.0, 1.0);
@@ -100,67 +82,44 @@ void brush_vs(
     vec2 min_uv = min(uv0, uv1);
     vec2 max_uv = max(uv0, uv1);
 
     vUvSampleBounds = vec4(
         min_uv + vec2(0.5),
         max_uv - vec2(0.5)
     ) / texture_size.xyxy;
 
-    vec2 f;
+    vec2 f = (vi.local_pos - local_rect.p0) / local_rect.size;
 
 #ifdef WR_FEATURE_ALPHA_PASS
     int color_mode = user_data.y >> 16;
     int raster_space = user_data.y & 0xffff;
     ImageBrushData image_data = fetch_image_data(prim_address);
 
     if (color_mode == COLOR_MODE_FROM_PASS) {
         color_mode = uMode;
     }
 
     // Derive the texture coordinates for this image, based on
     // whether the source image is a local-space or screen-space
     // image.
     switch (raster_space) {
         case RASTER_SCREEN: {
-            ImageBrushExtraData extra_data = fetch_image_extra_data(user_data.z);
-
-            vec2 snapped_device_pos;
-
-            // For drop-shadows, we need to apply a local offset
-            // in order to generate the correct screen-space UV.
-            // For other effects, we can use the 1:1 mapping of
-            // the vertex device position for the UV generation.
-            switch (color_mode) {
-                case COLOR_MODE_ALPHA: {
-                    vec2 local_pos = vi.local_pos - extra_data.offset;
-                    snapped_device_pos = transform_point_snapped(
-                        local_pos,
-                        local_rect,
-                        transform
-                    );
-                    break;
-                }
-                default:
-                    snapped_device_pos = vi.snapped_device_pos;
-                    break;
-            }
-
-            f = (snapped_device_pos - extra_data.rendered_task_rect.p0) / extra_data.rendered_task_rect.size;
-
+            // Since the screen space UVs specify an arbitrary quad, do
+            // a bilinear interpolation to get the correct UV for this
+            // local position.
+            ImageResourceExtra extra_data = fetch_image_resource_extra(user_data.x);
+            vec2 x = mix(extra_data.st_tl, extra_data.st_tr, f.x);
+            vec2 y = mix(extra_data.st_bl, extra_data.st_br, f.x);
+            f = mix(x, y, f.y);
             break;
         }
-        case RASTER_LOCAL:
-        default: {
-            f = (vi.local_pos - local_rect.p0) / local_rect.size;
+        default:
             break;
-        }
     }
-#else
-    f = (vi.local_pos - local_rect.p0) / local_rect.size;
 #endif
 
     // Offset and scale vUv here to avoid doing it in the fragment shader.
     vUv.xy = mix(uv0, uv1, f) - min_uv;
     vUv.xy /= texture_size;
     vUv.xy *= repeat.xy;
 
 #ifdef WR_FEATURE_TEXTURE_RECT
--- a/gfx/webrender/res/brush_radial_gradient.glsl
+++ b/gfx/webrender/res/brush_radial_gradient.glsl
@@ -53,18 +53,16 @@ void brush_vs(
     // Transform all coordinates by the y scale so the
     // fragment shader can work with circles
     float ratio_xy = gradient.ratio_xy_extend_mode.x;
     vPos.y *= ratio_xy;
     vCenter.y *= ratio_xy;
     vRepeatedSize = local_rect.size / tile_repeat.xy;
     vRepeatedSize.y *=  ratio_xy;
 
-    vPos;
-
     vGradientAddress = user_data.x;
 
     // Whether to repeat the gradient instead of clamping.
     vGradientRepeat = float(int(gradient.ratio_xy_extend_mode.y) != EXTEND_MODE_CLAMP);
 
 #ifdef WR_FEATURE_ALPHA_PASS
     vTileRepeat = tile_repeat.xy;
     vLocalPos = vi.local_pos;
--- a/gfx/webrender/res/cs_clip_border.glsl
+++ b/gfx/webrender/res/cs_clip_border.glsl
@@ -10,16 +10,17 @@ in vec4 aDashOrDot1;
 varying vec3 vPos;
 
 flat varying vec2 vClipCenter;
 
 flat varying vec4 vPoint_Tangent0;
 flat varying vec4 vPoint_Tangent1;
 flat varying vec3 vDotParams;
 flat varying vec2 vAlphaMask;
+flat varying vec4 vTaskRect;
 
 #ifdef WR_VERTEX_SHADER
 // Matches BorderCorner enum in border.rs
 #define CORNER_TOP_LEFT     0
 #define CORNER_TOP_RIGHT    1
 #define CORNER_BOTTOM_LEFT  2
 #define CORNER_BOTTOM_RIGHT 3
 
@@ -140,19 +141,23 @@ void main(void) {
     // Transform to world pos
     vec4 world_pos = scroll_node.transform * vec4(pos, 0.0, 1.0);
     world_pos.xyz /= world_pos.w;
 
     // Scale into device pixels.
     vec2 device_pos = world_pos.xy * uDevicePixelRatio;
 
     // Position vertex within the render task area.
-    vec2 final_pos = device_pos -
-                     area.screen_origin +
-                     area.common_data.task_rect.p0;
+    vec2 task_rect_origin = area.common_data.task_rect.p0;
+    vec2 final_pos = device_pos - area.screen_origin + task_rect_origin;
+
+    // We pass the task rectangle to the fragment shader so that we can do one last clip
+    // in order to ensure that we don't draw outside the task rectangle.
+    vTaskRect.xy = task_rect_origin;
+    vTaskRect.zw = task_rect_origin + area.common_data.task_rect.size;
 
     // Calculate the local space position for this vertex.
     vec4 node_pos = get_node_pos(world_pos.xy, scroll_node);
     vPos = node_pos.xyw;
 
     gl_Position = uTransform * vec4(final_pos, 0.0, 1.0);
 }
 #endif
@@ -185,11 +190,14 @@ void main(void) {
     float d = mix(dash_distance, dot_distance, vAlphaMask.x);
 
     // Apply AA.
     d = distance_aa(aa_range, d);
 
     // Completely mask out clip if zero'ing out the rect.
     d = d * vAlphaMask.y;
 
+    // Make sure that we don't draw outside the task rectangle.
+    d = d * point_inside_rect(gl_FragCoord.xy, vTaskRect.xy, vTaskRect.zw);
+
     oFragColor = vec4(d, 0.0, 0.0, 1.0);
 }
 #endif
--- a/gfx/webrender/res/resource_cache.glsl
+++ b/gfx/webrender/res/resource_cache.glsl
@@ -1,14 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 uniform HIGHP_SAMPLER_FLOAT sampler2D sResourceCache;
 
+#define VECS_PER_IMAGE_RESOURCE     2
+
 // TODO(gw): This is here temporarily while we have
 //           both GPU store and cache. When the GPU
 //           store code is removed, we can change the
 //           PrimitiveInstance instance structure to
 //           use 2x unsigned shorts as vertex attributes
 //           instead of an int, and encode the UV directly
 //           in the vertices.
 ivec2 get_resource_cache_uv(int address) {
@@ -108,9 +110,28 @@ ImageResource fetch_image_resource(int a
 }
 
 ImageResource fetch_image_resource_direct(ivec2 address) {
     vec4 data[2] = fetch_from_resource_cache_2_direct(address);
     RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
     return ImageResource(uv_rect, data[1].x, data[1].yzw);
 }
 
+// Fetch optional extra data for a texture cache resource. This can contain
+// a polygon defining a UV rect within the texture cache resource.
+struct ImageResourceExtra {
+    vec2 st_tl;
+    vec2 st_tr;
+    vec2 st_bl;
+    vec2 st_br;
+};
+
+ImageResourceExtra fetch_image_resource_extra(int address) {
+    vec4 data[2] = fetch_from_resource_cache_2(address + VECS_PER_IMAGE_RESOURCE);
+    return ImageResourceExtra(
+        data[0].xy,
+        data[0].zw,
+        data[1].xy,
+        data[1].zw
+    );
+}
+
 #endif //WR_VERTEX_SHADER
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -13,17 +13,16 @@ use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushInstance, ClipChainRectIndex, ClipMaskBorderCornerDotDash};
 use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex, CompositePrimitiveInstance};
 use gpu_types::{PrimitiveInstance, RasterizationSpace, SimplePrimitiveInstance, ZBufferId};
 use gpu_types::ZBufferIdGenerator;
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 use picture::{PictureCompositeMode, PicturePrimitive, PictureSurface};
-use picture::{IMAGE_BRUSH_BLOCKS, IMAGE_BRUSH_EXTRA_BLOCKS};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{CachedGradient, ImageSource, PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{BrushPrimitive, BrushKind, DeferredResolve, EdgeAaSegmentMask, PictureIndex, PrimitiveRun};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKind, RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind};
 use renderer::{BLOCKS_PER_UV_RECT, ShaderColorMode};
 use resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache};
 use scene::FilterOpHelpers;
@@ -695,17 +694,17 @@ impl AlphaBatchBuilder {
                                                     z,
                                                     segment_index: 0,
                                                     edge_flags: EdgeAaSegmentMask::empty(),
                                                     brush_flags: BrushFlags::empty(),
                                                     user_data: [
                                                         uv_rect_address.as_int(),
                                                         (ShaderColorMode::ColorBitmap as i32) << 16 |
                                                         RasterizationSpace::Screen as i32,
-                                                        picture.extra_gpu_data_handle.as_int(gpu_cache),
+                                                        0,
                                                     ],
                                                 };
                                                 batch.push(PrimitiveInstance::from(instance));
                                                 false
                                             }
                                             None => {
                                                 true
                                             }
@@ -745,47 +744,43 @@ impl AlphaBatchBuilder {
                                             let shadow_uv_rect_address = render_tasks[cache_task_id]
                                                 .get_texture_address(gpu_cache)
                                                 .as_int();
                                             let content_uv_rect_address = render_tasks[secondary_id]
                                                 .get_texture_address(gpu_cache)
                                                 .as_int();
 
                                             // Get the GPU cache address of the extra data handle.
-                                            let extra_data_address = gpu_cache.get_address(&picture.extra_gpu_data_handle);
-                                            let shadow_prim_address = extra_data_address
-                                                .offset(IMAGE_BRUSH_EXTRA_BLOCKS);
-                                            let shadow_data_address = extra_data_address
-                                                .offset(IMAGE_BRUSH_EXTRA_BLOCKS + IMAGE_BRUSH_BLOCKS);
+                                            let shadow_prim_address = gpu_cache.get_address(&picture.extra_gpu_data_handle);
 
                                             let shadow_instance = BrushInstance {
                                                 picture_address: task_address,
                                                 prim_address: shadow_prim_address,
                                                 clip_chain_rect_index,
                                                 scroll_id,
                                                 clip_task_address,
                                                 z,
                                                 segment_index: 0,
                                                 edge_flags: EdgeAaSegmentMask::empty(),
                                                 brush_flags: BrushFlags::empty(),
                                                 user_data: [
                                                     shadow_uv_rect_address,
                                                     (ShaderColorMode::Alpha as i32) << 16 |
                                                     RasterizationSpace::Screen as i32,
-                                                    shadow_data_address.as_int(),
+                                                    0,
                                                 ],
                                             };
 
                                             let content_instance = BrushInstance {
                                                 prim_address: prim_cache_address,
                                                 user_data: [
                                                     content_uv_rect_address,
                                                     (ShaderColorMode::ColorBitmap as i32) << 16 |
                                                     RasterizationSpace::Screen as i32,
-                                                    extra_data_address.as_int(),
+                                                    0,
                                                 ],
                                                 ..shadow_instance
                                             };
 
                                             self.batch_list
                                                 .get_suitable_batch(shadow_key, &task_relative_bounding_rect)
                                                 .push(PrimitiveInstance::from(shadow_instance));
 
@@ -948,17 +943,17 @@ impl AlphaBatchBuilder {
                                     z,
                                     segment_index: 0,
                                     edge_flags: EdgeAaSegmentMask::empty(),
                                     brush_flags: BrushFlags::empty(),
                                     user_data: [
                                         uv_rect_address,
                                         (ShaderColorMode::ColorBitmap as i32) << 16 |
                                         RasterizationSpace::Screen as i32,
-                                        picture.extra_gpu_data_handle.as_int(gpu_cache),
+                                        0,
                                     ],
                                 };
                                 batch.push(PrimitiveInstance::from(instance));
                                 false
                             }
                             None => {
                                 true
                             }
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -96,16 +96,19 @@ pub struct ClipScrollNode {
     /// between our reference frame and this node. For reference frames, we also include
     /// whatever local transformation this reference frame provides. This can be combined
     /// with the local_viewport_rect to get its position in world space.
     pub world_viewport_transform: LayoutToWorldFastTransform,
 
     /// World transform for content transformed by this node.
     pub world_content_transform: LayoutToWorldFastTransform,
 
+    /// The current transform kind of world_content_transform.
+    pub transform_kind: TransformedRectKind,
+
     /// Pipeline that this layer belongs to
     pub pipeline_id: PipelineId,
 
     /// Parent layer. If this is None, we are the root node.
     pub parent: Option<ClipScrollNodeIndex>,
 
     /// Child layers
     pub children: Vec<ClipScrollNodeIndex>,
@@ -137,16 +140,17 @@ impl ClipScrollNode {
         parent_index: Option<ClipScrollNodeIndex>,
         rect: &LayoutRect,
         node_type: NodeType
     ) -> Self {
         ClipScrollNode {
             local_viewport_rect: *rect,
             world_viewport_transform: LayoutToWorldFastTransform::identity(),
             world_content_transform: LayoutToWorldFastTransform::identity(),
+            transform_kind: TransformedRectKind::AxisAligned,
             parent: parent_index,
             children: Vec::new(),
             pipeline_id,
             node_type,
             invertible: true,
             coordinate_system_id: CoordinateSystemId(0),
             coordinate_system_relative_transform: LayoutFastTransform::identity(),
             node_data_index: GPUClipScrollNodeIndex(0),
@@ -280,25 +284,20 @@ impl ClipScrollNode {
         let inv_transform = match self.world_content_transform.inverse() {
             Some(inverted) => inverted.to_transform(),
             None => {
                 node_data.push(ClipScrollNodeData::invalid());
                 return;
             }
         };
 
-        let transform_kind = if self.world_content_transform.preserves_2d_axis_alignment() {
-            TransformedRectKind::AxisAligned
-        } else {
-            TransformedRectKind::Complex
-        };
         let data = ClipScrollNodeData {
             transform: self.world_content_transform.into(),
             inv_transform,
-            transform_kind: transform_kind as u32 as f32,
+            transform_kind: self.transform_kind as u32 as f32,
             padding: [0.0; 3],
         };
 
         // Write the data that will be made available to the GPU for this node.
         node_data.push(data);
     }
 
     pub fn update(
@@ -316,16 +315,22 @@ impl ClipScrollNode {
         // quit here.
         if !state.invertible {
             self.mark_uninvertible();
             return;
         }
 
         self.update_transform(state, next_coordinate_system_id, scene_properties);
 
+        self.transform_kind = if self.world_content_transform.preserves_2d_axis_alignment() {
+            TransformedRectKind::AxisAligned
+        } else {
+            TransformedRectKind::Complex
+        };
+
         // If this node is a reference frame, we check if it has a non-invertible matrix.
         // For non-reference-frames we assume that they will produce only additional
         // translations which should be invertible.
         match self.node_type {
             NodeType::ReferenceFrame(info) if !info.invertible => {
                 self.mark_uninvertible();
                 return;
             }
--- a/gfx/webrender/src/display_list_flattener.rs
+++ b/gfx/webrender/src/display_list_flattener.rs
@@ -1838,16 +1838,17 @@ impl<'a> DisplayListFlattener<'a> {
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayoutPrimitiveInfo,
         start_point: LayoutPoint,
         end_point: LayoutPoint,
         stops: ItemRange<GradientStop>,
         stops_count: usize,
         extend_mode: ExtendMode,
         gradient_index: CachedGradientIndex,
+        stretch_size: LayoutSize,
     ) {
         // Try to ensure that if the gradient is specified in reverse, then so long as the stops
         // are also supplied in reverse that the rendered result will be equivalent. To do this,
         // a reference orientation for the gradient line must be chosen, somewhat arbitrarily, so
         // just designate the reference orientation as start < end. Aligned gradient rendering
         // manages to produce the same result regardless of orientation, so don't worry about
         // reversing in that case.
         let reverse_stops = start_point.x > end_point.x ||
@@ -1866,16 +1867,17 @@ impl<'a> DisplayListFlattener<'a> {
             BrushKind::LinearGradient {
                 stops_range: stops,
                 stops_count,
                 extend_mode,
                 reverse_stops,
                 start_point: sp,
                 end_point: ep,
                 gradient_index,
+                stretch_size,
             },
             None,
         );
 
         let prim = PrimitiveContainer::Brush(prim);
 
         self.add_primitive(clip_and_scroll, info, Vec::new(), prim);
     }
@@ -1884,76 +1886,84 @@ impl<'a> DisplayListFlattener<'a> {
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayoutPrimitiveInfo,
         start_point: LayoutPoint,
         end_point: LayoutPoint,
         stops: ItemRange<GradientStop>,
         stops_count: usize,
         extend_mode: ExtendMode,
-        tile_size: LayoutSize,
+        stretch_size: LayoutSize,
         tile_spacing: LayoutSize,
     ) {
         let gradient_index = CachedGradientIndex(self.cached_gradients.len());
         self.cached_gradients.push(CachedGradient::new());
 
-        let prim_infos = info.decompose(
-            tile_size,
-            tile_spacing,
-            64 * 64,
-        );
+        if tile_spacing != LayoutSize::zero() {
+            let prim_infos = info.decompose(
+                stretch_size,
+                tile_spacing,
+                64 * 64,
+            );
 
-        if prim_infos.is_empty() {
-            self.add_gradient_impl(
-                clip_and_scroll,
-                info,
-                start_point,
-                end_point,
-                stops,
-                stops_count,
-                extend_mode,
-                gradient_index,
-            );
-        } else {
-            for prim_info in prim_infos {
-                self.add_gradient_impl(
-                    clip_and_scroll,
-                    &prim_info,
-                    start_point,
-                    end_point,
-                    stops,
-                    stops_count,
-                    extend_mode,
-                    gradient_index,
-                );
+            if !prim_infos.is_empty() {
+                for prim_info in prim_infos {
+                    self.add_gradient_impl(
+                        clip_and_scroll,
+                        &prim_info,
+                        start_point,
+                        end_point,
+                        stops,
+                        stops_count,
+                        extend_mode,
+                        gradient_index,
+                        prim_info.rect.size,
+                    );
+                }
+
+                return;
             }
         }
+
+        self.add_gradient_impl(
+            clip_and_scroll,
+            info,
+            start_point,
+            end_point,
+            stops,
+            stops_count,
+            extend_mode,
+            gradient_index,
+            stretch_size,
+        );
     }
 
     fn add_radial_gradient_impl(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayoutPrimitiveInfo,
         center: LayoutPoint,
         start_radius: f32,
         end_radius: f32,
         ratio_xy: f32,
         stops: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
         gradient_index: CachedGradientIndex,
+        stretch_size: LayoutSize,
     ) {
         let prim = BrushPrimitive::new(
             BrushKind::RadialGradient {
                 stops_range: stops,
                 extend_mode,
                 center,
                 start_radius,
                 end_radius,
                 ratio_xy,
                 gradient_index,
+                stretch_size,
             },
             None,
         );
 
         self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
@@ -1966,55 +1976,61 @@ impl<'a> DisplayListFlattener<'a> {
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayoutPrimitiveInfo,
         center: LayoutPoint,
         start_radius: f32,
         end_radius: f32,
         ratio_xy: f32,
         stops: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
-        tile_size: LayoutSize,
+        stretch_size: LayoutSize,
         tile_spacing: LayoutSize,
     ) {
         let gradient_index = CachedGradientIndex(self.cached_gradients.len());
         self.cached_gradients.push(CachedGradient::new());
 
-        let prim_infos = info.decompose(
-            tile_size,
-            tile_spacing,
-            64 * 64,
-        );
+        if tile_spacing != LayoutSize::zero() {
+            let prim_infos = info.decompose(
+                stretch_size,
+                tile_spacing,
+                64 * 64,
+            );
 
-        if prim_infos.is_empty() {
-            self.add_radial_gradient_impl(
-                clip_and_scroll,
-                info,
-                center,
-                start_radius,
-                end_radius,
-                ratio_xy,
-                stops,
-                extend_mode,
-                gradient_index,
-            );
-        } else {
-            for prim_info in prim_infos {
-                self.add_radial_gradient_impl(
-                    clip_and_scroll,
-                    &prim_info,
-                    center,
-                    start_radius,
-                    end_radius,
-                    ratio_xy,
-                    stops,
-                    extend_mode,
-                    gradient_index,
-                );
+            if !prim_infos.is_empty() {
+                for prim_info in prim_infos {
+                    self.add_radial_gradient_impl(
+                        clip_and_scroll,
+                        &prim_info,
+                        center,
+                        start_radius,
+                        end_radius,
+                        ratio_xy,
+                        stops,
+                        extend_mode,
+                        gradient_index,
+                        stretch_size,
+                    );
+                }
+
+                return;
             }
         }
+
+        self.add_radial_gradient_impl(
+            clip_and_scroll,
+            info,
+            center,
+            start_radius,
+            end_radius,
+            ratio_xy,
+            stops,
+            extend_mode,
+            gradient_index,
+            stretch_size,
+        );
     }
 
     pub fn add_text(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         run_offset: LayoutVector2D,
         prim_info: &LayoutPrimitiveInfo,
         font_instance_key: &FontInstanceKey,
@@ -2132,17 +2148,16 @@ impl<'a> DisplayListFlattener<'a> {
                     (texel_rect.uv1.y - texel_rect.uv0.y) as i32,
                 ),
             )
         });
 
         // See if conditions are met to run through the new
         // image brush shader, which supports segments.
         if tile_spacing == LayoutSize::zero() &&
-           stretch_size == info.rect.size &&
            tile_offset.is_none() {
             let prim = BrushPrimitive::new(
                 BrushKind::Image {
                     request,
                     current_epoch: Epoch::invalid(),
                     alpha_type,
                     stretch_size,
                     tile_spacing,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -5,17 +5,17 @@
 use api::{BuiltDisplayList, ColorF, DeviceIntPoint, DeviceIntRect, DevicePixelScale};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentLayer, FontRenderMode};
 use api::{LayoutRect, LayoutSize, PipelineId, WorldPoint};
 use clip::{ClipChain, ClipStore};
 use clip_scroll_node::{ClipScrollNode};
 use clip_scroll_tree::{ClipScrollNodeIndex, ClipScrollTree};
 use display_list_flattener::{DisplayListFlattener};
 use gpu_cache::GpuCache;
-use gpu_types::{ClipChainRectIndex, ClipScrollNodeData};
+use gpu_types::{ClipChainRectIndex, ClipScrollNodeData, UvRectKind};
 use hit_test::{HitTester, HitTestingRun};
 use internal_types::{FastHashMap};
 use picture::PictureSurface;
 use prim_store::{CachedGradient, PrimitiveIndex, PrimitiveRun, PrimitiveStore};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTask, RenderTaskId, RenderTaskLocation, RenderTaskTree};
 use resource_cache::{ResourceCache};
@@ -228,16 +228,17 @@ impl FrameBuilder {
         let pic = &mut self.prim_store.pictures[0];
         pic.runs = pic_context.prim_runs;
 
         let root_render_task = RenderTask::new_picture(
             RenderTaskLocation::Fixed(frame_context.screen_rect),
             PrimitiveIndex(0),
             DeviceIntPoint::zero(),
             pic_state.tasks,
+            UvRectKind::Rect,
         );
 
         let render_task_id = frame_state.render_tasks.add(root_render_task);
         pic.surface = Some(PictureSurface::RenderTask(render_task_id));
         Some(render_task_id)
     }
 
     fn update_scroll_bars(&mut self, clip_scroll_tree: &ClipScrollTree, gpu_cache: &mut GpuCache) {
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -16,16 +16,17 @@ use api::DeviceIntSize;
 use api::{ImageData, ImageDescriptor, ImageFormat};
 use app_units::Au;
 #[cfg(not(feature = "pathfinder"))]
 use device::TextureFilter;
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedSize2D, TypedVector2D};
 use glyph_cache::{CachedGlyphInfo, GlyphCache, GlyphCacheEntry};
 use gpu_cache::GpuCache;
+use gpu_types::UvRectKind;
 use internal_types::ResourceCacheError;
 #[cfg(feature = "pathfinder")]
 use pathfinder_font_renderer;
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh as PathfinderMesh;
 #[cfg(feature = "pathfinder")]
 use pathfinder_path_utils::cubic_to_quadratic::CubicToQuadraticTransformer;
 use platform::font::FontContext;
@@ -795,16 +796,17 @@ impl GlyphRasterizer {
                                 offset: 0,
                             },
                             TextureFilter::Linear,
                             Some(ImageData::Raw(Arc::new(glyph.bytes))),
                             [glyph.left, -glyph.top, glyph.scale],
                             None,
                             gpu_cache,
                             Some(glyph_key_cache.eviction_notice()),
+                            UvRectKind::Rect,
                         );
                         GlyphCacheEntry::Cached(CachedGlyphInfo {
                             texture_cache_handle,
                             format: glyph.format,
                         })
                     }
                 };
                 glyph_key_cache.insert(key, glyph_info);
--- a/gfx/webrender/src/gpu_cache.rs
+++ b/gfx/webrender/src/gpu_cache.rs
@@ -146,23 +146,16 @@ impl GpuCacheAddress {
     }
 
     pub fn invalid() -> Self {
         GpuCacheAddress {
             u: u16::MAX,
             v: u16::MAX,
         }
     }
-
-    pub fn offset(&self, offset: usize) -> Self {
-        GpuCacheAddress {
-            u: self.u + offset as u16,
-            v: self.v
-        }
-    }
 }
 
 impl Add<usize> for GpuCacheAddress {
     type Output = GpuCacheAddress;
 
     fn add(self, other: usize) -> GpuCacheAddress {
         GpuCacheAddress {
             u: self.u + other as u16,
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -272,35 +272,75 @@ impl ClipScrollNodeData {
         }
     }
 }
 
 #[derive(Copy, Debug, Clone, PartialEq)]
 #[repr(C)]
 pub struct ClipChainRectIndex(pub usize);
 
+// Texture cache resources can be either a simple rect, or define
+// a polygon within a rect by specifying a UV coordinate for each
+// corner. This is useful for rendering screen-space rasterized
+// off-screen surfaces.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
+pub enum UvRectKind {
+    // The 2d bounds of the texture cache entry define the
+    // valid UV space for this texture cache entry.
+    Rect,
+    // The four vertices below define a quad within
+    // the texture cache entry rect. The shader can
+    // use a bilerp() to correctly interpolate a
+    // UV coord in the vertex shader.
+    Quad {
+        top_left: DevicePoint,
+        top_right: DevicePoint,
+        bottom_left: DevicePoint,
+        bottom_right: DevicePoint,
+    },
+}
+
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ImageSource {
     pub p0: DevicePoint,
     pub p1: DevicePoint,
     pub texture_layer: f32,
     pub user_data: [f32; 3],
+    pub uv_rect_kind: UvRectKind,
 }
 
 impl ImageSource {
     pub fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
         request.push([
             self.p0.x,
             self.p0.y,
             self.p1.x,
             self.p1.y,
         ]);
         request.push([
             self.texture_layer,
             self.user_data[0],
             self.user_data[1],
             self.user_data[2],
         ]);
+
+        // If this is a quad UV kind, upload its four corners as two extra blocks.
+        if let UvRectKind::Quad { top_left, top_right, bottom_left, bottom_right } = self.uv_rect_kind {
+            request.push([
+                top_left.x,
+                top_left.y,
+                top_right.x,
+                top_right.y,
+            ]);
+
+            request.push([
+                bottom_left.x,
+                bottom_left.y,
+                bottom_right.x,
+                bottom_right.y,
+            ]);
+        }
     }
 }
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -1,40 +1,40 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{FilterOp, MixBlendMode, PipelineId, PremultipliedColorF};
-use api::{DeviceIntRect, DeviceIntSize, LayoutRect};
-use api::{PictureIntPoint, PictureIntRect, PictureIntSize};
+use api::{DeviceRect, FilterOp, MixBlendMode, PipelineId, PremultipliedColorF};
+use api::{DeviceIntRect, DeviceIntSize, DevicePoint, LayoutPoint, LayoutRect};
+use api::{DevicePixelScale, PictureIntPoint, PictureIntRect, PictureIntSize};
 use box_shadow::{BLUR_SAMPLE_SCALE};
+use clip_scroll_node::ClipScrollNode;
 use clip_scroll_tree::ClipScrollNodeIndex;
-use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState};
+use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState, PrimitiveRunContext};
 use gpu_cache::{GpuCacheHandle};
+use gpu_types::UvRectKind;
 use prim_store::{PrimitiveIndex, PrimitiveRun, PrimitiveRunLocalRect};
 use prim_store::{PrimitiveMetadata, ScrollNodeAndClipChain};
 use render_task::{ClearMode, RenderTask, RenderTaskCacheEntryHandle};
 use render_task::{RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskId, RenderTaskLocation};
 use scene::{FilterOpHelpers, SceneProperties};
 use std::mem;
 use tiling::RenderTargetKind;
+use util::TransformedRectKind;
 
 /*
  A picture represents a dynamically rendered image. It consists of:
 
  * A number of primitives that are drawn onto the picture.
  * A composite operation describing how to composite this
    picture into its parent.
  * A configuration describing how to draw the primitives on
    this picture (e.g. in screen space or local space).
  */
 
-pub const IMAGE_BRUSH_EXTRA_BLOCKS: usize = 2;
-pub const IMAGE_BRUSH_BLOCKS: usize = 6;
-
 /// Specifies how this Picture should be composited
 /// onto the target it belongs to.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum PictureCompositeMode {
     /// Apply CSS mix-blend-mode effect.
     MixBlend(MixBlendMode),
     /// Apply a CSS filter.
     Filter(FilterOp),
@@ -122,20 +122,16 @@ pub struct PicturePrimitive {
 
     // The pipeline that the primitives on this picture belong to.
     pub pipeline_id: PipelineId,
 
     // If true, apply the local clip rect to primitive drawn
     // in this picture.
     pub apply_local_clip_rect: bool,
 
-    // The current screen-space rect of the rendered
-    // portion of this picture.
-    task_rect: DeviceIntRect,
-
     // If a mix-blend-mode, contains the render task for
     // the readback of the framebuffer that we use to sample
     // from in the mix-blend-mode shader.
     // For drop-shadow filter, this will store the original
     // picture task which would be rendered on screen after
     // blur pass.
     pub secondary_render_task_id: Option<RenderTaskId>,
     /// How this picture should be composited.
@@ -194,17 +190,16 @@ impl PicturePrimitive {
             composite_mode,
             is_in_3d_context,
             frame_output_pipeline_id,
             reference_frame_index,
             real_local_rect: LayoutRect::zero(),
             extra_gpu_data_handle: GpuCacheHandle::new(),
             apply_local_clip_rect,
             pipeline_id,
-            task_rect: DeviceIntRect::zero(),
             id,
         }
     }
 
     pub fn add_primitive(
         &mut self,
         prim_index: PrimitiveIndex,
         clip_and_scroll: ScrollNodeAndClipChain
@@ -276,32 +271,32 @@ impl PicturePrimitive {
     pub fn allow_subpixel_aa(&self) -> bool {
         self.can_draw_directly_to_parent_surface()
     }
 
     pub fn prepare_for_render_inner(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_metadata: &mut PrimitiveMetadata,
+        prim_run_context: &PrimitiveRunContext,
         mut pic_state_for_children: PictureState,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
-    ) -> Option<DeviceIntRect> {
+    ) {
         let prim_screen_rect = prim_metadata
                                 .screen_rect
                                 .as_ref()
                                 .expect("bug: trying to draw an off-screen picture!?");
         if self.can_draw_directly_to_parent_surface() {
             pic_state.tasks.extend(pic_state_for_children.tasks);
             self.surface = None;
-            return None;
+            return;
         }
 
-
         // TODO(gw): Almost all of the Picture types below use extra_gpu_cache_data
         //           to store the same type of data. The exception is the filter
         //           with a ColorMatrix, which stores the color matrix here. It's
         //           probably worth tidying this code up to be a bit more consistent.
         //           Perhaps store the color matrix after the common data, even though
         //           it's not used by that shader.
         match self.composite_mode {
             Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
@@ -317,28 +312,36 @@ impl PicturePrimitive {
                 // then intersect with the total screen rect, to minimize the
                 // allocation size.
                 let device_rect = prim_screen_rect
                     .clipped
                     .inflate(blur_range, blur_range)
                     .intersection(&prim_screen_rect.unclipped)
                     .unwrap();
 
+                let uv_rect_kind = calculate_uv_rect_kind(
+                    &prim_metadata.local_rect,
+                    &prim_run_context.scroll_node,
+                    &device_rect,
+                    frame_context.device_pixel_scale,
+                );
+
                 // If we are drawing a blur that has primitives or clips that contain
                 // a complex coordinate system, don't bother caching them (for now).
                 // It's likely that they are animating and caching may not help here
                 // anyway. In the future we should relax this a bit, so that we can
                 // cache tasks with complex coordinate systems if we detect the
                 // relevant transforms haven't changed from frame to frame.
                 let surface = if pic_state_for_children.has_non_root_coord_system {
                     let picture_task = RenderTask::new_picture(
                         RenderTaskLocation::Dynamic(None, Some(device_rect.size)),
                         prim_index,
                         device_rect.origin,
                         pic_state_for_children.tasks,
+                        uv_rect_kind,
                     );
 
                     let picture_task_id = frame_state.render_tasks.add(picture_task);
 
                     let blur_render_task = RenderTask::new_blur(
                         blur_std_deviation,
                         picture_task_id,
                         frame_state.render_tasks,
@@ -383,16 +386,17 @@ impl PicturePrimitive {
                         |render_tasks| {
                             let child_tasks = mem::replace(&mut pic_state_for_children.tasks, Vec::new());
 
                             let picture_task = RenderTask::new_picture(
                                 RenderTaskLocation::Dynamic(None, Some(device_rect.size)),
                                 prim_index,
                                 device_rect.origin,
                                 child_tasks,
+                                uv_rect_kind,
                             );
 
                             let picture_task_id = render_tasks.add(picture_task);
 
                             let blur_render_task = RenderTask::new_blur(
                                 blur_std_deviation,
                                 picture_task_id,
                                 render_tasks,
@@ -407,20 +411,18 @@ impl PicturePrimitive {
                             render_task_id
                         }
                     );
 
                     PictureSurface::TextureCache(cache_item)
                 };
 
                 self.surface = Some(surface);
-
-                Some(device_rect)
             }
-            Some(PictureCompositeMode::Filter(FilterOp::DropShadow(_, blur_radius, _))) => {
+            Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, blur_radius, color))) => {
                 let blur_std_deviation = blur_radius * frame_context.device_pixel_scale.0;
                 let blur_range = (blur_std_deviation * BLUR_SAMPLE_SCALE).ceil() as i32;
 
                 // The clipped field is the part of the picture that is visible
                 // on screen. The unclipped field is the screen-space rect of
                 // the complete picture, if no screen / clip-chain was applied
                 // (this includes the extra space for blur region). To ensure
                 // that we draw a large enough part of the picture to get correct
@@ -428,21 +430,29 @@ impl PicturePrimitive {
                 // then intersect with the total screen rect, to minimize the
                 // allocation size.
                 let device_rect = prim_screen_rect
                     .clipped
                     .inflate(blur_range, blur_range)
                     .intersection(&prim_screen_rect.unclipped)
                     .unwrap();
 
+                let uv_rect_kind = calculate_uv_rect_kind(
+                    &prim_metadata.local_rect,
+                    &prim_run_context.scroll_node,
+                    &device_rect,
+                    frame_context.device_pixel_scale,
+                );
+
                 let mut picture_task = RenderTask::new_picture(
                     RenderTaskLocation::Dynamic(None, Some(device_rect.size)),
                     prim_index,
                     device_rect.origin,
                     pic_state_for_children.tasks,
+                    uv_rect_kind,
                 );
                 picture_task.mark_for_saving();
 
                 let picture_task_id = frame_state.render_tasks.add(picture_task);
 
                 let blur_render_task = RenderTask::new_blur(
                     blur_std_deviation.round(),
                     picture_task_id,
@@ -452,132 +462,24 @@ impl PicturePrimitive {
                 );
 
                 self.secondary_render_task_id = Some(picture_task_id);
 
                 let render_task_id = frame_state.render_tasks.add(blur_render_task);
                 pic_state.tasks.push(render_task_id);
                 self.surface = Some(PictureSurface::RenderTask(render_task_id));
 
-                Some(device_rect)
-            }
-            Some(PictureCompositeMode::MixBlend(..)) => {
-                let picture_task = RenderTask::new_picture(
-                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
-                    prim_index,
-                    prim_screen_rect.clipped.origin,
-                    pic_state_for_children.tasks,
-                );
-
-                let readback_task_id = frame_state.render_tasks.add(
-                    RenderTask::new_readback(prim_screen_rect.clipped)
-                );
-
-                self.secondary_render_task_id = Some(readback_task_id);
-                pic_state.tasks.push(readback_task_id);
-
-                let render_task_id = frame_state.render_tasks.add(picture_task);
-                pic_state.tasks.push(render_task_id);
-                self.surface = Some(PictureSurface::RenderTask(render_task_id));
-
-                Some(prim_screen_rect.clipped)
-            }
-            Some(PictureCompositeMode::Filter(filter)) => {
-                let device_rect = match filter {
-                    FilterOp::ColorMatrix(m) => {
-                        if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handle) {
-                            for i in 0..5 {
-                                request.push([m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]]);
-                            }
-                        }
-
-                        None
-                    }
-                    _ => Some(prim_screen_rect.clipped),
-                };
-
-                let picture_task = RenderTask::new_picture(
-                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
-                    prim_index,
-                    prim_screen_rect.clipped.origin,
-                    pic_state_for_children.tasks,
-                );
-
-                let render_task_id = frame_state.render_tasks.add(picture_task);
-                pic_state.tasks.push(render_task_id);
-                self.surface = Some(PictureSurface::RenderTask(render_task_id));
-
-                device_rect
-            }
-            Some(PictureCompositeMode::Blit) | None => {
-                let picture_task = RenderTask::new_picture(
-                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
-                    prim_index,
-                    prim_screen_rect.clipped.origin,
-                    pic_state_for_children.tasks,
-                );
-
-                let render_task_id = frame_state.render_tasks.add(picture_task);
-                pic_state.tasks.push(render_task_id);
-                self.surface = Some(PictureSurface::RenderTask(render_task_id));
-
-                Some(prim_screen_rect.clipped)
-            }
-        }
-    }
-
-    pub fn prepare_for_render(
-        &mut self,
-        prim_index: PrimitiveIndex,
-        prim_metadata: &mut PrimitiveMetadata,
-        pic_state_for_children: PictureState,
-        pic_state: &mut PictureState,
-        frame_context: &FrameBuildingContext,
-        frame_state: &mut FrameBuildingState,
-    ) {
-        let device_rect = self.prepare_for_render_inner(
-            prim_index,
-            prim_metadata,
-            pic_state_for_children,
-            pic_state,
-            frame_context,
-            frame_state,
-        );
-
-        // If this picture type uses the common / general GPU data
-        // format, then write it now.
-        if let Some(device_rect) = device_rect {
-            // If scrolling or property animation has resulted in the task
-            // rect being different than last time, invalidate the GPU
-            // cache entry for this picture to ensure that the correct
-            // task rect is provided to the image shader.
-            if self.task_rect != device_rect {
-                frame_state.gpu_cache.invalidate(&self.extra_gpu_data_handle);
-                self.task_rect = device_rect;
-            }
-
-            if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handle) {
-                // [GLSL ImageBrushExtraData: task_rect, offset]
-                request.push(self.task_rect.to_f32());
-                request.push([0.0; 4]);
-
-                // TODO(gw): It would make the shaders a bit simpler if the offset
-                //           was provided as part of the brush::picture instance,
-                //           rather than in the Picture data itself.
-                if let Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, _, color))) = self.composite_mode {
+                if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handle) {
                     // TODO(gw): This is very hacky code below! It stores an extra
                     //           brush primitive below for the special case of a
                     //           drop-shadow where we need a different local
                     //           rect for the shadow. To tidy this up in future,
                     //           we could consider abstracting the code in prim_store.rs
                     //           that writes a brush primitive header.
 
-                    // NOTE: If any of the layout below changes, the IMAGE_BRUSH_EXTRA_BLOCKS and
-                    //       IMAGE_BRUSH_BLOCKS fields above *must* be updated.
-
                     // Basic brush primitive header is (see end of prepare_prim_for_render_inner in prim_store.rs)
                     //  local_rect
                     //  clip_rect
                     //  [brush specific data]
                     //  [segment_rect, (repetitions.xy, 0.0, 0.0)]
                     let shadow_rect = prim_metadata.local_rect.translate(&offset);
                     let shadow_clip_rect = prim_metadata.local_clip_rect.translate(&offset);
 
@@ -587,17 +489,180 @@ impl PicturePrimitive {
 
                     // ImageBrush colors
                     request.push(color.premultiplied());
                     request.push(PremultipliedColorF::WHITE);
 
                     // segment rect / repetitions
                     request.push(shadow_rect);
                     request.push([1.0, 1.0, 0.0, 0.0]);
+                }
+            }
+            Some(PictureCompositeMode::MixBlend(..)) => {
+                let uv_rect_kind = calculate_uv_rect_kind(
+                    &prim_metadata.local_rect,
+                    &prim_run_context.scroll_node,
+                    &prim_screen_rect.clipped,
+                    frame_context.device_pixel_scale,
+                );
 
-                    // Now write another GLSL ImageBrush struct, for the shadow to reference.
-                    request.push(self.task_rect.to_f32());
-                    request.push([offset.x, offset.y, 0.0, 0.0]);
+                let picture_task = RenderTask::new_picture(
+                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
+                    prim_index,
+                    prim_screen_rect.clipped.origin,
+                    pic_state_for_children.tasks,
+                    uv_rect_kind,
+                );
+
+                let readback_task_id = frame_state.render_tasks.add(
+                    RenderTask::new_readback(prim_screen_rect.clipped)
+                );
+
+                self.secondary_render_task_id = Some(readback_task_id);
+                pic_state.tasks.push(readback_task_id);
+
+                let render_task_id = frame_state.render_tasks.add(picture_task);
+                pic_state.tasks.push(render_task_id);
+                self.surface = Some(PictureSurface::RenderTask(render_task_id));
+            }
+            Some(PictureCompositeMode::Filter(filter)) => {
+                if let FilterOp::ColorMatrix(m) = filter {
+                    if let Some(mut request) = frame_state.gpu_cache.request(&mut self.extra_gpu_data_handle) {
+                        for i in 0..5 {
+                            request.push([m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]]);
+                        }
+                    }
                 }
+
+                let uv_rect_kind = calculate_uv_rect_kind(
+                    &prim_metadata.local_rect,
+                    &prim_run_context.scroll_node,
+                    &prim_screen_rect.clipped,
+                    frame_context.device_pixel_scale,
+                );
+
+                let picture_task = RenderTask::new_picture(
+                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
+                    prim_index,
+                    prim_screen_rect.clipped.origin,
+                    pic_state_for_children.tasks,
+                    uv_rect_kind,
+                );
+
+                let render_task_id = frame_state.render_tasks.add(picture_task);
+                pic_state.tasks.push(render_task_id);
+                self.surface = Some(PictureSurface::RenderTask(render_task_id));
+            }
+            Some(PictureCompositeMode::Blit) | None => {
+                let uv_rect_kind = calculate_uv_rect_kind(
+                    &prim_metadata.local_rect,
+                    &prim_run_context.scroll_node,
+                    &prim_screen_rect.clipped,
+                    frame_context.device_pixel_scale,
+                );
+
+                let picture_task = RenderTask::new_picture(
+                    RenderTaskLocation::Dynamic(None, Some(prim_screen_rect.clipped.size)),
+                    prim_index,
+                    prim_screen_rect.clipped.origin,
+                    pic_state_for_children.tasks,
+                    uv_rect_kind,
+                );
+
+                let render_task_id = frame_state.render_tasks.add(picture_task);
+                pic_state.tasks.push(render_task_id);
+                self.surface = Some(PictureSurface::RenderTask(render_task_id));
             }
         }
     }
+
+    pub fn prepare_for_render(
+        &mut self,
+        prim_index: PrimitiveIndex,
+        prim_metadata: &mut PrimitiveMetadata,
+        prim_run_context: &PrimitiveRunContext,
+        pic_state_for_children: PictureState,
+        pic_state: &mut PictureState,
+        frame_context: &FrameBuildingContext,
+        frame_state: &mut FrameBuildingState,
+    ) {
+        self.prepare_for_render_inner(
+            prim_index,
+            prim_metadata,
+            prim_run_context,
+            pic_state_for_children,
+            pic_state,
+            frame_context,
+            frame_state,
+        );
+    }
 }
+
+// Calculate the UV of one picture-local point, normalized to the rendered screen rect.
+fn calculate_screen_uv(
+    local_pos: &LayoutPoint,
+    clip_scroll_node: &ClipScrollNode,
+    rendered_rect: &DeviceRect,
+    device_pixel_scale: DevicePixelScale,
+) -> DevicePoint {
+    let world_pos = clip_scroll_node
+        .world_content_transform
+        .transform_point2d(local_pos);
+
+    let mut device_pos = world_pos * device_pixel_scale;
+
+    // Apply snapping for axis-aligned scroll nodes, as per prim_shared.glsl.
+    if clip_scroll_node.transform_kind == TransformedRectKind::AxisAligned {
+        device_pos.x = (device_pos.x + 0.5).floor();
+        device_pos.y = (device_pos.y + 0.5).floor();
+    }
+
+    DevicePoint::new(
+        (device_pos.x - rendered_rect.origin.x) / rendered_rect.size.width,
+        (device_pos.y - rendered_rect.origin.y) / rendered_rect.size.height,
+    )
+}
+
+// Calculate a UV quad within an image from the screen space
+// positions of the four corners of a picture's local rect.
+fn calculate_uv_rect_kind(
+    local_rect: &LayoutRect,
+    clip_scroll_node: &ClipScrollNode,
+    rendered_rect: &DeviceIntRect,
+    device_pixel_scale: DevicePixelScale,
+) -> UvRectKind {
+    let rendered_rect = rendered_rect.to_f32();
+
+    let top_left = calculate_screen_uv(
+        &local_rect.origin,
+        clip_scroll_node,
+        &rendered_rect,
+        device_pixel_scale,
+    );
+
+    let top_right = calculate_screen_uv(
+        &local_rect.top_right(),
+        clip_scroll_node,
+        &rendered_rect,
+        device_pixel_scale,
+    );
+
+    let bottom_left = calculate_screen_uv(
+        &local_rect.bottom_left(),
+        clip_scroll_node,
+        &rendered_rect,
+        device_pixel_scale,
+    );
+
+    let bottom_right = calculate_screen_uv(
+        &local_rect.bottom_right(),
+        clip_scroll_node,
+        &rendered_rect,
+        device_pixel_scale,
+    );
+
+    UvRectKind::Quad {
+        top_left,
+        top_right,
+        bottom_left,
+        bottom_right,
+    }
+}
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -126,17 +126,20 @@ impl FontContext {
     }
 
     pub fn add_native_font(&mut self, font_key: &FontKey, font_handle: dwrote::FontDescriptor) {
         if self.fonts.contains_key(font_key) {
             return;
         }
 
         let system_fc = dwrote::FontCollection::system();
-        let font = system_fc.get_font_from_descriptor(&font_handle).unwrap();
+        let font = match system_fc.get_font_from_descriptor(&font_handle) {
+            Some(font) => font,
+            None => { panic!("missing descriptor {:?}", font_handle) }
+        };
         let face = font.create_font_face();
         self.fonts.insert(*font_key, face);
     }
 
     pub fn delete_font(&mut self, font_key: &FontKey) {
         if let Some(_) = self.fonts.remove(font_key) {
             self.simulations.retain(|k, _| k.0 != *font_key);
         }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -271,25 +271,27 @@ pub enum BrushKind {
     RadialGradient {
         gradient_index: CachedGradientIndex,
         stops_range: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
         center: LayoutPoint,
         start_radius: f32,
         end_radius: f32,
         ratio_xy: f32,
+        stretch_size: LayoutSize,
     },
     LinearGradient {
         gradient_index: CachedGradientIndex,
         stops_range: ItemRange<GradientStop>,
         stops_count: usize,
         extend_mode: ExtendMode,
         reverse_stops: bool,
         start_point: LayoutPoint,
         end_point: LayoutPoint,
+        stretch_size: LayoutSize,
     }
 }
 
 impl BrushKind {
     fn supports_segments(&self) -> bool {
         match *self {
             BrushKind::Solid { .. } |
             BrushKind::Image { .. } |
@@ -1636,16 +1638,17 @@ impl PrimitiveStore {
                             );
                         }
                     }
                     BrushKind::Picture { pic_index, .. } => {
                         let pic = &mut self.pictures[pic_index.0];
                         pic.prepare_for_render(
                             prim_index,
                             metadata,
+                            prim_run_context,
                             pic_state_for_children,
                             pic_state,
                             frame_context,
                             frame_state,
                         );
                     }
                     BrushKind::Solid { ref color, ref mut opacity_binding, .. } => {
                         // If the opacity changed, invalidate the GPU cache so that
@@ -1679,25 +1682,42 @@ impl PrimitiveStore {
                 }
                 PrimitiveKind::TextRun => {
                     let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                     text.write_gpu_blocks(&mut request);
                 }
                 PrimitiveKind::Brush => {
                     let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
                     brush.write_gpu_blocks(&mut request);
+
+                    let repeat = match brush.kind {
+                        BrushKind::Image { stretch_size, .. } |
+                        BrushKind::LinearGradient { stretch_size, .. } |
+                        BrushKind::RadialGradient { stretch_size, .. } => {
+                            [
+                                metadata.local_rect.size.width / stretch_size.width,
+                                metadata.local_rect.size.height / stretch_size.height,
+                                0.0,
+                                0.0,
+                            ]
+                        }
+                        _ => {
+                            [1.0, 1.0, 0.0, 0.0]
+                        }
+                    };
+
                     match brush.segment_desc {
                         Some(ref segment_desc) => {
                             for segment in &segment_desc.segments {
                                 // has to match VECS_PER_SEGMENT
-                                request.write_segment(segment.local_rect);
+                                request.write_segment(segment.local_rect, repeat);
                             }
                         }
                         None => {
-                            request.write_segment(metadata.local_rect);
+                            request.write_segment(metadata.local_rect, repeat);
                         }
                     }
                 }
             }
         }
     }
 
     fn write_brush_segment_description(
@@ -2457,18 +2477,14 @@ impl<'a> GpuDataRequest<'a> {
     // Write the GPU cache data for an individual segment.
     // TODO(gw): The second block is currently unused. In
     //           the future, it will be used to store a
     //           UV rect, allowing segments to reference
     //           part of an image.
     fn write_segment(
         &mut self,
         local_rect: LayoutRect,
+        extra_params: [f32; 4],
     ) {
         self.push(local_rect);
-        self.push([
-            1.0,
-            1.0,
-            0.0,
-            0.0
-        ]);
+        self.push(extra_params);
     }
 }
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -727,16 +727,19 @@ impl RenderBackend {
                             self.update_document(
                                 document_id,
                                 transaction_msg,
                                 &mut frame_counter,
                                 &mut profile_counters
                             );
                         }
                     },
+                    SceneBuilderResult::FlushComplete(tx) => {
+                        tx.send(()).ok();
+                    }
                     SceneBuilderResult::Stopped => {
                         panic!("We haven't sent a Stop yet, how did we get a Stopped back?");
                     }
                 }
             }
 
             keep_going = match self.api_rx.recv() {
                 Ok(msg) => {
@@ -749,16 +752,23 @@ impl RenderBackend {
             };
         }
 
         let _ = self.scene_tx.send(SceneBuilderRequest::Stop);
         // Ensure we read everything the scene builder is sending us from
         // inflight messages, otherwise the scene builder might panic.
         while let Ok(msg) = self.scene_rx.recv() {
             match msg {
+                SceneBuilderResult::FlushComplete(tx) => {
+                    // If somebody's blocked waiting for a flush, how did they
+                    // trigger the RB thread to shut down? This shouldn't happen
+                    // but handle it gracefully anyway.
+                    debug_assert!(false);
+                    tx.send(()).ok();
+                }
                 SceneBuilderResult::Stopped => break,
                 _ => continue,
             }
         }
 
         self.notifier.shut_down();
 
         if let Some(ref sampler) = self.sampler {
@@ -773,16 +783,19 @@ impl RenderBackend {
         profile_counters: &mut BackendProfileCounters,
         frame_counter: &mut u32,
     ) -> bool {
         match msg {
             ApiMsg::WakeUp => {}
             ApiMsg::WakeSceneBuilder => {
                 self.scene_tx.send(SceneBuilderRequest::WakeUp).unwrap();
             }
+            ApiMsg::FlushSceneBuilder(tx) => {
+                self.scene_tx.send(SceneBuilderRequest::Flush(tx)).unwrap();
+            }
             ApiMsg::UpdateResources(updates) => {
                 self.resource_cache
                     .update_resources(updates, &mut profile_counters.resources);
             }
             ApiMsg::GetGlyphDimensions(instance_key, glyph_keys, tx) => {
                 let mut glyph_dimensions = Vec::with_capacity(glyph_keys.len());
                 if let Some(font) = self.resource_cache.get_font_instance(instance_key) {
                     for glyph_key in &glyph_keys {
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -9,17 +9,17 @@ use box_shadow::{BoxShadowCacheKey};
 use clip::{ClipSource, ClipStore, ClipWorkItem};
 use clip_scroll_tree::CoordinateSystemId;
 use device::TextureFilter;
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use glyph_rasterizer::GpuGlyphCacheKey;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
-use gpu_types::{ImageSource, RasterizationSpace};
+use gpu_types::{ImageSource, RasterizationSpace, UvRectKind};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use picture::PictureCacheKey;
 use prim_store::{PrimitiveIndex, ImageCacheKey};
 #[cfg(feature = "debugger")]
 use print_tree::{PrintTreePrinter};
 use render_backend::FrameId;
@@ -191,25 +191,27 @@ pub struct ClipRegionTask {
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PictureTask {
     pub prim_index: PrimitiveIndex,
     pub content_origin: DeviceIntPoint,
     pub uv_rect_handle: GpuCacheHandle,
+    uv_rect_kind: UvRectKind,
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BlurTask {
     pub blur_std_deviation: f32,
     pub target_kind: RenderTargetKind,
     pub uv_rect_handle: GpuCacheHandle,
+    uv_rect_kind: UvRectKind,
 }
 
 impl BlurTask {
     #[cfg(feature = "debugger")]
     fn print_with<T: PrintTreePrinter>(&self, pt: &mut T) {
         pt.add_item(format!("std deviation: {}", self.blur_std_deviation));
         pt.add_item(format!("target: {:?}", self.target_kind));
     }
@@ -301,24 +303,26 @@ pub struct RenderTask {
 }
 
 impl RenderTask {
     pub fn new_picture(
         location: RenderTaskLocation,
         prim_index: PrimitiveIndex,
         content_origin: DeviceIntPoint,
         children: Vec<RenderTaskId>,
+        uv_rect_kind: UvRectKind,
     ) -> Self {
         RenderTask {
             children,
             location,
             kind: RenderTaskKind::Picture(PictureTask {
                 prim_index,
                 content_origin,
                 uv_rect_handle: GpuCacheHandle::new(),
+                uv_rect_kind,
             }),
             clear_mode: ClearMode::Transparent,
             saved_index: None,
         }
     }
 
     pub fn new_readback(screen_rect: DeviceIntRect) -> Self {
         RenderTask {
@@ -483,17 +487,20 @@ impl RenderTask {
         blur_std_deviation: f32,
         src_task_id: RenderTaskId,
         render_tasks: &mut RenderTaskTree,
         target_kind: RenderTargetKind,
         clear_mode: ClearMode,
     ) -> Self {
         // Adjust large std deviation value.
         let mut adjusted_blur_std_deviation = blur_std_deviation;
-        let blur_target_size = render_tasks[src_task_id].get_dynamic_size();
+        let (blur_target_size, uv_rect_kind) = {
+            let src_task = &render_tasks[src_task_id];
+            (src_task.get_dynamic_size(), src_task.uv_rect_kind())
+        };
         let mut adjusted_blur_target_size = blur_target_size;
         let mut downscaling_src_task_id = src_task_id;
         let mut scale_factor = 1.0;
         while adjusted_blur_std_deviation > MAX_BLUR_STD_DEVIATION {
             if adjusted_blur_target_size.width < MIN_DOWNSCALING_RT_SIZE ||
                adjusted_blur_target_size.height < MIN_DOWNSCALING_RT_SIZE {
                 break;
             }
@@ -510,30 +517,32 @@ impl RenderTask {
 
         let blur_task_v = RenderTask {
             children: vec![downscaling_src_task_id],
             location: RenderTaskLocation::Dynamic(None, Some(adjusted_blur_target_size)),
             kind: RenderTaskKind::VerticalBlur(BlurTask {
                 blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
                 uv_rect_handle: GpuCacheHandle::new(),
+                uv_rect_kind,
             }),
             clear_mode,
             saved_index: None,
         };
 
         let blur_task_v_id = render_tasks.add(blur_task_v);
 
         RenderTask {
             children: vec![blur_task_v_id],
             location: RenderTaskLocation::Dynamic(None, Some(adjusted_blur_target_size)),
             kind: RenderTaskKind::HorizontalBlur(BlurTask {
                 blur_std_deviation: adjusted_blur_std_deviation,
                 target_kind,
                 uv_rect_handle: GpuCacheHandle::new(),
+                uv_rect_kind,
             }),
             clear_mode,
             saved_index: None,
         }
     }
 
     pub fn new_scaling(
         target_kind: RenderTargetKind,
@@ -570,16 +579,41 @@ impl RenderTask {
                 render_mode: render_mode,
                 embolden_amount: *embolden_amount,
             }),
             clear_mode: ClearMode::Transparent,
             saved_index: None,
         }
     }
 
+    fn uv_rect_kind(&self) -> UvRectKind {
+        match self.kind {
+            RenderTaskKind::CacheMask(..) |
+            RenderTaskKind::Glyph(_) |
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::Scaling(..) => {
+                unreachable!("bug: unexpected render task");
+            }
+
+            RenderTaskKind::Picture(ref task) => {
+                task.uv_rect_kind
+            }
+
+            RenderTaskKind::VerticalBlur(ref task) |
+            RenderTaskKind::HorizontalBlur(ref task) => {
+                task.uv_rect_kind
+            }
+
+            RenderTaskKind::ClipRegion(..) |
+            RenderTaskKind::Blit(..) => {
+                UvRectKind::Rect
+            }
+        }
+    }
+
     // Write (up to) 8 floats of data specific to the type
     // of render task that is provided to the GPU shaders
     // via a vertex texture.
     pub fn write_task_data(&self) -> RenderTaskData {
         // NOTE: The ordering and layout of these structures are
         //       required to match both the GPU structures declared
         //       in prim_shared.glsl, and also the uses in submit_batch()
         //       in renderer.rs.
@@ -773,40 +807,44 @@ impl RenderTask {
     }
 
     pub fn write_gpu_blocks(
         &mut self,
         gpu_cache: &mut GpuCache,
     ) {
         let (target_rect, target_index) = self.get_target_rect();
 
-        let cache_handle = match self.kind {
+        let (cache_handle, uv_rect_kind) = match self.kind {
             RenderTaskKind::HorizontalBlur(ref mut info) |
             RenderTaskKind::VerticalBlur(ref mut info) => {
-                &mut info.uv_rect_handle
+                (&mut info.uv_rect_handle, info.uv_rect_kind)
             }
             RenderTaskKind::Picture(ref mut info) => {
-                &mut info.uv_rect_handle
+                (&mut info.uv_rect_handle, info.uv_rect_kind)
             }
             RenderTaskKind::Readback(..) |
             RenderTaskKind::Scaling(..) |
             RenderTaskKind::Blit(..) |
             RenderTaskKind::ClipRegion(..) |
             RenderTaskKind::CacheMask(..) |
             RenderTaskKind::Glyph(..) => {
                 return;
             }
         };
 
         if let Some(mut request) = gpu_cache.request(cache_handle) {
+            let p0 = target_rect.origin.to_f32();
+            let p1 = target_rect.bottom_right().to_f32();
+
             let image_source = ImageSource {
-                p0: target_rect.origin.to_f32(),
-                p1: target_rect.bottom_right().to_f32(),
+                p0,
+                p1,
                 texture_layer: target_index.0 as f32,
                 user_data: [0.0; 3],
+                uv_rect_kind,
             };
             image_source.write_gpu_blocks(&mut request);
         }
     }
 
     #[cfg(feature = "debugger")]
     pub fn print_with<T: PrintTreePrinter>(&self, pt: &mut T, tree: &RenderTaskTree) -> bool {
         match self.kind {
@@ -1007,16 +1045,17 @@ impl RenderTaskCache {
                     &mut entry.handle,
                     descriptor,
                     TextureFilter::Linear,
                     None,
                     entry.user_data.unwrap_or([0.0; 3]),
                     None,
                     gpu_cache,
                     None,
+                    render_task.uv_rect_kind(),
                 );
 
                 // Get the allocation details in the texture cache, and store
                 // this in the render task. The renderer will draw this
                 // task into the appropriate layer and rect of the texture
                 // cache on this frame.
                 let (texture_id, texture_layer, uv_rect) =
                     texture_cache.get_cache_location(&entry.handle);
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -19,16 +19,17 @@ use capture::PlainExternalImage;
 #[cfg(any(feature = "replay", feature = "png"))]
 use capture::CaptureConfig;
 use device::TextureFilter;
 use glyph_cache::GlyphCache;
 #[cfg(not(feature = "pathfinder"))]
 use glyph_cache::GlyphCacheEntry;
 use glyph_rasterizer::{FontInstance, GlyphFormat, GlyphRasterizer, GlyphRequest};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
+use gpu_types::UvRectKind;
 use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
 use profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTaskCache, RenderTaskCacheKey, RenderTaskId};
 use render_task::{RenderTaskCacheEntry, RenderTaskCacheEntryHandle, RenderTaskTree};
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::cmp;
 use std::fmt::Debug;
@@ -1052,16 +1053,17 @@ impl ResourceCache {
                 &mut entry.texture_cache_handle,
                 descriptor,
                 filter,
                 Some(image_data),
                 [0.0; 3],
                 dirty_rect,
                 gpu_cache,
                 None,
+                UvRectKind::Rect,
             );
             image_template.dirty_rect = None;
         }
     }
 
     pub fn end_frame(&mut self) {
         debug_assert_eq!(self.state, State::QueryResources);
         self.state = State::Idle;
--- a/gfx/webrender/src/scene_builder.rs
+++ b/gfx/webrender/src/scene_builder.rs
@@ -20,29 +20,31 @@ pub enum SceneBuilderRequest {
         document_id: DocumentId,
         scene: Option<SceneRequest>,
         resource_updates: ResourceUpdates,
         frame_ops: Vec<FrameMsg>,
         render: bool,
         current_epochs: FastHashMap<PipelineId, Epoch>,
     },
     WakeUp,
+    Flush(MsgSender<()>),
     Stop
 }
 
 // Message from scene builder to render backend.
 pub enum SceneBuilderResult {
     Transaction {
         document_id: DocumentId,
         built_scene: Option<BuiltScene>,
         resource_updates: ResourceUpdates,
         frame_ops: Vec<FrameMsg>,
         render: bool,
         result_tx: Sender<SceneSwapResult>,
     },
+    FlushComplete(MsgSender<()>),
     Stopped,
 }
 
 // Message from render backend to scene builder to indicate the
 // scene swap was completed. We need a separate channel for this
 // so that they don't get mixed with SceneBuilderRequest messages.
 pub enum SceneSwapResult {
     Complete,
@@ -120,16 +122,20 @@ impl SceneBuilder {
         if let Some(ref hooks) = self.hooks {
             hooks.deregister();
         }
     }
 
     fn process_message(&mut self, msg: SceneBuilderRequest) -> bool {
         match msg {
             SceneBuilderRequest::WakeUp => {}
+            SceneBuilderRequest::Flush(tx) => {
+                self.tx.send(SceneBuilderResult::FlushComplete(tx)).unwrap();
+                let _ = self.api_tx.send(ApiMsg::WakeUp);
+            }
             SceneBuilderRequest::Transaction {
                 document_id,
                 scene,
                 resource_updates,
                 frame_ops,
                 render,
                 current_epochs,
             } => {
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{ExternalImageType, ImageData, ImageFormat};
 use api::ImageDescriptor;
 use device::TextureFilter;
 use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle};
-use gpu_types::ImageSource;
+use gpu_types::{ImageSource, UvRectKind};
 use internal_types::{CacheTextureId, FastHashMap, TextureUpdateList, TextureUpdateSource};
 use internal_types::{RenderTargetInfo, SourceTexture, TextureUpdate, TextureUpdateOp};
 use profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use resource_cache::CacheItem;
 use std::cell::Cell;
 use std::cmp;
 use std::mem;
@@ -105,38 +105,42 @@ struct CacheEntry {
     uv_rect_handle: GpuCacheHandle,
     // Image format of the item.
     format: ImageFormat,
     filter: TextureFilter,
     // The actual device texture ID this is part of.
     texture_id: CacheTextureId,
     // Optional notice when the entry is evicted from the cache.
     eviction_notice: Option<EvictionNotice>,
+    // The type of UV rect this entry specifies.
+    uv_rect_kind: UvRectKind,
 }
 
 impl CacheEntry {
     // Create a new entry for a standalone texture.
     fn new_standalone(
         texture_id: CacheTextureId,
         size: DeviceUintSize,
         format: ImageFormat,
         filter: TextureFilter,
         user_data: [f32; 3],
         last_access: FrameId,
+        uv_rect_kind: UvRectKind,
     ) -> Self {
         CacheEntry {
             size,
             user_data,
             last_access,
             kind: EntryKind::Standalone,
             texture_id,
             format,
             filter,
             uv_rect_handle: GpuCacheHandle::new(),
             eviction_notice: None,
+            uv_rect_kind,
         }
     }
 
     // Update the GPU cache for this texture cache entry.
     // This ensures that the UV rect, and texture layer index
     // are up to date in the GPU cache for vertex shaders
     // to fetch from.
     fn update_gpu_cache(&mut self, gpu_cache: &mut GpuCache) {
@@ -149,16 +153,17 @@ impl CacheEntry {
                     ..
                 } => (origin, layer_index as f32),
             };
             let image_source = ImageSource {
                 p0: origin.to_f32(),
                 p1: (origin + self.size).to_f32(),
                 texture_layer: layer_index,
                 user_data: self.user_data,
+                uv_rect_kind: self.uv_rect_kind,
             };
             image_source.write_gpu_blocks(&mut request);
         }
     }
 
     fn evict(&self) {
         if let Some(eviction_notice) = self.eviction_notice.as_ref() {
             eviction_notice.notify();
@@ -389,16 +394,17 @@ impl TextureCache {
         handle: &mut TextureCacheHandle,
         descriptor: ImageDescriptor,
         filter: TextureFilter,
         data: Option<ImageData>,
         user_data: [f32; 3],
         mut dirty_rect: Option<DeviceUintRect>,
         gpu_cache: &mut GpuCache,
         eviction_notice: Option<&EvictionNotice>,
+        uv_rect_kind: UvRectKind,
     ) {
         // Determine if we need to allocate texture cache memory
         // for this item. We need to reallocate if any of the following
         // is true:
         // - Never been in the cache
         // - Has been in the cache but was evicted.
         // - Exists in the cache but dimensions / format have changed.
         let realloc = match handle.entry {
@@ -417,17 +423,23 @@ impl TextureCache {
             }
             None => {
                 // This handle has not been allocated yet.
                 true
             }
         };
 
         if realloc {
-            self.allocate(handle, descriptor, filter, user_data);
+            self.allocate(
+                handle,
+                descriptor,
+                filter,
+                user_data,
+                uv_rect_kind,
+            );
 
             // If we reallocated, we need to upload the whole item again.
             dirty_rect = None;
         }
 
         let entry = self.entries
             .get_opt_mut(handle.entry.as_ref().unwrap())
             .expect("BUG: handle must be valid now");
@@ -634,17 +646,17 @@ impl TextureCache {
         // more items being uploaded than necessary.
         // Instead, we say we will keep evicting until both of these
         // conditions are met:
         // - We have evicted some arbitrary number of items (512 currently).
         //   AND
         // - We have freed an item that will definitely allow us to
         //   fit the currently requested allocation.
         let needed_slab_size =
-            SlabSize::new(required_alloc.width, required_alloc.height).get_size();
+            SlabSize::new(required_alloc.width, required_alloc.height);
         let mut found_matching_slab = false;
         let mut freed_complete_page = false;
         let mut evicted_items = 0;
 
         for handle in eviction_candidates {
             if evicted_items > 512 && (found_matching_slab || freed_complete_page) {
                 retained_entries.push(handle);
             } else {
@@ -693,16 +705,17 @@ impl TextureCache {
     }
 
     // Attempt to allocate a block from the shared cache.
     fn allocate_from_shared_cache(
         &mut self,
         descriptor: &ImageDescriptor,
         filter: TextureFilter,
         user_data: [f32; 3],
+        uv_rect_kind: UvRectKind,
     ) -> Option<CacheEntry> {
         // Work out which cache it goes in, based on format.
         let texture_array = match (descriptor.format, filter) {
             (ImageFormat::R8, TextureFilter::Linear) => &mut self.array_a8_linear,
             (ImageFormat::BGRA8, TextureFilter::Linear) => &mut self.array_rgba8_linear,
             (ImageFormat::BGRA8, TextureFilter::Nearest) => &mut self.array_rgba8_nearest,
             (ImageFormat::RGBAF32, _) |
             (ImageFormat::R8, TextureFilter::Nearest) |
@@ -740,16 +753,17 @@ impl TextureCache {
 
         // Do the allocation. This can fail and return None
         // if there are no free slots or regions available.
         texture_array.alloc(
             descriptor.width,
             descriptor.height,
             user_data,
             self.frame_id,
+            uv_rect_kind,
         )
     }
 
     // Returns true if the given image descriptor *may* be
     // placed in the shared texture cache.
     pub fn is_allowed_in_shared_cache(
         &self,
         filter: TextureFilter,
@@ -760,36 +774,38 @@ impl TextureCache {
         // TODO(gw): For now, anything that requests nearest filtering and isn't BGRA8
         //           just fails to allocate in a texture page, and gets a standalone
         //           texture. This is probably rare enough that it can be fixed up later.
         if filter == TextureFilter::Nearest &&
            descriptor.format != ImageFormat::BGRA8 {
             allowed_in_shared_cache = false;
         }
 
-        // Anything larger than 512 goes in a standalone texture.
+        // Anything larger than TEXTURE_REGION_DIMENSIONS goes in a standalone texture.
         // TODO(gw): If we find pages that suffer from batch breaks in this
         //           case, add support for storing these in a standalone
         //           texture array.
-        if descriptor.width > 512 || descriptor.height > 512 {
+        if descriptor.width > TEXTURE_REGION_DIMENSIONS ||
+           descriptor.height > TEXTURE_REGION_DIMENSIONS {
             allowed_in_shared_cache = false;
         }
 
         allowed_in_shared_cache
     }
 
     // Allocate storage for a given image. This attempts to allocate
     // from the shared cache, but falls back to standalone texture
     // if the image is too large, or the cache is full.
     fn allocate(
         &mut self,
         handle: &mut TextureCacheHandle,
         descriptor: ImageDescriptor,
         filter: TextureFilter,
         user_data: [f32; 3],
+        uv_rect_kind: UvRectKind,
     ) {
         assert!(descriptor.width > 0 && descriptor.height > 0);
 
         // Work out if this image qualifies to go in the shared (batching) cache.
         let allowed_in_shared_cache = self.is_allowed_in_shared_cache(
             filter,
             &descriptor,
         );
@@ -798,28 +814,30 @@ impl TextureCache {
         let size = DeviceUintSize::new(descriptor.width, descriptor.height);
         let frame_id = self.frame_id;
 
         // If it's allowed in the cache, see if there is a spot for it.
         if allowed_in_shared_cache {
             new_cache_entry = self.allocate_from_shared_cache(
                 &descriptor,
                 filter,
-                user_data
+                user_data,
+                uv_rect_kind,
             );
 
             // If we failed to allocate in the shared cache, run an
             // eviction cycle, and then try to allocate again.
             if new_cache_entry.is_none() {
                 self.expire_old_shared_entries(&descriptor);
 
                 new_cache_entry = self.allocate_from_shared_cache(
                     &descriptor,
                     filter,
-                    user_data
+                    user_data,
+                    uv_rect_kind,
                 );
             }
         }
 
         // If not allowed in the cache, or if the shared cache is full, then it
         // will just have to be in a unique texture. This hurts batching but should
         // only occur on a small number of images (or pathological test cases!).
         if new_cache_entry.is_none() {
@@ -842,16 +860,17 @@ impl TextureCache {
 
             new_cache_entry = Some(CacheEntry::new_standalone(
                 texture_id,
                 size,
                 descriptor.format,
                 filter,
                 user_data,
                 frame_id,
+                uv_rect_kind,
             ));
 
             allocated_in_shared_cache = false;
         }
 
         let new_cache_entry = new_cache_entry.expect("BUG: must have allocated by now");
 
         // We need to update the texture cache handle now, so that it
@@ -895,53 +914,58 @@ impl TextureCache {
                 self.shared_entry_handles.push(new_entry_handle);
             } else {
                 self.standalone_entry_handles.push(new_entry_handle);
             }
         }
     }
 }
 
-// A list of the block sizes that a region can be initialized with.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
 #[derive(Copy, Clone, PartialEq)]
-enum SlabSize {
-    Size16x16,
-    Size32x32,
-    Size64x64,
-    Size128x128,
-    Size256x256,
-    Size512x512,
+struct SlabSize {
+    width: u32,
+    height: u32,
 }
 
 impl SlabSize {
     fn new(width: u32, height: u32) -> SlabSize {
-        // TODO(gw): Consider supporting non-square
-        //           allocator sizes here.
-        let max_dim = cmp::max(width, height);
+        let x_size = quantize_dimension(width);
+        let y_size = quantize_dimension(height);
+
+        assert!(x_size > 0 && x_size <= TEXTURE_REGION_DIMENSIONS);
+        assert!(y_size > 0 && y_size <= TEXTURE_REGION_DIMENSIONS);
 
-        match max_dim {
-            0 => unreachable!(),
-            1...16 => SlabSize::Size16x16,
-            17...32 => SlabSize::Size32x32,
-            33...64 => SlabSize::Size64x64,
-            65...128 => SlabSize::Size128x128,
-            129...256 => SlabSize::Size256x256,
-            257...512 => SlabSize::Size512x512,
-            _ => panic!("Invalid dimensions for cache!"),
+        let (width, height) = match (x_size, y_size) {
+            // Special cased rectangular slab pages.
+            (512, 256) => (512, 256),
+            (512, 128) => (512, 128),
+            (512,  64) => (512,  64),
+            (256, 512) => (256, 512),
+            (128, 512) => (128, 512),
+            ( 64, 512) => ( 64, 512),
+
+            // If none of those fit, use a square slab size.
+            (x_size, y_size) => {
+                let square_size = cmp::max(x_size, y_size);
+                (square_size, square_size)
+            }
+        };
+
+        SlabSize {
+            width,
+            height,
         }
     }
 
-    fn get_size(&self) -> u32 {
-        match *self {
-            SlabSize::Size16x16 => 16,
-            SlabSize::Size32x32 => 32,
-            SlabSize::Size64x64 => 64,
-            SlabSize::Size128x128 => 128,
-            SlabSize::Size256x256 => 256,
-            SlabSize::Size512x512 => 512,
+    fn invalid() -> SlabSize {
+        SlabSize {
+            width: 0,
+            height: 0,
         }
     }
 }
 
 // The x/y location within a texture region of an allocation.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct TextureLocation(u8, u8);
@@ -955,81 +979,81 @@ impl TextureLocation {
 
 // A region is a sub-rect of a texture array layer.
 // All allocations within a region are of the same size.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct TextureRegion {
     layer_index: i32,
     region_size: u32,
-    slab_size: u32,
+    slab_size: SlabSize,
     free_slots: Vec<TextureLocation>,
-    slots_per_axis: u32,
     total_slot_count: usize,
     origin: DeviceUintPoint,
 }
 
 impl TextureRegion {
     fn new(region_size: u32, layer_index: i32, origin: DeviceUintPoint) -> Self {
         TextureRegion {
             layer_index,
             region_size,
-            slab_size: 0,
+            slab_size: SlabSize::invalid(),
             free_slots: Vec::new(),
-            slots_per_axis: 0,
             total_slot_count: 0,
             origin,
         }
     }
 
     // Initialize a region to be an allocator for a specific slab size.
     fn init(&mut self, slab_size: SlabSize) {
-        debug_assert!(self.slab_size == 0);
+        debug_assert!(self.slab_size == SlabSize::invalid());
         debug_assert!(self.free_slots.is_empty());
 
-        self.slab_size = slab_size.get_size();
-        self.slots_per_axis = self.region_size / self.slab_size;
+        self.slab_size = slab_size;
+        let slots_per_x_axis = self.region_size / self.slab_size.width;
+        let slots_per_y_axis = self.region_size / self.slab_size.height;
 
         // Add each block to a freelist.
-        for y in 0 .. self.slots_per_axis {
-            for x in 0 .. self.slots_per_axis {
+        for y in 0 .. slots_per_y_axis {
+            for x in 0 .. slots_per_x_axis {
                 self.free_slots.push(TextureLocation::new(x, y));
             }
         }
 
         self.total_slot_count = self.free_slots.len();
     }
 
     // Deinit a region, allowing it to become a region with
     // a different allocator size.
     fn deinit(&mut self) {
-        self.slab_size = 0;
+        self.slab_size = SlabSize::invalid();
         self.free_slots.clear();
-        self.slots_per_axis = 0;
         self.total_slot_count = 0;
     }
 
     fn is_empty(&self) -> bool {
-        self.slab_size == 0
+        self.slab_size == SlabSize::invalid()
     }
 
     // Attempt to allocate a fixed size block from this region.
     fn alloc(&mut self) -> Option<DeviceUintPoint> {
+        debug_assert!(self.slab_size != SlabSize::invalid());
+
         self.free_slots.pop().map(|location| {
             DeviceUintPoint::new(
-                self.origin.x + self.slab_size * location.0 as u32,
-                self.origin.y + self.slab_size * location.1 as u32,
+                self.origin.x + self.slab_size.width * location.0 as u32,
+                self.origin.y + self.slab_size.height * location.1 as u32,
             )
         })
     }
 
     // Free a block in this region.
     fn free(&mut self, point: DeviceUintPoint) {
-        let x = (point.x - self.origin.x) / self.slab_size;
-        let y = (point.y - self.origin.y) / self.slab_size;
+        let x = (point.x - self.origin.x) / self.slab_size.width;
+        let y = (point.y - self.origin.y) / self.slab_size.height;
         self.free_slots.push(TextureLocation::new(x, y));
 
         // If this region is completely unused, deinit it
         // so that it can become a different slab size
         // as required.
         if self.free_slots.len() == self.total_slot_count {
             self.deinit();
         }
@@ -1084,16 +1108,17 @@ impl TextureArray {
 
     // Allocate space in this texture array.
     fn alloc(
         &mut self,
         width: u32,
         height: u32,
         user_data: [f32; 3],
         frame_id: FrameId,
+        uv_rect_kind: UvRectKind,
     ) -> Option<CacheEntry> {
         // Lazily allocate the regions if not already created.
         // This means that very rarely used image formats can be
         // added but won't allocate a cache if never used.
         if !self.is_allocated {
             debug_assert!(TEXTURE_LAYER_DIMENSIONS % TEXTURE_REGION_DIMENSIONS == 0);
             let regions_per_axis = TEXTURE_LAYER_DIMENSIONS / TEXTURE_REGION_DIMENSIONS;
             for layer_index in 0 .. self.layer_count {
@@ -1113,35 +1138,34 @@ impl TextureArray {
                 }
             }
             self.is_allocated = true;
         }
 
         // Quantize the size of the allocation to select a region to
         // allocate from.
         let slab_size = SlabSize::new(width, height);
-        let slab_size_dim = slab_size.get_size();
 
         // TODO(gw): For simplicity, the initial implementation just
         //           has a single vec<> of regions. We could easily
         //           make this more efficient by storing a list of
         //           regions for each slab size specifically...
 
         // Keep track of the location of an empty region,
         // in case we need to select a new empty region
         // after the loop.
         let mut empty_region_index = None;
         let mut entry_kind = None;
 
         // Run through the existing regions of this size, and see if
         // we can find a free block in any of them.
         for (i, region) in self.regions.iter_mut().enumerate() {
-            if region.slab_size == 0 {
+            if region.is_empty() {
                 empty_region_index = Some(i);
-            } else if region.slab_size == slab_size_dim {
+            } else if region.slab_size == slab_size {
                 if let Some(location) = region.alloc() {
                     entry_kind = Some(EntryKind::Cache {
                         layer_index: region.layer_index as u16,
                         region_index: i as u16,
                         origin: location,
                     });
                     break;
                 }
@@ -1169,16 +1193,17 @@ impl TextureArray {
                 user_data,
                 last_access: frame_id,
                 kind,
                 uv_rect_handle: GpuCacheHandle::new(),
                 format: self.format,
                 filter: self.filter,
                 texture_id: self.texture_id.unwrap(),
                 eviction_notice: None,
+                uv_rect_kind,
             }
         })
     }
 }
 
 impl TextureUpdate {
     // Constructs a TextureUpdate operation to be passed to the
     // rendering thread in order to do an upload to the right
@@ -1239,8 +1264,21 @@ impl TextureUpdate {
         };
 
         TextureUpdate {
             id: texture_id,
             op: update_op,
         }
     }
 }
+
+fn quantize_dimension(size: u32) -> u32 {
+    match size {
+        0 => unreachable!(),
+        1...16 => 16,
+        17...32 => 32,
+        33...64 => 64,
+        65...128 => 128,
+        129...256 => 256,
+        257...512 => 512,
+        _ => panic!("Invalid dimensions for cache!"),
+    }
+}
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -628,16 +628,17 @@ pub enum ApiMsg {
     /// Flush from the caches anything that isn't necessary, to free some memory.
     MemoryPressure,
     /// Change debugging options.
     DebugCommand(DebugCommand),
     /// Wakes the render backend's event loop up. Needed when an event is communicated
     /// through another channel.
     WakeUp,
     WakeSceneBuilder,
+    FlushSceneBuilder(MsgSender<()>),
     ShutDown,
 }
 
 impl fmt::Debug for ApiMsg {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         f.write_str(match *self {
             ApiMsg::UpdateResources(..) => "ApiMsg::UpdateResources",
             ApiMsg::GetGlyphDimensions(..) => "ApiMsg::GetGlyphDimensions",
@@ -648,16 +649,17 @@ impl fmt::Debug for ApiMsg {
             ApiMsg::DeleteDocument(..) => "ApiMsg::DeleteDocument",
             ApiMsg::ExternalEvent(..) => "ApiMsg::ExternalEvent",
             ApiMsg::ClearNamespace(..) => "ApiMsg::ClearNamespace",
             ApiMsg::MemoryPressure => "ApiMsg::MemoryPressure",
             ApiMsg::DebugCommand(..) => "ApiMsg::DebugCommand",
             ApiMsg::ShutDown => "ApiMsg::ShutDown",
             ApiMsg::WakeUp => "ApiMsg::WakeUp",
             ApiMsg::WakeSceneBuilder => "ApiMsg::WakeSceneBuilder",
+            ApiMsg::FlushSceneBuilder(..) => "ApiMsg::FlushSceneBuilder",
         })
     }
 }
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
 pub struct Epoch(pub u32);
 
@@ -960,16 +962,25 @@ impl RenderApi {
         self.send_frame_msg(document_id, FrameMsg::GetScrollNodeState(tx));
         rx.recv().unwrap()
     }
 
     pub fn wake_scene_builder(&self) {
         self.send_message(ApiMsg::WakeSceneBuilder);
     }
 
+    /// Block until a round-trip to the scene builder thread has completed. This
+    /// ensures that any transactions (including ones deferred to the scene
+    /// builder thread) have been processed.
+    pub fn flush_scene_builder(&self) {
+        let (tx, rx) = channel::msg_channel().unwrap();
+        self.send_message(ApiMsg::FlushSceneBuilder(tx));
+        rx.recv().unwrap(); // block until done
+    }
+
     /// Save a capture of the current frame state for debugging.
     pub fn save_capture(&self, path: PathBuf, bits: CaptureBits) {
         let msg = ApiMsg::DebugCommand(DebugCommand::SaveCapture(path, bits));
         self.send_message(msg);
     }
 
     /// Load a capture of the current frame state for debugging.
     pub fn load_capture(&self, path: PathBuf) -> Vec<CapturedDocument> {
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-751236199b39bb8dac78522713133ca18c603fb3
+4b65822a2f7e1fed246a492f9fe193ede2f37d74