Bug 1468950 - Update webrender to commit cf98ad4d63729c678a7575eb9bce36794da5e270. r=Gankro
author: Kartikaya Gupta <kgupta@mozilla.com>
Thu, 21 Jun 2018 08:15:31 -0400
changeset 477476 47c1b857617b74db7ea4d2fde8f01bc4f017fe33
parent 477475 47e6dedb51858d0b234e5efb89620d82e5583d47
child 477477 0088ae712bf5fe1a4ae4e0c7b816f1b9f68b20a4
push id: 9385
push user: dluca@mozilla.com
push date: Fri, 22 Jun 2018 15:47:18 +0000
treeherder: mozilla-beta@82a9a1027e2b [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: Gankro
bugs: 1468950
milestone: 62.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1468950 - Update webrender to commit cf98ad4d63729c678a7575eb9bce36794da5e270. r=Gankro MozReview-Commit-ID: GlMFdJueahi
gfx/webrender/res/cs_border_segment.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_split_composite.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/border.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/segment.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/image.rs
gfx/webrender_bindings/revision.txt
gfx/wrench/src/blob.rs
--- a/gfx/webrender/res/cs_border_segment.glsl
+++ b/gfx/webrender/res/cs_border_segment.glsl
@@ -1,18 +1,20 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include shared,ellipse
 
 // For edges, the colors are the same. For corners, these
 // are the colors of each edge making up the corner.
-flat varying vec4 vColor0[2];
-flat varying vec4 vColor1[2];
+flat varying vec4 vColor00;
+flat varying vec4 vColor01;
+flat varying vec4 vColor10;
+flat varying vec4 vColor11;
 
 // A point + tangent defining the line where the edge
 // transition occurs. Used for corners only.
 flat varying vec4 vColorLine;
 
 // x = segment, y = styles, z = edge axes, w = clip mode
 flat varying ivec4 vConfig;
 
@@ -176,18 +178,22 @@ void main(void) {
         segment,
         style0 | (style1 << 16),
         edge_axis.x | (edge_axis.y << 16),
         clip_mode
     );
     vPartialWidths = vec4(aWidths / 3.0, aWidths / 2.0);
     vPos = aRect.zw * aPosition.xy;
 
-    vColor0 = get_colors_for_side(aColor0, style0);
-    vColor1 = get_colors_for_side(aColor1, style1);
+    vec4[2] color0 = get_colors_for_side(aColor0, style0);
+    vColor00 = color0[0];
+    vColor01 = color0[1];
+    vec4[2] color1 = get_colors_for_side(aColor1, style1);
+    vColor10 = color1[0];
+    vColor11 = color1[1];
     vClipCenter_Sign = vec4(outer + clip_sign * aRadii, clip_sign);
     vClipRadii = vec4(aRadii, max(aRadii - aWidths, 0.0));
     vColorLine = vec4(outer, aWidths.y * -clip_sign.y, aWidths.x * clip_sign.x);
     vEdgeReference = vec4(edge_reference, edge_reference + aWidths);
     vClipParams1 = aClipParams1;
     vClipParams2 = aClipParams2;
 
     // For the case of dot clips, optimize the number of pixels that
@@ -205,17 +211,18 @@ void main(void) {
     gl_Position = uTransform * vec4(aTaskOrigin + aRect.xy + vPos, 0.0, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 vec4 evaluate_color_for_style_in_corner(
     vec2 clip_relative_pos,
     int style,
-    vec4 color[2],
+    vec4 color0,
+    vec4 color1,
     vec4 clip_radii,
     float mix_factor,
     int segment,
     float aa_range
 ) {
     switch (style) {
         case BORDER_STYLE_DOUBLE: {
             // Get the distances from 0.33 of the radii, and
@@ -229,17 +236,17 @@ vec4 evaluate_color_for_style_in_corner(
             );
             float d_radii_b = distance_to_ellipse(
                 clip_relative_pos,
                 clip_radii.xy - 2.0 * vPartialWidths.xy,
                 aa_range
             );
             float d = min(-d_radii_a, d_radii_b);
             float alpha = distance_aa(aa_range, d);
-            return alpha * color[0];
+            return alpha * color0;
         }
         case BORDER_STYLE_GROOVE:
         case BORDER_STYLE_RIDGE: {
             float d = distance_to_ellipse(
                 clip_relative_pos,
                 clip_radii.xy - vPartialWidths.zw,
                 aa_range
             );
@@ -247,31 +254,32 @@ vec4 evaluate_color_for_style_in_corner(
             float swizzled_factor;
             switch (segment) {
                 case SEGMENT_TOP_LEFT: swizzled_factor = 0.0; break;
                 case SEGMENT_TOP_RIGHT: swizzled_factor = mix_factor; break;
                 case SEGMENT_BOTTOM_RIGHT: swizzled_factor = 1.0; break;
                 case SEGMENT_BOTTOM_LEFT: swizzled_factor = 1.0 - mix_factor; break;
                 default: swizzled_factor = 0.0; break;
             };
-            vec4 c0 = mix(color[1], color[0], swizzled_factor);
-            vec4 c1 = mix(color[0], color[1], swizzled_factor);
+            vec4 c0 = mix(color1, color0, swizzled_factor);
+            vec4 c1 = mix(color0, color1, swizzled_factor);
             return mix(c0, c1, alpha);
         }
         default:
             break;
     }
 
-    return color[0];
+    return color0;
 }
 
 vec4 evaluate_color_for_style_in_edge(
     vec2 pos,
     int style,
-    vec4 color[2],
+    vec4 color0,
+    vec4 color1,
     float aa_range,
     int edge_axis
 ) {
     switch (style) {
         case BORDER_STYLE_DOUBLE: {
             float d0 = -1.0;
             float d1 = -1.0;
             if (vPartialWidths[edge_axis] > 1.0) {
@@ -279,30 +287,30 @@ vec4 evaluate_color_for_style_in_edge(
                     vEdgeReference[edge_axis] + vPartialWidths[edge_axis],
                     vEdgeReference[edge_axis+2] - vPartialWidths[edge_axis]
                 );
                 d0 = pos[edge_axis] - ref.x;
                 d1 = ref.y - pos[edge_axis];
             }
             float d = min(d0, d1);
             float alpha = distance_aa(aa_range, d);
-            return alpha * color[0];
+            return alpha * color0;
         }
         case BORDER_STYLE_GROOVE:
         case BORDER_STYLE_RIDGE: {
             float ref = vEdgeReference[edge_axis] + vPartialWidths[edge_axis+2];
             float d = pos[edge_axis] - ref;
             float alpha = distance_aa(aa_range, d);
-            return mix(color[0], color[1], alpha);
+            return mix(color0, color1, alpha);
         }
         default:
             break;
     }
 
-    return color[0];
+    return color0;
 }
 
 void main(void) {
     float aa_range = compute_aa_range(vPos);
     vec4 color0, color1;
 
     int segment = vConfig.x;
     ivec2 style = ivec2(vConfig.y & 0xffff, vConfig.y >> 16);
@@ -347,43 +355,47 @@ void main(void) {
         float d_radii_a = distance_to_ellipse(clip_relative_pos, vClipRadii.xy, aa_range);
         float d_radii_b = distance_to_ellipse(clip_relative_pos, vClipRadii.zw, aa_range);
         float d_radii = max(d_radii_a, -d_radii_b);
         d = max(d, d_radii);
 
         color0 = evaluate_color_for_style_in_corner(
             clip_relative_pos,
             style.x,
-            vColor0,
+            vColor00,
+            vColor01,
             vClipRadii,
             mix_factor,
             segment,
             aa_range
         );
         color1 = evaluate_color_for_style_in_corner(
             clip_relative_pos,
             style.y,
-            vColor1,
+            vColor10,
+            vColor11,
             vClipRadii,
             mix_factor,
             segment,
             aa_range
         );
     } else {
         color0 = evaluate_color_for_style_in_edge(
             vPos,
             style.x,
-            vColor0,
+            vColor00,
+            vColor01,
             aa_range,
             edge_axis.x
         );
         color1 = evaluate_color_for_style_in_edge(
             vPos,
             style.y,
-            vColor1,
+            vColor10,
+            vColor11,
             aa_range,
             edge_axis.y
         );
     }
 
     float alpha = distance_aa(aa_range, d);
     vec4 color = mix(color0, color1, mix_factor);
     oFragColor = color * alpha;
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -54,141 +54,16 @@ in ivec4 aData0;
 in ivec4 aData1;
 
 RectWithSize fetch_clip_chain_rect(int index) {
     ivec2 uv = get_fetch_uv(index, VECS_PER_LOCAL_CLIP_RECT);
     vec4 rect = TEXEL_FETCH(sLocalClipRects, uv, 0, ivec2(0, 0));
     return RectWithSize(rect.xy, rect.zw);
 }
 
-struct Glyph {
-    vec2 offset;
-};
-
-Glyph fetch_glyph(int specific_prim_address,
-                  int glyph_index) {
-    // Two glyphs are packed in each texel in the GPU cache.
-    int glyph_address = specific_prim_address +
-                        VECS_PER_TEXT_RUN +
-                        glyph_index / 2;
-    vec4 data = fetch_from_resource_cache_1(glyph_address);
-    // Select XY or ZW based on glyph index.
-    // We use "!= 0" instead of "== 1" here in order to work around a driver
-    // bug with equality comparisons on integers.
-    vec2 glyph = mix(data.xy, data.zw, bvec2(glyph_index % 2 != 0));
-
-    return Glyph(glyph);
-}
-
-struct PrimitiveInstance {
-    int prim_address;
-    int specific_prim_address;
-    int render_task_index;
-    int clip_task_index;
-    int scroll_node_id;
-    int clip_chain_rect_index;
-    int z;
-    int user_data0;
-    int user_data1;
-    int user_data2;
-};
-
-PrimitiveInstance fetch_prim_instance() {
-    PrimitiveInstance pi;
-
-    pi.prim_address = aData0.x;
-    pi.specific_prim_address = pi.prim_address + VECS_PER_PRIM_HEADER;
-    pi.render_task_index = aData0.y % 0x10000;
-    pi.clip_task_index = aData0.y / 0x10000;
-    pi.clip_chain_rect_index = aData0.z;
-    pi.scroll_node_id = aData0.w;
-    pi.z = aData1.x;
-    pi.user_data0 = aData1.y;
-    pi.user_data1 = aData1.z;
-    pi.user_data2 = aData1.w;
-
-    return pi;
-}
-
-struct CompositeInstance {
-    int render_task_index;
-    int src_task_index;
-    int backdrop_task_index;
-    int user_data0;
-    int user_data1;
-    float z;
-    int user_data2;
-    int user_data3;
-};
-
-CompositeInstance fetch_composite_instance() {
-    CompositeInstance ci;
-
-    ci.render_task_index = aData0.x;
-    ci.src_task_index = aData0.y;
-    ci.backdrop_task_index = aData0.z;
-    ci.z = float(aData0.w);
-
-    ci.user_data0 = aData1.x;
-    ci.user_data1 = aData1.y;
-    ci.user_data2 = aData1.z;
-    ci.user_data3 = aData1.w;
-
-    return ci;
-}
-
-struct Primitive {
-    ClipScrollNode scroll_node;
-    ClipArea clip_area;
-    PictureTask task;
-    RectWithSize local_rect;
-    RectWithSize local_clip_rect;
-    int specific_prim_address;
-    int user_data0;
-    int user_data1;
-    int user_data2;
-    float z;
-};
-
-struct PrimitiveGeometry {
-    RectWithSize local_rect;
-    RectWithSize local_clip_rect;
-};
-
-PrimitiveGeometry fetch_primitive_geometry(int address) {
-    vec4 geom[2] = fetch_from_resource_cache_2(address);
-    return PrimitiveGeometry(RectWithSize(geom[0].xy, geom[0].zw),
-                             RectWithSize(geom[1].xy, geom[1].zw));
-}
-
-Primitive load_primitive() {
-    PrimitiveInstance pi = fetch_prim_instance();
-
-    Primitive prim;
-
-    prim.scroll_node = fetch_clip_scroll_node(pi.scroll_node_id);
-    prim.clip_area = fetch_clip_area(pi.clip_task_index);
-    prim.task = fetch_picture_task(pi.render_task_index);
-
-    RectWithSize clip_chain_rect = fetch_clip_chain_rect(pi.clip_chain_rect_index);
-
-    PrimitiveGeometry geom = fetch_primitive_geometry(pi.prim_address);
-    prim.local_rect = geom.local_rect;
-    prim.local_clip_rect = intersect_rects(clip_chain_rect, geom.local_clip_rect);
-
-    prim.specific_prim_address = pi.specific_prim_address;
-    prim.user_data0 = pi.user_data0;
-    prim.user_data1 = pi.user_data1;
-    prim.user_data2 = pi.user_data2;
-    prim.z = float(pi.z);
-
-    return prim;
-}
-
-
 struct VertexInfo {
     vec2 local_pos;
     vec2 screen_pos;
     float w;
     vec2 snapped_device_pos;
 };
 
 VertexInfo write_vertex(RectWithSize instance_rect,
@@ -322,62 +197,16 @@ VertexInfo write_transform_vertex(RectWi
         device_pos,
         world_pos.w,
         device_pos
     );
 
     return vi;
 }
 
-VertexInfo write_transform_vertex_primitive(Primitive prim) {
-    return write_transform_vertex(
-        prim.local_rect,
-        prim.local_rect,
-        prim.local_clip_rect,
-        vec4(1.0),
-        prim.z,
-        prim.scroll_node,
-        prim.task,
-        true
-    );
-}
-
-struct GlyphResource {
-    vec4 uv_rect;
-    float layer;
-    vec2 offset;
-    float scale;
-};
-
-GlyphResource fetch_glyph_resource(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    return GlyphResource(data[0], data[1].x, data[1].yz, data[1].w);
-}
-
-struct TextRun {
-    vec4 color;
-    vec4 bg_color;
-    vec2 offset;
-};
-
-TextRun fetch_text_run(int address) {
-    vec4 data[3] = fetch_from_resource_cache_3(address);
-    return TextRun(data[0], data[1], data[2].xy);
-}
-
-struct Image {
-    vec4 stretch_size_and_tile_spacing;  // Size of the actual image and amount of space between
-                                         //     tiled instances of this image.
-};
-
-Image fetch_image(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
-    return Image(data);
-}
-
 void write_clip(vec2 global_pos, ClipArea area) {
     vec2 uv = global_pos +
               area.common_data.task_rect.p0 -
               area.screen_origin;
     vClipMaskUvBounds = vec4(
         area.common_data.task_rect.p0,
         area.common_data.task_rect.p0 + area.common_data.task_rect.size
     );
--- a/gfx/webrender/res/ps_split_composite.glsl
+++ b/gfx/webrender/res/ps_split_composite.glsl
@@ -29,19 +29,37 @@ SplitGeometry fetch_split_geometry(int a
 }
 
 vec3 bilerp(vec3 a, vec3 b, vec3 c, vec3 d, float s, float t) {
     vec3 x = mix(a, b, t);
     vec3 y = mix(c, d, t);
     return mix(x, y, s);
 }
 
+struct SplitCompositeInstance {
+    int render_task_index;
+    int src_task_index;
+    int polygons_address;
+    float z;
+};
+
+SplitCompositeInstance fetch_composite_instance() {
+    SplitCompositeInstance ci;
+
+    ci.render_task_index = aData0.x;
+    ci.src_task_index = aData0.y;
+    ci.polygons_address = aData0.z;
+    ci.z = float(aData0.w);
+
+    return ci;
+}
+
 void main(void) {
-    CompositeInstance ci = fetch_composite_instance();
-    SplitGeometry geometry = fetch_split_geometry(ci.user_data0);
+    SplitCompositeInstance ci = fetch_composite_instance();
+    SplitGeometry geometry = fetch_split_geometry(ci.polygons_address);
     PictureTask src_task = fetch_picture_task(ci.src_task_index);
     PictureTask dest_task = fetch_picture_task(ci.render_task_index);
 
     vec2 dest_origin = dest_task.common_data.task_rect.p0 -
                        dest_task.content_origin;
 
     vec3 world_pos = bilerp(geometry.points[0], geometry.points[1],
                             geometry.points[3], geometry.points[2],
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -10,60 +10,188 @@ flat varying vec4 vUvBorder;
 flat varying vec2 vMaskSwizzle;
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
 varying vec4 vUvClip;
 #endif
 
 #ifdef WR_VERTEX_SHADER
 
+struct Glyph {
+    vec2 offset;
+};
+
+Glyph fetch_glyph(int specific_prim_address,
+                  int glyph_index) {
+    // Two glyphs are packed in each texel in the GPU cache.
+    int glyph_address = specific_prim_address +
+                        VECS_PER_TEXT_RUN +
+                        glyph_index / 2;
+    vec4 data = fetch_from_resource_cache_1(glyph_address);
+    // Select XY or ZW based on glyph index.
+    // We use "!= 0" instead of "== 1" here in order to work around a driver
+    // bug with equality comparisons on integers.
+    vec2 glyph = mix(data.xy, data.zw, bvec2(glyph_index % 2 != 0));
+
+    return Glyph(glyph);
+}
+
+struct GlyphResource {
+    vec4 uv_rect;
+    float layer;
+    vec2 offset;
+    float scale;
+};
+
+GlyphResource fetch_glyph_resource(int address) {
+    vec4 data[2] = fetch_from_resource_cache_2(address);
+    return GlyphResource(data[0], data[1].x, data[1].yz, data[1].w);
+}
+
+struct TextRun {
+    vec4 color;
+    vec4 bg_color;
+    vec2 offset;
+};
+
+TextRun fetch_text_run(int address) {
+    vec4 data[3] = fetch_from_resource_cache_3(address);
+    return TextRun(data[0], data[1], data[2].xy);
+}
+
+struct PrimitiveInstance {
+    int prim_address;
+    int specific_prim_address;
+    int render_task_index;
+    int clip_task_index;
+    int scroll_node_id;
+    int clip_chain_rect_index;
+    int z;
+    int user_data0;
+    int user_data1;
+    int user_data2;
+};
+
+PrimitiveInstance fetch_prim_instance() {
+    PrimitiveInstance pi;
+
+    pi.prim_address = aData0.x;
+    pi.specific_prim_address = pi.prim_address + VECS_PER_PRIM_HEADER;
+    pi.render_task_index = aData0.y % 0x10000;
+    pi.clip_task_index = aData0.y / 0x10000;
+    pi.clip_chain_rect_index = aData0.z;
+    pi.scroll_node_id = aData0.w;
+    pi.z = aData1.x;
+    pi.user_data0 = aData1.y;
+    pi.user_data1 = aData1.z;
+    pi.user_data2 = aData1.w;
+
+    return pi;
+}
+
+struct Primitive {
+    ClipScrollNode scroll_node;
+    ClipArea clip_area;
+    PictureTask task;
+    RectWithSize local_rect;
+    RectWithSize local_clip_rect;
+    int specific_prim_address;
+    int user_data0;
+    int user_data1;
+    int user_data2;
+    float z;
+};
+
+struct PrimitiveGeometry {
+    RectWithSize local_rect;
+    RectWithSize local_clip_rect;
+};
+
+PrimitiveGeometry fetch_primitive_geometry(int address) {
+    vec4 geom[2] = fetch_from_resource_cache_2(address);
+    return PrimitiveGeometry(RectWithSize(geom[0].xy, geom[0].zw),
+                             RectWithSize(geom[1].xy, geom[1].zw));
+}
+
+Primitive load_primitive() {
+    PrimitiveInstance pi = fetch_prim_instance();
+
+    Primitive prim;
+
+    prim.scroll_node = fetch_clip_scroll_node(pi.scroll_node_id);
+    prim.clip_area = fetch_clip_area(pi.clip_task_index);
+    prim.task = fetch_picture_task(pi.render_task_index);
+
+    RectWithSize clip_chain_rect = fetch_clip_chain_rect(pi.clip_chain_rect_index);
+
+    PrimitiveGeometry geom = fetch_primitive_geometry(pi.prim_address);
+    prim.local_rect = geom.local_rect;
+    prim.local_clip_rect = intersect_rects(clip_chain_rect, geom.local_clip_rect);
+
+    prim.specific_prim_address = pi.specific_prim_address;
+    prim.user_data0 = pi.user_data0;
+    prim.user_data1 = pi.user_data1;
+    prim.user_data2 = pi.user_data2;
+    prim.z = float(pi.z);
+
+    return prim;
+}
+
 VertexInfo write_text_vertex(vec2 clamped_local_pos,
                              RectWithSize local_clip_rect,
                              float z,
                              ClipScrollNode scroll_node,
                              PictureTask task,
+                             vec2 text_offset,
                              RectWithSize snap_rect,
                              vec2 snap_bias) {
-    // Ensure the transform does not contain a subpixel translation to ensure
-    // that glyph snapping is stable for equivalent glyph subpixel positions.
-#if defined(WR_FEATURE_GLYPH_TRANSFORM)
-    bool remove_subpx_offset = true;
-#else
-    bool remove_subpx_offset = scroll_node.is_axis_aligned;
-#endif
-
-    if (remove_subpx_offset) {
-        scroll_node.transform[3].xy = floor(scroll_node.transform[3].xy + 0.5);
-    }
-
     // Transform the current vertex to world space.
     vec4 world_pos = scroll_node.transform * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
-    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
+    float device_scale = uDevicePixelRatio / world_pos.w;
+    vec2 device_pos = world_pos.xy * device_scale;
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos -
                      task.content_origin +
                      task.common_data.task_rect.p0;
 
-#ifdef WR_FEATURE_GLYPH_TRANSFORM
-    // For transformed subpixels, we just need to align the glyph origin to a device pixel.
-    final_pos += floor(snap_rect.p0 + snap_bias) - snap_rect.p0;
+#if defined(WR_FEATURE_GLYPH_TRANSFORM)
+    bool remove_subpx_offset = true;
 #else
     // Compute the snapping offset only if the scroll node transform is axis-aligned.
-    if (scroll_node.is_axis_aligned) {
+    bool remove_subpx_offset = scroll_node.is_axis_aligned;
+#endif
+    if (remove_subpx_offset) {
+        // Ensure the transformed text offset does not contain a subpixel translation
+        // such that glyph snapping is stable for equivalent glyph subpixel positions.
+        vec2 world_text_offset = mat2(scroll_node.transform) * text_offset;
+        vec2 device_text_pos = (scroll_node.transform[3].xy + world_text_offset) * device_scale;
+        final_pos += floor(device_text_pos + 0.5) - device_text_pos;
+
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+        // For transformed subpixels, we just need to align the glyph origin to a device pixel.
+        // The transformed text offset has already been snapped, so remove it from the glyph
+        // origin when snapping the glyph.
+        vec2 snap_offset = snap_rect.p0 - world_text_offset * device_scale;
+        final_pos += floor(snap_offset + snap_bias) - snap_offset;
+#else
+        // The transformed text offset has already been snapped, so remove it from the transform
+        // when snapping the glyph.
+        mat4 snap_transform = scroll_node.transform;
+        snap_transform[3].xy = -world_text_offset;
         final_pos += compute_snap_offset(
             clamped_local_pos,
-            scroll_node.transform,
+            snap_transform,
             snap_rect,
             snap_bias
         );
+#endif
     }
-#endif
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(
         clamped_local_pos,
         device_pos,
         world_pos.w,
         final_pos
@@ -149,16 +277,17 @@ void main(void) {
             break;
     }
 
     VertexInfo vi = write_text_vertex(local_pos,
                                       prim.local_clip_rect,
                                       prim.z,
                                       prim.scroll_node,
                                       prim.task,
+                                      text.offset,
                                       glyph_rect,
                                       snap_bias);
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     vec2 f = (transform * vi.local_pos - glyph_rect.p0) / glyph_rect.size;
     vUvClip = vec4(f, 1.0 - f);
 #else
     vec2 f = (vi.local_pos - glyph_rect.p0) / glyph_rect.size;
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -7,18 +7,18 @@ use api::{DeviceUintRect, DeviceUintPoin
 use api::{DeviceIntPoint, YuvColorSpace, YuvFormat};
 use api::{LayoutToWorldTransform, WorldPixel};
 use clip::{ClipSource, ClipStore, ClipWorkItem};
 use clip_scroll_tree::{CoordinateSystemId};
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheHandle, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushInstance, ClipChainRectIndex};
-use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex, CompositePrimitiveInstance};
-use gpu_types::{PrimitiveInstance, RasterizationSpace, SimplePrimitiveInstance, ZBufferId};
+use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex, SplitCompositeInstance};
+use gpu_types::{PrimitiveInstance, RasterizationSpace, GlyphInstance, ZBufferId};
 use gpu_types::ZBufferIdGenerator;
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 use picture::{PictureCompositeMode, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{BrushKind, BrushPrimitive, BrushSegmentTaskId, DeferredResolve};
 use prim_store::{EdgeAaSegmentMask, ImageSource, PictureIndex, PrimitiveIndex, PrimitiveKind};
 use prim_store::{PrimitiveMetadata, PrimitiveRun, PrimitiveStore, VisibleGradientTile};
 use prim_store::{BorderSource};
@@ -515,27 +515,23 @@ impl AlphaBatchBuilder {
             let batch = self.batch_list.get_suitable_batch(key, &pic_metadata.screen_rect.as_ref().expect("bug").clipped);
 
             let source_task_id = pic
                 .surface
                 .as_ref()
                 .expect("BUG: unexpected surface in splitting")
                 .resolve_render_task_id();
             let source_task_address = render_tasks.get_task_address(source_task_id);
-            let gpu_address = gpu_handle.as_int(gpu_cache);
+            let gpu_address = gpu_cache.get_address(&gpu_handle);
 
-            let instance = CompositePrimitiveInstance::new(
+            let instance = SplitCompositeInstance::new(
                 task_address,
                 source_task_address,
-                RenderTaskAddress(0),
                 gpu_address,
-                0,
                 z_generator.next(),
-                0,
-                0,
             );
 
             batch.push(PrimitiveInstance::from(instance));
         }
     }
 
     // Helper to add an entire primitive run to a batch list.
     // TODO(gw): Restructure this so the param list isn't quite
@@ -636,24 +632,16 @@ impl AlphaBatchBuilder {
             GpuCacheAddress::invalid()
         } else {
             gpu_cache.get_address(&prim_metadata.gpu_location)
         };
 
         let clip_task_address = prim_metadata
             .clip_task_id
             .map_or(OPAQUE_TASK_ADDRESS, |id| render_tasks.get_task_address(id));
-        let base_instance = SimplePrimitiveInstance::new(
-            prim_cache_address,
-            task_address,
-            clip_task_address,
-            clip_chain_rect_index,
-            scroll_id,
-            z,
-        );
 
         let specified_blend_mode = ctx.prim_store.get_blend_mode(prim_metadata);
 
         let non_segmented_blend_mode = if !prim_metadata.opacity.is_opaque ||
             prim_metadata.clip_task_id.is_some() ||
             transform_kind == TransformedRectKind::Complex {
             specified_blend_mode
         } else {
@@ -1164,16 +1152,24 @@ impl AlphaBatchBuilder {
                                     BlendMode::PremultipliedAlpha,
                                     ShaderColorMode::ColorBitmap,
                                 )
                             }
                         };
 
                         let key = BatchKey::new(kind, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(key, &task_relative_bounding_rect);
+                        let base_instance = GlyphInstance::new(
+                            prim_cache_address,
+                            task_address,
+                            clip_task_address,
+                            clip_chain_rect_index,
+                            scroll_id,
+                            z,
+                        );
 
                         for glyph in glyphs {
                             batch.push(base_instance.build(
                                 glyph.index_in_text_run,
                                 glyph.uv_rect_address.as_int(),
                                 (subpx_dir as u32 as i32) << 16 |
                                 (color_mode as u32 as i32),
                             ));
@@ -1466,19 +1462,20 @@ impl BrushPrimitive {
                         resolve_image(
                             request,
                             resource_cache,
                             gpu_cache,
                             deferred_resolves,
                         )
                     }
                     BorderSource::Border { ref handle, .. } => {
-                        let rt_handle = handle
-                            .as_ref()
-                            .expect("bug: render task handle not allocated");
+                        let rt_handle = match *handle {
+                            Some(ref handle) => handle,
+                            None => return None,
+                        };
                         let rt_cache_entry = resource_cache
                             .get_cached_render_task(rt_handle);
                         resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
                     }
                 };
 
                 if cache_item.texture_id == SourceTexture::Invalid {
                     None
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -599,17 +599,17 @@ fn get_edge_info(
 
 impl BorderRenderTaskInfo {
     pub fn new(
         rect: &LayoutRect,
         border: &NormalBorder,
         widths: &BorderWidths,
         scale: LayoutToDeviceScale,
         brush_segments: &mut Vec<BrushSegment>,
-    ) -> Self {
+    ) -> Option<Self> {
         let mut border_segments = Vec::new();
 
         let dp_width_top = (widths.top * scale.0).ceil();
         let dp_width_bottom = (widths.bottom * scale.0).ceil();
         let dp_width_left = (widths.left * scale.0).ceil();
         let dp_width_right = (widths.right * scale.0).ceil();
 
         let dp_corner_tl = (border.radius.top_left * scale).ceil();
@@ -679,16 +679,20 @@ impl BorderRenderTaskInfo {
         );
         let inner_height = left_edge_info.device_size.max(right_edge_info.device_size).ceil();
 
         let size = DeviceSize::new(
             dp_size_tl.width.max(dp_size_bl.width) + inner_width + dp_size_tr.width.max(dp_size_br.width),
             dp_size_tl.height.max(dp_size_tr.height) + inner_height + dp_size_bl.height.max(dp_size_br.height),
         );
 
+        if size.width == 0.0 || size.height == 0.0 {
+            return None;
+        }
+
         add_edge_segment(
             LayoutRect::from_floats(
                 rect.origin.x,
                 rect.origin.y + local_size_tl.height + left_edge_info.local_offset,
                 rect.origin.x + widths.left,
                 rect.origin.y + local_size_tl.height + left_edge_info.local_offset + left_edge_info.local_size,
             ),
             DeviceRect::from_floats(
@@ -855,20 +859,20 @@ impl BorderRenderTaskInfo {
             DeviceSize::new(dp_width_left, dp_width_bottom),
             dp_corner_bl,
             BorderSegment::BottomLeft,
             EdgeAaSegmentMask::BOTTOM | EdgeAaSegmentMask::LEFT,
             &mut border_segments,
             brush_segments,
         );
 
-        BorderRenderTaskInfo {
+        Some(BorderRenderTaskInfo {
             border_segments,
             size: size.to_i32(),
-        }
+        })
     }
 
     pub fn build_instances(&self, border: &NormalBorder) -> Vec<BorderInstance> {
         let mut instances = Vec::new();
 
         for info in &self.border_segments {
             let (side0, side1, flip0, flip1) = match info.segment {
                 BorderSegment::Left => (&border.left, &border.left, false, false),
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -138,35 +138,35 @@ pub struct ClipMaskBorderCornerDotDash {
 // 32 bytes per instance should be enough for anyone!
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PrimitiveInstance {
     data: [i32; 8],
 }
 
-pub struct SimplePrimitiveInstance {
+pub struct GlyphInstance {
     pub specific_prim_address: GpuCacheAddress,
     pub task_address: RenderTaskAddress,
     pub clip_task_address: RenderTaskAddress,
     pub clip_chain_rect_index: ClipChainRectIndex,
     pub scroll_id: ClipScrollNodeIndex,
     pub z: ZBufferId,
 }
 
-impl SimplePrimitiveInstance {
+impl GlyphInstance {
     pub fn new(
         specific_prim_address: GpuCacheAddress,
         task_address: RenderTaskAddress,
         clip_task_address: RenderTaskAddress,
         clip_chain_rect_index: ClipChainRectIndex,
         scroll_id: ClipScrollNodeIndex,
         z: ZBufferId,
     ) -> Self {
-        SimplePrimitiveInstance {
+        GlyphInstance {
             specific_prim_address,
             task_address,
             clip_task_address,
             clip_chain_rect_index,
             scroll_id,
             z,
         }
     }
@@ -182,63 +182,51 @@ impl SimplePrimitiveInstance {
                 data0,
                 data1,
                 data2,
             ],
         }
     }
 }
 
-pub struct CompositePrimitiveInstance {
+pub struct SplitCompositeInstance {
     pub task_address: RenderTaskAddress,
     pub src_task_address: RenderTaskAddress,
-    pub backdrop_task_address: RenderTaskAddress,
-    pub data0: i32,
-    pub data1: i32,
+    pub polygons_address: GpuCacheAddress,
     pub z: ZBufferId,
-    pub data2: i32,
-    pub data3: i32,
 }
 
-impl CompositePrimitiveInstance {
+impl SplitCompositeInstance {
     pub fn new(
         task_address: RenderTaskAddress,
         src_task_address: RenderTaskAddress,
-        backdrop_task_address: RenderTaskAddress,
-        data0: i32,
-        data1: i32,
+        polygons_address: GpuCacheAddress,
         z: ZBufferId,
-        data2: i32,
-        data3: i32,
     ) -> Self {
-        CompositePrimitiveInstance {
+        SplitCompositeInstance {
             task_address,
             src_task_address,
-            backdrop_task_address,
-            data0,
-            data1,
+            polygons_address,
             z,
-            data2,
-            data3,
         }
     }
 }
 
-impl From<CompositePrimitiveInstance> for PrimitiveInstance {
-    fn from(instance: CompositePrimitiveInstance) -> Self {
+impl From<SplitCompositeInstance> for PrimitiveInstance {
+    fn from(instance: SplitCompositeInstance) -> Self {
         PrimitiveInstance {
             data: [
                 instance.task_address.0 as i32,
                 instance.src_task_address.0 as i32,
-                instance.backdrop_task_address.0 as i32,
+                instance.polygons_address.as_int(),
                 instance.z.0,
-                instance.data0,
-                instance.data1,
-                instance.data2,
-                instance.data3,
+                0,
+                0,
+                0,
+                0,
             ],
         }
     }
 }
 
 bitflags! {
     /// Flags that define how the common brush shader
     /// code should process this instance.
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -815,18 +815,17 @@ impl TextRunPrimitiveCpu {
             let subpx_dir = font.get_subpx_dir();
             let src_glyphs = display_list.get(self.glyph_range);
 
             // TODO(gw): If we support chunks() on AuxIter
             //           in the future, this code below could
             //           be much simpler...
             let mut gpu_block = [0.0; 4];
             for (i, src) in src_glyphs.enumerate() {
-                let layout_offset = src.point + self.offset;
-                let world_offset = font.transform.transform(&layout_offset);
+                let world_offset = font.transform.transform(&src.point);
                 let device_offset = device_pixel_scale.transform_point(&world_offset);
                 let key = GlyphKey::new(src.index, device_offset, subpx_dir);
                 self.glyph_keys.push(key);
 
                 // Two glyphs are packed per GPU block.
 
                 if (i & 1) == 0 {
                     gpu_block[0] = src.point.x;
@@ -1419,49 +1418,49 @@ impl PrimitiveStore {
                 let scale = world_scale * frame_context.device_pixel_scale;
                 let scale_au = Au::from_f32_px(scale.0);
                 let needs_update = scale_au != cache_key.scale;
                 let mut new_segments = Vec::new();
 
                 if needs_update {
                     cache_key.scale = scale_au;
 
-                    *task_info = Some(BorderRenderTaskInfo::new(
+                    *task_info = BorderRenderTaskInfo::new(
                         &metadata.local_rect,
                         border,
                         widths,
                         scale,
                         &mut new_segments,
-                    ));
+                    );
                 }
 
-                let task_info = task_info.as_ref().unwrap();
+                *handle = task_info.as_ref().map(|task_info| {
+                    frame_state.resource_cache.request_render_task(
+	                    RenderTaskCacheKey {
+	                        size: DeviceIntSize::zero(),
+	                        kind: RenderTaskCacheKeyKind::Border(cache_key.clone()),
+	                    },
+	                    frame_state.gpu_cache,
+	                    frame_state.render_tasks,
+	                    None,
+	                    false,          // todo
+	                    |render_tasks| {
+	                        let task = RenderTask::new_border(
+	                            task_info.size,
+	                            task_info.build_instances(border),
+	                        );
 
-                *handle = Some(frame_state.resource_cache.request_render_task(
-                    RenderTaskCacheKey {
-                        size: DeviceIntSize::zero(),
-                        kind: RenderTaskCacheKeyKind::Border(cache_key.clone()),
-                    },
-                    frame_state.gpu_cache,
-                    frame_state.render_tasks,
-                    None,
-                    false,          // todo
-                    |render_tasks| {
-                        let task = RenderTask::new_border(
-                            task_info.size,
-                            task_info.build_instances(border),
-                        );
+	                        let task_id = render_tasks.add(task);
+
+	                        pic_state.tasks.push(task_id);
 
-                        let task_id = render_tasks.add(task);
-
-                        pic_state.tasks.push(task_id);
-
-                        task_id
-                    }
-                ));
+	                        task_id
+	                    }
+	                )
+	            });
 
                 if needs_update {
                     brush.segment_desc = Some(BrushSegmentDescriptor {
                         segments: new_segments,
                         clip_mask_kind: BrushClipMaskKind::Unknown,
                     });
 
                     // The segments have changed, so force the GPU cache to
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -757,16 +757,17 @@ impl RenderBackend {
 
                         if !transaction_msg.is_empty() || ops.render {
                             self.update_document(
                                 document_id,
                                 transaction_msg,
                                 &mut frame_counter,
                                 &mut profile_counters,
                                 ops,
+                                true,
                             );
                         }
                     },
                     SceneBuilderResult::FlushComplete(tx) => {
                         tx.send(()).ok();
                     }
                     SceneBuilderResult::Stopped => {
                         panic!("We haven't sent a Stop yet, how did we get a Stopped back?");
@@ -957,30 +958,32 @@ impl RenderBackend {
             }
             ApiMsg::UpdateDocument(document_id, doc_msgs) => {
                 self.update_document(
                     document_id,
                     doc_msgs,
                     frame_counter,
                     profile_counters,
                     DocumentOps::nop(),
+                    false,
                 )
             }
         }
 
         true
     }
 
     fn update_document(
         &mut self,
         document_id: DocumentId,
         mut transaction_msg: TransactionMsg,
         frame_counter: &mut u32,
         profile_counters: &mut BackendProfileCounters,
         initial_op: DocumentOps,
+        has_built_scene: bool,
     ) {
         let mut op = initial_op;
 
         for scene_msg in transaction_msg.scene_ops.drain(..) {
             let _timer = profile_counters.total_time.timer();
             op.combine(
                 self.process_scene_msg(
                     document_id,
@@ -1068,17 +1071,17 @@ impl RenderBackend {
             // borrow ck hack for profile_counters
             let (pending_update, rendered_document) = {
                 let _timer = profile_counters.total_time.timer();
 
                 let rendered_document = doc.render(
                     &mut self.resource_cache,
                     &mut self.gpu_cache,
                     &mut profile_counters.resources,
-                    op.build,
+                    op.build || has_built_scene,
                 );
 
                 debug!("generated frame for document {:?} with {} passes",
                     document_id, rendered_document.frame.passes.len());
 
                 let msg = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates());
                 self.result_tx.send(msg).unwrap();
 
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -520,35 +520,35 @@ impl ResourceCache {
 
         self.resources.image_templates.insert(image_key, resource);
     }
 
     pub fn update_image_template(
         &mut self,
         image_key: ImageKey,
         descriptor: ImageDescriptor,
-        mut data: ImageData,
+        data: ImageData,
         dirty_rect: Option<DeviceUintRect>,
     ) {
         let max_texture_size = self.max_texture_size();
         let image = match self.resources.image_templates.get_mut(image_key) {
             Some(res) => res,
             None => panic!("Attempt to update non-existent image"),
         };
 
         let mut tiling = image.tiling;
         if tiling.is_none() && Self::should_tile(max_texture_size, &descriptor, &data) {
             tiling = Some(DEFAULT_TILE_SIZE);
         }
 
-        if let ImageData::Blob(ref mut blob) = data {
+        if let ImageData::Blob(ref blob) = data {
             self.blob_image_renderer
                 .as_mut()
                 .unwrap()
-                .update(image_key, Arc::clone(&blob), dirty_rect);
+                .update(image_key, Arc::clone(blob), dirty_rect);
         }
 
         *image = ImageResource {
             descriptor,
             data,
             epoch: Epoch(image.epoch.0 + 1),
             tiling,
             dirty_rect: match (dirty_rect, image.dirty_rect) {
@@ -640,43 +640,43 @@ impl ResourceCache {
         } else {
             return
         };
 
         // We can start a worker thread rasterizing right now, if:
         //  - The image is a blob.
         //  - The blob hasn't already been requested this frame.
         if self.pending_image_requests.insert(request) && template.data.is_blob() {
-            if let Some(ref mut renderer) = self.blob_image_renderer {
-                let (offset, size) = match template.tiling {
-                    Some(tile_size) => {
-                        let tile_offset = request.tile.unwrap();
-                        let actual_size = compute_tile_size(
-                            &template.descriptor,
-                            tile_size,
-                            tile_offset,
-                        );
-                        let offset = DevicePoint::new(
-                            tile_offset.x as f32 * tile_size as f32,
-                            tile_offset.y as f32 * tile_size as f32,
-                        );
+            let (offset, size) = match template.tiling {
+                Some(tile_size) => {
+                    let tile_offset = request.tile.unwrap();
+                    let actual_size = compute_tile_size(
+                        &template.descriptor,
+                        tile_size,
+                        tile_offset,
+                    );
 
-                        if let Some(dirty) = dirty_rect {
-                            if intersect_for_tile(dirty, actual_size, tile_size, tile_offset).is_none() {
-                                // don't bother requesting unchanged tiles
-                                self.pending_image_requests.remove(&request);
-                                return
-                            }
+                    if let Some(dirty) = dirty_rect {
+                        if intersect_for_tile(dirty, actual_size, tile_size, tile_offset).is_none() {
+                            // don't bother requesting unchanged tiles
+                            self.pending_image_requests.remove(&request);
+                            return
                         }
+                    }
 
-                        (offset, actual_size)
-                    }
-                    None => (DevicePoint::zero(), template.descriptor.size),
-                };
+                    let offset = DevicePoint::new(
+                        tile_offset.x as f32 * tile_size as f32,
+                        tile_offset.y as f32 * tile_size as f32,
+                    );
+                    (offset, actual_size)
+                }
+                None => (DevicePoint::zero(), template.descriptor.size),
+            };
 
+            if let Some(ref mut renderer) = self.blob_image_renderer {
                 renderer.request(
                     &self.resources,
                     request.into(),
                     &BlobImageDescriptor {
                         size,
                         offset,
                         format: template.descriptor.format,
                     },
@@ -921,17 +921,16 @@ impl ResourceCache {
         );
         self.texture_cache.end_frame(texture_cache_profile);
     }
 
     fn update_texture_cache(&mut self, gpu_cache: &mut GpuCache) {
         for request in self.pending_image_requests.drain() {
             let image_template = self.resources.image_templates.get_mut(request.key).unwrap();
             debug_assert!(image_template.data.uses_texture_cache());
-            let mut dirty_rect = image_template.dirty_rect;
 
             let image_data = match image_template.data {
                 ImageData::Raw(..) | ImageData::External(..) => {
                     // Safe to clone here since the Raw image data is an
                     // Arc, and the external image data is small.
                     image_template.data.clone()
                 }
                 ImageData::Blob(..) => {
@@ -957,54 +956,51 @@ impl ResourceCache {
                         }
                         Err(BlobImageError::Other(msg)) => {
                             panic!("Vector image error {}", msg);
                         }
                     }
                 }
             };
 
-            let descriptor = if let Some(tile) = request.tile {
-                let tile_size = image_template.tiling.unwrap();
-                let image_descriptor = &image_template.descriptor;
+            let entry = self.cached_images.get_mut(&request).as_mut().unwrap();
+            let mut descriptor = image_template.descriptor.clone();
+            //TODO: erasing the dirty rectangle here is incorrect for tiled images,
+            // since other tile requests may follow that depend on it
+            let mut local_dirty_rect = image_template.dirty_rect.take();
 
-                let clipped_tile_size = compute_tile_size(image_descriptor, tile_size, tile);
+            if let Some(tile) = request.tile {
+                let tile_size = image_template.tiling.unwrap();
+                let clipped_tile_size = compute_tile_size(&descriptor, tile_size, tile);
 
-                if let Some(dirty) = dirty_rect {
-                    dirty_rect = intersect_for_tile(dirty, clipped_tile_size, tile_size, tile);
-                    if dirty_rect.is_none() {
-                        continue
+                if let Some(ref mut rect) = local_dirty_rect {
+                    match intersect_for_tile(*rect, clipped_tile_size, tile_size, tile) {
+                        Some(intersection) => *rect = intersection,
+                        None => {
+                            // if re-uploaded, the dirty rect is ignored anyway
+                            debug_assert!(self.texture_cache.needs_upload(&entry.texture_cache_handle))
+                        }
                     }
                 }
 
                 // The tiled image could be stored on the CPU as one large image or be
                 // already broken up into tiles. This affects the way we compute the stride
                 // and offset.
                 let tiled_on_cpu = image_template.data.is_blob();
-
-                let (stride, offset) = if tiled_on_cpu {
-                    (image_descriptor.stride, 0)
-                } else {
-                    let bpp = image_descriptor.format.bytes_per_pixel();
-                    let stride = image_descriptor.compute_stride();
-                    let offset = image_descriptor.offset +
+                if !tiled_on_cpu {
+                    let bpp = descriptor.format.bytes_per_pixel();
+                    let stride = descriptor.compute_stride();
+                    descriptor.stride = Some(stride);
+                    descriptor.offset +=
                         tile.y as u32 * tile_size as u32 * stride +
                         tile.x as u32 * tile_size as u32 * bpp;
-                    (Some(stride), offset)
-                };
+                }
 
-                ImageDescriptor {
-                    size: clipped_tile_size,
-                    stride,
-                    offset,
-                    ..*image_descriptor
-                }
-            } else {
-                image_template.descriptor.clone()
-            };
+                descriptor.size = clipped_tile_size;
+            }
 
             let filter = match request.rendering {
                 ImageRendering::Pixelated => {
                     TextureFilter::Nearest
                 }
                 ImageRendering::Auto | ImageRendering::CrispEdges => {
                     // If the texture uses linear filtering, enable mipmaps and
                     // trilinear filtering, for better image quality. We only
@@ -1022,29 +1018,28 @@ impl ResourceCache {
                     ) {
                         TextureFilter::Trilinear
                     } else {
                         TextureFilter::Linear
                     }
                 }
             };
 
-            let entry = self.cached_images.get_mut(&request).as_mut().unwrap();
+            //Note: at this point, the dirty rectangle is local to the descriptor space
             self.texture_cache.update(
                 &mut entry.texture_cache_handle,
                 descriptor,
                 filter,
                 Some(image_data),
                 [0.0; 3],
-                dirty_rect,
+                local_dirty_rect,
                 gpu_cache,
                 None,
                 UvRectKind::Rect,
             );
-            image_template.dirty_rect = None;
         }
     }
 
     pub fn end_frame(&mut self) {
         debug_assert_eq!(self.state, State::QueryResources);
         self.state = State::Idle;
     }
 
--- a/gfx/webrender/src/segment.rs
+++ b/gfx/webrender/src/segment.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, ClipMode, LayoutPoint, LayoutPointAu, LayoutRect, LayoutSize};
 use app_units::Au;
 use prim_store::EdgeAaSegmentMask;
 use std::{cmp, usize};
-use util::extract_inner_rect_safe;
+use util::{extract_inner_rect_safe, RectHelpers};
 
 bitflags! {
     pub struct ItemFlags: u8 {
         const X_ACTIVE = 0x1;
         const Y_ACTIVE = 0x2;
         const HAS_MASK = 0x4;
     }
 }
@@ -205,73 +205,81 @@ impl SegmentBuilder {
     // the pixels of a clip-mask. It might be useful for other types
     // such as dashed and dotted borders in the future.
     pub fn push_mask_region(
         &mut self,
         outer_rect: LayoutRect,
         inner_rect: LayoutRect,
         inner_clip_mode: Option<ClipMode>,
     ) {
-        debug_assert!(outer_rect.contains_rect(&inner_rect));
+        if inner_rect.is_well_formed_and_nonempty() {
+            debug_assert!(outer_rect.contains_rect(&inner_rect));
 
-        let p0 = outer_rect.origin;
-        let p1 = inner_rect.origin;
-        let p2 = inner_rect.bottom_right();
-        let p3 = outer_rect.bottom_right();
+            let p0 = outer_rect.origin;
+            let p1 = inner_rect.origin;
+            let p2 = inner_rect.bottom_right();
+            let p3 = outer_rect.bottom_right();
 
-        let segments = &[
-            LayoutRect::new(
-                LayoutPoint::new(p0.x, p0.y),
-                LayoutSize::new(p1.x - p0.x, p1.y - p0.y),
-            ),
-            LayoutRect::new(
-                LayoutPoint::new(p2.x, p0.y),
-                LayoutSize::new(p3.x - p2.x, p1.y - p0.y),
-            ),
-            LayoutRect::new(
-                LayoutPoint::new(p2.x, p2.y),
-                LayoutSize::new(p3.x - p2.x, p3.y - p2.y),
-            ),
-            LayoutRect::new(
-                LayoutPoint::new(p0.x, p2.y),
-                LayoutSize::new(p1.x - p0.x, p3.y - p2.y),
-            ),
-            LayoutRect::new(
-                LayoutPoint::new(p1.x, p0.y),
-                LayoutSize::new(p2.x - p1.x, p1.y - p0.y),
-            ),
-            LayoutRect::new(
-                LayoutPoint::new(p2.x, p1.y),
-                LayoutSize::new(p3.x - p2.x, p2.y - p1.y),
-            ),
-            LayoutRect::new(
-                LayoutPoint::new(p1.x, p2.y),
-                LayoutSize::new(p2.x - p1.x, p3.y - p2.y),
-            ),
-            LayoutRect::new(
-                LayoutPoint::new(p0.x, p1.y),
-                LayoutSize::new(p1.x - p0.x, p2.y - p1.y),
-            ),
-        ];
+            let segments = &[
+                LayoutRect::new(
+                    LayoutPoint::new(p0.x, p0.y),
+                    LayoutSize::new(p1.x - p0.x, p1.y - p0.y),
+                ),
+                LayoutRect::new(
+                    LayoutPoint::new(p2.x, p0.y),
+                    LayoutSize::new(p3.x - p2.x, p1.y - p0.y),
+                ),
+                LayoutRect::new(
+                    LayoutPoint::new(p2.x, p2.y),
+                    LayoutSize::new(p3.x - p2.x, p3.y - p2.y),
+                ),
+                LayoutRect::new(
+                    LayoutPoint::new(p0.x, p2.y),
+                    LayoutSize::new(p1.x - p0.x, p3.y - p2.y),
+                ),
+                LayoutRect::new(
+                    LayoutPoint::new(p1.x, p0.y),
+                    LayoutSize::new(p2.x - p1.x, p1.y - p0.y),
+                ),
+                LayoutRect::new(
+                    LayoutPoint::new(p2.x, p1.y),
+                    LayoutSize::new(p3.x - p2.x, p2.y - p1.y),
+                ),
+                LayoutRect::new(
+                    LayoutPoint::new(p1.x, p2.y),
+                    LayoutSize::new(p2.x - p1.x, p3.y - p2.y),
+                ),
+                LayoutRect::new(
+                    LayoutPoint::new(p0.x, p1.y),
+                    LayoutSize::new(p1.x - p0.x, p2.y - p1.y),
+                ),
+            ];
 
-        for segment in segments {
+            for segment in segments {
+                self.items.push(Item::new(
+                    *segment,
+                    None,
+                    true
+                ));
+            }
+
+            if inner_clip_mode.is_some() {
+                self.items.push(Item::new(
+                    inner_rect,
+                    inner_clip_mode,
+                    false,
+                ));
+            }
+        } else {
             self.items.push(Item::new(
-                *segment,
+                outer_rect,
                 None,
                 true
             ));
         }
-
-        if inner_clip_mode.is_some() {
-            self.items.push(Item::new(
-                inner_rect,
-                inner_clip_mode,
-                false,
-            ));
-        }
     }
 
     // Push some kind of clipping region into the segment builder.
     // If radius is None, it's a simple rect.
     pub fn push_clip_rect(
         &mut self,
         rect: LayoutRect,
         radius: Option<BorderRadius>,
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -375,16 +375,26 @@ impl TextureCache {
                     }
                     None => true,
                 }
             }
             None => true,
         }
     }
 
+    // Returns true if the image needs to be uploaded to the texture cache:
+    // either the handle has no entry yet (never uploaded), or its entry is
+    // no longer found in `self.entries` (evicted on a previous frame).
+    pub fn needs_upload(&self, handle: &TextureCacheHandle) -> bool {
+        match handle.entry {
+            Some(ref handle) => self.entries.get_opt(handle).is_none(),
+            None => true,
+        }
+    }
+
     pub fn max_texture_size(&self) -> u32 {
         self.max_texture_size
     }
 
     pub fn pending_updates(&mut self) -> TextureUpdateList {
         mem::replace(&mut self.pending_updates, TextureUpdateList::new())
     }
 
@@ -1206,17 +1216,17 @@ impl TextureUpdate {
         data: ImageData,
         descriptor: &ImageDescriptor,
         origin: DeviceUintPoint,
         size: DeviceUintSize,
         texture_id: CacheTextureId,
         layer_index: i32,
         dirty_rect: Option<DeviceUintRect>,
     ) -> TextureUpdate {
-        let data_src = match data {
+        let source = match data {
             ImageData::Blob(..) => {
                 panic!("The vector image should have been rasterized.");
             }
             ImageData::External(ext_image) => match ext_image.image_type {
                 ExternalImageType::TextureHandle(_) => {
                     panic!("External texture handle should not go through texture_cache.");
                 }
                 ExternalImageType::Buffer => TextureUpdateSource::External {
@@ -1231,35 +1241,43 @@ impl TextureUpdate {
                 assert!(bytes.len() >= finish as usize);
 
                 TextureUpdateSource::Bytes { data: bytes }
             }
         };
 
         let update_op = match dirty_rect {
             Some(dirty) => {
+                // The dirty rectangle need not lie entirely within the area, but it must at least intersect it.
                 let stride = descriptor.compute_stride();
                 let offset = descriptor.offset + dirty.origin.y * stride + dirty.origin.x * descriptor.format.bytes_per_pixel();
-                let origin =
-                    DeviceUintPoint::new(origin.x + dirty.origin.x, origin.y + dirty.origin.y);
+
                 TextureUpdateOp::Update {
-                    rect: DeviceUintRect::new(origin, dirty.size),
-                    source: data_src,
+                    rect: DeviceUintRect::new(
+                        DeviceUintPoint::new(origin.x + dirty.origin.x, origin.y + dirty.origin.y),
+                        DeviceUintSize::new(
+                            dirty.size.width.min(size.width - dirty.origin.x),
+                            dirty.size.height.min(size.height - dirty.origin.y),
+                        ),
+                    ),
+                    source,
                     stride: Some(stride),
                     offset,
                     layer_index,
                 }
             }
-            None => TextureUpdateOp::Update {
-                rect: DeviceUintRect::new(origin, size),
-                source: data_src,
-                stride: descriptor.stride,
-                offset: descriptor.offset,
-                layer_index,
-            },
+            None => {
+                TextureUpdateOp::Update {
+                    rect: DeviceUintRect::new(origin, size),
+                    source,
+                    stride: descriptor.stride,
+                    offset: descriptor.offset,
+                    layer_index,
+                }
+            }
         };
 
         TextureUpdate {
             id: texture_id,
             op: update_op,
         }
     }
 }
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -13,18 +13,18 @@ deserialize = []
 
 [dependencies]
 app_units = "0.6"
 bincode = "1.0"
 bitflags = "1.0"
 byteorder = "1.2.1"
 ipc-channel = {version = "0.10.0", optional = true}
 euclid = { version = "0.17", features = ["serde"] }
-serde = { version = "=1.0.58", features = ["rc"] }
-serde_derive = { version = "=1.0.58", features = ["deserialize_in_place"] }
+serde = { version = "=1.0.66", features = ["rc"] }
+serde_derive = { version = "=1.0.66", features = ["deserialize_in_place"] }
 serde_bytes = "0.10"
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.5"
 core-graphics = "0.13"
 
 [target.'cfg(target_os = "windows")'.dependencies]
--- a/gfx/webrender_api/src/image.rs
+++ b/gfx/webrender_api/src/image.rs
@@ -181,17 +181,17 @@ pub trait BlobImageRenderer: Send {
     fn add(&mut self, key: ImageKey, data: Arc<BlobImageData>, tiling: Option<TileSize>);
 
     fn update(&mut self, key: ImageKey, data: Arc<BlobImageData>, dirty_rect: Option<DeviceUintRect>);
 
     fn delete(&mut self, key: ImageKey);
 
     fn request(
         &mut self,
-        services: &BlobImageResources,
+        resources: &BlobImageResources,
         key: BlobImageRequest,
         descriptor: &BlobImageDescriptor,
         dirty_rect: Option<DeviceUintRect>,
     );
 
     fn resolve(&mut self, key: BlobImageRequest) -> BlobImageResult;
 
     fn delete_font(&mut self, key: FontKey);
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-dd30fbb21c876b252b805b607bd04f3bab1fd228
+cf98ad4d63729c678a7575eb9bce36794da5e270
--- a/gfx/wrench/src/blob.rs
+++ b/gfx/wrench/src/blob.rs
@@ -21,17 +21,17 @@ fn deserialize_blob(blob: &[u8]) -> Resu
     let mut iter = blob.iter();
     return match (iter.next(), iter.next(), iter.next(), iter.next()) {
         (Some(&r), Some(&g), Some(&b), Some(&a)) => Ok(ColorU::new(r, g, b, a)),
         (Some(&a), None, None, None) => Ok(ColorU::new(a, a, a, a)),
         _ => Err(()),
     };
 }
 
-// perform floor((x * a) / 255. + 0.5) see "Three wrongs make a right" for deriviation
+// Performs floor((x * a) / 255. + 0.5); see "Three wrongs make a right" for the derivation.
 fn premul(x: u8, a: u8) -> u8 {
     let t = (x as u32) * (a as u32) + 128;
     ((t + (t >> 8)) >> 8) as u8
 }
 
 // This is the function that applies the deserialized drawing commands and generates
 // actual image data.
 fn render_blob(
@@ -50,18 +50,19 @@ fn render_blob(
     // Generate a per-tile pattern to see it in the demo. For a real use case it would not
     // make sense for the rendered content to depend on its tile.
     let tile_checker = match tile {
         Some((_, tile)) => (tile.x % 2 == 0) != (tile.y % 2 == 0),
         None => true,
     };
 
     let mut dirty_rect = dirty_rect.unwrap_or(DeviceUintRect::new(
-        DeviceUintPoint::new(0, 0),
-        DeviceUintSize::new(descriptor.size.width, descriptor.size.height)));
+        DeviceUintPoint::origin(),
+        descriptor.size,
+    ));
 
     if let Some((tile_size, tile)) = tile {
         dirty_rect = intersect_for_tile(dirty_rect, size2(tile_size as u32, tile_size as u32),
                                         tile_size, tile)
             .expect("empty rects should be culled by webrender");
     }