Bug 1479432 - Update webrender to commit 7a1b919e37d6cd0155077aa90f98cfcdf9fa5bae. r=jrmuizel
author Kartikaya Gupta <kgupta@mozilla.com>
Thu, 02 Aug 2018 10:20:04 -0400
changeset 429887 4088993297c4ce754df796d2d0a0bc107b6073e6
parent 429886 32a8875a1721cd9369931bb5e1b74da448455bff
child 429888 b6f3d38d8da5c786f3160c24960001d88ae86a28
push id 106015
push user nbeleuzu@mozilla.com
push date Thu, 02 Aug 2018 22:36:07 +0000
treeherder mozilla-inbound@055397065a0b [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1479432
milestone 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1479432 - Update webrender to commit 7a1b919e37d6cd0155077aa90f98cfcdf9fa5bae. r=jrmuizel MozReview-Commit-ID: 1SJgRWEp2qf
gfx/webrender/Cargo.toml
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_blend.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_mix_blend.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/border.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/clip_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/device/query_gl.rs
gfx/webrender/src/display_list_flattener.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/hit_test.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/spatial_node.rs
gfx/webrender/src/util.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/display_list.rs
gfx/webrender_bindings/Cargo.toml
gfx/webrender_bindings/revision.txt
gfx/wrench/Cargo.toml
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -13,30 +13,30 @@ profiler = ["thread_profiler/thread_prof
 debugger = ["ws", "serde_json", "serde", "image", "base64", "debug_renderer"]
 capture = ["webrender_api/serialize", "ron", "serde", "debug_renderer"]
 replay = ["webrender_api/deserialize", "ron", "serde"]
 debug_renderer = []
 pathfinder = ["pathfinder_font_renderer", "pathfinder_gfx_utils", "pathfinder_partitioner", "pathfinder_path_utils"]
 serialize_program = ["serde"]
 
 [dependencies]
-app_units = "0.6"
+app_units = "0.7"
 base64 = { optional = true, version = "0.6" }
 bincode = "1.0"
 bitflags = "1.0"
 byteorder = "1.0"
 cfg-if = "0.1.2"
-euclid = "0.18"
+euclid = "0.19"
 fxhash = "0.2.1"
 gleam = "0.6"
 image = { optional = true, version = "0.19" }
 lazy_static = "1"
 log = "0.4"
-num-traits = "0.1.43"
-plane-split = "0.10"
+num-traits = "0.2"
+plane-split = "0.12"
 png = { optional = true, version = "0.12" }
 rayon = "1"
 ron = { optional = true, version = "0.1.7" }
 serde = { optional = true, version = "1.0", features = ["serde_derive"] }
 serde_json = { optional = true, version = "1.0" }
 smallvec = "0.6"
 thread_profiler = "0.1.1"
 time = "0.1"
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -65,39 +65,37 @@ void main(void) {
         //           items. For now, just ensure it has no
         //           effect. We can tidy this up as we move
         //           more items to be brush shaders.
 #ifdef WR_FEATURE_ALPHA_PASS
         init_transform_vs(vec4(vec2(-1000000.0), vec2(1000000.0)));
 #endif
     } else {
         bvec4 edge_mask = notEqual(edge_flags & ivec4(1, 2, 4, 8), ivec4(0));
-        bool do_perspective_interpolation = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0;
 
         vi = write_transform_vertex(
             local_segment_rect,
             ph.local_rect,
             ph.local_clip_rect,
             mix(vec4(0.0), vec4(1.0), edge_mask),
             ph.z,
             transform,
-            pic_task,
-            do_perspective_interpolation
+            pic_task
         );
     }
 
     // For brush instances in the alpha pass, always write
     // out clip information.
     // TODO(gw): It's possible that we might want alpha
     //           shaders that don't clip in the future,
     //           but it's reasonable to assume that one
     //           implies the other, for now.
 #ifdef WR_FEATURE_ALPHA_PASS
     write_clip(
-        vi.screen_pos,
+        vi.world_pos,
         clip_area
     );
 #endif
 
     // Run the specific brush VS code to write interpolators.
     brush_vs(
         vi,
         ph.specific_prim_address,
--- a/gfx/webrender/res/brush_blend.glsl
+++ b/gfx/webrender/res/brush_blend.glsl
@@ -24,19 +24,19 @@ void brush_vs(
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
     vec4 unused
 ) {
     PictureTask src_task = fetch_picture_task(user_data.x);
     vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
-    vec2 uv = vi.snapped_device_pos +
-              src_task.common_data.task_rect.p0 -
-              src_task.content_origin;
+    vec2 uv = snap_device_pos(vi) +
+        src_task.common_data.task_rect.p0 -
+        src_task.content_origin;
     vUv = vec3(uv / texture_size, src_task.common_data.texture_layer_index);
 
     vec2 uv0 = src_task.common_data.task_rect.p0;
     vec2 uv1 = uv0 + src_task.common_data.task_rect.size;
     vUvClipBounds = vec4(uv0, uv1) / texture_size.xyxy;
 
     float lumR = 0.2126;
     float lumG = 0.7152;
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -6,17 +6,18 @@
 
 #include shared,prim_shared,brush
 
 #ifdef WR_FEATURE_ALPHA_PASS
 varying vec2 vLocalPos;
 #endif
 
 // Interpolated uv coordinates in xy, and layer in z.
-varying vec3 vUv;
+// W is 1 when perspective interpolation is enabled.
+varying vec4 vUv;
 // Normalized bounds of the source image in the texture.
 flat varying vec4 vUvBounds;
 // Normalized bounds of the source image in the texture, adjusted to avoid
 // sampling artifacts.
 flat varying vec4 vUvSampleBounds;
 
 #ifdef WR_FEATURE_ALPHA_PASS
 flat varying vec4 vColor;
@@ -101,16 +102,17 @@ void brush_vs(
             stretch_size.y = (texel_rect.w - texel_rect.y) / uDevicePixelRatio;
         }
 
         uv0 = res.uv_rect.p0 + texel_rect.xy;
         uv1 = res.uv_rect.p0 + texel_rect.zw;
     }
 
     vUv.z = res.layer;
+    vUv.w = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
 
     // Handle case where the UV coords are inverted (e.g. from an
     // external image).
     vec2 min_uv = min(uv0, uv1);
     vec2 max_uv = max(uv0, uv1);
 
     vUvSampleBounds = vec4(
         min_uv + vec2(0.5),
@@ -146,16 +148,20 @@ void brush_vs(
     }
 #endif
 
     // Offset and scale vUv here to avoid doing it in the fragment shader.
     vec2 repeat = local_rect.size / stretch_size;
     vUv.xy = mix(uv0, uv1, f) - min_uv;
     vUv.xy /= texture_size;
     vUv.xy *= repeat.xy;
+    if ((brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) == 0) {
+        // Multiply by W to compensate for perspective interpolation.
+        vUv.xy *= gl_Position.w;
+    }
 
 #ifdef WR_FEATURE_TEXTURE_RECT
     vUvBounds = vec4(0.0, 0.0, vec2(textureSize(sColor0)));
 #else
     vUvBounds = vec4(min_uv, max_uv) / texture_size.xyxy;
 #endif
 
 #ifdef WR_FEATURE_ALPHA_PASS
@@ -191,38 +197,40 @@ void brush_vs(
 #endif
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 Fragment brush_fs() {
     vec2 uv_size = vUvBounds.zw - vUvBounds.xy;
+    // Unapply the W scaler when no perspective interpolation is enabled.
+    vec2 base_uv = vUv.xy * mix(gl_FragCoord.w, 1.0, vUv.w);
 
 #ifdef WR_FEATURE_ALPHA_PASS
     // This prevents the uv on the top and left parts of the primitive that was inflated
     // for anti-aliasing purposes from going beyound the range covered by the regular
     // (non-inflated) primitive.
-    vec2 local_uv = max(vUv.xy, vec2(0.0));
+    vec2 local_uv = max(base_uv, vec2(0.0));
 
     // Handle horizontal and vertical repetitions.
     vec2 repeated_uv = mod(local_uv, uv_size) + vUvBounds.xy;
 
     // This takes care of the bottom and right inflated parts.
     // We do it after the modulo because the latter wraps around the values exactly on
     // the right and bottom edges, which we do not want.
     if (local_uv.x >= vTileRepeat.x * uv_size.x) {
         repeated_uv.x = vUvBounds.z;
     }
     if (local_uv.y >= vTileRepeat.y * uv_size.y) {
         repeated_uv.y = vUvBounds.w;
     }
 #else
     // Handle horizontal and vertical repetitions.
-    vec2 repeated_uv = mod(vUv.xy, uv_size) + vUvBounds.xy;
+    vec2 repeated_uv = mod(base_uv, uv_size) + vUvBounds.xy;
 #endif
 
     // Clamp the uvs to avoid sampling artifacts.
     vec2 uv = clamp(repeated_uv, vUvSampleBounds.xy, vUvSampleBounds.zw);
 
     vec4 texel = TEX_SAMPLE(sColor0, vec3(uv, vUv.z));
 
     Fragment frag;
--- a/gfx/webrender/res/brush_mix_blend.glsl
+++ b/gfx/webrender/res/brush_mix_blend.glsl
@@ -18,27 +18,28 @@ void brush_vs(
     RectWithSize local_rect,
     RectWithSize segment_rect,
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
     vec4 unused
 ) {
+    vec2 snapped_device_pos = snap_device_pos(vi);
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vOp = user_data.x;
 
     PictureTask src_task = fetch_picture_task(user_data.z);
-    vec2 src_uv = vi.snapped_device_pos +
+    vec2 src_uv = snapped_device_pos +
                   src_task.common_data.task_rect.p0 -
                   src_task.content_origin;
     vSrcUv = vec3(src_uv / texture_size, src_task.common_data.texture_layer_index);
 
     RenderTaskCommonData backdrop_task = fetch_render_task_common_data(user_data.y);
-    vec2 backdrop_uv = vi.snapped_device_pos +
+    vec2 backdrop_uv = snapped_device_pos +
                        backdrop_task.task_rect.p0 -
                        src_task.content_origin;
     vBackdropUv = vec3(backdrop_uv / texture_size, backdrop_task.texture_layer_index);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 vec3 Multiply(vec3 Cb, vec3 Cs) {
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -22,17 +22,18 @@ uniform sampler2DArray sCacheRGBA8;
 uniform sampler2DArray sSharedCacheA8;
 
 vec2 clamp_rect(vec2 pt, RectWithSize rect) {
     return clamp(pt, rect.p0, rect.p0 + rect.size);
 }
 
 // TODO: convert back to RectWithEndPoint if driver issues are resolved, if ever.
 flat varying vec4 vClipMaskUvBounds;
-varying vec3 vClipMaskUv;
+// XY and W are homogeneous coordinates, Z is the layer index
+varying vec4 vClipMaskUv;
 
 
 #ifdef WR_VERTEX_SHADER
 
 #define COLOR_MODE_FROM_PASS          0
 #define COLOR_MODE_ALPHA              1
 #define COLOR_MODE_SUBPX_CONST_COLOR  2
 #define COLOR_MODE_SUBPX_BG_PASS0     3
@@ -81,21 +82,25 @@ PrimitiveHeader fetch_prim_header(int in
     ph.transform_id = data1.x;
     ph.user_data = data1.yzw;
 
     return ph;
 }
 
 struct VertexInfo {
     vec2 local_pos;
-    vec2 screen_pos;
-    float w;
-    vec2 snapped_device_pos;
+    vec2 snap_offset;
+    vec4 world_pos;
 };
 
+//Note: this function is unsafe for `vi.world_pos.w <= 0.0`
+vec2 snap_device_pos(VertexInfo vi) {
+    return vi.world_pos.xy * uDevicePixelRatio / max(0.0, vi.world_pos.w) + vi.snap_offset;
+}
+
 VertexInfo write_vertex(RectWithSize instance_rect,
                         RectWithSize local_clip_rect,
                         float z,
                         Transform transform,
                         PictureTask task,
                         RectWithSize snap_rect) {
 
     // Select the corner of the local rect that we are processing.
@@ -114,28 +119,26 @@ VertexInfo write_vertex(RectWithSize ins
 
     // Transform the current vertex to world space.
     vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
-    vec2 snapped_device_pos = device_pos + snap_offset;
-    vec2 final_pos = snapped_device_pos -
+    vec2 final_pos = device_pos + snap_offset -
                      task.content_origin +
                      task.common_data.task_rect.p0;
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(
         clamped_local_pos,
-        device_pos,
-        world_pos.w,
-        snapped_device_pos
+        snap_offset,
+        world_pos
     );
 
     return vi;
 }
 
 float cross2(vec2 v0, vec2 v1) {
     return v0.x * v1.y - v0.y * v1.x;
 }
@@ -156,18 +159,17 @@ vec2 intersect_lines(vec2 p0, vec2 p1, v
 }
 
 VertexInfo write_transform_vertex(RectWithSize local_segment_rect,
                                   RectWithSize local_prim_rect,
                                   RectWithSize local_clip_rect,
                                   vec4 clip_edge_mask,
                                   float z,
                                   Transform transform,
-                                  PictureTask task,
-                                  bool do_perspective_interpolation) {
+                                  PictureTask task) {
     // Calculate a clip rect from local_rect + local clip
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_clip_rect);
     RectWithEndpoint segment_rect = to_rect_with_endpoint(local_segment_rect);
     segment_rect.p0 = clamp(segment_rect.p0, clip_rect.p0, clip_rect.p1);
     segment_rect.p1 = clamp(segment_rect.p1, clip_rect.p0, clip_rect.p1);
 
     // Calculate a clip rect from local_rect + local clip
     RectWithEndpoint prim_rect = to_rect_with_endpoint(local_prim_rect);
@@ -187,73 +189,75 @@ VertexInfo write_transform_vertex(RectWi
     float extrude_amount = 2.0;
     vec4 extrude_distance = vec4(extrude_amount) * clip_edge_mask;
     local_segment_rect.p0 -= extrude_distance.xy;
     local_segment_rect.size += extrude_distance.xy + extrude_distance.zw;
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = local_segment_rect.p0 + local_segment_rect.size * aPosition.xy;
 
-    // Transform the current vertex to the world cpace.
-    vec4 world_pos = transform.m * vec4(local_pos, 0.0, 1.0);
-
     // Convert the world positions to device pixel space.
-    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
     vec2 task_offset = task.common_data.task_rect.p0 - task.content_origin;
 
-    // Force w = 1, if we don't want perspective interpolation (for
-    // example, drawing a screen-space quad on an element with a
-    // perspective transform).
-    world_pos.w = mix(1.0, world_pos.w, do_perspective_interpolation);
+    // Transform the current vertex to the world space.
+    vec4 world_pos = transform.m * vec4(local_pos, 0.0, 1.0);
+    vec4 final_pos = vec4(
+        world_pos.xy * uDevicePixelRatio + task_offset * world_pos.w,
+        z * world_pos.w,
+        world_pos.w
+    );
 
-    // We want the world space coords to be perspective divided by W.
-    // We also want that to apply to any interpolators. However, we
-    // want a constant Z across the primitive, since we're using it
-    // for draw ordering - so scale by the W coord to ensure this.
-    vec4 final_pos = vec4(device_pos + task_offset, z, 1.0) * world_pos.w;
     gl_Position = uTransform * final_pos;
 
     init_transform_vs(mix(
         vec4(prim_rect.p0, prim_rect.p1),
         vec4(segment_rect.p0, segment_rect.p1),
         clip_edge_mask
     ));
 
     VertexInfo vi = VertexInfo(
         local_pos,
-        device_pos,
-        world_pos.w,
-        device_pos
+        vec2(0.0),
+        world_pos
     );
 
     return vi;
 }
 
-void write_clip(vec2 global_pos, ClipArea area) {
-    vec2 uv = global_pos +
-              area.common_data.task_rect.p0 -
-              area.screen_origin;
+void write_clip(vec4 world_pos, ClipArea area) {
+    vec2 uv = world_pos.xy * uDevicePixelRatio +
+        world_pos.w * (area.common_data.task_rect.p0 - area.screen_origin);
     vClipMaskUvBounds = vec4(
         area.common_data.task_rect.p0,
         area.common_data.task_rect.p0 + area.common_data.task_rect.size
     );
-    vClipMaskUv = vec3(uv, area.common_data.texture_layer_index);
+    vClipMaskUv = vec4(uv, area.common_data.texture_layer_index, world_pos.w);
 }
 #endif //WR_VERTEX_SHADER
 
 #ifdef WR_FRAGMENT_SHADER
 
 float do_clip() {
+    // check for the dummy bounds, which are given to the opaque objects
+    if (vClipMaskUvBounds.xy == vClipMaskUvBounds.zw) {
+        return 1.0;
+    }
     // anything outside of the mask is considered transparent
+    //Note: we assume gl_FragCoord.w == interpolated(1 / vClipMaskUv.w)
+    vec2 mask_uv = vClipMaskUv.xy * gl_FragCoord.w;
     bvec4 inside = lessThanEqual(
-        vec4(vClipMaskUvBounds.xy, vClipMaskUv.xy),
-        vec4(vClipMaskUv.xy, vClipMaskUvBounds.zw));
-    // check for the dummy bounds, which are given to the opaque objects
-    return vClipMaskUvBounds.xy == vClipMaskUvBounds.zw ? 1.0:
-        all(inside) ? texelFetch(sCacheA8, ivec3(vClipMaskUv), 0).r : 0.0;
+        vec4(vClipMaskUvBounds.xy, mask_uv),
+        vec4(mask_uv, vClipMaskUvBounds.zw));
+    // bail out if the pixel is outside the valid bounds
+    if (!all(inside)) {
+        return 0.0;
+    }
+    // finally, the slow path - fetch the mask value from an image
+    ivec3 tc = ivec3(mask_uv, vClipMaskUv.z);
+    return texelFetch(sCacheA8, tc, 0).r;
 }
 
 #ifdef WR_FEATURE_DITHERING
 vec4 dither(vec4 color) {
     const int matrix_mask = 7;
 
     ivec2 pos = ivec2(gl_FragCoord.xy) & ivec2(matrix_mask);
     float noise_normalized = (texelFetch(sDither, pos, 0).r * 255.0 + 0.5) / 64.0;
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -68,62 +68,62 @@ VertexInfo write_text_vertex(vec2 clampe
                              RectWithSize snap_rect,
                              vec2 snap_bias) {
     // Transform the current vertex to world space.
     vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     float device_scale = uDevicePixelRatio / world_pos.w;
     vec2 device_pos = world_pos.xy * device_scale;
-
-    // Apply offsets for the render task to get correct screen location.
-    vec2 final_pos = device_pos -
-                     task.content_origin +
-                     task.common_data.task_rect.p0;
+    vec2 snap_offset = vec2(0.0);
 
 #if defined(WR_FEATURE_GLYPH_TRANSFORM)
     bool remove_subpx_offset = true;
 #else
-    // Compute the snapping offset only if the scroll node transform is axis-aligned.
     bool remove_subpx_offset = transform.is_axis_aligned;
 #endif
+    // Compute the snapping offset only if the scroll node transform is axis-aligned.
     if (remove_subpx_offset) {
         // Ensure the transformed text offset does not contain a subpixel translation
         // such that glyph snapping is stable for equivalent glyph subpixel positions.
         vec2 world_text_offset = mat2(transform.m) * text_offset;
         vec2 device_text_pos = (transform.m[3].xy + world_text_offset) * device_scale;
-        final_pos += floor(device_text_pos + 0.5) - device_text_pos;
+        snap_offset += floor(device_text_pos + 0.5) - device_text_pos;
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
         // For transformed subpixels, we just need to align the glyph origin to a device pixel.
         // The transformed text offset has already been snapped, so remove it from the glyph
         // origin when snapping the glyph.
-        vec2 snap_offset = snap_rect.p0 - world_text_offset * device_scale;
-        final_pos += floor(snap_offset + snap_bias) - snap_offset;
+        vec2 rough_offset = snap_rect.p0 - world_text_offset * device_scale;
+        snap_offset += floor(rough_offset + snap_bias) - rough_offset;
 #else
         // The transformed text offset has already been snapped, so remove it from the transform
         // when snapping the glyph.
         mat4 snap_transform = transform.m;
         snap_transform[3].xy = -world_text_offset;
-        final_pos += compute_snap_offset(
+        snap_offset += compute_snap_offset(
             clamped_local_pos,
             snap_transform,
             snap_rect,
             snap_bias
         );
 #endif
     }
 
+    // Apply offsets for the render task to get correct screen location.
+    vec2 final_pos = device_pos + snap_offset -
+                     task.content_origin +
+                     task.common_data.task_rect.p0;
+
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(
         clamped_local_pos,
-        device_pos,
-        world_pos.w,
-        final_pos
+        snap_offset,
+        world_pos
     );
 
     return vi;
 }
 
 void main(void) {
     int prim_header_address = aData.x;
     int glyph_index = aData.y;
@@ -218,17 +218,17 @@ void main(void) {
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     vec2 f = (glyph_transform * vi.local_pos - glyph_rect.p0) / glyph_rect.size;
     vUvClip = vec4(f, 1.0 - f);
 #else
     vec2 f = (vi.local_pos - glyph_rect.p0) / glyph_rect.size;
 #endif
 
-    write_clip(vi.screen_pos, clip_area);
+    write_clip(vi.world_pos, clip_area);
 
     switch (color_mode) {
         case COLOR_MODE_ALPHA:
         case COLOR_MODE_BITMAP:
             vMaskSwizzle = vec2(0.0, 1.0);
             vColor = text.color;
             break;
         case COLOR_MODE_SUBPX_BG_PASS2:
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -1783,17 +1783,17 @@ impl ClipBatcher {
         coordinate_system_id: CoordinateSystemId,
         resource_cache: &ResourceCache,
         gpu_cache: &GpuCache,
         clip_store: &ClipStore,
         transforms: &TransformPalette,
     ) {
         let mut coordinate_system_id = coordinate_system_id;
         for work_item in clips.iter() {
-            let info = clip_store.get(work_item.clip_sources_index);
+            let info = &clip_store[work_item.clip_sources_index];
             let instance = ClipMaskInstance {
                 render_task_address: task_address,
                 transform_id: transforms.get_id(info.spatial_node_index),
                 segment: 0,
                 clip_data_address: GpuCacheAddress::invalid(),
                 resource_address: GpuCacheAddress::invalid(),
             };
 
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -4,20 +4,30 @@
 
 use api::{BorderRadius, BorderSide, BorderStyle, BorderWidths, ColorF};
 use api::{ColorU, DeviceRect, DeviceSize, LayoutSizeAu, LayoutPrimitiveInfo, LayoutToDeviceScale};
 use api::{DevicePixel, DeviceVector2D, DevicePoint, DeviceIntSize, LayoutRect, LayoutSize, NormalBorder};
 use app_units::Au;
 use ellipse::Ellipse;
 use display_list_flattener::DisplayListFlattener;
 use gpu_types::{BorderInstance, BorderSegment, BrushFlags};
-use prim_store::{BrushKind, BrushPrimitive, BrushSegment};
+use prim_store::{BrushKind, BrushPrimitive, BrushSegment, VECS_PER_SEGMENT};
 use prim_store::{BorderSource, EdgeAaSegmentMask, PrimitiveContainer, ScrollNodeAndClipChain};
+use renderer::{MAX_VERTEX_TEXTURE_WIDTH};
 use util::{lerp, RectHelpers};
 
+// Using 2048 as the maximum radius in device space before which we
+// start stretching is up for debate.
+// The value must be chosen so that the corners will not use an
+// unreasonable amount of memory but should allow crisp corners in the
+// common cases.
+
+/// Maximum resolution in device pixels at which borders are rasterized.
+pub const MAX_BORDER_RESOLUTION: u32 = 2048;
+
 trait AuSizeConverter {
     fn to_au(&self) -> LayoutSizeAu;
 }
 
 impl AuSizeConverter for LayoutSize {
     fn to_au(&self) -> LayoutSizeAu {
         LayoutSizeAu::new(
             Au::from_f32_px(self.width),
@@ -340,28 +350,34 @@ impl BorderCornerClipSource {
             outer_scale.x * self.radius.width,
             outer_scale.y * self.radius.height,
         );
         let clip_sign = DeviceVector2D::new(
             1.0 - 2.0 * outer_scale.x,
             1.0 - 2.0 * outer_scale.y,
         );
 
+        // No point in pushing more clips as it will blow up the maximum amount of
+        // segments per primitive later down the road.
+        // See #2915 for a better fix.
+        let clip_limit = MAX_VERTEX_TEXTURE_WIDTH / VECS_PER_SEGMENT;
+        let max_clip_count = self.max_clip_count.min(clip_limit);
+
         match self.kind {
             BorderCornerClipKind::Dash => {
                 // Get the correct dash arc length.
                 let dash_arc_length =
-                    0.5 * self.ellipse.total_arc_length / self.max_clip_count as f32;
+                    0.5 * self.ellipse.total_arc_length / max_clip_count as f32;
                 // Start the first dash at one quarter the length of a single dash
                 // along the arc line. This is arbitrary but looks reasonable in
                 // most cases. We need to spend some time working on a more
                 // sophisticated dash placement algorithm that takes into account
                 // the offset of the dashes along edge segments.
                 let mut current_arc_length = 0.25 * dash_arc_length;
-                for _ in 0 .. self.max_clip_count {
+                for _ in 0 .. max_clip_count {
                     let arc_length0 = current_arc_length;
                     current_arc_length += dash_arc_length;
 
                     let arc_length1 = current_arc_length;
                     current_arc_length += dash_arc_length;
 
                     let alpha = self.ellipse.find_angle_for_arc_length(arc_length0);
                     let beta =  self.ellipse.find_angle_for_arc_length(arc_length1);
@@ -396,17 +412,17 @@ impl BorderCornerClipSource {
                         tangent0.y,
                         point1.x,
                         point1.y,
                         tangent1.x,
                         tangent1.y,
                     ]);
                 }
             }
-            BorderCornerClipKind::Dot if self.max_clip_count == 1 => {
+            BorderCornerClipKind::Dot if max_clip_count == 1 => {
                 let dot_diameter = lerp(self.widths.width, self.widths.height, 0.5);
                 dot_dash_data.push([
                     self.widths.width / 2.0, self.widths.height / 2.0, 0.5 * dot_diameter, 0.,
                     0., 0., 0., 0.,
                 ]);
             }
             BorderCornerClipKind::Dot => {
                 let mut forward_dots = Vec::new();
@@ -417,17 +433,17 @@ impl BorderCornerClipSource {
                 // ellipse arc. This ensures that we always end up with an exact
                 // half dot at each end of the arc, to match up with the edges.
                 forward_dots.push(DotInfo::new(self.widths.width, self.widths.width));
                 back_dots.push(DotInfo::new(
                     self.ellipse.total_arc_length - self.widths.height,
                     self.widths.height,
                 ));
 
-                for dot_index in 0 .. self.max_clip_count {
+                for dot_index in 0 .. max_clip_count {
                     let prev_forward_pos = *forward_dots.last().unwrap();
                     let prev_back_pos = *back_dots.last().unwrap();
 
                     // Select which end of the arc to place a dot from.
                     // This just alternates between the start and end of
                     // the arc, which ensures that there is always an
                     // exact half-dot at each end of the ellipse.
                     let going_forward = dot_index & 1 == 0;
@@ -922,16 +938,33 @@ impl BorderRenderTaskInfo {
                 &mut instances,
                 info.widths,
                 info.radius,
             );
         }
 
         instances
     }
+
+    /// Computes the maximum scale that we allow for this set of border radii.
+    /// Capping the scale will result in rendering very large corners at a lower
+    /// resolution and stretching them, so they will have the right shape, but
+    /// blurrier.
+    pub fn get_max_scale(radii: &BorderRadius) -> LayoutToDeviceScale {
+        let r = radii.top_left.width
+            .max(radii.top_left.height)
+            .max(radii.top_right.width)
+            .max(radii.top_right.height)
+            .max(radii.bottom_left.width)
+            .max(radii.bottom_left.height)
+            .max(radii.bottom_right.width)
+            .max(radii.bottom_right.height);
+
+        LayoutToDeviceScale::new(MAX_BORDER_RESOLUTION as f32 / r)
+    }
 }
 
 fn add_brush_segment(
     image_rect: LayoutRect,
     task_rect: DeviceRect,
     brush_flags: BrushFlags,
     edge_flags: EdgeAaSegmentMask,
     brush_segments: &mut Vec<BrushSegment>,
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -9,110 +9,142 @@ use border::{ensure_no_corner_overlap};
 use box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowClipSource, BoxShadowCacheKey};
 use clip_scroll_tree::{ClipChainIndex, CoordinateSystemId, SpatialNodeIndex};
 use ellipse::Ellipse;
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
 use gpu_types::BoxShadowStretchMode;
 use prim_store::{ClipData, ImageMaskData};
 use render_task::to_cache_size;
 use resource_cache::{ImageRequest, ResourceCache};
-use util::{LayoutToWorldFastTransform, MaxRect, calculate_screen_bounding_rect};
-use util::{extract_inner_rect_safe, pack_as_float, recycle_vec};
+use util::{LayoutToWorldFastTransform, MaxRect, TransformedRectKind};
+use util::{calculate_screen_bounding_rect, extract_inner_rect_safe, pack_as_float, recycle_vec};
+use std::{iter, ops};
 use std::sync::Arc;
 
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ClipSourcesIndex(usize);
 
 pub struct ClipStore {
     clip_sources: Vec<ClipSources>,
 }
 
 impl ClipStore {
-    pub fn new() -> ClipStore {
+    pub fn new() -> Self {
         ClipStore {
             clip_sources: Vec::new(),
         }
     }
 
-    pub fn recycle(self) -> ClipStore {
+    pub fn recycle(self) -> Self {
         ClipStore {
             clip_sources: recycle_vec(self.clip_sources),
         }
     }
 
     pub fn insert(&mut self, clip_sources: ClipSources) -> ClipSourcesIndex {
         let index = ClipSourcesIndex(self.clip_sources.len());
         self.clip_sources.push(clip_sources);
         index
     }
+}
 
-    pub fn get(&self, index: ClipSourcesIndex) -> &ClipSources {
+impl ops::Index<ClipSourcesIndex> for ClipStore {
+    type Output = ClipSources;
+    fn index(&self, index: ClipSourcesIndex) -> &Self::Output {
         &self.clip_sources[index.0]
     }
+}
 
-    pub fn get_mut(&mut self, index: ClipSourcesIndex) -> &mut ClipSources {
+impl ops::IndexMut<ClipSourcesIndex> for ClipStore {
+    fn index_mut(&mut self, index: ClipSourcesIndex) -> &mut Self::Output {
         &mut self.clip_sources[index.0]
     }
 }
 
 #[derive(Debug)]
 pub struct LineDecorationClipSource {
     rect: LayoutRect,
     style: LineStyle,
     orientation: LineOrientation,
     wavy_line_thickness: f32,
 }
 
-#[derive(Clone, Debug)]
-pub struct ClipRegion {
-    pub main: LayoutRect,
-    pub image_mask: Option<ImageMask>,
-    pub complex_clips: Vec<ComplexClipRegion>,
+
+pub struct ComplexTranslateIter<I> {
+    source: I,
+    offset: LayoutVector2D,
+}
+
+impl<I: Iterator<Item = ComplexClipRegion>> Iterator for ComplexTranslateIter<I> {
+    type Item = ComplexClipRegion;
+    fn next(&mut self) -> Option<Self::Item> {
+        self.source
+            .next()
+            .map(|mut complex| {
+                complex.rect = complex.rect.translate(&self.offset);
+                complex
+            })
+    }
 }
 
-impl ClipRegion {
+#[derive(Clone, Debug)]
+pub struct ClipRegion<I> {
+    pub main: LayoutRect,
+    pub image_mask: Option<ImageMask>,
+    pub complex_clips: I,
+}
+
+impl<J> ClipRegion<ComplexTranslateIter<J>> {
     pub fn create_for_clip_node(
         rect: LayoutRect,
-        mut complex_clips: Vec<ComplexClipRegion>,
+        complex_clips: J,
         mut image_mask: Option<ImageMask>,
         reference_frame_relative_offset: &LayoutVector2D,
-    ) -> ClipRegion {
-        let rect = rect.translate(reference_frame_relative_offset);
-
+    ) -> Self
+    where
+        J: Iterator<Item = ComplexClipRegion>
+    {
         if let Some(ref mut image_mask) = image_mask {
             image_mask.rect = image_mask.rect.translate(reference_frame_relative_offset);
         }
 
-        for complex_clip in complex_clips.iter_mut() {
-            complex_clip.rect = complex_clip.rect.translate(reference_frame_relative_offset);
-        }
-
         ClipRegion {
-            main: rect,
+            main: rect.translate(reference_frame_relative_offset),
             image_mask,
-            complex_clips,
+            complex_clips: ComplexTranslateIter {
+                source: complex_clips,
+                offset: *reference_frame_relative_offset,
+            },
         }
     }
+}
 
+impl ClipRegion<Option<ComplexClipRegion>> {
     pub fn create_for_clip_node_with_local_clip(
         local_clip: &LocalClip,
         reference_frame_relative_offset: &LayoutVector2D
-    ) -> ClipRegion {
-        let complex_clips = match *local_clip {
-            LocalClip::Rect(_) => Vec::new(),
-            LocalClip::RoundedRect(_, ref region) => vec![region.clone()],
-        };
-        ClipRegion::create_for_clip_node(
-            *local_clip.clip_rect(),
-            complex_clips,
-            None,
-            reference_frame_relative_offset
-        )
+    ) -> Self {
+        ClipRegion {
+            main: local_clip
+                .clip_rect()
+                .translate(reference_frame_relative_offset),
+            image_mask: None,
+            complex_clips: match *local_clip {
+                LocalClip::Rect(_) => None,
+                LocalClip::RoundedRect(_, ref region) => {
+                    Some(ComplexClipRegion {
+                        rect: region.rect.translate(reference_frame_relative_offset),
+                        radii: region.radii,
+                        mode: region.mode,
+                    })
+                },
+            }
+        }
     }
 }
 
 #[derive(Debug)]
 pub enum ClipSource {
     Rectangle(LayoutRect, ClipMode),
     RoundedRectangle(LayoutRect, BorderRadius, ClipMode),
     Image(ImageMask),
@@ -275,151 +307,167 @@ impl ClipSource {
     pub fn is_image_or_line_decoration_clip(&self) -> bool {
         match *self {
             ClipSource::Image(..) | ClipSource::LineDecoration(..) => true,
             _ => false,
         }
     }
 }
 
+
+struct BoundsAccumulator {
+    local_outer: Option<LayoutRect>,
+    local_inner: Option<LayoutRect>,
+    can_calculate_inner_rect: bool,
+    can_calculate_outer_rect: bool,
+}
+
+impl BoundsAccumulator {
+    fn new() -> Self {
+        BoundsAccumulator {
+            local_outer: Some(LayoutRect::max_rect()),
+            local_inner: Some(LayoutRect::max_rect()),
+            can_calculate_inner_rect: true,
+            can_calculate_outer_rect: false,
+        }
+    }
+
+    fn add(&mut self, source: &ClipSource) {
+        // Depending on the complexity of the clip, we may either know the outer and/or inner
+        // rect, or neither of these.  In the case of a clip-out, we currently set the mask bounds
+        // to be unknown. This is conservative, but ensures correctness. In the future we can make
+        // this a lot more clever with some proper region handling.
+        if !self.can_calculate_inner_rect {
+            return
+        }
+
+        match *source {
+            ClipSource::Image(ref mask) => {
+                if !mask.repeat {
+                    self.can_calculate_outer_rect = true;
+                    self.local_outer = self.local_outer.and_then(|r| r.intersection(&mask.rect));
+                }
+                self.local_inner = None;
+            }
+            ClipSource::Rectangle(rect, mode) => {
+                // Once we encounter a clip-out, we just assume the worst
+                // case clip mask size, for now.
+                if mode == ClipMode::ClipOut {
+                    self.can_calculate_inner_rect = false;
+                    return
+                }
+
+                self.can_calculate_outer_rect = true;
+                self.local_outer = self.local_outer.and_then(|r| r.intersection(&rect));
+                self.local_inner = self.local_inner.and_then(|r| r.intersection(&rect));
+            }
+            ClipSource::RoundedRectangle(ref rect, ref radius, mode) => {
+                // Once we encounter a clip-out, we just assume the worst
+                // case clip mask size, for now.
+                if mode == ClipMode::ClipOut {
+                    self.can_calculate_inner_rect = false;
+                    return
+                }
+
+                self.can_calculate_outer_rect = true;
+                self.local_outer = self.local_outer.and_then(|r| r.intersection(rect));
+
+                let inner_rect = extract_inner_rect_safe(rect, radius);
+                self.local_inner = self.local_inner
+                    .and_then(|r| inner_rect.and_then(|ref inner| r.intersection(inner)));
+            }
+            ClipSource::BoxShadow(..) |
+            ClipSource::LineDecoration(..) => {
+                self.can_calculate_inner_rect = false;
+            }
+        }
+    }
+
+    fn finish(self) -> (LayoutRect, Option<LayoutRect>) {
+        (
+            if self.can_calculate_inner_rect {
+                self.local_inner.unwrap_or_else(LayoutRect::zero)
+            } else {
+                LayoutRect::zero()
+            },
+            if self.can_calculate_outer_rect {
+                Some(self.local_outer.unwrap_or_else(LayoutRect::zero))
+            } else {
+                None
+            },
+        )
+    }
+}
+
+
 #[derive(Debug)]
 pub struct ClipSources {
     pub clips: Vec<(ClipSource, GpuCacheHandle)>,
     pub local_inner_rect: LayoutRect,
     pub local_outer_rect: Option<LayoutRect>,
     pub only_rectangular_clips: bool,
     pub has_image_or_line_decoration_clip: bool,
     pub spatial_node_index: SpatialNodeIndex,
 }
 
 impl ClipSources {
-    pub fn new(
-        clips: Vec<ClipSource>,
-        spatial_node_index: SpatialNodeIndex,
-    ) -> Self {
-        let (local_inner_rect, local_outer_rect) = Self::calculate_inner_and_outer_rects(&clips);
+    pub fn new<I>(clip_iter: I, spatial_node_index: SpatialNodeIndex) -> Self
+    where
+        I: IntoIterator<Item = ClipSource>,
+    {
+        let mut clips = Vec::new();
+        let mut bounds_accum = BoundsAccumulator::new();
+        let mut has_image_or_line_decoration_clip = false;
+        let mut only_rectangular_clips = true;
 
-        let has_image_or_line_decoration_clip =
-            clips.iter().any(|clip| clip.is_image_or_line_decoration_clip());
-        let only_rectangular_clips =
-            !has_image_or_line_decoration_clip && clips.iter().all(|clip| clip.is_rect());
-        let clips = clips
-            .into_iter()
-            .map(|clip| (clip, GpuCacheHandle::new()))
-            .collect();
+        for clip in clip_iter {
+            bounds_accum.add(&clip);
+            has_image_or_line_decoration_clip |= clip.is_image_or_line_decoration_clip();
+            only_rectangular_clips &= clip.is_rect();
+            clips.push((clip, GpuCacheHandle::new()));
+        }
+
+        only_rectangular_clips &= !has_image_or_line_decoration_clip;
+        let (local_inner_rect, local_outer_rect) = bounds_accum.finish();
 
         ClipSources {
             clips,
             local_inner_rect,
             local_outer_rect,
             only_rectangular_clips,
             has_image_or_line_decoration_clip,
             spatial_node_index,
         }
     }
 
-    pub fn from_region(
-        region: ClipRegion,
+    pub fn from_region<I>(
+        region: ClipRegion<I>,
         spatial_node_index: SpatialNodeIndex,
-    ) -> ClipSources {
-        let mut clips = Vec::new();
-
-        if let Some(info) = region.image_mask {
-            clips.push(ClipSource::Image(info));
-        }
-
-        clips.push(ClipSource::Rectangle(region.main, ClipMode::Clip));
-
-        for complex in region.complex_clips {
-            clips.push(ClipSource::new_rounded_rect(
+    ) -> ClipSources
+    where
+        I: IntoIterator<Item = ComplexClipRegion>
+    {
+        let clip_rect = iter::once(ClipSource::Rectangle(region.main, ClipMode::Clip));
+        let clip_image = region.image_mask.map(ClipSource::Image);
+        let clips_complex = region.complex_clips
+            .into_iter()
+            .map(|complex| ClipSource::new_rounded_rect(
                 complex.rect,
                 complex.radii,
                 complex.mode,
             ));
-        }
 
-        ClipSources::new(clips, spatial_node_index)
+        let clips_all = clip_rect.chain(clip_image).chain(clips_complex);
+        ClipSources::new(clips_all, spatial_node_index)
     }
 
     pub fn clips(&self) -> &[(ClipSource, GpuCacheHandle)] {
         &self.clips
     }
 
-    fn calculate_inner_and_outer_rects(clips: &Vec<ClipSource>) -> (LayoutRect, Option<LayoutRect>) {
-        if clips.is_empty() {
-            return (LayoutRect::zero(), None);
-        }
-
-        // Depending on the complexity of the clip, we may either know the outer and/or inner
-        // rect, or neither or these.  In the case of a clip-out, we currently set the mask bounds
-        // to be unknown. This is conservative, but ensures correctness. In the future we can make
-        // this a lot more clever with some proper region handling.
-        let mut local_outer = Some(LayoutRect::max_rect());
-        let mut local_inner = local_outer;
-        let mut can_calculate_inner_rect = true;
-        let mut can_calculate_outer_rect = false;
-        for source in clips {
-            match *source {
-                ClipSource::Image(ref mask) => {
-                    if !mask.repeat {
-                        can_calculate_outer_rect = true;
-                        local_outer = local_outer.and_then(|r| r.intersection(&mask.rect));
-                    }
-                    local_inner = None;
-                }
-                ClipSource::Rectangle(rect, mode) => {
-                    // Once we encounter a clip-out, we just assume the worst
-                    // case clip mask size, for now.
-                    if mode == ClipMode::ClipOut {
-                        can_calculate_inner_rect = false;
-                        break;
-                    }
-
-                    can_calculate_outer_rect = true;
-                    local_outer = local_outer.and_then(|r| r.intersection(&rect));
-                    local_inner = local_inner.and_then(|r| r.intersection(&rect));
-                }
-                ClipSource::RoundedRectangle(ref rect, ref radius, mode) => {
-                    // Once we encounter a clip-out, we just assume the worst
-                    // case clip mask size, for now.
-                    if mode == ClipMode::ClipOut {
-                        can_calculate_inner_rect = false;
-                        break;
-                    }
-
-                    can_calculate_outer_rect = true;
-                    local_outer = local_outer.and_then(|r| r.intersection(rect));
-
-                    let inner_rect = extract_inner_rect_safe(rect, radius);
-                    local_inner = local_inner
-                        .and_then(|r| inner_rect.and_then(|ref inner| r.intersection(inner)));
-                }
-                ClipSource::BoxShadow(..) |
-                ClipSource::LineDecoration(..) => {
-                    can_calculate_inner_rect = false;
-                    break;
-                }
-            }
-        }
-
-        let outer = if can_calculate_outer_rect {
-            Some(local_outer.unwrap_or_else(LayoutRect::zero))
-        } else {
-            None
-        };
-
-        let inner = if can_calculate_inner_rect {
-            local_inner.unwrap_or_else(LayoutRect::zero)
-        } else {
-            LayoutRect::zero()
-        };
-
-        (inner, outer)
-    }
-
     pub fn update(
         &mut self,
         gpu_cache: &mut GpuCache,
         resource_cache: &mut ResourceCache,
         device_pixel_scale: DevicePixelScale,
     ) {
         for &mut (ref mut source, ref mut handle) in &mut self.clips {
             if let Some(mut request) = gpu_cache.request(handle) {
@@ -518,17 +566,18 @@ impl ClipSources {
     ) -> (DeviceIntRect, Option<DeviceIntRect>) {
         // If this translation isn't axis aligned or has a perspective component, don't try to
         // calculate the inner rectangle. The rectangle that we produce would include potentially
         // clipped screen area.
         // TODO(mrobinson): We should eventually try to calculate an inner region or some inner
         // rectangle so that we can do screen inner rectangle optimizations for these kind of
         // cilps.
         let can_calculate_inner_rect =
-            transform.preserves_2d_axis_alignment() && !transform.has_perspective_component();
+            transform.kind() == TransformedRectKind::AxisAligned &&
+            !transform.has_perspective_component();
         let screen_inner_rect = if can_calculate_inner_rect {
             calculate_screen_bounding_rect(transform, &self.local_inner_rect, device_pixel_scale, screen_rect)
                 .unwrap_or(DeviceIntRect::zero())
         } else {
             DeviceIntRect::zero()
         };
 
         let screen_outer_rect = self.local_outer_rect.map(|outer_rect|
--- a/gfx/webrender/src/clip_node.rs
+++ b/gfx/webrender/src/clip_node.rs
@@ -32,17 +32,17 @@ impl ClipNode {
         &mut self,
         device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         clip_chains: &mut [ClipChain],
         spatial_nodes: &[SpatialNode],
     ) {
-        let clip_sources = clip_store.get_mut(self.clip_sources_index);
+        let clip_sources = &mut clip_store[self.clip_sources_index];
         clip_sources.update(gpu_cache, resource_cache, device_pixel_scale);
         let spatial_node = &spatial_nodes[clip_sources.spatial_node_index.0];
 
         let (screen_inner_rect, screen_outer_rect) = clip_sources.get_screen_bounds(
             &spatial_node.world_content_transform,
             device_pixel_scale,
             None,
         );
@@ -55,20 +55,18 @@ impl ClipNode {
         let local_outer_rect = clip_sources.local_outer_rect
             .expect("Clipping node didn't have outer rect.");
 
         let new_node = ClipChainNode {
             work_item: ClipWorkItem {
                 clip_sources_index: self.clip_sources_index,
                 coordinate_system_id: spatial_node.coordinate_system_id,
             },
-            local_clip_rect: spatial_node
-                .coordinate_system_relative_transform
-                .transform_rect(&local_outer_rect)
-                .expect("clip node transform is not valid"),
+            local_clip_rect: local_outer_rect
+                .translate(&spatial_node.coordinate_system_relative_offset),
             screen_outer_rect,
             screen_inner_rect,
             prev: None,
         };
 
         let mut clip_chain =
             clip_chains[self.parent_clip_chain_index.0]
             .new_with_added_node(&new_node);
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -9,17 +9,17 @@ use clip::{ClipChain, ClipSourcesIndex, 
 use clip_node::ClipNode;
 use gpu_cache::GpuCache;
 use gpu_types::TransformPalette;
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
 use spatial_node::{ScrollFrameInfo, SpatialNode, SpatialNodeType, StickyFrameInfo};
-use util::{LayoutFastTransform, LayoutToWorldFastTransform};
+use util::LayoutToWorldFastTransform;
 
 pub type ScrollStates = FastHashMap<ExternalScrollId, ScrollFrameInfo>;
 
 /// An id that identifies coordinate systems in the ClipScrollTree. Each
 /// coordinate system has an id and those ids will be shared when the coordinates
 /// system are the same or are in the same axis-aligned space. This allows
 /// for optimizing mask generation.
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -98,18 +98,18 @@ pub struct TransformUpdateState {
     pub nearest_scrolling_ancestor_viewport: LayoutRect,
 
     /// An id for keeping track of the axis-aligned space of this node. This is used in
     /// order to to track what kinds of clip optimizations can be done for a particular
     /// display list item, since optimizations can usually only be done among
     /// coordinate systems which are relatively axis aligned.
     pub current_coordinate_system_id: CoordinateSystemId,
 
-    /// Transform from the coordinate system that started this compatible coordinate system.
-    pub coordinate_system_relative_transform: LayoutFastTransform,
+    /// Offset from the coordinate system that started this compatible coordinate system.
+    pub coordinate_system_relative_offset: LayoutVector2D,
 
     /// True if this node is transformed by an invertible transform.  If not, display items
     /// transformed by this node will not be displayed and display items not transformed by this
     /// node will not be clipped by clips that are transformed by this node.
     pub invertible: bool,
 }
 
 impl ClipScrollTree {
@@ -237,17 +237,17 @@ impl ClipScrollTree {
 
         let root_reference_frame_index = self.root_reference_frame_index();
         let mut state = TransformUpdateState {
             parent_reference_frame_transform: LayoutVector2D::new(pan.x, pan.y).into(),
             parent_accumulated_scroll_offset: LayoutVector2D::zero(),
             nearest_scrolling_ancestor_offset: LayoutVector2D::zero(),
             nearest_scrolling_ancestor_viewport: LayoutRect::zero(),
             current_coordinate_system_id: CoordinateSystemId::root(),
-            coordinate_system_relative_transform: LayoutFastTransform::identity(),
+            coordinate_system_relative_offset: LayoutVector2D::zero(),
             invertible: true,
         };
 
         let mut next_coordinate_system_id = state.current_coordinate_system_id.next();
         self.update_node(
             root_reference_frame_index,
             &mut state,
             &mut next_coordinate_system_id,
--- a/gfx/webrender/src/device/query_gl.rs
+++ b/gfx/webrender/src/device/query_gl.rs
@@ -66,26 +66,28 @@ impl<T> QuerySet<T> {
 }
 
 pub struct GpuFrameProfile<T> {
     gl: Rc<gl::Gl>,
     timers: QuerySet<GpuTimer<T>>,
     samplers: QuerySet<GpuSampler<T>>,
     frame_id: FrameId,
     inside_frame: bool,
+    ext_debug_marker: bool
 }
 
 impl<T> GpuFrameProfile<T> {
-    fn new(gl: Rc<gl::Gl>) -> Self {
+    fn new(gl: Rc<gl::Gl>, ext_debug_marker: bool) -> Self {
         GpuFrameProfile {
             gl,
             timers: QuerySet::new(),
             samplers: QuerySet::new(),
             frame_id: FrameId::new(0),
             inside_frame: false,
+            ext_debug_marker
         }
     }
 
     fn enable_timers(&mut self, count: i32) {
         self.timers.set = self.gl.gen_queries(count);
     }
 
     fn disable_timers(&mut self) {
@@ -135,17 +137,17 @@ impl<T> GpuFrameProfile<T> {
         }
     }
 }
 
 impl<T: NamedTag> GpuFrameProfile<T> {
     fn start_timer(&mut self, tag: T) -> GpuTimeQuery {
         self.finish_timer();
 
-        let marker = GpuMarker::new(&self.gl, tag.get_label());
+        let marker = GpuMarker::new(&self.gl, tag.get_label(), self.ext_debug_marker);
 
         if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
             self.gl.begin_query(gl::TIME_ELAPSED, query);
         }
 
         GpuTimeQuery(marker)
     }
 
@@ -182,29 +184,31 @@ impl<T> Drop for GpuFrameProfile<T> {
         self.disable_samplers();
     }
 }
 
 pub struct GpuProfiler<T> {
     gl: Rc<gl::Gl>,
     frames: Vec<GpuFrameProfile<T>>,
     next_frame: usize,
+    ext_debug_marker: bool
 }
 
 impl<T> GpuProfiler<T> {
-    pub fn new(gl: Rc<gl::Gl>) -> Self {
+    pub fn new(gl: Rc<gl::Gl>, ext_debug_marker: bool) -> Self {
         const MAX_PROFILE_FRAMES: usize = 4;
         let frames = (0 .. MAX_PROFILE_FRAMES)
-            .map(|_| GpuFrameProfile::new(Rc::clone(&gl)))
+            .map(|_| GpuFrameProfile::new(Rc::clone(&gl), ext_debug_marker))
             .collect();
 
         GpuProfiler {
             gl,
             next_frame: 0,
             frames,
+            ext_debug_marker
         }
     }
 
     pub fn enable_timers(&mut self) {
         const MAX_TIMERS_PER_FRAME: i32 = 256;
 
         for frame in &mut self.frames {
             frame.enable_timers(MAX_TIMERS_PER_FRAME);
@@ -258,42 +262,51 @@ impl<T: NamedTag> GpuProfiler<T> {
         self.frames[self.next_frame].start_sampler(tag)
     }
 
     pub fn finish_sampler(&mut self, _sampler: GpuSampleQuery) {
         self.frames[self.next_frame].finish_sampler()
     }
 
     pub fn start_marker(&mut self, label: &str) -> GpuMarker {
-        GpuMarker::new(&self.gl, label)
+        GpuMarker::new(&self.gl, label, self.ext_debug_marker)
     }
 
     pub fn place_marker(&mut self, label: &str) {
-        GpuMarker::fire(&self.gl, label)
+        GpuMarker::fire(&self.gl, label, self.ext_debug_marker)
     }
 }
 
 #[must_use]
 pub struct GpuMarker {
-    gl: Rc<gl::Gl>,
+    gl: Option<Rc<gl::Gl>>
 }
 
 impl GpuMarker {
-    fn new(gl: &Rc<gl::Gl>, message: &str) -> Self {
-        gl.push_group_marker_ext(message);
-        GpuMarker { gl: Rc::clone(gl) }
+    fn new(gl: &Rc<gl::Gl>, message: &str, ext_debug_marker: bool) -> Self {
+        let gl = if ext_debug_marker {
+            gl.push_group_marker_ext(message);            
+            Some(Rc::clone(gl))
+        } else {
+            None
+        };
+        GpuMarker { gl }
     }
 
-    fn fire(gl: &Rc<gl::Gl>, message: &str) {
-        gl.insert_event_marker_ext(message);
+    fn fire(gl: &Rc<gl::Gl>, message: &str, ext_debug_marker: bool) {
+        if ext_debug_marker {
+            gl.insert_event_marker_ext(message);
+        }
     }
 }
 
 impl Drop for GpuMarker {
     fn drop(&mut self) {
-        self.gl.pop_group_marker_ext();
+        if let Some(ref gl) = self.gl {
+            gl.pop_group_marker_ext();
+        }
     }
 }
 
 #[must_use]
 pub struct GpuTimeQuery(GpuMarker);
 #[must_use]
 pub struct GpuSampleQuery;
--- a/gfx/webrender/src/display_list_flattener.rs
+++ b/gfx/webrender/src/display_list_flattener.rs
@@ -219,32 +219,32 @@ impl<'a> DisplayListFlattener<'a> {
             flattener,
         )
     }
 
     fn get_complex_clips(
         &self,
         pipeline_id: PipelineId,
         complex_clips: ItemRange<ComplexClipRegion>,
-    ) -> Vec<ComplexClipRegion> {
-        if complex_clips.is_empty() {
-            return vec![];
-        }
-        self.scene.get_display_list_for_pipeline(pipeline_id).get(complex_clips).collect()
+    ) -> impl 'a + Iterator<Item = ComplexClipRegion> {
+        //Note: we could make this a bit more complex to early out
+        // on `complex_clips.is_empty()` if it's worth it
+        self.scene
+            .get_display_list_for_pipeline(pipeline_id)
+            .get(complex_clips)
     }
 
     fn get_clip_chain_items(
         &self,
         pipeline_id: PipelineId,
         items: ItemRange<ClipId>,
-    ) -> Vec<ClipId> {
-        if items.is_empty() {
-            return vec![];
-        }
-        self.scene.get_display_list_for_pipeline(pipeline_id).get(items).collect()
+    ) -> impl 'a + Iterator<Item = ClipId> {
+        self.scene
+            .get_display_list_for_pipeline(pipeline_id)
+            .get(items)
     }
 
     fn flatten_root(&mut self, pipeline: &'a ScenePipeline, frame_size: &LayoutSize) {
         let pipeline_id = pipeline.pipeline_id;
         let reference_frame_info = self.simple_scroll_and_clip_chain(
             &ClipId::root_reference_frame(pipeline_id),
         );
 
@@ -680,18 +680,17 @@ impl<'a> DisplayListFlattener<'a> {
                     complex_clips,
                     info.image_mask,
                     &reference_frame_relative_offset,
                 );
                 self.add_clip_node(info.id, clip_and_scroll_ids.scroll_node_id, clip_region);
             }
             SpecificDisplayItem::ClipChain(ref info) => {
                 let items = self.get_clip_chain_items(pipeline_id, item.clip_chain_items())
-                    .iter()
-                    .map(|id| self.id_to_index_mapper.get_clip_node_index(*id))
+                    .map(|id| self.id_to_index_mapper.get_clip_node_index(id))
                     .collect();
                 let parent = match info.parent {
                     Some(id) => Some(
                         self.id_to_index_mapper.get_clip_chain_index(&ClipId::ClipChain(id))
                     ),
                     None => self.pipeline_clip_chain_stack.last().cloned(),
                 };
                 let clip_chain_index =
@@ -1236,22 +1235,25 @@ impl<'a> DisplayListFlattener<'a> {
             Some(ExternalScrollId(0, pipeline_id)),
             pipeline_id,
             &LayoutRect::new(LayoutPoint::zero(), *viewport_size),
             content_size,
             ScrollSensitivity::ScriptAndInputEvents,
         );
     }
 
-    pub fn add_clip_node(
+    pub fn add_clip_node<I>(
         &mut self,
         new_node_id: ClipId,
         parent_id: ClipId,
-        clip_region: ClipRegion,
-    ) -> ClipChainIndex {
+        clip_region: ClipRegion<I>,
+    ) -> ClipChainIndex
+    where
+        I: IntoIterator<Item = ComplexClipRegion>
+    {
         let parent_clip_chain_index = self.id_to_index_mapper.get_clip_chain_index(&parent_id);
         let spatial_node = self.id_to_index_mapper.get_spatial_node_index(parent_id);
 
         let clip_sources = ClipSources::from_region(clip_region, spatial_node);
         let handle = self.clip_store.insert(clip_sources);
 
         let (node_index, clip_chain_index) = self.clip_scroll_tree.add_clip_node(
             parent_clip_chain_index,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -99,40 +99,40 @@ pub struct PictureContext<'a> {
 
 pub struct PictureState {
     pub tasks: Vec<RenderTaskId>,
     pub has_non_root_coord_system: bool,
     pub local_rect_changed: bool,
 }
 
 impl PictureState {
-    pub fn new() -> PictureState {
+    pub fn new() -> Self {
         PictureState {
             tasks: Vec::new(),
             has_non_root_coord_system: false,
             local_rect_changed: false,
         }
     }
 }
 
 pub struct PrimitiveRunContext<'a> {
     pub clip_chain: &'a ClipChain,
     pub scroll_node: &'a SpatialNode,
     pub spatial_node_index: SpatialNodeIndex,
-    pub transform: Transform,
+    pub transform: Transform<'a>,
     pub local_clip_rect: LayoutRect,
 }
 
 impl<'a> PrimitiveRunContext<'a> {
     pub fn new(
         clip_chain: &'a ClipChain,
         scroll_node: &'a SpatialNode,
         spatial_node_index: SpatialNodeIndex,
         local_clip_rect: LayoutRect,
-        transform: Transform,
+        transform: Transform<'a>,
     ) -> Self {
         PrimitiveRunContext {
             clip_chain,
             scroll_node,
             local_clip_rect,
             spatial_node_index,
             transform,
         }
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{DevicePoint, DeviceSize, DeviceRect, LayoutRect, LayoutToWorldTransform};
 use api::{PremultipliedColorF, WorldToLayoutTransform};
 use clip_scroll_tree::SpatialNodeIndex;
 use gpu_cache::{GpuCacheAddress, GpuDataRequest};
 use prim_store::{EdgeAaSegmentMask, Transform};
 use render_task::RenderTaskAddress;
-use util::{MatrixHelpers, TransformedRectKind};
+use util::{LayoutToWorldFastTransform, TransformedRectKind};
 
 // Contains type that must exactly match the same structures declared in GLSL.
 
 #[derive(Copy, Clone, Debug)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ZBufferId(i32);
@@ -369,98 +369,119 @@ impl TransformPaletteId {
 }
 
 // The GPU data payload for a transform palette entry.
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[repr(C)]
 pub struct TransformData {
-    pub transform: LayoutToWorldTransform,
-    pub inv_transform: WorldToLayoutTransform,
+    transform: LayoutToWorldTransform,
+    inv_transform: WorldToLayoutTransform,
 }
 
 impl TransformData {
-    pub fn invalid() -> Self {
+    fn invalid() -> Self {
         TransformData {
             transform: LayoutToWorldTransform::identity(),
             inv_transform: WorldToLayoutTransform::identity(),
         }
     }
 }
 
 // Extra data stored about each transform palette entry.
 pub struct TransformMetadata {
-    pub transform_kind: TransformedRectKind,
+    transform_kind: TransformedRectKind,
 }
 
 // Stores a contiguous list of TransformData structs, that
 // are ready for upload to the GPU.
 // TODO(gw): For now, this only stores the complete local
 //           to world transform for each spatial node. In
 //           the future, the transform palette will support
 //           specifying a coordinate system that the transform
 //           should be relative to.
 pub struct TransformPalette {
     pub transforms: Vec<TransformData>,
     metadata: Vec<TransformMetadata>,
 }
 
 impl TransformPalette {
-    pub fn new(spatial_node_count: usize) -> TransformPalette {
+    pub fn new(spatial_node_count: usize) -> Self {
         TransformPalette {
             transforms: Vec::with_capacity(spatial_node_count),
             metadata: Vec::with_capacity(spatial_node_count),
         }
     }
 
-    // Set the local -> world transform for a given spatial
-    // node in the transform palette.
-    pub fn set(
-        &mut self,
-        index: SpatialNodeIndex,
-        data: TransformData,
-    ) {
-        let index = index.0 as usize;
-
+    #[inline]
+    fn grow(&mut self, index: SpatialNodeIndex) {
         // Pad the vectors out if they are not long enough to
         // account for this index. This can occur, for instance,
         // when we stop recursing down the CST due to encountering
         // a node with an invalid transform.
-        while index >= self.transforms.len() {
+        while self.transforms.len() <= index.0 as usize {
             self.transforms.push(TransformData::invalid());
             self.metadata.push(TransformMetadata {
                 transform_kind: TransformedRectKind::AxisAligned,
             });
         }
+    }
 
-        // Store the transform itself, along with metadata about it.
-        self.metadata[index] = TransformMetadata {
-            transform_kind: data.transform.transform_kind(),
+    pub fn invalidate(&mut self, index: SpatialNodeIndex) {
+        self.grow(index);
+        self.metadata[index.0 as usize] = TransformMetadata {
+            transform_kind: TransformedRectKind::AxisAligned,
         };
-        self.transforms[index] = data;
+        self.transforms[index.0 as usize] = TransformData::invalid();
+    }
+
+    // Set the local -> world transform for a given spatial
+    // node in the transform palette.
+    pub fn set(
+        &mut self, index: SpatialNodeIndex, fast_transform: &LayoutToWorldFastTransform,
+    ) -> bool {
+        self.grow(index);
+
+        match fast_transform.inverse() {
+            Some(inverted) => {
+                // Store the transform itself, along with metadata about it.
+                self.metadata[index.0 as usize] = TransformMetadata {
+                    transform_kind: fast_transform.kind()
+                };
+                // Write the data that will be made available to the GPU for this node.
+                self.transforms[index.0 as usize] = TransformData {
+                    transform: fast_transform.to_transform().into_owned(),
+                    inv_transform: inverted.to_transform().into_owned(),
+                };
+                true
+            }
+            None => {
+                self.invalidate(index);
+                false
+            }
+        }
     }
 
     // Get the relevant information about a given transform that is
     // used by the CPU code during culling and primitive prep pass.
     // TODO(gw): In the future, it will be possible to specify
     //           a coordinate system id here, to allow retrieving
     //           transforms in the local space of a given spatial node.
     pub fn get_transform(
         &self,
         index: SpatialNodeIndex,
     ) -> Transform {
-        let index = index.0;
-        let transform = &self.transforms[index];
-        let metadata = &self.metadata[index];
+        let data = &self.transforms[index.0 as usize];
+        let metadata = &self.metadata[index.0 as usize];
 
         Transform {
-            m: transform.transform,
+            m: &data.transform,
             transform_kind: metadata.transform_kind,
-            backface_is_visible: transform.transform.is_backface_visible(),
+            backface_is_visible: data.transform.is_backface_visible(),
         }
     }
 
     // Get a transform palette id for the given spatial node.
     // TODO(gw): In the future, it will be possible to specify
     //           a coordinate system id here, to allow retrieving
     //           transforms in the local space of a given spatial node.
     pub fn get_id(
--- a/gfx/webrender/src/hit_test.rs
+++ b/gfx/webrender/src/hit_test.rs
@@ -31,17 +31,17 @@ pub struct HitTestClipNode {
 
     /// A particular point must be inside all of these regions to be considered clipped in
     /// for the purposes of a hit test.
     regions: Vec<HitTestRegion>,
 }
 
 impl HitTestClipNode {
     fn new(node: &ClipNode, clip_store: &ClipStore) -> Self {
-        let clips = clip_store.get(node.clip_sources_index);
+        let clips = &clip_store[node.clip_sources_index];
         let regions = clips.clips().iter().map(|source| {
             match source.0 {
                 ClipSource::Rectangle(ref rect, mode) => HitTestRegion::Rectangle(*rect, mode),
                 ClipSource::RoundedRectangle(ref rect, ref radii, ref mode) =>
                     HitTestRegion::RoundedRectangle(*rect, *radii, *mode),
                 ClipSource::Image(ref mask) => HitTestRegion::Rectangle(mask.rect, ClipMode::Clip),
                 ClipSource::LineDecoration(_) |
                 ClipSource::BoxShadow(_) => {
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -58,18 +58,18 @@ impl ScrollNodeAndClipChain {
     }
 }
 
 // This is CPU-side information about a transform, that is relevant
 // during culling and primitive prep pass. Often it is the same as
 // the information in the clip-scroll tree. However, if we decide
 // to rasterize a picture in local space, then this will be the
 // transform relative to that picture's coordinate system.
-pub struct Transform {
-    pub m: LayoutToWorldTransform,
+pub struct Transform<'a> {
+    pub m: &'a LayoutToWorldTransform,
     pub backface_is_visible: bool,
     pub transform_kind: TransformedRectKind,
 }
 
 #[derive(Debug)]
 pub struct PrimitiveRun {
     pub base_prim_index: PrimitiveIndex,
     pub count: usize,
@@ -1490,17 +1490,19 @@ impl PrimitiveStore {
                 ref mut handle,
                 ref mut task_info,
                 ..
             } = *source {
                 // TODO(gw): When drawing in screen raster mode, we should also incorporate a
                 //           scale factor from the world transform to get an appropriately
                 //           sized border task.
                 let world_scale = LayoutToWorldScale::new(1.0);
-                let scale = world_scale * frame_context.device_pixel_scale;
+                let mut scale = world_scale * frame_context.device_pixel_scale;
+                let max_scale = BorderRenderTaskInfo::get_max_scale(&border.radius);
+                scale.0 = scale.0.min(max_scale.0);
                 let scale_au = Au::from_f32_px(scale.0);
                 let needs_update = scale_au != cache_key.scale;
                 let mut new_segments = Vec::new();
 
                 if needs_update {
                     cache_key.scale = scale_au;
 
                     *task_info = BorderRenderTaskInfo::new(
@@ -1567,17 +1569,17 @@ impl PrimitiveStore {
         {
             metadata.prepared_frame_id = frame_state.render_tasks.frame_id();
         }
 
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 // The transform only makes sense for screen space rasterization
-                let transform = prim_run_context.scroll_node.world_content_transform.into();
+                let transform = prim_run_context.scroll_node.world_content_transform.to_transform();
                 text.prepare_for_render(
                     frame_context.device_pixel_scale,
                     &transform,
                     pic_context.allow_subpixel_aa,
                     pic_context.display_list,
                     frame_state,
                 );
             }
@@ -2057,17 +2059,17 @@ impl PrimitiveStore {
         };
 
         // Segment the primitive on all the local-space clip sources that we can.
         for clip_item in clips {
             if clip_item.coordinate_system_id != prim_run_context.scroll_node.coordinate_system_id {
                 continue;
             }
 
-            let local_clips = frame_state.clip_store.get(clip_item.clip_sources_index);
+            let local_clips = &frame_state.clip_store[clip_item.clip_sources_index];
             rect_clips_only = rect_clips_only && local_clips.only_rectangular_clips;
 
             // TODO(gw): We can easily extend the segment builder to support these clip sources in
             // the future, but they are rarely used.
             // We must do this check here in case we continue early below.
             if local_clips.has_image_or_line_decoration_clip {
                 clip_mask_kind = BrushClipMaskKind::Global;
             }
@@ -2284,17 +2286,17 @@ impl PrimitiveStore {
                 prim_screen_rect, prim_run_context.clip_chain.combined_outer_screen_rect);
         }
 
         let prim_coordinate_system_id = prim_run_context.scroll_node.coordinate_system_id;
         let transform = &prim_run_context.scroll_node.world_content_transform;
         let extra_clip =  {
             let metadata = &self.cpu_metadata[prim_index.0];
             metadata.clip_sources_index.map(|clip_sources_index| {
-                let prim_clips = frame_state.clip_store.get_mut(clip_sources_index);
+                let prim_clips = &mut frame_state.clip_store[clip_sources_index];
                 prim_clips.update(
                     frame_state.gpu_cache,
                     frame_state.resource_cache,
                     frame_context.device_pixel_scale,
                 );
                 let (screen_inner_rect, screen_outer_rect) = prim_clips.get_screen_bounds(
                     transform,
                     frame_context.device_pixel_scale,
@@ -2959,18 +2961,18 @@ fn get_local_clip_rect_for_nodes(
                     Some(combined_rect) =>
                         combined_rect
                             .intersection(&node.local_clip_rect)
                             .unwrap_or_else(LayoutRect::zero),
                     None => node.local_clip_rect,
                 })
             }
         )
-        .and_then(|local_rect| {
-            scroll_node.coordinate_system_relative_transform.unapply(&local_rect)
+        .map(|local_rect| {
+            local_rect.translate(&-scroll_node.coordinate_system_relative_offset)
         })
 }
 
 impl<'a> GpuDataRequest<'a> {
     // Write the GPU cache data for an individual segment.
     fn write_segment(
         &mut self,
         local_rect: LayoutRect,
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -398,17 +398,17 @@ impl RenderTask {
         // task cache. This allows the blurred box-shadow rect to be cached
         // in the texture cache across frames.
         // TODO(gw): Consider moving this logic outside this function, especially
         //           as we add more clip sources that depend on render tasks.
         // TODO(gw): If this ever shows up in a profile, we could pre-calculate
         //           whether a ClipSources contains any box-shadows and skip
         //           this iteration for the majority of cases.
         for clip_item in &clips {
-            let clip_sources = clip_store.get_mut(clip_item.clip_sources_index);
+            let clip_sources = &mut clip_store[clip_item.clip_sources_index];
             for &mut (ref mut clip, _) in &mut clip_sources.clips {
                 match *clip {
                     ClipSource::BoxShadow(ref mut info) => {
                         let (cache_size, cache_key) = info.cache_key
                             .as_ref()
                             .expect("bug: no cache key set")
                             .clone();
                         let blur_radius_dp = cache_key.blur_radius_dp as f32;
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -1762,17 +1762,18 @@ impl Renderer {
                 enable_render_on_scroll,
             );
             backend.run(backend_profile_counters);
             if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                 thread_listener.thread_stopped(&rb_thread_name);
             }
         })?;
 
-        let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()));
+        let ext_debug_marker = device.supports_extension("GL_EXT_debug_marker");
+        let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()), ext_debug_marker);
         #[cfg(feature = "capture")]
         let read_fbo = device.create_fbo_for_external_texture(0);
 
         let mut renderer = Renderer {
             result_rx,
             debug_server,
             device,
             active_documents: Vec::new(),
--- a/gfx/webrender/src/spatial_node.rs
+++ b/gfx/webrender/src/spatial_node.rs
@@ -3,19 +3,19 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ExternalScrollId, LayoutPixel, LayoutPoint, LayoutRect, LayoutSize, LayoutTransform};
 use api::{LayoutVector2D, PipelineId, PropertyBinding, ScrollClamping, ScrollLocation};
 use api::{ScrollSensitivity, StickyOffsetBounds};
 use clip_scroll_tree::{CoordinateSystemId, SpatialNodeIndex, TransformUpdateState};
 use euclid::SideOffsets2D;
-use gpu_types::{TransformData, TransformPalette};
+use gpu_types::TransformPalette;
 use scene::SceneProperties;
-use util::{LayoutFastTransform, LayoutToWorldFastTransform, TransformedRectKind};
+use util::{LayoutFastTransform, LayoutToWorldFastTransform, MatrixHelpers, TransformedRectKind};
 
 #[derive(Clone, Debug)]
 pub enum SpatialNodeType {
     /// A special kind of node that adjusts its position based on the position
     /// of its parent node and a given set of sticky positioning offset bounds.
-    /// Sticky positioned is described in the CSS Positioned Layout Module Level 3 here:
+    /// Sticky positioning is described in the CSS Positioned Layout Module Level 3 here:
     /// https://www.w3.org/TR/css-position-3/#sticky-pos
     StickyFrame(StickyFrameInfo),
@@ -61,17 +61,17 @@ pub struct SpatialNode {
     pub invertible: bool,
 
     /// The axis-aligned coordinate system id of this node.
     pub coordinate_system_id: CoordinateSystemId,
 
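-    /// The transformation from the coordinate system which established our compatible coordinate
-    /// system (same coordinate system id) and us. This can change via scroll offsets and via new
-    /// reference frame transforms.
+    /// The accumulated 2D offset between the coordinate system which established our compatible
+    /// coordinate system (same coordinate system id) and us. This can change via scroll offsets
+    /// and via new reference frame transforms.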
-    pub coordinate_system_relative_transform: LayoutFastTransform,
+    pub coordinate_system_relative_offset: LayoutVector2D,
 }
 
 impl SpatialNode {
     pub fn new(
         pipeline_id: PipelineId,
         parent_index: Option<SpatialNodeIndex>,
         node_type: SpatialNodeType,
     ) -> Self {
@@ -80,17 +80,17 @@ impl SpatialNode {
             world_content_transform: LayoutToWorldFastTransform::identity(),
             transform_kind: TransformedRectKind::AxisAligned,
             parent: parent_index,
             children: Vec::new(),
             pipeline_id,
             node_type,
             invertible: true,
             coordinate_system_id: CoordinateSystemId(0),
-            coordinate_system_relative_transform: LayoutFastTransform::identity(),
+            coordinate_system_relative_offset: LayoutVector2D::zero(),
         }
     }
 
     pub fn new_scroll_frame(
         pipeline_id: PipelineId,
         parent_index: SpatialNodeIndex,
         external_id: Option<ExternalScrollId>,
         frame_rect: &LayoutRect,
@@ -199,57 +199,38 @@ impl SpatialNode {
     }
 
     pub fn push_gpu_data(
         &mut self,
         transform_palette: &mut TransformPalette,
         node_index: SpatialNodeIndex,
     ) {
         if !self.invertible {
-            transform_palette.set(node_index, TransformData::invalid());
+            transform_palette.invalidate(node_index);
             return;
         }
 
-        let inv_transform = match self.world_content_transform.inverse() {
-            Some(inverted) => inverted.to_transform(),
-            None => {
-                transform_palette.set(node_index, TransformData::invalid());
-                return;
-            }
-        };
-
-        let data = TransformData {
-            transform: self.world_content_transform.into(),
-            inv_transform,
-        };
-
-        // Write the data that will be made available to the GPU for this node.
-        transform_palette.set(node_index, data);
+        transform_palette.set(node_index, &self.world_content_transform);
     }
 
     pub fn update(
         &mut self,
         state: &mut TransformUpdateState,
         next_coordinate_system_id: &mut CoordinateSystemId,
         scene_properties: &SceneProperties,
     ) {
         // If any of our parents was not rendered, we are not rendered either and can just
         // quit here.
         if !state.invertible {
             self.mark_uninvertible();
             return;
         }
 
         self.update_transform(state, next_coordinate_system_id, scene_properties);
-
-        self.transform_kind = if self.world_content_transform.preserves_2d_axis_alignment() {
-            TransformedRectKind::AxisAligned
-        } else {
-            TransformedRectKind::Complex
-        };
+        self.transform_kind = self.world_content_transform.kind();
 
         // If this node is a reference frame, we check if it has a non-invertible matrix.
         // For non-reference-frames we assume that they will produce only additional
         // translations which should be invertible.
         match self.node_type {
             SpatialNodeType::ReferenceFrame(info) if !info.invertible => {
                 self.mark_uninvertible();
                 return;
@@ -287,23 +268,26 @@ impl SpatialNode {
 
                 info.invertible = self.world_viewport_transform.is_invertible();
                 if !info.invertible {
                     return;
                 }
 
                 // Try to update our compatible coordinate system transform. If we cannot, start a new
                 // incompatible coordinate system.
-                match state.coordinate_system_relative_transform.update(relative_transform) {
-                    Some(offset) => self.coordinate_system_relative_transform = offset,
-                    None => {
-                        self.coordinate_system_relative_transform = LayoutFastTransform::identity();
-                        state.current_coordinate_system_id = *next_coordinate_system_id;
-                        next_coordinate_system_id.advance();
-                    }
+                if relative_transform.is_simple_2d_translation() {
+                    self.coordinate_system_relative_offset =
+                        state.coordinate_system_relative_offset +
+                        LayoutVector2D::new(relative_transform.m41, relative_transform.m42);
+                } else {
+                    // If the relative transform is not a simple 2D translation, we need to start a
+                    // new incompatible coordinate system with which we cannot share clips without masking.
+                    self.coordinate_system_relative_offset = LayoutVector2D::zero();
+                    state.current_coordinate_system_id = *next_coordinate_system_id;
+                    next_coordinate_system_id.advance();
                 }
 
                 self.coordinate_system_id = state.current_coordinate_system_id;
             }
             _ => {
                 // We calculate this here to avoid a double-borrow later.
                 let sticky_offset = self.calculate_sticky_offset(
                     &state.nearest_scrolling_ancestor_offset,
@@ -325,18 +309,18 @@ impl SpatialNode {
                 let scroll_offset = self.scroll_offset();
                 self.world_content_transform = if scroll_offset != LayoutVector2D::zero() {
                     self.world_viewport_transform.pre_translate(&scroll_offset)
                 } else {
                     self.world_viewport_transform
                 };
 
                 let added_offset = state.parent_accumulated_scroll_offset + sticky_offset + scroll_offset;
-                self.coordinate_system_relative_transform =
-                    state.coordinate_system_relative_transform.offset(added_offset);
+                self.coordinate_system_relative_offset =
+                    state.coordinate_system_relative_offset + added_offset;
 
                 if let SpatialNodeType::StickyFrame(ref mut info) = self.node_type {
                     info.current_offset = sticky_offset;
                 }
 
                 self.coordinate_system_id = state.current_coordinate_system_id;
             }
         }
@@ -473,18 +457,17 @@ impl SpatialNode {
                 state.parent_accumulated_scroll_offset =
                     scrolling.offset + state.parent_accumulated_scroll_offset;
                 state.nearest_scrolling_ancestor_offset = scrolling.offset;
                 state.nearest_scrolling_ancestor_viewport = scrolling.viewport_rect;
             }
             SpatialNodeType::ReferenceFrame(ref info) => {
                 state.parent_reference_frame_transform = self.world_viewport_transform;
                 state.parent_accumulated_scroll_offset = LayoutVector2D::zero();
-                state.coordinate_system_relative_transform =
-                    self.coordinate_system_relative_transform.clone();
+                state.coordinate_system_relative_offset = self.coordinate_system_relative_offset;
                 let translation = -info.origin_in_parent_reference_frame;
                 state.nearest_scrolling_ancestor_viewport =
                     state.nearest_scrolling_ancestor_viewport
                        .translate(&translation);
             }
         }
     }
 
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -6,16 +6,18 @@ use api::{BorderRadius, DeviceIntPoint, 
 use api::{DevicePoint, DeviceRect, DeviceSize, LayoutPixel, LayoutPoint, LayoutRect, LayoutSize};
 use api::{WorldPixel, WorldPoint, WorldRect};
 use euclid::{Point2D, Rect, Size2D, TypedPoint2D, TypedRect, TypedSize2D};
 use euclid::{TypedTransform2D, TypedTransform3D, TypedVector2D, TypedVector3D};
 use euclid::{HomogeneousVector};
 use num_traits::Zero;
 use plane_split::{Clipper, Plane, Polygon};
 use std::{i32, f32};
+use std::borrow::Cow;
+
 
 // Matches the definition of SK_ScalarNearlyZero in Skia.
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
     fn preserves_2d_axis_alignment(&self) -> bool;
     fn has_perspective_component(&self) -> bool;
@@ -481,21 +483,30 @@ impl<Src, Dst> FastTransform<Src, Dst> {
         if transform.is_simple_2d_translation() {
             return FastTransform::Offset(TypedVector2D::new(transform.m41, transform.m42));
         }
         let inverse = transform.inverse();
         let is_2d = transform.is_2d();
         FastTransform::Transform { transform, inverse, is_2d}
     }
 
-    pub fn to_transform(&self) -> TypedTransform3D<f32, Src, Dst> {
+    pub fn kind(&self) -> TransformedRectKind {
         match *self {
-            FastTransform::Offset(offset) =>
-                TypedTransform3D::create_translation(offset.x, offset.y, 0.0),
-            FastTransform::Transform { transform, .. } => transform
+            FastTransform::Offset(_) => TransformedRectKind::AxisAligned,
+            FastTransform::Transform { ref transform, .. } if transform.preserves_2d_axis_alignment() => TransformedRectKind::AxisAligned,
+            FastTransform::Transform { .. } => TransformedRectKind::Complex,
+        }
+    }
+
+    pub fn to_transform(&self) -> Cow<TypedTransform3D<f32, Src, Dst>> {
+        match *self {
+            FastTransform::Offset(offset) => Cow::Owned(
+                TypedTransform3D::create_translation(offset.x, offset.y, 0.0)
+            ),
+            FastTransform::Transform { ref transform, .. } => Cow::Borrowed(transform),
         }
     }
 
     pub fn is_invertible(&self) -> bool {
         match *self {
             FastTransform::Offset(..) => true,
             FastTransform::Transform { ref inverse, .. } => inverse.is_some(),
         }
@@ -524,25 +535,16 @@ impl<Src, Dst> FastTransform<Src, Dst> {
             FastTransform::Offset(ref offset) =>
                 FastTransform::Offset(*offset + *other_offset),
             FastTransform::Transform { transform, .. } =>
                 FastTransform::with_transform(transform.pre_translate(other_offset.to_3d()))
         }
     }
 
     #[inline(always)]
-    pub fn preserves_2d_axis_alignment(&self) -> bool {
-        match *self {
-            FastTransform::Offset(..) => true,
-            FastTransform::Transform { ref transform, .. } =>
-                transform.preserves_2d_axis_alignment(),
-        }
-    }
-
-    #[inline(always)]
     pub fn has_perspective_component(&self) -> bool {
         match *self {
             FastTransform::Offset(..) => false,
             FastTransform::Transform { ref transform, .. } => transform.has_perspective_component(),
         }
     }
 
     #[inline(always)]
@@ -591,27 +593,16 @@ impl<Src, Dst> FastTransform<Src, Dst> {
             FastTransform::Transform { inverse: Some(ref inverse), is_2d: true, .. }  =>
                 inverse.transform_rect(rect),
             FastTransform::Transform { ref transform, is_2d: false, .. } =>
                 Some(transform.inverse_rect_footprint(rect)),
             FastTransform::Transform { inverse: None, .. }  => None,
         }
     }
 
-    #[inline(always)]
-    pub fn offset(&self, new_offset: TypedVector2D<f32, Src>) -> Self {
-        match *self {
-            FastTransform::Offset(offset) => FastTransform::Offset(offset + new_offset),
-            FastTransform::Transform { ref transform, .. } => {
-                let transform = transform.pre_translate(new_offset.to_3d());
-                FastTransform::with_transform(transform)
-            }
-        }
-    }
-
     pub fn post_translate(&self, new_offset: TypedVector2D<f32, Dst>) -> Self {
         match *self {
             FastTransform::Offset(offset) => {
                 let offset = offset.to_untyped() + new_offset.to_untyped();
                 FastTransform::Offset(TypedVector2D::from_untyped(&offset))
             }
             FastTransform::Transform { ref transform, .. } => {
                 let transform = transform.post_translate(new_offset.to_3d());
@@ -630,40 +621,24 @@ impl<Src, Dst> FastTransform<Src, Dst> {
                     transform: inverse,
                     inverse: Some(transform),
                     is_2d
                 }),
             FastTransform::Transform { inverse: None, .. } => None,
 
         }
     }
-
-    pub fn update(&self, transform: TypedTransform3D<f32, Src, Dst>) -> Option<Self> {
-        if transform.is_simple_2d_translation() {
-            Some(self.offset(TypedVector2D::new(transform.m41, transform.m42)))
-        } else {
-            // If we break 2D axis alignment or have a perspective component, we need to start a
-            // new incompatible coordinate system with which we cannot share clips without masking.
-            None
-        }
-    }
 }
 
 impl<Src, Dst> From<TypedTransform3D<f32, Src, Dst>> for FastTransform<Src, Dst> {
     fn from(transform: TypedTransform3D<f32, Src, Dst>) -> Self {
         FastTransform::with_transform(transform)
     }
 }
 
-impl<Src, Dst> Into<TypedTransform3D<f32, Src, Dst>> for FastTransform<Src, Dst> {
-    fn into(self) -> TypedTransform3D<f32, Src, Dst> {
-        self.to_transform()
-    }
-}
-
 impl<Src, Dst> From<TypedVector2D<f32, Src>> for FastTransform<Src, Dst> {
     fn from(vector: TypedVector2D<f32, Src>) -> Self {
         FastTransform::with_vector(vector)
     }
 }
 
 pub type LayoutFastTransform = FastTransform<LayoutPixel, LayoutPixel>;
 pub type LayoutToWorldFastTransform = FastTransform<LayoutPixel, WorldPixel>;
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -7,22 +7,22 @@ repository = "https://github.com/servo/w
 
 [features]
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 serialize = []
 deserialize = []
 
 [dependencies]
-app_units = "0.6"
+app_units = "0.7"
 bincode = "1.0"
 bitflags = "1.0"
 byteorder = "1.2.1"
 ipc-channel = {version = "0.10.0", optional = true}
-euclid = { version = "0.18", features = ["serde"] }
+euclid = { version = "0.19", features = ["serde"] }
 serde = { version = "=1.0.66", features = ["rc"] }
 serde_derive = { version = "=1.0.66", features = ["deserialize_in_place"] }
 serde_bytes = "0.10"
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.6"
 core-graphics = "0.16"
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -118,17 +118,17 @@ pub struct AuxIter<'a, T> {
     data: &'a [u8],
     size: usize,
     _boo: PhantomData<T>,
 }
 
 impl BuiltDisplayListDescriptor {}
 
 impl BuiltDisplayList {
-    pub fn from_data(data: Vec<u8>, descriptor: BuiltDisplayListDescriptor) -> BuiltDisplayList {
+    pub fn from_data(data: Vec<u8>, descriptor: BuiltDisplayListDescriptor) -> Self {
         BuiltDisplayList { data, descriptor }
     }
 
     pub fn into_data(mut self) -> (Vec<u8>, BuiltDisplayListDescriptor) {
         self.descriptor.send_start_time = precise_time_ns();
         (self.data, self.descriptor)
     }
 
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -2,18 +2,18 @@
 name = "webrender_bindings"
 version = "0.1.0"
 authors = ["The Mozilla Project Developers"]
 license = "MPL-2.0"
 
 [dependencies]
 rayon = "1"
 thread_profiler = "0.1.1"
-euclid = { version = "0.18", features = ["serde"] }
-app_units = "0.6"
+euclid = { version = "0.19", features = ["serde"] }
+app_units = "0.7"
 gleam = "0.6"
 log = "0.4"
 nsstring = { path = "../../servo/support/gecko/nsstring" }
 bincode = "1.0"
 uuid = {version = "0.1.18"}
 fxhash = "0.2.1"
 
 [dependencies.webrender]
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-8a4fe66528aa362721e4048aac3cd5abf7faaf2c
+7a1b919e37d6cd0155077aa90f98cfcdf9fa5bae
--- a/gfx/wrench/Cargo.toml
+++ b/gfx/wrench/Cargo.toml
@@ -5,20 +5,20 @@ authors = ["Vladimir Vukicevic <vladimir
 build = "build.rs"
 license = "MPL-2.0"
 
 [dependencies]
 base64 = "0.6"
 bincode = "1.0"
 byteorder = "1.0"
 env_logger = { version = "0.5", optional = true }
-euclid = "0.18"
+euclid = "0.19"
 gleam = "0.6"
 glutin = "0.17"
-app_units = "0.6"
+app_units = "0.7"
 image = "0.19"
 clap = { version = "2", features = ["yaml"] }
 lazy_static = "1"
 log = "0.4"
 yaml-rust = { git = "https://github.com/vvuk/yaml-rust", features = ["preserve_order"] }
 serde_json = "1.0"
 ron = "0.1.5"
 time = "0.1"