Bug 1422317 - Update webrender to commit b7714b1d4348c00682b5643ea0e3f0b15adaeda5. r=jrmuizel
authorKartikaya Gupta <kgupta@mozilla.com>
Tue, 05 Dec 2017 11:51:33 -0500
changeset 395161 52277221fade40cf523011786476c7ab2c551ff1
parent 395160 3abc6abd34bf81eb3bd29f21630cbefa2f43947b
child 395162 483371920adbd4a5e4597071636ca7f9cdb431de
push id98021
push usernbeleuzu@mozilla.com
push dateTue, 05 Dec 2017 23:49:57 +0000
treeherdermozilla-inbound@78a9a1962684 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjrmuizel
bugs1422317
milestone59.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1422317 - Update webrender to commit b7714b1d4348c00682b5643ea0e3f0b15adaeda5. r=jrmuizel MozReview-Commit-ID: BuZtMenOwqd
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_mask_corner.glsl
gfx/webrender/res/brush_mask_rounded_rect.glsl
gfx/webrender/res/brush_solid.glsl
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/ellipse.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_border_corner.glsl
gfx/webrender/res/ps_rectangle.glsl
gfx/webrender/src/border.rs
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/glyph_rasterizer.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/unix/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender/tests/angle_shader_validation.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-e3dd85359580074f4ca4a554d9a3c85779f8de64
+b7714b1d4348c00682b5643ea0e3f0b15adaeda5
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -14,25 +14,26 @@ debugger = ["ws", "serde_json", "serde",
 
 [dependencies]
 app_units = "0.5.6"
 bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.15.5"
 fxhash = "0.2.1"
 gleam = "0.4.15"
-lazy_static = "0.2"
+lazy_static = "1"
 log = "0.3"
 num-traits = "0.1.32"
 time = "0.1"
 rayon = "0.8"
 webrender_api = {path = "../webrender_api"}
 bitflags = "1.0"
 thread_profiler = "0.1.1"
 plane-split = "0.6"
+smallvec = "0.5"
 ws = { optional = true, version = "0.7.3" }
 serde_json = { optional = true, version = "1.0" }
 serde = { optional = true, version = "1.0" }
 serde_derive = { optional = true, version = "1.0" }
 image = { optional = true, version = "0.17" }
 base64 = { optional = true, version = "0.3.0" }
 
 [dev-dependencies]
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -9,99 +9,196 @@ void brush_vs(
     vec2 local_pos,
     RectWithSize local_rect,
     ivec2 user_data
 );
 
 #define RASTERIZATION_MODE_LOCAL_SPACE      0.0
 #define RASTERIZATION_MODE_SCREEN_SPACE     1.0
 
+#define SEGMENT_ALL             0
+#define SEGMENT_TOP_LEFT        1
+#define SEGMENT_TOP_RIGHT       2
+#define SEGMENT_BOTTOM_RIGHT    3
+#define SEGMENT_BOTTOM_LEFT     4
+#define SEGMENT_TOP_MID         5
+#define SEGMENT_MID_RIGHT       6
+#define SEGMENT_BOTTOM_MID      7
+#define SEGMENT_MID_LEFT        8
+#define SEGMENT_CENTER          9
+
+#define AA_KIND_DEFAULT         0
+#define AA_KIND_SEGMENT         1
+
+#define VECS_PER_BRUSH_PRIM                 4
+
 struct BrushInstance {
     int picture_address;
     int prim_address;
     int clip_node_id;
     int scroll_node_id;
     int clip_address;
     int z;
-    int flags;
+    int segment_kind;
     ivec2 user_data;
 };
 
 BrushInstance load_brush() {
 	BrushInstance bi;
 
     bi.picture_address = aData0.x;
     bi.prim_address = aData0.y;
     bi.clip_node_id = aData0.z / 65536;
     bi.scroll_node_id = aData0.z % 65536;
     bi.clip_address = aData0.w;
     bi.z = aData1.x;
-    bi.flags = aData1.y;
+    bi.segment_kind = aData1.y;
     bi.user_data = aData1.zw;
 
     return bi;
 }
 
+struct BrushPrimitive {
+    RectWithSize local_rect;
+    RectWithSize local_clip_rect;
+    vec4 offsets;
+    int aa_kind;
+};
+
+BrushPrimitive fetch_brush_primitive(int address) {
+    vec4 data[4] = fetch_from_resource_cache_4(address);
+
+    BrushPrimitive prim = BrushPrimitive(
+        RectWithSize(data[0].xy, data[0].zw),
+        RectWithSize(data[1].xy, data[1].zw),
+        data[2],
+        int(data[3].x)
+    );
+
+    return prim;
+}
+
 void main(void) {
     // Load the brush instance from vertex attributes.
     BrushInstance brush = load_brush();
 
     // Load the geometry for this brush. For now, this is simply the
     // local rect of the primitive. In the future, this will support
     // loading segment rects, and other rect formats (glyphs).
-    PrimitiveGeometry geom = fetch_primitive_geometry(brush.prim_address);
+    BrushPrimitive brush_prim = fetch_brush_primitive(brush.prim_address);
+
+    // Fetch the segment of this brush primitive we are drawing.
+    RectWithSize local_segment_rect;
+    vec4 edge_aa_segment_mask;
+
+    // p0 = origin of outer rect
+    // p1 = origin of inner rect
+    // p2 = bottom right corner of inner rect
+    // p3 = bottom right corner of outer rect
+    vec2 p0 = brush_prim.local_rect.p0;
+    vec2 p1 = brush_prim.local_rect.p0 + brush_prim.offsets.xy;
+    vec2 p2 = brush_prim.local_rect.p0 + brush_prim.local_rect.size - brush_prim.offsets.zw;
+    vec2 p3 = brush_prim.local_rect.p0 + brush_prim.local_rect.size;
+
+    switch (brush.segment_kind) {
+        case SEGMENT_ALL:
+            local_segment_rect = brush_prim.local_rect;
+            break;
+
+        case SEGMENT_TOP_LEFT:
+            local_segment_rect = RectWithSize(p0, p1 - p0);
+            break;
+        case SEGMENT_TOP_RIGHT:
+            local_segment_rect = RectWithSize(vec2(p2.x, p0.y), vec2(p3.x - p2.x, p1.y - p0.y));
+            break;
+        case SEGMENT_BOTTOM_RIGHT:
+            local_segment_rect = RectWithSize(vec2(p2.x, p2.y), vec2(p3.x - p2.x, p3.y - p2.y));
+            break;
+        case SEGMENT_BOTTOM_LEFT:
+            local_segment_rect = RectWithSize(vec2(p0.x, p2.y), vec2(p1.x - p0.x, p3.y - p2.y));
+            break;
+
+        case SEGMENT_TOP_MID:
+            local_segment_rect = RectWithSize(vec2(p1.x, p0.y), vec2(p2.x - p1.x, p1.y - p0.y));
+            break;
+        case SEGMENT_MID_RIGHT:
+            local_segment_rect = RectWithSize(vec2(p2.x, p1.y), vec2(p3.x - p2.x, p2.y - p1.y));
+            break;
+        case SEGMENT_BOTTOM_MID:
+            local_segment_rect = RectWithSize(vec2(p1.x, p2.y), vec2(p2.x - p1.x, p3.y - p2.y));
+            break;
+        case SEGMENT_MID_LEFT:
+            local_segment_rect = RectWithSize(vec2(p0.x, p1.y), vec2(p1.x - p0.x, p2.y - p1.y));
+            break;
+
+        case SEGMENT_CENTER:
+            local_segment_rect = RectWithSize(p1, p2 - p1);
+            break;
+
+        default:
+            local_segment_rect = RectWithSize(vec2(0.0), vec2(0.0));
+            break;
+    }
+
+    switch (brush_prim.aa_kind) {
+        case AA_KIND_SEGMENT:
+            // TODO: select these correctly based on the segment kind.
+            edge_aa_segment_mask = vec4(1.0);
+            break;
+        case AA_KIND_DEFAULT:
+            edge_aa_segment_mask = vec4(1.0);
+            break;
+    }
 
     vec2 device_pos, local_pos;
-    RectWithSize local_rect = geom.local_rect;
 
     // Fetch the dynamic picture that we are drawing on.
     PictureTask pic_task = fetch_picture_task(brush.picture_address);
 
     if (pic_task.rasterization_mode == RASTERIZATION_MODE_LOCAL_SPACE) {
-
-        local_pos = local_rect.p0 + aPosition.xy * local_rect.size;
+        local_pos = local_segment_rect.p0 + aPosition.xy * local_segment_rect.size;
 
         // Right now - pictures only support local positions. In the future, this
         // will be expanded to support transform picture types (the common kind).
         device_pos = pic_task.common_data.task_rect.p0 +
                      uDevicePixelRatio * (local_pos - pic_task.content_origin);
 
         // Write the final position transformed by the orthographic device-pixel projection.
         gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
     } else {
         VertexInfo vi;
         Layer layer = fetch_layer(brush.clip_node_id, brush.scroll_node_id);
         ClipArea clip_area = fetch_clip_area(brush.clip_address);
 
         // Write the normal vertex information out.
         if (layer.is_axis_aligned) {
             vi = write_vertex(
-                geom.local_rect,
-                geom.local_clip_rect,
+                local_segment_rect,
+                brush_prim.local_clip_rect,
                 float(brush.z),
                 layer,
                 pic_task,
-                geom.local_rect
+                brush_prim.local_rect
             );
 
             // TODO(gw): vLocalBounds may be referenced by
             //           the fragment shader when running in
             //           the alpha pass, even on non-transformed
             //           items. For now, just ensure it has no
             //           effect. We can tidy this up as we move
             //           more items to be brush shaders.
-            vLocalBounds = vec4(
-                geom.local_clip_rect.p0,
-                geom.local_clip_rect.p0 + geom.local_clip_rect.size
-            );
+#ifdef WR_FEATURE_ALPHA_PASS
+            vLocalBounds = vec4(vec2(-1000000.0), vec2(1000000.0));
+#endif
         } else {
-            vi = write_transform_vertex(geom.local_rect,
-                geom.local_rect,
-                geom.local_clip_rect,
-                vec4(1.0),
+            vi = write_transform_vertex(
+                local_segment_rect,
+                brush_prim.local_rect,
+                brush_prim.local_clip_rect,
+                edge_aa_segment_mask,
                 float(brush.z),
                 layer,
                 pic_task
             );
         }
 
         local_pos = vi.local_pos;
 
@@ -116,19 +213,19 @@ void main(void) {
             vi.screen_pos,
             clip_area
         );
 #endif
     }
 
     // Run the specific brush VS code to write interpolators.
     brush_vs(
-        brush.prim_address + VECS_PER_PRIM_HEADER,
+        brush.prim_address + VECS_PER_BRUSH_PRIM,
         local_pos,
-        local_rect,
+        brush_prim.local_rect,
         brush.user_data
     );
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 vec4 brush_fs();
--- a/gfx/webrender/res/brush_mask_corner.glsl
+++ b/gfx/webrender/res/brush_mask_corner.glsl
@@ -46,15 +46,16 @@ vec4 brush_fs() {
     //       since otherwise the results can be undefined if the
     //       input function is not continuous. I have observed this
     //       as flickering behaviour on Intel GPUs.
     float aa_range = compute_aa_range(vLocalPos);
     // Check if in valid clip region.
     if (vLocalPos.x < vClipCenter_Radius.x && vLocalPos.y < vClipCenter_Radius.y) {
         // Apply ellipse clip on corner.
         d = distance_to_ellipse(vLocalPos - vClipCenter_Radius.xy,
-                                vClipCenter_Radius.zw);
+                                vClipCenter_Radius.zw,
+                                aa_range);
         d = distance_aa(aa_range, d);
     }
 
     return vec4(mix(d, 1.0 - d, vClipMode));
 }
 #endif
--- a/gfx/webrender/res/brush_mask_rounded_rect.glsl
+++ b/gfx/webrender/res/brush_mask_rounded_rect.glsl
@@ -9,45 +9,45 @@ flat varying vec4 vClipCenter_Radius_TL;
 flat varying vec4 vClipCenter_Radius_TR;
 flat varying vec4 vClipCenter_Radius_BR;
 flat varying vec4 vClipCenter_Radius_BL;
 flat varying vec4 vLocalRect;
 varying vec2 vLocalPos;
 
 #ifdef WR_VERTEX_SHADER
 
-struct BrushPrimitive {
+struct RoundedRectPrimitive {
     float clip_mode;
     vec4 rect;
     vec2 radius_tl;
     vec2 radius_tr;
     vec2 radius_br;
     vec2 radius_bl;
 };
 
-BrushPrimitive fetch_brush_primitive(int address) {
+RoundedRectPrimitive fetch_rounded_rect_primitive(int address) {
     vec4 data[4] = fetch_from_resource_cache_4(address);
-    return BrushPrimitive(
+    return RoundedRectPrimitive(
         data[0].x,
         data[1],
         data[2].xy,
         data[2].zw,
         data[3].xy,
         data[3].zw
     );
 }
 
 void brush_vs(
     int prim_address,
     vec2 local_pos,
     RectWithSize local_rect,
     ivec2 user_data
 ) {
     // Load the specific primitive.
-    BrushPrimitive prim = fetch_brush_primitive(prim_address);
+    RoundedRectPrimitive prim = fetch_rounded_rect_primitive(prim_address);
 
     // Write clip parameters
     vClipMode = prim.clip_mode;
 
     // TODO(gw): In the future, when brush primitives may be segment rects
     //           we need to account for that here, and differentiate between
     //           the segment rect (geometry) amd the primitive rect (which
     //           defines where the clip radii are relative to).
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/brush_solid.glsl
@@ -0,0 +1,47 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,prim_shared,brush
+
+flat varying vec4 vColor;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+varying vec2 vLocalPos;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+struct SolidBrush {
+    vec4 color;
+};
+
+SolidBrush fetch_solid_primitive(int address) {
+    vec4 data = fetch_from_resource_cache_1(address);
+    return SolidBrush(data);
+}
+
+void brush_vs(
+    int prim_address,
+    vec2 local_pos,
+    RectWithSize local_rect,
+    ivec2 user_data
+) {
+    SolidBrush prim = fetch_solid_primitive(prim_address);
+    vColor = prim.color;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    vLocalPos = local_pos;
+#endif
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec4 brush_fs() {
+    vec4 color = vColor;
+#ifdef WR_FEATURE_ALPHA_PASS
+    color *= init_transform_fs(vLocalPos);
+#endif
+    return color;
+}
+#endif
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -45,48 +45,18 @@ RectWithSize intersect_rect(RectWithSize
     vec4 p = clamp(vec4(a.p0, a.p0 + a.size), b.p0.xyxy, b.p0.xyxy + b.size.xyxy);
     return RectWithSize(p.xy, max(vec2(0.0), p.zw - p.xy));
 }
 
 // The transformed vertex function that always covers the whole clip area,
 // which is the intersection of all clip instances of a given primitive
 ClipVertexInfo write_clip_tile_vertex(RectWithSize local_clip_rect,
                                       Layer layer,
-                                      ClipArea area,
-                                      int segment) {
-    vec2 outer_p0 = area.screen_origin;
-    vec2 outer_p1 = outer_p0 + area.common_data.task_rect.size;
-    vec2 inner_p0 = area.inner_rect.xy;
-    vec2 inner_p1 = area.inner_rect.zw;
-
-    vec2 p0, p1;
-    switch (segment) {
-        case SEGMENT_ALL:
-            p0 = outer_p0;
-            p1 = outer_p1;
-            break;
-        case SEGMENT_CORNER_TL:
-            p0 = outer_p0;
-            p1 = inner_p0;
-            break;
-        case SEGMENT_CORNER_BL:
-            p0 = vec2(outer_p0.x, outer_p1.y);
-            p1 = vec2(inner_p0.x, inner_p1.y);
-            break;
-        case SEGMENT_CORNER_TR:
-            p0 = vec2(outer_p1.x, outer_p1.y);
-            p1 = vec2(inner_p1.x, inner_p1.y);
-            break;
-        case SEGMENT_CORNER_BR:
-            p0 = vec2(outer_p1.x, outer_p0.y);
-            p1 = vec2(inner_p1.x, inner_p0.y);
-            break;
-    }
-
-    vec2 actual_pos = mix(p0, p1, aPosition.xy);
+                                      ClipArea area) {
+    vec2 actual_pos = area.screen_origin + aPosition.xy * area.common_data.task_rect.size;
 
     vec4 layer_pos = get_layer_pos(actual_pos / uDevicePixelRatio, layer);
 
     // compute the point position in side the layer, in CSS space
     vec2 vertex_pos = actual_pos +
                       area.common_data.task_rect.p0 -
                       area.screen_origin;
 
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -26,18 +26,17 @@ void main(void) {
     ClipArea area = fetch_clip_area(cmi.render_task_address);
     Layer layer = fetch_layer(cmi.layer_address, cmi.layer_address);
     ImageMaskData mask = fetch_mask_data(cmi.clip_data_address);
     RectWithSize local_rect = mask.local_rect;
     ImageResource res = fetch_image_resource_direct(cmi.resource_address);
 
     ClipVertexInfo vi = write_clip_tile_vertex(local_rect,
                                                layer,
-                                               area,
-                                               cmi.segment);
+                                               area);
 
     vPos = vi.local_pos;
     vLayer = res.layer;
 
     vClipMaskUv = vec3((vPos.xy / vPos.z - local_rect.p0) / local_rect.size, 0.0);
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vClipMaskUvRect = vec4(res.uv_rect.xy, res.uv_rect.zw - res.uv_rect.xy) / texture_size.xyxy;
     // applying a half-texel offset to the UV boundaries to prevent linear samples from the outside
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -59,18 +59,17 @@ void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
     Layer layer = fetch_layer(cmi.layer_address, cmi.layer_address);
     ClipData clip = fetch_clip(cmi.clip_data_address);
     RectWithSize local_rect = clip.rect.rect;
 
     ClipVertexInfo vi = write_clip_tile_vertex(local_rect,
                                                layer,
-                                               area,
-                                               cmi.segment);
+                                               area);
     vPos = vi.local_pos;
 
     vClipMode = clip.rect.mode.x;
 
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_rect);
 
     vec2 r_tl = clip.top_left.outer_inner_radius.xy;
     vec2 r_tr = clip.top_right.outer_inner_radius.xy;
--- a/gfx/webrender/res/ellipse.glsl
+++ b/gfx/webrender/res/ellipse.glsl
@@ -50,39 +50,48 @@ float sdEllipse( vec2 p, in vec2 ab ) {
 
     float si = sqrt( 1.0 - co*co );
 
     vec2 r = vec2( ab.x*co, ab.y*si );
 
     return length(r - p ) * sign(p.y-r.y);
 }
 
-float distance_to_ellipse(vec2 p, vec2 radii) {
+float distance_to_ellipse(vec2 p, vec2 radii, float aa_range) {
     // sdEllipse fails on exact circles, so handle equal
     // radii here. The branch coherency should make this
     // a performance win for the circle case too.
+    float len = length(p);
     if (radii.x == radii.y) {
-        return length(p) - radii.x;
+        return len - radii.x;
     } else {
+        if (len < min(radii.x, radii.y) - aa_range) {
+          return -aa_range;
+        } else if (len > max(radii.x, radii.y) + aa_range) {
+          return aa_range;
+        }
+
         return sdEllipse(p, radii);
     }
 }
 
 float clip_against_ellipse_if_needed(
     vec2 pos,
     float current_distance,
     vec4 ellipse_center_radius,
-    vec2 sign_modifier
+    vec2 sign_modifier,
+    float aa_range
 ) {
     if (!all(lessThan(sign_modifier * pos, sign_modifier * ellipse_center_radius.xy))) {
       return current_distance;
     }
 
     return distance_to_ellipse(pos - ellipse_center_radius.xy,
-                               ellipse_center_radius.zw);
+                               ellipse_center_radius.zw,
+                               aa_range);
 }
 
 float rounded_rect(vec2 pos,
                    vec4 clip_center_radius_tl,
                    vec4 clip_center_radius_tr,
                    vec4 clip_center_radius_br,
                    vec4 clip_center_radius_bl,
                    float aa_range) {
@@ -90,31 +99,35 @@ float rounded_rect(vec2 pos,
     // in a corner. If the fragment is in a corner, one of the clip_against_ellipse_if_needed
     // calls below will update it.
     float current_distance = -1.0;
 
     // Clip against each ellipse.
     current_distance = clip_against_ellipse_if_needed(pos,
                                                       current_distance,
                                                       clip_center_radius_tl,
-                                                      vec2(1.0));
+                                                      vec2(1.0),
+                                                      aa_range);
 
     current_distance = clip_against_ellipse_if_needed(pos,
                                                       current_distance,
                                                       clip_center_radius_tr,
-                                                      vec2(-1.0, 1.0));
+                                                      vec2(-1.0, 1.0),
+                                                      aa_range);
 
     current_distance = clip_against_ellipse_if_needed(pos,
                                                       current_distance,
                                                       clip_center_radius_br,
-                                                      vec2(-1.0));
+                                                      vec2(-1.0),
+                                                      aa_range);
 
     current_distance = clip_against_ellipse_if_needed(pos,
                                                       current_distance,
                                                       clip_center_radius_bl,
-                                                      vec2(1.0, -1.0));
+                                                      vec2(1.0, -1.0),
+                                                      aa_range);
 
     // Apply AA
     // See comment in ps_border_corner about the choice of constants.
 
     return distance_aa(aa_range, current_distance);
 }
 #endif
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -64,17 +64,17 @@ vec4[2] fetch_from_resource_cache_2(int 
     return vec4[2](
         TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
         TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0))
     );
 }
 
 #ifdef WR_VERTEX_SHADER
 
-#define VECS_PER_LAYER              11
+#define VECS_PER_LAYER              7
 #define VECS_PER_RENDER_TASK        3
 #define VECS_PER_PRIM_HEADER        2
 #define VECS_PER_TEXT_RUN           3
 #define VECS_PER_GRADIENT           3
 #define VECS_PER_GRADIENT_STOP      2
 
 uniform HIGHP_SAMPLER_FLOAT sampler2D sClipScrollNodes;
 uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;
@@ -138,71 +138,62 @@ vec4 fetch_from_resource_cache_1_direct(
 
 vec4 fetch_from_resource_cache_1(int address) {
     ivec2 uv = get_resource_cache_uv(address);
     return texelFetch(sResourceCache, uv, 0);
 }
 
 struct ClipScrollNode {
     mat4 transform;
-    mat4 inv_transform;
     vec4 local_clip_rect;
     vec2 reference_frame_relative_scroll_offset;
     vec2 scroll_offset;
     bool is_axis_aligned;
 };
 
 ClipScrollNode fetch_clip_scroll_node(int index) {
     ClipScrollNode node;
 
     // Create a UV base coord for each 8 texels.
     // This is required because trying to use an offset
     // of more than 8 texels doesn't work on some versions
     // of OSX.
     ivec2 uv = get_fetch_uv(index, VECS_PER_LAYER);
     ivec2 uv0 = ivec2(uv.x + 0, uv.y);
-    ivec2 uv1 = ivec2(uv.x + 8, uv.y);
 
     node.transform[0] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(0, 0));
     node.transform[1] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(1, 0));
     node.transform[2] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(2, 0));
     node.transform[3] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(3, 0));
 
-    node.inv_transform[0] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(4, 0));
-    node.inv_transform[1] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(5, 0));
-    node.inv_transform[2] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(6, 0));
-    node.inv_transform[3] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(7, 0));
-
-    vec4 clip_rect = TEXEL_FETCH(sClipScrollNodes, uv1, 0, ivec2(0, 0));
+    vec4 clip_rect = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(4, 0));
     node.local_clip_rect = clip_rect;
 
-    vec4 offsets = TEXEL_FETCH(sClipScrollNodes, uv1, 0, ivec2(1, 0));
+    vec4 offsets = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(5, 0));
     node.reference_frame_relative_scroll_offset = offsets.xy;
     node.scroll_offset = offsets.zw;
 
-    vec4 misc = TEXEL_FETCH(sClipScrollNodes, uv1, 0, ivec2(2, 0));
+    vec4 misc = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(6, 0));
     node.is_axis_aligned = misc.x == 0.0;
 
     return node;
 }
 
 struct Layer {
     mat4 transform;
-    mat4 inv_transform;
     RectWithSize local_clip_rect;
     bool is_axis_aligned;
 };
 
 Layer fetch_layer(int clip_node_id, int scroll_node_id) {
     ClipScrollNode clip_node = fetch_clip_scroll_node(clip_node_id);
     ClipScrollNode scroll_node = fetch_clip_scroll_node(scroll_node_id);
 
     Layer layer;
     layer.transform = scroll_node.transform;
-    layer.inv_transform = scroll_node.inv_transform;
 
     vec4 local_clip_rect = clip_node.local_clip_rect;
     local_clip_rect.xy += clip_node.reference_frame_relative_scroll_offset;
     local_clip_rect.xy -= scroll_node.reference_frame_relative_scroll_offset;
     local_clip_rect.xy -= scroll_node.scroll_offset;
 
     layer.local_clip_rect = RectWithSize(local_clip_rect.xy, local_clip_rect.zw);
     layer.is_axis_aligned = scroll_node.is_axis_aligned;
@@ -309,35 +300,32 @@ BlurTask fetch_blur_task(int address) {
     );
 
     return task;
 }
 
 struct ClipArea {
     RenderTaskCommonData common_data;
     vec2 screen_origin;
-    vec4 inner_rect;
 };
 
 ClipArea fetch_clip_area(int index) {
     ClipArea area;
 
     if (index == 0x7FFFFFFF) { //special sentinel task index
         area.common_data = RenderTaskCommonData(
             RectWithSize(vec2(0.0), vec2(0.0)),
             0.0
         );
         area.screen_origin = vec2(0.0);
-        area.inner_rect = vec4(0.0);
     } else {
         RenderTaskData task_data = fetch_render_task_data(index);
 
         area.common_data = task_data.common_data;
         area.screen_origin = task_data.data1.xy;
-        area.inner_rect = task_data.data2;
     }
 
     return area;
 }
 
 struct Gradient {
     vec4 start_end_point;
     vec4 tile_size_repeat;
@@ -543,19 +531,21 @@ vec4 untransform(vec2 ref, vec3 n, vec3 
     return r;
 }
 
 // Given a CSS space position, transform it back into the layer space.
 vec4 get_layer_pos(vec2 pos, Layer layer) {
     // get a point on the layer plane
     vec4 ah = layer.transform * vec4(0.0, 0.0, 0.0, 1.0);
     vec3 a = ah.xyz / ah.w;
+
     // get the normal to the layer plane
-    vec3 n = transpose(mat3(layer.inv_transform)) * vec3(0.0, 0.0, 1.0);
-    return untransform(pos, n, a, layer.inv_transform);
+    mat4 inv_transform = inverse(layer.transform);
+    vec3 n = transpose(mat3(inv_transform)) * vec3(0.0, 0.0, 1.0);
+    return untransform(pos, n, a, inv_transform);
 }
 
 // Compute a snapping offset in world space (adjusted to pixel ratio),
 // given local position on the layer and a snap rectangle.
 vec2 compute_snap_offset(vec2 local_pos,
                          Layer layer,
                          RectWithSize snap_rect) {
     // Ensure that the snap rect is at *least* one device pixel in size.
@@ -734,27 +724,16 @@ ImageResource fetch_image_resource(int a
     return ImageResource(data[0], data[1].x);
 }
 
 ImageResource fetch_image_resource_direct(ivec2 address) {
     vec4 data[2] = fetch_from_resource_cache_2_direct(address);
     return ImageResource(data[0], data[1].x);
 }
 
-struct Rectangle {
-    vec4 color;
-    vec4 edge_aa_segment_mask;
-};
-
-Rectangle fetch_rectangle(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    vec4 mask = vec4((int(data[1].x) & ivec4(1,2,4,8)) != ivec4(0));
-    return Rectangle(data[0], mask);
-}
-
 struct TextRun {
     vec4 color;
     vec4 bg_color;
     vec2 offset;
     int subpx_dir;
 };
 
 TextRun fetch_text_run(int address) {
--- a/gfx/webrender/res/ps_border_corner.glsl
+++ b/gfx/webrender/res/ps_border_corner.glsl
@@ -346,20 +346,20 @@ void main(void) {
         // This error is corrected by offsetting the distance by half a device pixel.
         // This not entirely correct: it leaves an error that varries between
         // 0 and (sqrt(2) - 1)/2 = 0.2 pixels but it is hardly noticeable and is better
         // than the constant sqrt(2)/2 px error without the correction.
         // To correct this exactly we would need to offset p by half a pixel in the
         // direction of the center of the ellipse (a different offset for each corner).
 
         // Get signed distance from the inner/outer clips.
-        float d0 = distance_to_ellipse(p, vRadii0.xy);
-        float d1 = distance_to_ellipse(p, vRadii0.zw);
-        float d2 = distance_to_ellipse(p, vRadii1.xy);
-        float d3 = distance_to_ellipse(p, vRadii1.zw);
+        float d0 = distance_to_ellipse(p, vRadii0.xy, aa_range);
+        float d1 = distance_to_ellipse(p, vRadii0.zw, aa_range);
+        float d2 = distance_to_ellipse(p, vRadii1.xy, aa_range);
+        float d3 = distance_to_ellipse(p, vRadii1.zw, aa_range);
 
         // SDF subtract main radii
         float d_main = max(d0, -d1);
 
         // SDF subtract inner radii (double style borders)
         float d_inner = max(d2, -d3);
 
         // Select how to combine the SDF based on border style.
deleted file mode 100644
--- a/gfx/webrender/res/ps_rectangle.glsl
+++ /dev/null
@@ -1,54 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include shared,prim_shared
-
-varying vec4 vColor;
-
-#ifdef WR_FEATURE_TRANSFORM
-varying vec2 vLocalPos;
-#endif
-
-#ifdef WR_VERTEX_SHADER
-void main(void) {
-    Primitive prim = load_primitive();
-    Rectangle rect = fetch_rectangle(prim.specific_prim_address);
-    vColor = rect.color;
-#ifdef WR_FEATURE_TRANSFORM
-    VertexInfo vi = write_transform_vertex(prim.local_rect,
-                                           prim.local_rect,
-                                           prim.local_clip_rect,
-                                           rect.edge_aa_segment_mask,
-                                           prim.z,
-                                           prim.layer,
-                                           prim.task);
-    vLocalPos = vi.local_pos;
-#else
-    VertexInfo vi = write_vertex(prim.local_rect,
-                                 prim.local_clip_rect,
-                                 prim.z,
-                                 prim.layer,
-                                 prim.task,
-                                 prim.local_rect);
-#endif
-
-#ifdef WR_FEATURE_CLIP
-    write_clip(vi.screen_pos, prim.clip_area);
-#endif
-}
-#endif
-
-#ifdef WR_FRAGMENT_SHADER
-void main(void) {
-    float alpha = 1.0;
-#ifdef WR_FEATURE_TRANSFORM
-    alpha = init_transform_fs(vLocalPos);
-#endif
-
-#ifdef WR_FEATURE_CLIP
-    alpha *= do_clip();
-#endif
-    oFragColor = vColor * alpha;
-}
-#endif
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -4,17 +4,17 @@
 
 use api::{BorderSide, BorderStyle, BorderWidths, ClipAndScrollInfo, ColorF};
 use api::{LayerPoint, LayerRect};
 use api::{LayerPrimitiveInfo, LayerSize, NormalBorder, RepeatMode};
 use clip::ClipSource;
 use ellipse::Ellipse;
 use frame_builder::FrameBuilder;
 use gpu_cache::GpuDataRequest;
-use internal_types::EdgeAaSegmentMask;
+use prim_store::{BrushAntiAliasMode, BrushSegmentDescriptor, BrushSegmentKind};
 use prim_store::{BorderPrimitiveCpu, PrimitiveContainer, TexelRect};
 use util::{lerp, pack_as_float};
 
 #[repr(u8)]
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BorderCornerInstance {
     None,
     Single, // Single instance needed - corner styles are same or similar.
@@ -369,66 +369,94 @@ impl FrameBuilder {
         });
         let all_edges_simple = edges.iter().all(|e| {
             *e == BorderEdgeKind::Solid || *e == BorderEdgeKind::None
         });
 
         let has_no_curve = radius.is_zero();
 
         if has_no_curve && all_corners_simple && all_edges_simple {
-            let p0 = info.rect.origin;
-            let p1 = info.rect.bottom_right();
-            let rect_width = info.rect.size.width;
-            let rect_height = info.rect.size.height;
-            let mut info = info.clone();
+            let inner_rect = LayerRect::new(
+                LayerPoint::new(
+                    info.rect.origin.x + left_len,
+                    info.rect.origin.y + top_len,
+                ),
+                LayerSize::new(
+                    info.rect.size.width - left_len - right_len,
+                    info.rect.size.height - top_len - bottom_len,
+                ),
+            );
 
             // Add a solid rectangle for each visible edge/corner combination.
             if top_edge == BorderEdgeKind::Solid {
-                info.rect = LayerRect::new(p0, LayerSize::new(rect_width, top_len));
+                let descriptor = BrushSegmentDescriptor::new(
+                    &info.rect,
+                    &inner_rect,
+                    Some(&[
+                        BrushSegmentKind::TopLeft,
+                        BrushSegmentKind::TopMid,
+                        BrushSegmentKind::TopRight
+                    ]),
+                );
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
                     border.top.color,
-                    EdgeAaSegmentMask::BOTTOM,
+                    Some(Box::new(descriptor)),
+                    BrushAntiAliasMode::Segment,
                 );
             }
             if left_edge == BorderEdgeKind::Solid {
-                info.rect = LayerRect::new(
-                    LayerPoint::new(p0.x, p0.y + top_len),
-                    LayerSize::new(left_len, rect_height - top_len - bottom_len),
+                let descriptor = BrushSegmentDescriptor::new(
+                    &info.rect,
+                    &inner_rect,
+                    Some(&[
+                        BrushSegmentKind::MidLeft,
+                    ]),
                 );
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
                     border.left.color,
-                    EdgeAaSegmentMask::RIGHT,
+                    Some(Box::new(descriptor)),
+                    BrushAntiAliasMode::Segment,
                 );
             }
             if right_edge == BorderEdgeKind::Solid {
-                info.rect = LayerRect::new(
-                    LayerPoint::new(p1.x - right_len, p0.y + top_len),
-                    LayerSize::new(right_len, rect_height - top_len - bottom_len),
+                let descriptor = BrushSegmentDescriptor::new(
+                    &info.rect,
+                    &inner_rect,
+                    Some(&[
+                        BrushSegmentKind::MidRight,
+                    ]),
                 );
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
                     border.right.color,
-                    EdgeAaSegmentMask::LEFT,
+                    Some(Box::new(descriptor)),
+                    BrushAntiAliasMode::Segment,
                 );
             }
             if bottom_edge == BorderEdgeKind::Solid {
-                info.rect = LayerRect::new(
-                    LayerPoint::new(p0.x, p1.y - bottom_len),
-                    LayerSize::new(rect_width, bottom_len),
+                let descriptor = BrushSegmentDescriptor::new(
+                    &info.rect,
+                    &inner_rect,
+                    Some(&[
+                        BrushSegmentKind::BottomLeft,
+                        BrushSegmentKind::BottomMid,
+                        BrushSegmentKind::BottomRight
+                    ]),
                 );
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
                     border.bottom.color,
-                    EdgeAaSegmentMask::TOP,
+                    Some(Box::new(descriptor)),
+                    BrushAntiAliasMode::Segment,
                 );
             }
         } else {
             // Create clip masks for border corners, if required.
             let mut extra_clips = Vec::new();
             let mut corner_instances = [BorderCornerInstance::Single; 4];
 
             for (i, corner) in corners.iter().enumerate() {
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -4,18 +4,17 @@
 
 use api::{BorderRadiusKind, ColorF, LayerPoint, LayerRect, LayerSize, LayerVector2D};
 use api::{BorderRadius, BoxShadowClipMode, LayoutSize, LayerPrimitiveInfo};
 use api::{ClipMode, ClipAndScrollInfo, ComplexClipRegion, LocalClip};
 use api::{PipelineId};
 use app_units::Au;
 use clip::ClipSource;
 use frame_builder::FrameBuilder;
-use internal_types::EdgeAaSegmentMask;
-use prim_store::{PrimitiveContainer, RectangleContent, RectanglePrimitive};
+use prim_store::{BrushAntiAliasMode, PrimitiveContainer};
 use prim_store::{BrushMaskKind, BrushKind, BrushPrimitive};
 use picture::PicturePrimitive;
 use util::RectHelpers;
 use render_task::MAX_BLUR_STD_DEVIATION;
 
 // The blur shader samples BLUR_SAMPLE_SCALE * blur_radius surrounding texels.
 pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
 
@@ -126,20 +125,24 @@ impl FrameBuilder {
                     )
                 }
             };
 
             self.add_primitive(
                 clip_and_scroll,
                 &fast_info,
                 clips,
-                PrimitiveContainer::Rectangle(RectanglePrimitive {
-                    content: RectangleContent::Fill(*color),
-                    edge_aa_segment_mask: EdgeAaSegmentMask::empty(),
-                }),
+                PrimitiveContainer::Brush(
+                    BrushPrimitive::new(BrushKind::Solid {
+                            color: *color,
+                        },
+                        None,
+                        BrushAntiAliasMode::Primitive,
+                    )
+                ),
             );
         } else {
             let blur_offset = BLUR_SAMPLE_SCALE * blur_radius;
             let mut extra_clips = vec![];
 
             let cache_key = BoxShadowCacheKey {
                 width: Au::from_f32_px(shadow_rect.size.width),
                 height: Au::from_f32_px(shadow_rect.size.height),
@@ -173,22 +176,24 @@ impl FrameBuilder {
                     // If the outset box shadow has a uniform corner side, we can
                     // just blur the top left corner, and stretch / mirror that
                     // across the primitive.
                     if let Some(corner_size) = corner_size {
                         radii_kind = BorderRadiusKind::Uniform;
                         width = MASK_CORNER_PADDING + corner_size.width.max(BLUR_SAMPLE_SCALE * blur_radius);
                         height = MASK_CORNER_PADDING + corner_size.height.max(BLUR_SAMPLE_SCALE * blur_radius);
 
-                        brush_prim = BrushPrimitive {
-                            kind: BrushKind::Mask {
+                        brush_prim = BrushPrimitive::new(
+                            BrushKind::Mask {
                                 clip_mode: brush_clip_mode,
                                 kind: BrushMaskKind::Corner(corner_size),
-                            }
-                        };
+                            },
+                            None,
+                            BrushAntiAliasMode::Primitive,
+                        );
                     } else {
                         // Create a minimal size primitive mask to blur. In this
                         // case, we ensure the size of each corner is the same,
                         // to simplify the shader logic that stretches the blurred
                         // result across the primitive.
                         radii_kind = BorderRadiusKind::NonUniform;
                         let max_width = shadow_radius.top_left.width
                                             .max(shadow_radius.bottom_left.width)
@@ -200,22 +205,24 @@ impl FrameBuilder {
                                             .max(shadow_radius.bottom_right.height);
 
                         width = 2.0 * max_width + BLUR_SAMPLE_SCALE * blur_radius;
                         height = 2.0 * max_height + BLUR_SAMPLE_SCALE * blur_radius;
 
                         let clip_rect = LayerRect::new(LayerPoint::zero(),
                                                        LayerSize::new(width, height));
 
-                        brush_prim = BrushPrimitive {
-                            kind: BrushKind::Mask {
+                        brush_prim = BrushPrimitive::new(
+                            BrushKind::Mask {
                                 clip_mode: brush_clip_mode,
                                 kind: BrushMaskKind::RoundedRect(clip_rect, shadow_radius),
-                            }
-                        };
+                            },
+                            None,
+                            BrushAntiAliasMode::Primitive,
+                        );
                     };
 
                     // Construct a mask primitive to add to the picture.
                     let brush_rect = LayerRect::new(LayerPoint::zero(),
                                                     LayerSize::new(width, height));
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
                     let brush_prim_index = self.create_primitive(
                         &brush_info,
@@ -283,22 +290,24 @@ impl FrameBuilder {
                     let mut adjusted_blur_std_deviation = blur_radius * 0.5;
                     let mut inflate_size = 1.0;
                     while adjusted_blur_std_deviation > MAX_BLUR_STD_DEVIATION {
                         adjusted_blur_std_deviation *= 0.5;
                         inflate_size += 1.0;
                     }
 
                     let brush_rect = brush_rect.inflate(inflate_size, inflate_size);
-                    let brush_prim = BrushPrimitive {
-                        kind: BrushKind::Mask {
+                    let brush_prim = BrushPrimitive::new(
+                        BrushKind::Mask {
                             clip_mode: brush_clip_mode,
                             kind: BrushMaskKind::RoundedRect(clip_rect, shadow_radius),
-                        }
-                    };
+                        },
+                        None,
+                        BrushAntiAliasMode::Primitive,
+                    );
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
                     let brush_prim_index = self.create_primitive(
                         &brush_info,
                         Vec::new(),
                         PrimitiveContainer::Brush(brush_prim),
                     );
 
                     // Create a box shadow picture primitive and add
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -1,22 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderRadius, ComplexClipRegion, DeviceIntRect, ImageMask, ImageRendering, LayerPoint};
-use api::{ClipMode, LayerRect};
-use api::{LayerToWorldTransform, LayoutPoint, LayoutVector2D, LocalClip};
+use api::{BorderRadius, ClipMode, ComplexClipRegion, DeviceIntRect, ImageMask, ImageRendering};
+use api::{LayerPoint, LayerRect, LayerToWorldTransform, LayoutPoint, LayoutVector2D, LocalClip};
 use border::BorderCornerClipSource;
 use ellipse::Ellipse;
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
 use prim_store::{ClipData, ImageMaskData};
 use resource_cache::ResourceCache;
-use util::{extract_inner_rect_safe, MaxRect, TransformedRect};
+use util::{MaxRect, calculate_screen_bounding_rect, extract_inner_rect_safe};
 
 pub type ClipStore = FreeList<ClipSources>;
 pub type ClipSourcesHandle = FreeListHandle<ClipSources>;
 pub type ClipSourcesWeakHandle = WeakFreeListHandle<ClipSources>;
 
 #[derive(Clone, Debug)]
 pub struct ClipRegion {
     pub main: LayerRect,
@@ -248,22 +247,22 @@ impl ClipSources {
     }
 
     pub fn get_screen_bounds(
         &self,
         transform: &LayerToWorldTransform,
         device_pixel_ratio: f32,
     ) -> (DeviceIntRect, Option<DeviceIntRect>) {
         let screen_inner_rect =
-            TransformedRect::new(&self.local_inner_rect, transform, device_pixel_ratio);
+            calculate_screen_bounding_rect(transform, &self.local_inner_rect, device_pixel_ratio);
         let screen_outer_rect = self.local_outer_rect.map(|outer_rect|
-            TransformedRect::new(&outer_rect, transform, device_pixel_ratio).bounding_rect
+            calculate_screen_bounding_rect(transform, &outer_rect, device_pixel_ratio)
         );
 
-        (screen_inner_rect.bounding_rect, screen_outer_rect)
+        (screen_inner_rect, screen_outer_rect)
     }
 }
 
 /// Represents a local rect and a device space
 /// rectangles that are either outside or inside bounds.
 #[derive(Clone, Debug, PartialEq)]
 pub struct Geometry {
     pub local_rect: LayerRect,
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -65,57 +65,55 @@ pub enum NodeType {
 
     /// A special kind of node that adjusts its position based on the position
     /// of its parent node and a given set of sticky positioning offset bounds.
     /// Sticky positioned is described in the CSS Positioned Layout Module Level 3 here:
     /// https://www.w3.org/TR/css-position-3/#sticky-pos
     StickyFrame(StickyFrameInfo),
 }
 
+impl NodeType {
+    fn is_reference_frame(&self) -> bool {
+        match *self {
+            NodeType::ReferenceFrame(_) => true,
+            _ => false,
+        }
+    }
+}
+
 /// Contains information common among all types of ClipScrollTree nodes.
 #[derive(Debug)]
 pub struct ClipScrollNode {
     /// Viewing rectangle in the coordinate system of the parent reference frame.
     pub local_viewport_rect: LayerRect,
 
     /// Clip rect of this node - typically the same as viewport rect, except
     /// in overscroll cases.
     pub local_clip_rect: LayerRect,
 
-    /// Viewport rectangle clipped against parent layer(s) viewport rectangles.
-    /// This is in the coordinate system of the node origin.
-    /// Precisely, it combines the local clipping rectangles of all the parent
-    /// nodes on the way to the root, including those of `ClipRegion` rectangles.
-    /// The combined clip is reset to maximum when an incompatible coordinate
-    /// system is encountered.
-    pub combined_local_viewport_rect: LayerRect,
-
-    /// World transform for the viewport rect itself. This is the parent
-    /// reference frame transformation plus the scrolling offsets provided by
-    /// the nodes in between the reference frame and this node.
+    /// The transformation for this viewport in world coordinates is the transformation for
+    /// our parent reference frame, plus any accumulated scrolling offsets from nodes
+    /// between our reference frame and this node. For reference frames, we also include
+    /// whatever local transformation this reference frame provides. This can be combined
+    /// with the local_viewport_rect to get its position in world space.
     pub world_viewport_transform: LayerToWorldTransform,
 
     /// World transform for content transformed by this node.
     pub world_content_transform: LayerToWorldTransform,
 
-    /// The scroll offset of all the nodes between us and our parent reference frame.
-    /// This is used to calculate intersections between us and content or nodes that
-    /// are also direct children of our reference frame.
-    pub reference_frame_relative_scroll_offset: LayerVector2D,
-
     /// Pipeline that this layer belongs to
     pub pipeline_id: PipelineId,
 
     /// Parent layer. If this is None, we are the root node.
     pub parent: Option<ClipId>,
 
     /// Child layers
     pub children: Vec<ClipId>,
 
-    /// Whether or not this node is a reference frame.
+    /// The type of this node and any data associated with that node type.
     pub node_type: NodeType,
 
     /// The node in the chain of clips that are necessary to clip display items
     /// that have this ClipScrollNode as their clip parent. This will be used to
     /// generate clip tasks.
     pub clip_chain_node: ClipChain,
 
     /// The intersected outer bounds of the clips for this node.
@@ -134,20 +132,18 @@ impl ClipScrollNode {
         pipeline_id: PipelineId,
         parent_id: Option<ClipId>,
         rect: &LayerRect,
         node_type: NodeType
     ) -> Self {
         ClipScrollNode {
             local_viewport_rect: *rect,
             local_clip_rect: *rect,
-            combined_local_viewport_rect: LayerRect::zero(),
             world_viewport_transform: LayerToWorldTransform::identity(),
             world_content_transform: LayerToWorldTransform::identity(),
-            reference_frame_relative_scroll_offset: LayerVector2D::zero(),
             parent: parent_id,
             children: Vec::new(),
             pipeline_id,
             node_type: node_type,
             clip_chain_node: None,
             combined_clip_outer_bounds: DeviceIntRect::max_rect(),
             coordinate_system_id: CoordinateSystemId(0),
             node_data_index: ClipScrollNodeIndex(0),
@@ -263,73 +259,119 @@ impl ClipScrollNode {
         }
 
         scrolling.offset = new_offset;
         scrolling.bouncing_back = false;
         scrolling.started_bouncing_back = false;
         true
     }
 
+    pub fn update_to_empty_rect(&mut self) {
+        self.combined_clip_outer_bounds = DeviceIntRect::zero();
+        self.world_content_transform = LayerToWorldTransform::identity();
+        self.world_viewport_transform = LayerToWorldTransform::identity();
+        self.clip_chain_node = None;
+    }
+
+    pub fn push_gpu_node_data(
+        &mut self,
+        state: &TransformUpdateState,
+        node_data: &mut Vec<ClipScrollNodeData>
+    ) {
+        if self.combined_clip_outer_bounds.is_empty() {
+            node_data.push(ClipScrollNodeData::invalid());
+            return;
+        }
+
+        let local_clip_rect = match self.node_type {
+            _ if self.world_content_transform.has_perspective_component() => LayerRect::max_rect(),
+            NodeType::ReferenceFrame(ref info) => {
+                info.resolved_transform.with_destination::<LayerPixel>()
+                    .inverse_rect_footprint(&state.parent_combined_viewport_rect)
+            }
+            NodeType::Clip(_) | NodeType::ScrollFrame(_) => {
+                state.parent_combined_viewport_rect
+                    .intersection(&self.local_clip_rect)
+                    .unwrap_or(LayerRect::zero())
+            }
+            NodeType::StickyFrame(ref sticky_info) => {
+                state.parent_combined_viewport_rect
+                    .translate(&-sticky_info.current_offset)
+                    .intersection(&self.local_clip_rect)
+                    .unwrap_or(LayerRect::zero())
+            }
+        };
+
+        let transform_kind = if self.world_content_transform.preserves_2d_axis_alignment() {
+            TransformedRectKind::AxisAligned
+        } else {
+            TransformedRectKind::Complex
+        };
+
+        let reference_frame_relative_scroll_offset = match self.node_type {
+            NodeType::ReferenceFrame(_) => LayerVector2D::zero(),
+            NodeType::Clip(_) | NodeType::ScrollFrame(_) => state.parent_accumulated_scroll_offset,
+            NodeType::StickyFrame(ref sticky_info) =>
+                    state.parent_accumulated_scroll_offset + sticky_info.current_offset,
+        };
+
+        let data = ClipScrollNodeData {
+            transform: self.world_content_transform,
+            local_clip_rect,
+            reference_frame_relative_scroll_offset,
+            scroll_offset: self.scroll_offset(),
+            transform_kind: transform_kind as u32 as f32,
+            padding: [0.0; 3],
+        };
+
+        // Write the data that will be made available to the GPU for this node.
+        node_data.push(data);
+    }
+
     pub fn update(
         &mut self,
         state: &mut TransformUpdateState,
-        node_data: &mut Vec<ClipScrollNodeData>,
         device_pixel_ratio: f32,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         scene_properties: &SceneProperties,
     ) {
-        // We set this earlier so that we can use it before we have all the data necessary
-        // to populate the ClipScrollNodeData.
-        self.node_data_index = ClipScrollNodeIndex(node_data.len() as u32);
+        // If any of our parents was not rendered, we are not rendered either and can just
+        // quit here.
+        if state.combined_outer_clip_bounds.is_empty() {
+            self.update_to_empty_rect();
+            return;
+        }
+
+        // If this node is a reference frame, we check if the determinant is 0, which means it
+        // has a non-invertible matrix. For non-reference-frames we assume that they will
+        // produce only additional translations which should be invertible.
+        if self.node_type.is_reference_frame() {
+            if self.world_content_transform.determinant() == 0.0 {
+                self.update_to_empty_rect();
+                return;
+            }
+        }
 
         self.update_transform(state, scene_properties);
         self.update_clip_work_item(
             state,
             device_pixel_ratio,
             clip_store,
             resource_cache,
             gpu_cache,
         );
 
-        let local_clip_rect = if self.world_content_transform.has_perspective_component() {
-            LayerRect::max_rect()
-        } else {
-            self.combined_local_viewport_rect
-        };
-
-        let data = match self.world_content_transform.inverse() {
-            Some(inverse) => {
-                let transform_kind = if self.world_content_transform.preserves_2d_axis_alignment() {
-                    TransformedRectKind::AxisAligned
-                } else {
-                    TransformedRectKind::Complex
-                };
+        // This indicates that we are entirely clipped out.
+        if state.combined_outer_clip_bounds.is_empty() {
+            self.update_to_empty_rect();
+            return;
+        }
 
-                ClipScrollNodeData {
-                    transform: self.world_content_transform,
-                    inv_transform: inverse,
-                    local_clip_rect,
-                    reference_frame_relative_scroll_offset:
-                        self.reference_frame_relative_scroll_offset,
-                    scroll_offset: self.scroll_offset(),
-                    transform_kind: transform_kind as u32 as f32,
-                    padding: [0.0; 3],
-                }
-            }
-            None => {
-                state.combined_outer_clip_bounds = DeviceIntRect::zero();
-                self.combined_clip_outer_bounds = DeviceIntRect::zero();
-                ClipScrollNodeData::invalid()
-            }
-        };
-
-        // Write the data that will be made available to the GPU for this node.
-        node_data.push(data);
     }
 
     pub fn update_clip_work_item(
         &mut self,
         state: &mut TransformUpdateState,
         device_pixel_ratio: f32,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
@@ -396,123 +438,90 @@ impl ClipScrollNode {
         state.parent_clip_chain = self.clip_chain_node.clone();
     }
 
     pub fn update_transform(
         &mut self,
         state: &mut TransformUpdateState,
         scene_properties: &SceneProperties,
     ) {
+        if self.node_type.is_reference_frame() {
+            self.update_transform_for_reference_frame(state, scene_properties);
+            return;
+        }
+
         // We calculate this here to avoid a double-borrow later.
         let sticky_offset = self.calculate_sticky_offset(
             &state.nearest_scrolling_ancestor_offset,
             &state.nearest_scrolling_ancestor_viewport,
         );
 
-        let (local_transform, accumulated_scroll_offset) = match self.node_type {
-            NodeType::ReferenceFrame(ref mut info) => {
-                // Resolve the transform against any property bindings.
-                let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
-                info.resolved_transform = LayerToScrollTransform::create_translation(
-                    info.origin_in_parent_reference_frame.x,
-                    info.origin_in_parent_reference_frame.y,
-                    0.0
-                ).pre_mul(&source_transform)
-                 .pre_mul(&info.source_perspective);
+        // The transformation for the bounds of our viewport is the parent reference frame
+        // transform, plus any accumulated scroll offset from our parents, plus any offset
+        // provided by our own sticky positioning.
+        let accumulated_offset = state.parent_accumulated_scroll_offset + sticky_offset;
+        self.world_viewport_transform = if accumulated_offset != LayerVector2D::zero() {
+            state.parent_reference_frame_transform.pre_translate(accumulated_offset.to_3d())
+        } else {
+            state.parent_reference_frame_transform
+        };
+
+        // The transformation for any content inside of us is the viewport transformation, plus
+        // whatever scrolling offset we supply as well.
+        let scroll_offset = self.scroll_offset();
+        self.world_content_transform = if scroll_offset != LayerVector2D::zero() {
+            self.world_viewport_transform.pre_translate(scroll_offset.to_3d())
+        } else {
+            self.world_viewport_transform
+        };
+
+        match self.node_type {
+            NodeType::StickyFrame(ref mut info) => info.current_offset = sticky_offset,
+            _ => {},
+        }
 
-                self.combined_local_viewport_rect = info.resolved_transform
-                    .with_destination::<LayerPixel>()
-                    .inverse_rect_footprint(&state.parent_combined_viewport_rect);
-                self.reference_frame_relative_scroll_offset = LayerVector2D::zero();
-                (info.resolved_transform, state.parent_accumulated_scroll_offset)
-            }
-            NodeType::Clip(_) | NodeType::ScrollFrame(_) => {
-                // Move the parent's viewport into the local space (of the node origin)
-                // and intersect with the local clip rectangle to get the local viewport.
-                self.combined_local_viewport_rect =
-                    state.parent_combined_viewport_rect
-                    .intersection(&self.local_clip_rect)
-                    .unwrap_or(LayerRect::zero());
-                self.reference_frame_relative_scroll_offset =
-                    state.parent_accumulated_scroll_offset;
-                (
-                    LayerToScrollTransform::identity(),
-                    self.reference_frame_relative_scroll_offset,
-                )
-            }
-            NodeType::StickyFrame(ref mut info) => {
-                info.current_offset = sticky_offset;
-                self.combined_local_viewport_rect =
-                    state.parent_combined_viewport_rect
-                    .translate(&-sticky_offset)
-                    .intersection(&self.local_clip_rect)
-                    .unwrap_or(LayerRect::zero());
-                self.reference_frame_relative_scroll_offset =
-                    state.parent_accumulated_scroll_offset + sticky_offset;
-                (LayerToScrollTransform::identity(), self.reference_frame_relative_scroll_offset)
-            }
+        self.coordinate_system_id = state.current_coordinate_system_id;
+    }
+
+    pub fn update_transform_for_reference_frame(
+        &mut self,
+        state: &mut TransformUpdateState,
+        scene_properties: &SceneProperties,
+    ) {
+        let info = match self.node_type {
+            NodeType::ReferenceFrame(ref mut info) => info,
+            _ => unreachable!("Called update_transform_for_reference_frame on non-ReferenceFrame"),
         };
 
+        // Resolve the transform against any property bindings.
+        let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
+        info.resolved_transform = LayerToScrollTransform::create_translation(
+            info.origin_in_parent_reference_frame.x,
+            info.origin_in_parent_reference_frame.y,
+            0.0
+        ).pre_mul(&source_transform)
+         .pre_mul(&info.source_perspective);
+
+        if !info.resolved_transform.preserves_2d_axis_alignment() ||
+           info.resolved_transform.has_perspective_component() {
+            state.current_coordinate_system_id = state.next_coordinate_system_id;
+            state.next_coordinate_system_id = state.next_coordinate_system_id.next();
+            self.coordinate_system_id = state.current_coordinate_system_id;
+        }
+
         // The transformation for this viewport in world coordinates is the transformation for
         // our parent reference frame, plus any accumulated scrolling offsets from nodes
-        // between our reference frame and this node. For reference frames, we also include
+        // between our reference frame and this node. Finally, we also include
         // whatever local transformation this reference frame provides. This can be combined
         // with the local_viewport_rect to get its position in world space.
         self.world_viewport_transform = state
             .parent_reference_frame_transform
-            .pre_translate(accumulated_scroll_offset.to_3d())
-            .pre_mul(&local_transform.with_destination::<LayerPixel>());
-
-        // The transformation for any content inside of us is the viewport transformation, plus
-        // whatever scrolling offset we supply as well.
-        let scroll_offset = self.scroll_offset();
-        self.world_content_transform = self.world_viewport_transform
-            .pre_translate(scroll_offset.to_3d());
-
-        // The transformation we are passing is the transformation of the parent
-        // reference frame and the offset is the accumulated offset of all the nodes
-        // between us and the parent reference frame. If we are a reference frame,
-        // we need to reset both these values.
-        match self.node_type {
-            NodeType::ReferenceFrame(ref info) => {
-                state.parent_reference_frame_transform = self.world_viewport_transform;
-                state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
-                state.parent_accumulated_scroll_offset = LayerVector2D::zero();
-                state.nearest_scrolling_ancestor_viewport =
-                    state.nearest_scrolling_ancestor_viewport
-                       .translate(&info.origin_in_parent_reference_frame);
-
-                if !info.resolved_transform.preserves_2d_axis_alignment() {
-                    state.current_coordinate_system_id = state.next_coordinate_system_id;
-                    state.next_coordinate_system_id = state.next_coordinate_system_id.next();
-                }
-            },
-            NodeType::Clip(..) => {
-                state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
-            },
-            NodeType::ScrollFrame(ref scrolling) => {
-                state.parent_combined_viewport_rect =
-                        self.combined_local_viewport_rect.translate(&-scrolling.offset);
-                state.parent_accumulated_scroll_offset =
-                    scrolling.offset + state.parent_accumulated_scroll_offset;
-                state.nearest_scrolling_ancestor_offset = scrolling.offset;
-                state.nearest_scrolling_ancestor_viewport = self.local_viewport_rect;
-            }
-            NodeType::StickyFrame(ref info) => {
-                // We don't translate the combined rect by the sticky offset, because sticky
-                // offsets actually adjust the node position itself, whereas scroll offsets
-                // only apply to contents inside the node.
-                state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
-                state.parent_accumulated_scroll_offset =
-                    info.current_offset + state.parent_accumulated_scroll_offset;
-            }
-        }
-
-        // Store coord system ID, and also the ID used for shaders to reference this node.
-        self.coordinate_system_id = state.current_coordinate_system_id;
+            .pre_translate(state.parent_accumulated_scroll_offset.to_3d())
+            .pre_mul(&info.resolved_transform.with_destination::<LayerPixel>());
+        self.world_content_transform = self.world_viewport_transform;
     }
 
     fn calculate_sticky_offset(
         &self,
         viewport_scroll_offset: &LayerVector2D,
         viewport_rect: &LayerRect,
     ) -> LayerVector2D {
         let info = match self.node_type {
@@ -614,16 +623,66 @@ impl ClipScrollNode {
                                          &info.vertical_offset_bounds);
         sticky_offset.x = clamp_adjusted(sticky_offset.x,
                                          info.previously_applied_offset.x,
                                          &info.horizontal_offset_bounds);
 
         sticky_offset
     }
 
+    pub fn prepare_state_for_children(
+        &self,
+        state: &mut TransformUpdateState,
+        node_data: &Vec<ClipScrollNodeData>
+    ) {
+        if self.combined_clip_outer_bounds.is_empty() {
+            state.parent_combined_viewport_rect = LayerRect::zero();
+            state.combined_outer_clip_bounds = DeviceIntRect::zero();
+            state.parent_clip_chain = None;
+            return;
+        }
+
+        let combined_local_viewport_rect =
+            node_data[self.node_data_index.0 as usize].local_clip_rect;
+
+        // The transformation we are passing is the transformation of the parent
+        // reference frame and the offset is the accumulated offset of all the nodes
+        // between us and the parent reference frame. If we are a reference frame,
+        // we need to reset both these values.
+        match self.node_type {
+            NodeType::ReferenceFrame(ref info) => {
+                state.parent_reference_frame_transform = self.world_viewport_transform;
+                state.parent_combined_viewport_rect = combined_local_viewport_rect;
+                state.parent_accumulated_scroll_offset = LayerVector2D::zero();
+                state.nearest_scrolling_ancestor_viewport =
+                    state.nearest_scrolling_ancestor_viewport
+                       .translate(&info.origin_in_parent_reference_frame);
+            }
+            NodeType::Clip(..) => {
+                state.parent_combined_viewport_rect = combined_local_viewport_rect;
+            },
+            NodeType::ScrollFrame(ref scrolling) => {
+                state.parent_combined_viewport_rect =
+                        combined_local_viewport_rect.translate(&-scrolling.offset);
+                state.parent_accumulated_scroll_offset =
+                    scrolling.offset + state.parent_accumulated_scroll_offset;
+                state.nearest_scrolling_ancestor_offset = scrolling.offset;
+                state.nearest_scrolling_ancestor_viewport = self.local_viewport_rect;
+            }
+            NodeType::StickyFrame(ref info) => {
+                // We don't translate the combined rect by the sticky offset, because sticky
+                // offsets actually adjust the node position itself, whereas scroll offsets
+                // only apply to contents inside the node.
+                state.parent_combined_viewport_rect = combined_local_viewport_rect;
+                state.parent_accumulated_scroll_offset =
+                    info.current_offset + state.parent_accumulated_scroll_offset;
+            }
+        }
+    }
+
     pub fn scrollable_size(&self) -> LayerSize {
         match self.node_type {
            NodeType:: ScrollFrame(state) => state.scrollable_size,
             _ => LayerSize::zero(),
         }
     }
 
 
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntRect, LayerPoint, LayerRect};
 use api::{LayerToWorldTransform, LayerVector2D, PipelineId, ScrollClamping, ScrollEventPhase};
 use api::{PropertyBinding, LayoutTransform, ScrollLayerState, ScrollLocation, WorldPoint};
 use clip::ClipStore;
 use clip_scroll_node::{ClipScrollNode, NodeType, ScrollingState, StickyFrameInfo};
 use gpu_cache::GpuCache;
-use gpu_types::ClipScrollNodeData;
+use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use render_task::ClipChain;
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
 use util::MaxRect;
 
 pub type ScrollStates = FastHashMap<ClipId, ScrollingState>;
@@ -380,50 +380,58 @@ impl ClipScrollTree {
     fn update_node(
         &mut self,
         layer_id: ClipId,
         state: &mut TransformUpdateState,
         device_pixel_ratio: f32,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
-        node_data: &mut Vec<ClipScrollNodeData>,
+        gpu_node_data: &mut Vec<ClipScrollNodeData>,
         scene_properties: &SceneProperties,
     ) {
         // TODO(gw): This is an ugly borrow check workaround to clone these.
         //           Restructure this to avoid the clones!
         let mut state = state.clone();
         let node_children = {
             let node = match self.nodes.get_mut(&layer_id) {
                 Some(node) => node,
                 None => return,
             };
 
+            // We set this early so that we can use it to populate the ClipChain.
+            node.node_data_index = ClipScrollNodeIndex(gpu_node_data.len() as u32);
+
             node.update(
                 &mut state,
-                node_data,
                 device_pixel_ratio,
                 clip_store,
                 resource_cache,
                 gpu_cache,
                 scene_properties,
             );
 
+            node.push_gpu_node_data(&state, gpu_node_data);
+
+            if !node.children.is_empty() {
+                node.prepare_state_for_children(&mut state, gpu_node_data);
+            }
+
             node.children.clone()
         };
 
         for child_layer_id in node_children {
             self.update_node(
                 child_layer_id,
                 &mut state,
                 device_pixel_ratio,
                 clip_store,
                 resource_cache,
                 gpu_cache,
-                node_data,
+                gpu_node_data,
                 scene_properties,
             );
         }
     }
 
     pub fn tick_scrolling_bounce_animations(&mut self) {
         for (_, node) in &mut self.nodes {
             node.tick_scrolling_bounce_animation()
@@ -547,20 +555,16 @@ impl ClipScrollTree {
         }
 
         pt.add_item(format!(
             "local_viewport_rect: {:?}",
             node.local_viewport_rect
         ));
         pt.add_item(format!("local_clip_rect: {:?}", node.local_clip_rect));
         pt.add_item(format!(
-            "combined_local_viewport_rect: {:?}",
-            node.combined_local_viewport_rect
-        ));
-        pt.add_item(format!(
             "world_viewport_transform: {:?}",
             node.world_viewport_transform
         ));
         pt.add_item(format!(
             "world_content_transform: {:?}",
             node.world_content_transform
         ));
 
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1,22 +1,24 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::shader_source;
 use api::{ColorF, ImageFormat};
-use api::{DeviceIntRect, DeviceUintSize};
+use api::{DeviceIntRect, DeviceUintRect, DeviceUintSize};
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::{FastHashMap, RenderTargetInfo};
+use smallvec::SmallVec;
 use std::cell::RefCell;
 use std::fs::File;
 use std::io::Read;
 use std::iter::repeat;
+use std::marker::PhantomData;
 use std::mem;
 use std::ops::Add;
 use std::path::PathBuf;
 use std::ptr;
 use std::rc::Rc;
 use std::thread;
 
 #[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)]
@@ -111,16 +113,25 @@ pub struct VertexDescriptor {
     pub instance_attributes: &'static [VertexAttribute],
 }
 
 enum FBOTarget {
     Read,
     Draw,
 }
 
+/// Method of uploading texel data from CPU to GPU.
+#[derive(Debug, Clone)]
+pub enum UploadMethod {
+    /// Just call `glTexSubImage` directly with the CPU data pointer
+    Immediate,
+    /// Accumulate the changes in PBO first before transferring to a texture.
+    PixelBuffer(VertexUsageHint),
+}
+
 pub fn get_gl_format_bgra(gl: &gl::Gl) -> gl::GLuint {
     match gl.get_type() {
         gl::GlType::Gl => GL_FORMAT_BGRA_GL,
         gl::GlType::Gles => GL_FORMAT_BGRA_GLES,
     }
 }
 
 fn get_shader_version(gl: &gl::Gl) -> &'static str {
@@ -577,23 +588,23 @@ pub enum ShaderError {
 }
 
 pub struct Device {
     gl: Rc<gl::Gl>,
     // device state
     bound_textures: [gl::GLuint; 16],
     bound_program: gl::GLuint,
     bound_vao: gl::GLuint,
-    bound_pbo: gl::GLuint,
     bound_read_fbo: FBOId,
     bound_draw_fbo: FBOId,
     default_read_fbo: gl::GLuint,
     default_draw_fbo: gl::GLuint,
 
-    pub device_pixel_ratio: f32,
+    device_pixel_ratio: f32,
+    upload_method: UploadMethod,
 
     // HW or API capabilties
     capabilities: Capabilities,
 
     // debug
     inside_frame: bool,
 
     // resources
@@ -607,38 +618,39 @@ pub struct Device {
     // frames and GPU frames.
     frame_id: FrameId,
 }
 
 impl Device {
     pub fn new(
         gl: Rc<gl::Gl>,
         resource_override_path: Option<PathBuf>,
+        upload_method: UploadMethod,
         _file_changed_handler: Box<FileWatcherHandler>,
         cached_programs: Option<Rc<ProgramCache>>,
     ) -> Device {
         let max_texture_size = gl.get_integer_v(gl::MAX_TEXTURE_SIZE) as u32;
         let renderer_name = gl.get_string(gl::RENDERER);
 
         Device {
             gl,
             resource_override_path,
-            // This is initialized to 1 by default, but it is set
-            // every frame by the call to begin_frame().
+            // This is initialized to 1 by default, but it is reset
+            // at the beginning of each frame in `Renderer::bind_frame_data`.
             device_pixel_ratio: 1.0,
+            upload_method,
             inside_frame: false,
 
             capabilities: Capabilities {
                 supports_multisampling: false, //TODO
             },
 
             bound_textures: [0; 16],
             bound_program: 0,
             bound_vao: 0,
-            bound_pbo: 0,
             bound_read_fbo: FBOId(0),
             bound_draw_fbo: FBOId(0),
             default_read_fbo: 0,
             default_draw_fbo: 0,
 
             max_texture_size,
             renderer_name,
             cached_programs,
@@ -649,32 +661,35 @@ impl Device {
     pub fn gl(&self) -> &gl::Gl {
         &*self.gl
     }
 
     pub fn rc_gl(&self) -> &Rc<gl::Gl> {
         &self.gl
     }
 
+    pub fn set_device_pixel_ratio(&mut self, ratio: f32) {
+        self.device_pixel_ratio = ratio;
+    }
+
     pub fn update_program_cache(&mut self, cached_programs: Rc<ProgramCache>) {
         self.cached_programs = Some(cached_programs);
     }
 
     pub fn max_texture_size(&self) -> u32 {
         self.max_texture_size
     }
 
     pub fn get_capabilities(&self) -> &Capabilities {
         &self.capabilities
     }
 
     pub fn reset_state(&mut self) {
         self.bound_textures = [0; 16];
         self.bound_vao = 0;
-        self.bound_pbo = 0;
         self.bound_read_fbo = FBOId(0);
         self.bound_draw_fbo = FBOId(0);
     }
 
     pub fn compile_shader(
         gl: &gl::Gl,
         name: &str,
         shader_type: gl::GLenum,
@@ -722,17 +737,16 @@ impl Device {
         self.gl.bind_vertex_array(0);
 
         // FBO state
         self.bound_read_fbo = FBOId(self.default_read_fbo);
         self.bound_draw_fbo = FBOId(self.default_draw_fbo);
 
         // Pixel op state
         self.gl.pixel_store_i(gl::UNPACK_ALIGNMENT, 1);
-        self.bound_pbo = 0;
         self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
 
         // Default is sampler 0, always
         self.gl.active_texture(gl::TEXTURE0);
 
         self.frame_id
     }
 
@@ -1068,17 +1082,16 @@ impl Device {
             dest_rect.origin.y + dest_rect.size.height,
             gl::COLOR_BUFFER_BIT,
             gl::LINEAR,
         );
     }
 
     pub fn free_texture_storage(&mut self, texture: &mut Texture) {
         debug_assert!(self.inside_frame);
-        debug_assert_eq!(self.bound_pbo, 0);
 
         if texture.format == ImageFormat::Invalid {
             return;
         }
 
         self.bind_texture(DEFAULT_TEXTURE, texture);
 
         let (internal_format, gl_format) =
@@ -1317,121 +1330,55 @@ impl Device {
         PBO { id }
     }
 
     pub fn delete_pbo(&mut self, mut pbo: PBO) {
         self.gl.delete_buffers(&[pbo.id]);
         pbo.id = 0;
     }
 
-    pub fn bind_pbo(&mut self, pbo: Option<&PBO>) {
+    pub fn upload_texture<'a, T>(
+        &'a mut self,
+        texture: &'a Texture,
+        pbo: &PBO,
+        upload_count: usize,
+    ) -> TextureUploader<'a, T> {
         debug_assert!(self.inside_frame);
-        let pbo_id = pbo.map_or(0, |pbo| pbo.id);
-
-        if self.bound_pbo != pbo_id {
-            self.bound_pbo = pbo_id;
-
-            self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo_id);
-        }
-    }
-
-    pub fn update_pbo_data<T>(&mut self, data: &[T]) {
-        debug_assert!(self.inside_frame);
-        debug_assert_ne!(self.bound_pbo, 0);
-
-        gl::buffer_data(&*self.gl, gl::PIXEL_UNPACK_BUFFER, data, gl::STREAM_DRAW);
-    }
-
-    pub fn orphan_pbo(&mut self, new_size: usize) {
-        debug_assert!(self.inside_frame);
-        debug_assert_ne!(self.bound_pbo, 0);
+        self.bind_texture(DEFAULT_TEXTURE, texture);
 
-        self.gl.buffer_data_untyped(
-            gl::PIXEL_UNPACK_BUFFER,
-            new_size as isize,
-            ptr::null(),
-            gl::STREAM_DRAW,
-        );
-    }
-
-    pub fn update_texture_from_pbo(
-        &mut self,
-        texture: &Texture,
-        x0: u32,
-        y0: u32,
-        width: u32,
-        height: u32,
-        layer_index: i32,
-        stride: Option<u32>,
-        offset: usize,
-    ) {
-        debug_assert!(self.inside_frame);
-
-        let (gl_format, bpp, data_type) = match texture.format {
-            ImageFormat::A8 => (GL_FORMAT_A, 1, gl::UNSIGNED_BYTE),
-            ImageFormat::RGB8 => (gl::RGB, 3, gl::UNSIGNED_BYTE),
-            ImageFormat::BGRA8 => (get_gl_format_bgra(self.gl()), 4, gl::UNSIGNED_BYTE),
-            ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
-            ImageFormat::RGBAF32 => (gl::RGBA, 16, gl::FLOAT),
-            ImageFormat::Invalid => unreachable!(),
+        let buffer = match self.upload_method {
+            UploadMethod::Immediate => None,
+            UploadMethod::PixelBuffer(hint) => {
+                let upload_size = upload_count * mem::size_of::<T>();
+                self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id);
+                if upload_size != 0 {
+                    self.gl.buffer_data_untyped(
+                        gl::PIXEL_UNPACK_BUFFER,
+                        upload_size as _,
+                        ptr::null(),
+                        hint.to_gl(),
+                    );
+                }
+                Some(PixelBuffer::new(hint.to_gl(), upload_size))
+            },
         };
 
-        let row_length = match stride {
-            Some(value) => value / bpp,
-            None => width,
-        };
-
-        if let Some(..) = stride {
-            self.gl
-                .pixel_store_i(gl::UNPACK_ROW_LENGTH, row_length as gl::GLint);
-        }
-
-        self.bind_texture(DEFAULT_TEXTURE, texture);
-
-        match texture.target {
-            gl::TEXTURE_2D_ARRAY => {
-                self.gl.tex_sub_image_3d_pbo(
-                    texture.target,
-                    0,
-                    x0 as gl::GLint,
-                    y0 as gl::GLint,
-                    layer_index,
-                    width as gl::GLint,
-                    height as gl::GLint,
-                    1,
-                    gl_format,
-                    data_type,
-                    offset,
-                );
-            }
-            gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
-                self.gl.tex_sub_image_2d_pbo(
-                    texture.target,
-                    0,
-                    x0 as gl::GLint,
-                    y0 as gl::GLint,
-                    width as gl::GLint,
-                    height as gl::GLint,
-                    gl_format,
-                    data_type,
-                    offset,
-                );
-            }
-            _ => panic!("BUG: Unexpected texture target!"),
-        }
-
-        // Reset row length to 0, otherwise the stride would apply to all texture uploads.
-        if let Some(..) = stride {
-            self.gl.pixel_store_i(gl::UNPACK_ROW_LENGTH, 0 as gl::GLint);
+        TextureUploader {
+            target: UploadTarget {
+                gl: &*self.gl,
+                texture,
+            },
+            buffer,
+            marker: PhantomData,
         }
     }
 
     pub fn read_pixels(&mut self, width: i32, height: i32) -> Vec<u8> {
         self.gl.read_pixels(
-            0, 0, 
+            0, 0,
             width as i32, height as i32,
             gl::RGBA,
             gl::UNSIGNED_BYTE
         )
     }
 
     pub fn bind_vao(&mut self, vao: &VAO) {
         debug_assert!(self.inside_frame);
@@ -1762,8 +1709,177 @@ fn gl_texture_formats_for_image_format(
 }
 
 fn gl_type_for_texture_format(format: ImageFormat) -> gl::GLuint {
     match format {
         ImageFormat::RGBAF32 => gl::FLOAT,
         _ => gl::UNSIGNED_BYTE,
     }
 }
+
+
+struct UploadChunk {
+    rect: DeviceUintRect,
+    layer_index: i32,
+    stride: Option<u32>,
+    offset: usize,
+}
+
+struct PixelBuffer {
+    usage: gl::GLenum,
+    size_allocated: usize,
+    size_used: usize,
+    // small vector avoids heap allocation for a single chunk
+    chunks: SmallVec<[UploadChunk; 1]>,
+}
+
+impl PixelBuffer {
+    fn new(
+        usage: gl::GLenum,
+        size_allocated: usize,
+    ) -> Self {
+        PixelBuffer {
+            usage,
+            size_allocated,
+            size_used: 0,
+            chunks: SmallVec::new(),
+        }
+    }
+}
+
+struct UploadTarget<'a> {
+    gl: &'a gl::Gl,
+    texture: &'a Texture,
+}
+
+pub struct TextureUploader<'a, T> {
+    target: UploadTarget<'a>,
+    buffer: Option<PixelBuffer>,
+    marker: PhantomData<T>,
+}
+
+impl<'a, T> Drop for TextureUploader<'a, T> {
+    fn drop(&mut self) {
+        if let Some(buffer) = self.buffer.take() {
+            for chunk in buffer.chunks {
+                self.target.update_impl(chunk);
+            }
+            self.target.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+        }
+    }
+}
+
+impl<'a, T> TextureUploader<'a, T> {
+    pub fn upload(
+        &mut self,
+        rect: DeviceUintRect,
+        layer_index: i32,
+        stride: Option<u32>,
+        data: &[T],
+    ) {
+        match self.buffer {
+            Some(ref mut buffer) => {
+                let upload_size = mem::size_of::<T>() * data.len();
+                if buffer.size_used + upload_size > buffer.size_allocated {
+                    // flush
+                    for chunk in buffer.chunks.drain() {
+                        self.target.update_impl(chunk);
+                    }
+                    buffer.size_used = 0;
+                }
+
+                if upload_size > buffer.size_allocated {
+                    gl::buffer_data(
+                        self.target.gl,
+                        gl::PIXEL_UNPACK_BUFFER,
+                        data,
+                        buffer.usage,
+                    );
+                    buffer.size_allocated = upload_size;
+                } else {
+                    gl::buffer_sub_data(
+                        self.target.gl,
+                        gl::PIXEL_UNPACK_BUFFER,
+                        buffer.size_used as _,
+                        data,
+                    );
+                }
+
+                buffer.chunks.push(UploadChunk {
+                    rect, layer_index, stride,
+                    offset: buffer.size_used,
+                });
+                buffer.size_used += upload_size;
+            }
+            None => {
+                self.target.update_impl(UploadChunk {
+                    rect, layer_index, stride,
+                    offset: data.as_ptr() as _,
+                });
+            }
+        }
+    }
+}
+
+impl<'a> UploadTarget<'a> {
+    fn update_impl(&mut self, chunk: UploadChunk) {
+        let (gl_format, bpp, data_type) = match self.texture.format {
+            ImageFormat::A8 => (GL_FORMAT_A, 1, gl::UNSIGNED_BYTE),
+            ImageFormat::RGB8 => (gl::RGB, 3, gl::UNSIGNED_BYTE),
+            ImageFormat::BGRA8 => (get_gl_format_bgra(self.gl), 4, gl::UNSIGNED_BYTE),
+            ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
+            ImageFormat::RGBAF32 => (gl::RGBA, 16, gl::FLOAT),
+            ImageFormat::Invalid => unreachable!(),
+        };
+
+        let row_length = match chunk.stride {
+            Some(value) => value / bpp,
+            None => self.texture.width,
+        };
+
+        if chunk.stride.is_some() {
+            self.gl.pixel_store_i(
+                gl::UNPACK_ROW_LENGTH,
+                row_length as _,
+            );
+        }
+
+        let pos = chunk.rect.origin;
+        let size = chunk.rect.size;
+
+        match self.texture.target {
+            gl::TEXTURE_2D_ARRAY => {
+                self.gl.tex_sub_image_3d_pbo(
+                    self.texture.target,
+                    0,
+                    pos.x as _,
+                    pos.y as _,
+                    chunk.layer_index,
+                    size.width as _,
+                    size.height as _,
+                    1,
+                    gl_format,
+                    data_type,
+                    chunk.offset,
+                );
+            }
+            gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
+                self.gl.tex_sub_image_2d_pbo(
+                    self.texture.target,
+                    0,
+                    pos.x as _,
+                    pos.y as _,
+                    size.width as _,
+                    size.height as _,
+                    gl_format,
+                    data_type,
+                    chunk.offset,
+                );
+            }
+            _ => panic!("BUG: Unexpected texture target!"),
+        }
+
+        // Reset row length to 0, otherwise the stride would apply to all texture uploads.
+        if chunk.stride.is_some() {
+            self.gl.pixel_store_i(gl::UNPACK_ROW_LENGTH, 0 as _);
+        }
+    }
+}
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -1,33 +1,32 @@
 
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayListIter, ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion};
 use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, DocumentLayer, Epoch, FilterOp};
 use api::{ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
-use api::{LayerSize, LayerVector2D};
-use api::{LayoutRect, LayoutSize};
+use api::{LayerSize, LayerVector2D, LayoutSize};
 use api::{LocalClip, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLayerState};
 use api::{ScrollLocation, ScrollPolicy, ScrollSensitivity, SpecificDisplayItem, StackingContext};
-use api::{ClipMode, TileOffset, TransformStyle, WorldPoint};
+use api::{TileOffset, TransformStyle, WorldPoint};
 use clip::ClipRegion;
 use clip_scroll_node::StickyFrameInfo;
 use clip_scroll_tree::{ClipScrollTree, ScrollStates};
 use euclid::rect;
 use frame_builder::{FrameBuilder, FrameBuilderConfig, ScrollbarInfo};
 use gpu_cache::GpuCache;
-use internal_types::{EdgeAaSegmentMask, FastHashMap, FastHashSet, RenderedDocument};
+use internal_types::{FastHashMap, FastHashSet, RenderedDocument};
+use prim_store::{BrushAntiAliasMode};
 use profiler::{GpuCacheProfileCounters, TextureCacheProfileCounters};
 use resource_cache::{FontInstanceMap,ResourceCache, TiledImageMap};
 use scene::{Scene, StackingContextHelpers, ScenePipeline, SceneProperties};
 use tiling::CompositeOps;
-use util::ComplexClipRegionHelpers;
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Eq, Ord)]
 pub struct FrameId(pub u32);
 
 static DEFAULT_SCROLLBAR_COLOR: ColorF = ColorF {
     r: 0.3,
     g: 0.3,
     b: 0.3,
@@ -37,21 +36,16 @@ static DEFAULT_SCROLLBAR_COLOR: ColorF =
 struct FlattenContext<'a> {
     scene: &'a Scene,
     builder: FrameBuilder,
     clip_scroll_tree: &'a mut ClipScrollTree,
     font_instances: FontInstanceMap,
     tiled_image_map: TiledImageMap,
     pipeline_epochs: Vec<(PipelineId, Epoch)>,
     replacements: Vec<(ClipId, ClipId)>,
-    /// Opaque rectangle vector, stored here in order to
-    /// avoid re-allocation on each use.
-    opaque_parts: Vec<LayoutRect>,
-    /// Same for the transparent rectangles.
-    transparent_parts: Vec<LayoutRect>,
     output_pipelines: &'a FastHashSet<PipelineId>,
 }
 
 impl<'a> FlattenContext<'a> {
     /// Since WebRender still handles fixed position and reference frame content internally
     /// we need to apply this table of id replacements only to the id that affects the
     /// position of a node. We can eventually remove this when clients start handling
     /// reference frames themselves. This method applies these replacements.
@@ -106,17 +100,18 @@ impl<'a> FlattenContext<'a> {
             if let Some(pipeline) = self.scene.pipelines.get(&pipeline_id) {
                 if let Some(bg_color) = pipeline.background_color {
                     let root_bounds = LayerRect::new(LayerPoint::zero(), *frame_size);
                     let info = LayerPrimitiveInfo::new(root_bounds);
                     self.builder.add_solid_rectangle(
                         ClipAndScrollInfo::simple(root_reference_frame_id),
                         &info,
                         bg_color,
-                        EdgeAaSegmentMask::empty(),
+                        None,
+                        BrushAntiAliasMode::Primitive,
                     );
                 }
             }
         }
 
 
         self.flatten_items(
             traversal,
@@ -443,28 +438,23 @@ impl<'a> FlattenContext<'a> {
                         );
                     }
                     None => {
                         warn!("Unknown font instance key: {:?}", text_info.font_key);
                     }
                 }
             }
             SpecificDisplayItem::Rectangle(ref info) => {
-                if !self.try_to_add_rectangle_splitting_on_clip(
+                self.builder.add_solid_rectangle(
+                    clip_and_scroll,
                     &prim_info,
                     info.color,
-                    &clip_and_scroll,
-                ) {
-                    self.builder.add_solid_rectangle(
-                        clip_and_scroll,
-                        &prim_info,
-                        info.color,
-                        EdgeAaSegmentMask::empty(),
-                    );
-                }
+                    None,
+                    BrushAntiAliasMode::Primitive,
+                );
             }
             SpecificDisplayItem::ClearRectangle => {
                 self.builder.add_clear_rectangle(
                     clip_and_scroll,
                     &prim_info,
                 );
             }
             SpecificDisplayItem::Line(ref info) => {
@@ -626,96 +616,16 @@ impl<'a> FlattenContext<'a> {
             }
             SpecificDisplayItem::PopAllShadows => {
                 self.builder.pop_all_shadows();
             }
         }
         None
     }
 
-    /// Try to optimize the rendering of a solid rectangle that is clipped by a single
-    /// rounded rectangle, by only masking the parts of the rectangle that intersect
-    /// the rounded parts of the clip. This is pretty simple now, so has a lot of
-    /// potential for further optimizations.
-    fn try_to_add_rectangle_splitting_on_clip(
-        &mut self,
-        info: &LayerPrimitiveInfo,
-        color: ColorF,
-        clip_and_scroll: &ClipAndScrollInfo,
-    ) -> bool {
-        if info.rect.size.area() < 200.0 { // arbitrary threshold
-            // too few pixels, don't bother adding instances
-            return false;
-        }
-        // If this rectangle is not opaque, splitting the rectangle up
-        // into an inner opaque region just ends up hurting batching and
-        // doing more work than necessary.
-        if color.a != 1.0 {
-            return false;
-        }
-
-        self.opaque_parts.clear();
-        self.transparent_parts.clear();
-
-        match info.local_clip {
-            LocalClip::Rect(_) => return false,
-            LocalClip::RoundedRect(_, ref region) => {
-                if region.mode == ClipMode::ClipOut {
-                    return false;
-                }
-                region.split_rectangles(
-                    &mut self.opaque_parts,
-                    &mut self.transparent_parts,
-                );
-            }
-        };
-
-        let local_clip = LocalClip::from(*info.local_clip.clip_rect());
-        let mut has_opaque = false;
-
-        for opaque in &self.opaque_parts {
-            let prim_info = LayerPrimitiveInfo {
-                rect: match opaque.intersection(&info.rect) {
-                    Some(rect) => rect,
-                    None => continue,
-                },
-                local_clip,
-                .. info.clone()
-            };
-            self.builder.add_solid_rectangle(
-                *clip_and_scroll,
-                &prim_info,
-                color,
-                EdgeAaSegmentMask::empty(),
-            );
-            has_opaque = true;
-        }
-
-        if !has_opaque {
-            return false
-        }
-
-        for transparent in &self.transparent_parts {
-            let prim_info = LayerPrimitiveInfo {
-                rect: match transparent.intersection(&info.rect) {
-                    Some(rect) => rect,
-                    None => continue,
-                },
-                .. info.clone()
-            };
-            self.builder.add_solid_rectangle(
-                *clip_and_scroll,
-                &prim_info,
-                color,
-                EdgeAaSegmentMask::empty(),
-            );
-        }
-        true
-    }
-
     /// Decomposes an image display item that is repeated into an image per individual repetition.
     /// We need to do this when we are unable to perform the repetition in the shader,
     /// for example if the image is tiled.
     ///
     /// In all of the "decompose" methods below, we independently handle horizontal and vertical
     /// decomposition. This lets us generate the minimum amount of primitives by, for  example,
     /// decompositing the repetition horizontally while repeating vertically in the shader (for
     /// an image where the width is too bug but the height is not).
@@ -1117,18 +1027,16 @@ impl FrameContext {
                     background_color,
                     self.frame_builder_config,
                 ),
                 clip_scroll_tree: &mut self.clip_scroll_tree,
                 font_instances: resource_cache.get_font_instances(),
                 tiled_image_map: resource_cache.get_tiled_image_map(),
                 pipeline_epochs: Vec::new(),
                 replacements: Vec::new(),
-                opaque_parts: Vec::new(),
-                transparent_parts: Vec::new(),
                 output_pipelines,
             };
 
             roller.builder.push_root(
                 root_pipeline_id,
                 &root_pipeline.viewport_size,
                 &root_pipeline.content_size,
                 roller.clip_scroll_tree,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -16,23 +16,24 @@ use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
 use euclid::{SideOffsets2D, vec2};
 use frame::FrameId;
 use glyph_rasterizer::FontInstance;
 use gpu_cache::GpuCache;
-use internal_types::{EdgeAaSegmentMask, FastHashMap, FastHashSet};
+use gpu_types::ClipScrollNodeData;
+use internal_types::{FastHashMap, FastHashSet};
 use picture::{PictureCompositeMode, PictureKind, PicturePrimitive, RasterizationSpace};
-use prim_store::{TexelRect, YuvImagePrimitiveCpu};
+use prim_store::{BrushAntiAliasMode, BrushKind, BrushPrimitive, TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex, SpecificPrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
-use prim_store::{RectangleContent, RectanglePrimitive, TextRunPrimitiveCpu};
+use prim_store::{BrushSegmentDescriptor, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
 use resource_cache::ResourceCache;
 use scene::{ScenePipeline, SceneProperties};
 use std::{mem, usize, f32};
 use tiling::{CompositeOps, Frame};
 use tiling::{RenderPass, RenderPassKind, RenderTargetKind};
 use tiling::{RenderTargetContext, ScrollbarPrimitive};
@@ -392,17 +393,17 @@ impl FrameBuilder {
                 .find(|sc| sc.transform_style == TransformStyle::Flat)
                 .map(|sc| sc.pic_prim_index)
                 .unwrap()
         } else {
             *self.picture_stack.last().unwrap()
         };
 
         // For each filter, create a new image with that composite mode.
-        for filter in &composite_ops.filters {
+        for filter in composite_ops.filters.iter().rev() {
             let src_prim = PicturePrimitive::new_image(
                 Some(PictureCompositeMode::Filter(*filter)),
                 false,
                 pipeline_id,
                 current_reference_frame_id,
                 None,
             );
             let src_clip_sources = self.clip_store.insert(ClipSources::new(Vec::new()));
@@ -751,77 +752,85 @@ impl FrameBuilder {
         mem::replace(&mut self.shadow_prim_stack, shadows);
     }
 
     pub fn add_solid_rectangle(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         color: ColorF,
-        edge_aa_segment_mask: EdgeAaSegmentMask,
+        segments: Option<Box<BrushSegmentDescriptor>>,
+        aa_mode: BrushAntiAliasMode,
     ) {
         if color.a == 0.0 {
             // Don't add transparent rectangles to the draw list, but do consider them for hit
             // testing. This allows specifying invisible hit testing areas.
             self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
             return;
         }
 
-        let prim = RectanglePrimitive {
-            content: RectangleContent::Fill(color),
-            edge_aa_segment_mask,
-        };
+        let prim = BrushPrimitive::new(
+            BrushKind::Solid {
+                color,
+            },
+            segments,
+            aa_mode,
+        );
 
         self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
-            PrimitiveContainer::Rectangle(prim),
+            PrimitiveContainer::Brush(prim),
         );
     }
 
     pub fn add_clear_rectangle(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
     ) {
-        let prim = RectanglePrimitive {
-            content: RectangleContent::Clear,
-            edge_aa_segment_mask: EdgeAaSegmentMask::empty(),
-        };
+        let prim = BrushPrimitive::new(
+            BrushKind::Clear,
+            None,
+            BrushAntiAliasMode::Primitive,
+        );
 
         self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
-            PrimitiveContainer::Rectangle(prim),
+            PrimitiveContainer::Brush(prim),
         );
     }
 
     pub fn add_scroll_bar(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         color: ColorF,
         scrollbar_info: ScrollbarInfo,
     ) {
         if color.a == 0.0 {
             return;
         }
 
-        let prim = RectanglePrimitive {
-            content: RectangleContent::Fill(color),
-            edge_aa_segment_mask: EdgeAaSegmentMask::empty(),
-        };
+        let prim = BrushPrimitive::new(
+            BrushKind::Solid {
+                color,
+            },
+            None,
+            BrushAntiAliasMode::Primitive,
+        );
 
         let prim_index = self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
-            PrimitiveContainer::Rectangle(prim),
+            PrimitiveContainer::Brush(prim),
         );
 
         self.scrollbar_prims.push(ScrollbarPrimitive {
             prim_index,
             clip_id: scrollbar_info.0,
             frame_rect: scrollbar_info.1,
         });
     }
@@ -1571,16 +1580,17 @@ impl FrameBuilder {
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         profile_counters: &mut FrameProfileCounters,
         device_pixel_ratio: f32,
         scene_properties: &SceneProperties,
+        node_data: &[ClipScrollNodeData],
     ) -> Option<RenderTaskId> {
         profile_scope!("cull");
 
         if self.prim_store.cpu_pictures.is_empty() {
             return None
         }
 
         // The root picture is always the first one added.
@@ -1613,16 +1623,17 @@ impl FrameBuilder {
             &root_prim_context,
             true,
             &mut child_tasks,
             profile_counters,
             None,
             scene_properties,
             SpecificPrimitiveIndex(0),
             &self.screen_rect.to_i32(),
+            node_data,
         );
 
         let pic = &mut self.prim_store.cpu_pictures[0];
         pic.runs = prim_run_cmds;
 
         let root_render_task = RenderTask::new_picture(
             None,
             PrimitiveIndex(0),
@@ -1693,17 +1704,17 @@ impl FrameBuilder {
         let mut profile_counters = FrameProfileCounters::new();
         profile_counters
             .total_primitives
             .set(self.prim_store.prim_count());
 
         resource_cache.begin_frame(frame_id);
         gpu_cache.begin_frame();
 
-        let mut node_data = Vec::new();
+        let mut node_data = Vec::with_capacity(clip_scroll_tree.nodes.len());
         clip_scroll_tree.update_tree(
             &self.screen_rect.to_i32(),
             device_pixel_ratio,
             &mut self.clip_store,
             resource_cache,
             gpu_cache,
             pan,
             &mut node_data,
@@ -1718,16 +1729,17 @@ impl FrameBuilder {
             clip_scroll_tree,
             pipelines,
             resource_cache,
             gpu_cache,
             &mut render_tasks,
             &mut profile_counters,
             device_pixel_ratio,
             scene_properties,
+            &node_data,
         );
 
         let mut passes = Vec::new();
         resource_cache.block_until_all_resources_added(gpu_cache, texture_cache_profile);
 
         if let Some(main_render_task_id) = main_render_task_id {
             let mut required_pass_count = 0;
             render_tasks.max_depth(main_render_task_id, 0, &mut required_pass_count);
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -77,19 +77,19 @@ impl FontTransform {
 
     pub fn determinant(&self) -> f64 {
         self.scale_x as f64 * self.scale_y as f64 - self.skew_y as f64 * self.skew_x as f64
     }
 
     pub fn compute_scale(&self) -> Option<(f64, f64)> {
         let det = self.determinant();
         if det != 0.0 {
-            let major = (self.scale_x as f64).hypot(self.skew_y as f64);
-            let minor = det.abs() / major;
-            Some((major, minor))
+            let x_scale = (self.scale_x as f64).hypot(self.skew_y as f64);
+            let y_scale = det.abs() / x_scale;
+            Some((x_scale, y_scale))
         } else {
             None
         }
     }
 
     pub fn pre_scale(&self, scale_x: f32, scale_y: f32) -> Self {
         FontTransform::new(
             self.scale_x * scale_x,
@@ -188,16 +188,29 @@ impl FontInstance {
             FontRenderMode::Subpixel => {
                 if self.transform.is_identity() { GlyphFormat::Subpixel } else { GlyphFormat::TransformedSubpixel }
             }
             FontRenderMode::Bitmap => {
                 if color_bitmaps { GlyphFormat::ColorBitmap } else { GlyphFormat::Alpha }
             }
         }
     }
+
+    #[allow(dead_code)]
+    pub fn get_extra_strikes(&self, x_scale: f64) -> usize {
+        if self.flags.contains(FontInstanceFlags::SYNTHETIC_BOLD) {
+            let mut bold_offset = self.size.to_f64_px() / 48.0;
+            if bold_offset < 1.0 {
+                bold_offset = 0.25 + 0.75 * bold_offset;
+            }
+            (bold_offset * x_scale).max(1.0).round() as usize
+        } else {
+            0
+        }
+    }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum GlyphFormat {
     Alpha,
     TransformedAlpha,
     Subpixel,
     TransformedSubpixel,
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{LayerVector2D, LayerRect, LayerToWorldTransform, WorldToLayerTransform};
+use api::{LayerVector2D, LayerRect, LayerToWorldTransform};
 use gpu_cache::GpuCacheAddress;
 use render_task::RenderTaskAddress;
 
 // Contains type that must exactly match the same structures declared in GLSL.
 
 #[repr(i32)]
 #[derive(Debug, Copy, Clone)]
 pub enum BlurDirection {
@@ -149,31 +149,31 @@ impl From<CompositePrimitiveInstance> fo
 #[repr(C)]
 pub struct BrushInstance {
     pub picture_address: RenderTaskAddress,
     pub prim_address: GpuCacheAddress,
     pub clip_id: ClipScrollNodeIndex,
     pub scroll_id: ClipScrollNodeIndex,
     pub clip_task_address: RenderTaskAddress,
     pub z: i32,
-    pub flags: i32,
+    pub segment_kind: i32,
     pub user_data0: i32,
     pub user_data1: i32,
 }
 
 impl From<BrushInstance> for PrimitiveInstance {
     fn from(instance: BrushInstance) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
                 instance.picture_address.0 as i32,
                 instance.prim_address.as_int(),
                 ((instance.clip_id.0 as i32) << 16) | instance.scroll_id.0 as i32,
                 instance.clip_task_address.0 as i32,
                 instance.z,
-                instance.flags,
+                instance.segment_kind,
                 instance.user_data0,
                 instance.user_data1,
             ]
         }
     }
 }
 
 // Defines how a brush image is stretched onto the primitive.
@@ -181,37 +181,46 @@ impl From<BrushInstance> for PrimitiveIn
 // of the primitive, in which case this will be redundant.
 #[repr(C)]
 pub enum BrushImageKind {
     Simple = 0,     // A normal rect
     NinePatch = 1,  // A nine-patch image (stretch inside segments)
     Mirror = 2,     // A top left corner only (mirror across x/y axes)
 }
 
-#[derive(Copy, Debug, Clone)]
+#[derive(Copy, Debug, Clone, PartialEq)]
 #[repr(C)]
 pub struct ClipScrollNodeIndex(pub u32);
 
 #[derive(Debug)]
 #[repr(C)]
 pub struct ClipScrollNodeData {
     pub transform: LayerToWorldTransform,
-    pub inv_transform: WorldToLayerTransform,
+
+    /// Viewport rectangle clipped against parent viewport rectangles.  This is
+    /// in the coordinate system of the node origin.  Precisely, it combines the
+    /// local clipping rectangles of all the parent nodes on the way to the root,
+    /// including those of `ClipRegion` rectangles.  The combined clip is reset to
+    /// maximum when an incompatible coordinate system is encountered.
     pub local_clip_rect: LayerRect,
+
+    /// The scroll offset of all the nodes between us and our parent reference frame.
+    /// This is used to calculate intersections between us and content or nodes that
+    /// are also direct children of our reference frame.
     pub reference_frame_relative_scroll_offset: LayerVector2D,
+
     pub scroll_offset: LayerVector2D,
     pub transform_kind: f32,
     pub padding: [f32; 3],
 }
 
 impl ClipScrollNodeData {
     pub fn invalid() -> ClipScrollNodeData {
         ClipScrollNodeData {
             transform: LayerToWorldTransform::identity(),
-            inv_transform: WorldToLayerTransform::identity(),
             local_clip_rect: LayerRect::zero(),
             reference_frame_relative_scroll_offset: LayerVector2D::zero(),
             scroll_offset: LayerVector2D::zero(),
             transform_kind: 0.0,
             padding: [0.0; 3],
         }
     }
 }
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -185,23 +185,8 @@ pub enum ResultMsg {
     },
 }
 
 #[derive(Clone, Copy, Debug)]
 pub struct UvRect {
     pub uv0: DevicePoint,
     pub uv1: DevicePoint,
 }
-
-bitflags! {
-    /// Each bit of the edge AA mask is:
-    /// 0, when the edge of the primitive needs to be considered for AA
-    /// 1, when the edge of the segment needs to be considered for AA
-    ///
-    /// *Note*: the bit values have to match the shader logic in
-    /// `write_transform_vertex()` function.
-    pub struct EdgeAaSegmentMask: u8 {
-        const LEFT = 0x1;
-        const TOP = 0x2;
-        const RIGHT = 0x4;
-        const BOTTOM = 0x8;
-    }
-}
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -139,26 +139,27 @@ extern crate gleam;
 extern crate num_traits;
 extern crate plane_split;
 extern crate rayon;
 #[cfg(feature = "debugger")]
 #[macro_use]
 extern crate serde_derive;
 #[cfg(feature = "debugger")]
 extern crate serde_json;
+extern crate smallvec;
 extern crate time;
 #[cfg(feature = "debugger")]
 extern crate ws;
 #[cfg(feature = "debugger")]
 extern crate image;
 #[cfg(feature = "debugger")]
 extern crate base64;
 
 pub extern crate webrender_api;
 
 #[doc(hidden)]
-pub use device::{build_shader_strings, ProgramCache};
+pub use device::{build_shader_strings, ProgramCache, UploadMethod, VertexUsageHint};
 pub use renderer::{CpuProfile, DebugFlags, GpuProfile, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, ReadPixelsFormat, Renderer, RendererOptions};
 pub use renderer::{RendererStats, ThreadListener};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use webrender_api as api;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -520,17 +520,17 @@ impl PicturePrimitive {
             }
         }
 
         if let Some(render_task_id) = self.render_task_id {
             parent_tasks.push(render_task_id);
         }
     }
 
-    pub fn write_gpu_blocks(&self, mut _request: GpuDataRequest) {
+    pub fn write_gpu_blocks(&self, _request: &mut GpuDataRequest) {
         // TODO(gw): We'll need to write the GPU blocks
         //           here specific to a brush primitive
         //           once we start drawing pictures as brushes!
     }
 
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
             PictureKind::TextShadow { .. } => RenderTargetKind::Color,
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -58,17 +58,16 @@ fn supports_subpixel_aa() -> bool {
         8,
         4,
         &CGColorSpace::create_device_rgb(),
         kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Little,
     );
     let ct_font = core_text::font::new_from_name("Helvetica", 16.).unwrap();
     cg_context.set_should_smooth_fonts(true);
     cg_context.set_should_antialias(true);
-    cg_context.set_allows_font_smoothing(true);
     cg_context.set_rgb_fill_color(1.0, 1.0, 1.0, 1.0);
     let point = CGPoint { x: -1., y: 0. };
     let glyph = '|' as CGGlyph;
     ct_font.draw_glyphs(&[glyph], &[point], cg_context.clone());
     let data = cg_context.data();
     data[0] != data[1] || data[1] != data[2]
 }
 
@@ -79,16 +78,17 @@ fn should_use_white_on_black(color: Colo
 }
 
 fn get_glyph_metrics(
     ct_font: &CTFont,
     transform: Option<&CGAffineTransform>,
     glyph: CGGlyph,
     x_offset: f64,
     y_offset: f64,
+    extra_width: f64,
 ) -> GlyphMetrics {
     let mut bounds = ct_font.get_bounding_rects_for_glyphs(kCTFontDefaultOrientation, &[glyph]);
 
     if bounds.origin.x.is_nan() || bounds.origin.y.is_nan() || bounds.size.width.is_nan() ||
         bounds.size.height.is_nan()
     {
         // If an unexpected glyph index is requested, core text will return NaN values
         // which causes us to do bad thing as the value is cast into an integer and
@@ -103,16 +103,23 @@ fn get_glyph_metrics(
             rasterized_descent: 0,
             advance: 0.0,
         };
     }
 
     let mut advance = CGSize { width: 0.0, height: 0.0 };
     ct_font.get_advances_for_glyphs(kCTFontDefaultOrientation, &glyph, &mut advance, 1);
 
+    if bounds.size.width > 0.0 {
+        bounds.size.width += extra_width;
+    }
+    if advance.width > 0.0 {
+        advance.width += extra_width;
+    }
+
     if let Some(transform) = transform {
         bounds = bounds.apply_transform(transform);
         advance = advance.apply_transform(transform);
     }
 
     // First round out to pixel boundaries
     // CG Origin is bottom left
     let mut left = bounds.origin.x.floor() as i32;
@@ -340,17 +347,17 @@ impl FontContext {
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         self.get_ct_font(font.font_key, font.size, &font.variations)
             .and_then(|ref ct_font| {
                 let glyph = key.index as CGGlyph;
                 let (x_offset, y_offset) = font.get_subpx_offset(key);
-                let metrics = get_glyph_metrics(ct_font, None, glyph, x_offset, y_offset);
+                let metrics = get_glyph_metrics(ct_font, None, glyph, x_offset, y_offset, 0.0);
                 if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
                     None
                 } else {
                     Some(GlyphDimensions {
                         left: metrics.rasterized_left,
                         top: metrics.rasterized_ascent,
                         width: metrics.rasterized_width as u32,
                         height: metrics.rasterized_height as u32,
@@ -438,39 +445,47 @@ impl FontContext {
         }
     }
 
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
-        let (.., minor) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
-        let size = font.size.scale_by(minor as f32);
+        let (x_scale, y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
+        let size = font.size.scale_by(y_scale as f32);
         let ct_font = match self.get_ct_font(font.font_key, size, &font.variations) {
             Some(font) => font,
             None => return None,
         };
 
-        let shape = font.transform.pre_scale(minor.recip() as f32, minor.recip() as f32);
+        let shape = font.transform.pre_scale(y_scale.recip() as f32, y_scale.recip() as f32);
         let transform = if shape.is_identity() {
             None
         } else {
             Some(CGAffineTransform {
                 a: shape.scale_x as f64,
                 b: -shape.skew_y as f64,
                 c: -shape.skew_x as f64,
                 d: shape.scale_y as f64,
                 tx: 0.0,
                 ty: 0.0
             })
         };
         let glyph = key.index as CGGlyph;
         let (x_offset, y_offset) = font.get_subpx_offset(key);
-        let metrics = get_glyph_metrics(&ct_font, transform.as_ref(), glyph, x_offset, y_offset);
+        let extra_strikes = font.get_extra_strikes(x_scale);
+        let metrics = get_glyph_metrics(
+            &ct_font,
+            transform.as_ref(),
+            glyph,
+            x_offset,
+            y_offset,
+            extra_strikes as f64 * y_scale / x_scale,
+        );
         if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
             return None;
         }
 
         // The result of this function, in all render modes, is going to be a
         // BGRA surface with white text on transparency using premultiplied
         // alpha. For subpixel text, the RGB values will be the mask value for
         // the individual components. For bitmap glyphs, the RGB values will be
@@ -553,19 +568,17 @@ impl FontContext {
         // These are always true in Gecko, even for non-AA fonts
         cg_context.set_allows_font_subpixel_positioning(true);
         cg_context.set_should_subpixel_position_fonts(true);
 
         // Don't quantize because we're doing it already.
         cg_context.set_allows_font_subpixel_quantization(false);
         cg_context.set_should_subpixel_quantize_fonts(false);
 
-        cg_context.set_allows_font_smoothing(smooth);
         cg_context.set_should_smooth_fonts(smooth);
-        cg_context.set_allows_antialiasing(antialias);
         cg_context.set_should_antialias(antialias);
 
         // Fill the background. This could be opaque white, opaque black, or
         // transparency.
         cg_context.set_rgb_fill_color(bg_color, bg_color, bg_color, bg_alpha);
         let rect = CGRect {
             origin: CGPoint { x: 0.0, y: 0.0 },
             size: CGSize {
@@ -586,17 +599,27 @@ impl FontContext {
         };
 
         if let Some(transform) = transform {
             cg_context.set_text_matrix(&transform);
 
             draw_origin = draw_origin.apply_transform(&transform.invert());
         }
 
-        ct_font.draw_glyphs(&[glyph], &[draw_origin], cg_context.clone());
+        if extra_strikes > 0 {
+            let strikes = 1 + extra_strikes;
+            let pixel_step = y_scale / x_scale;
+            let glyphs = vec![glyph; strikes];
+            let origins = (0..strikes)
+                .map(|i| CGPoint { x: draw_origin.x + i as f64 * pixel_step, y: draw_origin.y })
+                .collect::<Vec<_>>();
+            ct_font.draw_glyphs(&glyphs, &origins, cg_context.clone());
+        } else {
+            ct_font.draw_glyphs(&[glyph], &[draw_origin], cg_context.clone());
+        }
 
         let mut rasterized_pixels = cg_context.data().to_vec();
 
         if font.render_mode != FontRenderMode::Bitmap {
             // We rendered text into an opaque surface. The code below needs to
             // ignore the current value of each pixel's alpha channel. But it's
             // allowed to write to the alpha channel, because we're done calling
             // CG functions now.
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -170,17 +170,17 @@ impl FontContext {
                     load_flags |= FT_LOAD_FORCE_AUTOHINT;
                 }
             }
         }
 
         if font.flags.contains(FontInstanceFlags::NO_AUTOHINT) {
             load_flags |= FT_LOAD_NO_AUTOHINT;
         }
-        if font.flags.contains(FontInstanceFlags::EMBEDDED_BITMAPS) {
+        if !font.flags.contains(FontInstanceFlags::EMBEDDED_BITMAPS) {
             load_flags |= FT_LOAD_NO_BITMAP;
         }
         if font.flags.contains(FontInstanceFlags::VERTICAL_LAYOUT) {
             load_flags |= FT_LOAD_VERTICAL_LAYOUT;
         }
 
         load_flags |= FT_LOAD_COLOR;
         load_flags |= FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH;
@@ -189,30 +189,30 @@ impl FontContext {
         let mut result = if font.render_mode == FontRenderMode::Bitmap {
             if (load_flags & FT_LOAD_NO_BITMAP) != 0 {
                 FT_Error(FT_Err_Cannot_Render_Glyph as i32)
             } else {
                 unsafe { FT_Set_Transform(face.face, ptr::null_mut(), ptr::null_mut()) };
                 self.choose_bitmap_size(face.face, req_size)
             }
         } else {
-            let (major, minor) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
-            let shape = font.transform.pre_scale(major.recip() as f32, minor.recip() as f32);
+            let (x_scale, y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
+            let shape = font.transform.pre_scale(x_scale.recip() as f32, y_scale.recip() as f32);
             let mut ft_shape = FT_Matrix {
                 xx: (shape.scale_x * 65536.0) as FT_Fixed,
                 xy: (shape.skew_x * -65536.0) as FT_Fixed,
                 yx: (shape.skew_y * -65536.0) as FT_Fixed,
                 yy: (shape.scale_y * 65536.0) as FT_Fixed,
             };
             unsafe {
                 FT_Set_Transform(face.face, &mut ft_shape, ptr::null_mut());
                 FT_Set_Char_Size(
                     face.face,
-                    (req_size * major * 64.0 + 0.5) as FT_F26Dot6,
-                    (req_size * minor * 64.0 + 0.5) as FT_F26Dot6,
+                    (req_size * x_scale * 64.0 + 0.5) as FT_F26Dot6,
+                    (req_size * y_scale * 64.0 + 0.5) as FT_F26Dot6,
                     0,
                     0,
                 )
             }
         };
 
         if result.succeeded() {
             result = unsafe { FT_Load_Glyph(face.face, glyph.index as FT_UInt, load_flags as FT_Int32) };
@@ -488,19 +488,20 @@ impl FontContext {
             None => return None,
         };
 
         // Get dimensions of the glyph, to see if we need to rasterize it.
         let dimensions = match self.get_glyph_dimensions_impl(slot, font, key, false) {
             Some(val) => val,
             None => return None,
         };
+        let GlyphDimensions { mut left, mut top, width, height, .. } = dimensions;
 
         // For spaces and other non-printable characters, early out.
-        if dimensions.width == 0 || dimensions.height == 0 {
+        if width == 0 || height == 0 {
             return None;
         }
 
         let format = unsafe { (*slot).format };
         let mut scale = 1.0;
         match format {
             FT_Glyph_Format::FT_GLYPH_FORMAT_BITMAP => {
                 let y_size = unsafe { (*(*(*slot).face).size).metrics.y_ppem };
@@ -510,44 +511,43 @@ impl FontContext {
                 if !self.rasterize_glyph_outline(slot, font, key) {
                     return None;
                 }
             }
             _ => {
                 error!("Unsupported {:?}", format);
                 return None;
             }
-        }
+        };
 
-        let bitmap = unsafe { &(*slot).bitmap };
-        let pixel_mode = unsafe { mem::transmute(bitmap.pixel_mode as u32) };
         info!(
             "Rasterizing {:?} as {:?} with dimensions {:?}",
             key,
             font.render_mode,
             dimensions
         );
 
+        let bitmap = unsafe { &(*slot).bitmap };
+        let pixel_mode = unsafe { mem::transmute(bitmap.pixel_mode as u32) };
         let (actual_width, actual_height) = match pixel_mode {
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD => {
                 assert!(bitmap.width % 3 == 0);
                 ((bitmap.width / 3) as i32, bitmap.rows as i32)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD_V => {
                 assert!(bitmap.rows % 3 == 0);
                 (bitmap.width as i32, (bitmap.rows / 3) as i32)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_MONO |
             FT_Pixel_Mode::FT_PIXEL_MODE_GRAY |
             FT_Pixel_Mode::FT_PIXEL_MODE_BGRA => {
                 (bitmap.width as i32, bitmap.rows as i32)
             }
             _ => panic!("Unsupported {:?}", pixel_mode),
         };
-        let (left, top) = unsafe { ((*slot).bitmap_left, (*slot).bitmap_top) };
         let mut final_buffer = vec![0; (actual_width * actual_height * 4) as usize];
 
         // Extract the final glyph from FT format into RGBA8 format, which is
         // what WR expects.
         let subpixel_bgr = font.flags.contains(FontInstanceFlags::SUBPIXEL_BGR);
         let mut src_row = bitmap.buffer;
         let mut dest: usize = 0;
         while dest < final_buffer.len() {
@@ -620,19 +620,29 @@ impl FontContext {
                     dest_slice.copy_from_slice(src_slice);
                 }
                 _ => panic!("Unsupported {:?}", pixel_mode),
             }
             src_row = unsafe { src_row.offset(bitmap.pitch as isize) };
             dest = row_end;
         }
 
+        match format {
+            FT_Glyph_Format::FT_GLYPH_FORMAT_OUTLINE => {
+                unsafe {
+                    left += (*slot).bitmap_left;
+                    top += (*slot).bitmap_top - actual_height;
+                }
+            }
+            _ => {}
+        }
+
         Some(RasterizedGlyph {
-            left: (dimensions.left + left) as f32,
-            top: (dimensions.top + top - actual_height) as f32,
+            left: left as f32,
+            top: top as f32,
             width: actual_width as u32,
             height: actual_height as u32,
             scale,
             format: font.get_glyph_format(pixel_mode == FT_Pixel_Mode::FT_PIXEL_MODE_BGRA),
             bytes: final_buffer,
         })
     }
 }
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -180,18 +180,18 @@ impl FontContext {
         let face = self.get_font_face(font);
         let glyph = key.index as u16;
         let advance = 0.0f32;
         let offset = dwrote::GlyphOffset {
             advanceOffset: 0.0,
             ascenderOffset: 0.0,
         };
 
-        let (.., minor) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
-        let size = (font.size.to_f64_px() * minor) as f32;
+        let (.., y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
+        let size = (font.size.to_f64_px() * y_scale) as f32;
 
         let glyph_run = dwrote::DWRITE_GLYPH_RUN {
             fontFace: unsafe { face.as_ptr() },
             fontEmSize: size, // size in DIPs (1/96", same as CSS pixels)
             glyphCount: 1,
             glyphIndices: &glyph,
             glyphAdvances: &advance,
             glyphOffsets: &offset,
@@ -203,17 +203,17 @@ impl FontContext {
         let dwrite_render_mode = dwrite_render_mode(
             face,
             font,
             size,
             dwrite_measure_mode,
         );
 
         let (x_offset, y_offset) = font.get_subpx_offset(key);
-        let shape = font.transform.pre_scale(minor.recip() as f32, minor.recip() as f32);
+        let shape = font.transform.pre_scale(y_scale.recip() as f32, y_scale.recip() as f32);
         let transform = dwrote::DWRITE_MATRIX {
             m11: shape.scale_x,
             m12: shape.skew_y,
             m21: shape.skew_x,
             m22: shape.scale_y,
             dx: x_offset as f32,
             dy: y_offset as f32,
         };
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,36 +1,40 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderRadius, BuiltDisplayList, ColorF, ComplexClipRegion, DeviceIntRect};
-use api::{DevicePoint, ExtendMode, FontRenderMode, GlyphInstance, GlyphKey};
-use api::{GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerRect};
-use api::{ClipMode, LayerSize, LayerVector2D, LayerToWorldTransform, LineOrientation, LineStyle};
-use api::{ClipAndScrollInfo, PremultipliedColorF, TileOffset};
-use api::{ClipId, LayerTransform, PipelineId, YuvColorSpace, YuvFormat};
+use api::{BorderRadius, BuiltDisplayList, ClipAndScrollInfo, ClipId, ClipMode, ColorF};
+use api::{ComplexClipRegion, DeviceIntRect, DevicePoint, ExtendMode, FontRenderMode};
+use api::{GlyphInstance, GlyphKey, GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag};
+use api::{LayerPoint, LayerRect, LayerSize, LayerToWorldTransform, LayerVector2D, LineOrientation};
+use api::{LineStyle, PipelineId, PremultipliedColorF, TileOffset, WorldToLayerTransform};
+use api::{YuvColorSpace, YuvFormat};
 use border::BorderCornerInstance;
 use clip_scroll_tree::{CoordinateSystemId, ClipScrollTree};
-use clip::{ClipSourcesHandle, ClipStore};
+use clip::{ClipSource, ClipSourcesHandle, ClipStore};
 use frame_builder::PrimitiveContext;
 use glyph_rasterizer::{FontInstance, FontTransform};
-use internal_types::{EdgeAaSegmentMask, FastHashMap};
+use internal_types::{FastHashMap};
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
+use gpu_types::ClipScrollNodeData;
 use picture::{PictureKind, PicturePrimitive, RasterizationSpace};
 use profiler::FrameProfileCounters;
 use render_task::{ClipChainNode, ClipChainNodeIter, ClipWorkItem, RenderTask, RenderTaskId};
 use render_task::RenderTaskTree;
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use resource_cache::{ImageProperties, ResourceCache};
 use scene::{ScenePipeline, SceneProperties};
-use std::{mem, usize};
+use std::{mem, u16, usize};
 use std::rc::Rc;
-use util::{pack_as_float, recycle_vec, MatrixHelpers, TransformedRect, TransformedRectKind};
+use util::{MatrixHelpers, calculate_screen_bounding_rect, extract_inner_rect_safe, pack_as_float};
+use util::recycle_vec;
+
+const MIN_BRUSH_SPLIT_AREA: f32 = 128.0 * 128.0;
 
 #[derive(Debug)]
 pub struct PrimitiveRun {
     pub base_prim_index: PrimitiveIndex,
     pub count: usize,
     pub clip_and_scroll: ClipAndScrollInfo,
 }
 
@@ -131,17 +135,16 @@ pub struct DeferredResolve {
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct SpecificPrimitiveIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct PrimitiveIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum PrimitiveKind {
-    Rectangle,
     TextRun,
     Image,
     YuvImage,
     Border,
     AlignedGradient,
     AngleGradient,
     RadialGradient,
     Line,
@@ -182,64 +185,204 @@ pub struct PrimitiveMetadata {
     pub is_backface_visible: bool,
     pub screen_rect: Option<DeviceIntRect>,
 
     /// A tag used to identify this primitive outside of WebRender. This is
     /// used for returning useful data during hit testing.
     pub tag: Option<ItemTag>,
 }
 
-#[derive(Debug,Clone,Copy)]
-pub enum RectangleContent {
-    Fill(ColorF),
-    Clear,
-}
-
-#[derive(Debug)]
-pub struct RectanglePrimitive {
-    pub content: RectangleContent,
-    pub edge_aa_segment_mask: EdgeAaSegmentMask,
-}
-
-impl ToGpuBlocks for RectanglePrimitive {
-    fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        request.push(match self.content {
-            RectangleContent::Fill(color) => color.premultiplied(),
-            // Opaque black with operator dest out
-            RectangleContent::Clear => PremultipliedColorF::BLACK,
-        });
-        request.extend_from_slice(&[GpuBlockData {
-            data: [self.edge_aa_segment_mask.bits() as f32, 0.0, 0.0, 0.0],
-        }]);
-    }
-}
-
 #[derive(Debug)]
 pub enum BrushMaskKind {
     //Rect,         // TODO(gw): Optimization opportunity for masks with 0 border radii.
     Corner(LayerSize),
     RoundedRect(LayerRect, BorderRadius),
 }
 
 #[derive(Debug)]
 pub enum BrushKind {
     Mask {
         clip_mode: ClipMode,
         kind: BrushMaskKind,
+    },
+    Solid {
+        color: ColorF,
+    },
+    Clear,
+}
+
+#[derive(Debug, Copy, Clone)]
+#[repr(u32)]
+pub enum BrushAntiAliasMode {
+    Primitive = 0,
+    Segment = 1,
+}
+
+#[allow(dead_code)]
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+pub enum BrushSegmentKind {
+    TopLeft = 0,
+    TopRight,
+    BottomRight,
+    BottomLeft,
+
+    TopMid,
+    MidRight,
+    BottomMid,
+    MidLeft,
+
+    Center,
+}
+
+#[derive(Debug)]
+pub struct BrushSegment {
+    pub local_rect: LayerRect,
+    pub clip_task_id: Option<RenderTaskId>,
+}
+
+impl BrushSegment {
+    fn new(
+        origin: LayerPoint,
+        size: LayerSize,
+    ) -> BrushSegment {
+        BrushSegment {
+            local_rect: LayerRect::new(origin, size),
+            clip_task_id: None,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct BrushSegmentDescriptor {
+    pub top_left_offset: LayerVector2D,
+    pub bottom_right_offset: LayerVector2D,
+    pub segments: [BrushSegment; 9],
+    pub enabled_segments: u16,
+    pub can_optimize_clip_mask: bool,
+}
+
+impl BrushSegmentDescriptor {
+    pub fn new(
+        outer_rect: &LayerRect,
+        inner_rect: &LayerRect,
+        valid_segments: Option<&[BrushSegmentKind]>,
+    ) -> BrushSegmentDescriptor {
+        let p0 = outer_rect.origin;
+        let p1 = inner_rect.origin;
+        let p2 = inner_rect.bottom_right();
+        let p3 = outer_rect.bottom_right();
+
+        let enabled_segments = match valid_segments {
+            Some(valid_segments) => {
+                valid_segments.iter().fold(
+                    0,
+                    |acc, segment| acc | 1 << *segment as u32
+                )
+            }
+            None => u16::MAX,
+        };
+
+        BrushSegmentDescriptor {
+            enabled_segments,
+            can_optimize_clip_mask: false,
+            top_left_offset: p1 - p0,
+            bottom_right_offset: p3 - p2,
+            segments: [
+                BrushSegment::new(
+                    LayerPoint::new(p0.x, p0.y),
+                    LayerSize::new(p1.x - p0.x, p1.y - p0.y),
+                ),
+                BrushSegment::new(
+                    LayerPoint::new(p2.x, p0.y),
+                    LayerSize::new(p3.x - p2.x, p1.y - p0.y),
+                ),
+                BrushSegment::new(
+                    LayerPoint::new(p2.x, p2.y),
+                    LayerSize::new(p3.x - p2.x, p3.y - p2.y),
+                ),
+                BrushSegment::new(
+                    LayerPoint::new(p0.x, p2.y),
+                    LayerSize::new(p1.x - p0.x, p3.y - p2.y),
+                ),
+                BrushSegment::new(
+                    LayerPoint::new(p1.x, p0.y),
+                    LayerSize::new(p2.x - p1.x, p1.y - p0.y),
+                ),
+                BrushSegment::new(
+                    LayerPoint::new(p2.x, p1.y),
+                    LayerSize::new(p3.x - p2.x, p2.y - p1.y),
+                ),
+                BrushSegment::new(
+                    LayerPoint::new(p1.x, p2.y),
+                    LayerSize::new(p2.x - p1.x, p3.y - p2.y),
+                ),
+                BrushSegment::new(
+                    LayerPoint::new(p0.x, p1.y),
+                    LayerSize::new(p1.x - p0.x, p2.y - p1.y),
+                ),
+                BrushSegment::new(
+                    LayerPoint::new(p1.x, p1.y),
+                    LayerSize::new(p2.x - p1.x, p2.y - p1.y),
+                ),
+            ],
+        }
     }
 }
 
 #[derive(Debug)]
 pub struct BrushPrimitive {
     pub kind: BrushKind,
+    pub segment_desc: Option<Box<BrushSegmentDescriptor>>,
+    pub aa_mode: BrushAntiAliasMode,
+}
+
+impl BrushPrimitive {
+    pub fn new(
+        kind: BrushKind,
+        segment_desc: Option<Box<BrushSegmentDescriptor>>,
+        aa_mode: BrushAntiAliasMode,
+    ) -> BrushPrimitive {
+        BrushPrimitive {
+            kind,
+            segment_desc,
+            aa_mode,
+        }
+    }
 }
 
 impl ToGpuBlocks for BrushPrimitive {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
+        match self.segment_desc {
+            Some(ref segment_desc) => {
+                request.push([
+                    segment_desc.top_left_offset.x,
+                    segment_desc.top_left_offset.y,
+                    segment_desc.bottom_right_offset.x,
+                    segment_desc.bottom_right_offset.y,
+                ]);
+            }
+            None => {
+                request.push([0.0; 4]);
+            }
+        }
+        request.push([
+            self.aa_mode as u32 as f32,
+            0.0,
+            0.0,
+            0.0,
+        ]);
         match self.kind {
+            BrushKind::Solid { color } => {
+                request.push(color.premultiplied());
+            }
+            BrushKind::Clear => {
+                // Opaque black with operator dest out
+                request.push(PremultipliedColorF::BLACK);
+            }
             BrushKind::Mask { clip_mode, kind: BrushMaskKind::Corner(radius) } => {
                 request.push([
                     radius.width,
                     radius.height,
                     clip_mode as u32 as f32,
                     0.0,
                 ]);
             }
@@ -858,65 +1001,61 @@ impl ClipData {
         ] {
             corner.write(request);
         }
     }
 }
 
 #[derive(Debug)]
 pub enum PrimitiveContainer {
-    Rectangle(RectanglePrimitive),
     TextRun(TextRunPrimitiveCpu),
     Image(ImagePrimitiveCpu),
     YuvImage(YuvImagePrimitiveCpu),
     Border(BorderPrimitiveCpu),
     AlignedGradient(GradientPrimitiveCpu),
     AngleGradient(GradientPrimitiveCpu),
     RadialGradient(RadialGradientPrimitiveCpu),
     Picture(PicturePrimitive),
     Line(LinePrimitive),
     Brush(BrushPrimitive),
 }
 
 pub struct PrimitiveStore {
     /// CPU side information only.
-    pub cpu_rectangles: Vec<RectanglePrimitive>,
     pub cpu_brushes: Vec<BrushPrimitive>,
     pub cpu_text_runs: Vec<TextRunPrimitiveCpu>,
     pub cpu_pictures: Vec<PicturePrimitive>,
     pub cpu_images: Vec<ImagePrimitiveCpu>,
     pub cpu_yuv_images: Vec<YuvImagePrimitiveCpu>,
     pub cpu_gradients: Vec<GradientPrimitiveCpu>,
     pub cpu_radial_gradients: Vec<RadialGradientPrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
     pub cpu_borders: Vec<BorderPrimitiveCpu>,
     pub cpu_lines: Vec<LinePrimitive>,
 }
 
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
-            cpu_rectangles: Vec::new(),
             cpu_brushes: Vec::new(),
             cpu_text_runs: Vec::new(),
             cpu_pictures: Vec::new(),
             cpu_images: Vec::new(),
             cpu_yuv_images: Vec::new(),
             cpu_gradients: Vec::new(),
             cpu_radial_gradients: Vec::new(),
             cpu_borders: Vec::new(),
             cpu_lines: Vec::new(),
         }
     }
 
     pub fn recycle(self) -> Self {
         PrimitiveStore {
             cpu_metadata: recycle_vec(self.cpu_metadata),
-            cpu_rectangles: recycle_vec(self.cpu_rectangles),
             cpu_brushes: recycle_vec(self.cpu_brushes),
             cpu_text_runs: recycle_vec(self.cpu_text_runs),
             cpu_pictures: recycle_vec(self.cpu_pictures),
             cpu_images: recycle_vec(self.cpu_images),
             cpu_yuv_images: recycle_vec(self.cpu_yuv_images),
             cpu_gradients: recycle_vec(self.cpu_gradients),
             cpu_radial_gradients: recycle_vec(self.cpu_radial_gradients),
             cpu_borders: recycle_vec(self.cpu_borders),
@@ -940,42 +1079,30 @@ impl PrimitiveStore {
             gpu_location: GpuCacheHandle::new(),
             clip_task_id: None,
             local_rect: *local_rect,
             local_clip_rect: *local_clip_rect,
             is_backface_visible: is_backface_visible,
             screen_rect: None,
             tag,
             opacity: PrimitiveOpacity::translucent(),
-            prim_kind: PrimitiveKind::Rectangle,
+            prim_kind: PrimitiveKind::Brush,
             cpu_prim_index: SpecificPrimitiveIndex(0),
         };
 
         let metadata = match container {
-            PrimitiveContainer::Rectangle(rect) => {
-                let opacity = match &rect.content {
-                    &RectangleContent::Fill(ref color) => {
-                        PrimitiveOpacity::from_alpha(color.a)
-                    },
-                    &RectangleContent::Clear => PrimitiveOpacity::opaque()
+            PrimitiveContainer::Brush(brush) => {
+                let opacity = match brush.kind {
+                    BrushKind::Clear => PrimitiveOpacity::translucent(),
+                    BrushKind::Solid { ref color } => PrimitiveOpacity::from_alpha(color.a),
+                    BrushKind::Mask { .. } => PrimitiveOpacity::translucent(),
                 };
+
                 let metadata = PrimitiveMetadata {
                     opacity,
-                    prim_kind: PrimitiveKind::Rectangle,
-                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_rectangles.len()),
-                    ..base_metadata
-                };
-
-                self.cpu_rectangles.push(rect);
-
-                metadata
-            }
-            PrimitiveContainer::Brush(brush) => {
-                let metadata = PrimitiveMetadata {
-                    opacity: PrimitiveOpacity::translucent(),
                     prim_kind: PrimitiveKind::Brush,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_brushes.len()),
                     ..base_metadata
                 };
 
                 self.cpu_brushes.push(brush);
 
                 metadata
@@ -1104,17 +1231,17 @@ impl PrimitiveStore {
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         child_tasks: Vec<RenderTaskId>,
         parent_tasks: &mut Vec<RenderTaskId>,
         pic_index: SpecificPrimitiveIndex,
     ) {
         let metadata = &mut self.cpu_metadata[prim_index.0];
         match metadata.prim_kind {
-            PrimitiveKind::Rectangle | PrimitiveKind::Border | PrimitiveKind::Line => {}
+            PrimitiveKind::Border | PrimitiveKind::Line => {}
             PrimitiveKind::Picture => {
                 self.cpu_pictures[metadata.cpu_prim_index.0]
                     .prepare_for_render(
                         prim_index,
                         prim_context,
                         render_tasks,
                         metadata.screen_rect.as_ref().expect("bug: trying to draw an off-screen picture!?"),
                         child_tasks,
@@ -1164,32 +1291,28 @@ impl PrimitiveStore {
                     resource_cache.request_image(
                         image_cpu.yuv_key[channel],
                         image_cpu.image_rendering,
                         None,
                         gpu_cache,
                     );
                 }
             }
+            PrimitiveKind::Brush |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
-            PrimitiveKind::RadialGradient |
-            PrimitiveKind::Brush => {}
+            PrimitiveKind::RadialGradient => {}
         }
 
         // Mark this GPU resource as required for this frame.
         if let Some(mut request) = gpu_cache.request(&mut metadata.gpu_location) {
             request.push(metadata.local_rect);
             request.push(metadata.local_clip_rect);
 
             match metadata.prim_kind {
-                PrimitiveKind::Rectangle => {
-                    let rect = &self.cpu_rectangles[metadata.cpu_prim_index.0];
-                    rect.write_gpu_blocks(request);
-                }
                 PrimitiveKind::Line => {
                     let line = &self.cpu_lines[metadata.cpu_prim_index.0];
                     line.write_gpu_blocks(request);
                 }
                 PrimitiveKind::Border => {
                     let border = &self.cpu_borders[metadata.cpu_prim_index.0];
                     border.write_gpu_blocks(request);
                 }
@@ -1213,18 +1336,33 @@ impl PrimitiveStore {
                     let gradient = &self.cpu_radial_gradients[metadata.cpu_prim_index.0];
                     gradient.build_gpu_blocks_for_angle_radial(prim_context.display_list, request);
                 }
                 PrimitiveKind::TextRun => {
                     let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                     text.write_gpu_blocks(&mut request);
                 }
                 PrimitiveKind::Picture => {
+                    // TODO(gw): This is a bit of a hack. The Picture type
+                    //           is drawn by the brush_image shader, so the
+                    //           layout here needs to conform to the same
+                    //           BrushPrimitive layout. We should tidy this
+                    //           up in the future so it's enforced that these
+                    //           types use a shared function to write out the
+                    //           GPU blocks...
+                    request.push([0.0; 4]);
+                    request.push([
+                        BrushAntiAliasMode::Primitive as u32 as f32,
+                        0.0,
+                        0.0,
+                        0.0,
+                    ]);
+
                     self.cpu_pictures[metadata.cpu_prim_index.0]
-                        .write_gpu_blocks(request);
+                        .write_gpu_blocks(&mut request);
                 }
                 PrimitiveKind::Brush => {
                     let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
                     brush.write_gpu_blocks(request);
                 }
             }
         }
     }
@@ -1235,16 +1373,17 @@ impl PrimitiveStore {
         prim_context: &PrimitiveContext,
         prim_screen_rect: &DeviceIntRect,
         screen_rect: &DeviceIntRect,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         clip_store: &mut ClipStore,
         tasks: &mut Vec<RenderTaskId>,
+        node_data: &[ClipScrollNodeData],
     ) -> bool {
         let metadata = &mut self.cpu_metadata[prim_index.0];
         metadata.clip_task_id = None;
 
         let prim_screen_rect = match prim_screen_rect.intersection(screen_rect) {
             Some(rect) => rect,
             None => {
                 metadata.screen_rect = None;
@@ -1339,27 +1478,122 @@ impl PrimitiveStore {
             // optimization of the empty mask.
             combined_inner_rect = DeviceIntRect::zero();
         }
 
         if combined_inner_rect.contains_rect(&prim_screen_rect) {
            return true;
         }
 
-        let clip_task = RenderTask::new_mask(
-            None,
-            combined_outer_rect,
-            combined_inner_rect,
-            clips,
-            clip_store,
-            transform.transform_kind() == TransformedRectKind::AxisAligned,
-            prim_coordinate_system_id,
-        );
+        let mut needs_prim_clip_task = true;
+
+        if metadata.prim_kind == PrimitiveKind::Brush {
+            let brush = &mut self.cpu_brushes[metadata.cpu_prim_index.0];
+            if brush.segment_desc.is_none() && metadata.local_rect.size.area() > MIN_BRUSH_SPLIT_AREA {
+                if let BrushKind::Solid { .. } = brush.kind {
+                    if clips.len() == 1 {
+                        let clip_item = clips.first().unwrap();
+                        if clip_item.coordinate_system_id == prim_coordinate_system_id {
+                            let local_clips = clip_store.get_opt(&clip_item.clip_sources).expect("bug");
+                            let mut selected_clip = None;
+                            for &(ref clip, _) in &local_clips.clips {
+                                match *clip {
+                                    ClipSource::RoundedRectangle(rect, radii, ClipMode::Clip) => {
+                                        if selected_clip.is_some() {
+                                            selected_clip = None;
+                                            break;
+                                        }
+                                        selected_clip = Some((rect, radii, clip_item.scroll_node_data_index));
+                                    }
+                                    ClipSource::Rectangle(..) => {}
+                                    ClipSource::RoundedRectangle(_, _, ClipMode::ClipOut) |
+                                    ClipSource::BorderCorner(..) |
+                                    ClipSource::Image(..) => {
+                                        selected_clip = None;
+                                        break;
+                                    }
+                                }
+                            }
+                            if let Some((rect, radii, clip_scroll_node_data_index)) = selected_clip {
+                                // If the scroll node transforms are different between the clip
+                                // node and the primitive, we need to get the clip rect in the
+                                // local space of the primitive, in order to generate correct
+                                // local segments.
+                                let local_clip_rect = if clip_scroll_node_data_index == prim_context.scroll_node.node_data_index {
+                                    rect
+                                } else {
+                                    let clip_transform_data = &node_data[clip_scroll_node_data_index.0 as usize];
+                                    let prim_transform = &prim_context.scroll_node.world_content_transform;
+
+                                    let relative_transform = prim_transform
+                                        .inverse()
+                                        .unwrap_or(WorldToLayerTransform::identity())
+                                        .pre_mul(&clip_transform_data.transform);
 
-        if let Some(clip_task) = clip_task {
+                                    relative_transform.transform_rect(&rect)
+                                };
+                                brush.segment_desc = create_nine_patch(
+                                    &metadata.local_rect,
+                                    &local_clip_rect,
+                                    &radii
+                                );
+                            }
+                        }
+                    }
+                }
+            }
+
+            if let Some(ref mut segment_desc) = brush.segment_desc {
+                let enabled_segments = segment_desc.enabled_segments;
+                let can_optimize_clip_mask = segment_desc.can_optimize_clip_mask;
+
+                for (i, segment) in segment_desc.segments.iter_mut().enumerate() {
+                    // We only build clips for the corners. The ordering of the
+                    // BrushSegmentKind enum is such that corners come first, then
+                    // edges, then inner.
+                    let segment_enabled = ((1 << i) & enabled_segments) != 0;
+                    let create_clip_task = segment_enabled &&
+                                           (!can_optimize_clip_mask || i <= BrushSegmentKind::BottomLeft as usize);
+                    segment.clip_task_id = if create_clip_task {
+                        let segment_screen_rect = calculate_screen_bounding_rect(
+                            &prim_context.scroll_node.world_content_transform,
+                            &segment.local_rect,
+                            prim_context.device_pixel_ratio
+                        );
+
+                        combined_outer_rect.intersection(&segment_screen_rect).map(|bounds| {
+                            let clip_task = RenderTask::new_mask(
+                                None,
+                                bounds,
+                                clips.clone(),
+                                prim_coordinate_system_id,
+                            );
+
+                            let clip_task_id = render_tasks.add(clip_task);
+                            tasks.push(clip_task_id);
+
+                            clip_task_id
+                        })
+                    } else {
+                        None
+                    };
+                }
+
+                needs_prim_clip_task = false;
+            }
+        }
+
+        if needs_prim_clip_task {
+            let clip_task = RenderTask::new_mask(
+                None,
+                combined_outer_rect,
+                clips,
+                prim_coordinate_system_id,
+            );
+
             let clip_task_id = render_tasks.add(clip_task);
             metadata.clip_task_id = Some(clip_task_id);
             tasks.push(clip_task_id);
         }
 
         true
     }
 
@@ -1374,16 +1608,17 @@ impl PrimitiveStore {
         clip_scroll_tree: &ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         perform_culling: bool,
         parent_tasks: &mut Vec<RenderTaskId>,
         scene_properties: &SceneProperties,
         profile_counters: &mut FrameProfileCounters,
         pic_index: SpecificPrimitiveIndex,
         screen_rect: &DeviceIntRect,
+        node_data: &[ClipScrollNodeData],
     ) -> Option<LayerRect> {
         // Reset the visibility of this primitive.
         // Do some basic checks first, that can early out
         // without even knowing the local rect.
         let (cpu_prim_index, dependencies, cull_children, may_need_clip_mask) = {
             let metadata = &mut self.cpu_metadata[prim_index.0];
             metadata.screen_rect = None;
 
@@ -1440,16 +1675,17 @@ impl PrimitiveStore {
                 prim_context,
                 cull_children,
                 &mut child_tasks,
                 profile_counters,
                 rfid,
                 scene_properties,
                 cpu_prim_index,
                 screen_rect,
+                node_data,
             );
 
             let metadata = &mut self.cpu_metadata[prim_index.0];
 
             // Restore the dependencies (borrow check dance)
             let pic = &mut self.cpu_pictures[cpu_prim_index.0];
             pic.runs = dependencies;
 
@@ -1469,42 +1705,43 @@ impl PrimitiveStore {
 
             let local_rect = metadata.local_rect.intersection(&metadata.local_clip_rect);
             let local_rect = match local_rect {
                 Some(local_rect) => local_rect,
                 None if perform_culling => return None,
                 None => LayerRect::zero(),
             };
 
-            let xf_rect = TransformedRect::new(
+            let screen_bounding_rect = calculate_screen_bounding_rect(
+                &prim_context.scroll_node.world_content_transform,
                 &local_rect,
-                &prim_context.scroll_node.world_content_transform,
                 prim_context.device_pixel_ratio
             );
 
             let clip_bounds = &prim_context.clip_node.combined_clip_outer_bounds;
-            metadata.screen_rect = xf_rect.bounding_rect.intersection(clip_bounds);
+            metadata.screen_rect = screen_bounding_rect.intersection(clip_bounds);
 
             if metadata.screen_rect.is_none() && perform_culling {
                 return None;
             }
 
-            (local_rect, xf_rect.bounding_rect)
+            (local_rect, screen_bounding_rect)
         };
 
         if perform_culling && may_need_clip_mask && !self.update_clip_task(
             prim_index,
             prim_context,
             &unclipped_device_rect,
             screen_rect,
             resource_cache,
             gpu_cache,
             render_tasks,
             clip_store,
             parent_tasks,
+            node_data,
         ) {
             return None;
         }
 
         self.prepare_prim_for_render_inner(
             prim_index,
             prim_context,
             resource_cache,
@@ -1539,16 +1776,17 @@ impl PrimitiveStore {
         parent_prim_context: &PrimitiveContext,
         perform_culling: bool,
         parent_tasks: &mut Vec<RenderTaskId>,
         profile_counters: &mut FrameProfileCounters,
         original_reference_frame_id: Option<ClipId>,
         scene_properties: &SceneProperties,
         pic_index: SpecificPrimitiveIndex,
         screen_rect: &DeviceIntRect,
+        node_data: &[ClipScrollNodeData],
     ) -> PrimitiveRunLocalRect {
         let mut result = PrimitiveRunLocalRect {
             local_rect_in_actual_parent_space: LayerRect::zero(),
             local_rect_in_original_parent_space: LayerRect::zero(),
         };
 
         for run in runs {
             // TODO(gw): Perhaps we can restructure this to not need to create
@@ -1606,27 +1844,28 @@ impl PrimitiveStore {
                     clip_scroll_tree,
                     pipelines,
                     perform_culling,
                     parent_tasks,
                     scene_properties,
                     profile_counters,
                     pic_index,
                     screen_rect,
+                    node_data,
                 ) {
                     profile_counters.visible_primitives.inc();
 
                     if let Some(ref matrix) = original_relative_transform {
-                        let bounds = get_local_bounding_rect(&prim_local_rect, matrix);
+                        let bounds = matrix.transform_rect(&prim_local_rect);
                         result.local_rect_in_original_parent_space =
                             result.local_rect_in_original_parent_space.union(&bounds);
                     }
 
                     if let Some(ref matrix) = parent_relative_transform {
-                        let bounds = get_local_bounding_rect(&prim_local_rect, matrix);
+                        let bounds = matrix.transform_rect(&prim_local_rect);
                         result.local_rect_in_actual_parent_space =
                             result.local_rect_in_actual_parent_space.union(&bounds);
                     }
                 }
             }
         }
 
         result
@@ -1653,33 +1892,24 @@ impl InsideTest<ComplexClipRegion> for C
             clip.radii.top_right.height >= self.radii.top_right.height - delta_top &&
             clip.radii.bottom_left.width >= self.radii.bottom_left.width - delta_left &&
             clip.radii.bottom_left.height >= self.radii.bottom_left.height - delta_bottom &&
             clip.radii.bottom_right.width >= self.radii.bottom_right.width - delta_right &&
             clip.radii.bottom_right.height >= self.radii.bottom_right.height - delta_bottom
     }
 }
 
-fn get_local_bounding_rect(
+fn create_nine_patch(
     local_rect: &LayerRect,
-    matrix: &LayerTransform
-) -> LayerRect {
-    let vertices = [
-        matrix.transform_point3d(&local_rect.origin.to_3d()),
-        matrix.transform_point3d(&local_rect.bottom_left().to_3d()),
-        matrix.transform_point3d(&local_rect.bottom_right().to_3d()),
-        matrix.transform_point3d(&local_rect.top_right().to_3d()),
-    ];
+    local_clip_rect: &LayerRect,
+    radii: &BorderRadius
+) -> Option<Box<BrushSegmentDescriptor>> {
+    extract_inner_rect_safe(local_clip_rect, radii).map(|inner| {
+        let mut desc = BrushSegmentDescriptor::new(
+            local_rect,
+            &inner,
+            None,
+        );
+        desc.can_optimize_clip_mask = true;
 
-    let mut x0 = vertices[0].x;
-    let mut y0 = vertices[0].y;
-    let mut x1 = vertices[0].x;
-    let mut y1 = vertices[0].y;
-
-    for v in &vertices[1..] {
-        x0 = x0.min(v.x);
-        y0 = y0.min(v.y);
-        x1 = x1.max(v.x);
-        y1 = y1.max(v.y);
-    }
-
-    LayerRect::new(LayerPoint::new(x0, y0), LayerSize::new(x1 - x0, y1 - y0))
+        Box::new(desc)
+    })
 }
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -1,16 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize, DevicePixel};
 use api::{LayerPoint, LayerRect, PremultipliedColorF};
 use box_shadow::BoxShadowCacheKey;
-use clip::{ClipSource, ClipSourcesWeakHandle, ClipStore};
+use clip::{ClipSourcesWeakHandle};
 use clip_scroll_tree::CoordinateSystemId;
 use euclid::TypedSize2D;
 use gpu_types::{ClipScrollNodeIndex};
 use picture::RasterizationSpace;
 use prim_store::{PrimitiveIndex};
 #[cfg(feature = "debugger")]
 use print_tree::{PrintTreePrinter};
 use std::{cmp, ops, usize, f32, i32};
@@ -159,88 +159,27 @@ pub enum RenderTaskKey {
 }
 
 #[derive(Debug)]
 pub enum RenderTaskLocation {
     Fixed,
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
 }
 
-#[derive(Debug, Copy, Clone)]
-#[repr(C)]
-pub enum MaskSegment {
-    // This must match the SEGMENT_ values in clip_shared.glsl!
-    All = 0,
-    TopLeftCorner,
-    TopRightCorner,
-    BottomLeftCorner,
-    BottomRightCorner,
-}
-
-#[derive(Debug, Copy, Clone)]
-#[repr(C)]
-pub enum MaskGeometryKind {
-    Default, // Draw the entire rect
-    CornersOnly, // Draw the corners (simple axis aligned mask)
-             // TODO(gw): Add more types here (e.g. 4 rectangles outside the inner rect)
-}
-
 #[derive(Debug, Clone)]
 pub struct ClipWorkItem {
     pub scroll_node_data_index: ClipScrollNodeIndex,
     pub clip_sources: ClipSourcesWeakHandle,
     pub coordinate_system_id: CoordinateSystemId,
 }
 
-impl ClipWorkItem {
-    fn get_geometry_kind(
-        &self,
-        clip_store: &ClipStore,
-        prim_coordinate_system_id: CoordinateSystemId
-    ) -> MaskGeometryKind {
-        let clips = clip_store
-            .get_opt(&self.clip_sources)
-            .expect("bug: clip handle should be valid")
-            .clips();
-        let mut rounded_rect_count = 0;
-
-        for &(ref clip, _) in clips {
-            match *clip {
-                ClipSource::Rectangle(..) => {
-                    if !self.has_compatible_coordinate_system(prim_coordinate_system_id) {
-                        return MaskGeometryKind::Default;
-                    }
-                },
-                ClipSource::RoundedRectangle(..) => {
-                    rounded_rect_count += 1;
-                }
-                ClipSource::Image(..) | ClipSource::BorderCorner(..) => {
-                    return MaskGeometryKind::Default;
-                }
-            }
-        }
-
-        if rounded_rect_count == 1 {
-            MaskGeometryKind::CornersOnly
-        } else {
-            MaskGeometryKind::Default
-        }
-    }
-
-    fn has_compatible_coordinate_system(&self, other_id: CoordinateSystemId) -> bool {
-        self.coordinate_system_id == other_id
-    }
-}
-
 #[derive(Debug)]
 pub struct CacheMaskTask {
     actual_rect: DeviceIntRect,
-    inner_rect: DeviceIntRect,
     pub clips: Vec<ClipWorkItem>,
-    pub geometry_kind: MaskGeometryKind,
     pub coordinate_system_id: CoordinateSystemId,
 }
 
 #[derive(Debug)]
 pub struct PictureTask {
     pub prim_index: PrimitiveIndex,
     pub target_kind: RenderTargetKind,
     pub content_origin: LayerPoint,
@@ -348,48 +287,30 @@ impl RenderTask {
             kind: RenderTaskKind::Readback(screen_rect),
             clear_mode: ClearMode::Transparent,
         }
     }
 
     pub fn new_mask(
         key: Option<ClipId>,
         outer_rect: DeviceIntRect,
-        inner_rect: DeviceIntRect,
         clips: Vec<ClipWorkItem>,
-        clip_store: &ClipStore,
-        is_axis_aligned: bool,
         prim_coordinate_system_id: CoordinateSystemId,
-    ) -> Option<RenderTask> {
-        // TODO(gw): This optimization is very conservative for now.
-        //           For now, only draw optimized geometry if it is
-        //           a single aligned rect mask with rounded corners.
-        //           In the future, we'll expand this to handle the
-        //           more complex types of clip mask geometry.
-        let geometry_kind = if is_axis_aligned &&
-            clips.len() == 1 &&
-            inner_rect.size != DeviceIntSize::zero() {
-            clips[0].get_geometry_kind(clip_store, prim_coordinate_system_id)
-        } else {
-            MaskGeometryKind::Default
-        };
-
-        Some(RenderTask {
+    ) -> RenderTask {
+        RenderTask {
             cache_key: key.map(RenderTaskKey::CacheMask),
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, outer_rect.size),
             kind: RenderTaskKind::CacheMask(CacheMaskTask {
                 actual_rect: outer_rect,
-                inner_rect: inner_rect,
                 clips,
-                geometry_kind,
                 coordinate_system_id: prim_coordinate_system_id,
             }),
             clear_mode: ClearMode::One,
-        })
+        }
     }
 
     // Construct a render task to apply a blur to a primitive.
     // The render task chain that is constructed looks like:
     //
     //    PrimitiveCacheTask: Draw the primitives.
     //           ^
     //           |
@@ -524,22 +445,17 @@ impl RenderTask {
             }
             RenderTaskKind::CacheMask(ref task) => {
                 (
                     [
                         task.actual_rect.origin.x as f32,
                         task.actual_rect.origin.y as f32,
                         0.0,
                     ],
-                    [
-                        task.inner_rect.origin.x as f32,
-                        task.inner_rect.origin.y as f32,
-                        (task.inner_rect.origin.x + task.inner_rect.size.width) as f32,
-                        (task.inner_rect.origin.y + task.inner_rect.size.height) as f32,
-                    ],
+                    [0.0; 4],
                 )
             }
             RenderTaskKind::VerticalBlur(ref task) |
             RenderTaskKind::HorizontalBlur(ref task) => {
                 (
                     [
                         task.blur_std_deviation,
                         task.scale_factor,
@@ -668,17 +584,16 @@ impl RenderTask {
             RenderTaskKind::Picture(ref task) => {
                 pt.new_level(format!("Picture of {:?}", task.prim_index));
                 pt.add_item(format!("kind: {:?}", task.target_kind));
                 pt.add_item(format!("space: {:?}", task.rasterization_kind));
             }
             RenderTaskKind::CacheMask(ref task) => {
                 pt.new_level(format!("CacheMask with {} clips", task.clips.len()));
                 pt.add_item(format!("rect: {:?}", task.actual_rect));
-                pt.add_item(format!("geometry: {:?}", task.geometry_kind));
             }
             RenderTaskKind::VerticalBlur(ref task) => {
                 pt.new_level("VerticalBlur".to_owned());
                 task.print_with(pt);
             }
             RenderTaskKind::HorizontalBlur(ref task) => {
                 pt.new_level("HorizontalBlur".to_owned());
                 task.print_with(pt);
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -20,17 +20,17 @@ use api::{YuvColorSpace, YuvFormat};
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
 use api::channel::MsgSender;
 use debug_colors;
 use debug_render::DebugRenderer;
 #[cfg(feature = "debugger")]
 use debug_server::{self, DebugServer};
-use device::{DepthFunction, Device, FrameId, Program, Texture,
+use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture,
              VertexDescriptor, PBO};
 use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute,
              VertexAttributeKind};
 use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget,
              VertexUsageHint, VAO};
 use device::ProgramCache;
 use euclid::{rect, ScaleFactor, Transform3D};
 use frame_builder::FrameBuilderConfig;
@@ -67,16 +67,20 @@ use thread_profiler::{register_thread_wi
 use tiling::{AlphaRenderTarget, ColorRenderTarget};
 use tiling::{RenderPass, RenderPassKind, RenderTargetKind, RenderTargetList};
 use tiling::{BatchKey, BatchKind, BrushBatchKind, Frame, RenderTarget, ScalingInfo, TransformBatchKind};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
+const GPU_TAG_BRUSH_SOLID: GpuProfileTag = GpuProfileTag {
+    label: "B_Solid",
+    color: debug_colors::RED,
+};
 const GPU_TAG_BRUSH_MASK: GpuProfileTag = GpuProfileTag {
     label: "B_Mask",
     color: debug_colors::BLACK,
 };
 const GPU_TAG_BRUSH_IMAGE: GpuProfileTag = GpuProfileTag {
     label: "B_Image",
     color: debug_colors::SILVER,
 };
@@ -95,20 +99,16 @@ const GPU_TAG_CACHE_LINE: GpuProfileTag 
 const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag {
     label: "target init",
     color: debug_colors::SLATEGREY,
 };
 const GPU_TAG_SETUP_DATA: GpuProfileTag = GpuProfileTag {
     label: "data init",
     color: debug_colors::LIGHTGREY,
 };
-const GPU_TAG_PRIM_RECT: GpuProfileTag = GpuProfileTag {
-    label: "Rect",
-    color: debug_colors::RED,
-};
 const GPU_TAG_PRIM_LINE: GpuProfileTag = GpuProfileTag {
     label: "Line",
     color: debug_colors::DARKRED,
 };
 const GPU_TAG_PRIM_IMAGE: GpuProfileTag = GpuProfileTag {
     label: "Image",
     color: debug_colors::GREEN,
 };
@@ -173,17 +173,16 @@ const GPU_SAMPLER_TAG_TRANSPARENT: GpuPr
     label: "Transparent Pass",
     color: debug_colors::BLACK,
 };
 
 impl TransformBatchKind {
     #[cfg(feature = "debugger")]
     fn debug_name(&self) -> &'static str {
         match *self {
-            TransformBatchKind::Rectangle(..) => "Rectangle",
             TransformBatchKind::TextRun(..) => "TextRun",
             TransformBatchKind::Image(image_buffer_kind, ..) => match image_buffer_kind {
                 ImageBufferKind::Texture2D => "Image (2D)",
                 ImageBufferKind::TextureRect => "Image (Rect)",
                 ImageBufferKind::TextureExternal => "Image (External)",
                 ImageBufferKind::Texture2DArray => "Image (Array)",
             },
             TransformBatchKind::YuvImage(..) => "YuvImage",
@@ -193,17 +192,16 @@ impl TransformBatchKind {
             TransformBatchKind::BorderCorner => "BorderCorner",
             TransformBatchKind::BorderEdge => "BorderEdge",
             TransformBatchKind::Line => "Line",
         }
     }
 
     fn gpu_sampler_tag(&self) -> GpuProfileTag {
         match *self {
-            TransformBatchKind::Rectangle(_) => GPU_TAG_PRIM_RECT,
             TransformBatchKind::Line => GPU_TAG_PRIM_LINE,
             TransformBatchKind::TextRun(..) => GPU_TAG_PRIM_TEXT_RUN,
             TransformBatchKind::Image(..) => GPU_TAG_PRIM_IMAGE,
             TransformBatchKind::YuvImage(..) => GPU_TAG_PRIM_YUV_IMAGE,
             TransformBatchKind::BorderCorner => GPU_TAG_PRIM_BORDER_CORNER,
             TransformBatchKind::BorderEdge => GPU_TAG_PRIM_BORDER_EDGE,
             TransformBatchKind::AlignedGradient => GPU_TAG_PRIM_GRADIENT,
             TransformBatchKind::AngleGradient => GPU_TAG_PRIM_ANGLE_GRADIENT,
@@ -215,28 +213,38 @@ impl TransformBatchKind {
 impl BatchKind {
     #[cfg(feature = "debugger")]
     fn debug_name(&self) -> &'static str {
         match *self {
             BatchKind::Composite { .. } => "Composite",
             BatchKind::HardwareComposite => "HardwareComposite",
             BatchKind::SplitComposite => "SplitComposite",
             BatchKind::Blend => "Blend",
-            BatchKind::Brush(BrushBatchKind::Image(..)) => "Brush (Image)",
+            BatchKind::Brush(kind) => {
+                match kind {
+                    BrushBatchKind::Image(..) => "Brush (Image)",
+                    BrushBatchKind::Solid => "Brush (Solid)",
+                }
+            }
             BatchKind::Transformable(_, batch_kind) => batch_kind.debug_name(),
         }
     }
 
     fn gpu_sampler_tag(&self) -> GpuProfileTag {
         match *self {
             BatchKind::Composite { .. } => GPU_TAG_PRIM_COMPOSITE,
             BatchKind::HardwareComposite => GPU_TAG_PRIM_HW_COMPOSITE,
             BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
             BatchKind::Blend => GPU_TAG_PRIM_BLEND,
-            BatchKind::Brush(BrushBatchKind::Image(_)) => GPU_TAG_BRUSH_IMAGE,
+            BatchKind::Brush(kind) => {
+                match kind {
+                    BrushBatchKind::Image(..) => GPU_TAG_BRUSH_IMAGE,
+                    BrushBatchKind::Solid => GPU_TAG_BRUSH_SOLID,
+                }
+            }
             BatchKind::Transformable(_, batch_kind) => batch_kind.gpu_sampler_tag(),
         }
     }
 }
 
 bitflags! {
     #[derive(Default)]
     pub struct DebugFlags: u32 {
@@ -790,59 +798,48 @@ impl CacheTexture {
         let mut updated_blocks = 0;
         for update in &updates.updates {
             updated_blocks += self.apply_patch(update, &updates.blocks);
         }
         updated_blocks
     }
 
     fn flush(&mut self, device: &mut Device) -> usize {
-        // Bind a PBO to do the texture upload.
-        // Updating the texture via PBO avoids CPU-side driver stalls.
-        device.bind_pbo(Some(&self.pbo));
-
-        let mut rows_dirty = 0;
+        let rows_dirty = self.rows
+            .iter()
+            .filter(|row| row.is_dirty)
+            .count();
+        if rows_dirty == 0 {
+            return 0
+        }
+
+        let mut uploader = device.upload_texture(
+            &self.texture,
+            &self.pbo,
+            rows_dirty * MAX_VERTEX_TEXTURE_WIDTH,
+        );
 
         for (row_index, row) in self.rows.iter_mut().enumerate() {
             if !row.is_dirty {
                 continue;
             }
 
-            // Get the data for this row and push to the PBO.
             let block_index = row_index * MAX_VERTEX_TEXTURE_WIDTH;
             let cpu_blocks =
                 &self.cpu_blocks[block_index .. (block_index + MAX_VERTEX_TEXTURE_WIDTH)];
-            device.update_pbo_data(cpu_blocks);
-
-            // Insert a command to copy the PBO data to the right place in
-            // the GPU-side cache texture.
-            device.update_texture_from_pbo(
-                &self.texture,
-                0,
-                row_index as u32,
-                MAX_VERTEX_TEXTURE_WIDTH as u32,
-                1,
-                0,
-                None,
-                0,
+            let rect = DeviceUintRect::new(
+                DeviceUintPoint::new(0, row_index as u32),
+                DeviceUintSize::new(MAX_VERTEX_TEXTURE_WIDTH as u32, 1),
             );
 
-            // Orphan the PBO. This is the recommended way to hint to the
-            // driver to detach the underlying storage from this PBO id.
-            // Keeping the size the same gives the driver a hint for future
-            // use of this PBO.
-            device.orphan_pbo(mem::size_of::<GpuBlockData>() * MAX_VERTEX_TEXTURE_WIDTH);
-
-            rows_dirty += 1;
+            uploader.upload(rect, 0, None, cpu_blocks);
+
             row.is_dirty = false;
         }
 
-        // Ensure that other texture updates won't read from this PBO.
-        device.bind_pbo(None);
-
         rows_dirty
     }
 }
 
 struct VertexDataTexture {
     texture: Texture,
     pbo: PBO,
 }
@@ -890,34 +887,32 @@ impl VertexDataTexture {
                 ImageFormat::RGBAF32,
                 TextureFilter::Nearest,
                 None,
                 1,
                 None,
             );
         }
 
-        // Bind a PBO to do the texture upload.
-        // Updating the texture via PBO avoids CPU-side driver stalls.
-        device.bind_pbo(Some(&self.pbo));
-        device.update_pbo_data(data);
-        device.update_texture_from_pbo(&self.texture, 0, 0, width, needed_height, 0, None, 0);
-
-        // Ensure that other texture updates won't read from this PBO.
-        device.bind_pbo(None);
+        let rect = DeviceUintRect::new(
+            DeviceUintPoint::zero(),
+            DeviceUintSize::new(width, needed_height),
+        );
+        device
+            .upload_texture(&self.texture, &self.pbo, 0)
+            .upload(rect, 0, None, data);
     }
 
     fn deinit(self, device: &mut Device) {
         device.delete_pbo(self.pbo);
         device.delete_texture(self.texture);
     }
 }
 
 const TRANSFORM_FEATURE: &str = "TRANSFORM";
-const CLIP_FEATURE: &str = "CLIP";
 const ALPHA_FEATURE: &str = "ALPHA_PASS";
 
 enum ShaderKind {
     Primitive,
     Cache(VertexArrayKind),
     ClipCache,
     Brush,
     Text,
@@ -1350,33 +1345,32 @@ pub struct Renderer {
     cs_blur_a8: LazilyCompiledShader,
     cs_blur_rgba8: LazilyCompiledShader,
 
     // Brush shaders
     brush_mask_corner: LazilyCompiledShader,
     brush_mask_rounded_rect: LazilyCompiledShader,
     brush_image_rgba8: BrushShader,
     brush_image_a8: BrushShader,
+    brush_solid: BrushShader,
 
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
     cs_clip_border: LazilyCompiledShader,
 
     // The are "primitive shaders". These shaders draw and blend
     // final results on screen. They are aware of tile boundaries.
     // Most draw directly to the framebuffer, but some use inputs
     // from the cache shaders to draw. Specifically, the box
     // shadow primitive shader stretches the box shadow cache
     // output, and the cache_image shader blits the results of
     // a cache shader (e.g. blur) to the screen.
-    ps_rectangle: PrimitiveShader,
-    ps_rectangle_clip: PrimitiveShader,
     ps_text_run: TextShader,
     ps_text_run_subpx_bg_pass1: TextShader,
     ps_image: Vec<Option<PrimitiveShader>>,
     ps_yuv_image: Vec<Option<PrimitiveShader>>,
     ps_border_corner: PrimitiveShader,
     ps_border_edge: PrimitiveShader,
     ps_gradient: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
@@ -1493,16 +1487,17 @@ impl Renderer {
         let file_watch_handler = FileWatcher {
             result_tx: result_tx.clone(),
             notifier: notifier.clone(),
         };
 
         let mut device = Device::new(
             gl,
             options.resource_override_path.clone(),
+            options.upload_method,
             Box::new(file_watch_handler),
             options.cached_programs,
         );
 
         let device_max_size = device.max_texture_size();
         // 512 is the minimum that the texture cache can work with.
         // Broken GL contexts can return a max texture size of zero (See #1260). Better to
         // gracefully fail now than panic as soon as a texture is allocated.
@@ -1553,16 +1548,23 @@ impl Renderer {
         let brush_mask_rounded_rect = try!{
             LazilyCompiledShader::new(ShaderKind::Brush,
                                       "brush_mask_rounded_rect",
                                       &[],
                                       &mut device,
                                       options.precache_shaders)
         };
 
+        let brush_solid = try!{
+            BrushShader::new("brush_solid",
+                             &mut device,
+                             &[],
+                             options.precache_shaders)
+        };
+
         let brush_image_a8 = try!{
             BrushShader::new("brush_image",
                              &mut device,
                              &["ALPHA_TARGET"],
                              options.precache_shaders)
         };
 
         let brush_image_rgba8 = try!{
@@ -1607,30 +1609,16 @@ impl Renderer {
         let cs_clip_border = try!{
             LazilyCompiledShader::new(ShaderKind::ClipCache,
                                       "cs_clip_border",
                                       &[],
                                       &mut device,
                                       options.precache_shaders)
         };
 
-        let ps_rectangle = try!{
-            PrimitiveShader::new("ps_rectangle",
-                                 &mut device,
-                                 &[],
-                                 options.precache_shaders)
-        };
-
-        let ps_rectangle_clip = try!{
-            PrimitiveShader::new("ps_rectangle",
-                                 &mut device,
-                                 &[ CLIP_FEATURE ],
-                                 options.precache_shaders)
-        };
-
         let ps_line = try!{
             PrimitiveShader::new("ps_line",
                                  &mut device,
                                  &[],
                                  options.precache_shaders)
         };
 
         let ps_text_run = try!{
@@ -2006,21 +1994,20 @@ impl Renderer {
             cs_text_run,
             cs_line,
             cs_blur_a8,
             cs_blur_rgba8,
             brush_mask_corner,
             brush_mask_rounded_rect,
             brush_image_rgba8,
             brush_image_a8,
+            brush_solid,
             cs_clip_rectangle,
             cs_clip_border,
             cs_clip_image,
-            ps_rectangle,
-            ps_rectangle_clip,
             ps_text_run,
             ps_text_run_subpx_bg_pass1,
             ps_image,
             ps_yuv_image,
             ps_border_corner,
             ps_border_edge,
             ps_gradient,
             ps_angle_gradient,
@@ -2645,62 +2632,55 @@ impl Renderer {
                     TextureUpdateOp::Update {
                         rect,
                         source,
                         stride,
                         layer_index,
                         offset,
                     } => {
                         let texture = &self.texture_resolver.cache_texture_map[update.id.0];
-
-                        // Bind a PBO to do the texture upload.
-                        // Updating the texture via PBO avoids CPU-side driver stalls.
-                        self.device.bind_pbo(Some(&self.texture_cache_upload_pbo));
+                        let mut uploader = self.device.upload_texture(
+                            texture,
+                            &self.texture_cache_upload_pbo,
+                            0,
+                        );
 
                         match source {
                             TextureUpdateSource::Bytes { data } => {
-                                self.device.update_pbo_data(&data[offset as usize ..]);
+                                uploader.upload(
+                                    rect, layer_index, stride,
+                                    &data[offset as usize ..],
+                                );
                             }
                             TextureUpdateSource::External { id, channel_index } => {
                                 let handler = self.external_image_handler
                                     .as_mut()
                                     .expect("Found external image, but no handler set!");
                                 match handler.lock(id, channel_index).source {
                                     ExternalImageSource::RawData(data) => {
-                                        self.device.update_pbo_data(&data[offset as usize ..]);
+                                        uploader.upload(
+                                            rect, layer_index, stride,
+                                            &data[offset as usize ..],
+                                        );
                                     }
                                     ExternalImageSource::Invalid => {
                                         // Create a local buffer to fill the pbo.
                                         let bpp = texture.get_bpp();
                                         let width = stride.unwrap_or(rect.size.width * bpp);
                                         let total_size = width * rect.size.height;
                                         // WR haven't support RGBAF32 format in texture_cache, so
                                         // we use u8 type here.
                                         let dummy_data: Vec<u8> = vec![255; total_size as usize];
-                                        self.device.update_pbo_data(&dummy_data);
+                                        uploader.upload(rect, layer_index, stride, &dummy_data);
                                     }
                                     _ => panic!("No external buffer found"),
                                 };
                                 handler.unlock(id, channel_index);
                             }
                         }
-
-                        self.device.update_texture_from_pbo(
-                            texture,
-                            rect.origin.x,
-                            rect.origin.y,
-                            rect.size.width,
-                            rect.size.height,
-                            layer_index,
-                            stride,
-                            0,
-                        );
-
-                        // Ensure that other texture updates won't read from this PBO.
-                        self.device.bind_pbo(None);
                     }
                     TextureUpdateOp::Free => {
                         let texture = &mut self.texture_resolver.cache_texture_map[update.id.0];
                         self.device.free_texture_storage(texture);
                     }
                 }
             }
         }
@@ -2782,62 +2762,41 @@ impl Renderer {
                     &mut self.renderer_errors,
                 );
             }
             BatchKind::Blend => {
                 self.ps_blend.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             }
             BatchKind::Brush(brush_kind) => {
                 match brush_kind {
+                    BrushBatchKind::Solid => {
+                        self.brush_solid.bind(
+                            &mut self.device,
+                            key.blend_mode,
+                            projection,
+                            0,
+                            &mut self.renderer_errors,
+                        );
+                    }
                     BrushBatchKind::Image(target_kind) => {
                         let shader = match target_kind {
                             RenderTargetKind::Alpha => &mut self.brush_image_a8,
                             RenderTargetKind::Color => &mut self.brush_image_rgba8,
                         };
                         shader.bind(
                             &mut self.device,
                             key.blend_mode,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
                     }
                 }
             }
             BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
-                TransformBatchKind::Rectangle(needs_clipping) => {
-                    debug_assert!(
-                        !needs_clipping || match key.blend_mode {
-                            BlendMode::PremultipliedAlpha |
-                            BlendMode::PremultipliedDestOut |
-                            BlendMode::SubpixelConstantTextColor(..) |
-                            BlendMode::SubpixelVariableTextColor |
-                            BlendMode::SubpixelWithBgColor => true,
-                            BlendMode::None => false,
-                        }
-                    );
-
-                    if needs_clipping {
-                        self.ps_rectangle_clip.bind(
-                            &mut self.device,
-                            transform_kind,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                    } else {
-                        self.ps_rectangle.bind(
-                            &mut self.device,
-                            transform_kind,
-                            projection,
-                            0,
-                            &mut self.renderer_errors,
-                        );
-                    }
-                }
                 TransformBatchKind::Line => {
                     self.ps_line.bind(
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
@@ -3771,17 +3730,17 @@ impl Renderer {
                 self.prepare_target_list(alpha, true);
                 self.prepare_target_list(color, true);
             }
         }
     }
 
     fn bind_frame_data(&mut self, frame: &mut Frame) {
         let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA);
-        self.device.device_pixel_ratio = frame.device_pixel_ratio;
+        self.device.set_device_pixel_ratio(frame.device_pixel_ratio);
 
         // Some of the textures are already assigned by `prepare_frame`.
         // Now re-allocate the space for the rest of the target textures.
         for pass in &mut frame.passes {
             if let RenderPassKind::OffScreen { ref mut alpha, ref mut color } = pass.kind {
                 self.prepare_target_list(alpha, false);
                 self.prepare_target_list(color, false);
             }
@@ -4174,21 +4133,20 @@ impl Renderer {
         self.cs_text_run.deinit(&mut self.device);
         self.cs_line.deinit(&mut self.device);
         self.cs_blur_a8.deinit(&mut self.device);
         self.cs_blur_rgba8.deinit(&mut self.device);
         self.brush_mask_rounded_rect.deinit(&mut self.device);
         self.brush_mask_corner.deinit(&mut self.device);
         self.brush_image_rgba8.deinit(&mut self.device);
         self.brush_image_a8.deinit(&mut self.device);
+        self.brush_solid.deinit(&mut self.device);
         self.cs_clip_rectangle.deinit(&mut self.device);
         self.cs_clip_image.deinit(&mut self.device);
         self.cs_clip_border.deinit(&mut self.device);
-        self.ps_rectangle.deinit(&mut self.device);
-        self.ps_rectangle_clip.deinit(&mut self.device);
         self.ps_text_run.deinit(&mut self.device);
         self.ps_text_run_subpx_bg_pass1.deinit(&mut self.device);
         for shader in self.ps_image {
             if let Some(shader) = shader {
                 shader.deinit(&mut self.device);
             }
         }
         for shader in self.ps_yuv_image {
@@ -4276,16 +4234,17 @@ pub struct RendererOptions {
     pub debug: bool,
     pub enable_scrollbars: bool,
     pub precache_shaders: bool,
     pub renderer_kind: RendererKind,
     pub enable_subpixel_aa: bool,
     pub clear_color: Option<ColorF>,
     pub enable_clear_scissor: bool,
     pub max_texture_size: Option<u32>,
+    pub upload_method: UploadMethod,
     pub workers: Option<Arc<ThreadPool>>,
     pub blob_image_renderer: Option<Box<BlobImageRenderer>>,
     pub recorder: Option<Box<ApiRecordingReceiver>>,
     pub thread_listener: Option<Box<ThreadListener + Send + Sync>>,
     pub enable_render_on_scroll: bool,
     pub cached_programs: Option<Rc<ProgramCache>>,
     pub debug_flags: DebugFlags,
     pub renderer_id: Option<u64>,
@@ -4303,16 +4262,18 @@ impl Default for RendererOptions {
             debug: false,
             enable_scrollbars: false,
             precache_shaders: false,
             renderer_kind: RendererKind::Native,
             enable_subpixel_aa: false,
             clear_color: Some(ColorF::new(1.0, 1.0, 1.0, 1.0)),
             enable_clear_scissor: true,
             max_texture_size: None,
+            //TODO: switch to `Immediate` on Angle
+            upload_method: UploadMethod::PixelBuffer(VertexUsageHint::Stream),
             workers: None,
             blob_image_renderer: None,
             recorder: None,
             thread_listener: None,
             enable_render_on_scroll: true,
             renderer_id: None,
             cached_programs: None,
         }
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -15,23 +15,23 @@ use clip_scroll_tree::{ClipScrollTree, C
 use device::Texture;
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BrushInstance, BrushImageKind, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, SourceTexture};
-use internal_types::BatchTextures;
+use internal_types::{BatchTextures};
 use picture::{PictureCompositeMode, PictureKind, PicturePrimitive, RasterizationSpace};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
-use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, PrimitiveRun, RectangleContent};
+use prim_store::{BrushPrimitive, BrushMaskKind, BrushKind, BrushSegmentKind, DeferredResolve, PrimitiveRun};
 use profiler::FrameProfileCounters;
-use render_task::{ClipWorkItem, MaskGeometryKind, MaskSegment};
+use render_task::{ClipWorkItem};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::{GlyphFetchResult, ResourceCache};
 use std::{cmp, usize, f32, i32};
 use std::collections::hash_map::Entry;
 use texture_allocator::GuillotineAllocator;
@@ -116,33 +116,16 @@ impl AlphaBatchHelpers for PrimitiveStor
                             BlendMode::SubpixelConstantTextColor(font.color.into())
                         }
                     }
                     FontRenderMode::Alpha |
                     FontRenderMode::Mono |
                     FontRenderMode::Bitmap => BlendMode::PremultipliedAlpha,
                 }
             },
-            PrimitiveKind::Rectangle => {
-                let rectangle_cpu = &self.cpu_rectangles[metadata.cpu_prim_index.0];
-                match rectangle_cpu.content {
-                    RectangleContent::Fill(..) => if needs_blending {
-                        BlendMode::PremultipliedAlpha
-                    } else {
-                        BlendMode::None
-                    },
-                    RectangleContent::Clear => {
-                        // TODO: If needs_blending == false, we could use BlendMode::None
-                        // to clear the rectangle, but then we'd need to draw the rectangle
-                        // with alpha == 0.0 instead of alpha == 1.0, and the RectanglePrimitive
-                        // would need to know about that.
-                        BlendMode::PremultipliedDestOut
-                    },
-                }
-            },
             PrimitiveKind::Border |
             PrimitiveKind::Image |
             PrimitiveKind::YuvImage |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
             PrimitiveKind::RadialGradient |
             PrimitiveKind::Line |
             PrimitiveKind::Brush |
@@ -405,17 +388,74 @@ fn add_to_batch(
         scroll_id,
         z,
     );
 
     let blend_mode = ctx.prim_store.get_blend_mode(prim_metadata, transform_kind);
 
     match prim_metadata.prim_kind {
         PrimitiveKind::Brush => {
-            panic!("BUG: brush type not expected in an alpha task (yet)");
+            let brush = &ctx.prim_store.cpu_brushes[prim_metadata.cpu_prim_index.0];
+            let base_instance = BrushInstance {
+                picture_address: task_address,
+                prim_address: prim_cache_address,
+                clip_id,
+                scroll_id,
+                clip_task_address,
+                z,
+                segment_kind: 0,
+                user_data0: 0,
+                user_data1: 0,
+            };
+
+            match brush.segment_desc {
+                Some(ref segment_desc) => {
+                    let opaque_batch = batch_list.opaque_batch_list.get_suitable_batch(
+                        brush.get_batch_key(
+                            BlendMode::None
+                        ),
+                        item_bounding_rect
+                    );
+                    let alpha_batch = batch_list.alpha_batch_list.get_suitable_batch(
+                        brush.get_batch_key(
+                            BlendMode::PremultipliedAlpha
+                        ),
+                        item_bounding_rect
+                    );
+
+                    for (i, segment) in segment_desc.segments.iter().enumerate() {
+                        if ((1 << i) & segment_desc.enabled_segments) != 0 {
+                            let is_inner = i == BrushSegmentKind::Center as usize;
+                            let needs_blending = !prim_metadata.opacity.is_opaque ||
+                                                 segment.clip_task_id.is_some() ||
+                                                 (!is_inner && transform_kind == TransformedRectKind::Complex);
+
+                            let clip_task_address = segment
+                                .clip_task_id
+                                .map_or(OPAQUE_TASK_ADDRESS, |id| render_tasks.get_task_address(id));
+
+                            let instance = PrimitiveInstance::from(BrushInstance {
+                                segment_kind: 1 + i as i32,
+                                clip_task_address,
+                                ..base_instance
+                            });
+
+                            if needs_blending {
+                                alpha_batch.push(instance);
+                            } else {
+                                opaque_batch.push(instance);
+                            }
+                        }
+                    }
+                }
+                None => {
+                    let batch = batch_list.get_suitable_batch(brush.get_batch_key(blend_mode), item_bounding_rect);
+                    batch.push(PrimitiveInstance::from(base_instance));
+                }
+            }
         }
         PrimitiveKind::Border => {
             let border_cpu =
                 &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
             // TODO(gw): Select correct blend mode for edges and corners!!
             let corner_kind = BatchKind::Transformable(
                 transform_kind,
                 TransformBatchKind::BorderCorner,
@@ -459,26 +499,16 @@ fn add_to_batch(
                 }
             }
 
             let batch = batch_list.get_suitable_batch(edge_key, item_bounding_rect);
             for border_segment in 0 .. 4 {
                 batch.push(base_instance.build(border_segment, 0, 0));
             }
         }
-        PrimitiveKind::Rectangle => {
-            let needs_clipping = prim_metadata.clip_task_id.is_some();
-            let kind = BatchKind::Transformable(
-                transform_kind,
-                TransformBatchKind::Rectangle(needs_clipping),
-            );
-            let key = BatchKey::new(kind, blend_mode, no_textures);
-            let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-            batch.push(base_instance.build(0, 0, 0));
-        }
         PrimitiveKind::Line => {
             let kind =
                 BatchKind::Transformable(transform_kind, TransformBatchKind::Line);
             let key = BatchKey::new(kind, blend_mode, no_textures);
             let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
             batch.push(base_instance.build(0, 0, 0));
         }
         PrimitiveKind::Image => {
@@ -604,17 +634,17 @@ fn add_to_batch(
 
                             let instance = BrushInstance {
                                 picture_address: task_address,
                                 prim_address: prim_cache_address,
                                 clip_id,
                                 scroll_id,
                                 clip_task_address,
                                 z,
-                                flags: 0,
+                                segment_kind: 0,
                                 user_data0: cache_task_address.0 as i32,
                                 user_data1: BrushImageKind::Simple as i32,
                             };
                             batch.push(PrimitiveInstance::from(instance));
                         }
                         PictureKind::BoxShadow { radii_kind, .. } => {
                             let kind = BatchKind::Brush(
                                 BrushBatchKind::Image(picture.target_kind()),
@@ -633,17 +663,17 @@ fn add_to_batch(
 
                             let instance = BrushInstance {
                                 picture_address: task_address,
                                 prim_address: prim_cache_address,
                                 clip_id,
                                 scroll_id,
                                 clip_task_address,
                                 z,
-                                flags: 0,
+                                segment_kind: 0,
                                 user_data0: cache_task_address.0 as i32,
                                 user_data1: image_kind as i32,
                             };
                             batch.push(PrimitiveInstance::from(instance));
                         }
                         PictureKind::Image {
                             composite_mode,
                             readback_render_task_id,
@@ -1078,17 +1108,16 @@ impl ClipBatcher {
 
     fn add(
         &mut self,
         task_address: RenderTaskAddress,
         clips: &[ClipWorkItem],
         coordinate_system_id: CoordinateSystemId,
         resource_cache: &ResourceCache,
         gpu_cache: &GpuCache,
-        geometry_kind: MaskGeometryKind,
         clip_store: &ClipStore,
     ) {
         let mut coordinate_system_id = coordinate_system_id;
         for work_item in clips.iter() {
             let instance = ClipMaskInstance {
                 render_task_address: task_address,
                 scroll_node_data_index: work_item.scroll_node_data_index,
                 segment: 0,
@@ -1117,53 +1146,27 @@ impl ClipBatcher {
                             warn!("Warnings: skip a image mask. Key:{:?} Rect::{:?}.\n", mask.image, mask.rect);
                             continue;
                         }
                     }
                     ClipSource::Rectangle(..) => {
                         if work_item.coordinate_system_id != coordinate_system_id {
                             self.rectangles.push(ClipMaskInstance {
                                 clip_data_address: gpu_address,
-                                segment: MaskSegment::All as i32,
                                 ..instance
                             });
                             coordinate_system_id = work_item.coordinate_system_id;
                         }
-                    },
-                    ClipSource::RoundedRectangle(..) => match geometry_kind {
-                        MaskGeometryKind::Default => {
-                            self.rectangles.push(ClipMaskInstance {
-                                clip_data_address: gpu_address,
-                                segment: MaskSegment::All as i32,
-                                ..instance
-                            });
-                        }
-                        MaskGeometryKind::CornersOnly => {
-                            self.rectangles.push(ClipMaskInstance {
-                                clip_data_address: gpu_address,
-                                segment: MaskSegment::TopLeftCorner as i32,
-                                ..instance
-                            });
-                            self.rectangles.push(ClipMaskInstance {
-                                clip_data_address: gpu_address,
-                                segment: MaskSegment::TopRightCorner as i32,
-                                ..instance
-                            });
-                            self.rectangles.push(ClipMaskInstance {
-                                clip_data_address: gpu_address,
-                                segment: MaskSegment::BottomLeftCorner as i32,
-                                ..instance
-                            });
-                            self.rectangles.push(ClipMaskInstance {
-                                clip_data_address: gpu_address,
-                                segment: MaskSegment::BottomRightCorner as i32,
-                                ..instance
-                            });
-                        }
-                    },
+                    }
+                    ClipSource::RoundedRectangle(..) => {
+                        self.rectangles.push(ClipMaskInstance {
+                            clip_data_address: gpu_address,
+                            ..instance
+                        });
+                    }
                     ClipSource::BorderCorner(ref source) => {
                         self.border_clears.push(ClipMaskInstance {
                             clip_data_address: gpu_address,
                             segment: 0,
                             ..instance
                         });
                         for clip_index in 0 .. source.actual_clip_count {
                             self.borders.push(ClipMaskInstance {
@@ -1661,22 +1664,26 @@ impl RenderTarget for AlphaRenderTarget 
                                             //           primitives on picture backed
                                             //           tasks support clip masks and
                                             //           transform primitives, these
                                             //           will need to be filled out!
                                             clip_id: ClipScrollNodeIndex(0),
                                             scroll_id: ClipScrollNodeIndex(0),
                                             clip_task_address: RenderTaskAddress(0),
                                             z: 0,
-                                            flags: 0,
+                                            segment_kind: 0,
                                             user_data0: 0,
                                             user_data1: 0,
                                         };
                                         let brush = &ctx.prim_store.cpu_brushes[sub_metadata.cpu_prim_index.0];
                                         let batch = match brush.kind {
+                                            BrushKind::Solid { .. } |
+                                            BrushKind::Clear => {
+                                                unreachable!("bug: unexpected brush here");
+                                            }
                                             BrushKind::Mask { ref kind, .. } => {
                                                 match *kind {
                                                     BrushMaskKind::Corner(..) => &mut self.brush_mask_corners,
                                                     BrushMaskKind::RoundedRect(..) => &mut self.brush_mask_rounded_rects,
                                                 }
                                             }
                                         };
                                         batch.push(PrimitiveInstance::from(instance));
@@ -1697,17 +1704,16 @@ impl RenderTarget for AlphaRenderTarget 
             RenderTaskKind::CacheMask(ref task_info) => {
                 let task_address = render_tasks.get_task_address(task_id);
                 self.clip_batcher.add(
                     task_address,
                     &task_info.clips,
                     task_info.coordinate_system_id,
                     &ctx.resource_cache,
                     gpu_cache,
-                    task_info.geometry_kind,
                     clip_store,
                 );
             }
             RenderTaskKind::Scaling(..) => {
                 self.scalings.push(ScalingInfo {
                     src_task_id: task.children[0],
                     dest_task_id: task_id,
                 });
@@ -1858,31 +1864,31 @@ impl RenderPass {
                 alpha.build(ctx, gpu_cache, render_tasks, deferred_resolves);
             }
         }
     }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum TransformBatchKind {
-    Rectangle(bool),
     TextRun(GlyphFormat),
     Image(ImageBufferKind),
     YuvImage(ImageBufferKind, YuvFormat, YuvColorSpace),
     AlignedGradient,
     AngleGradient,
     RadialGradient,
     BorderCorner,
     BorderEdge,
     Line,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum BrushBatchKind {
-    Image(RenderTargetKind)
+    Image(RenderTargetKind),
+    Solid,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum BatchKind {
     Composite {
         task_id: RenderTaskId,
         source_id: RenderTaskId,
         backdrop_id: RenderTaskId,
@@ -2092,8 +2098,32 @@ fn make_polygon(
         transform.m33 as f64,
         transform.m34 as f64,
         transform.m41 as f64,
         transform.m42 as f64,
         transform.m43 as f64,
         transform.m44 as f64);
     Polygon::from_transformed_rect(rect.cast().unwrap(), mat, anchor)
 }
+
+impl BrushPrimitive {
+    fn get_batch_key(&self, blend_mode: BlendMode) -> BatchKey {
+        match self.kind {
+            BrushKind::Solid { .. } => {
+                BatchKey::new(
+                    BatchKind::Brush(BrushBatchKind::Solid),
+                    blend_mode,
+                    BatchTextures::no_texture(),
+                )
+            }
+            BrushKind::Clear => {
+                BatchKey::new(
+                    BatchKind::Brush(BrushBatchKind::Solid),
+                    BlendMode::PremultipliedDestOut,
+                    BatchTextures::no_texture(),
+                )
+            }
+            BrushKind::Mask { .. } => {
+                unreachable!("bug: mask brushes not expected in normal alpha pass");
+            }
+        }
+    }
+}
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -1,50 +1,34 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderRadius, ComplexClipRegion, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
-use api::{DevicePoint, DeviceRect, DeviceSize, LayerPoint, LayerRect, LayerSize};
-use api::{LayerToWorldTransform, LayoutPoint, LayoutRect, LayoutSize, WorldPoint3D};
+use api::{BorderRadius, DeviceIntPoint, DeviceIntRect, DeviceIntSize, DevicePoint, DeviceRect};
+use api::{DeviceSize, LayerPoint, LayerRect, LayerSize, LayerToWorldTransform, WorldRect};
 use euclid::{Point2D, Rect, Size2D, TypedPoint2D, TypedRect, TypedSize2D, TypedTransform2D};
 use euclid::TypedTransform3D;
 use num_traits::Zero;
-use std::f32::consts::FRAC_1_SQRT_2;
 use std::i32;
 use std::f32;
 
 // Matches the definition of SK_ScalarNearlyZero in Skia.
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
-    fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst>;
-    fn is_identity(&self) -> bool;
     fn preserves_2d_axis_alignment(&self) -> bool;
     fn has_perspective_component(&self) -> bool;
     fn has_2d_inverse(&self) -> bool;
     fn inverse_project(&self, target: &TypedPoint2D<f32, Dst>) -> Option<TypedPoint2D<f32, Src>>;
     fn inverse_rect_footprint(&self, rect: &TypedRect<f32, Dst>) -> TypedRect<f32, Src>;
     fn transform_kind(&self) -> TransformedRectKind;
 }
 
 impl<Src, Dst> MatrixHelpers<Src, Dst> for TypedTransform3D<f32, Src, Dst> {
-    fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst> {
-        let top_left = self.transform_point2d(&rect.origin);
-        let top_right = self.transform_point2d(&rect.top_right());
-        let bottom_left = self.transform_point2d(&rect.bottom_left());
-        let bottom_right = self.transform_point2d(&rect.bottom_right());
-        TypedRect::from_points(&[top_left, top_right, bottom_right, bottom_left])
-    }
-
-    fn is_identity(&self) -> bool {
-        *self == TypedTransform3D::identity()
-    }
-
     // A port of the preserves2dAxisAlignment function in Skia.
     // Defined in the SkMatrix44 class.
     fn preserves_2d_axis_alignment(&self) -> bool {
         if self.m14 != 0.0 || self.m24 != 0.0 {
             return false;
         }
 
         let mut col0 = 0;
@@ -90,24 +74,20 @@ impl<Src, Dst> MatrixHelpers<Src, Dst> f
             self.m22 - target.y * self.m24,
             self.m42 - target.y * self.m44,
         );
         m.inverse().map(|inv| TypedPoint2D::new(inv.m31, inv.m32))
     }
 
     fn inverse_rect_footprint(&self, rect: &TypedRect<f32, Dst>) -> TypedRect<f32, Src> {
         TypedRect::from_points(&[
-            self.inverse_project(&rect.origin)
-                .unwrap_or(TypedPoint2D::zero()),
-            self.inverse_project(&rect.top_right())
-                .unwrap_or(TypedPoint2D::zero()),
-            self.inverse_project(&rect.bottom_left())
-                .unwrap_or(TypedPoint2D::zero()),
-            self.inverse_project(&rect.bottom_right())
-                .unwrap_or(TypedPoint2D::zero()),
+            self.inverse_project(&rect.origin).unwrap_or(TypedPoint2D::zero()),
+            self.inverse_project(&rect.top_right()).unwrap_or(TypedPoint2D::zero()),
+            self.inverse_project(&rect.bottom_left()).unwrap_or(TypedPoint2D::zero()),
+            self.inverse_project(&rect.bottom_right()).unwrap_or(TypedPoint2D::zero()),
         ])
     }
 
     fn transform_kind(&self) -> TransformedRectKind {
         if self.preserves_2d_axis_alignment() {
             TransformedRectKind::AxisAligned
         } else {
             TransformedRectKind::Complex
@@ -153,16 +133,37 @@ pub fn rect_is_empty<N: PartialEq + Zero
 pub fn rect_from_points_f(x0: f32, y0: f32, x1: f32, y1: f32) -> Rect<f32> {
     Rect::new(Point2D::new(x0, y0), Size2D::new(x1 - x0, y1 - y0))
 }
 
 pub fn lerp(a: f32, b: f32, t: f32) -> f32 {
     (b - a) * t + a
 }
 
+pub fn calculate_screen_bounding_rect(
+    transform: &LayerToWorldTransform,
+    rect: &LayerRect,
+    device_pixel_ratio: f32
+) -> DeviceIntRect {
+    let rect = WorldRect::from_points(&[
+        transform.transform_point2d(&rect.origin),
+        transform.transform_point2d(&rect.top_right()),
+        transform.transform_point2d(&rect.bottom_left()),
+        transform.transform_point2d(&rect.bottom_right()),
+    ]) * device_pixel_ratio;
+
+    let rect = DeviceRect::new(
+        DevicePoint::new(rect.origin.x, rect.origin.y),
+        DeviceSize::new(rect.size.width, rect.size.height),
+    );
+
+    let max_rect = DeviceRect::max_rect();
+    rect.round_out().intersection(&max_rect).unwrap_or(max_rect).to_i32()
+}
+
 pub fn _subtract_rect<U>(
     rect: &TypedRect<f32, U>,
     other: &TypedRect<f32, U>,
     results: &mut Vec<TypedRect<f32, U>>,
 ) {
     results.clear();
 
     let int = rect.intersection(other);
@@ -196,180 +197,28 @@ pub fn _subtract_rect<U>(
             }
         }
         None => {
             results.push(*rect);
         }
     }
 }
 
-pub fn get_normal(x: f32) -> Option<f32> {
-    if x.is_normal() {
-        Some(x)
-    } else {
-        None
-    }
-}
-
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[repr(u32)]
 pub enum TransformedRectKind {
     AxisAligned = 0,
     Complex = 1,
 }
 
-#[derive(Debug, Clone)]
-pub struct TransformedRect {
-    pub local_rect: LayerRect,
-    pub bounding_rect: DeviceIntRect,
-    pub inner_rect: DeviceIntRect,
-    pub vertices: [WorldPoint3D; 4],
-    pub kind: TransformedRectKind,
-}
-
-impl TransformedRect {
-    pub fn new(
-        rect: &LayerRect,
-        transform: &LayerToWorldTransform,
-        device_pixel_ratio: f32,
-    ) -> TransformedRect {
-        let kind = if transform.preserves_2d_axis_alignment() {
-            TransformedRectKind::AxisAligned
-        } else {
-            TransformedRectKind::Complex
-        };
-
-
-        let vertices = [
-            transform.transform_point3d(&rect.origin.to_3d()),
-            transform.transform_point3d(&rect.bottom_left().to_3d()),
-            transform.transform_point3d(&rect.bottom_right().to_3d()),
-            transform.transform_point3d(&rect.top_right().to_3d()),
-        ];
-
-        let (mut xs, mut ys) = ([0.0; 4], [0.0; 4]);
-
-        for (vertex, (x, y)) in vertices.iter().zip(xs.iter_mut().zip(ys.iter_mut())) {
-            *x = get_normal(vertex.x).unwrap_or(0.0);
-            *y = get_normal(vertex.y).unwrap_or(0.0);
-        }
-
-        xs.sort_by(|a, b| a.partial_cmp(b).unwrap());
-        ys.sort_by(|a, b| a.partial_cmp(b).unwrap());
-
-        let outer_min_dp = (DevicePoint::new(xs[0], ys[0]) * device_pixel_ratio).floor();
-        let outer_max_dp = (DevicePoint::new(xs[3], ys[3]) * device_pixel_ratio).ceil();
-        let inner_min_dp = (DevicePoint::new(xs[1], ys[1]) * device_pixel_ratio).ceil();
-        let inner_max_dp = (DevicePoint::new(xs[2], ys[2]) * device_pixel_ratio).floor();
-
-        let max_rect = DeviceRect::max_rect();
-        let bounding_rect = DeviceRect::new(outer_min_dp, (outer_max_dp - outer_min_dp).to_size())
-            .intersection(&max_rect)
-            .unwrap_or(max_rect)
-            .to_i32();
-        let inner_rect = DeviceRect::new(inner_min_dp, (inner_max_dp - inner_min_dp).to_size())
-            .intersection(&max_rect)
-            .unwrap_or(max_rect)
-            .to_i32();
-
-        TransformedRect {
-            local_rect: *rect,
-            vertices,
-            bounding_rect,
-            inner_rect,
-            kind,
-        }
-    }
-}
-
 #[inline(always)]
 pub fn pack_as_float(value: u32) -> f32 {
     value as f32 + 0.5
 }
 
-
-pub trait ComplexClipRegionHelpers {
-    /// Return the approximately largest aligned rectangle that is fully inside
-    /// the provided clip region.
-    fn get_inner_rect_full(&self) -> Option<LayoutRect>;
-    /// Split the clip region into 2 sets of rectangles: opaque and transparent.
-    /// Guarantees no T-junctions in the produced split.
-    /// Attempts to cover more space in opaque, where it reasonably makes sense.
-    fn split_rectangles(
-        &self,
-        opaque: &mut Vec<LayoutRect>,
-        transparent: &mut Vec<LayoutRect>,
-    );
-}
-
-impl ComplexClipRegionHelpers for ComplexClipRegion {
-    fn get_inner_rect_full(&self) -> Option<LayoutRect> {
-        // this `k` is optimal for a simple case of all border radii being equal
-        let k = 1.0 - 0.5 * FRAC_1_SQRT_2; // could be nicely approximated to `0.3`
-        extract_inner_rect_impl(&self.rect, &self.radii, k)
-    }
-
-    fn split_rectangles(
-        &self,
-        opaque: &mut Vec<LayoutRect>,
-        transparent: &mut Vec<LayoutRect>,
-    ) {
-        fn rect(p0: LayoutPoint, p1: LayoutPoint) -> Option<LayoutRect> {
-            if p0.x != p1.x && p0.y != p1.y {
-                Some(LayerRect::new(p0.min(p1), (p1 - p0).abs().to_size()))
-            } else {
-                None
-            }
-        }
-
-        let inner = match extract_inner_rect_impl(&self.rect, &self.radii, 1.0) {
-            Some(rect) => rect,
-            None => {
-                transparent.push(self.rect);
-                return
-            },
-        };
-        let left_top = inner.origin - self.rect.origin;
-        let right_bot = self.rect.bottom_right() - inner.bottom_right();
-
-        // fill in the opaque parts
-        opaque.push(inner);
-        if left_top.x > 0.0 {
-            opaque.push(LayerRect::new(
-                LayoutPoint::new(self.rect.origin.x, inner.origin.y),
-                LayoutSize::new(left_top.x, inner.size.height),
-            ));
-        }
-        if right_bot.y > 0.0 {
-            opaque.push(LayerRect::new(
-                LayoutPoint::new(inner.origin.x, inner.origin.y + inner.size.height),
-                LayoutSize::new(inner.size.width, right_bot.y),
-            ));
-        }
-        if right_bot.x > 0.0 {
-            opaque.push(LayerRect::new(
-                LayoutPoint::new(inner.origin.x + inner.size.width, inner.origin.y),
-                LayoutSize::new(right_bot.x, inner.size.height),
-            ));
-        }
-        if left_top.y > 0.0 {
-            opaque.push(LayerRect::new(
-                LayoutPoint::new(inner.origin.x, self.rect.origin.y),
-                LayoutSize::new(inner.size.width, left_top.y),
-            ));
-        }
-
-        // fill in the transparent parts
-        transparent.extend(rect(self.rect.origin, inner.origin));
-        transparent.extend(rect(self.rect.bottom_left(), inner.bottom_left()));
-        transparent.extend(rect(self.rect.bottom_right(), inner.bottom_right()));
-        transparent.extend(rect(self.rect.top_right(), inner.top_right()));
-    }
-}
-
 #[inline]
 fn extract_inner_rect_impl<U>(
     rect: &TypedRect<f32, U>,
     radii: &BorderRadius,
     k: f32,
 ) -> Option<TypedRect<f32, U>> {
     // `k` defines how much border is taken into account
     // We enforce the offsets to be rounded to pixel boundaries
--- a/gfx/webrender/tests/angle_shader_validation.rs
+++ b/gfx/webrender/tests/angle_shader_validation.rs
@@ -89,26 +89,26 @@ const SHADERS: &[Shader] = &[
     Shader {
         name: "ps_yuv_image",
         features: PRIM_FEATURES,
     },
     Shader {
         name: "ps_text_run",
         features: PRIM_FEATURES,
     },
-    Shader {
-        name: "ps_rectangle",
-        features: &["", "TRANSFORM", "CLIP_FEATURE", "TRANSFORM,CLIP_FEATURE"],
-    },
     // Brush shaders
     Shader {
         name: "brush_mask",
         features: &[],
     },
     Shader {
+        name: "brush_solid",
+        features: &[],
+    },
+    Shader {
         name: "brush_image",
         features: &["COLOR_TARGET", "ALPHA_TARGET"],
     },
 ];
 
 const VERSION_STRING: &str = "#version 300 es\n";
 
 #[test]