Bug 1412280 - Update webrender to commit c0194de78ce26106a8497484dc8d159069e3a482. r=jrmuizel
author Kartikaya Gupta <kgupta@mozilla.com>
Tue, 31 Oct 2017 09:08:43 -0400
changeset 389428 42724e2fb2156ae1d558c111eb3b76ee8011ad04
parent 389427 c0ade1e797372449e3dbbb02168ddb411cdf1f66
child 389429 90b10943a31ebd7fea5634d4cbba25c073935921
push id 96855
push user archaeopteryx@coole-files.de
push date Tue, 31 Oct 2017 23:40:37 +0000
treeherder mozilla-inbound@285362745f60 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1412280
milestone 58.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1412280 - Update webrender to commit c0194de78ce26106a8497484dc8d159069e3a482. r=jrmuizel MozReview-Commit-ID: 6kfVziU2Cj7
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/scrolling.rs
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_mask.glsl
gfx/webrender/res/brush_mask_corner.glsl
gfx/webrender/res/brush_mask_rounded_rect.glsl
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/ellipse.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/src/border.rs
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gamma_lut.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-4b8493d6bdc64d2d83202ac15b06b0d4b14c6e76
+c0194de78ce26106a8497484dc8d159069e3a482
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -40,15 +40,13 @@ env_logger = "0.4"
 rand = "0.3"                # for the benchmarks
 servo-glutin = "0.12"     # for the example apps
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.3", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4"
-gamma-lut = "0.2.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
 core-graphics = "0.9"
 core-text = { version = "7.0", default-features = false }
-gamma-lut = "0.2.1"
--- a/gfx/webrender/examples/scrolling.rs
+++ b/gfx/webrender/examples/scrolling.rs
@@ -1,20 +1,22 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+extern crate euclid;
 extern crate gleam;
 extern crate glutin;
 extern crate webrender;
 
 #[path = "common/boilerplate.rs"]
 mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
+use euclid::SideOffsets2D;
 use webrender::api::*;
 
 struct App {
     cursor_position: WorldPoint,
 }
 
 impl Example for App {
     fn render(
@@ -102,29 +104,21 @@ impl Example for App {
 
             // Add a sticky frame. It will "stick" twice while scrolling, once
             // at a margin of 10px from the bottom, for 40 pixels of scrolling,
             // and once at a margin of 10px from the top, for 60 pixels of
             // scrolling.
             let sticky_id = builder.define_sticky_frame(
                 None,
                 (50, 350).by(50, 50),
-                StickyFrameInfo::new(
-                    Some(StickySideConstraint {
-                        margin: 10.0,
-                        max_offset: 60.0,
-                    }),
-                    None,
-                    Some(StickySideConstraint {
-                        margin: 10.0,
-                        max_offset: -40.0,
-                    }),
-                    None,
-                ),
+                SideOffsets2D::new(Some(10.0), None, Some(10.0), None),
+                StickyOffsetBounds::new(-40.0, 60.0),
+                StickyOffsetBounds::new(0.0, 0.0)
             );
+
             builder.push_clip_id(sticky_id);
             let info = LayoutPrimitiveInfo::new((50, 350).by(50, 50));
             builder.push_rect(&info, ColorF::new(0.5, 0.5, 1.0, 1.0));
             builder.pop_clip_id(); // sticky_id
 
             // just for good measure add another teal square further down and to
             // the right, which can be scrolled into view by the user
             let info = LayoutPrimitiveInfo::new((250, 350).to(300, 400));
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -51,20 +51,21 @@ void main(void) {
 
     vec2 device_pos, local_pos;
     RectWithSize local_rect = geom.local_rect;
 
     if ((brush.flags & BRUSH_FLAG_USES_PICTURE) != 0) {
         // Fetch the dynamic picture that we are drawing on.
         PictureTask pic_task = fetch_picture_task(brush.picture_address);
 
+        local_pos = local_rect.p0 + aPosition.xy * local_rect.size;
+
         // Right now - pictures only support local positions. In the future, this
         // will be expanded to support transform picture types (the common kind).
-        device_pos = pic_task.target_rect.p0 + aPosition.xy * pic_task.target_rect.size;
-        local_pos = aPosition.xy * pic_task.target_rect.size / uDevicePixelRatio;
+        device_pos = pic_task.target_rect.p0 + uDevicePixelRatio * (local_pos - pic_task.content_origin);
 
         // Write the final position transformed by the orthographic device-pixel projection.
         gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
     } else {
         AlphaBatchTask alpha_task = fetch_alpha_batch_task(brush.picture_address);
         Layer layer = fetch_layer(brush.layer_address);
         ClipArea clip_area = fetch_clip_area(brush.clip_address);
 
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -1,60 +1,115 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include shared,prim_shared,brush
 
 varying vec3 vUv;
+flat varying int vImageKind;
 flat varying vec4 vUvBounds;
+flat varying vec4 vParams;
 
 #if defined WR_FEATURE_ALPHA_TARGET
 flat varying vec4 vColor;
 #endif
 
+#define BRUSH_IMAGE_SIMPLE      0
+#define BRUSH_IMAGE_NINEPATCH   1
+#define BRUSH_IMAGE_MIRROR      2
+
 #ifdef WR_VERTEX_SHADER
 void brush_vs(
     int prim_address,
     vec2 local_pos,
     RectWithSize local_rect,
     ivec2 user_data
 ) {
     // TODO(gw): For now, this brush_image shader is only
     //           being used to draw items from the intermediate
     //           surface cache (render tasks). In the future
     //           we can expand this to support items from
     //           the normal texture cache and unify this
     //           with the normal image shader.
     BlurTask task = fetch_blur_task(user_data.x);
     vUv.z = task.render_target_layer_index;
+    vImageKind = user_data.y;
 
 #if defined WR_FEATURE_COLOR_TARGET
     vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
 #else
     vec2 texture_size = vec2(textureSize(sColor1, 0).xy);
     vColor = task.color;
 #endif
 
     vec2 uv0 = task.target_rect.p0;
-    vec2 uv1 = (task.target_rect.p0 + task.target_rect.size);
+    vec2 src_size = task.target_rect.size;
+    vec2 uv1 = uv0 + src_size;
 
-    vec2 f = (local_pos - local_rect.p0) / local_rect.size;
+    // TODO(gw): In the future we'll probably draw these as segments
+    //           with the brush shader. When that occurs, we can
+    //           modify the UVs for each segment in the VS, and the
+    //           FS can become a simple shader that doesn't need
+    //           to adjust the UVs.
 
-    vUv.xy = mix(uv0 / texture_size,
-                 uv1 / texture_size,
-                 f);
+    switch (vImageKind) {
+        case BRUSH_IMAGE_SIMPLE: {
+            vec2 f = (local_pos - local_rect.p0) / local_rect.size;
+            vUv.xy = mix(uv0, uv1, f);
+            vUv.xy /= texture_size;
+            break;
+        }
+        case BRUSH_IMAGE_NINEPATCH: {
+            vec2 local_src_size = src_size / uDevicePixelRatio;
+            vUv.xy = (local_pos - local_rect.p0) / local_src_size;
+            vParams.xy = vec2(0.5);
+            vParams.zw = (local_rect.size / local_src_size - 0.5);
+            break;
+        }
+        case BRUSH_IMAGE_MIRROR: {
+            vec2 local_src_size = src_size / uDevicePixelRatio;
+            vUv.xy = (local_pos - local_rect.p0) / local_src_size;
+            vParams.xy = 0.5 * local_rect.size / local_src_size;
+            break;
+        }
+    }
 
     vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 vec4 brush_fs() {
-    vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
+    vec2 uv;
+
+    switch (vImageKind) {
+        case BRUSH_IMAGE_SIMPLE: {
+            uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
+            break;
+        }
+        case BRUSH_IMAGE_NINEPATCH: {
+            uv = clamp(vUv.xy, vec2(0.0), vParams.xy);
+            uv += max(vec2(0.0), vUv.xy - vParams.zw);
+            uv = mix(vUvBounds.xy, vUvBounds.zw, uv);
+            break;
+        }
+        case BRUSH_IMAGE_MIRROR: {
+            // Mirror and stretch the box shadow corner over the entire
+            // primitive.
+            uv = vParams.xy - abs(vUv.xy - vParams.xy);
+
+            // Ensure that we don't fetch texels outside the box
+            // shadow corner. This can happen, for example, when
+            // drawing the outer parts of an inset box shadow.
+            uv = clamp(uv, vec2(0.0), vec2(1.0));
+            uv = mix(vUvBounds.xy, vUvBounds.zw, uv);
+            break;
+        }
+    }
 
 #if defined WR_FEATURE_COLOR_TARGET
     vec4 color = texture(sColor0, vec3(uv, vUv.z));
 #else
     vec4 color = vColor * texture(sColor1, vec3(uv, vUv.z)).r;
 #endif
 
     return color;
deleted file mode 100644
--- a/gfx/webrender/res/brush_mask.glsl
+++ /dev/null
@@ -1,77 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include shared,prim_shared,ellipse,brush
-
-flat varying float vClipMode;
-flat varying vec4 vClipCenter_Radius_TL;
-flat varying vec4 vClipCenter_Radius_TR;
-flat varying vec4 vClipCenter_Radius_BR;
-flat varying vec4 vClipCenter_Radius_BL;
-flat varying vec4 vLocalRect;
-varying vec2 vLocalPos;
-
-#ifdef WR_VERTEX_SHADER
-
-struct BrushPrimitive {
-    float clip_mode;
-    vec2 radius_tl;
-    vec2 radius_tr;
-    vec2 radius_br;
-    vec2 radius_bl;
-};
-
-BrushPrimitive fetch_brush_primitive(int address) {
-    vec4 data[3] = fetch_from_resource_cache_3(address);
-    return BrushPrimitive(data[0].x, data[1].xy, data[1].zw, data[2].xy, data[2].zw);
-}
-
-void brush_vs(
-    int prim_address,
-    vec2 local_pos,
-    RectWithSize local_rect,
-    ivec2 user_data
-) {
-    // Load the specific primitive.
-    BrushPrimitive prim = fetch_brush_primitive(prim_address);
-
-    // Write clip parameters
-    vClipMode = prim.clip_mode;
-
-    // TODO(gw): In the future, when brush primitives may be segment rects
-    //           we need to account for that here, and differentiate between
-    //           the segment rect (geometry) amd the primitive rect (which
-    //           defines where the clip radii are relative to).
-    vec4 prim_rect = vec4(local_rect.p0, local_rect.p0 + local_rect.size);
-
-    vClipCenter_Radius_TL = vec4(prim_rect.xy + prim.radius_tl, prim.radius_tl);
-    vClipCenter_Radius_TR = vec4(prim_rect.zy + vec2(-prim.radius_tr.x, prim.radius_tr.y), prim.radius_tr);
-    vClipCenter_Radius_BR = vec4(prim_rect.zw - prim.radius_br, prim.radius_br);
-    vClipCenter_Radius_BL = vec4(prim_rect.xw + vec2(prim.radius_bl.x, -prim.radius_bl.y), prim.radius_bl);
-
-    vLocalRect = prim_rect;
-    vLocalPos = local_pos;
-}
-#endif
-
-#ifdef WR_FRAGMENT_SHADER
-vec4 brush_fs() {
-    // TODO(gw): The mask code below is super-inefficient. Once we
-    // start using primitive segments in brush shaders, this can
-    // be made much faster.
-    float d = 0.0;
-    // Check if in valid clip region.
-    if (vLocalPos.x >= vLocalRect.x && vLocalPos.x < vLocalRect.z &&
-        vLocalPos.y >= vLocalRect.y && vLocalPos.y < vLocalRect.w) {
-        // Apply ellipse clip on each corner.
-        d = rounded_rect(vLocalPos,
-                         vClipCenter_Radius_TL,
-                         vClipCenter_Radius_TR,
-                         vClipCenter_Radius_BR,
-                         vClipCenter_Radius_BL);
-    }
-
-    return vec4(mix(d, 1.0 - d, vClipMode));
-}
-#endif
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/brush_mask_corner.glsl
@@ -0,0 +1,60 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,prim_shared,ellipse,brush
+
+flat varying float vClipMode;
+flat varying vec4 vClipCenter_Radius;
+flat varying vec4 vLocalRect;
+varying vec2 vLocalPos;
+
+#ifdef WR_VERTEX_SHADER
+
+struct BrushMaskCornerPrimitive {
+    vec2 radius;
+    float clip_mode;
+};
+
+BrushMaskCornerPrimitive fetch_primitive(int address) {
+    vec4 data = fetch_from_resource_cache_1(address);
+    return BrushMaskCornerPrimitive(data.xy, data.z);
+}
+
+void brush_vs(
+    int prim_address,
+    vec2 local_pos,
+    RectWithSize local_rect,
+    ivec2 user_data
+) {
+    // Load the specific primitive.
+    BrushMaskCornerPrimitive prim = fetch_primitive(prim_address);
+
+    // Write clip parameters
+    vClipMode = prim.clip_mode;
+    vClipCenter_Radius = vec4(local_rect.p0 + prim.radius, prim.radius);
+
+    vLocalRect = vec4(local_rect.p0, local_rect.p0 + local_rect.size);
+    vLocalPos = local_pos;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec4 brush_fs() {
+    float d = 1.0;
+    // NOTE: The AA range must be computed outside the if statement,
+    //       since otherwise the results can be undefined if the
+    //       input function is not continuous. I have observed this
+    //       as flickering behaviour on Intel GPUs.
+    float aa_range = compute_aa_range(vLocalPos);
+    // Check if in valid clip region.
+    if (vLocalPos.x < vClipCenter_Radius.x && vLocalPos.y < vClipCenter_Radius.y) {
+        // Apply ellipse clip on corner.
+        d = distance_to_ellipse(vLocalPos - vClipCenter_Radius.xy,
+                                vClipCenter_Radius.zw);
+        d = distance_aa(aa_range, d);
+    }
+
+    return vec4(mix(d, 1.0 - d, vClipMode));
+}
+#endif
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/brush_mask_rounded_rect.glsl
@@ -0,0 +1,95 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,prim_shared,ellipse,brush
+
+flat varying float vClipMode;
+flat varying vec4 vClipCenter_Radius_TL;
+flat varying vec4 vClipCenter_Radius_TR;
+flat varying vec4 vClipCenter_Radius_BR;
+flat varying vec4 vClipCenter_Radius_BL;
+flat varying vec4 vLocalRect;
+varying vec2 vLocalPos;
+
+#ifdef WR_VERTEX_SHADER
+
+struct BrushPrimitive {
+    float clip_mode;
+    vec4 rect;
+    vec2 radius_tl;
+    vec2 radius_tr;
+    vec2 radius_br;
+    vec2 radius_bl;
+};
+
+BrushPrimitive fetch_brush_primitive(int address) {
+    vec4 data[4] = fetch_from_resource_cache_4(address);
+    return BrushPrimitive(
+        data[0].x,
+        data[1],
+        data[2].xy,
+        data[2].zw,
+        data[3].xy,
+        data[3].zw
+    );
+}
+
+void brush_vs(
+    int prim_address,
+    vec2 local_pos,
+    RectWithSize local_rect,
+    ivec2 user_data
+) {
+    // Load the specific primitive.
+    BrushPrimitive prim = fetch_brush_primitive(prim_address);
+
+    // Write clip parameters
+    vClipMode = prim.clip_mode;
+
+    // TODO(gw): In the future, when brush primitives may be segment rects
+    //           we need to account for that here, and differentiate between
+    //           the segment rect (geometry) and the primitive rect (which
+    //           defines where the clip radii are relative to).
+    vec4 clip_rect = vec4(prim.rect.xy, prim.rect.xy + prim.rect.zw);
+
+    vClipCenter_Radius_TL = vec4(clip_rect.xy + prim.radius_tl, prim.radius_tl);
+    vClipCenter_Radius_TR = vec4(clip_rect.zy + vec2(-prim.radius_tr.x, prim.radius_tr.y), prim.radius_tr);
+    vClipCenter_Radius_BR = vec4(clip_rect.zw - prim.radius_br, prim.radius_br);
+    vClipCenter_Radius_BL = vec4(clip_rect.xw + vec2(prim.radius_bl.x, -prim.radius_bl.y), prim.radius_bl);
+
+    vLocalRect = clip_rect;
+    vLocalPos = local_pos;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec4 brush_fs() {
+    // TODO(gw): The mask code below is super-inefficient. Once we
+    // start using primitive segments in brush shaders, this can
+    // be made much faster.
+
+    // NOTE: The AA range must be computed outside the if statement,
+    //       since otherwise the results can be undefined if the
+    //       input function is not continuous. I have observed this
+    //       as flickering behaviour on Intel GPUs.
+    float aa_range = compute_aa_range(vLocalPos);
+
+    // Apply ellipse clip on each corner.
+    float d = 0.0;
+
+    if (vLocalPos.x > vLocalRect.x &&
+        vLocalPos.y > vLocalRect.y &&
+        vLocalPos.x <= vLocalRect.z &&
+        vLocalPos.y <= vLocalRect.w) {
+        d = rounded_rect(vLocalPos,
+                         vClipCenter_Radius_TL,
+                         vClipCenter_Radius_TR,
+                         vClipCenter_Radius_BR,
+                         vClipCenter_Radius_BL,
+                         aa_range);
+    }
+
+    return vec4(mix(d, 1.0 - d, vClipMode));
+}
+#endif
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -91,12 +91,13 @@ ClipVertexInfo write_clip_tile_vertex(Re
 
     // compute the point position in side the layer, in CSS space
     vec2 vertex_pos = actual_pos + area.task_bounds.xy - area.screen_origin_target_index.xy;
 
     gl_Position = uTransform * vec4(vertex_pos, 0.0, 1);
 
     vLocalBounds = vec4(clipped_local_rect.p0, clipped_local_rect.p0 + clipped_local_rect.size);
 
-    return ClipVertexInfo(layer_pos.xyw, actual_pos, clipped_local_rect);
+    ClipVertexInfo vi = ClipVertexInfo(layer_pos.xyw, actual_pos, clipped_local_rect);
+    return vi;
 }
 
 #endif //WR_VERTEX_SHADER
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -11,17 +11,19 @@ flat varying float vLayer;
 
 #ifdef WR_VERTEX_SHADER
 struct ImageMaskData {
     RectWithSize local_rect;
 };
 
 ImageMaskData fetch_mask_data(ivec2 address) {
     vec4 data = fetch_from_resource_cache_1_direct(address);
-    return ImageMaskData(RectWithSize(data.xy, data.zw));
+    RectWithSize local_rect = RectWithSize(data.xy, data.zw);
+    ImageMaskData mask_data = ImageMaskData(local_rect);
+    return mask_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
     Layer layer = fetch_layer(cmi.layer_address);
     ImageMaskData mask = fetch_mask_data(cmi.clip_data_address);
     RectWithSize local_rect = mask.local_rect;
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -91,21 +91,24 @@ void main(void) {
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float alpha = 1.f;
     vec2 local_pos = init_transform_fs(vPos, alpha);
 
+    float aa_range = compute_aa_range(local_pos);
+
     float clip_alpha = rounded_rect(local_pos,
                                     vClipCenter_Radius_TL,
                                     vClipCenter_Radius_TR,
                                     vClipCenter_Radius_BR,
-                                    vClipCenter_Radius_BL);
+                                    vClipCenter_Radius_BL,
+                                    aa_range);
 
     float combined_alpha = alpha * clip_alpha;
 
     // Select alpha or inverse alpha depending on clip in/out.
     float final_alpha = mix(combined_alpha, 1.0 - combined_alpha, vClipMode);
 
     oFragColor = vec4(final_alpha, 0.0, 0.0, 1.0);
 }
--- a/gfx/webrender/res/ellipse.glsl
+++ b/gfx/webrender/res/ellipse.glsl
@@ -79,17 +79,18 @@ float clip_against_ellipse_if_needed(
                ellipse_distance,
                all(lessThan(sign_modifier * pos, sign_modifier * ellipse_center_radius.xy)));
 }
 
 float rounded_rect(vec2 pos,
                    vec4 clip_center_radius_tl,
                    vec4 clip_center_radius_tr,
                    vec4 clip_center_radius_br,
-                   vec4 clip_center_radius_bl) {
+                   vec4 clip_center_radius_bl,
+                   float aa_range) {
     // Start with a negative value (means "inside") for all fragments that are not
     // in a corner. If the fragment is in a corner, one of the clip_against_ellipse_if_needed
     // calls below will update it.
     float current_distance = -1.0;
 
     // Clip against each ellipse.
     current_distance = clip_against_ellipse_if_needed(pos,
                                                       current_distance,
@@ -108,13 +109,12 @@ float rounded_rect(vec2 pos,
 
     current_distance = clip_against_ellipse_if_needed(pos,
                                                       current_distance,
                                                       clip_center_radius_bl,
                                                       vec2(1.0, -1.0));
 
     // Apply AA
     // See comment in ps_border_corner about the choice of constants.
-    float aa_range = compute_aa_range(pos);
 
     return distance_aa(aa_range, current_distance);
 }
 #endif
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -204,22 +204,29 @@ struct PictureTask {
     float render_target_layer_index;
     vec2 content_origin;
     vec4 color;
 };
 
 PictureTask fetch_picture_task(int address) {
     RenderTaskData task_data = fetch_render_task(address);
 
-    return PictureTask(
-        RectWithSize(task_data.data0.xy, task_data.data0.zw),
+    RectWithSize target_rect = RectWithSize(
+        task_data.data0.xy,
+        task_data.data0.zw
+    );
+
+    PictureTask task = PictureTask(
+        target_rect,
         task_data.data1.x,
         task_data.data1.yz,
         task_data.data2
     );
+
+    return task;
 }
 
 struct BlurTask {
     RectWithSize target_rect;
     float render_target_layer_index;
     float blur_radius;
     vec4 color;
 };
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderSide, BorderStyle, BorderWidths, ClipAndScrollInfo, ColorF, LayerPoint, LayerRect};
 use api::{LayerPrimitiveInfo, LayerSize, NormalBorder, RepeatMode};
 use clip::ClipSource;
 use ellipse::Ellipse;
 use frame_builder::FrameBuilder;
 use gpu_cache::GpuDataRequest;
-use prim_store::{BorderPrimitiveCpu, PrimitiveContainer, TexelRect};
+use prim_store::{BorderPrimitiveCpu, RectangleContent, PrimitiveContainer, TexelRect};
 use tiling::PrimitiveFlags;
 use util::{lerp, pack_as_float};
 
 #[repr(u8)]
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BorderCornerInstance {
     None,
     Single, // Single instance needed - corner styles are same or similar.
@@ -380,56 +380,56 @@ impl FrameBuilder {
 
             // Add a solid rectangle for each visible edge/corner combination.
             if top_edge == BorderEdgeKind::Solid {
                 let mut info = info.clone();
                 info.rect = LayerRect::new(p0, LayerSize::new(rect_width, top_len));
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
-                    &border.top.color,
+                    &RectangleContent::Fill(border.top.color),
                     PrimitiveFlags::None,
                 );
             }
             if left_edge == BorderEdgeKind::Solid {
                 let mut info = info.clone();
                 info.rect = LayerRect::new(
                     LayerPoint::new(p0.x, p0.y + top_len),
                     LayerSize::new(left_len, rect_height - top_len - bottom_len),
                 );
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
-                    &border.left.color,
+                    &RectangleContent::Fill(border.left.color),
                     PrimitiveFlags::None,
                 );
             }
             if right_edge == BorderEdgeKind::Solid {
                 let mut info = info.clone();
                 info.rect = LayerRect::new(
                     LayerPoint::new(p1.x - right_len, p0.y + top_len),
                     LayerSize::new(right_len, rect_height - top_len - bottom_len),
                 );
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
-                    &border.right.color,
+                    &RectangleContent::Fill(border.right.color),
                     PrimitiveFlags::None,
                 );
             }
             if bottom_edge == BorderEdgeKind::Solid {
                 let mut info = info.clone();
                 info.rect = LayerRect::new(
                     LayerPoint::new(p0.x, p1.y - bottom_len),
                     LayerSize::new(rect_width, bottom_len),
                 );
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
-                    &border.bottom.color,
+                    &RectangleContent::Fill(border.bottom.color),
                     PrimitiveFlags::None,
                 );
             }
         } else {
             // Create clip masks for border corners, if required.
             let mut extra_clips = Vec::new();
             let mut corner_instances = [BorderCornerInstance::Single; 4];
 
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -1,46 +1,52 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ColorF, LayerPoint, LayerRect, LayerSize, LayerVector2D};
+use api::{BorderRadiusKind, ColorF, LayerPoint, LayerRect, LayerSize, LayerVector2D};
 use api::{BorderRadius, BoxShadowClipMode, LayoutSize, LayerPrimitiveInfo};
 use api::{ClipMode, ComplexClipRegion, LocalClip, ClipAndScrollInfo};
 use clip::ClipSource;
 use frame_builder::FrameBuilder;
-use prim_store::{PrimitiveContainer, RectanglePrimitive, BrushPrimitive};
+use prim_store::{PrimitiveContainer, RectangleContent, RectanglePrimitive};
+use prim_store::{BrushMaskKind, BrushKind, BrushPrimitive};
 use picture::PicturePrimitive;
 use util::RectHelpers;
 
 // The blur shader samples BLUR_SAMPLE_SCALE * blur_radius surrounding texels.
 pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
 
+// The amount of padding added to the border corner drawn in the box shadow
+// mask. This ensures that we get a few pixels past the corner that can be
+// blurred without being affected by the border radius.
+pub const MASK_CORNER_PADDING: f32 = 4.0;
+
 impl FrameBuilder {
     pub fn add_box_shadow(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         prim_info: &LayerPrimitiveInfo,
         box_offset: &LayerVector2D,
         color: &ColorF,
         blur_radius: f32,
         spread_radius: f32,
         border_radius: BorderRadius,
         clip_mode: BoxShadowClipMode,
     ) {
         if color.a == 0.0 {
             return;
         }
 
-        let spread_amount = match clip_mode {
+        let (spread_amount, brush_clip_mode) = match clip_mode {
             BoxShadowClipMode::Outset => {
-                spread_radius
+                (spread_radius, ClipMode::Clip)
             }
             BoxShadowClipMode::Inset => {
-                -spread_radius
+                (-spread_radius, ClipMode::ClipOut)
             }
         };
 
         let shadow_radius = adjust_border_radius_for_box_shadow(
             border_radius,
             spread_amount,
         );
         let shadow_rect = prim_info.rect
@@ -92,162 +98,196 @@ impl FrameBuilder {
                 }
             };
 
             self.add_primitive(
                 clip_and_scroll,
                 &fast_info,
                 clips,
                 PrimitiveContainer::Rectangle(RectanglePrimitive {
-                    color: *color,
+                    content: RectangleContent::Fill(*color),
                 }),
             );
         } else {
-            let blur_offset = 2.0 * blur_radius;
+            let blur_offset = BLUR_SAMPLE_SCALE * blur_radius;
             let mut extra_clips = vec![];
-            let mut blur_regions = vec![];
 
             match clip_mode {
                 BoxShadowClipMode::Outset => {
-                    let brush_prim = BrushPrimitive {
-                        clip_mode: ClipMode::Clip,
-                        radius: shadow_radius,
+                    let width;
+                    let height;
+                    let brush_prim;
+                    let corner_size = shadow_radius.is_uniform_size();
+                    let radii_kind;
+
+                    if !shadow_rect.is_well_formed_and_nonempty() {
+                        return;
+                    }
+
+                    // If the outset box shadow has a uniform corner size, we can
+                    // just blur the top left corner, and stretch / mirror that
+                    // across the primitive.
+                    if let Some(corner_size) = corner_size {
+                        radii_kind = BorderRadiusKind::Uniform;
+                        width = MASK_CORNER_PADDING + corner_size.width.max(BLUR_SAMPLE_SCALE * blur_radius);
+                        height = MASK_CORNER_PADDING + corner_size.height.max(BLUR_SAMPLE_SCALE * blur_radius);
+
+                        brush_prim = BrushPrimitive {
+                            kind: BrushKind::Mask {
+                                clip_mode: brush_clip_mode,
+                                kind: BrushMaskKind::Corner(corner_size),
+                            }
+                        };
+                    } else {
+                        // Create a minimal size primitive mask to blur. In this
+                        // case, we ensure the size of each corner is the same,
+                        // to simplify the shader logic that stretches the blurred
+                        // result across the primitive.
+                        radii_kind = BorderRadiusKind::NonUniform;
+                        let max_width = shadow_radius.top_left.width
+                                            .max(shadow_radius.bottom_left.width)
+                                            .max(shadow_radius.top_right.width)
+                                            .max(shadow_radius.bottom_right.width);
+                        let max_height = shadow_radius.top_left.height
+                                            .max(shadow_radius.bottom_left.height)
+                                            .max(shadow_radius.top_right.height)
+                                            .max(shadow_radius.bottom_right.height);
+
+                        width = 2.0 * max_width + BLUR_SAMPLE_SCALE * blur_radius;
+                        height = 2.0 * max_height + BLUR_SAMPLE_SCALE * blur_radius;
+
+                        let clip_rect = LayerRect::new(LayerPoint::zero(),
+                                                       LayerSize::new(width, height));
+
+                        brush_prim = BrushPrimitive {
+                            kind: BrushKind::Mask {
+                                clip_mode: brush_clip_mode,
+                                kind: BrushMaskKind::RoundedRect(clip_rect, shadow_radius),
+                            }
+                        };
                     };
 
-                    let brush_rect = LayerRect::new(LayerPoint::new(blur_offset, blur_offset),
-                                                    shadow_rect.size);
-
+                    // Construct a mask primitive to add to the picture.
+                    let brush_rect = LayerRect::new(LayerPoint::zero(),
+                                                    LayerSize::new(width, height));
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
-
                     let brush_prim_index = self.create_primitive(
                         clip_and_scroll,
                         &brush_info,
                         Vec::new(),
                         PrimitiveContainer::Brush(brush_prim),
                     );
 
+                    // Create a box shadow picture and add the mask primitive to it.
                     let pic_rect = shadow_rect.inflate(blur_offset, blur_offset);
-                    let blur_range = BLUR_SAMPLE_SCALE * blur_radius;
-
-                    let size = pic_rect.size;
-
-                    let tl = LayerSize::new(
-                        blur_radius.max(border_radius.top_left.width),
-                        blur_radius.max(border_radius.top_left.height)
-                    ) * BLUR_SAMPLE_SCALE;
-                    let tr = LayerSize::new(
-                        blur_radius.max(border_radius.top_right.width),
-                        blur_radius.max(border_radius.top_right.height)
-                    ) * BLUR_SAMPLE_SCALE;
-                    let br = LayerSize::new(
-                        blur_radius.max(border_radius.bottom_right.width),
-                        blur_radius.max(border_radius.bottom_right.height)
-                    ) * BLUR_SAMPLE_SCALE;
-                    let bl = LayerSize::new(
-                        blur_radius.max(border_radius.bottom_left.width),
-                        blur_radius.max(border_radius.bottom_left.height)
-                    ) * BLUR_SAMPLE_SCALE;
-
-                    let max_width = tl.width.max(tr.width.max(bl.width.max(br.width)));
-                    let max_height = tl.height.max(tr.height.max(bl.height.max(br.height)));
-
-                    // Apply a conservative test that if any of the blur regions below
-                    // will overlap, we won't bother applying the region optimization
-                    // and will just blur the entire thing. This should only happen
-                    // in rare cases, where either the blur radius or border radius
-                    // is very large, in which case there's no real point in trying
-                    // to only blur a small region anyway.
-                    if max_width < 0.5 * size.width && max_height < 0.5 * size.height {
-                        blur_regions.push(LayerRect::from_floats(0.0, 0.0, tl.width, tl.height));
-                        blur_regions.push(LayerRect::from_floats(size.width - tr.width, 0.0, size.width, tr.height));
-                        blur_regions.push(LayerRect::from_floats(size.width - br.width, size.height - br.height, size.width, size.height));
-                        blur_regions.push(LayerRect::from_floats(0.0, size.height - bl.height, bl.width, size.height));
-
-                        blur_regions.push(LayerRect::from_floats(0.0, tl.height, blur_range, size.height - bl.height));
-                        blur_regions.push(LayerRect::from_floats(size.width - blur_range, tr.height, size.width, size.height - br.height));
-                        blur_regions.push(LayerRect::from_floats(tl.width, 0.0, size.width - tr.width, blur_range));
-                        blur_regions.push(LayerRect::from_floats(bl.width, size.height - blur_range, size.width - br.width, size.height));
-                    }
-
                     let mut pic_prim = PicturePrimitive::new_box_shadow(
                         blur_radius,
                         *color,
-                        blur_regions,
-                        BoxShadowClipMode::Outset,
+                        Vec::new(),
+                        clip_mode,
+                        radii_kind,
                     );
-
                     pic_prim.add_primitive(
                         brush_prim_index,
                         &brush_rect,
                         clip_and_scroll
                     );
-
                     pic_prim.build();
 
+                    // TODO(gw): Right now, we always use a clip out
+                    //           mask for outset shadows. We can make this
+                    //           much more efficient when we have proper
+                    //           segment logic, by avoiding drawing
+                    //           most of the pixels inside and just
+                    //           clipping out along the edges.
                     extra_clips.push(ClipSource::RoundedRectangle(
                         prim_info.rect,
                         border_radius,
                         ClipMode::ClipOut,
                     ));
 
                     let pic_info = LayerPrimitiveInfo::new(pic_rect);
-
                     self.add_primitive(
                         clip_and_scroll,
                         &pic_info,
                         extra_clips,
                         PrimitiveContainer::Picture(pic_prim),
                     );
                 }
                 BoxShadowClipMode::Inset => {
-                    let brush_prim = BrushPrimitive {
-                        clip_mode: ClipMode::ClipOut,
-                        radius: shadow_radius,
-                    };
+                    // TODO(gw): Inset shadows still need an optimization pass.
+                    //           We draw and blur way more pixels than needed.
+
+                    // Draw a picture that covers the area of the primitive rect.
+                    let brush_rect = LayerRect::new(
+                        LayerPoint::zero(),
+                        prim_info.rect.size
+                    );
 
-                    let mut brush_rect = shadow_rect;
-                    brush_rect.origin.x = brush_rect.origin.x - prim_info.rect.origin.x + blur_offset;
-                    brush_rect.origin.y = brush_rect.origin.y - prim_info.rect.origin.y + blur_offset;
+                    // Define where the inset box shadow rect is, local
+                    // to the brush rect above.
+                    let clip_rect = brush_rect.translate(box_offset)
+                                              .inflate(spread_amount, spread_amount);
 
+                    // Ensure there is one pixel around the edges, so that there
+                    // is non-zero data to blur, in the case of an inset shadow
+                    // with zero spread and zero offset.
+                    let brush_rect = brush_rect.inflate(1.0, 1.0);
+                    let brush_prim = BrushPrimitive {
+                        kind: BrushKind::Mask {
+                            clip_mode: brush_clip_mode,
+                            kind: BrushMaskKind::RoundedRect(clip_rect, shadow_radius),
+                        }
+                    };
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
-
                     let brush_prim_index = self.create_primitive(
                         clip_and_scroll,
                         &brush_info,
                         Vec::new(),
                         PrimitiveContainer::Brush(brush_prim),
                     );
 
-                    let pic_rect = prim_info.rect.inflate(blur_offset, blur_offset);
-
-                    // TODO(gw): Apply minimal blur regions for inset box shadows.
-
+                    // Create a box shadow picture primitive and add
+                    // the brush primitive to it.
                     let mut pic_prim = PicturePrimitive::new_box_shadow(
                         blur_radius,
                         *color,
-                        blur_regions,
+                        Vec::new(),
                         BoxShadowClipMode::Inset,
+                        // TODO(gw): Make use of optimization for inset.
+                        BorderRadiusKind::NonUniform,
                     );
-
                     pic_prim.add_primitive(
                         brush_prim_index,
-                        &prim_info.rect,
+                        &brush_rect,
                         clip_and_scroll
                     );
+                    pic_prim.build();
+
+                    // Draw the picture one pixel outside the original
+                    // rect to account for the inflate above. This
+                    // extra edge will be clipped by the local clip
+                    // rect set below.
+                    let pic_rect = prim_info.rect.inflate(1.0, 1.0);
+                    let pic_info = LayerPrimitiveInfo::with_clip_rect(
+                        pic_rect,
+                        prim_info.rect
+                    );
 
-                    pic_prim.build();
+                    // Add a normal clip to ensure nothing gets drawn
+                    // outside the primitive rect.
+                    if !border_radius.is_zero() {
+                        extra_clips.push(ClipSource::RoundedRectangle(
+                            prim_info.rect,
+                            border_radius,
+                            ClipMode::Clip,
+                        ));
+                    }
 
-                    extra_clips.push(ClipSource::RoundedRectangle(
-                        prim_info.rect,
-                        border_radius,
-                        ClipMode::Clip,
-                    ));
-
-                    let pic_info = LayerPrimitiveInfo::with_clip_rect(pic_rect, prim_info.rect);
-
+                    // Add the picture primitive to the frame.
                     self.add_primitive(
                         clip_and_scroll,
                         &pic_info,
                         extra_clips,
                         PrimitiveContainer::Picture(pic_prim),
                     );
                 }
             }
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -1,18 +1,19 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DeviceIntRect, LayerPixel, LayerPoint, LayerRect, LayerSize};
 use api::{LayerToScrollTransform, LayerToWorldTransform, LayerVector2D, PipelineId};
-use api::{ScrollClamping, ScrollEventPhase, ScrollLocation, ScrollSensitivity, StickyFrameInfo};
+use api::{ScrollClamping, ScrollEventPhase, ScrollLocation, ScrollSensitivity, StickyOffsetBounds};
 use api::WorldPoint;
 use clip::{ClipRegion, ClipSources, ClipSourcesHandle, ClipStore};
 use clip_scroll_tree::{CoordinateSystemId, TransformUpdateState};
+use euclid::SideOffsets2D;
 use geometry::ray_intersects_rect;
 use gpu_cache::GpuCache;
 use render_task::{ClipChain, ClipChainNode, ClipWorkItem};
 use resource_cache::ResourceCache;
 use spring::{DAMPING, STIFFNESS, Spring};
 use std::rc::Rc;
 use tiling::{PackedLayer, PackedLayerIndex};
 use util::{MatrixHelpers, MaxRect};
@@ -49,32 +50,55 @@ impl ClipInfo {
             clip_sources: clip_store.insert(clip_sources),
             packed_layer_index,
             is_masking,
         }
     }
 }
 
 #[derive(Debug)]
+pub struct StickyFrameInfo {
+    pub margins: SideOffsets2D<Option<f32>>,
+    pub vertical_offset_bounds: StickyOffsetBounds,
+    pub horizontal_offset_bounds: StickyOffsetBounds,
+    pub current_offset: LayerVector2D,
+}
+
+impl StickyFrameInfo {
+    pub fn new(
+        margins: SideOffsets2D<Option<f32>>,
+        vertical_offset_bounds: StickyOffsetBounds,
+        horizontal_offset_bounds: StickyOffsetBounds
+    ) -> StickyFrameInfo {
+        StickyFrameInfo {
+            margins,
+            vertical_offset_bounds,
+            horizontal_offset_bounds,
+            current_offset: LayerVector2D::zero(),
+        }
+    }
+}
+
+#[derive(Debug)]
 pub enum NodeType {
     /// A reference frame establishes a new coordinate space in the tree.
     ReferenceFrame(ReferenceFrameInfo),
 
     /// Other nodes just do clipping, but no transformation.
     Clip(ClipInfo),
 
     /// Transforms its content, but doesn't clip it. Can also be adjusted
     /// by scroll events or setting scroll offsets.
     ScrollFrame(ScrollingState),
 
     /// A special kind of node that adjusts its position based on the position
-    /// of its parent node and a given set of sticky positioning constraints.
+    /// of its parent node and a given set of sticky positioning offset bounds.
     /// Sticky positioning is described in the CSS Positioned Layout Module Level 3 here:
     /// https://www.w3.org/TR/css-position-3/#sticky-pos
-    StickyFrame(StickyFrameInfo, LayerVector2D),
+    StickyFrame(StickyFrameInfo),
 }
 
 /// Contains information common among all types of ClipScrollTree nodes.
 #[derive(Debug)]
 pub struct ClipScrollNode {
     /// Viewing rectangle in the coordinate system of the parent reference frame.
     pub local_viewport_rect: LayerRect,
 
@@ -192,17 +216,17 @@ impl ClipScrollNode {
     }
 
     pub fn new_sticky_frame(
         parent_id: ClipId,
         frame_rect: LayerRect,
         sticky_frame_info: StickyFrameInfo,
         pipeline_id: PipelineId,
     ) -> ClipScrollNode {
-        let node_type = NodeType::StickyFrame(sticky_frame_info, LayerVector2D::zero());
+        let node_type = NodeType::StickyFrame(sticky_frame_info);
         Self::new(pipeline_id, Some(parent_id), &frame_rect, node_type)
     }
 
 
     pub fn add_child(&mut self, child: ClipId) {
         self.children.push(child);
     }
 
@@ -354,29 +378,26 @@ impl ClipScrollNode {
                     .unwrap_or(LayerRect::zero());
                 self.reference_frame_relative_scroll_offset =
                     state.parent_accumulated_scroll_offset;
                 (
                     LayerToScrollTransform::identity(),
                     self.reference_frame_relative_scroll_offset,
                 )
             }
-            NodeType::StickyFrame(_, ref mut node_sticky_offset) => {
-                *node_sticky_offset = sticky_offset;
+            NodeType::StickyFrame(ref mut info) => {
+                info.current_offset = sticky_offset;
                 self.combined_local_viewport_rect =
                     state.parent_combined_viewport_rect
                     .translate(&-sticky_offset)
                     .intersection(&self.local_clip_rect)
                     .unwrap_or(LayerRect::zero());
                 self.reference_frame_relative_scroll_offset =
                     state.parent_accumulated_scroll_offset + sticky_offset;
-                (
-                    LayerToScrollTransform::identity(),
-                    self.reference_frame_relative_scroll_offset,
-                )
+                (LayerToScrollTransform::identity(), self.reference_frame_relative_scroll_offset)
             }
         };
 
         // The transformation for this viewport in world coordinates is the transformation for
         // our parent reference frame, plus any accumulated scrolling offsets from nodes
         // between our reference frame and this node. For reference frames, we also include
         // whatever local transformation this reference frame provides. This can be combined
         // with the local_viewport_rect to get its position in world space.
@@ -415,66 +436,100 @@ impl ClipScrollNode {
             NodeType::ScrollFrame(ref scrolling) => {
                 state.parent_combined_viewport_rect =
                         self.combined_local_viewport_rect.translate(&-scrolling.offset);
                 state.parent_accumulated_scroll_offset =
                     scrolling.offset + state.parent_accumulated_scroll_offset;
                 state.nearest_scrolling_ancestor_offset = scrolling.offset;
                 state.nearest_scrolling_ancestor_viewport = self.local_viewport_rect;
             }
-            NodeType::StickyFrame(_, sticky_offset) => {
+            NodeType::StickyFrame(ref info) => {
                 // We don't translate the combined rect by the sticky offset, because sticky
                 // offsets actually adjust the node position itself, whereas scroll offsets
                 // only apply to contents inside the node.
                 state.parent_combined_viewport_rect = self.combined_local_viewport_rect;
                 state.parent_accumulated_scroll_offset =
-                    sticky_offset + state.parent_accumulated_scroll_offset;
+                    info.current_offset + state.parent_accumulated_scroll_offset;
             }
         }
     }
 
     fn calculate_sticky_offset(
         &self,
         viewport_scroll_offset: &LayerVector2D,
         viewport_rect: &LayerRect,
     ) -> LayerVector2D {
-        let sticky_frame_info = match self.node_type {
-            NodeType::StickyFrame(info, _) => info,
+        let info = match self.node_type {
+            NodeType::StickyFrame(ref info) => info,
             _ => return LayerVector2D::zero(),
         };
 
-        let sticky_rect = self.local_viewport_rect.translate(viewport_scroll_offset);
-        let mut sticky_offset = LayerVector2D::zero();
+        if info.margins.top.is_none() && info.margins.bottom.is_none() &&
+            info.margins.left.is_none() && info.margins.right.is_none() {
+            return LayerVector2D::zero();
+        }
 
-        if let Some(info) = sticky_frame_info.top {
-            sticky_offset.y = viewport_rect.min_y() + info.margin - sticky_rect.min_y();
-            sticky_offset.y = sticky_offset.y.max(0.0).min(info.max_offset);
+        // The viewport and margins of the item establish the maximum amount that it can
+        // be offset in order to keep it on screen. Since we care about the relationship
+        // between the scrolled content and unscrolled viewport we adjust the viewport's
+        // position by the scroll offset in order to work with their relative positions on the
+        // page.
+        let sticky_rect = self.local_viewport_rect.translate(viewport_scroll_offset);
+
+        let mut sticky_offset = LayerVector2D::zero();
+        if let Some(margin) = info.margins.top {
+            // If the sticky rect is positioned above the top edge of the viewport (plus margin)
+            // we move it down so that it is fully inside the viewport.
+            let top_viewport_edge = viewport_rect.min_y() + margin;
+            if sticky_rect.min_y() < top_viewport_edge {
+                 sticky_offset.y = top_viewport_edge - sticky_rect.min_y();
+            }
+            debug_assert!(sticky_offset.y >= 0.0);
         }
 
         if sticky_offset.y == 0.0 {
-            if let Some(info) = sticky_frame_info.bottom {
-                sticky_offset.y = (viewport_rect.max_y() - info.margin) -
-                    (sticky_offset.y + sticky_rect.min_y() + sticky_rect.size.height);
-                sticky_offset.y = sticky_offset.y.min(0.0).max(info.max_offset);
+            if let Some(margin) = info.margins.bottom {
+                // If the bottom of the sticky rect is positioned below the bottom viewport edge
+                // (accounting for margin), we move it up so that it is fully inside the viewport.
+                let bottom_viewport_edge = viewport_rect.max_y() - margin;
+                if sticky_rect.max_y() > bottom_viewport_edge {
+                     sticky_offset.y = bottom_viewport_edge - sticky_rect.max_y();
+                }
+                debug_assert!(sticky_offset.y <= 0.0);
             }
         }
 
-        if let Some(info) = sticky_frame_info.left {
-            sticky_offset.x = viewport_rect.min_x() + info.margin - sticky_rect.min_x();
-            sticky_offset.x = sticky_offset.x.max(0.0).min(info.max_offset);
+        if let Some(margin) = info.margins.left {
+            // If the sticky rect is positioned left of the left edge of the viewport (plus margin)
+            // we move it right so that it is fully inside the viewport.
+            let left_viewport_edge = viewport_rect.min_x() + margin;
+            if sticky_rect.min_x() < left_viewport_edge {
+                 sticky_offset.x = left_viewport_edge - sticky_rect.min_x();
+            }
+            debug_assert!(sticky_offset.x >= 0.0);
         }
 
         if sticky_offset.x == 0.0 {
-            if let Some(info) = sticky_frame_info.right {
-                sticky_offset.x = (viewport_rect.max_x() - info.margin) -
-                    (sticky_offset.x + sticky_rect.min_x() + sticky_rect.size.width);
-                sticky_offset.x = sticky_offset.x.min(0.0).max(info.max_offset);
+            if let Some(margin) = info.margins.right {
+                // If the right edge of the sticky rect is positioned right of the right viewport
+                // edge (accounting for margin), we move it left so that it is fully inside the
+                // viewport.
+                let right_viewport_edge = viewport_rect.max_x() - margin;
+                if sticky_rect.max_x() > right_viewport_edge {
+                     sticky_offset.x = right_viewport_edge - sticky_rect.max_x();
+                }
+                debug_assert!(sticky_offset.x <= 0.0);
             }
         }
 
+        sticky_offset.y = sticky_offset.y.max(info.vertical_offset_bounds.min);
+        sticky_offset.y = sticky_offset.y.min(info.vertical_offset_bounds.max);
+        sticky_offset.x = sticky_offset.x.max(info.horizontal_offset_bounds.min);
+        sticky_offset.x = sticky_offset.x.min(info.horizontal_offset_bounds.max);
+
         sticky_offset
     }
 
     pub fn scrollable_size(&self) -> LayerSize {
         match self.node_type {
            NodeType:: ScrollFrame(state) => state.scrollable_size,
             _ => LayerSize::zero(),
         }
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -1,18 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ClipId, DeviceIntRect, LayerPoint, LayerRect};
-use api::{LayerToScrollTransform, LayerToWorldTransform, LayerVector2D, PipelineId};
-use api::{ScrollClamping, ScrollEventPhase, ScrollLayerState, ScrollLocation, StickyFrameInfo};
-use api::WorldPoint;
+use api::{ClipId, DeviceIntRect, LayerPoint, LayerRect, LayerToScrollTransform};
+use api::{LayerToWorldTransform, LayerVector2D, PipelineId, ScrollClamping, ScrollEventPhase};
+use api::{ScrollLayerState, ScrollLocation, WorldPoint};
 use clip::ClipStore;
-use clip_scroll_node::{ClipScrollNode, NodeType, ScrollingState};
+use clip_scroll_node::{ClipScrollNode, NodeType, ScrollingState, StickyFrameInfo};
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use render_task::ClipChain;
 use resource_cache::ResourceCache;
 use tiling::PackedLayer;
 
 pub type ScrollStates = FastHashMap<ClipId, ScrollingState>;
@@ -43,19 +42,20 @@ pub struct ClipScrollTree {
     /// added frames and clips. The ClipScrollTree increments this by one every
     /// time a new dynamic frame is created.
     current_new_node_item: u64,
 
     /// The root reference frame, which is the true root of the ClipScrollTree. Initially
     /// this ID is not valid, which is indicated by ```nodes``` being empty.
     pub root_reference_frame_id: ClipId,
 
-    /// The root scroll node which is the first child of the root reference frame.
-    /// Initially this ID is not valid, which is indicated by ```nodes``` being empty.
-    pub topmost_scrolling_node_id: ClipId,
+    /// The topmost scrolling node that we have, which is decided by the first scrolling node
+    /// to be added to the tree. This is really only useful for Servo, so we should figure out
+    /// a good way to remove it in the future.
+    pub topmost_scrolling_node_id: Option<ClipId>,
 
     /// A set of pipelines which should be discarded the next time this
     /// tree is drained.
     pub pipelines_to_discard: FastHashSet<PipelineId>,
 }
 
 #[derive(Clone)]
 pub struct TransformUpdateState {
@@ -78,36 +78,29 @@ pub struct TransformUpdateState {
 impl ClipScrollTree {
     pub fn new() -> ClipScrollTree {
         let dummy_pipeline = PipelineId::dummy();
         ClipScrollTree {
             nodes: FastHashMap::default(),
             pending_scroll_offsets: FastHashMap::default(),
             currently_scrolling_node_id: None,
             root_reference_frame_id: ClipId::root_reference_frame(dummy_pipeline),
-            topmost_scrolling_node_id: ClipId::root_scroll_node(dummy_pipeline),
+            topmost_scrolling_node_id: None,
             current_new_node_item: 1,
             pipelines_to_discard: FastHashSet::default(),
         }
     }
 
     pub fn root_reference_frame_id(&self) -> ClipId {
         // TODO(mrobinson): We should eventually make this impossible to misuse.
         debug_assert!(!self.nodes.is_empty());
         debug_assert!(self.nodes.contains_key(&self.root_reference_frame_id));
         self.root_reference_frame_id
     }
 
-    pub fn topmost_scrolling_node_id(&self) -> ClipId {
-        // TODO(mrobinson): We should eventually make this impossible to misuse.
-        debug_assert!(!self.nodes.is_empty());
-        debug_assert!(self.nodes.contains_key(&self.topmost_scrolling_node_id));
-        self.topmost_scrolling_node_id
-    }
-
     pub fn collect_nodes_bouncing_back(&self) -> FastHashSet<ClipId> {
         let mut nodes_bouncing_back = FastHashSet::default();
         for (clip_id, node) in self.nodes.iter() {
             if let NodeType::ScrollFrame(ref scrolling) = node.node_type {
                 if scrolling.bouncing_back {
                     nodes_bouncing_back.insert(*clip_id);
                 }
             }
@@ -137,21 +130,16 @@ impl ClipScrollTree {
             if node.ray_intersects_node(cursor) {
                 Some(clip_id)
             } else {
                 None
             }
         })
     }
 
-    pub fn find_scrolling_node_at_point(&self, cursor: &WorldPoint) -> ClipId {
-        self.find_scrolling_node_at_point_in_node(cursor, self.root_reference_frame_id())
-            .unwrap_or(self.topmost_scrolling_node_id())
-    }
-
     pub fn is_point_clipped_in_for_node(
         &self,
         point: WorldPoint,
         node_id: &ClipId,
         cache: &mut FastHashMap<ClipId, Option<LayerPoint>>,
         clip_store: &ClipStore
     ) -> bool {
         if let Some(point) = cache.get(node_id) {
@@ -257,21 +245,27 @@ impl ClipScrollTree {
         scroll_location: ScrollLocation,
         cursor: WorldPoint,
         phase: ScrollEventPhase,
     ) -> bool {
         if self.nodes.is_empty() {
             return false;
         }
 
-        let clip_id = match (
-            phase,
-            self.find_scrolling_node_at_point(&cursor),
-            self.currently_scrolling_node_id,
-        ) {
+        let topmost_scrolling_node_id = match self.topmost_scrolling_node_id {
+            Some(id) => id,
+            None => return false,
+        };
+
+        let scrolling_node = self.find_scrolling_node_at_point_in_node(
+            &cursor,
+            self.root_reference_frame_id()
+        ).unwrap_or(topmost_scrolling_node_id); // None => use the topmost scrolling node
+
+        let clip_id = match (phase, scrolling_node, self.currently_scrolling_node_id) {
             (ScrollEventPhase::Start, scroll_node_at_point_id, _) => {
                 self.currently_scrolling_node_id = Some(scroll_node_at_point_id);
                 scroll_node_at_point_id
             }
             (_, scroll_node_at_point_id, Some(cached_clip_id)) => {
                 let clip_id = match self.nodes.get(&cached_clip_id) {
                     Some(_) => cached_clip_id,
                     None => {
@@ -279,17 +273,16 @@ impl ClipScrollTree {
                         scroll_node_at_point_id
                     }
                 };
                 clip_id
             }
             (_, _, None) => return false,
         };
 
-        let topmost_scrolling_node_id = self.topmost_scrolling_node_id();
         let non_root_overscroll = if clip_id != topmost_scrolling_node_id {
             self.nodes.get(&clip_id).unwrap().is_overscrolling()
         } else {
             false
         };
 
         let mut switch_node = false;
         if let Some(node) = self.nodes.get_mut(&clip_id) {
@@ -482,16 +475,20 @@ impl ClipScrollTree {
             frame_rect,
             sticky_frame_info,
             id.pipeline_id(),
         );
         self.add_node(node, id);
     }
 
     pub fn add_node(&mut self, node: ClipScrollNode, id: ClipId) {
+        if let NodeType::ScrollFrame(..) = node.node_type {
+            self.topmost_scrolling_node_id.get_or_insert(id);
+        }
+
         // When the parent node is None this means we are adding the root.
         match node.parent {
             Some(parent_id) => self.nodes.get_mut(&parent_id).unwrap().add_child(id),
             None => self.root_reference_frame_id = id,
         }
 
         debug_assert!(!self.nodes.contains_key(&id));
         self.nodes.insert(id, node);
@@ -526,21 +523,20 @@ impl ClipScrollTree {
                 pt.add_item(format!("id: {:?}", id));
             }
             NodeType::ScrollFrame(scrolling_info) => {
                 pt.new_level(format!("ScrollFrame"));
                 pt.add_item(format!("id: {:?}", id));
                 pt.add_item(format!("scrollable_size: {:?}", scrolling_info.scrollable_size));
                 pt.add_item(format!("scroll.offset: {:?}", scrolling_info.offset));
             }
-            NodeType::StickyFrame(sticky_frame_info, sticky_offset) => {
+            NodeType::StickyFrame(ref sticky_frame_info) => {
                 pt.new_level(format!("StickyFrame"));
                 pt.add_item(format!("id: {:?}", id));
                 pt.add_item(format!("sticky info: {:?}", sticky_frame_info));
-                pt.add_item(format!("sticky offset: {:?}", sticky_offset));
             }
         }
 
         pt.add_item(format!(
             "local_viewport_rect: {:?}",
             node.local_viewport_rect
         ));
         pt.add_item(format!("local_clip_rect: {:?}", node.local_clip_rect));
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1872,16 +1872,21 @@ impl Device {
         }
     }
 
     pub fn set_blend_mode_premultiplied_alpha(&self) {
         self.gl.blend_func(gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
         self.gl.blend_equation(gl::FUNC_ADD);
     }
 
+    pub fn set_blend_mode_premultiplied_dest_out(&self) {
+        self.gl.blend_func(gl::ZERO, gl::ONE_MINUS_SRC_ALPHA);
+        self.gl.blend_equation(gl::FUNC_ADD);
+    }
+
     pub fn set_blend_mode_alpha(&self) {
         self.gl.blend_func_separate(
             gl::SRC_ALPHA,
             gl::ONE_MINUS_SRC_ALPHA,
             gl::ONE,
             gl::ONE_MINUS_SRC_ALPHA,
         );
         self.gl.blend_equation(gl::FUNC_ADD);
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -6,21 +6,23 @@
 use api::{BuiltDisplayListIter, ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion};
 use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, Epoch, FilterOp};
 use api::{ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutSize, LayoutTransform};
 use api::{LocalClip, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLayerState};
 use api::{ScrollLocation, ScrollPolicy, ScrollSensitivity, SpecificDisplayItem, StackingContext};
 use api::{ClipMode, TileOffset, TransformStyle, WorldPoint};
 use clip::ClipRegion;
+use clip_scroll_node::StickyFrameInfo;
 use clip_scroll_tree::{ClipScrollTree, ScrollStates};
 use euclid::rect;
 use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, RendererFrame};
+use prim_store::RectangleContent;
 use profiler::{GpuCacheProfileCounters, TextureCacheProfileCounters};
 use resource_cache::{FontInstanceMap,ResourceCache, TiledImageMap};
 use scene::{Scene, StackingContextHelpers, ScenePipeline};
 use tiling::{CompositeOps, Frame, PrimitiveFlags};
 use util::{subtract_rect, ComplexClipRegionHelpers};
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Eq, Ord)]
 pub struct FrameId(pub u32);
@@ -71,17 +73,17 @@ impl<'a> FlattenContext<'a> {
             .get(complex_clips)
             .collect()
     }
 
     fn flatten_root(
         &mut self,
         traversal: &mut BuiltDisplayListIter<'a>,
         pipeline_id: PipelineId,
-        content_size: &LayoutSize,
+        frame_size: &LayoutSize,
     ) {
         self.builder.push_stacking_context(
             &LayerVector2D::zero(),
             pipeline_id,
             CompositeOps::default(),
             TransformStyle::Flat,
             true,
             true,
@@ -90,45 +92,47 @@ impl<'a> FlattenContext<'a> {
         // We do this here, rather than above because we want any of the top-level
         // stacking contexts in the display list to be treated like root stacking contexts.
         // FIXME(mrobinson): Currently only the first one will, which for the moment is
         // sufficient for all our use cases.
         self.builder.notify_waiting_for_root_stacking_context();
 
         // For the root pipeline, there's no need to add a full screen rectangle
         // here, as it's handled by the framebuffer clear.
-        let clip_id = ClipId::root_scroll_node(pipeline_id);
+        let clip_id = ClipId::root_reference_frame(pipeline_id);
         if self.scene.root_pipeline_id != Some(pipeline_id) {
             if let Some(pipeline) = self.scene.pipelines.get(&pipeline_id) {
                 if let Some(bg_color) = pipeline.background_color {
-                    let root_bounds = LayerRect::new(LayerPoint::zero(), *content_size);
+                    let root_bounds = LayerRect::new(LayerPoint::zero(), *frame_size);
                     let info = LayerPrimitiveInfo::new(root_bounds);
                     self.builder.add_solid_rectangle(
                         ClipAndScrollInfo::simple(clip_id),
                         &info,
-                        &bg_color,
+                        &RectangleContent::Fill(bg_color),
                         PrimitiveFlags::None,
                     );
                 }
             }
         }
 
 
         self.flatten_items(traversal, pipeline_id, LayerVector2D::zero());
 
         if self.builder.config.enable_scrollbars {
             let scrollbar_rect = LayerRect::new(LayerPoint::zero(), LayerSize::new(10.0, 70.0));
             let info = LayerPrimitiveInfo::new(scrollbar_rect);
 
-            self.builder.add_solid_rectangle(
-                ClipAndScrollInfo::simple(clip_id),
-                &info,
-                &DEFAULT_SCROLLBAR_COLOR,
-                PrimitiveFlags::Scrollbar(self.clip_scroll_tree.topmost_scrolling_node_id(), 4.0),
-            );
+            if let Some(node_id) = self.clip_scroll_tree.topmost_scrolling_node_id {
+                self.builder.add_solid_rectangle(
+                    ClipAndScrollInfo::simple(clip_id),
+                    &info,
+                    &RectangleContent::Fill(DEFAULT_SCROLLBAR_COLOR),
+                    PrimitiveFlags::Scrollbar(node_id, 4.0),
+                );
+            }
         }
 
         self.builder.pop_stacking_context();
     }
 
     fn flatten_items(
         &mut self,
         traversal: &mut BuiltDisplayListIter<'a>,
@@ -333,40 +337,30 @@ impl<'a> FlattenContext<'a> {
             self.clip_scroll_tree,
         );
 
         self.pipeline_epochs.push((pipeline_id, pipeline.epoch));
 
         let iframe_rect = LayerRect::new(LayerPoint::zero(), bounds.size);
         let origin = reference_frame_relative_offset + bounds.origin.to_vector();
         let transform = LayerToScrollTransform::create_translation(origin.x, origin.y, 0.0);
-        let iframe_reference_frame_id = self.builder.push_reference_frame(
+        self.builder.push_reference_frame(
             Some(clip_id),
             pipeline_id,
             &iframe_rect,
             &transform,
             origin,
             true,
             self.clip_scroll_tree,
         );
 
-        self.builder.add_scroll_frame(
-            ClipId::root_scroll_node(pipeline_id),
-            iframe_reference_frame_id,
-            pipeline_id,
-            &iframe_rect,
-            &pipeline.content_size,
-            ScrollSensitivity::ScriptAndInputEvents,
-            self.clip_scroll_tree,
-        );
-
         self.flatten_root(
             &mut pipeline.display_list.iter(),
             pipeline_id,
-            &pipeline.content_size,
+            &bounds.size,
         );
 
         self.builder.pop_reference_frame();
     }
 
     fn flatten_item<'b>(
         &'b mut self,
         item: DisplayItemRef<'a, 'b>,
@@ -437,27 +431,35 @@ impl<'a> FlattenContext<'a> {
                     None => {
                         warn!("Unknown font instance key: {:?}", text_info.font_key);
                     }
                 }
             }
             SpecificDisplayItem::Rectangle(ref info) => {
                 if !self.try_to_add_rectangle_splitting_on_clip(
                     &prim_info,
-                    &info.color,
+                    &RectangleContent::Fill(info.color),
                     &clip_and_scroll,
                 ) {
                     self.builder.add_solid_rectangle(
                         clip_and_scroll,
                         &prim_info,
-                        &info.color,
+                        &RectangleContent::Fill(info.color),
                         PrimitiveFlags::None,
                     );
                 }
             }
+            SpecificDisplayItem::ClearRectangle => {
+                self.builder.add_solid_rectangle(
+                    clip_and_scroll,
+                    &prim_info,
+                    &RectangleContent::Clear,
+                    PrimitiveFlags::None,
+                );
+            }
             SpecificDisplayItem::Line(ref info) => {
                 self.builder.add_line(
                     clip_and_scroll,
                     &prim_info,
                     info.wavy_line_thickness,
                     info.orientation,
                     &info.color,
                     info.style,
@@ -580,21 +582,26 @@ impl<'a> FlattenContext<'a> {
                     &frame_rect,
                     &content_rect,
                     clip_region,
                     info.scroll_sensitivity,
                 );
             }
             SpecificDisplayItem::StickyFrame(ref info) => {
                 let frame_rect = item.rect().translate(&reference_frame_relative_offset);
+                let sticky_frame_info = StickyFrameInfo::new(
+                    info.margins,
+                    info.vertical_offset_bounds,
+                    info.horizontal_offset_bounds,
+                );
                 self.clip_scroll_tree.add_sticky_frame(
                     info.id,
                     clip_and_scroll.scroll_node_id, /* parent id */
                     frame_rect,
-                    info.sticky_frame_info,
+                    sticky_frame_info
                 );
             }
 
             // Do nothing; these are dummy items for the display list parser
             SpecificDisplayItem::SetGradientStops => {}
 
             SpecificDisplayItem::PopStackingContext => {
                 unreachable!("Should have returned in parent method.")
@@ -614,24 +621,26 @@ impl<'a> FlattenContext<'a> {
 
     /// Try to optimize the rendering of a solid rectangle that is clipped by a single
     /// rounded rectangle, by only masking the parts of the rectangle that intersect
     /// the rounded parts of the clip. This is pretty simple now, so has a lot of
     /// potential for further optimizations.
     fn try_to_add_rectangle_splitting_on_clip(
         &mut self,
         info: &LayerPrimitiveInfo,
-        color: &ColorF,
+        content: &RectangleContent,
         clip_and_scroll: &ClipAndScrollInfo,
     ) -> bool {
         // If this rectangle is not opaque, splitting the rectangle up
         // into an inner opaque region just ends up hurting batching and
         // doing more work than necessary.
-        if color.a != 1.0 {
-            return false;
+        if let &RectangleContent::Fill(ColorF{a, ..}) = content {
+            if a != 1.0 {
+                return false;
+            }
         }
 
         let inner_unclipped_rect = match &info.local_clip {
             &LocalClip::Rect(_) => return false,
             &LocalClip::RoundedRect(_, ref region) => {
                 if region.mode == ClipMode::ClipOut {
                     return false;
                 }
@@ -654,27 +663,26 @@ impl<'a> FlattenContext<'a> {
             local_clip: LocalClip::from(*info.local_clip.clip_rect()),
             is_backface_visible: info.is_backface_visible,
             tag: None,
         };
 
         self.builder.add_solid_rectangle(
             *clip_and_scroll,
             &prim_info,
-            color,
+            content,
             PrimitiveFlags::None,
         );
-
         for clipped_rect in &clipped_rects {
             let mut info = info.clone();
             info.rect = *clipped_rect;
             self.builder.add_solid_rectangle(
                 *clip_and_scroll,
                 &info,
-                color,
+                content,
                 PrimitiveFlags::None,
             );
         }
         true
     }
 
     /// Decomposes an image display item that is repeated into an image per individual repetition.
     /// We need to do this when we are unable to perform the repetition in the shader,
@@ -1084,17 +1092,16 @@ impl FrameContext {
                 tiled_image_map: resource_cache.get_tiled_image_map(),
                 pipeline_epochs: Vec::new(),
                 replacements: Vec::new(),
             };
 
             roller.builder.push_root(
                 root_pipeline_id,
                 &root_pipeline.viewport_size,
-                &root_pipeline.content_size,
                 roller.clip_scroll_tree,
             );
 
             roller.builder.setup_viewport_offset(
                 window_size,
                 inner_rect,
                 device_pixel_ratio,
                 roller.clip_scroll_tree,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -22,17 +22,17 @@ use frame::FrameId;
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, HardwareCompositeOp};
 use picture::{PicturePrimitive};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
-use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu};
+use prim_store::{RectangleContent, RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{AlphaRenderItem, ClearMode, ClipChain, RenderTask, RenderTaskId, RenderTaskLocation};
 use render_task::RenderTaskTree;
 use resource_cache::ResourceCache;
 use scene::ScenePipeline;
 use std::{mem, usize, f32, i32};
 use tiling::{ClipScrollGroup, ClipScrollGroupIndex, CompositeOps, Frame};
 use tiling::{ContextIsolation, RenderTargetKind, StackingContextIndex};
@@ -463,55 +463,42 @@ impl FrameBuilder {
                     viewport_offset.x,
                     viewport_offset.y,
                     0.0,
                 );
             }
             root_node.local_clip_rect = viewport_clip;
         }
 
-        let clip_id = clip_scroll_tree.topmost_scrolling_node_id();
-        if let Some(root_node) = clip_scroll_tree.nodes.get_mut(&clip_id) {
-            root_node.local_clip_rect = viewport_clip;
+        if let Some(clip_id) = clip_scroll_tree.topmost_scrolling_node_id {
+            if let Some(root_node) = clip_scroll_tree.nodes.get_mut(&clip_id) {
+                root_node.local_clip_rect = viewport_clip;
+            }
         }
     }
 
     pub fn push_root(
         &mut self,
         pipeline_id: PipelineId,
         viewport_size: &LayerSize,
-        content_size: &LayerSize,
         clip_scroll_tree: &mut ClipScrollTree,
     ) -> ClipId {
         let viewport_rect = LayerRect::new(LayerPoint::zero(), *viewport_size);
         let identity = &LayerToScrollTransform::identity();
         self.push_reference_frame(
             None,
             pipeline_id,
             &viewport_rect,
             identity,
             LayerVector2D::zero(),
             true,
             clip_scroll_tree,
         );
 
-        let topmost_scrolling_node_id = ClipId::root_scroll_node(pipeline_id);
-        clip_scroll_tree.topmost_scrolling_node_id = topmost_scrolling_node_id;
-
-        self.add_scroll_frame(
-            topmost_scrolling_node_id,
-            clip_scroll_tree.root_reference_frame_id,
-            pipeline_id,
-            &viewport_rect,
-            content_size,
-            ScrollSensitivity::ScriptAndInputEvents,
-            clip_scroll_tree,
-        );
-
-        topmost_scrolling_node_id
+        clip_scroll_tree.root_reference_frame_id
     }
 
     pub fn add_clip_node(
         &mut self,
         new_node_id: ClipId,
         parent_id: ClipId,
         pipeline_id: PipelineId,
         clip_region: ClipRegion,
@@ -607,27 +594,28 @@ impl FrameBuilder {
         mem::replace(&mut self.pending_shadow_contents, pending_primitives);
         mem::replace(&mut self.shadow_prim_stack, shadows);
     }
 
     pub fn add_solid_rectangle(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
-        color: &ColorF,
+        content: &RectangleContent,
         flags: PrimitiveFlags,
     ) {
-        let prim = RectanglePrimitive { color: *color };
-
-        // Don't add transparent rectangles to the draw list, but do consider them for hit
-        // testing. This allows specifying invisible hit testing areas.
-        if color.a == 0.0 {
-            self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
-            return;
+        if let &RectangleContent::Fill(ColorF{a, ..}) = content {
+            if a == 0.0 {
+                // Don't add transparent rectangles to the draw list, but do consider them for hit
+                // testing. This allows specifying invisible hit testing areas.
+                self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
+                return;
+            }
         }
+        let prim = RectanglePrimitive { content: *content };
 
         let prim_index = self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::Rectangle(prim),
         );
 
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/gamma_lut.rs
@@ -0,0 +1,428 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*!
+Gamma correction lookup tables.
+
+This is a port of Skia gamma LUT logic into Rust, used by WebRender.
+*/
+//#![warn(missing_docs)] //TODO
+#![allow(dead_code)]
+
+use api::ColorU;
+
+/// Color space responsible for converting between lumas and luminances.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum LuminanceColorSpace {
+    /// Linear space - no conversion involved.
+    Linear,
+    /// Simple gamma space - uses the `luminance ^ gamma` function.
+    Gamma(f32),
+    /// Srgb space.
+    Srgb,
+}
+
+impl LuminanceColorSpace {
+    pub fn new(gamma: f32) -> LuminanceColorSpace {
+        if gamma == 1.0 {
+            LuminanceColorSpace::Linear
+        } else if gamma == 0.0 {
+            LuminanceColorSpace::Srgb
+        } else {
+            LuminanceColorSpace::Gamma(gamma)
+        }
+    }
+
+    pub fn to_luma(&self, luminance: f32) -> f32 {
+        match *self {
+            LuminanceColorSpace::Linear => luminance,
+            LuminanceColorSpace::Gamma(gamma) => luminance.powf(gamma),
+            LuminanceColorSpace::Srgb => {
+                //The magic numbers are derived from the sRGB specification.
+                //See http://www.color.org/chardata/rgb/srgb.xalter .
+                if luminance <= 0.04045 {
+                    luminance / 12.92
+                } else {
+                    ((luminance + 0.055) / 1.055).powf(2.4)
+                }
+            }
+        }
+    }
+
+    pub fn from_luma(&self, luma: f32) -> f32 {
+        match *self {
+            LuminanceColorSpace::Linear => luma,
+            LuminanceColorSpace::Gamma(gamma) => luma.powf(1. / gamma),
+            LuminanceColorSpace::Srgb => {
+                //The magic numbers are derived from the sRGB specification.
+                //See http://www.color.org/chardata/rgb/srgb.xalter .
+                if luma <= 0.0031308 {
+                    luma * 12.92
+                } else {
+                    1.055 * luma.powf(1./2.4) - 0.055
+                }
+            }
+        }
+    }
+}
+
+//TODO: tests
+fn round_to_u8(x : f32) -> u8 {
+    let v = (x + 0.5).floor() as i32;
+    assert!(0 <= v && v < 0x100);
+    v as u8
+}
+
+//TODO: tests
+/// Scales `base`, an `n`-bit value (`base <= 2^n - 1`), up to the 8-bit range
+/// `[0, 2^8 - 1]` by moving it to the top bits and OR-ing in shifted copies.
+///
+/// * `n` - in `[1, 8]`, the number of bits used by `base`.
+/// * `base` - the number to be scaled to `[0, 255]`.
+fn scale255(n: u8, mut base: u8) -> u8 {
+    base <<= 8 - n;
+    let mut lum = base;
+    let mut i = n;
+
+    while i < 8 {
+        lum |= base >> i;
+        i += n;
+    }
+
+    lum
+}
+
+// Computes the luminance from the given r, g, and b in accordance with
+// SK_LUM_COEFF_X. For correct results, r, g, and b should be in linear space.
+fn compute_luminance(r: u8, g: u8, b: u8) -> u8 {
+    // The following is
+    // r * SK_LUM_COEFF_R + g * SK_LUM_COEFF_G + b * SK_LUM_COEFF_B
+    // with SK_LUM_COEFF_X in 1.8 fixed point (rounding adjusted to sum to 256).
+    let val: u32 = r as u32 * 54 + g as u32 * 183 + b as u32 * 19;
+    assert!(val < 0x10000);
+    (val >> 8) as u8
+}
+
+// Skia uses 3 bits per channel for luminance.
+const LUM_BITS: u8 = 3;
+// Mask of the highest used bits.
+const LUM_MASK: u8 = ((1 << LUM_BITS) - 1) << (8 - LUM_BITS);
+
+pub trait ColorLut {
+    fn quantize(&self) -> ColorU;
+    fn quantized_floor(&self) -> ColorU;
+    fn quantized_ceil(&self) -> ColorU;
+    fn luminance(&self) -> u8;
+    fn luminance_color(&self) -> ColorU;
+}
+
+impl ColorLut for ColorU {
+    // Compute a canonical color that is equivalent to the input color
+    // for preblend table lookups. The alpha channel is never used for
+    // preblending, so overwrite it with opaque.
+    fn quantize(&self) -> ColorU {
+        ColorU::new(
+            scale255(LUM_BITS, self.r >> (8 - LUM_BITS)),
+            scale255(LUM_BITS, self.g >> (8 - LUM_BITS)),
+            scale255(LUM_BITS, self.b >> (8 - LUM_BITS)),
+            255,
+        )
+    }
+
+    // Quantize to the smallest value that yields the same table index.
+    fn quantized_floor(&self) -> ColorU {
+        ColorU::new(
+            self.r & LUM_MASK,
+            self.g & LUM_MASK,
+            self.b & LUM_MASK,
+            255,
+        )
+    }
+
+    // Quantize to the largest value that yields the same table index.
+    fn quantized_ceil(&self) -> ColorU {
+        ColorU::new(
+            self.r | !LUM_MASK,
+            self.g | !LUM_MASK,
+            self.b | !LUM_MASK,
+            255,
+        )
+    }
+
+    // Compute a luminance value suitable for grayscale preblend table
+    // lookups.
+    fn luminance(&self) -> u8 {
+        compute_luminance(self.r, self.g, self.b)
+    }
+
+    // Make a grayscale color from the computed luminance.
+    fn luminance_color(&self) -> ColorU {
+        let lum = self.luminance();
+        ColorU::new(lum, lum, lum, self.a)
+    }
+}
+
+// This will invert the gamma applied by CoreGraphics,
+// so we can get linear values.
+// CoreGraphics obscurely defaults to 2.0 as the smoothing gamma value.
+// The color space used does not appear to affect this choice.
+#[cfg(target_os="macos")]
+fn get_inverse_gamma_table_coregraphics_smoothing() -> [u8; 256] {
+    let mut table = [0u8; 256];
+
+    for (i, v) in table.iter_mut().enumerate() {
+        let x = i as f32 / 255.0;
+        *v = round_to_u8(x * x * 255.0);
+    }
+
+    table
+}
+
+// A value of 0.5 for SK_GAMMA_CONTRAST appears to be a good compromise.
+// With lower values small text appears washed out (though correctly so).
+// With higher values lcd fringing is worse and the smoothing effect of
+// partial coverage is diminished.
+fn apply_contrast(srca: f32, contrast: f32) -> f32 {
+    srca + ((1.0 - srca) * contrast * srca)
+}
+
+/// Fills `table` so that entry `a` holds the adjusted alpha to use for raw source alpha `a`.
+/// The approach here is not necessarily the one with the lowest error. See
+/// https://bel.fi/alankila/lcd/alpcor.html for a similar approach that just searches for it.
+pub fn build_gamma_correcting_lut(table: &mut [u8; 256], src: u8, contrast: f32,
+                                  src_space: LuminanceColorSpace,
+                                  dst_convert: LuminanceColorSpace) {
+    // Normalize the source value to [0, 1] and convert it with `src_space`.
+    let src = src as f32 / 255.0;
+    let lin_src = src_space.to_luma(src);
+    // Guess at the dst. The perceptual inverse provides smaller visual
+    // discontinuities when slight changes to desaturated colors cause a channel
+    // to map to a different correcting lut with neighboring srcI.
+    // See https://code.google.com/p/chromium/issues/detail?id=141425#c59 .
+    let dst = 1.0 - src;
+    let lin_dst = dst_convert.to_luma(dst);
+
+    // Contrast value tapers off to 0 as the src luminance becomes white.
+    let adjusted_contrast = contrast * lin_dst;
+
+    // Remove discontinuity and instability when src is close to dst.
+    // The value 1/256 is arbitrary and appears to contain the instability.
+    if (src - dst).abs() < (1.0 / 256.0) {
+        let mut ii : f32 = 0.0;
+        for v in table.iter_mut() {
+            let raw_srca = ii / 255.0;
+            let srca = apply_contrast(raw_srca, adjusted_contrast);
+
+            *v = round_to_u8(255.0 * srca);
+            ii += 1.0;
+        }
+    } else {
+        // Avoid slow int to float conversion.
+        let mut ii : f32 = 0.0;
+        for v in table.iter_mut() {
+            // 'raw_srca += 1.0f / 255.0f' and even
+            // 'raw_srca = i * (1.0f / 255.0f)' can add up to more than 1.0f.
+            // When this happens the table[255] == 0x0 instead of 0xff.
+            // See http://code.google.com/p/chromium/issues/detail?id=146466
+            let raw_srca = ii / 255.0;
+            let srca = apply_contrast(raw_srca, adjusted_contrast);
+            assert!(srca <= 1.0);
+            let dsta = 1.0 - srca;
+
+            // Calculate the output we want.
+            let lin_out = lin_src * srca + dsta * lin_dst;
+            assert!(lin_out <= 1.0);
+            let out = dst_convert.from_luma(lin_out);
+
+            // Undo what the blit blend will do.
+            // i.e. given the formula for OVER: out = src * result + (1 - result) * dst
+            // solving for result gives:
+            let result = (out - dst) / (src - dst);
+
+            *v = round_to_u8(255.0 * result);
+            debug!("Setting {:?} to {:?}", ii as u8, *v);
+
+            ii += 1.0;
+        }
+    }
+}
+
+pub struct GammaLut {
+    tables: [[u8; 256]; 1 << LUM_BITS],
+    #[cfg(target_os="macos")]
+    cg_inverse_gamma: [u8; 256],
+}
+
+impl GammaLut {
+    // Builds one gamma table per quantized luminance level (`1 << LUM_BITS` here; an earlier
+    // note said Skia makes 9 - TODO confirm). `get_table` then picks the table for a channel.
+    fn generate_tables(&mut self, contrast: f32, paint_gamma: f32, device_gamma: f32) {
+        let paint_color_space = LuminanceColorSpace::new(paint_gamma);
+        let device_color_space = LuminanceColorSpace::new(device_gamma);
+
+        for (i, entry) in self.tables.iter_mut().enumerate() {
+            let luminance = scale255(LUM_BITS, i as u8);
+            build_gamma_correcting_lut(entry,
+                                       luminance,
+                                       contrast,
+                                       paint_color_space,
+                                       device_color_space);
+        }
+    }
+
+    pub fn table_count(&self) -> usize {
+        self.tables.len()
+    }
+
+    pub fn get_table(&self, color: u8) -> &[u8; 256] {
+        &self.tables[(color >> (8 - LUM_BITS)) as usize]
+    }
+
+    pub fn new(contrast: f32, paint_gamma: f32, device_gamma: f32) -> GammaLut {
+        #[cfg(target_os="macos")]
+        let mut table = GammaLut {
+            tables: [[0; 256]; 1 << LUM_BITS],
+            cg_inverse_gamma: get_inverse_gamma_table_coregraphics_smoothing(),
+        };
+        #[cfg(not(target_os="macos"))]
+        let mut table = GammaLut {
+            tables: [[0; 256]; 1 << LUM_BITS],
+        };
+
+        table.generate_tables(contrast, paint_gamma, device_gamma);
+
+        table
+    }
+
+    // Skia normally preblends based on what the text color is.
+    // If we can't do that, use Skia default colors.
+    pub fn preblend_default_colors_bgra(&self, pixels: &mut [u8], width: usize, height: usize) {
+        let preblend_color = ColorU::new(0x7f, 0x80, 0x7f, 0xff);
+        self.preblend_bgra(pixels, width, height, preblend_color);
+    }
+
+    fn replace_pixels_bgra(&self, pixels: &mut [u8], width: usize, height: usize,
+                           table_r: &[u8; 256], table_g: &[u8; 256], table_b: &[u8; 256]) {
+         for y in 0..height {
+            let current_height = y * width * 4;
+
+            for pixel in pixels[current_height..current_height + (width * 4)].chunks_mut(4) {
+                pixel[0] = table_b[pixel[0] as usize];
+                pixel[1] = table_g[pixel[1] as usize];
+                pixel[2] = table_r[pixel[2] as usize];
+                // Don't touch alpha
+            }
+        }
+    }
+
+    // Mostly used by windows and GlyphRunAnalysis::GetAlphaTexture
+    fn replace_pixels_rgb(&self, pixels: &mut [u8], width: usize, height: usize,
+                          table_r: &[u8; 256], table_g: &[u8; 256], table_b: &[u8; 256]) {
+         for y in 0..height {
+            let current_height = y * width * 3;
+
+            for pixel in pixels[current_height..current_height + (width * 3)].chunks_mut(3) {
+                pixel[0] = table_r[pixel[0] as usize];
+                pixel[1] = table_g[pixel[1] as usize];
+                pixel[2] = table_b[pixel[2] as usize];
+            }
+        }
+    }
+
+    // Assumes pixels are in BGRA format. Assumes pixel values are in linear space already.
+    pub fn preblend_bgra(&self, pixels: &mut [u8], width: usize, height: usize, color: ColorU) {
+        let table_r = self.get_table(color.r);
+        let table_g = self.get_table(color.g);
+        let table_b = self.get_table(color.b);
+
+        self.replace_pixels_bgra(pixels, width, height, table_r, table_g, table_b);
+    }
+
+    // Assumes pixels are in RGB format. Assumes pixel values are in linear space already. NOTE:
+    // there is no alpha here.
+    pub fn preblend_rgb(&self, pixels: &mut [u8], width: usize, height: usize, color: ColorU) {
+        let table_r = self.get_table(color.r);
+        let table_g = self.get_table(color.g);
+        let table_b = self.get_table(color.b);
+
+        self.replace_pixels_rgb(pixels, width, height, table_r, table_g, table_b);
+    }
+
+    #[cfg(target_os="macos")]
+    pub fn coregraphics_convert_to_linear_bgra(&self, pixels: &mut [u8], width: usize, height: usize) {
+        self.replace_pixels_bgra(pixels, width, height,
+                                 &self.cg_inverse_gamma,
+                                 &self.cg_inverse_gamma,
+                                 &self.cg_inverse_gamma);
+    }
+
+    // Assumes pixels are in BGRA format. Assumes pixel values are in linear space already.
+    pub fn preblend_grayscale_bgra(&self, pixels: &mut [u8], width: usize, height: usize, color: ColorU) {
+        let table_g = self.get_table(color.g);
+
+         for y in 0..height {
+            let current_height = y * width * 4;
+
+            for pixel in pixels[current_height..current_height + (width * 4)].chunks_mut(4) {
+                let luminance = compute_luminance(pixel[2], pixel[1], pixel[0]);
+                pixel[0] = table_g[luminance as usize];
+                pixel[1] = table_g[luminance as usize];
+                pixel[2] = table_g[luminance as usize];
+                pixel[3] = table_g[luminance as usize];
+            }
+        }
+    }
+
+} // end impl GammaLut
+
+#[cfg(test)]
+mod tests {
+    use std::cmp;
+    use super::*;
+
+    fn over(dst: u32, src: u32, alpha: u32) -> u32 {
+        (src * alpha + dst * (255 - alpha))/255
+    }
+
+    fn overf(dst: f32, src: f32, alpha: f32) -> f32 {
+        ((src * alpha + dst * (255. - alpha))/255.) as f32
+    }
+
+
+    fn absdiff(a: u32, b: u32) -> u32 {
+        if a < b  { b - a } else { a - b }
+    }
+
+    #[test]
+    fn gamma() {
+        let mut table = [0u8; 256];
+        let g = 2.0;
+        let space = LuminanceColorSpace::Gamma(g);
+        let mut src : u32 = 131;
+        while src < 256 {
+            build_gamma_correcting_lut(&mut table, src as u8, 0., space, space);
+            let mut max_diff = 0;
+            let mut dst = 0;
+            while dst < 256 {
+                for alpha in 0u32..256 {
+                    let preblend = table[alpha as usize];
+                    let lin_dst = (dst as f32 / 255.).powf(g) * 255.;
+                    let lin_src = (src as f32 / 255.).powf(g) * 255.;
+
+                    let preblend_result = over(dst, src, preblend as u32);
+                    let true_result = ((overf(lin_dst, lin_src, alpha as f32) / 255.).powf(1. / g) * 255.) as u32;
+                    let diff = absdiff(preblend_result, true_result);
+                    //println!("{} -- {} {} = {}", alpha, preblend_result, true_result, diff);
+                    max_diff = cmp::max(max_diff, diff);
+                }
+
+                //println!("{} {} max {}", src, dst, max_diff);
+                assert!(max_diff <= 33);
+                dst += 1;
+
+            }
+            src += 1;
+        }
+    }
+} // end mod
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -175,8 +175,18 @@ impl From<BrushInstance> for PrimitiveIn
                 instance.z,
                 instance.flags,
                 instance.user_data0,
                 instance.user_data1,
             ]
         }
     }
 }
+
+/// Defines how a brush image is stretched onto the primitive. In the future, we may draw
+/// with segments for each portion of the primitive, in which case this will be redundant.
+/// NOTE(review): `repr(C)` + explicit values suggest shader code shares them; confirm.
+#[repr(C)]
+pub enum BrushImageKind {
+    Simple = 0,     // A normal rect
+    NinePatch = 1,  // A nine-patch image (stretch inside segments)
+    Mirror = 2,     // Only the top-left corner is supplied; it is mirrored across the x/y axes
+}
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -59,16 +59,18 @@ mod debug_font_data;
 mod debug_render;
 #[cfg(feature = "debugger")]
 mod debug_server;
 mod device;
 mod ellipse;
 mod frame;
 mod frame_builder;
 mod freelist;
+#[cfg(any(target_os = "macos", target_os = "windows"))]
+mod gamma_lut;
 mod geometry;
 mod glyph_cache;
 mod glyph_rasterizer;
 mod gpu_cache;
 mod gpu_types;
 mod internal_types;
 mod picture;
 mod prim_store;
@@ -141,18 +143,15 @@ extern crate rayon;
 extern crate serde_derive;
 #[cfg(feature = "debugger")]
 extern crate serde_json;
 extern crate time;
 #[cfg(feature = "debugger")]
 extern crate ws;
 pub extern crate webrender_api;
 
-#[cfg(any(target_os = "macos", target_os = "windows"))]
-extern crate gamma_lut;
-
 #[doc(hidden)]
 pub use device::build_shader_strings;
 pub use renderer::{CpuProfile, DebugFlags, GpuProfile, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, ReadPixelsFormat, Renderer, RendererOptions};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use webrender_api as api;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ColorF, ClipAndScrollInfo, device_length, DeviceIntSize};
-use api::{BoxShadowClipMode, LayerRect, Shadow};
+use api::{BorderRadiusKind, ColorF, ClipAndScrollInfo, device_length, DeviceIntSize};
+use api::{BoxShadowClipMode, LayerPoint, LayerRect, LayerSize, Shadow};
 use box_shadow::BLUR_SAMPLE_SCALE;
 use frame_builder::PrimitiveContext;
 use gpu_cache::GpuDataRequest;
 use prim_store::PrimitiveIndex;
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
 use tiling::RenderTargetKind;
 
 /*
@@ -33,16 +33,17 @@ pub enum PictureKind {
     TextShadow {
         shadow: Shadow,
     },
     BoxShadow {
         blur_radius: f32,
         color: ColorF,
         blur_regions: Vec<LayerRect>,
         clip_mode: BoxShadowClipMode,
+        radii_kind: BorderRadiusKind,
     },
 }
 
 #[derive(Debug)]
 pub struct PicturePrimitive {
     pub prim_runs: Vec<PrimitiveRun>,
     pub render_task_id: Option<RenderTaskId>,
     pub kind: PictureKind,
@@ -65,26 +66,28 @@ impl PicturePrimitive {
         }
     }
 
     pub fn new_box_shadow(
         blur_radius: f32,
         color: ColorF,
         blur_regions: Vec<LayerRect>,
         clip_mode: BoxShadowClipMode,
+        radii_kind: BorderRadiusKind,
     ) -> PicturePrimitive {
         PicturePrimitive {
             prim_runs: Vec::new(),
             render_task_id: None,
             content_rect: LayerRect::zero(),
             kind: PictureKind::BoxShadow {
                 blur_radius,
                 color: color.premultiplied(),
                 blur_regions,
                 clip_mode,
+                radii_kind,
             },
         }
     }
 
     pub fn as_text_shadow(&self) -> &Shadow {
         match self.kind {
             PictureKind::TextShadow { ref shadow } => shadow,
             PictureKind::BoxShadow { .. } => panic!("bug: not a text shadow")
@@ -129,95 +132,151 @@ impl PicturePrimitive {
 
                 self.content_rect = self.content_rect.inflate(
                     blur_offset,
                     blur_offset,
                 );
 
                 self.content_rect.translate(&shadow.offset)
             }
-            PictureKind::BoxShadow { blur_radius, .. } => {
-                // TODO(gw): The 2.0 here should actually be BLUR_SAMPLE_SCALE.
-                //           I'm leaving it as is for now, to avoid having to
-                //           change the code in box_shadow.rs. As I work on
-                //           the box shadow optimizations, I'll fix this up.
-                let blur_offset = blur_radius * 2.0;
+            PictureKind::BoxShadow { blur_radius, clip_mode, radii_kind, .. } => {
+                // We need to inflate the content rect if outset.
+                match clip_mode {
+                    BoxShadowClipMode::Outset => {
+                        let blur_offset = blur_radius * BLUR_SAMPLE_SCALE;
 
-                self.content_rect = self.content_rect.inflate(
-                    blur_offset,
-                    blur_offset,
-                );
+                        // If the radii are uniform, we can render just the top
+                        // left corner and mirror it across the primitive. In
+                        // this case, shift the content rect to leave room
+                        // for the blur to take effect.
+                        match radii_kind {
+                            BorderRadiusKind::Uniform => {
+                                let origin = LayerPoint::new(
+                                    self.content_rect.origin.x - blur_offset,
+                                    self.content_rect.origin.y - blur_offset,
+                                );
+                                let size = LayerSize::new(
+                                    self.content_rect.size.width + blur_offset,
+                                    self.content_rect.size.height + blur_offset,
+                                );
+                                self.content_rect = LayerRect::new(origin, size);
+                            }
+                            BorderRadiusKind::NonUniform => {
+                                // For non-uniform radii, we need to expand
+                                // the content rect on all sides for the blur.
+                                self.content_rect = self.content_rect.inflate(
+                                    blur_offset,
+                                    blur_offset,
+                                );
+                            }
+                        }
+                    }
+                    BoxShadowClipMode::Inset => {}
+                }
 
                 self.content_rect
             }
         }
     }
 
     pub fn prepare_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         render_tasks: &mut RenderTaskTree,
     ) {
         // This is a shadow element. Create a render task that will
         // render the text run to a target, and then apply a gaussian
         // blur to that text run in order to build the actual primitive
         // which will be blitted to the framebuffer.
+
+        // TODO(gw): Rounding the content rect here to device pixels is not
+        // technically correct. Ideally we should ceil() here, and ensure that
+        // the extra part pixel in the case of fractional sizes is correctly
+        // handled. For now, just use rounding which passes the existing
+        // Gecko tests.
         let cache_width =
-            (self.content_rect.size.width * prim_context.device_pixel_ratio).ceil() as i32;
+            (self.content_rect.size.width * prim_context.device_pixel_ratio).round() as i32;
         let cache_height =
-            (self.content_rect.size.height * prim_context.device_pixel_ratio).ceil() as i32;
+            (self.content_rect.size.height * prim_context.device_pixel_ratio).round() as i32;
         let cache_size = DeviceIntSize::new(cache_width, cache_height);
 
-        let (blur_radius, target_kind, blur_regions, clear_mode, color) = match self.kind {
+        match self.kind {
             PictureKind::TextShadow { ref shadow } => {
-                let dummy: &[LayerRect] = &[];
-                (shadow.blur_radius,
-                 RenderTargetKind::Color,
-                 dummy,
-                 ClearMode::Transparent,
-                 shadow.color)
+                let blur_radius = device_length(shadow.blur_radius, prim_context.device_pixel_ratio);
+
+                // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
+                // "the image that would be generated by applying to the shadow a
+                // Gaussian blur with a standard deviation equal to half the blur radius."
+                let blur_std_deviation = blur_radius.0 as f32 * 0.5;
+
+                let picture_task = RenderTask::new_picture(
+                    cache_size,
+                    prim_index,
+                    RenderTargetKind::Color,
+                    self.content_rect.origin,
+                    shadow.color,
+                    ClearMode::Transparent,
+                );
+
+                let picture_task_id = render_tasks.add(picture_task);
+
+                let render_task = RenderTask::new_blur(
+                    blur_std_deviation,
+                    picture_task_id,
+                    render_tasks,
+                    RenderTargetKind::Color,
+                    &[],
+                    ClearMode::Transparent,
+                    shadow.color,
+                );
+
+                self.render_task_id = Some(render_tasks.add(render_task));
             }
             PictureKind::BoxShadow { blur_radius, clip_mode, ref blur_regions, color, .. } => {
-                let clear_mode = match clip_mode {
-                    BoxShadowClipMode::Outset => ClearMode::One,
-                    BoxShadowClipMode::Inset => ClearMode::Zero,
+                let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
+
+                // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
+                // "the image that would be generated by applying to the shadow a
+                // Gaussian blur with a standard deviation equal to half the blur radius."
+                let blur_std_deviation = blur_radius.0 as f32 * 0.5;
+
+                let blur_clear_mode = match clip_mode {
+                    BoxShadowClipMode::Outset => {
+                        ClearMode::One
+                    }
+                    BoxShadowClipMode::Inset => {
+                        ClearMode::Zero
+                    }
                 };
-                (blur_radius,
-                 RenderTargetKind::Alpha,
-                 blur_regions.as_slice(),
-                 clear_mode,
-                 color)
-            }
-        };
-        let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
-
-        // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
-        // "the image that would be generated by applying to the shadow a
-        // Gaussian blur with a standard deviation equal to half the blur radius."
-        let blur_std_deviation = blur_radius.0 as f32 * 0.5;
 
-        let picture_task = RenderTask::new_picture(
-            cache_size,
-            prim_index,
-            target_kind,
-            self.content_rect.origin,
-            color,
-        );
-        let picture_task_id = render_tasks.add(picture_task);
-        let render_task = RenderTask::new_blur(
-            blur_std_deviation,
-            picture_task_id,
-            render_tasks,
-            target_kind,
-            blur_regions,
-            clear_mode,
-            color,
-        );
-        self.render_task_id = Some(render_tasks.add(render_task));
+                let picture_task = RenderTask::new_picture(
+                    cache_size,
+                    prim_index,
+                    RenderTargetKind::Alpha,
+                    self.content_rect.origin,
+                    color,
+                    ClearMode::Zero,
+                );
+
+                let picture_task_id = render_tasks.add(picture_task);
+
+                let render_task = RenderTask::new_blur(
+                    blur_std_deviation,
+                    picture_task_id,
+                    render_tasks,
+                    RenderTargetKind::Alpha,
+                    blur_regions,
+                    blur_clear_mode,
+                    color,
+                );
+
+                self.render_task_id = Some(render_tasks.add(render_task));
+            }
+        }
     }
 
     pub fn write_gpu_blocks(&self, mut _request: GpuDataRequest) {
         // TODO(gw): We'll need to write the GPU blocks
         //           here specific to a brush primitive
         //           once we start drawing pictures as brushes!
     }
 
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -6,27 +6,27 @@ use api::{ColorU, FontKey, FontRenderMod
 use api::{FontInstance, FontVariation, NativeFontHandle};
 use api::{GlyphKey, SubpixelDirection};
 use app_units::Au;
 use core_foundation::array::{CFArray, CFArrayRef};
 use core_foundation::base::TCFType;
 use core_foundation::dictionary::{CFDictionary, CFDictionaryRef};
 use core_foundation::number::{CFNumber, CFNumberRef};
 use core_foundation::string::{CFString, CFStringRef};
-use core_graphics::base::{kCGImageAlphaNoneSkipFirst, kCGImageAlphaPremultipliedFirst, kCGImageAlphaPremultipliedLast};
+use core_graphics::base::{kCGImageAlphaNoneSkipFirst, kCGImageAlphaPremultipliedFirst};
 use core_graphics::base::kCGBitmapByteOrder32Little;
 use core_graphics::color_space::CGColorSpace;
 use core_graphics::context::{CGContext, CGTextDrawingMode};
 use core_graphics::data_provider::CGDataProvider;
 use core_graphics::font::{CGFont, CGFontRef, CGGlyph};
 use core_graphics::geometry::{CGPoint, CGRect, CGSize};
 use core_text;
 use core_text::font::{CTFont, CTFontRef};
 use core_text::font_descriptor::{kCTFontDefaultOrientation, kCTFontColorGlyphsTrait};
-use gamma_lut::{Color as ColorLut, GammaLut};
+use gamma_lut::{ColorLut, GammaLut};
 use glyph_rasterizer::{GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::collections::hash_map::Entry;
 use std::ptr;
 use std::sync::Arc;
 
 pub struct FontContext {
     cg_fonts: FastHashMap<FontKey, CGFont>,
@@ -69,21 +69,19 @@ fn supports_subpixel_aa() -> bool {
     let point = CGPoint { x: -1., y: 0. };
     let glyph = '|' as CGGlyph;
     ct_font.draw_glyphs(&[glyph], &[point], cg_context.clone());
     let data = cg_context.data();
     data[0] != data[1] || data[1] != data[2]
 }
 
 fn should_use_white_on_black(color: ColorU) -> bool {
-    let r = color.r as f32 / 255.0;
-    let g = color.g as f32 / 255.0;
-    let b = color.b as f32 / 255.0;
+    let (r, g, b) = (color.r as u32, color.g as u32, color.b as u32);
     // These thresholds were determined on 10.12 by observing what CG does.
-    r >= 0.333 && g >= 0.333 && b >= 0.333 && r + g + b >= 2.0
+    r >= 85 && g >= 85 && b >= 85 && r + g + b >= 2 * 255
 }
 
 fn get_glyph_metrics(
     ct_font: &CTFont,
     glyph: CGGlyph,
     x_offset: f64,
     y_offset: f64,
 ) -> GlyphMetrics {
@@ -374,25 +372,24 @@ impl FontContext {
         &self,
         pixels: &mut Vec<u8>,
         width: usize,
         height: usize,
         render_mode: FontRenderMode,
         color: ColorU,
     ) {
         // Then convert back to gamma corrected values.
-        let color_lut = ColorLut::new(color.r, color.g, color.b, color.a);
         match render_mode {
             FontRenderMode::Alpha => {
                 self.gamma_lut
-                    .preblend_grayscale_bgra(pixels, width, height, color_lut);
+                    .preblend_grayscale_bgra(pixels, width, height, color);
             }
             FontRenderMode::Subpixel => {
                 self.gamma_lut
-                    .preblend_bgra(pixels, width, height, color_lut);
+                    .preblend_bgra(pixels, width, height, color);
             }
             _ => {} // Again, give mono untouched since only the alpha matters.
         }
     }
 
     #[allow(dead_code)]
     fn print_glyph_data(&mut self, data: &[u8], width: usize, height: usize) {
         // Rust doesn't have step_by support on stable :(
@@ -425,24 +422,35 @@ impl FontContext {
         match font.render_mode {
             FontRenderMode::Mono | FontRenderMode::Bitmap => {
                 // In mono/bitmap modes the color of the font is irrelevant.
                 font.color = ColorU::new(255, 255, 255, 255);
                 // Subpixel positioning is disabled in mono and bitmap modes.
                 font.subpx_dir = SubpixelDirection::None;
             }
             FontRenderMode::Alpha => {
-                font.color = if font.platform_options.unwrap_or_default().font_smoothing &&
-                                should_use_white_on_black(font.color) {
+                font.color = if font.platform_options.unwrap_or_default().font_smoothing {
+                    // Only the G channel is used to index grayscale tables,
+                    // so use R and B to preserve light/dark determination.
+                    let ColorU { g, a, .. } = font.color.luminance_color().quantized_ceil();
+                    let rb = if should_use_white_on_black(font.color) { 255 } else { 0 };
+                    ColorU::new(rb, g, rb, a)
+                } else {
                     ColorU::new(255, 255, 255, 255)
-                } else {
-                    ColorU::new(0, 0, 0, 255)
                 };
             }
-            FontRenderMode::Subpixel => {}
+            FontRenderMode::Subpixel => {
+                // Quantization may change the light/dark determination, so quantize in the
+                // direction necessary to respect the threshold.
+                font.color = if should_use_white_on_black(font.color) {
+                    font.color.quantized_ceil()
+                } else {
+                    font.color.quantized_floor()
+                };
+            }
         }
     }
 
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
@@ -453,82 +461,95 @@ impl FontContext {
 
         let glyph = key.index as CGGlyph;
         let (x_offset, y_offset) = font.get_subpx_offset(key);
         let metrics = get_glyph_metrics(&ct_font, glyph, x_offset, y_offset);
         if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
             return None;
         }
 
+        // The result of this function, in all render modes, is going to be a
+        // BGRA surface with white text on transparency using premultiplied
+        // alpha. For subpixel text, the RGB values will be the mask value for
+        // the individual components. For bitmap glyphs, the RGB values will be
+        // the (premultiplied) color of the pixel. For Alpha and Mono, each
+        // pixel will have R==G==B==A at the end of this function.
+        // We access the color channels in little-endian order.
+        // The CGContext will create and own our pixel buffer.
+        // In the non-Bitmap cases, we will ask CoreGraphics to draw text onto
+        // an opaque background. In order to hit the most efficient path in CG
+        // for this, we will tell CG that the CGContext is opaque, by passing
+        // an "[...]AlphaNone[...]" context flag. This creates a slight
+        // contradiction to the way we use the buffer after CG is done with it,
+        // because we will convert it into text-on-transparency. But that's ok;
+        // we still get four bytes per pixel and CG won't mess with the alpha
+        // channel after we've stopped calling CG functions. We just need to
+        // make sure that we don't look at the alpha values of the pixels that
+        // we get from CG, and compute our own alpha value only from RGB.
+        // Note that CG requires kCGBitmapByteOrder32Little in order to do
+        // subpixel AA at all (which we need it to do in both Subpixel and
+        // Alpha+smoothing mode). But little-endian is what we want anyway, so
+        // this works out nicely.
         let context_flags = match font.render_mode {
-            FontRenderMode::Subpixel => {
+            FontRenderMode::Subpixel | FontRenderMode::Alpha |
+            FontRenderMode::Mono => {
                 kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipFirst
             }
-            FontRenderMode::Alpha | FontRenderMode::Mono => {
-                kCGImageAlphaPremultipliedLast
-            }
             FontRenderMode::Bitmap => {
                 kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedFirst
             }
         };
 
         let mut cg_context = CGContext::create_bitmap_context(
             None,
             metrics.rasterized_width as usize,
             metrics.rasterized_height as usize,
             8,
             metrics.rasterized_width as usize * 4,
             &CGColorSpace::create_device_rgb(),
             context_flags,
         );
 
-
-        // Tested on mac OS Sierra, 10.12
-        // For Mono + alpha, the only values that matter are the alpha values.
-        // For subpixel, we need each individual rgb channel.
-        // CG has two individual glyphs for subpixel AA (pre-10.11, this is not true):
-        // 1) black text on white opaque background
-        // 2) white text on black opaque background
-        // Gecko does (1). Note, the BG must be opaque for subpixel AA to work.
-        // See https://bugzilla.mozilla.org/show_bug.cgi?id=1230366#c35
-        //
-        // For grayscale / mono, CG still produces two glyphs, but it doesn't matter
-        // 1) black text on transparent white - only alpha values filled
-        // 2) white text on transparent black - channels == alpha
+        // If the font render mode is Alpha, we support two different ways to
+        // compute the grayscale mask, depending on the value of the platform
+        // options' font_smoothing flag:
+        //  - Alpha + smoothing:
+        //    We will recover a grayscale mask from a subpixel rasterization, in
+        //    such a way that the result looks as close to subpixel text
+        //    blending as we can make it. This involves gamma correction,
+        //    luminance computations and preblending based on the text color,
+        //    just like with the Subpixel render mode.
+        //  - Alpha without smoothing:
+        //    We will ask CoreGraphics to rasterize the text with font_smoothing
+        //    off. This will cause it to use grayscale anti-aliasing with
+        //    comparatively thin text. This method of text rendering is not
+        //    gamma-aware.
         //
-        // If we draw grayscale/mono on an opaque background
-        // the RGB channels are the alpha values from transparent backgrounds
-        // with the alpha set as opaque.
-        // At the end of all this, WR expects individual RGB channels and ignores alpha
-        // for subpixel AA.
-        // For alpha/mono, WR ignores all channels other than alpha.
-        // Also note that WR expects text to be white text on black bg, so invert
-        // when we draw the glyphs as black on white.
-        //
-        // Unless platform_options.font_smoothing is false, the grayscale AA'd version
-        // of the glyph will actually be rasterized with subpixel AA. The color channels
-        // will be then converted to luminance in gamma_correct_pixels to produce the
-        // final grayscale AA. This ensures that the dilation of the glyph from grayscale
-        // AA more closely resembles the dilation from subpixel AA in the general case.
+        // For subpixel rasterization, starting with macOS 10.11, CoreGraphics
+        // uses different glyph dilation based on the text color. Bright text
+        // uses less font dilation (looks thinner) than dark text.
+        // As a consequence, when we ask CG to rasterize with subpixel AA, we
+        // will render white-on-black text as opposed to black-on-white text if
+        // the text color brightness exceeds a certain threshold. This applies
+        // to both the Subpixel and the "Alpha + smoothing" modes, but not to
+        // the "Alpha without smoothing" and Mono modes.
         let use_white_on_black = should_use_white_on_black(font.color);
         let use_font_smoothing = font.platform_options.unwrap_or_default().font_smoothing;
-        let (antialias, smooth, text_color, bg_color, bg_alpha, invert) = match font.render_mode {
-            FontRenderMode::Subpixel => if use_white_on_black {
-                (true, true, 1.0, 0.0, 1.0, false)
-            } else {
-                (true, true, 0.0, 1.0, 1.0, true)
-            },
-            FontRenderMode::Alpha => if use_font_smoothing && use_white_on_black {
-                (true, use_font_smoothing, 1.0, 0.0, 1.0, false)
-            } else {
-                (true, use_font_smoothing, 0.0, 1.0, 1.0, true)
-            },
-            FontRenderMode::Bitmap => (true, false, 0.0, 0.0, 0.0, false),
-            FontRenderMode::Mono => (false, false, 0.0, 1.0, 1.0, true),
-        };
+        let (antialias, smooth, text_color, bg_color, bg_alpha, invert) =
+            match (font.render_mode, use_font_smoothing) {
+                (FontRenderMode::Subpixel, _) |
+                (FontRenderMode::Alpha, true) => if use_white_on_black {
+                    (true, true, 1.0, 0.0, 1.0, false)
+                } else {
+                    (true, true, 0.0, 1.0, 1.0, true)
+                },
+                (FontRenderMode::Alpha, false) => (true, false, 0.0, 1.0, 1.0, true),
+                (FontRenderMode::Mono, _) => (false, false, 0.0, 1.0, 1.0, true),
+                (FontRenderMode::Bitmap, _) => (true, false, 0.0, 0.0, 0.0, false),
+            };
 
         // These are always true in Gecko, even for non-AA fonts
         cg_context.set_allows_font_subpixel_positioning(true);
         cg_context.set_should_subpixel_position_fonts(true);
 
         // Don't quantize because we're doing it already.
         cg_context.set_allows_font_subpixel_quantization(false);
         cg_context.set_should_subpixel_quantize_fonts(false);
@@ -539,39 +560,46 @@ impl FontContext {
         cg_context.set_should_antialias(antialias);
 
         // CG Origin is bottom left, WR is top left. Need -y offset
         let rasterization_origin = CGPoint {
             x: -metrics.rasterized_left as f64 + x_offset,
             y: metrics.rasterized_descent as f64 - y_offset,
         };
 
-        // Always draw black text on a white background
-        // Fill the background
+        // Fill the background. This could be opaque white, opaque black, or
+        // transparency.
         cg_context.set_rgb_fill_color(bg_color, bg_color, bg_color, bg_alpha);
         let rect = CGRect {
             origin: CGPoint { x: 0.0, y: 0.0 },
             size: CGSize {
                 width: metrics.rasterized_width as f64,
                 height: metrics.rasterized_height as f64,
             },
         };
         cg_context.fill_rect(rect);
 
-        // Set the text color
+        // Set the text color and draw the glyphs.
         cg_context.set_rgb_fill_color(text_color, text_color, text_color, 1.0);
         cg_context.set_text_drawing_mode(CGTextDrawingMode::CGTextFill);
         ct_font.draw_glyphs(&[glyph], &[rasterization_origin], cg_context.clone());
 
         let mut rasterized_pixels = cg_context.data().to_vec();
 
         if font.render_mode != FontRenderMode::Bitmap {
-            // Convert to linear space for subpixel AA.
-            // We explicitly do not do this for grayscale AA
+            // We rendered text into an opaque surface. The code below needs to
+            // ignore the current value of each pixel's alpha channel. But it's
+            // allowed to write to the alpha channel, because we're done calling
+            // CG functions now.
+
             if smooth {
+                // Convert to linear space for subpixel AA.
+                // We explicitly do not do this for grayscale AA ("Alpha without
+                // smoothing" or Mono) because those rendering modes are not
+                // gamma-aware in CoreGraphics.
                 self.gamma_lut.coregraphics_convert_to_linear_bgra(
                     &mut rasterized_pixels,
                     metrics.rasterized_width as usize,
                     metrics.rasterized_height as usize,
                 );
             }
 
             for i in 0 .. metrics.rasterized_height {
@@ -580,26 +608,33 @@ impl FontContext {
 
                 for pixel in rasterized_pixels[current_height .. end_row].chunks_mut(4) {
                     if invert {
                         pixel[0] = 255 - pixel[0];
                         pixel[1] = 255 - pixel[1];
                         pixel[2] = 255 - pixel[2];
                     }
 
-                    pixel[3] = match font.render_mode {
-                        FontRenderMode::Subpixel => 255,
-                        _ => {
-                            pixel[0]
-                        }
-                    }; // end match
+                    // Set alpha to the value of the green channel. For grayscale
+                    // text, all three channels have the same value anyway.
+                    // For subpixel text, the mask's alpha only makes a difference
+                    // when computing the destination alpha on destination pixels
+                    // that are not completely opaque. Picking an alpha value
+                    // that's somehow based on the mask at least ensures that text
+                    // blending doesn't modify the destination alpha on pixels where
+                    // the mask is entirely zero.
+                    pixel[3] = pixel[1];
                 } // end row
             } // end height
 
             if smooth {
+                // Convert back from linear space into device space, and perform
+                // some "preblending" based on the text color.
+                // In Alpha + smoothing mode, this will also convert subpixel AA
+                // into grayscale AA.
                 self.gamma_correct_pixels(
                     &mut rasterized_pixels,
                     metrics.rasterized_width as usize,
                     metrics.rasterized_height as usize,
                     font.render_mode,
                     font.color,
                 );
             }
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -1,16 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{FontInstance, FontInstancePlatformOptions, FontKey, FontRenderMode};
 use api::{ColorU, GlyphDimensions, GlyphKey, SubpixelDirection};
 use dwrote;
-use gamma_lut::{Color as ColorLut, GammaLut};
+use gamma_lut::{ColorLut, GammaLut};
 use glyph_rasterizer::{GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::sync::Arc;
 
 lazy_static! {
     static ref DEFAULT_FONT_DESCRIPTOR: dwrote::FontDescriptor = dwrote::FontDescriptor {
         family_name: "Arial".to_owned(),
         weight: dwrote::FontWeight::Regular,
@@ -306,20 +306,21 @@ impl FontContext {
         match font.render_mode {
             FontRenderMode::Mono | FontRenderMode::Bitmap => {
                 // In mono/bitmap modes the color of the font is irrelevant.
                 font.color = ColorU::new(255, 255, 255, 255);
                 // Subpixel positioning is disabled in mono and bitmap modes.
                 font.subpx_dir = SubpixelDirection::None;
             }
             FontRenderMode::Alpha => {
-                // In alpha mode the color of the font is irrelevant.
-                font.color = ColorU::new(255, 255, 255, 255);
+                font.color = font.color.luminance_color().quantize();
             }
-            FontRenderMode::Subpixel => {}
+            FontRenderMode::Subpixel => {
+                font.color = font.color.quantize();
+            }
         }
     }
 
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
@@ -349,17 +350,17 @@ impl FontContext {
                     },
                     None => &self.gamma_lut,
                 };
 
                 lut_correction.preblend_rgb(
                     &mut pixels,
                     width,
                     height,
-                    ColorLut::new(font.color.r, font.color.g, font.color.b, font.color.a),
+                    font.color,
                 );
             }
         }
 
         let rgba_pixels = self.convert_to_rgba(&mut pixels, font.render_mode);
 
         Some(RasterizedGlyph {
             left: bounds.left as f32,
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -147,54 +147,94 @@ pub struct PrimitiveMetadata {
     pub is_backface_visible: bool,
     pub screen_rect: Option<DeviceIntRect>,
 
     /// A tag used to identify this primitive outside of WebRender. This is
     /// used for returning useful data during hit testing.
     pub tag: Option<ItemTag>,
 }
 
+#[derive(Debug,Clone,Copy)]
+pub enum RectangleContent {
+    Fill(ColorF),
+    Clear,
+}
+
 #[derive(Debug)]
-#[repr(C)]
 pub struct RectanglePrimitive {
-    pub color: ColorF,
+    pub content: RectangleContent,
 }
 
 impl ToGpuBlocks for RectanglePrimitive {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        request.push(self.color.premultiplied());
+        match &self.content {
+            &RectangleContent::Fill(ref color) => {
+                request.push(color.premultiplied());
+            }
+            &RectangleContent::Clear => {
+                // Opaque black, blended with the dest-out operator to clear the destination.
+                request.push(ColorF::new(0.0, 0.0, 0.0, 1.0));
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum BrushMaskKind {
+    // TODO(gw): Add a `Rect` variant as an optimization for masks with 0 border radii.
+    Corner(LayerSize),
+    RoundedRect(LayerRect, BorderRadius),
+}
+
+#[derive(Debug)]
+pub enum BrushKind {
+    Mask {
+        clip_mode: ClipMode,
+        kind: BrushMaskKind,
     }
 }
 
 #[derive(Debug)]
 pub struct BrushPrimitive {
-    pub clip_mode: ClipMode,
-    pub radius: BorderRadius,
+    pub kind: BrushKind,
 }
 
 impl ToGpuBlocks for BrushPrimitive {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        request.push([
-            self.clip_mode as u32 as f32,
-            0.0,
-            0.0,
-            0.0
-        ]);
-        request.push([
-            self.radius.top_left.width,
-            self.radius.top_left.height,
-            self.radius.top_right.width,
-            self.radius.top_right.height,
-        ]);
-        request.push([
-            self.radius.bottom_right.width,
-            self.radius.bottom_right.height,
-            self.radius.bottom_left.width,
-            self.radius.bottom_left.height,
-        ]);
+        match self.kind {
+            BrushKind::Mask { clip_mode, kind: BrushMaskKind::Corner(radius) } => {
+                request.push([
+                    radius.width,
+                    radius.height,
+                    clip_mode as u32 as f32,
+                    0.0,
+                ]);
+            }
+            BrushKind::Mask { clip_mode, kind: BrushMaskKind::RoundedRect(rect, radii) } => {
+                request.push([
+                    clip_mode as u32 as f32,
+                    0.0,
+                    0.0,
+                    0.0
+                ]);
+                request.push(rect);
+                request.push([
+                    radii.top_left.width,
+                    radii.top_left.height,
+                    radii.top_right.width,
+                    radii.top_right.height,
+                ]);
+                request.push([
+                    radii.bottom_right.width,
+                    radii.bottom_right.height,
+                    radii.bottom_left.width,
+                    radii.bottom_left.height,
+                ]);
+            }
+        }
     }
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct LinePrimitive {
     pub color: ColorF,
     pub wavy_line_thickness: f32,
@@ -869,18 +909,24 @@ impl PrimitiveStore {
 
             opacity: PrimitiveOpacity::translucent(),
             prim_kind: PrimitiveKind::Rectangle,
             cpu_prim_index: SpecificPrimitiveIndex(0),
         };
 
         let metadata = match container {
             PrimitiveContainer::Rectangle(rect) => {
+                let opacity = match &rect.content {
+                    &RectangleContent::Fill(ref color) => {
+                        PrimitiveOpacity::from_alpha(color.a)
+                    },
+                    &RectangleContent::Clear => PrimitiveOpacity::opaque()
+                };
                 let metadata = PrimitiveMetadata {
-                    opacity: PrimitiveOpacity::from_alpha(rect.color.a),
+                    opacity,
                     prim_kind: PrimitiveKind::Rectangle,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_rectangles.len()),
                     ..base_metadata
                 };
 
                 self.cpu_rectangles.push(rect);
 
                 metadata
@@ -1186,64 +1232,53 @@ impl PrimitiveStore {
             gpu_cache,
             resource_cache,
             prim_context.device_pixel_ratio,
         );
 
         // Try to create a mask if we may need to.
         let prim_clips = clip_store.get(&metadata.clip_sources);
         let is_axis_aligned = prim_context.packed_layer.transform.preserves_2d_axis_alignment();
-        let clip_task = if prim_clips.is_masking() {
+        let clip_task = if prim_context.clip_chain.is_some() || prim_clips.is_masking() {
             // Take into account the actual clip info of the primitive, and
             // mutate the current bounds accordingly.
             let mask_rect = match prim_clips.bounds.outer {
                 Some(ref outer) => match prim_screen_rect.intersection(&outer.device_rect) {
                     Some(rect) => rect,
                     None => {
                         metadata.screen_rect = None;
                         return false;
                     }
                 },
                 _ => prim_screen_rect,
             };
 
-            let extra_clip = Some(Rc::new(ClipChainNode {
-                work_item: ClipWorkItem {
-                    layer_index: prim_context.packed_layer_index,
-                    clip_sources: metadata.clip_sources.weak(),
-                    coordinate_system_id: prim_context.coordinate_system_id,
-                },
-                prev: None,
-            }));
+            let extra_clip = if prim_clips.is_masking() {
+                Some(Rc::new(ClipChainNode {
+                    work_item: ClipWorkItem {
+                        layer_index: prim_context.packed_layer_index,
+                        clip_sources: metadata.clip_sources.weak(),
+                        coordinate_system_id: prim_context.coordinate_system_id,
+                    },
+                    prev: None,
+                }))
+            } else {
+                None
+            };
 
             RenderTask::new_mask(
                 None,
                 mask_rect,
                 prim_context.clip_chain.clone(),
                 extra_clip,
                 prim_screen_rect,
                 clip_store,
                 is_axis_aligned,
                 prim_context.coordinate_system_id,
             )
-        } else if prim_context.clip_chain.is_some() {
-            // If the primitive doesn't have a specific clip, key the task ID off the
-            // stacking context. This means that two primitives which are only clipped
-            // by the stacking context stack can share clip masks during render task
-            // assignment to targets.
-            RenderTask::new_mask(
-                Some(prim_context.clip_id),
-                prim_context.clip_bounds,
-                prim_context.clip_chain.clone(),
-                None,
-                prim_screen_rect,
-                clip_store,
-                is_axis_aligned,
-                prim_context.coordinate_system_id,
-            )
         } else {
             None
         };
 
         metadata.clip_task_id = clip_task.map(|clip_task| render_tasks.add(clip_task));
         true
     }
 
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -705,16 +705,17 @@ trait ToDebugString {
 #[cfg(feature = "debugger")]
 impl ToDebugString for SpecificDisplayItem {
     fn debug_string(&self) -> String {
         match *self {
             SpecificDisplayItem::Image(..) => String::from("image"),
             SpecificDisplayItem::YuvImage(..) => String::from("yuv_image"),
             SpecificDisplayItem::Text(..) => String::from("text"),
             SpecificDisplayItem::Rectangle(..) => String::from("rectangle"),
+            SpecificDisplayItem::ClearRectangle => String::from("clear_rectangle"),
             SpecificDisplayItem::Line(..) => String::from("line"),
             SpecificDisplayItem::Gradient(..) => String::from("gradient"),
             SpecificDisplayItem::RadialGradient(..) => String::from("radial_gradient"),
             SpecificDisplayItem::BoxShadow(..) => String::from("box_shadow"),
             SpecificDisplayItem::Border(..) => String::from("border"),
             SpecificDisplayItem::PushStackingContext(..) => String::from("push_stacking_context"),
             SpecificDisplayItem::Iframe(..) => String::from("iframe"),
             SpecificDisplayItem::Clip(..) => String::from("clip"),
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -333,22 +333,18 @@ impl RenderTask {
     }
 
     pub fn new_picture(
         size: DeviceIntSize,
         prim_index: PrimitiveIndex,
         target_kind: RenderTargetKind,
         content_origin: LayerPoint,
         color: ColorF,
+        clear_mode: ClearMode,
     ) -> RenderTask {
-        let clear_mode = match target_kind {
-            RenderTargetKind::Color => ClearMode::Transparent,
-            RenderTargetKind::Alpha => ClearMode::One,
-        };
-
         RenderTask {
             cache_key: None,
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, size),
             kind: RenderTaskKind::Picture(PictureTask {
                 prim_index,
                 target_kind,
                 content_origin,
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -631,16 +631,17 @@ impl SourceTextureResolver {
     }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BlendMode {
     None,
     Alpha,
     PremultipliedAlpha,
+    PremultipliedDestOut,
     Subpixel,
 }
 
 // Tracks the state of each row in the GPU cache texture.
 struct CacheRow {
     is_dirty: bool,
 }
 
@@ -1002,16 +1003,17 @@ impl BrushShader {
         renderer_errors: &mut Vec<RendererError>,
     ) where M: Into<ShaderMode> {
         match blend_mode {
             BlendMode::None => {
                 self.opaque.bind(device, projection, mode, renderer_errors)
             }
             BlendMode::Alpha |
             BlendMode::PremultipliedAlpha |
+            BlendMode::PremultipliedDestOut |
             BlendMode::Subpixel => {
                 self.alpha.bind(device, projection, mode, renderer_errors)
             }
         }
     }
 
     fn deinit(self, device: &mut Device) {
         self.opaque.deinit(device);
@@ -1183,17 +1185,18 @@ pub struct Renderer {
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
     cs_text_run: LazilyCompiledShader,
     cs_line: LazilyCompiledShader,
     cs_blur_a8: LazilyCompiledShader,
     cs_blur_rgba8: LazilyCompiledShader,
 
     // Brush shaders
-    brush_mask: LazilyCompiledShader,
+    brush_mask_corner: LazilyCompiledShader,
+    brush_mask_rounded_rect: LazilyCompiledShader,
     brush_image_rgba8: BrushShader,
     brush_image_a8: BrushShader,
 
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
@@ -1375,19 +1378,27 @@ impl Renderer {
         let cs_line = try!{
             LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Primitive),
                                       "ps_line",
                                       &["CACHE"],
                                       &mut device,
                                       options.precache_shaders)
         };
 
-        let brush_mask = try!{
+        let brush_mask_corner = try!{
             LazilyCompiledShader::new(ShaderKind::Brush,
-                                      "brush_mask",
+                                      "brush_mask_corner",
+                                      &[],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
+
+        let brush_mask_rounded_rect = try!{
+            LazilyCompiledShader::new(ShaderKind::Brush,
+                                      "brush_mask_rounded_rect",
                                       &[],
                                       &mut device,
                                       options.precache_shaders)
         };
 
         let brush_image_a8 = try!{
             BrushShader::new("brush_image",
                              &mut device,
@@ -1798,17 +1809,18 @@ impl Renderer {
             current_frame: None,
             pending_texture_updates: Vec::new(),
             pending_gpu_cache_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
             cs_text_run,
             cs_line,
             cs_blur_a8,
             cs_blur_rgba8,
-            brush_mask,
+            brush_mask_corner,
+            brush_mask_rounded_rect,
             brush_image_rgba8,
             brush_image_a8,
             cs_clip_rectangle,
             cs_clip_border,
             cs_clip_image,
             ps_rectangle,
             ps_rectangle_clip,
             ps_text_run,
@@ -2000,18 +2012,23 @@ impl Renderer {
                     );
                     debug_target.add(
                         debug_server::BatchKind::Clip,
                         "Rectangles",
                         target.clip_batcher.rectangles.len(),
                     );
                     debug_target.add(
                         debug_server::BatchKind::Cache,
-                        "Rectangle Brush",
-                        target.rect_cache_prims.len(),
+                        "Rectangle Brush (Corner)",
+                        target.brush_mask_corners.len(),
+                    );
+                    debug_target.add(
+                        debug_server::BatchKind::Cache,
+                        "Rectangle Brush (Rounded Rect)",
+                        target.brush_mask_rounded_rects.len(),
                     );
                     for (_, items) in target.clip_batcher.images.iter() {
                         debug_target.add(debug_server::BatchKind::Clip, "Image mask", items.len());
                     }
 
                     debug_pass.add(debug_target);
                 }
 
@@ -2455,16 +2472,17 @@ impl Renderer {
                 }
             }
             BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
                 TransformBatchKind::Rectangle(needs_clipping) => {
                     debug_assert!(
                         !needs_clipping || match key.blend_mode {
                             BlendMode::Alpha |
                             BlendMode::PremultipliedAlpha |
+                            BlendMode::PremultipliedDestOut |
                             BlendMode::Subpixel => true,
                             BlendMode::None => false,
                         }
                     );
 
                     if needs_clipping {
                         self.ps_rectangle_clip.bind(
                             &mut self.device,
@@ -2797,16 +2815,17 @@ impl Renderer {
             self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
 
             for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
                 if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
                         BlendMode::None => ColorF::new(0.3, 0.3, 0.3, 1.0),
                         BlendMode::Alpha => ColorF::new(0.0, 0.9, 0.1, 1.0),
                         BlendMode::PremultipliedAlpha => ColorF::new(0.0, 0.3, 0.7, 1.0),
+                        BlendMode::PremultipliedDestOut => ColorF::new(0.6, 0.2, 0.0, 1.0),
                         BlendMode::Subpixel => ColorF::new(0.5, 0.0, 0.4, 1.0),
                     }.into();
                     for item_rect in &batch.item_rects {
                         self.debug.add_rect(item_rect, color);
                     }
                 }
 
                 match batch.key.kind {
@@ -2873,17 +2892,17 @@ impl Renderer {
 
                                 // When drawing the 2nd pass, we know that the VAO, textures etc
                                 // are all set up from the previous draw_instanced_batch call,
                                 // so just issue a draw call here to avoid re-uploading the
                                 // instances and re-binding textures etc.
                                 self.device
                                     .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
                             }
-                            BlendMode::Alpha | BlendMode::None => {
+                            BlendMode::Alpha | BlendMode::PremultipliedDestOut | BlendMode::None => {
                                 unreachable!("bug: bad blend mode for text");
                             }
                         }
 
                         prev_blend_mode = BlendMode::None;
                         self.device.set_blend(false);
                     }
                     _ => {
@@ -2895,16 +2914,20 @@ impl Renderer {
                                 BlendMode::Alpha => {
                                     self.device.set_blend(true);
                                     self.device.set_blend_mode_alpha();
                                 }
                                 BlendMode::PremultipliedAlpha => {
                                     self.device.set_blend(true);
                                     self.device.set_blend_mode_premultiplied_alpha();
                                 }
+                                BlendMode::PremultipliedDestOut => {
+                                    self.device.set_blend(true);
+                                    self.device.set_blend_mode_premultiplied_dest_out();
+                                }
                                 BlendMode::Subpixel => {
                                     unreachable!("bug: subpx text handled earlier");
                                 }
                             }
                             prev_blend_mode = batch.key.blend_mode;
                         }
 
                         self.submit_batch(
@@ -3018,24 +3041,37 @@ impl Renderer {
                 self.draw_instanced_batch(
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                 );
             }
         }
 
-        if !target.rect_cache_prims.is_empty() {
+        if !target.brush_mask_corners.is_empty() {
             self.device.set_blend(false);
 
             let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK);
-            self.brush_mask
+            self.brush_mask_corner
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             self.draw_instanced_batch(
-                &target.rect_cache_prims,
+                &target.brush_mask_corners,
+                VertexArrayKind::Primitive,
+                &BatchTextures::no_texture(),
+            );
+        }
+
+        if !target.brush_mask_rounded_rects.is_empty() {
+            self.device.set_blend(false);
+
+            let _gm = self.gpu_profile.add_marker(GPU_TAG_BRUSH_MASK);
+            self.brush_mask_rounded_rect
+                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+            self.draw_instanced_batch(
+                &target.brush_mask_rounded_rects,
                 VertexArrayKind::Primitive,
                 &BatchTextures::no_texture(),
             );
         }
 
         // Draw the clip items into the tiled alpha mask.
         {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP);
@@ -3578,17 +3614,18 @@ impl Renderer {
         self.device.delete_vao(self.prim_vao);
         self.device.delete_vao(self.clip_vao);
         self.device.delete_vao(self.blur_vao);
         self.debug.deinit(&mut self.device);
         self.cs_text_run.deinit(&mut self.device);
         self.cs_line.deinit(&mut self.device);
         self.cs_blur_a8.deinit(&mut self.device);
         self.cs_blur_rgba8.deinit(&mut self.device);
-        self.brush_mask.deinit(&mut self.device);
+        self.brush_mask_rounded_rect.deinit(&mut self.device);
+        self.brush_mask_corner.deinit(&mut self.device);
         self.brush_image_rgba8.deinit(&mut self.device);
         self.brush_image_a8.deinit(&mut self.device);
         self.cs_clip_rectangle.deinit(&mut self.device);
         self.cs_clip_image.deinit(&mut self.device);
         self.cs_clip_border.deinit(&mut self.device);
         self.ps_rectangle.deinit(&mut self.device);
         self.ps_rectangle_clip.deinit(&mut self.device);
         self.ps_text_run.deinit(&mut self.device);
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,30 +1,31 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ClipAndScrollInfo, ClipId, ColorF, DeviceIntPoint, ImageKey};
+use api::{BorderRadiusKind, ClipAndScrollInfo, ClipId, ColorF, DeviceIntPoint, ImageKey};
 use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize};
 use api::{ExternalImageType, FilterOp, FontRenderMode, ImageRendering, LayerRect};
 use api::{LayerToWorldTransform, MixBlendMode, PipelineId, PropertyBinding, TransformStyle};
 use api::{LayerVector2D, TileOffset, WorldToLayerTransform, YuvColorSpace, YuvFormat};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
 use device::Texture;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
-use gpu_types::{BlurDirection, BlurInstance, BrushInstance, ClipMaskInstance};
+use gpu_types::{BlurDirection, BlurInstance, BrushInstance, BrushImageKind, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use gpu_types::{BRUSH_FLAG_USES_PICTURE};
 use internal_types::{FastHashMap, SourceTexture};
 use internal_types::BatchTextures;
+use picture::PictureKind;
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
-use prim_store::{DeferredResolve, TextRunMode};
+use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, RectangleContent, TextRunMode};
 use profiler::FrameProfileCounters;
 use render_task::{AlphaRenderItem, ClipWorkItem, MaskGeometryKind, MaskSegment};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::{GlyphFetchResult, ResourceCache};
 use std::{cmp, usize, f32, i32};
@@ -58,18 +59,34 @@ impl AlphaBatchHelpers for PrimitiveStor
             PrimitiveKind::TextRun => {
                 let text_run_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                 match text_run_cpu.font.render_mode {
                     FontRenderMode::Subpixel => BlendMode::Subpixel,
                     FontRenderMode::Alpha |
                     FontRenderMode::Mono |
                     FontRenderMode::Bitmap => BlendMode::PremultipliedAlpha,
                 }
-            }
-            PrimitiveKind::Rectangle |
+            },
+            PrimitiveKind::Rectangle => {
+                let rectangle_cpu = &self.cpu_rectangles[metadata.cpu_prim_index.0];
+                match rectangle_cpu.content {
+                    RectangleContent::Fill(..) => if needs_blending {
+                        BlendMode::PremultipliedAlpha
+                    } else {
+                        BlendMode::None
+                    },
+                    RectangleContent::Clear => {
+                        // TODO: If needs_blending == false, we could use BlendMode::None
+                        // to clear the rectangle, but then we'd need to draw the rectangle
+                        // with alpha == 0.0 instead of alpha == 1.0, and the RectanglePrimitive
+                        // would need to know about that.
+                        BlendMode::PremultipliedDestOut
+                    },
+                }
+            },
             PrimitiveKind::Border |
             PrimitiveKind::Image |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
             PrimitiveKind::RadialGradient |
             PrimitiveKind::Picture => if needs_blending {
                 BlendMode::PremultipliedAlpha
             } else {
@@ -254,17 +271,18 @@ impl BatchList {
 
     fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         item_bounding_rect: &DeviceIntRect,
     ) -> &mut Vec<PrimitiveInstance> {
         match key.blend_mode {
             BlendMode::None => self.opaque_batch_list.get_suitable_batch(key),
-            BlendMode::Alpha | BlendMode::PremultipliedAlpha | BlendMode::Subpixel => {
+            BlendMode::Alpha | BlendMode::PremultipliedAlpha |
+            BlendMode::PremultipliedDestOut | BlendMode::Subpixel => {
                 self.alpha_batch_list
                     .get_suitable_batch(key, item_bounding_rect)
             }
         }
     }
 
     fn finalize(&mut self) {
         self.opaque_batch_list.finalize()
@@ -585,25 +603,40 @@ impl AlphaRenderItem {
                         let cache_task_id = picture.render_task_id.expect("no render task!");
                         let cache_task_address = render_tasks.get_task_address(cache_task_id);
                         let textures = BatchTextures::render_target_cache();
                         let kind = BatchKind::Brush(
                             BrushBatchKind::Image(picture.target_kind()),
                         );
                         let key = BatchKey::new(kind, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
+                        let image_kind = match picture.kind {
+                            PictureKind::TextShadow { .. } => {
+                                BrushImageKind::Simple
+                            }
+                            PictureKind::BoxShadow { radii_kind, .. } => {
+                                match radii_kind {
+                                    BorderRadiusKind::Uniform => {
+                                        BrushImageKind::Mirror
+                                    }
+                                    BorderRadiusKind::NonUniform => {
+                                        BrushImageKind::NinePatch
+                                    }
+                                }
+                            }
+                        };
                         let instance = BrushInstance {
                             picture_address: task_address,
                             prim_address: prim_cache_address,
                             layer_address: packed_layer_index.into(),
                             clip_task_address,
                             z,
                             flags: 0,
                             user_data0: cache_task_address.0 as i32,
-                            user_data1: 0,
+                            user_data1: image_kind as i32,
                         };
                         batch.push(PrimitiveInstance::from(instance));
                     }
                     PrimitiveKind::AlignedGradient => {
                         let gradient_cpu =
                             &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
                         let kind = BatchKind::Transformable(
                             transform_kind,
@@ -1249,33 +1282,35 @@ impl RenderTarget for ColorRenderTarget 
                 self.readbacks.push(device_rect);
             }
         }
     }
 }
 
 pub struct AlphaRenderTarget {
     pub clip_batcher: ClipBatcher,
-    pub rect_cache_prims: Vec<PrimitiveInstance>,
+    pub brush_mask_corners: Vec<PrimitiveInstance>,
+    pub brush_mask_rounded_rects: Vec<PrimitiveInstance>,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub zero_clears: Vec<RenderTaskId>,
     allocator: TextureAllocator,
 }
 
 impl RenderTarget for AlphaRenderTarget {
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint> {
         self.allocator.allocate(&size)
     }
 
     fn new(size: Option<DeviceUintSize>) -> AlphaRenderTarget {
         AlphaRenderTarget {
             clip_batcher: ClipBatcher::new(),
-            rect_cache_prims: Vec::new(),
+            brush_mask_corners: Vec::new(),
+            brush_mask_rounded_rects: Vec::new(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             zero_clears: Vec::new(),
             allocator: TextureAllocator::new(size.expect("bug: alpha targets need size")),
         }
     }
 
     fn used_rect(&self) -> DeviceIntRect {
@@ -1357,17 +1392,26 @@ impl RenderTarget for AlphaRenderTarget 
                                             //           will need to be filled out!
                                             layer_address: PackedLayerIndex(0).into(),
                                             clip_task_address: RenderTaskAddress(0),
                                             z: 0,
                                             flags: BRUSH_FLAG_USES_PICTURE,
                                             user_data0: 0,
                                             user_data1: 0,
                                         };
-                                        self.rect_cache_prims.push(PrimitiveInstance::from(instance));
+                                        let brush = &ctx.prim_store.cpu_brushes[sub_metadata.cpu_prim_index.0];
+                                        let batch = match brush.kind {
+                                            BrushKind::Mask { ref kind, .. } => {
+                                                match *kind {
+                                                    BrushMaskKind::Corner(..) => &mut self.brush_mask_corners,
+                                                    BrushMaskKind::RoundedRect(..) => &mut self.brush_mask_rounded_rects,
+                                                }
+                                            }
+                                        };
+                                        batch.push(PrimitiveInstance::from(instance));
                                     }
                                     _ => {
                                         unreachable!("Unexpected sub primitive type");
                                     }
                                 }
                             }
                         }
                     }
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -1,16 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use {ColorF, FontInstanceKey, ImageKey, LayerPixel, LayoutPixel, LayoutPoint, LayoutRect,
      LayoutSize, LayoutTransform};
 use {GlyphOptions, LayoutVector2D, PipelineId, PropertyBinding};
-use euclid::{SideOffsets2D, TypedRect, TypedSideOffsets2D};
+use euclid::{SideOffsets2D, TypedRect};
 use std::ops::Not;
 
 // NOTE: some of these structs have an "IMPLICIT" comment.
 // This indicates that the BuiltDisplayList will have serialized
 // a list of values nearby that this item consumes. The traversal
 // iterator should handle finding these.
 
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
@@ -86,16 +86,17 @@ pub type LayoutPrimitiveInfo = Primitive
 pub type LayerPrimitiveInfo = PrimitiveInfo<LayerPixel>;
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub enum SpecificDisplayItem {
     Clip(ClipDisplayItem),
     ScrollFrame(ScrollFrameDisplayItem),
     StickyFrame(StickyFrameDisplayItem),
     Rectangle(RectangleDisplayItem),
+    ClearRectangle,
     Line(LineDisplayItem),
     Text(TextDisplayItem),
     Image(ImageDisplayItem),
     YuvImage(YuvImageDisplayItem),
     Border(BorderDisplayItem),
     BoxShadow(BoxShadowDisplayItem),
     Gradient(GradientDisplayItem),
     RadialGradient(RadialGradientDisplayItem),
@@ -108,29 +109,57 @@ pub enum SpecificDisplayItem {
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ClipDisplayItem {
     pub id: ClipId,
     pub image_mask: Option<ImageMask>,
 }
 
+/// The minimum and maximum allowable offset for a sticky frame in a single dimension.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
+pub struct StickyOffsetBounds {
+    /// The minimum offset for this frame, typically a negative value, which specifies how
+    /// far in the negative direction the sticky frame can offset its contents in this
+    /// dimension.
+    pub min: f32,
+
+    /// The maximum offset for this frame, typically a positive value, which specifies how
+    /// far in the positive direction the sticky frame can offset its contents in this
+    /// dimension.
+    pub max: f32,
+}
+
+impl StickyOffsetBounds {
+    pub fn new(min: f32, max: f32) -> StickyOffsetBounds {
+        StickyOffsetBounds { min, max }
+    }
+}
+
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct StickyFrameDisplayItem {
     pub id: ClipId,
-    pub sticky_frame_info: StickyFrameInfo,
-}
 
-pub type StickyFrameInfo = TypedSideOffsets2D<Option<StickySideConstraint>, LayoutPoint>;
+    /// The margins that should be maintained between the edge of the parent viewport and this
+    /// sticky frame. A margin of None indicates that the sticky frame should not stick at all
+    /// to that particular edge of the viewport.
+    pub margins: SideOffsets2D<Option<f32>>,
 
-#[repr(C)]
-#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
-pub struct StickySideConstraint {
-    pub margin: f32,
-    pub max_offset: f32,
+    /// The minimum and maximum vertical offsets for this sticky frame. Ignoring these constraints,
+    /// the sticky frame will continue to stick to the edge of the viewport as its original
+    /// position is scrolled out of view. Constraints specify a maximum and minimum offset from the
+    /// original position relative to non-sticky content within the same scrolling frame.
+    pub vertical_offset_bounds: StickyOffsetBounds,
+
+    /// The minimum and maximum horizontal offsets for this sticky frame. Ignoring these constraints,
+    /// the sticky frame will continue to stick to the edge of the viewport as its original
+    /// position is scrolled out of view. Constraints specify a maximum and minimum offset from the
+    /// original position relative to non-sticky content within the same scrolling frame.
+    pub horizontal_offset_bounds: StickyOffsetBounds,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub enum ScrollSensitivity {
     ScriptAndInputEvents,
     Script,
 }
 
@@ -238,16 +267,23 @@ pub enum BorderDetails {
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct BorderDisplayItem {
     pub widths: BorderWidths,
     pub details: BorderDetails,
 }
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
+pub enum BorderRadiusKind {
+    Uniform,
+    NonUniform,
+}
+
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct BorderRadius {
     pub top_left: LayoutSize,
     pub top_right: LayoutSize,
     pub bottom_left: LayoutSize,
     pub bottom_right: LayoutSize,
 }
 
 #[repr(C)]
@@ -648,29 +684,25 @@ impl ComplexClipRegion {
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ClipId {
     Clip(u64, PipelineId),
     ClipExternalId(u64, PipelineId),
     DynamicallyAddedNode(u64, PipelineId),
 }
 
 impl ClipId {
-    pub fn root_scroll_node(pipeline_id: PipelineId) -> ClipId {
-        ClipId::Clip(0, pipeline_id)
-    }
-
     pub fn root_reference_frame(pipeline_id: PipelineId) -> ClipId {
         ClipId::DynamicallyAddedNode(0, pipeline_id)
     }
 
     pub fn new(id: u64, pipeline_id: PipelineId) -> ClipId {
-        // We do this because it is very easy to create accidentally create something that
-        // seems like a root scroll node, but isn't one.
+        // We do this because it is very easy to accidentally create something that
+        // seems like the root node, but isn't one.
         if id == 0 {
-            return ClipId::root_scroll_node(pipeline_id);
+            return ClipId::root_reference_frame(pipeline_id);
         }
 
         ClipId::ClipExternalId(id, pipeline_id)
     }
 
     pub fn pipeline_id(&self) -> PipelineId {
         match *self {
             ClipId::Clip(_, pipeline_id) |
@@ -681,15 +713,15 @@ impl ClipId {
 
     pub fn external_id(&self) -> Option<u64> {
         match *self {
             ClipId::ClipExternalId(id, _) => Some(id),
             _ => None,
         }
     }
 
-    pub fn is_root_scroll_node(&self) -> bool {
+    pub fn is_root(&self) -> bool {
         match *self {
-            ClipId::Clip(0, _) => true,
+            ClipId::DynamicallyAddedNode(0, _) => true,
             _ => false,
         }
     }
 }
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -1,25 +1,25 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use {BorderDetails, BorderDisplayItem, BorderWidths, BoxShadowClipMode, BoxShadowDisplayItem};
-use {ClipAndScrollInfo, ClipDisplayItem, ClipId, ColorF, ComplexClipRegion, DisplayItem};
-use {ExtendMode, FilterOp, FontInstanceKey, GlyphInstance};
-use {GlyphOptions, Gradient, GradientDisplayItem, GradientStop, IframeDisplayItem};
-use {ImageDisplayItem, ImageKey, ImageMask, ImageRendering, LayerPrimitiveInfo, LayoutPoint};
-use {LayoutPrimitiveInfo, LayoutRect, LayoutSize, LayoutTransform, LayoutVector2D};
-use {LineDisplayItem, LineOrientation, LineStyle, LocalClip, MixBlendMode, PipelineId};
-use {PropertyBinding, PushStackingContextDisplayItem, RadialGradient, RadialGradientDisplayItem};
-use {RectangleDisplayItem, ScrollFrameDisplayItem, ScrollPolicy, ScrollSensitivity};
-use {SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, StickyFrameInfo};
-use {BorderRadius, TextDisplayItem, Shadow, TransformStyle, YuvColorSpace, YuvData};
+use {BorderDetails, BorderDisplayItem, BorderRadius, BorderWidths, BoxShadowClipMode};
+use {BoxShadowDisplayItem, ClipAndScrollInfo, ClipDisplayItem, ClipId, ColorF, ComplexClipRegion};
+use {DisplayItem, ExtendMode, FilterOp, FontInstanceKey, GlyphInstance, GlyphOptions, Gradient};
+use {GradientDisplayItem, GradientStop, IframeDisplayItem, ImageDisplayItem, ImageKey, ImageMask};
+use {ImageRendering, LayerPrimitiveInfo, LayoutPoint, LayoutPrimitiveInfo, LayoutRect, LayoutSize};
+use {LayoutTransform, LayoutVector2D, LineDisplayItem, LineOrientation, LineStyle, LocalClip};
+use {MixBlendMode, PipelineId, PropertyBinding, PushStackingContextDisplayItem, RadialGradient};
+use {RadialGradientDisplayItem, RectangleDisplayItem, ScrollFrameDisplayItem, ScrollPolicy};
+use {ScrollSensitivity, Shadow, SpecificDisplayItem, StackingContext, StickyFrameDisplayItem};
+use {StickyOffsetBounds, TextDisplayItem, TransformStyle, YuvColorSpace, YuvData};
 use YuvImageDisplayItem;
 use bincode;
+use euclid::SideOffsets2D;
 use serde::{Deserialize, Serialize, Serializer};
 use serde::ser::{SerializeMap, SerializeSeq};
 use std::io::{Read, Write};
 use std::{io, ptr};
 use std::marker::PhantomData;
 use std::slice;
 use time::precise_time_ns;
 
@@ -652,17 +652,17 @@ impl DisplayListBuilder {
 
         // We start at 1 here, because the root scroll id is always 0.
         const FIRST_CLIP_ID: u64 = 1;
 
         DisplayListBuilder {
             data: Vec::with_capacity(capacity),
             pipeline_id,
             clip_stack: vec![
-                ClipAndScrollInfo::simple(ClipId::root_scroll_node(pipeline_id)),
+                ClipAndScrollInfo::simple(ClipId::root_reference_frame(pipeline_id)),
             ],
             next_clip_id: FIRST_CLIP_ID,
             builder_start_time: start_time,
             content_size,
             save_state: None,
         }
     }
 
@@ -786,16 +786,20 @@ impl DisplayListBuilder {
         debug_assert_eq!(len, count);
     }
 
     pub fn push_rect(&mut self, info: &LayoutPrimitiveInfo, color: ColorF) {
         let item = SpecificDisplayItem::Rectangle(RectangleDisplayItem { color });
         self.push_item(item, info);
     }
 
+    pub fn push_clear_rect(&mut self, info: &LayoutPrimitiveInfo) {
+        self.push_item(SpecificDisplayItem::ClearRectangle, info);
+    }
+
     pub fn push_line(
         &mut self,
         info: &LayoutPrimitiveInfo,
         wavy_line_thickness: f32,
         orientation: LineOrientation,
         color: &ColorF,
         style: LineStyle,
     ) {
@@ -1252,22 +1256,26 @@ impl DisplayListBuilder {
         self.push_iter(complex_clips);
         id
     }
 
     pub fn define_sticky_frame(
         &mut self,
         id: Option<ClipId>,
         frame_rect: LayoutRect,
-        sticky_frame_info: StickyFrameInfo,
+        margins: SideOffsets2D<Option<f32>>,
+        vertical_offset_bounds: StickyOffsetBounds,
+        horizontal_offset_bounds: StickyOffsetBounds,
     ) -> ClipId {
         let id = self.generate_clip_id(id);
         let item = SpecificDisplayItem::StickyFrame(StickyFrameDisplayItem {
             id,
-            sticky_frame_info,
+            margins,
+            vertical_offset_bounds,
+            horizontal_offset_bounds,
         });
 
         let info = LayoutPrimitiveInfo::new(frame_rect);
         self.push_item(item, &info);
         id
     }
 
     pub fn push_clip_id(&mut self, id: ClipId) {