Bug 1415150 - Update webrender to commit f58ed651b47f47382b63dd2bce6e4ed10ee18c78. r=jrmuizel
author: Kartikaya Gupta <kgupta@mozilla.com>
Fri, 10 Nov 2017 10:55:05 -0500
changeset 391306 c1aec254e3c6ddc1bad318aa3852440de172b1e0
parent 391305 777cfa8b3fd90a327f213b588b0887b872ebd7de
child 391307 d6857ce95bb686803ab4b9634e969cbdc586012b
push id: 97236
push user: ryanvm@gmail.com
push date: Fri, 10 Nov 2017 21:14:41 +0000
treeherder: mozilla-inbound@abc17e0eea77 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jrmuizel
bugs: 1415150
milestone: 58.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1415150 - Update webrender to commit f58ed651b47f47382b63dd2bce6e4ed10ee18c78. r=jrmuizel MozReview-Commit-ID: 8wGfwYlhJ1N
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/doc/text-rendering.md
gfx/webrender/examples/animation.rs
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_border_corner.glsl
gfx/webrender/res/ps_border_edge.glsl
gfx/webrender/res/ps_gradient.glsl
gfx/webrender/res/ps_image.glsl
gfx/webrender/res/ps_line.glsl
gfx/webrender/res/ps_rectangle.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/ps_yuv_image.glsl
gfx/webrender/src/border.rs
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-34f1e8ed19a19cb950deef89ee31c1cf3d442d22
+f58ed651b47f47382b63dd2bce6e4ed10ee18c78
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -12,17 +12,17 @@ freetype-lib = ["freetype/servo-freetype
 profiler = ["thread_profiler/thread_profiler"]
 debugger = ["ws", "serde_json", "serde", "serde_derive"]
 query = []
 
 [dependencies]
 app_units = "0.5.6"
 bincode = "0.9"
 byteorder = "1.0"
-euclid = "0.15.2"
+euclid = "0.15.5"
 fxhash = "0.2.1"
 gleam = "0.4.8"
 lazy_static = "0.2"
 log = "0.3"
 num-traits = "0.1.32"
 time = "0.1"
 rayon = "0.8"
 webrender_api = {path = "../webrender_api"}
--- a/gfx/webrender/doc/text-rendering.md
+++ b/gfx/webrender/doc/text-rendering.md
@@ -288,22 +288,54 @@ Notably, in the single-channel alpha cas
 So we were able to take advantage of OpenGL's `ONE_MINUS_SRC_ALPHA` blend func.
 
 In the component alpha case, we're not so lucky: Each destination color channel
 is multiplied with a different factor. We can use `ONE_MINUS_SRC_COLOR` instead,
 and output `text_color.a * mask` from our fragment shader.
 But then there's still the problem that the first summand of the computation for `result.r` uses
 `text_color.r * mask.r` and the second summand uses `text_color.a * mask.r`.
 
-There's no way around it, we have to use two passes.
-(Actually, there is a way around it, but it requires the use of `glBlendColor`, which we want to avoid because
-we'd have to use different draw calls for different text colors, or it requires "dual source blending" which is
-not supported everywhere.)
+There are multiple ways to deal with this. They are:
+
+ 1. Making use of `glBlendColor` and the `GL_CONSTANT_COLOR` blend func.
+ 2. Using a two-pass method.
+ 3. Using "dual source blending".
+
+Let's look at them in order.
+
+#### 1. Subpixel text blending in OpenGL using `glBlendColor`
+
+In this approach we return `text_color.a * mask` from the shader.
+Then we set the blend color to `text_color / text_color.a` and use `GL_CONSTANT_COLOR` as the source blendfunc.
+This results in the following blend equation:
 
-Here's how we can express the subpixel text blend function with two passes:
+```
+result.r = (text_color.r / text_color.a) * oFragColor.r + (1 - oFragColor.r) * dest.r;
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^                ^  ^^^^^^^^^^^^^^^^^
+                         |                              |      |
+                         +--gl::CONSTANT_COLOR          |      +-- gl::ONE_MINUS_SRC_COLOR
+                                                        |
+                                                        +-- gl::FUNC_ADD
+
+         = (text_color.r / text_color.a) * (text_color.a * mask.r) + (1 - (text_color.a * mask.r)) * dest.r
+         = text_color.r * mask.r + (1 - text_color.a * mask.r) * dest.r
+```
+
+At the very beginning of this document, we defined `text_color` as the *premultiplied* text color.
+So instead of actually doing the calculation `text_color.r / text_color.a` when specifying the blend color,
+we really just want to use the *unpremultiplied* text color in that place.
+That's usually the representation we start with anyway.
+
+#### 2. Two-pass subpixel blending in OpenGL
+
+The `glBlendColor` method has the disadvantage that the text color is part of the OpenGL state.
+So if we want to draw text with different colors, we have to use separate batches / draw calls
+to draw the differently-colored parts of text.
+
+Alternatively, we can use a two-pass method which avoids the need to use the `GL_CONSTANT_COLOR` blend func:
 
  - The first pass outputs `text_color.a * mask` from the fragment shader and
    uses `gl::ZERO, gl::ONE_MINUS_SRC_COLOR` as the glBlendFuncs. This achieves:
 
 ```
 oFragColor = text_color.a * mask;
 
 result_after_pass0.r = 0 * oFragColor.r + (1 - oFragColor.r) * dest.r
@@ -311,30 +343,36 @@ result_after_pass0.r = 0 * oFragColor.r 
 
 result_after_pass0.g = 0 * oFragColor.g + (1 - oFragColor.g) * dest.g
                      = (1 - text_color.a * mask.g) * dest.g
 
 ...
 ```
 
  - The second pass outputs `text_color * mask` from the fragment shader and uses
-   `gl::ONE, gl::ONE` as the glBlendFuncs. This gets us:
+   `gl::ONE, gl::ONE` as the glBlendFuncs. This results in the correct overall blend equation.
 
 ```
 oFragColor = text_color * mask;
 
 result_after_pass1.r
  = 1 * oFragColor.r + 1 * result_after_pass0.r
  = text_color.r * mask.r + result_after_pass0.r
  = text_color.r * mask.r + (1 - text_color.a * mask.r) * dest.r
 ```
 
-And analogous results for the other channels.
+#### 3. Dual source subpixel blending in OpenGL
 
-This achieves what we set out to do, so we're done here.
+The third approach is similar to the second approach, but makes use of the [`ARB_blend_func_extended`](https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_blend_func_extended.txt) extension
+in order to fold the two passes into one:
+Instead of outputting the two different colors in two separate passes, we output them from the same pass,
+as two separate fragment shader outputs.
+Those outputs can then be treated as two different sources in the blend equation.
+
+This method of text blending has not been implemented in WebRender yet.
 
 ## Subpixel Text Rendering to Transparent Destinations with a Background Color Hint
 
 ### Motivation
 
 As we've seen in the previous section, subpixel text drawing has the limitation that it only works on opaque destinations.
 
 In other words, if you use the `subpixeltextblend` function to draw something to a transparent surface,
--- a/gfx/webrender/examples/animation.rs
+++ b/gfx/webrender/examples/animation.rs
@@ -1,82 +1,99 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+//! This example creates a 200x200 white rect and allows the user to move it
+//! around by using the arrow keys and rotate with '<'/'>'.
+//! It does this by using the animation API.
+
+//! The example also features seamless opaque/transparent split of a
+//! rounded cornered rectangle, which is done automatically during the
+//! scene building for render optimization.
+
+extern crate euclid;
 extern crate gleam;
 extern crate glutin;
 extern crate webrender;
 
 #[path = "common/boilerplate.rs"]
 mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
+use euclid::Radians;
 use webrender::api::*;
 
-// This example creates a 100x100 white rect and allows the user to move it
-// around by using the arrow keys. It does this by using the animation API.
-
 struct App {
+    property_key: PropertyBindingKey<LayoutTransform>,
     transform: LayoutTransform,
 }
 
 impl Example for App {
     fn render(
         &mut self,
         _api: &RenderApi,
         builder: &mut DisplayListBuilder,
         _resources: &mut ResourceUpdates,
         _layout_size: LayoutSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
-        // Create a 100x100 stacking context with an animatable transform property.
-        // Note the magic "42" we use as the animation key. That is used to update
-        // the transform in the keyboard event handler code.
-        let bounds = (0, 0).to(100, 100);
-        let info = LayoutPrimitiveInfo::new(bounds);
+        // Create a 200x200 stacking context with an animated transform property.
+        let bounds = (0, 0).to(200, 200);
+        let complex_clip = ComplexClipRegion {
+            rect: bounds,
+            radii: BorderRadius::uniform(50.0),
+            mode: ClipMode::Clip,
+        };
+        let info = LayoutPrimitiveInfo {
+            local_clip: LocalClip::RoundedRect(bounds, complex_clip),
+            .. LayoutPrimitiveInfo::new(bounds)
+        };
 
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
-            Some(PropertyBinding::Binding(PropertyBindingKey::new(42))),
+            Some(PropertyBinding::Binding(self.property_key)),
             TransformStyle::Flat,
             None,
             MixBlendMode::Normal,
             Vec::new(),
         );
 
         // Fill it with a white rect
         builder.push_rect(&info, ColorF::new(1.0, 1.0, 1.0, 1.0));
 
         builder.pop_stacking_context();
     }
 
     fn on_event(&mut self, event: glutin::Event, api: &RenderApi, document_id: DocumentId) -> bool {
         match event {
             glutin::Event::KeyboardInput(glutin::ElementState::Pressed, _, Some(key)) => {
-                let offset = match key {
-                    glutin::VirtualKeyCode::Down => (0.0, 10.0),
-                    glutin::VirtualKeyCode::Up => (0.0, -10.0),
-                    glutin::VirtualKeyCode::Right => (10.0, 0.0),
-                    glutin::VirtualKeyCode::Left => (-10.0, 0.0),
+                let (offset_x, offset_y, angle) = match key {
+                    glutin::VirtualKeyCode::Down => (0.0, 10.0, 0.0),
+                    glutin::VirtualKeyCode::Up => (0.0, -10.0, 0.0),
+                    glutin::VirtualKeyCode::Right => (10.0, 0.0, 0.0),
+                    glutin::VirtualKeyCode::Left => (-10.0, 0.0, 0.0),
+                    glutin::VirtualKeyCode::Comma => (0.0, 0.0, 0.1),
+                    glutin::VirtualKeyCode::Period => (0.0, 0.0, -0.1),
                     _ => return false,
                 };
                 // Update the transform based on the keyboard input and push it to
                 // webrender using the generate_frame API. This will recomposite with
                 // the updated transform.
                 let new_transform = self.transform
-                    .post_translate(LayoutVector3D::new(offset.0, offset.1, 0.0));
+                    .pre_rotate(0.0, 0.0, 1.0, Radians::new(angle))
+                    .post_translate(LayoutVector3D::new(offset_x, offset_y, 0.0));
                 api.generate_frame(
                     document_id,
                     Some(DynamicProperties {
                         transforms: vec![
                             PropertyValue {
-                                key: PropertyBindingKey::new(42),
+                                key: self.property_key,
                                 value: new_transform,
                             },
                         ],
                         floats: vec![],
                     }),
                 );
                 self.transform = new_transform;
             }
@@ -84,12 +101,13 @@ impl Example for App {
         }
 
         false
     }
 }
 
 fn main() {
     let mut app = App {
-        transform: LayoutTransform::identity(),
+        property_key: PropertyBindingKey::new(42), // arbitrary magic number
+        transform: LayoutTransform::create_translation(0.0, 0.0, 0.0),
     };
     boilerplate::main_wrapper(&mut app, None);
 }
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -629,27 +629,30 @@ vec2 intersect_lines(vec2 p0, vec2 p1, v
     float nx = s0 * d1.x - d0.x * s1;
     float ny = s0 * d1.y - d0.y * s1;
 
     return vec2(nx / d, ny / d);
 }
 
 TransformVertexInfo write_transform_vertex(RectWithSize instance_rect,
                                            RectWithSize local_clip_rect,
+                                           vec4 clip_edge_mask,
                                            float z,
                                            Layer layer,
-                                           AlphaBatchTask task,
-                                           RectWithSize snap_rect) {
+                                           AlphaBatchTask task) {
     RectWithEndpoint local_rect = to_rect_with_endpoint(instance_rect);
+    RectWithSize clip_rect;
+    clip_rect.p0 = clamp_rect(local_clip_rect.p0, layer.local_clip_rect);
+    clip_rect.size = clamp_rect(local_clip_rect.p0 + local_clip_rect.size, layer.local_clip_rect) - clip_rect.p0;
 
     vec2 current_local_pos, prev_local_pos, next_local_pos;
 
     // Clamp to the two local clip rects.
-    local_rect.p0 = clamp_rect(clamp_rect(local_rect.p0, local_clip_rect), layer.local_clip_rect);
-    local_rect.p1 = clamp_rect(clamp_rect(local_rect.p1, local_clip_rect), layer.local_clip_rect);
+    local_rect.p0 = clamp_rect(local_rect.p0, clip_rect);
+    local_rect.p1 = clamp_rect(local_rect.p1, clip_rect);
 
     // Select the current vertex and the previous/next vertices,
     // based on the vertex ID that is known based on the instance rect.
     switch (gl_VertexID) {
         case 0:
             current_local_pos = vec2(local_rect.p0.x, local_rect.p0.y);
             next_local_pos = vec2(local_rect.p0.x, local_rect.p1.y);
             prev_local_pos = vec2(local_rect.p1.x, local_rect.p0.y);
@@ -702,23 +705,39 @@ TransformVertexInfo write_transform_vert
 
     vec4 layer_pos = get_layer_pos(device_pos / uDevicePixelRatio, layer);
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos - //Note: `snap_rect` is not used
                      task.screen_space_origin +
                      task.render_target_origin;
 
+
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
-    vLocalBounds = vec4(local_rect.p0, local_rect.p1);
+    vLocalBounds = mix(
+        vec4(clip_rect.p0, clip_rect.p0 + clip_rect.size),
+        vec4(local_rect.p0, local_rect.p1),
+        clip_edge_mask
+    );
 
     return TransformVertexInfo(layer_pos.xyw, device_pos);
 }
 
+TransformVertexInfo write_transform_vertex_primitive(Primitive prim) {
+    return write_transform_vertex(
+        prim.local_rect,
+        prim.local_clip_rect,
+        vec4(0.0),
+        prim.z,
+        prim.layer,
+        prim.task
+    );
+}
+
 #endif //WR_FEATURE_TRANSFORM
 
 struct GlyphResource {
     vec4 uv_rect;
     float layer;
     vec2 offset;
     float scale;
 };
@@ -740,21 +759,23 @@ ImageResource fetch_image_resource(int a
 
 ImageResource fetch_image_resource_direct(ivec2 address) {
     vec4 data[2] = fetch_from_resource_cache_2_direct(address);
     return ImageResource(data[0], data[1].x);
 }
 
 struct Rectangle {
     vec4 color;
+    vec4 edge_aa_segment_mask;
 };
 
 Rectangle fetch_rectangle(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
-    return Rectangle(data);
+    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 mask = vec4((int(data[1].x) & ivec4(1,2,4,8)) != ivec4(0));
+    return Rectangle(data[0], mask);
 }
 
 struct TextRun {
     vec4 color;
     vec4 bg_color;
     vec2 offset;
     int subpx_dir;
 };
--- a/gfx/webrender/res/ps_border_corner.glsl
+++ b/gfx/webrender/res/ps_border_corner.glsl
@@ -299,20 +299,20 @@ void main(void) {
 
     RectWithSize segment_rect;
     segment_rect.p0 = p0;
     segment_rect.size = p1 - p0;
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
+                                                    vec4(1.0),
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.task,
-                                                    prim.local_rect);
+                                                    prim.task);
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect);
 #endif
--- a/gfx/webrender/res/ps_border_edge.glsl
+++ b/gfx/webrender/res/ps_border_edge.glsl
@@ -217,20 +217,20 @@ void main(void) {
 
     write_alpha_select(style);
     write_color0(color, style, color_flip);
     write_color1(color, style, color_flip);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
+                                                    vec4(1.0),
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.task,
-                                                    prim.local_rect);
+                                                    prim.task);
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect);
 #endif
--- a/gfx/webrender/res/ps_gradient.glsl
+++ b/gfx/webrender/res/ps_gradient.glsl
@@ -68,20 +68,20 @@ void main(void) {
         vec2 adjusted_offset = (g01_y_clamped - g01_y.xx) / (g01_y.y - g01_y.x);
         adjusted_color_g0 = mix(g0.color, g1.color, adjusted_offset.x);
         adjusted_color_g1 = mix(g0.color, g1.color, adjusted_offset.y);
     }
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
+                                                    vec4(1.0),
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.task,
-                                                    prim.local_rect);
+                                                    prim.task);
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy - prim.local_rect.p0) / prim.local_rect.size;
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
--- a/gfx/webrender/res/ps_image.glsl
+++ b/gfx/webrender/res/ps_image.glsl
@@ -23,22 +23,17 @@ flat varying vec2 vStretchSize;
 
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     Primitive prim = load_primitive();
     Image image = fetch_image(prim.specific_prim_address);
     ImageResource res = fetch_image_resource(prim.user_data0);
 
 #ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
-                                                    prim.local_clip_rect,
-                                                    prim.z,
-                                                    prim.layer,
-                                                    prim.task,
-                                                    prim.local_rect);
+    TransformVertexInfo vi = write_transform_vertex_primitive(prim);
     vLocalPos = vi.local_pos;
     vLocalRect = vec4(prim.local_rect.p0, prim.local_rect.p0 + prim.local_rect.size);
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
--- a/gfx/webrender/res/ps_line.glsl
+++ b/gfx/webrender/res/ps_line.glsl
@@ -112,22 +112,17 @@ void main(void) {
                     prim.local_rect.p0 + prim.local_rect.size,
                     aPosition.xy);
 
     gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
 #else
     vColor = line.color;
 
     #ifdef WR_FEATURE_TRANSFORM
-        TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
-                                                        prim.local_clip_rect,
-                                                        prim.z,
-                                                        prim.layer,
-                                                        prim.task,
-                                                        prim.local_rect);
+        TransformVertexInfo vi = write_transform_vertex_primitive(prim);
     #else
         VertexInfo vi = write_vertex(prim.local_rect,
                                      prim.local_clip_rect,
                                      prim.z,
                                      prim.layer,
                                      prim.task,
                                      prim.local_rect);
     #endif
--- a/gfx/webrender/res/ps_rectangle.glsl
+++ b/gfx/webrender/res/ps_rectangle.glsl
@@ -13,20 +13,20 @@ varying vec3 vLocalPos;
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     Primitive prim = load_primitive();
     Rectangle rect = fetch_rectangle(prim.specific_prim_address);
     vColor = rect.color;
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
+                                                    rect.edge_aa_segment_mask,
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.task,
-                                                    prim.local_rect);
+                                                    prim.task);
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect);
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -9,24 +9,24 @@ varying vec3 vUv;
 flat varying vec4 vUvBorder;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 #endif
 
 #ifdef WR_VERTEX_SHADER
 
-#define MODE_ALPHA          0
-#define MODE_SUBPX_OPAQUE   1
-#define MODE_SUBPX_PASS0    2
-#define MODE_SUBPX_PASS1    3
-#define MODE_SUBPX_BG_PASS0 4
-#define MODE_SUBPX_BG_PASS1 5
-#define MODE_SUBPX_BG_PASS2 6
-#define MODE_COLOR_BITMAP   7
+#define MODE_ALPHA              0
+#define MODE_SUBPX_CONST_COLOR  1
+#define MODE_SUBPX_PASS0        2
+#define MODE_SUBPX_PASS1        3
+#define MODE_SUBPX_BG_PASS0     4
+#define MODE_SUBPX_BG_PASS1     5
+#define MODE_SUBPX_BG_PASS2     6
+#define MODE_COLOR_BITMAP       7
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
     int resource_address = prim.user_data1;
 
@@ -40,20 +40,20 @@ void main(void) {
                      vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
 
     RectWithSize local_rect = RectWithSize(local_pos,
                                            (res.uv_rect.zw - res.uv_rect.xy) * res.scale / uDevicePixelRatio);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(local_rect,
                                                     prim.local_clip_rect,
+                                                    vec4(0.0),
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.task,
-                                                    local_rect);
+                                                    prim.task);
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy / vi.local_pos.z - local_rect.p0) / local_rect.size;
 #else
     VertexInfo vi = write_vertex(local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
@@ -67,26 +67,22 @@ void main(void) {
     vColor = vec4(text.color.a) * text.bg_color;
 #else
     switch (uMode) {
         case MODE_ALPHA:
         case MODE_SUBPX_PASS1:
         case MODE_SUBPX_BG_PASS2:
             vColor = text.color;
             break;
+        case MODE_SUBPX_CONST_COLOR:
         case MODE_SUBPX_PASS0:
         case MODE_SUBPX_BG_PASS0:
         case MODE_COLOR_BITMAP:
             vColor = vec4(text.color.a);
             break;
-        case MODE_SUBPX_OPAQUE:
-            // The text foreground color is handled by the constant
-            // color blend mode.
-            vColor = vec4(1.0);
-            break;
         case MODE_SUBPX_BG_PASS1:
             // This should never be reached.
             break;
     }
 #endif
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
--- a/gfx/webrender/res/ps_yuv_image.glsl
+++ b/gfx/webrender/res/ps_yuv_image.glsl
@@ -32,22 +32,17 @@ struct YuvImage {
 YuvImage fetch_yuv_image(int address) {
     vec4 data = fetch_from_resource_cache_1(address);
     return YuvImage(data.xy);
 }
 
 void main(void) {
     Primitive prim = load_primitive();
 #ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
-                                                    prim.local_clip_rect,
-                                                    prim.z,
-                                                    prim.layer,
-                                                    prim.task,
-                                                    prim.local_rect);
+    TransformVertexInfo vi = write_transform_vertex_primitive(prim);
     vLocalPos = vi.local_pos;
     vLocalRect = vec4(prim.local_rect.p0, prim.local_rect.p0 + prim.local_rect.size);
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -1,13 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderSide, BorderStyle, BorderWidths, ClipAndScrollInfo, ColorF, LayerPoint, LayerRect};
+use api::{BorderSide, BorderStyle, BorderWidths, ClipAndScrollInfo, ColorF};
+use api::{EdgeAaSegmentMask, LayerPoint, LayerRect};
 use api::{LayerPrimitiveInfo, LayerSize, NormalBorder, RepeatMode};
 use clip::ClipSource;
 use ellipse::Ellipse;
 use frame_builder::FrameBuilder;
 use gpu_cache::GpuDataRequest;
 use prim_store::{BorderPrimitiveCpu, RectangleContent, PrimitiveContainer, TexelRect};
 use tiling::PrimitiveFlags;
 use util::{lerp, pack_as_float};
@@ -372,64 +373,65 @@ impl FrameBuilder {
 
         let has_no_curve = radius.is_zero();
 
         if has_no_curve && all_corners_simple && all_edges_simple {
             let p0 = info.rect.origin;
             let p1 = info.rect.bottom_right();
             let rect_width = info.rect.size.width;
             let rect_height = info.rect.size.height;
+            let mut info = info.clone();
 
             // Add a solid rectangle for each visible edge/corner combination.
             if top_edge == BorderEdgeKind::Solid {
-                let mut info = info.clone();
                 info.rect = LayerRect::new(p0, LayerSize::new(rect_width, top_len));
+                info.edge_aa_segment_mask = EdgeAaSegmentMask::BOTTOM;
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
-                    &RectangleContent::Fill(border.top.color),
+                    RectangleContent::Fill(border.top.color),
                     PrimitiveFlags::None,
                 );
             }
             if left_edge == BorderEdgeKind::Solid {
-                let mut info = info.clone();
                 info.rect = LayerRect::new(
                     LayerPoint::new(p0.x, p0.y + top_len),
                     LayerSize::new(left_len, rect_height - top_len - bottom_len),
                 );
+                info.edge_aa_segment_mask = EdgeAaSegmentMask::RIGHT;
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
-                    &RectangleContent::Fill(border.left.color),
+                    RectangleContent::Fill(border.left.color),
                     PrimitiveFlags::None,
                 );
             }
             if right_edge == BorderEdgeKind::Solid {
-                let mut info = info.clone();
                 info.rect = LayerRect::new(
                     LayerPoint::new(p1.x - right_len, p0.y + top_len),
                     LayerSize::new(right_len, rect_height - top_len - bottom_len),
                 );
+                info.edge_aa_segment_mask = EdgeAaSegmentMask::LEFT;
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
-                    &RectangleContent::Fill(border.right.color),
+                    RectangleContent::Fill(border.right.color),
                     PrimitiveFlags::None,
                 );
             }
             if bottom_edge == BorderEdgeKind::Solid {
-                let mut info = info.clone();
                 info.rect = LayerRect::new(
                     LayerPoint::new(p0.x, p1.y - bottom_len),
                     LayerSize::new(rect_width, bottom_len),
                 );
+                info.edge_aa_segment_mask = EdgeAaSegmentMask::TOP;
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
-                    &RectangleContent::Fill(border.bottom.color),
+                    RectangleContent::Fill(border.bottom.color),
                     PrimitiveFlags::None,
                 );
             }
         } else {
             // Create clip masks for border corners, if required.
             let mut extra_clips = Vec::new();
             let mut corner_instances = [BorderCornerInstance::Single; 4];
 
@@ -550,17 +552,17 @@ impl BorderCornerClipSource {
                 // Round that up to the nearest integer, so that the dash length
                 // doesn't exceed the ratio above. Add one extra dash to cover
                 // the last half-dash of the arc.
                 (ellipse, 1 + desired_count.ceil() as usize)
             }
             BorderCornerClipKind::Dot => {
                 // The centers of dots follow an ellipse along the middle of the
                 // border radius.
-                let inner_radius = corner_radius - widths * 0.5;
+                let inner_radius = (corner_radius - widths * 0.5).abs();
                 let ellipse = Ellipse::new(inner_radius);
 
                 // Allocate a "worst case" number of dot clips. This can be
                 // calculated by taking the minimum edge radius, since that
                 // will result in the maximum number of dots along the path.
                 let min_diameter = widths.width.min(widths.height);
 
                 // Get the number of circles (assuming spacing of one diameter
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -1,21 +1,22 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadiusKind, ColorF, LayerPoint, LayerRect, LayerSize, LayerVector2D};
 use api::{BorderRadius, BoxShadowClipMode, LayoutSize, LayerPrimitiveInfo};
-use api::{ClipMode, ComplexClipRegion, LocalClip, ClipAndScrollInfo};
+use api::{ClipMode, ComplexClipRegion, EdgeAaSegmentMask, LocalClip, ClipAndScrollInfo};
 use clip::ClipSource;
 use frame_builder::FrameBuilder;
 use prim_store::{PrimitiveContainer, RectangleContent, RectanglePrimitive};
 use prim_store::{BrushMaskKind, BrushKind, BrushPrimitive};
 use picture::PicturePrimitive;
 use util::RectHelpers;
+use render_task::MAX_BLUR_STD_DEVIATION;
 
 // The blur shader samples BLUR_SAMPLE_SCALE * blur_radius surrounding texels.
 pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
 
 // The amount of padding added to the border corner drawn in the box shadow
 // mask. This ensures that we get a few pixels past the corner that can be
 // blurred without being affected by the border radius.
 pub const MASK_CORNER_PADDING: f32 = 4.0;
@@ -99,16 +100,17 @@ impl FrameBuilder {
             };
 
             self.add_primitive(
                 clip_and_scroll,
                 &fast_info,
                 clips,
                 PrimitiveContainer::Rectangle(RectanglePrimitive {
                     content: RectangleContent::Fill(*color),
+                    edge_aa_segment_mask: EdgeAaSegmentMask::empty(),
                 }),
             );
         } else {
             let blur_offset = BLUR_SAMPLE_SCALE * blur_radius;
             let mut extra_clips = vec![];
 
             match clip_mode {
                 BoxShadowClipMode::Outset => {
@@ -221,20 +223,30 @@ impl FrameBuilder {
                         prim_info.rect.size
                     );
 
                     // Define where the inset box shadow rect is, local
                     // to the brush rect above.
                     let clip_rect = brush_rect.translate(box_offset)
                                               .inflate(spread_amount, spread_amount);
 
-                    // Ensure there is one pixel around the edges, so that there
+                    // Ensure there is at least one pixel around the edges, so that there
                     // is non-zero data to blur, in the case of an inset shadow
                     // with zero spread and zero offset.
-                    let brush_rect = brush_rect.inflate(1.0, 1.0);
+                    // The inflation size is determined by the blur std deviation, because a
+                    // blur with a large std deviation is downscaled first. Thus, we need a
+                    // thicker edge to prevent the edge from getting blurred after downscaling.
+                    let mut adjusted_blur_std_deviation = blur_radius * 0.5;
+                    let mut inflate_size = 1.0;
+                    while adjusted_blur_std_deviation > MAX_BLUR_STD_DEVIATION {
+                        adjusted_blur_std_deviation *= 0.5;
+                        inflate_size += 1.0;
+                    }
+
+                    let brush_rect = brush_rect.inflate(inflate_size, inflate_size);
                     let brush_prim = BrushPrimitive {
                         kind: BrushKind::Mask {
                             clip_mode: brush_clip_mode,
                             kind: BrushMaskKind::RoundedRect(clip_rect, shadow_radius),
                         }
                     };
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
                     let brush_prim_index = self.create_primitive(
@@ -259,17 +271,17 @@ impl FrameBuilder {
                         clip_and_scroll
                     );
                     pic_prim.build();
 
                     // Draw the picture one pixel outside the original
                     // rect to account for the inflate above. This
                     // extra edge will be clipped by the local clip
                     // rect set below.
-                    let pic_rect = prim_info.rect.inflate(1.0, 1.0);
+                    let pic_rect = prim_info.rect.inflate(inflate_size, inflate_size);
                     let pic_info = LayerPrimitiveInfo::with_clip_rect(
                         pic_rect,
                         prim_info.rect
                     );
 
                     // Add a normal clip to ensure nothing gets drawn
                     // outside the primitive rect.
                     if !border_radius.is_zero() {
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -155,17 +155,17 @@ pub struct ClipScrollNode {
 }
 
 impl ClipScrollNode {
     fn new(
         pipeline_id: PipelineId,
         parent_id: Option<ClipId>,
         rect: &LayerRect,
         node_type: NodeType
-    ) -> ClipScrollNode {
+    ) -> Self {
         ClipScrollNode {
             local_viewport_rect: *rect,
             local_clip_rect: *rect,
             combined_local_viewport_rect: LayerRect::zero(),
             world_viewport_transform: LayerToWorldTransform::identity(),
             world_content_transform: LayerToWorldTransform::identity(),
             reference_frame_relative_scroll_offset: LayerVector2D::zero(),
             parent: parent_id,
@@ -180,57 +180,57 @@ impl ClipScrollNode {
     }
 
     pub fn new_scroll_frame(
         pipeline_id: PipelineId,
         parent_id: ClipId,
         frame_rect: &LayerRect,
         content_size: &LayerSize,
         scroll_sensitivity: ScrollSensitivity,
-    ) -> ClipScrollNode {
+    ) -> Self {
         let node_type = NodeType::ScrollFrame(ScrollingState::new(
             scroll_sensitivity,
             LayerSize::new(
                 (content_size.width - frame_rect.size.width).max(0.0),
                 (content_size.height - frame_rect.size.height).max(0.0)
             )
         ));
 
         Self::new(pipeline_id, Some(parent_id), frame_rect, node_type)
     }
 
     pub fn new_clip_node(
         pipeline_id: PipelineId,
         parent_id: ClipId,
         clip_info: ClipInfo,
         clip_rect: LayerRect,
-    ) -> ClipScrollNode {
+    ) -> Self {
         Self::new(pipeline_id, Some(parent_id), &clip_rect, NodeType::Clip(clip_info))
     }
 
     pub fn new_reference_frame(
         parent_id: Option<ClipId>,
         frame_rect: &LayerRect,
         transform: &LayerToScrollTransform,
         origin_in_parent_reference_frame: LayerVector2D,
         pipeline_id: PipelineId,
-    ) -> ClipScrollNode {
+    ) -> Self {
         let info = ReferenceFrameInfo {
             transform: *transform,
             origin_in_parent_reference_frame,
         };
         Self::new(pipeline_id, parent_id, frame_rect, NodeType::ReferenceFrame(info))
     }
 
     pub fn new_sticky_frame(
         parent_id: ClipId,
         frame_rect: LayerRect,
         sticky_frame_info: StickyFrameInfo,
         pipeline_id: PipelineId,
-    ) -> ClipScrollNode {
+    ) -> Self {
         let node_type = NodeType::StickyFrame(sticky_frame_info);
         Self::new(pipeline_id, Some(parent_id), &frame_rect, node_type)
     }
 
 
     pub fn add_child(&mut self, child: ClipId) {
         self.children.push(child);
     }
@@ -705,16 +705,20 @@ impl ClipScrollNode {
     }
 
     pub fn is_overscrolling(&self) -> bool {
         match self.node_type {
             NodeType::ScrollFrame(ref state) => state.overscroll_amount() != LayerVector2D::zero(),
             _ => false,
         }
     }
+
+    pub fn is_visible(&self) -> bool {
+        self.combined_clip_outer_bounds != DeviceIntRect::zero()
+    }
 }
 
 #[derive(Copy, Clone, Debug)]
 pub struct ScrollingState {
     pub offset: LayerVector2D,
     pub spring: Spring,
     pub started_bouncing_back: bool,
     pub bouncing_back: bool,
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1920,18 +1920,19 @@ impl Device {
     pub fn set_blend_mode_subpixel_with_bg_color_pass1(&self) {
         self.gl.blend_func_separate(gl::ONE_MINUS_DST_ALPHA, gl::ONE, gl::ZERO, gl::ONE);
         self.gl.blend_equation(gl::FUNC_ADD);
     }
     pub fn set_blend_mode_subpixel_with_bg_color_pass2(&self) {
         self.gl.blend_func_separate(gl::ONE, gl::ONE, gl::ONE, gl::ONE_MINUS_SRC_ALPHA);
         self.gl.blend_equation(gl::FUNC_ADD);
     }
-    pub fn set_blend_mode_subpixel_opaque(&self, color: ColorF) {
-        self.gl.blend_color(color.r, color.g, color.b, color.a);
+    pub fn set_blend_mode_subpixel_constant_text_color(&self, color: ColorF) {
+        // color is an unpremultiplied color.
+        self.gl.blend_color(color.r, color.g, color.b, 1.0);
         self.gl
             .blend_func(gl::CONSTANT_COLOR, gl::ONE_MINUS_SRC_COLOR);
         self.gl.blend_equation(gl::FUNC_ADD);
     }
 }
 
 /// return (gl_internal_format, gl_format)
 fn gl_texture_formats_for_image_format(
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -1,33 +1,34 @@
 
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayListIter, ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion};
 use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, Epoch, FilterOp};
 use api::{ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
-use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutSize, LayoutTransform};
+use api::{LayerSize, LayerToScrollTransform, LayerVector2D};
+use api::{LayoutRect, LayoutSize, LayoutTransform};
 use api::{LocalClip, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLayerState};
 use api::{ScrollLocation, ScrollPolicy, ScrollSensitivity, SpecificDisplayItem, StackingContext};
 use api::{ClipMode, TileOffset, TransformStyle, WorldPoint};
 use clip::ClipRegion;
 use clip_scroll_node::StickyFrameInfo;
 use clip_scroll_tree::{ClipScrollTree, ScrollStates};
 use euclid::rect;
 use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, RendererFrame};
 use prim_store::RectangleContent;
 use profiler::{GpuCacheProfileCounters, TextureCacheProfileCounters};
 use resource_cache::{FontInstanceMap,ResourceCache, TiledImageMap};
 use scene::{Scene, StackingContextHelpers, ScenePipeline};
 use tiling::{CompositeOps, Frame, PrimitiveFlags};
-use util::{subtract_rect, ComplexClipRegionHelpers};
+use util::ComplexClipRegionHelpers;
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Eq, Ord)]
 pub struct FrameId(pub u32);
 
 static DEFAULT_SCROLLBAR_COLOR: ColorF = ColorF {
     r: 0.3,
     g: 0.3,
     b: 0.3,
@@ -37,16 +38,21 @@ static DEFAULT_SCROLLBAR_COLOR: ColorF =
 struct FlattenContext<'a> {
     scene: &'a Scene,
     builder: FrameBuilder,
     clip_scroll_tree: &'a mut ClipScrollTree,
     font_instances: FontInstanceMap,
     tiled_image_map: TiledImageMap,
     pipeline_epochs: Vec<(PipelineId, Epoch)>,
     replacements: Vec<(ClipId, ClipId)>,
+    /// Opaque rectangle vector, stored here in order to
+    /// avoid re-allocation on each use.
+    opaque_parts: Vec<LayoutRect>,
+    /// Same for the transparent rectangles.
+    transparent_parts: Vec<LayoutRect>,
 }
 
 impl<'a> FlattenContext<'a> {
     /// Since WebRender still handles fixed position and reference frame content internally
     /// we need to apply this table of id replacements only to the id that affects the
     /// position of a node. We can eventually remove this when clients start handling
     /// reference frames themselves. This method applies these replacements.
     fn apply_scroll_frame_id_replacement(&self, id: ClipId) -> ClipId {
@@ -101,34 +107,34 @@ impl<'a> FlattenContext<'a> {
         if self.scene.root_pipeline_id != Some(pipeline_id) {
             if let Some(pipeline) = self.scene.pipelines.get(&pipeline_id) {
                 if let Some(bg_color) = pipeline.background_color {
                     let root_bounds = LayerRect::new(LayerPoint::zero(), *content_size);
                     let info = LayerPrimitiveInfo::new(root_bounds);
                     self.builder.add_solid_rectangle(
                         ClipAndScrollInfo::simple(clip_id),
                         &info,
-                        &RectangleContent::Fill(bg_color),
+                        RectangleContent::Fill(bg_color),
                         PrimitiveFlags::None,
                     );
                 }
             }
         }
 
 
         self.flatten_items(traversal, pipeline_id, LayerVector2D::zero());
 
         if self.builder.config.enable_scrollbars {
             let scrollbar_rect = LayerRect::new(LayerPoint::zero(), LayerSize::new(10.0, 70.0));
             let info = LayerPrimitiveInfo::new(scrollbar_rect);
 
             self.builder.add_solid_rectangle(
                 ClipAndScrollInfo::simple(clip_id),
                 &info,
-                &RectangleContent::Fill(DEFAULT_SCROLLBAR_COLOR),
+                RectangleContent::Fill(DEFAULT_SCROLLBAR_COLOR),
                 PrimitiveFlags::Scrollbar(self.clip_scroll_tree.topmost_scrolling_node_id(), 4.0),
             );
         }
 
         self.builder.pop_stacking_context();
     }
 
     fn flatten_items(
@@ -441,32 +447,32 @@ impl<'a> FlattenContext<'a> {
                     None => {
                         warn!("Unknown font instance key: {:?}", text_info.font_key);
                     }
                 }
             }
             SpecificDisplayItem::Rectangle(ref info) => {
                 if !self.try_to_add_rectangle_splitting_on_clip(
                     &prim_info,
-                    &RectangleContent::Fill(info.color),
+                    RectangleContent::Fill(info.color),
                     &clip_and_scroll,
                 ) {
                     self.builder.add_solid_rectangle(
                         clip_and_scroll,
                         &prim_info,
-                        &RectangleContent::Fill(info.color),
+                        RectangleContent::Fill(info.color),
                         PrimitiveFlags::None,
                     );
                 }
             }
             SpecificDisplayItem::ClearRectangle => {
                 self.builder.add_solid_rectangle(
                     clip_and_scroll,
                     &prim_info,
-                    &RectangleContent::Clear,
+                    RectangleContent::Clear,
                     PrimitiveFlags::None,
                 );
             }
             SpecificDisplayItem::Line(ref info) => {
                 self.builder.add_line(
                     clip_and_scroll,
                     &prim_info,
                     info.wavy_line_thickness,
@@ -630,68 +636,84 @@ impl<'a> FlattenContext<'a> {
 
     /// Try to optimize the rendering of a solid rectangle that is clipped by a single
     /// rounded rectangle, by only masking the parts of the rectangle that intersect
     /// the rounded parts of the clip. This is pretty simple now, so has a lot of
     /// potential for further optimizations.
     fn try_to_add_rectangle_splitting_on_clip(
         &mut self,
         info: &LayerPrimitiveInfo,
-        content: &RectangleContent,
+        content: RectangleContent,
         clip_and_scroll: &ClipAndScrollInfo,
     ) -> bool {
+        if info.rect.size.area() < 200.0 { // arbitrary threshold
+            // too few pixels, don't bother adding instances
+            return false;
+        }
         // If this rectangle is not opaque, splitting the rectangle up
         // into an inner opaque region just ends up hurting batching and
         // doing more work than necessary.
-        if let &RectangleContent::Fill(ColorF{a, ..}) = content {
+        if let RectangleContent::Fill(ColorF{a, ..}) = content {
             if a != 1.0 {
                 return false;
             }
         }
 
-        let inner_unclipped_rect = match &info.local_clip {
-            &LocalClip::Rect(_) => return false,
-            &LocalClip::RoundedRect(_, ref region) => {
+        self.opaque_parts.clear();
+        self.transparent_parts.clear();
+
+        match info.local_clip {
+            LocalClip::Rect(_) => return false,
+            LocalClip::RoundedRect(_, ref region) => {
                 if region.mode == ClipMode::ClipOut {
                     return false;
                 }
-                region.get_inner_rect_full()
+                region.split_rectangles(
+                    &mut self.opaque_parts,
+                    &mut self.transparent_parts,
+                );
             }
         };
-        let inner_unclipped_rect = match inner_unclipped_rect {
-            Some(rect) => rect,
-            None => return false,
-        };
 
-        // The inner rectangle is not clipped by its assigned clipping node, so we can
-        // let it be clipped by the parent of the clipping node, which may result in
-        // less masking some cases.
-        let mut clipped_rects = Vec::new();
-        subtract_rect(&info.rect, &inner_unclipped_rect, &mut clipped_rects);
+        let local_clip = LocalClip::from(*info.local_clip.clip_rect());
+        let mut has_opaque = false;
 
-        let prim_info = LayerPrimitiveInfo {
-            rect: inner_unclipped_rect,
-            local_clip: LocalClip::from(*info.local_clip.clip_rect()),
-            is_backface_visible: info.is_backface_visible,
-            tag: None,
-        };
-
-        self.builder.add_solid_rectangle(
-            *clip_and_scroll,
-            &prim_info,
-            content,
-            PrimitiveFlags::None,
-        );
-
-        for clipped_rect in &clipped_rects {
-            let mut info = info.clone();
-            info.rect = *clipped_rect;
+        for opaque in &self.opaque_parts {
+            let prim_info = LayerPrimitiveInfo {
+                rect: match opaque.intersection(&info.rect) {
+                    Some(rect) => rect,
+                    None => continue,
+                },
+                local_clip,
+                .. info.clone()
+            };
             self.builder.add_solid_rectangle(
                 *clip_and_scroll,
-                &info,
+                &prim_info,
+                content,
+                PrimitiveFlags::None,
+            );
+            has_opaque = true;
+        }
+
+        if !has_opaque {
+            return false
+        }
+
+        for transparent in &self.transparent_parts {
+            let prim_info = LayerPrimitiveInfo {
+                rect: match transparent.intersection(&info.rect) {
+                    Some(rect) => rect,
+                    None => continue,
+                },
+                .. info.clone()
+            };
+            self.builder.add_solid_rectangle(
+                *clip_and_scroll,
+                &prim_info,
                 content,
                 PrimitiveFlags::None,
             );
         }
         true
     }
 
     /// Decomposes an image display item that is repeated into an image per individual repetition.
@@ -1097,16 +1119,18 @@ impl FrameContext {
                     background_color,
                     self.frame_builder_config,
                 ),
                 clip_scroll_tree: &mut self.clip_scroll_tree,
                 font_instances: resource_cache.get_font_instances(),
                 tiled_image_map: resource_cache.get_tiled_image_map(),
                 pipeline_epochs: Vec::new(),
                 replacements: Vec::new(),
+                opaque_parts: Vec::new(),
+                transparent_parts: Vec::new(),
             };
 
             roller.builder.push_root(
                 root_pipeline_id,
                 &root_pipeline.viewport_size,
                 &root_pipeline.content_size,
                 roller.clip_scroll_tree,
             );
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -20,17 +20,17 @@ use clip_scroll_tree::{ClipScrollTree};
 use euclid::{SideOffsets2D, TypedTransform3D, vec2, vec3};
 use frame::FrameId;
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, HardwareCompositeOp};
 use picture::{PicturePrimitive};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
-use prim_store::{PrimitiveContainer, PrimitiveIndex};
+use prim_store::{PrimitiveContainer, PrimitiveIndex, PrimitiveRun};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{RectangleContent, RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{AlphaRenderItem, ClearMode, RenderTask, RenderTaskId, RenderTaskLocation};
 use render_task::RenderTaskTree;
 use resource_cache::ResourceCache;
 use scene::ScenePipeline;
 use std::{mem, usize, f32, i32};
@@ -258,34 +258,34 @@ impl FrameBuilder {
     /// Add an already created primitive to the draw lists.
     pub fn add_primitive_to_draw_list(
         &mut self,
         prim_index: PrimitiveIndex,
         clip_and_scroll: ClipAndScrollInfo,
     ) {
         match self.cmds.last_mut().unwrap() {
             &mut PrimitiveRunCmd::PrimitiveRun(
-                run_prim_index,
-                ref mut count,
-                run_clip_and_scroll,
-            ) => if run_clip_and_scroll == clip_and_scroll &&
-                run_prim_index.0 + *count == prim_index.0
+                ref mut run,
+            ) => if run.clip_and_scroll == clip_and_scroll &&
+                run.base_prim_index.0 + run.count == prim_index.0
             {
-                *count += 1;
+                run.count += 1;
                 return;
             },
             &mut PrimitiveRunCmd::PushStackingContext(..) |
             &mut PrimitiveRunCmd::PopStackingContext => {}
         }
 
-        self.cmds.push(PrimitiveRunCmd::PrimitiveRun(
-            prim_index,
-            1,
+        let run = PrimitiveRun {
+            base_prim_index: prim_index,
+            count: 1,
             clip_and_scroll,
-        ));
+        };
+
+        self.cmds.push(PrimitiveRunCmd::PrimitiveRun(run));
     }
 
     /// Convenience interface that creates a primitive entry and adds it
     /// to the draw list.
     pub fn add_primitive(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
@@ -559,28 +559,31 @@ impl FrameBuilder {
         mem::replace(&mut self.pending_shadow_contents, pending_primitives);
         mem::replace(&mut self.shadow_prim_stack, shadows);
     }
 
     pub fn add_solid_rectangle(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
-        content: &RectangleContent,
+        content: RectangleContent,
         flags: PrimitiveFlags,
     ) {
-        if let &RectangleContent::Fill(ColorF{a, ..}) = content {
+        if let RectangleContent::Fill(ColorF{a, ..}) = content {
             if a == 0.0 {
                 // Don't add transparent rectangles to the draw list, but do consider them for hit
                 // testing. This allows specifying invisible hit testing areas.
                 self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
                 return;
             }
         }
-        let prim = RectanglePrimitive { content: *content };
+        let prim = RectanglePrimitive {
+            content,
+            edge_aa_segment_mask: info.edge_aa_segment_mask,
+        };
 
         let prim_index = self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::Rectangle(prim),
         );
 
@@ -1332,94 +1335,88 @@ impl FrameBuilder {
 
         result.items.dedup();
         return result;
     }
 
 
     fn handle_primitive_run(
         &mut self,
-        base_prim_index: PrimitiveIndex,
-        prim_count: usize,
-        clip_and_scroll: ClipAndScrollInfo,
+        run: &PrimitiveRun,
         render_tasks: &mut RenderTaskTree,
         gpu_cache: &mut GpuCache,
         resource_cache: &mut ResourceCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         clip_scroll_tree: &ClipScrollTree,
         device_pixel_ratio: f32,
         profile_counters: &mut FrameProfileCounters,
-    ) -> bool {
+    ) {
         let stacking_context_index = *self.stacking_context_stack.last().unwrap();
-        let scroll_node = &clip_scroll_tree.nodes[&clip_and_scroll.scroll_node_id];
-        let clip_node = &clip_scroll_tree.nodes[&clip_and_scroll.clip_node_id()];
+        let scroll_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.scroll_node_id];
+        let clip_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.clip_node_id()];
 
-        if clip_node.combined_clip_outer_bounds == DeviceIntRect::zero() {
-            debug!("{:?} of clipped out {:?}", base_prim_index, stacking_context_index);
-            return false;
+        if !clip_node.is_visible() {
+            debug!("{:?} of clipped out {:?}", run.base_prim_index, stacking_context_index);
+            return;
         }
 
         let stacking_context = &mut self.stacking_context_store[stacking_context_index.0];
         let pipeline_id = {
             if !stacking_context.can_contribute_to_scene() {
-                return false;
+                return;
             }
 
             // At least one primitive in this stacking context is visible, so the stacking
             // context is visible.
             stacking_context.is_visible = true;
             stacking_context.pipeline_id
         };
 
         debug!(
             "\t{:?} of {:?}",
-            base_prim_index,
+            run.base_prim_index,
             stacking_context_index,
         );
 
         let display_list = &pipelines
             .get(&pipeline_id)
             .expect("No display list?")
             .display_list;
 
         if !stacking_context.is_backface_visible && scroll_node.world_content_transform.is_backface_visible() {
-            return false;
+            return;
         }
 
         let prim_context = PrimitiveContext::new(
             device_pixel_ratio,
             display_list,
             clip_node,
             scroll_node,
         );
 
-        for i in 0 .. prim_count {
-            let prim_index = PrimitiveIndex(base_prim_index.0 + i);
+        let result = self.prim_store.prepare_prim_run(
+            run,
+            &prim_context,
+            gpu_cache,
+            resource_cache,
+            render_tasks,
+            &mut self.clip_store,
+        );
 
-            if let Some(prim_geom) = self.prim_store.prepare_prim_for_render(
-                prim_index,
-                &prim_context,
-                resource_cache,
-                gpu_cache,
-                render_tasks,
-                &mut self.clip_store,
-            ) {
-                stacking_context.screen_bounds = stacking_context
-                    .screen_bounds
-                    .union(&prim_geom.device_rect);
-                stacking_context.isolated_items_bounds = stacking_context
-                    .isolated_items_bounds
-                    .union(&prim_geom.local_rect);
-                stacking_context.has_any_primitive = true;
+        if result.visible_primitives > 0 {
+            stacking_context.screen_bounds = stacking_context
+                .screen_bounds
+                .union(&result.device_rect);
+            stacking_context.isolated_items_bounds = stacking_context
+                .isolated_items_bounds
+                .union(&result.local_rect);
+            stacking_context.has_any_primitive = true;
 
-                profile_counters.visible_primitives.inc();
-            }
+            profile_counters.visible_primitives.add(result.visible_primitives);
         }
-
-        true //visible
     }
 
     fn handle_pop_stacking_context(
         &mut self,
         screen_rect: &DeviceIntRect,
         clip_scroll_tree: &ClipScrollTree) {
         let stacking_context_index = self.stacking_context_stack.pop().unwrap();
 
@@ -1491,21 +1488,19 @@ impl FrameBuilder {
 
         debug!("processing commands...");
         let commands = mem::replace(&mut self.cmds, Vec::new());
         for cmd in &commands {
             match *cmd {
                 PrimitiveRunCmd::PushStackingContext(stacking_context_index) => {
                     self.handle_push_stacking_context(stacking_context_index)
                 }
-                PrimitiveRunCmd::PrimitiveRun(prim_index, prim_count, clip_and_scroll) => {
+                PrimitiveRunCmd::PrimitiveRun(ref run) => {
                     self.handle_primitive_run(
-                        prim_index,
-                        prim_count,
-                        clip_and_scroll,
+                        run,
                         render_tasks,
                         gpu_cache,
                         resource_cache,
                         pipelines,
                         clip_scroll_tree,
                         device_pixel_ratio,
                         profile_counters,
                     );
@@ -1835,29 +1830,32 @@ impl FrameBuilder {
                             current_task_size,
                         );
                         next_z += 1;
                         prev_task.as_alpha_batch_mut().items.push(item);
                         prev_task.children.push(current_task_id);
                         current_task = prev_task;
                     }
                 }
-                PrimitiveRunCmd::PrimitiveRun(first_prim_index, prim_count, clip_and_scroll) => {
+                PrimitiveRunCmd::PrimitiveRun(ref run) => {
                     let stacking_context_index = *sc_stack.last().unwrap();
                     if !self.stacking_context_store[stacking_context_index.0].is_visible {
                         continue;
                     }
 
-                    debug!("\trun of {} items", prim_count);
+                    debug!("\trun of {} items", run.count);
 
-                    let scroll_node = &clip_scroll_tree.nodes[&clip_and_scroll.scroll_node_id];
-                    let clip_node = &clip_scroll_tree.nodes[&clip_and_scroll.clip_node_id()];
+                    let clip_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.clip_node_id()];
+                    if !clip_node.is_visible() {
+                        continue;
+                    }
+                    let scroll_node = &clip_scroll_tree.nodes[&run.clip_and_scroll.scroll_node_id];
 
-                    for i in 0 .. prim_count {
-                        let prim_index = PrimitiveIndex(first_prim_index.0 + i);
+                    for i in 0 .. run.count {
+                        let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
 
                         if self.prim_store.cpu_metadata[prim_index.0].screen_rect.is_some() {
                             self.prim_store
                                 .add_render_tasks_for_prim(prim_index, &mut current_task);
                             let item =
                                 AlphaRenderItem::Primitive(clip_node.id, scroll_node.id, prim_index, next_z);
                             current_task.as_alpha_batch_mut().items.push(item);
                             next_z += 1;
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -148,10 +148,11 @@ extern crate time;
 extern crate ws;
 pub extern crate webrender_api;
 
 #[doc(hidden)]
 pub use device::build_shader_strings;
 pub use renderer::{CpuProfile, DebugFlags, GpuProfile, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, ReadPixelsFormat, Renderer, RendererOptions};
+pub use renderer::{ThreadListener};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use webrender_api as api;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -2,37 +2,30 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadiusKind, ColorF, ClipAndScrollInfo, device_length, DeviceIntSize};
 use api::{BoxShadowClipMode, LayerPoint, LayerRect, LayerSize, Shadow};
 use box_shadow::BLUR_SAMPLE_SCALE;
 use frame_builder::PrimitiveContext;
 use gpu_cache::GpuDataRequest;
-use prim_store::PrimitiveIndex;
+use prim_store::{PrimitiveIndex, PrimitiveRun};
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
 use tiling::RenderTargetKind;
 
 /*
  A picture represents a dynamically rendered image. It consists of:
 
  * A number of primitives that are drawn onto the picture.
  * A composite operation describing how to composite this
    picture into its parent.
  * A configuration describing how to draw the primitives on
    this picture (e.g. in screen space or local space).
  */
 
-#[derive(Clone, Debug)]
-pub struct PrimitiveRun {
-    pub prim_index: PrimitiveIndex,
-    pub count: usize,
-    pub clip_and_scroll: ClipAndScrollInfo,
-}
-
 #[derive(Debug)]
 pub enum PictureKind {
     TextShadow {
         shadow: Shadow,
     },
     BoxShadow {
         blur_radius: f32,
         color: ColorF,
@@ -107,24 +100,24 @@ impl PicturePrimitive {
         //           to be in the same local space. Once we start
         //           using pictures for other uses, we will need
         //           to consider the space of a primitive in order
         //           to build a correct contect rect!
         self.content_rect = self.content_rect.union(local_rect);
 
         if let Some(ref mut run) = self.prim_runs.last_mut() {
             if run.clip_and_scroll == clip_and_scroll &&
-               run.prim_index.0 + run.count == prim_index.0 {
+               run.base_prim_index.0 + run.count == prim_index.0 {
                 run.count += 1;
                 return;
             }
         }
 
         self.prim_runs.push(PrimitiveRun {
-            prim_index,
+            base_prim_index: prim_index,
             count: 1,
             clip_and_scroll,
         });
     }
 
     pub fn build(&mut self) -> LayerRect {
         match self.kind {
             PictureKind::TextShadow { ref shadow } => {
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,30 +1,44 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, BuiltDisplayList, ColorF, ComplexClipRegion, DeviceIntRect};
 use api::{DevicePoint, ExtendMode, FontInstance, GlyphInstance, GlyphKey};
 use api::{GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerRect};
 use api::{ClipMode, LayerSize, LayerVector2D, LineOrientation, LineStyle};
-use api::{TileOffset, YuvColorSpace, YuvFormat};
+use api::{ClipAndScrollInfo, EdgeAaSegmentMask, TileOffset, YuvColorSpace, YuvFormat};
 use border::BorderCornerInstance;
 use clip::{ClipSourcesHandle, ClipStore, Geometry};
 use frame_builder::PrimitiveContext;
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
 use picture::PicturePrimitive;
 use render_task::{ClipWorkItem, ClipChainNode, RenderTask, RenderTaskId, RenderTaskTree};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use resource_cache::{ImageProperties, ResourceCache};
 use std::{mem, usize};
 use std::rc::Rc;
 use util::{pack_as_float, recycle_vec, MatrixHelpers, TransformedRect, TransformedRectKind};
 
+#[derive(Clone, Debug)]
+pub struct PrimitiveRun {
+    pub base_prim_index: PrimitiveIndex,
+    pub count: usize,
+    pub clip_and_scroll: ClipAndScrollInfo,
+}
+
+#[derive(Debug)]
+pub struct PrimitiveRunResult {
+    pub local_rect: LayerRect,
+    pub device_rect: DeviceIntRect,
+    pub visible_primitives: usize,
+}
+
 #[derive(Debug, Copy, Clone)]
 pub struct PrimitiveOpacity {
     pub is_opaque: bool,
 }
 
 impl PrimitiveOpacity {
     pub fn opaque() -> PrimitiveOpacity {
         PrimitiveOpacity { is_opaque: true }
@@ -156,29 +170,33 @@ pub struct PrimitiveMetadata {
 pub enum RectangleContent {
     Fill(ColorF),
     Clear,
 }
 
 #[derive(Debug)]
 pub struct RectanglePrimitive {
     pub content: RectangleContent,
+    pub edge_aa_segment_mask: EdgeAaSegmentMask,
 }
 
 impl ToGpuBlocks for RectanglePrimitive {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
         match &self.content {
             &RectangleContent::Fill(ref color) => {
                 request.push(color.premultiplied());
             }
             &RectangleContent::Clear => {
                 // Opaque black with operator dest out
                 request.push(ColorF::new(0.0, 0.0, 0.0, 1.0));
             }
         }
+        request.extend_from_slice(&[GpuBlockData {
+            data: [self.edge_aa_segment_mask.bits() as f32, 0.0, 0.0, 0.0],
+        }]);
     }
 }
 
 #[derive(Debug)]
 pub enum BrushMaskKind {
     //Rect,         // TODO(gw): Optimization opportunity for masks with 0 border radii.
     Corner(LayerSize),
     RoundedRect(LayerRect, BorderRadius),
@@ -885,17 +903,16 @@ impl PrimitiveStore {
             clip_sources,
             gpu_location: GpuCacheHandle::new(),
             clip_task_id: None,
             local_rect: *local_rect,
             local_clip_rect: *local_clip_rect,
             is_backface_visible: is_backface_visible,
             screen_rect: None,
             tag,
-
             opacity: PrimitiveOpacity::translucent(),
             prim_kind: PrimitiveKind::Rectangle,
             cpu_prim_index: SpecificPrimitiveIndex(0),
         };
 
         let metadata = match container {
             PrimitiveContainer::Rectangle(rect) => {
                 let opacity = match &rect.content {
@@ -1328,17 +1345,17 @@ impl PrimitiveStore {
 
         // Recurse into any sub primitives and prepare them for rendering first.
         // TODO(gw): This code is a bit hacky to work around the borrow checker.
         //           Specifically, the clone() below on the primitive list for
         //           text shadow primitives. Consider restructuring this code to
         //           avoid borrow checker issues.
         for run in dependent_primitives {
             for i in 0 .. run.count {
-                let sub_prim_index = PrimitiveIndex(run.prim_index.0 + i);
+                let sub_prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
 
                 self.prepare_prim_for_render_inner(
                     sub_prim_index,
                     prim_context,
                     resource_cache,
                     gpu_cache,
                     render_tasks,
                 );
@@ -1362,19 +1379,53 @@ impl PrimitiveStore {
             prim_context,
             resource_cache,
             gpu_cache,
             render_tasks,
         );
 
         Some(geometry)
     }
+
+    pub fn prepare_prim_run(
+        &mut self,
+        run: &PrimitiveRun,
+        prim_context: &PrimitiveContext,
+        gpu_cache: &mut GpuCache,
+        resource_cache: &mut ResourceCache,
+        render_tasks: &mut RenderTaskTree,
+        clip_store: &mut ClipStore,
+    ) -> PrimitiveRunResult {
+        let mut result = PrimitiveRunResult {
+            local_rect: LayerRect::zero(),
+            device_rect: DeviceIntRect::zero(),
+            visible_primitives: 0,
+        };
+
+        for i in 0 .. run.count {
+            let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
+
+            if let Some(prim_geom) = self.prepare_prim_for_render(
+                prim_index,
+                prim_context,
+                resource_cache,
+                gpu_cache,
+                render_tasks,
+                clip_store,
+            ) {
+                result.local_rect = result.local_rect.union(&prim_geom.local_rect);
+                result.device_rect = result.device_rect.union(&prim_geom.device_rect);
+                result.visible_primitives += 1;
+            }
+        }
+
+        result
+    }
 }
 
-
 //Test for one clip region contains another
 trait InsideTest<T> {
     fn might_contain(&self, clip: &T) -> bool;
 }
 
 impl InsideTest<ComplexClipRegion> for ComplexClipRegion {
     // Returns true if clip is inside self, can return false negative
     fn might_contain(&self, clip: &ComplexClipRegion) -> bool {
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -12,16 +12,18 @@ use gpu_types::{ClipScrollNodeIndex};
 use internal_types::HardwareCompositeOp;
 use prim_store::PrimitiveIndex;
 use std::{cmp, usize, f32, i32};
 use std::rc::Rc;
 use tiling::{RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind, StackingContextIndex};
 
 const FLOATS_PER_RENDER_TASK_INFO: usize = 12;
+pub const MAX_BLUR_STD_DEVIATION: f32 = 4.0;
+pub const MIN_DOWNSCALING_RT_SIZE: i32 = 128;
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub struct RenderTaskId(pub u32); // TODO(gw): Make private when using GPU cache!
 
 #[derive(Debug, Copy, Clone)]
 #[repr(C)]
 pub struct RenderTaskAddress(pub u32);
 
@@ -474,18 +476,16 @@ impl RenderTask {
         src_task_id: RenderTaskId,
         render_tasks: &mut RenderTaskTree,
         target_kind: RenderTargetKind,
         regions: &[LayerRect],
         clear_mode: ClearMode,
         color: ColorF,
     ) -> RenderTask {
         // Adjust large std deviation value.
-        const MAX_BLUR_STD_DEVIATION: f32 = 4.0;
-        const MIN_DOWNSCALING_RT_SIZE: i32 = 128;
         let mut adjusted_blur_std_deviation = blur_std_deviation;
         let blur_target_size = render_tasks.get(src_task_id).get_dynamic_size();
         let mut adjusted_blur_target_size = blur_target_size;
         let mut downscaling_src_task_id = src_task_id;
         let mut scale_factor = 1.0;
         while adjusted_blur_std_deviation > MAX_BLUR_STD_DEVIATION {
             if adjusted_blur_target_size.width < MIN_DOWNSCALING_RT_SIZE ||
                adjusted_blur_target_size.height < MIN_DOWNSCALING_RT_SIZE {
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -216,17 +216,17 @@ bitflags! {
 
 // A generic mode that can be passed to shaders to change
 // behaviour per draw-call.
 type ShaderMode = i32;
 
 #[repr(C)]
 enum TextShaderMode {
     Alpha = 0,
-    SubpixelOpaque = 1,
+    SubpixelConstantTextColor = 1,
     SubpixelPass0 = 2,
     SubpixelPass1 = 3,
     SubpixelWithBgColorPass0 = 4,
     SubpixelWithBgColorPass1 = 5,
     SubpixelWithBgColorPass2 = 6,
     ColorBitmap = 7,
 }
 
@@ -631,24 +631,25 @@ impl SourceTextureResolver {
                 panic!("BUG: External textures cannot be resolved, they can only be bound.");
             }
             SourceTexture::TextureCache(index) => Some(&self.cache_texture_map[index.0]),
         }
     }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
+#[allow(dead_code)] // SubpixelVariableTextColor is not used at the moment.
 pub enum BlendMode {
     None,
     Alpha,
     PremultipliedAlpha,
     PremultipliedDestOut,
-    SubpixelOpaque(ColorU),
-    SubpixelWithAlpha,
+    SubpixelConstantTextColor(ColorU),
     SubpixelWithBgColor,
+    SubpixelVariableTextColor,
 }
 
 // Tracks the state of each row in the GPU cache texture.
 struct CacheRow {
     is_dirty: bool,
 }
 
 impl CacheRow {
@@ -1010,18 +1011,18 @@ impl BrushShader {
     ) where M: Into<ShaderMode> {
         match blend_mode {
             BlendMode::None => {
                 self.opaque.bind(device, projection, mode, renderer_errors)
             }
             BlendMode::Alpha |
             BlendMode::PremultipliedAlpha |
             BlendMode::PremultipliedDestOut |
-            BlendMode::SubpixelOpaque(..) |
-            BlendMode::SubpixelWithAlpha |
+            BlendMode::SubpixelConstantTextColor(..) |
+            BlendMode::SubpixelVariableTextColor |
             BlendMode::SubpixelWithBgColor => {
                 self.alpha.bind(device, projection, mode, renderer_errors)
             }
         }
     }
 
     fn deinit(self, device: &mut Device) {
         self.opaque.deinit(device);
@@ -1781,43 +1782,67 @@ impl Renderer {
             default_font_render_mode,
             debug: options.debug,
         };
 
         let device_pixel_ratio = options.device_pixel_ratio;
         let debug_flags = options.debug_flags;
         let payload_tx_for_backend = payload_tx.clone();
         let recorder = options.recorder;
-        let worker_config = ThreadPoolConfig::new()
-            .thread_name(|idx| format!("WebRender:Worker#{}", idx))
-            .start_handler(|idx| {
-                register_thread_with_profiler(format!("WebRender:Worker#{}", idx));
-            });
+        let thread_listener = Arc::new(options.thread_listener);
+        let thread_listener_for_rayon_start = thread_listener.clone();
+        let thread_listener_for_rayon_end = thread_listener.clone();
         let workers = options
             .workers
             .take()
-            .unwrap_or_else(|| Arc::new(ThreadPool::new(worker_config).unwrap()));
+            .unwrap_or_else(|| {
+                let worker_config = ThreadPoolConfig::new()
+                    .thread_name(|idx|{ format!("WRWorker#{}", idx) })
+                    .start_handler(move |idx| {
+                        register_thread_with_profiler(format!("WRWorker#{}", idx));
+                        if let Some(ref thread_listener) = *thread_listener_for_rayon_start {
+                            thread_listener.thread_started(&format!("WRWorker#{}", idx));
+                        }
+                    })
+                    .exit_handler(move |idx| {
+                        if let Some(ref thread_listener) = *thread_listener_for_rayon_end {
+                            thread_listener.thread_stopped(&format!("WRWorker#{}", idx));
+                        }
+                    });
+                Arc::new(ThreadPool::new(worker_config).unwrap())
+            });
         let enable_render_on_scroll = options.enable_render_on_scroll;
 
         let blob_image_renderer = options.blob_image_renderer.take();
-        try!{ thread::Builder::new().name("RenderBackend".to_string()).spawn(move || {
-            let mut backend = RenderBackend::new(api_rx,
-                                                 payload_rx,
-                                                 payload_tx_for_backend,
-                                                 result_tx,
-                                                 device_pixel_ratio,
-                                                 texture_cache,
-                                                 workers,
-                                                 backend_notifier,
-                                                 config,
-                                                 recorder,
-                                                 blob_image_renderer,
-                                                 enable_render_on_scroll);
-            backend.run(backend_profile_counters);
-        })};
+        let thread_listener_for_render_backend = thread_listener.clone();
+        let thread_name = format!("WRRenderBackend#{}", options.renderer_id.unwrap_or(0));
+        try!{
+            thread::Builder::new().name(thread_name.clone()).spawn(move || {
+                register_thread_with_profiler(thread_name.clone());
+                if let Some(ref thread_listener) = *thread_listener_for_render_backend {
+                    thread_listener.thread_started(&thread_name);
+                }
+                let mut backend = RenderBackend::new(api_rx,
+                                                     payload_rx,
+                                                     payload_tx_for_backend,
+                                                     result_tx,
+                                                     device_pixel_ratio,
+                                                     texture_cache,
+                                                     workers,
+                                                     backend_notifier,
+                                                     config,
+                                                     recorder,
+                                                     blob_image_renderer,
+                                                     enable_render_on_scroll);
+                backend.run(backend_profile_counters);
+                if let Some(ref thread_listener) = *thread_listener_for_render_backend {
+                    thread_listener.thread_stopped(&thread_name);
+                }
+            })
+        };
 
         let gpu_cache_texture = CacheTexture::new(&mut device);
 
         let gpu_profile = GpuProfiler::new(device.rc_gl());
 
         let renderer = Renderer {
             result_rx,
             debug_server,
@@ -2490,18 +2515,18 @@ impl Renderer {
             }
             BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
                 TransformBatchKind::Rectangle(needs_clipping) => {
                     debug_assert!(
                         !needs_clipping || match key.blend_mode {
                             BlendMode::Alpha |
                             BlendMode::PremultipliedAlpha |
                             BlendMode::PremultipliedDestOut |
-                            BlendMode::SubpixelOpaque(..) |
-                            BlendMode::SubpixelWithAlpha |
+                            BlendMode::SubpixelConstantTextColor(..) |
+                            BlendMode::SubpixelVariableTextColor |
                             BlendMode::SubpixelWithBgColor => true,
                             BlendMode::None => false,
                         }
                     );
 
                     if needs_clipping {
                         self.ps_rectangle_clip.bind(
                             &mut self.device,
@@ -2861,18 +2886,18 @@ impl Renderer {
 
             for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
                 if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
                         BlendMode::None => debug_colors::BLACK,
                         BlendMode::Alpha => debug_colors::YELLOW,
                         BlendMode::PremultipliedAlpha => debug_colors::GREY,
                         BlendMode::PremultipliedDestOut => debug_colors::SALMON,
-                        BlendMode::SubpixelOpaque(..) => debug_colors::GREEN,
-                        BlendMode::SubpixelWithAlpha => debug_colors::RED,
+                        BlendMode::SubpixelConstantTextColor(..) => debug_colors::GREEN,
+                        BlendMode::SubpixelVariableTextColor => debug_colors::RED,
                         BlendMode::SubpixelWithBgColor => debug_colors::BLUE,
                     }.into();
                     for item_rect in &batch.item_rects {
                         self.debug.add_rect(item_rect, color);
                     }
                 }
 
                 match batch.key.kind {
@@ -2901,34 +2926,34 @@ impl Renderer {
                                 );
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures
                                 );
                             }
-                            BlendMode::SubpixelOpaque(color) => {
-                                self.device.set_blend_mode_subpixel_opaque(color.into());
+                            BlendMode::SubpixelConstantTextColor(color) => {
+                                self.device.set_blend_mode_subpixel_constant_text_color(color.into());
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
                                     transform_kind,
                                     projection,
-                                    TextShaderMode::SubpixelOpaque,
+                                    TextShaderMode::SubpixelConstantTextColor,
                                     &mut self.renderer_errors,
                                 );
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures
                                 );
                             }
-                            BlendMode::SubpixelWithAlpha => {
+                            BlendMode::SubpixelVariableTextColor => {
                                 // Using the two pass component alpha rendering technique:
                                 //
                                 // http://anholt.livejournal.com/32058.html
                                 //
                                 self.device.set_blend_mode_subpixel_pass0();
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
@@ -3034,18 +3059,18 @@ impl Renderer {
                                 BlendMode::PremultipliedAlpha => {
                                     self.device.set_blend(true);
                                     self.device.set_blend_mode_premultiplied_alpha();
                                 }
                                 BlendMode::PremultipliedDestOut => {
                                     self.device.set_blend(true);
                                     self.device.set_blend_mode_premultiplied_dest_out();
                                 }
-                                BlendMode::SubpixelOpaque(..) |
-                                BlendMode::SubpixelWithAlpha |
+                                BlendMode::SubpixelConstantTextColor(..) |
+                                BlendMode::SubpixelVariableTextColor |
                                 BlendMode::SubpixelWithBgColor => {
                                     unreachable!("bug: subpx text handled earlier");
                                 }
                             }
                             prev_blend_mode = batch.key.blend_mode;
                         }
 
                         self.submit_batch(
@@ -3744,16 +3769,17 @@ impl Renderer {
         self.brush_image_rgba8.deinit(&mut self.device);
         self.brush_image_a8.deinit(&mut self.device);
         self.cs_clip_rectangle.deinit(&mut self.device);
         self.cs_clip_image.deinit(&mut self.device);
         self.cs_clip_border.deinit(&mut self.device);
         self.ps_rectangle.deinit(&mut self.device);
         self.ps_rectangle_clip.deinit(&mut self.device);
         self.ps_text_run.deinit(&mut self.device);
+        self.ps_text_run_subpx_bg_pass1.deinit(&mut self.device);
         for shader in self.ps_image {
             if let Some(shader) = shader {
                 shader.deinit(&mut self.device);
             }
         }
         for shader in self.ps_yuv_image {
             if let Some(shader) = shader {
                 shader.deinit(&mut self.device);
@@ -3820,16 +3846,21 @@ pub trait ExternalImageHandler {
 /// and the size of the texture. Unlock will only be called if the lock()
 /// call succeeds, when WR has issued the GL commands to copy the output
 /// to the texture handle.
 pub trait OutputImageHandler {
     fn lock(&mut self, pipeline_id: PipelineId) -> Option<(u32, DeviceIntSize)>;
     fn unlock(&mut self, pipeline_id: PipelineId);
 }
 
+pub trait ThreadListener {
+    fn thread_started(&self, thread_name: &str);
+    fn thread_stopped(&self, thread_name: &str);
+}
+
 pub struct RendererOptions {
     pub device_pixel_ratio: f32,
     pub resource_override_path: Option<PathBuf>,
     pub enable_aa: bool,
     pub enable_dithering: bool,
     pub max_recorded_profiles: usize,
     pub debug: bool,
     pub enable_scrollbars: bool,
@@ -3839,18 +3870,20 @@ pub struct RendererOptions {
     pub clear_framebuffer: bool,
     pub clear_color: ColorF,
     pub enable_clear_scissor: bool,
     pub enable_batcher: bool,
     pub max_texture_size: Option<u32>,
     pub workers: Option<Arc<ThreadPool>>,
     pub blob_image_renderer: Option<Box<BlobImageRenderer>>,
     pub recorder: Option<Box<ApiRecordingReceiver>>,
+    pub thread_listener: Option<Box<ThreadListener + Send + Sync>>,
     pub enable_render_on_scroll: bool,
     pub debug_flags: DebugFlags,
+    pub renderer_id: Option<u64>,
 }
 
 impl Default for RendererOptions {
     fn default() -> RendererOptions {
         RendererOptions {
             device_pixel_ratio: 1.0,
             resource_override_path: None,
             enable_aa: true,
@@ -3865,17 +3898,19 @@ impl Default for RendererOptions {
             clear_framebuffer: true,
             clear_color: ColorF::new(1.0, 1.0, 1.0, 1.0),
             enable_clear_scissor: true,
             enable_batcher: true,
             max_texture_size: None,
             workers: None,
             blob_image_renderer: None,
             recorder: None,
+            thread_listener: None,
             enable_render_on_scroll: true,
+            renderer_id: None,
         }
     }
 }
 
 #[cfg(not(feature = "debugger"))]
 pub struct DebugServer;
 
 #[cfg(not(feature = "debugger"))]
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderRadiusKind, ClipAndScrollInfo, ClipId, ColorF, DeviceIntPoint, ImageKey};
+use api::{BorderRadiusKind, ClipId, ColorF, DeviceIntPoint, ImageKey};
 use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize};
 use api::{ExternalImageType, FilterOp, FontRenderMode, ImageRendering, LayerRect};
 use api::{MixBlendMode, PipelineId, PropertyBinding, TransformStyle};
 use api::{LayerVector2D, TileOffset, YuvColorSpace, YuvFormat};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
 use device::Texture;
@@ -15,17 +15,17 @@ use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BrushInstance, BrushImageKind, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
 use gpu_types::{BRUSH_FLAG_USES_PICTURE, ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, SourceTexture};
 use internal_types::BatchTextures;
 use picture::PictureKind;
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
-use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, RectangleContent};
+use prim_store::{BrushMaskKind, BrushKind, DeferredResolve, PrimitiveRun, RectangleContent};
 use profiler::FrameProfileCounters;
 use render_task::{AlphaRenderItem, ClipWorkItem, MaskGeometryKind, MaskSegment};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::{GlyphFetchResult, ResourceCache};
 use std::{cmp, usize, f32, i32};
@@ -56,20 +56,18 @@ impl AlphaBatchHelpers for PrimitiveStor
 
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let font = &self.cpu_text_runs[metadata.cpu_prim_index.0].font;
                 match font.render_mode {
                     FontRenderMode::Subpixel => {
                         if font.bg_color.a != 0 {
                             BlendMode::SubpixelWithBgColor
-                        } else if font.color.a != 255 || metadata.clip_task_id.is_some() {
-                            BlendMode::SubpixelWithAlpha
                         } else {
-                            BlendMode::SubpixelOpaque(font.color)
+                            BlendMode::SubpixelConstantTextColor(font.color)
                         }
                     }
                     FontRenderMode::Alpha |
                     FontRenderMode::Mono |
                     FontRenderMode::Bitmap => BlendMode::PremultipliedAlpha,
                 }
             },
             PrimitiveKind::Rectangle => {
@@ -116,17 +114,17 @@ pub struct ScrollbarPrimitive {
     pub prim_index: PrimitiveIndex,
     pub border_radius: f32,
 }
 
 #[derive(Debug)]
 pub enum PrimitiveRunCmd {
     PushStackingContext(StackingContextIndex),
     PopStackingContext,
-    PrimitiveRun(PrimitiveIndex, usize, ClipAndScrollInfo),
+    PrimitiveRun(PrimitiveRun),
 }
 
 #[derive(Debug, Copy, Clone)]
 pub enum PrimitiveFlags {
     None,
     Scrollbar(ClipId, f32),
 }
 
@@ -163,17 +161,17 @@ impl AlphaBatchList {
         match (key.kind, key.blend_mode) {
             (BatchKind::Composite { .. }, _) => {
                 // Composites always get added to their own batch.
                 // This is because the result of a composite can affect
                 // the input to the next composite. Perhaps we can
                 // optimize this in the future.
             }
             (BatchKind::Transformable(_, TransformBatchKind::TextRun(_)), BlendMode::SubpixelWithBgColor) |
-            (BatchKind::Transformable(_, TransformBatchKind::TextRun(_)), BlendMode::SubpixelWithAlpha) => {
+            (BatchKind::Transformable(_, TransformBatchKind::TextRun(_)), BlendMode::SubpixelVariableTextColor) => {
                 'outer_text: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
                     // Subpixel text is drawn in two passes. Because of this, we need
                     // to check for overlaps with every batch (which is a bit different
                     // than the normal batching below).
                     for item_rect in &batch.item_rects {
                         if item_rect.intersects(item_bounding_rect) {
                             break 'outer_text;
                         }
@@ -281,18 +279,18 @@ impl BatchList {
         &mut self,
         key: BatchKey,
         item_bounding_rect: &DeviceIntRect,
     ) -> &mut Vec<PrimitiveInstance> {
         match key.blend_mode {
             BlendMode::None => self.opaque_batch_list.get_suitable_batch(key),
             BlendMode::Alpha | BlendMode::PremultipliedAlpha |
             BlendMode::PremultipliedDestOut |
-            BlendMode::SubpixelOpaque(..) |
-            BlendMode::SubpixelWithAlpha |
+            BlendMode::SubpixelConstantTextColor(..) |
+            BlendMode::SubpixelVariableTextColor |
             BlendMode::SubpixelWithBgColor => {
                 self.alpha_batch_list
                     .get_suitable_batch(key, item_bounding_rect)
             }
         }
     }
 
     fn finalize(&mut self) {
@@ -1240,17 +1238,17 @@ impl RenderTarget for ColorRenderTarget 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Picture => {
                         let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
 
                         let task_index = render_tasks.get_task_address(task_id);
 
                         for run in &prim.prim_runs {
                             for i in 0 .. run.count {
-                                let sub_prim_index = PrimitiveIndex(run.prim_index.0 + i);
+                                let sub_prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
 
                                 let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
                                 let sub_prim_address =
                                     gpu_cache.get_address(&sub_metadata.gpu_location);
                                 let instance = SimplePrimitiveInstance::new(
                                     sub_prim_address,
                                     task_index,
                                     RenderTaskAddress(0),
@@ -1409,17 +1407,17 @@ impl RenderTarget for AlphaRenderTarget 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Picture => {
                         let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
 
                         let task_index = render_tasks.get_task_address(task_id);
 
                         for run in &prim.prim_runs {
                             for i in 0 .. run.count {
-                                let sub_prim_index = PrimitiveIndex(run.prim_index.0 + i);
+                                let sub_prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
 
                                 let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
                                 let sub_prim_address =
                                     gpu_cache.get_address(&sub_metadata.gpu_location);
 
                                 match sub_metadata.prim_kind {
                                     PrimitiveKind::Brush => {
                                         let instance = BrushInstance {
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -1,14 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, ComplexClipRegion, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
-use api::{DevicePoint, DeviceRect, DeviceSize, LayerRect, LayerToWorldTransform, LayoutRect};
+use api::{DevicePoint, DeviceRect, DeviceSize, LayerRect, LayerToWorldTransform};
+use api::{LayoutPoint, LayoutRect, LayoutSize};
 use api::WorldPoint3D;
 use euclid::{Point2D, Rect, Size2D, TypedPoint2D, TypedRect, TypedSize2D, TypedTransform2D};
 use euclid::TypedTransform3D;
 use num_traits::Zero;
 use std::f32::consts::FRAC_1_SQRT_2;
 use std::i32;
 
 // Matches the definition of SK_ScalarNearlyZero in Skia.
@@ -137,26 +138,27 @@ impl<U> RectHelpers<U> for TypedRect<f32
 }
 
 // Don't use `euclid`'s `is_empty` because that has effectively has an "and" in the conditional
 // below instead of an "or".
 pub fn rect_is_empty<N: PartialEq + Zero, U>(rect: &TypedRect<N, U>) -> bool {
     rect.size.width == Zero::zero() || rect.size.height == Zero::zero()
 }
 
+#[allow(dead_code)]
 #[inline]
 pub fn rect_from_points_f(x0: f32, y0: f32, x1: f32, y1: f32) -> Rect<f32> {
     Rect::new(Point2D::new(x0, y0), Size2D::new(x1 - x0, y1 - y0))
 }
 
 pub fn lerp(a: f32, b: f32, t: f32) -> f32 {
     (b - a) * t + a
 }
 
-pub fn subtract_rect<U>(
+pub fn _subtract_rect<U>(
     rect: &TypedRect<f32, U>,
     other: &TypedRect<f32, U>,
     results: &mut Vec<TypedRect<f32, U>>,
 ) {
     results.clear();
 
     let int = rect.intersection(other);
     match int {
@@ -278,24 +280,89 @@ pub fn pack_as_float(value: u32) -> f32 
     value as f32 + 0.5
 }
 
 
 pub trait ComplexClipRegionHelpers {
     /// Return the approximately largest aligned rectangle that is fully inside
     /// the provided clip region.
     fn get_inner_rect_full(&self) -> Option<LayoutRect>;
+    /// Split the clip region into 2 sets of rectangles: opaque and transparent.
+    /// Guarantees no T-junctions in the produced split.
+    /// Attempts to cover more space in opaque, where it reasonably makes sense.
+    fn split_rectangles(
+        &self,
+        opaque: &mut Vec<LayoutRect>,
+        transparent: &mut Vec<LayoutRect>,
+    );
 }
 
 impl ComplexClipRegionHelpers for ComplexClipRegion {
     fn get_inner_rect_full(&self) -> Option<LayoutRect> {
-        // this `k` optimal for a simple case of all border radii being equal
+        // this `k` is optimal for a simple case of all border radii being equal
         let k = 1.0 - 0.5 * FRAC_1_SQRT_2; // could be nicely approximated to `0.3`
         extract_inner_rect_impl(&self.rect, &self.radii, k)
     }
+
+    fn split_rectangles(
+        &self,
+        opaque: &mut Vec<LayoutRect>,
+        transparent: &mut Vec<LayoutRect>,
+    ) {
+        fn rect(p0: LayoutPoint, p1: LayoutPoint) -> Option<LayoutRect> {
+            if p0.x != p1.x && p0.y != p1.y {
+                Some(LayerRect::new(p0.min(p1), (p1 - p0).abs().to_size()))
+            } else {
+                None
+            }
+        }
+
+        let inner = match extract_inner_rect_impl(&self.rect, &self.radii, 1.0) {
+            Some(rect) => rect,
+            None => {
+                transparent.push(self.rect);
+                return
+            },
+        };
+        let left_top = inner.origin - self.rect.origin;
+        let right_bot = self.rect.bottom_right() - inner.bottom_right();
+
+        // fill in the opaque parts
+        opaque.push(inner);
+        if left_top.x > 0.0 {
+            opaque.push(LayerRect::new(
+                LayoutPoint::new(self.rect.origin.x, inner.origin.y),
+                LayoutSize::new(left_top.x, inner.size.height),
+            ));
+        }
+        if right_bot.y > 0.0 {
+            opaque.push(LayerRect::new(
+                LayoutPoint::new(inner.origin.x, inner.origin.y + inner.size.height),
+                LayoutSize::new(inner.size.width, right_bot.y),
+            ));
+        }
+        if right_bot.x > 0.0 {
+            opaque.push(LayerRect::new(
+                LayoutPoint::new(inner.origin.x + inner.size.width, inner.origin.y),
+                LayoutSize::new(right_bot.x, inner.size.height),
+            ));
+        }
+        if left_top.y > 0.0 {
+            opaque.push(LayerRect::new(
+                LayoutPoint::new(inner.origin.x, self.rect.origin.y),
+                LayoutSize::new(inner.size.width, left_top.y),
+            ));
+        }
+
+        // fill in the transparent parts
+        transparent.extend(rect(self.rect.origin, inner.origin));
+        transparent.extend(rect(self.rect.bottom_left(), inner.bottom_left()));
+        transparent.extend(rect(self.rect.bottom_right(), inner.bottom_right()));
+        transparent.extend(rect(self.rect.top_right(), inner.top_right()));
+    }
 }
 
 #[inline]
 fn extract_inner_rect_impl<U>(
     rect: &TypedRect<f32, U>,
     radii: &BorderRadius,
     k: f32,
 ) -> Option<TypedRect<f32, U>> {
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -34,16 +34,32 @@ impl ClipAndScrollInfo {
         }
     }
 
     pub fn clip_node_id(&self) -> ClipId {
         self.clip_node_id.unwrap_or(self.scroll_node_id)
     }
 }
 
+bitflags! {
+    /// Each bit of the edge AA mask is:
+    /// 0, when the edge of the primitive needs to be considered for AA
+    /// 1, when the edge of the segment needs to be considered for AA
+    ///
+    /// *Note*: the bit values have to match the shader logic in
+    /// `write_transform_vertex()` function.
+    #[derive(Deserialize, Serialize)]
+    pub struct EdgeAaSegmentMask: u8 {
+        const LEFT = 0x1;
+        const TOP = 0x2;
+        const RIGHT = 0x4;
+        const BOTTOM = 0x8;
+    }
+}
+
 /// A tag that can be used to identify items during hit testing. If the tag
 /// is missing then the item doesn't take part in hit testing at all. This
 /// is composed of two numbers. In Servo, the first is an identifier while the
 /// second is used to select the cursor that should be used during mouse
 /// movement.
 pub type ItemTag = (u64, u8);
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
@@ -52,35 +68,38 @@ pub struct DisplayItem {
     pub clip_and_scroll: ClipAndScrollInfo,
     pub info: LayoutPrimitiveInfo,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct PrimitiveInfo<T> {
     pub rect: TypedRect<f32, T>,
     pub local_clip: LocalClip,
+    pub edge_aa_segment_mask: EdgeAaSegmentMask,
     pub is_backface_visible: bool,
     pub tag: Option<ItemTag>,
 }
 
 impl LayerPrimitiveInfo {
     pub fn new(rect: TypedRect<f32, LayerPixel>) -> Self {
         Self::with_clip_rect(rect, rect)
     }
 
-    pub fn with_clip_rect(rect: TypedRect<f32, LayerPixel>,
-                          clip_rect: TypedRect<f32, LayerPixel>)
-                          -> Self {
+    pub fn with_clip_rect(
+        rect: TypedRect<f32, LayerPixel>,
+        clip_rect: TypedRect<f32, LayerPixel>,
+    ) -> Self {
         Self::with_clip(rect, LocalClip::from(clip_rect))
     }
 
     pub fn with_clip(rect: TypedRect<f32, LayerPixel>, clip: LocalClip) -> Self {
         PrimitiveInfo {
             rect: rect,
             local_clip: clip,
+            edge_aa_segment_mask: EdgeAaSegmentMask::empty(),
             is_backface_visible: true,
             tag: None,
         }
     }
 }
 
 pub type LayoutPrimitiveInfo = PrimitiveInfo<LayoutPixel>;
 pub type LayerPrimitiveInfo = PrimitiveInfo<LayerPixel>;
@@ -678,17 +697,17 @@ impl BorderRadius {
 }
 
 impl ComplexClipRegion {
     /// Create a new complex clip region.
     pub fn new(
         rect: LayoutRect,
         radii: BorderRadius,
         mode: ClipMode,
-    ) -> ComplexClipRegion {
+    ) -> Self {
         ComplexClipRegion { rect, radii, mode }
     }
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ClipId {
     Clip(u64, PipelineId),
     ClipExternalId(u64, PipelineId),
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -310,16 +310,17 @@ impl<'a, 'b> DisplayItemRef<'a, 'b> {
         self.iter.cur_item.info.rect
     }
 
     pub fn get_layer_primitive_info(&self, offset: &LayoutVector2D) -> LayerPrimitiveInfo {
         let info = self.iter.cur_item.info;
         LayerPrimitiveInfo {
             rect: info.rect.translate(&offset),
             local_clip: info.local_clip.create_with_offset(offset),
+            edge_aa_segment_mask: info.edge_aa_segment_mask,
             is_backface_visible: info.is_backface_visible,
             tag: info.tag,
         }
     }
 
     pub fn local_clip(&self) -> &LocalClip {
         &self.iter.cur_item.info.local_clip
     }
@@ -664,17 +665,17 @@ impl DisplayListBuilder {
             content_size,
             save_state: None,
         }
     }
 
     /// Saves the current display list state, so it may be `restore()`'d.
     ///
     /// # Conditions:
-    /// 
+    ///
     /// * Doesn't support popping clips that were pushed before the save.
     /// * Doesn't support nested saves.
     /// * Must call `clear_save()` if the restore becomes unnecessary.
     pub fn save(&mut self) {
         assert!(self.save_state.is_none(), "DisplayListBuilder doesn't support nested saves");
 
         self.save_state = Some(SaveState {
             clip_stack_len: self.clip_stack.len(),
@@ -1192,23 +1193,17 @@ impl DisplayListBuilder {
         I::IntoIter: ExactSizeIterator,
     {
         let id = self.generate_clip_id(id);
         let item = SpecificDisplayItem::ScrollFrame(ScrollFrameDisplayItem {
             id: id,
             image_mask: image_mask,
             scroll_sensitivity,
         });
-
-        let info = LayoutPrimitiveInfo {
-            rect: content_rect,
-            local_clip: LocalClip::from(clip_rect),
-            is_backface_visible: true,
-            tag: None,
-        };
+        let info = LayoutPrimitiveInfo::with_clip_rect(content_rect, clip_rect);
 
         let scrollinfo = ClipAndScrollInfo::simple(parent);
         self.push_item_with_clip_scroll_info(item, &info, scrollinfo);
         self.push_iter(complex_clips);
         id
     }
 
     pub fn define_clip<I>(