Bug 1426116 - Update webrender to commit a422f907be948b92bf5c7003a01f7744391a795e. r=jrmuizel
author Kartikaya Gupta <kgupta@mozilla.com>
Mon, 08 Jan 2018 09:19:23 -0500
changeset 398307 da9b75fc0866092a81e9c222b33a08f8790e7c6a
parent 398306 c174ef283f5561aa3cf24b1d014b5f62dcd5ccd4
child 398308 7855af69185d01ce76917801ace0b104422447a3
push id 98725
push user ebalazs@mozilla.com
push date Tue, 09 Jan 2018 10:16:39 +0000
treeherder mozilla-inbound@34ae1d0d625e [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1426116
milestone 59.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1426116 - Update webrender to commit a422f907be948b92bf5c7003a01f7744391a795e. r=jrmuizel MozReview-Commit-ID: 1PTMg1c0KiC
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/basic.rs
gfx/webrender/examples/common/boilerplate.rs
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_line.glsl
gfx/webrender/res/brush_mask_corner.glsl
gfx/webrender/res/brush_mask_rounded_rect.glsl
gfx/webrender/res/brush_solid.glsl
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_clip_border.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_angle_gradient.glsl
gfx/webrender/res/ps_border_corner.glsl
gfx/webrender/res/ps_border_edge.glsl
gfx/webrender/res/ps_gradient.glsl
gfx/webrender/res/ps_image.glsl
gfx/webrender/res/ps_line.glsl
gfx/webrender/res/ps_radial_gradient.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/ps_yuv_image.glsl
gfx/webrender/res/shared.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/border.rs
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip.rs
gfx/webrender/src/clip_scroll_node.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/glyph_rasterizer.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/unix/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/profiler.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/scene.rs
gfx/webrender/src/segment.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender/tests/angle_shader_validation.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_api/src/image.rs
gfx/webrender_api/src/lib.rs
gfx/webrender_api/src/units.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-1142dfc557c319119a5117450718c5b67a93cb9f
+a422f907be948b92bf5c7003a01f7744391a795e
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -5,17 +5,18 @@ authors = ["Glenn Watson <gw@intuitionli
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 
 [features]
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
 profiler = ["thread_profiler/thread_profiler"]
-debugger = ["ws", "serde_json", "serde", "serde_derive", "image", "base64"]
+debugger = ["ws", "serde_json", "serde", "image", "base64"]
+capture = ["webrender_api/debug-serialization", "ron", "serde"]
 
 [dependencies]
 app_units = "0.6"
 bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.16"
 fxhash = "0.2.1"
 gleam = "0.4.15"
@@ -26,20 +27,20 @@ time = "0.1"
 rayon = "0.8"
 webrender_api = {path = "../webrender_api"}
 bitflags = "1.0"
 thread_profiler = "0.1.1"
 plane-split = "0.7"
 smallvec = "0.6"
 ws = { optional = true, version = "0.7.3" }
 serde_json = { optional = true, version = "1.0" }
-serde = { optional = true, version = "1.0" }
-serde_derive = { optional = true, version = "1.0" }
+serde = { optional = true, version = "1.0", features = ["serde_derive"] }
 image = { optional = true, version = "0.17" }
 base64 = { optional = true, version = "0.3.0" }
+ron = { optional = true, version = "0.1.3" }
 
 [dev-dependencies]
 angle = {git = "https://github.com/servo/angle", branch = "servo"}
 env_logger = "0.4"
 rand = "0.3"                # for the benchmarks
 servo-glutin = "0.13"     # for the example apps
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
--- a/gfx/webrender/examples/basic.rs
+++ b/gfx/webrender/examples/basic.rs
@@ -251,17 +251,17 @@ impl Example for App {
             left: border_side,
             radius: BorderRadius::uniform(20.0),
         });
 
         let info = LayoutPrimitiveInfo::new((100, 100).to(200, 200));
         builder.push_border(&info, border_widths, border_details);
         builder.pop_clip_id();
 
-        if false {
+        if true {
             // draw text?
             let font_key = api.generate_font_key();
             let font_bytes = load_file("../wrench/reftests/text/FreeSans.ttf");
             resources.add_raw_font(font_key, font_bytes, 0);
 
             let font_instance_key = api.generate_font_instance_key();
             resources.add_font_instance(font_instance_key, font_key, Au::from_px(32), None, None, Vec::new());
 
--- a/gfx/webrender/examples/common/boilerplate.rs
+++ b/gfx/webrender/examples/common/boilerplate.rs
@@ -258,16 +258,29 @@ pub fn main_wrapper<E: Example>(
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::M),
                 ) => {
                     api.notify_memory_pressure();
                 }
+                #[cfg(feature = "capture")]
+                glutin::Event::KeyboardInput(
+                    glutin::ElementState::Pressed,
+                    _,
+                    Some(glutin::VirtualKeyCode::C),
+                ) => {
+                    let path: PathBuf = "captures/example".into();
+                    if path.is_dir() {
+                        api.load_capture(path);
+                    } else {
+                        api.save_capture(path);
+                    }
+                }
                 _ => if example.on_event(event, &api, document_id) {
                     let mut builder = DisplayListBuilder::new(pipeline_id, layout_size);
                     let mut resources = ResourceUpdates::new();
 
                     example.render(
                         &api,
                         &mut builder,
                         &mut resources,
@@ -280,17 +293,17 @@ pub fn main_wrapper<E: Example>(
                         epoch,
                         None,
                         layout_size,
                         builder.finalize(),
                         true,
                         resources,
                     );
                     api.generate_frame(document_id, None);
-                },
+                }
             }
         }
 
         renderer.update();
         renderer.render(framebuffer_size).unwrap();
         example.draw_custom(&*gl);
         window.swap_buffers().ok();
     }
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -3,204 +3,141 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifdef WR_VERTEX_SHADER
 
 void brush_vs(
     int prim_address,
     vec2 local_pos,
     RectWithSize local_rect,
-    ivec2 user_data
+    ivec2 user_data,
+    PictureTask pic_task
 );
 
-#define RASTERIZATION_MODE_LOCAL_SPACE      0.0
-#define RASTERIZATION_MODE_SCREEN_SPACE     1.0
-
-#define SEGMENT_ALL             0
-#define SEGMENT_TOP_LEFT        1
-#define SEGMENT_TOP_RIGHT       2
-#define SEGMENT_BOTTOM_RIGHT    3
-#define SEGMENT_BOTTOM_LEFT     4
-#define SEGMENT_TOP_MID         5
-#define SEGMENT_MID_RIGHT       6
-#define SEGMENT_BOTTOM_MID      7
-#define SEGMENT_MID_LEFT        8
-#define SEGMENT_CENTER          9
-
-#define AA_KIND_DEFAULT         0
-#define AA_KIND_SEGMENT         1
-
-#define VECS_PER_BRUSH_PRIM                 4
+#define VECS_PER_BRUSH_PRIM                 2
+#define VECS_PER_SEGMENT                    2
 
 struct BrushInstance {
     int picture_address;
     int prim_address;
-    int clip_node_id;
+    int clip_chain_rect_index;
     int scroll_node_id;
     int clip_address;
     int z;
-    int segment_kind;
+    int segment_index;
     ivec2 user_data;
 };
 
 BrushInstance load_brush() {
-	BrushInstance bi;
+    BrushInstance bi;
 
     bi.picture_address = aData0.x;
     bi.prim_address = aData0.y;
-    bi.clip_node_id = aData0.z / 65536;
+    bi.clip_chain_rect_index = aData0.z / 65536;
     bi.scroll_node_id = aData0.z % 65536;
     bi.clip_address = aData0.w;
     bi.z = aData1.x;
-    bi.segment_kind = aData1.y;
+    bi.segment_index = aData1.y;
     bi.user_data = aData1.zw;
 
     return bi;
 }
 
 struct BrushPrimitive {
     RectWithSize local_rect;
     RectWithSize local_clip_rect;
-    vec4 offsets;
-    int aa_kind;
 };
 
-BrushPrimitive fetch_brush_primitive(int address) {
-    vec4 data[4] = fetch_from_resource_cache_4(address);
+BrushPrimitive fetch_brush_primitive(int address, int clip_chain_rect_index) {
+    vec4 data[2] = fetch_from_resource_cache_2(address);
 
-    BrushPrimitive prim = BrushPrimitive(
-        RectWithSize(data[0].xy, data[0].zw),
-        RectWithSize(data[1].xy, data[1].zw),
-        data[2],
-        int(data[3].x)
-    );
+    RectWithSize clip_chain_rect = fetch_clip_chain_rect(clip_chain_rect_index);
+    RectWithSize brush_clip_rect = RectWithSize(data[1].xy, data[1].zw);
+    RectWithSize clip_rect = intersect_rects(clip_chain_rect, brush_clip_rect);
+
+    BrushPrimitive prim = BrushPrimitive(RectWithSize(data[0].xy, data[0].zw), clip_rect);
 
     return prim;
 }
 
 void main(void) {
     // Load the brush instance from vertex attributes.
     BrushInstance brush = load_brush();
 
     // Load the geometry for this brush. For now, this is simply the
     // local rect of the primitive. In the future, this will support
     // loading segment rects, and other rect formats (glyphs).
-    BrushPrimitive brush_prim = fetch_brush_primitive(brush.prim_address);
+    BrushPrimitive brush_prim =
+        fetch_brush_primitive(brush.prim_address, brush.clip_chain_rect_index);
 
     // Fetch the segment of this brush primitive we are drawing.
-    RectWithSize local_segment_rect;
-    vec4 edge_aa_segment_mask;
-
-    // p0 = origin of outer rect
-    // p1 = origin of inner rect
-    // p2 = bottom right corner of inner rect
-    // p3 = bottom right corner of outer rect
-    vec2 p0 = brush_prim.local_rect.p0;
-    vec2 p1 = brush_prim.local_rect.p0 + brush_prim.offsets.xy;
-    vec2 p2 = brush_prim.local_rect.p0 + brush_prim.local_rect.size - brush_prim.offsets.zw;
-    vec2 p3 = brush_prim.local_rect.p0 + brush_prim.local_rect.size;
-
-    switch (brush.segment_kind) {
-        case SEGMENT_ALL:
-            local_segment_rect = brush_prim.local_rect;
-            break;
-
-        case SEGMENT_TOP_LEFT:
-            local_segment_rect = RectWithSize(p0, p1 - p0);
-            break;
-        case SEGMENT_TOP_RIGHT:
-            local_segment_rect = RectWithSize(vec2(p2.x, p0.y), vec2(p3.x - p2.x, p1.y - p0.y));
-            break;
-        case SEGMENT_BOTTOM_RIGHT:
-            local_segment_rect = RectWithSize(vec2(p2.x, p2.y), vec2(p3.x - p2.x, p3.y - p2.y));
-            break;
-        case SEGMENT_BOTTOM_LEFT:
-            local_segment_rect = RectWithSize(vec2(p0.x, p2.y), vec2(p1.x - p0.x, p3.y - p2.y));
-            break;
+    int segment_address = brush.prim_address +
+                          VECS_PER_BRUSH_PRIM +
+                          VECS_PER_SPECIFIC_BRUSH +
+                          brush.segment_index * VECS_PER_SEGMENT;
 
-        case SEGMENT_TOP_MID:
-            local_segment_rect = RectWithSize(vec2(p1.x, p0.y), vec2(p2.x - p1.x, p1.y - p0.y));
-            break;
-        case SEGMENT_MID_RIGHT:
-            local_segment_rect = RectWithSize(vec2(p2.x, p1.y), vec2(p3.x - p2.x, p2.y - p1.y));
-            break;
-        case SEGMENT_BOTTOM_MID:
-            local_segment_rect = RectWithSize(vec2(p1.x, p2.y), vec2(p2.x - p1.x, p3.y - p2.y));
-            break;
-        case SEGMENT_MID_LEFT:
-            local_segment_rect = RectWithSize(vec2(p0.x, p1.y), vec2(p1.x - p0.x, p2.y - p1.y));
-            break;
-
-        case SEGMENT_CENTER:
-            local_segment_rect = RectWithSize(p1, p2 - p1);
-            break;
-
-        default:
-            local_segment_rect = RectWithSize(vec2(0.0), vec2(0.0));
-            break;
-    }
-
-    switch (brush_prim.aa_kind) {
-        case AA_KIND_SEGMENT:
-            // TODO: select these correctly based on the segment kind.
-            edge_aa_segment_mask = vec4(1.0);
-            break;
-        case AA_KIND_DEFAULT:
-            edge_aa_segment_mask = vec4(1.0);
-            break;
-    }
+    vec4[2] segment_data = fetch_from_resource_cache_2(segment_address);
+    RectWithSize local_segment_rect = RectWithSize(segment_data[0].xy, segment_data[0].zw);
 
     vec2 device_pos, local_pos;
 
     // Fetch the dynamic picture that we are drawing on.
     PictureTask pic_task = fetch_picture_task(brush.picture_address);
+    ClipArea clip_area = fetch_clip_area(brush.clip_address);
 
-    if (pic_task.rasterization_mode == RASTERIZATION_MODE_LOCAL_SPACE) {
+    if (pic_task.pic_kind_and_raster_mode > 0.0) {
         local_pos = local_segment_rect.p0 + aPosition.xy * local_segment_rect.size;
 
         // Right now - pictures only support local positions. In the future, this
         // will be expanded to support transform picture types (the common kind).
         device_pos = pic_task.common_data.task_rect.p0 +
                      uDevicePixelRatio * (local_pos - pic_task.content_origin);
 
+#ifdef WR_FEATURE_ALPHA_PASS
+        write_clip(
+            vec2(0.0),
+            clip_area
+        );
+#endif
+
         // Write the final position transformed by the orthographic device-pixel projection.
         gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
     } else {
         VertexInfo vi;
-        Layer layer = fetch_layer(brush.clip_node_id, brush.scroll_node_id);
-        ClipArea clip_area = fetch_clip_area(brush.clip_address);
+        ClipScrollNode scroll_node = fetch_clip_scroll_node(brush.scroll_node_id);
 
         // Write the normal vertex information out.
-        if (layer.is_axis_aligned) {
+        if (scroll_node.is_axis_aligned) {
             vi = write_vertex(
                 local_segment_rect,
                 brush_prim.local_clip_rect,
                 float(brush.z),
-                layer,
+                scroll_node,
                 pic_task,
                 brush_prim.local_rect
             );
 
             // TODO(gw): vLocalBounds may be referenced by
             //           the fragment shader when running in
             //           the alpha pass, even on non-transformed
             //           items. For now, just ensure it has no
             //           effect. We can tidy this up as we move
             //           more items to be brush shaders.
 #ifdef WR_FEATURE_ALPHA_PASS
             vLocalBounds = vec4(vec2(-1000000.0), vec2(1000000.0));
 #endif
         } else {
+            bvec4 edge_mask = notEqual(int(segment_data[1].x) & ivec4(1, 2, 4, 8), ivec4(0));
             vi = write_transform_vertex(
                 local_segment_rect,
                 brush_prim.local_rect,
                 brush_prim.local_clip_rect,
-                edge_aa_segment_mask,
+                mix(vec4(0.0), vec4(1.0), edge_mask),
                 float(brush.z),
-                layer,
+                scroll_node,
                 pic_task
             );
         }
 
         local_pos = vi.local_pos;
 
         // For brush instances in the alpha pass, always write
         // out clip information.
@@ -216,17 +153,18 @@ void main(void) {
 #endif
     }
 
     // Run the specific brush VS code to write interpolators.
     brush_vs(
         brush.prim_address + VECS_PER_BRUSH_PRIM,
         local_pos,
         brush_prim.local_rect,
-        brush.user_data
+        brush.user_data,
+        pic_task
     );
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 vec4 brush_fs();
 
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#define VECS_PER_SPECIFIC_BRUSH 0
+
 #include shared,prim_shared,brush
 
 #ifdef WR_FEATURE_ALPHA_PASS
 varying vec2 vLocalPos;
 #endif
 
 varying vec3 vUv;
 flat varying int vImageKind;
@@ -22,17 +24,18 @@ flat varying vec4 vColor;
 #define BRUSH_IMAGE_NINEPATCH   1
 #define BRUSH_IMAGE_MIRROR      2
 
 #ifdef WR_VERTEX_SHADER
 void brush_vs(
     int prim_address,
     vec2 local_pos,
     RectWithSize local_rect,
-    ivec2 user_data
+    ivec2 user_data,
+    PictureTask pic_task
 ) {
     // TODO(gw): For now, this brush_image shader is only
     //           being used to draw items from the intermediate
     //           surface cache (render tasks). In the future
     //           we can expand this to support items from
     //           the normal texture cache and unify this
     //           with the normal image shader.
     BlurTask blur_task = fetch_blur_task(user_data.x);
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/brush_line.glsl
@@ -0,0 +1,229 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 2
+
+#include shared,prim_shared,brush
+
+varying vec2 vLocalPos;
+
+flat varying vec4 vColor;
+flat varying int vStyle;
+flat varying float vAxisSelect;
+flat varying vec4 vParams;
+flat varying vec2 vLocalOrigin;
+
+#ifdef WR_VERTEX_SHADER
+
+#define LINE_ORIENTATION_VERTICAL       0
+#define LINE_ORIENTATION_HORIZONTAL     1
+
+struct Line {
+    vec4 color;
+    float wavyLineThickness;
+    float style;
+    float orientation;
+};
+
+Line fetch_line(int address) {
+    vec4 data[2] = fetch_from_resource_cache_2(address);
+    return Line(data[0], data[1].x, data[1].y, data[1].z);
+}
+
+void brush_vs(
+    int prim_address,
+    vec2 local_pos,
+    RectWithSize local_rect,
+    ivec2 user_data,
+    PictureTask pic_task
+) {
+    vLocalPos = local_pos;
+
+    Line line = fetch_line(prim_address);
+
+    switch (int(abs(pic_task.pic_kind_and_raster_mode))) {
+        case PIC_TYPE_TEXT_SHADOW:
+            vColor = pic_task.color;
+            break;
+        default:
+            vColor = line.color;
+            break;
+    }
+
+    vec2 pos, size;
+
+    switch (int(line.orientation)) {
+        case LINE_ORIENTATION_HORIZONTAL:
+            vAxisSelect = 0.0;
+            pos = local_rect.p0;
+            size = local_rect.size;
+            break;
+        case LINE_ORIENTATION_VERTICAL:
+            vAxisSelect = 1.0;
+            pos = local_rect.p0.yx;
+            size = local_rect.size.yx;
+            break;
+    }
+
+    vLocalOrigin = pos;
+    vStyle = int(line.style);
+
+    switch (vStyle) {
+        case LINE_STYLE_SOLID: {
+            break;
+        }
+        case LINE_STYLE_DASHED: {
+            float dash_length = size.y * 3.0;
+            vParams = vec4(2.0 * dash_length, // period
+                           dash_length,       // dash length
+                           0.0,
+                           0.0);
+            break;
+        }
+        case LINE_STYLE_DOTTED: {
+            float diameter = size.y;
+            float period = diameter * 2.0;
+            float center_line = pos.y + 0.5 * size.y;
+            float max_x = floor(size.x / period) * period;
+            vParams = vec4(period,
+                           diameter / 2.0, // radius
+                           center_line,
+                           max_x);
+            break;
+        }
+        case LINE_STYLE_WAVY: {
+            // This logic copied from gecko to get the same results
+            float line_thickness = max(line.wavyLineThickness, 1.0);
+            // Difference in height between peaks and troughs
+            // (and since slopes are 45 degrees, the length of each slope)
+            float slope_length = size.y - line_thickness;
+            // Length of flat runs
+            float flat_length = max((line_thickness - 1.0) * 2.0, 1.0);
+
+            vParams = vec4(line_thickness / 2.0,
+                           slope_length,
+                           flat_length,
+                           size.y);
+            break;
+        }
+    }
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#define MAGIC_WAVY_LINE_AA_SNAP         0.7
+
+float det(vec2 a, vec2 b) {
+    return a.x * b.y - b.x * a.y;
+}
+
+// From: http://research.microsoft.com/en-us/um/people/hoppe/ravg.pdf
+vec2 get_distance_vector(vec2 b0, vec2 b1, vec2 b2) {
+    float a = det(b0, b2);
+    float b = 2.0 * det(b1, b0);
+    float d = 2.0 * det(b2, b1);
+
+    float f = b * d - a * a;
+    vec2 d21 = b2 - b1;
+    vec2 d10 = b1 - b0;
+    vec2 d20 = b2 - b0;
+
+    vec2 gf = 2.0 * (b *d21 + d * d10 + a * d20);
+    gf = vec2(gf.y,-gf.x);
+    vec2 pp = -f * gf / dot(gf, gf);
+    vec2 d0p = b0 - pp;
+    float ap = det(d0p, d20);
+    float bp = 2.0 * det(d10, d0p);
+
+    float t = clamp((ap + bp) / (2.0 * a + b + d), 0.0, 1.0);
+    return mix(mix(b0, b1, t), mix(b1,b2,t), t);
+}
+
+// Approximate distance from point to quadratic bezier.
+float approx_distance(vec2 p, vec2 b0, vec2 b1, vec2 b2) {
+    return length(get_distance_vector(b0 - p, b1 - p, b2 - p));
+}
+
+vec4 brush_fs() {
+    // Find the appropriate distance to apply the step over.
+    vec2 local_pos = vLocalPos;
+    float aa_range = compute_aa_range(local_pos);
+    float alpha = 1.0;
+
+    // Select the x/y coord, depending on which axis this edge is.
+    vec2 pos = mix(local_pos.xy, local_pos.yx, vAxisSelect);
+
+    switch (vStyle) {
+        case LINE_STYLE_SOLID: {
+            break;
+        }
+        case LINE_STYLE_DASHED: {
+            // Get the main-axis position relative to closest dot or dash.
+            float x = mod(pos.x - vLocalOrigin.x, vParams.x);
+
+            // Calculate dash alpha (on/off) based on dash length
+            alpha = step(x, vParams.y);
+            break;
+        }
+        case LINE_STYLE_DOTTED: {
+            // Get the main-axis position relative to closest dot or dash.
+            float x = mod(pos.x - vLocalOrigin.x, vParams.x);
+
+            // Get the dot alpha
+            vec2 dot_relative_pos = vec2(x, pos.y) - vParams.yz;
+            float dot_distance = length(dot_relative_pos) - vParams.y;
+            alpha = distance_aa(aa_range, dot_distance);
+            // Clip off partial dots
+            alpha *= step(pos.x - vLocalOrigin.x, vParams.w);
+            break;
+        }
+        case LINE_STYLE_WAVY: {
+            vec2 normalized_local_pos = pos - vLocalOrigin.xy;
+
+            float half_line_thickness = vParams.x;
+            float slope_length = vParams.y;
+            float flat_length = vParams.z;
+            float vertical_bounds = vParams.w;
+            // Our pattern is just two slopes and two flats
+            float half_period = slope_length + flat_length;
+
+            float mid_height = vertical_bounds / 2.0;
+            float peak_offset = mid_height - half_line_thickness;
+            // Flip the wave every half period
+            float flip = -2.0 * (step(mod(normalized_local_pos.x, 2.0 * half_period), half_period) - 0.5);
+            // float flip = -1.0;
+            peak_offset *= flip;
+            float peak_height = mid_height + peak_offset;
+
+            // Convert pos to a local position within one half period
+            normalized_local_pos.x = mod(normalized_local_pos.x, half_period);
+
+            // Compute signed distance to the 3 lines that make up an arc
+            float dist1 = distance_to_line(vec2(0.0, peak_height),
+                                           vec2(1.0, -flip),
+                                           normalized_local_pos);
+            float dist2 = distance_to_line(vec2(0.0, peak_height),
+                                           vec2(0, -flip),
+                                           normalized_local_pos);
+            float dist3 = distance_to_line(vec2(flat_length, peak_height),
+                                           vec2(-1.0, -flip),
+                                           normalized_local_pos);
+            float dist = abs(max(max(dist1, dist2), dist3));
+
+            // Apply AA based on the thickness of the wave
+            alpha = distance_aa(aa_range, dist - half_line_thickness);
+
+            // Disable AA for thin lines
+            if (half_line_thickness <= 1.0) {
+                alpha = 1.0 - step(alpha, MAGIC_WAVY_LINE_AA_SNAP);
+            }
+
+            break;
+        }
+    }
+
+    return vColor * alpha;
+}
+#endif
--- a/gfx/webrender/res/brush_mask_corner.glsl
+++ b/gfx/webrender/res/brush_mask_corner.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#define VECS_PER_SPECIFIC_BRUSH 1
+
 #include shared,prim_shared,ellipse,brush
 
 flat varying float vClipMode;
 flat varying vec4 vClipCenter_Radius;
 flat varying vec4 vLocalRect;
 varying vec2 vLocalPos;
 
 #ifdef WR_VERTEX_SHADER
@@ -20,17 +22,18 @@ BrushMaskCornerPrimitive fetch_primitive
     vec4 data = fetch_from_resource_cache_1(address);
     return BrushMaskCornerPrimitive(data.xy, data.z);
 }
 
 void brush_vs(
     int prim_address,
     vec2 local_pos,
     RectWithSize local_rect,
-    ivec2 user_data
+    ivec2 user_data,
+    PictureTask pic_task
 ) {
     // Load the specific primitive.
     BrushMaskCornerPrimitive prim = fetch_primitive(prim_address);
 
     // Write clip parameters
     vClipMode = prim.clip_mode;
     vClipCenter_Radius = vec4(local_rect.p0 + prim.radius, prim.radius);
 
--- a/gfx/webrender/res/brush_mask_rounded_rect.glsl
+++ b/gfx/webrender/res/brush_mask_rounded_rect.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#define VECS_PER_SPECIFIC_BRUSH 4
+
 #include shared,prim_shared,ellipse,brush
 
 flat varying float vClipMode;
 flat varying vec4 vClipCenter_Radius_TL;
 flat varying vec4 vClipCenter_Radius_TR;
 flat varying vec4 vClipCenter_Radius_BR;
 flat varying vec4 vClipCenter_Radius_BL;
 flat varying vec4 vLocalRect;
@@ -34,17 +36,18 @@ RoundedRectPrimitive fetch_rounded_rect_
         data[3].zw
     );
 }
 
 void brush_vs(
     int prim_address,
     vec2 local_pos,
     RectWithSize local_rect,
-    ivec2 user_data
+    ivec2 user_data,
+    PictureTask pic_task
 ) {
     // Load the specific primitive.
     RoundedRectPrimitive prim = fetch_rounded_rect_primitive(prim_address);
 
     // Write clip parameters
     vClipMode = prim.clip_mode;
 
     // TODO(gw): In the future, when brush primitives may be segment rects
--- a/gfx/webrender/res/brush_solid.glsl
+++ b/gfx/webrender/res/brush_solid.glsl
@@ -1,12 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#define VECS_PER_SPECIFIC_BRUSH 1
+
 #include shared,prim_shared,brush
 
 flat varying vec4 vColor;
 
 #ifdef WR_FEATURE_ALPHA_PASS
 varying vec2 vLocalPos;
 #endif
 
@@ -20,17 +22,18 @@ SolidBrush fetch_solid_primitive(int add
     vec4 data = fetch_from_resource_cache_1(address);
     return SolidBrush(data);
 }
 
 void brush_vs(
     int prim_address,
     vec2 local_pos,
     RectWithSize local_rect,
-    ivec2 user_data
+    ivec2 user_data,
+    PictureTask pic_task
 ) {
     SolidBrush prim = fetch_solid_primitive(prim_address);
     vColor = prim.color;
 
 #ifdef WR_FEATURE_ALPHA_PASS
     vLocalPos = local_pos;
 #endif
 }
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -6,33 +6,33 @@
 
 #define SEGMENT_ALL         0
 #define SEGMENT_CORNER_TL   1
 #define SEGMENT_CORNER_TR   2
 #define SEGMENT_CORNER_BL   3
 #define SEGMENT_CORNER_BR   4
 
 in int aClipRenderTaskAddress;
-in int aClipLayerAddress;
+in int aScrollNodeId;
 in int aClipSegment;
 in ivec4 aClipDataResourceAddress;
 
 struct ClipMaskInstance {
     int render_task_address;
-    int layer_address;
+    int scroll_node_id;
     int segment;
     ivec2 clip_data_address;
     ivec2 resource_address;
 };
 
 ClipMaskInstance fetch_clip_item() {
     ClipMaskInstance cmi;
 
     cmi.render_task_address = aClipRenderTaskAddress;
-    cmi.layer_address = aClipLayerAddress;
+    cmi.scroll_node_id = aScrollNodeId;
     cmi.segment = aClipSegment;
     cmi.clip_data_address = aClipDataResourceAddress.xy;
     cmi.resource_address = aClipDataResourceAddress.zw;
 
     return cmi;
 }
 
 struct ClipVertexInfo {
@@ -44,28 +44,28 @@ struct ClipVertexInfo {
 RectWithSize intersect_rect(RectWithSize a, RectWithSize b) {
     vec4 p = clamp(vec4(a.p0, a.p0 + a.size), b.p0.xyxy, b.p0.xyxy + b.size.xyxy);
     return RectWithSize(p.xy, max(vec2(0.0), p.zw - p.xy));
 }
 
 // The transformed vertex function that always covers the whole clip area,
 // which is the intersection of all clip instances of a given primitive
 ClipVertexInfo write_clip_tile_vertex(RectWithSize local_clip_rect,
-                                      Layer layer,
+                                      ClipScrollNode scroll_node,
                                       ClipArea area) {
     vec2 actual_pos = area.screen_origin + aPosition.xy * area.common_data.task_rect.size;
 
-    vec4 layer_pos = get_layer_pos(actual_pos / uDevicePixelRatio, layer);
+    vec4 node_pos = get_node_pos(actual_pos / uDevicePixelRatio, scroll_node);
 
-    // compute the point position in side the layer, in CSS space
+    // compute the point position inside the scroll node, in CSS space
     vec2 vertex_pos = actual_pos +
                       area.common_data.task_rect.p0 -
                       area.screen_origin;
 
     gl_Position = uTransform * vec4(vertex_pos, 0.0, 1);
 
     vLocalBounds = vec4(local_clip_rect.p0, local_clip_rect.p0 + local_clip_rect.size);
 
-    ClipVertexInfo vi = ClipVertexInfo(layer_pos.xyw, actual_pos, local_clip_rect);
+    ClipVertexInfo vi = ClipVertexInfo(node_pos.xyw, actual_pos, local_clip_rect);
     return vi;
 }
 
 #endif //WR_VERTEX_SHADER
--- a/gfx/webrender/res/cs_clip_border.glsl
+++ b/gfx/webrender/res/cs_clip_border.glsl
@@ -59,17 +59,17 @@ struct BorderClipDot {
 BorderClipDot fetch_border_clip_dot(ivec2 address, int segment) {
     vec4 data = fetch_from_resource_cache_1_direct(address + ivec2(2 + (segment - 1), 0));
     return BorderClipDot(data.xyz);
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    Layer layer = fetch_layer(cmi.layer_address, cmi.layer_address);
+    ClipScrollNode scroll_node = fetch_clip_scroll_node(cmi.scroll_node_id);
 
     // Fetch the header information for this corner clip.
     BorderCorner corner = fetch_border_corner(cmi.clip_data_address);
     vClipCenter = corner.clip_center;
 
     if (cmi.segment == 0) {
         // The first segment is used to zero out the border corner.
         vAlphaMask = vec2(0.0);
@@ -115,30 +115,30 @@ void main(void) {
     }
 
     // Get local vertex position for the corner rect.
     // TODO(gw): We could reduce the number of pixels written here
     // by calculating a tight fitting bounding box of the dash itself.
     vec2 pos = corner.rect.p0 + aPosition.xy * corner.rect.size;
 
     // Transform to world pos
-    vec4 world_pos = layer.transform * vec4(pos, 0.0, 1.0);
+    vec4 world_pos = scroll_node.transform * vec4(pos, 0.0, 1.0);
     world_pos.xyz /= world_pos.w;
 
     // Scale into device pixels.
     vec2 device_pos = world_pos.xy * uDevicePixelRatio;
 
     // Position vertex within the render task area.
     vec2 final_pos = device_pos -
                      area.screen_origin +
                      area.common_data.task_rect.p0;
 
     // Calculate the local space position for this vertex.
-    vec4 layer_pos = get_layer_pos(world_pos.xy, layer);
-    vPos = layer_pos.xyw;
+    vec4 node_pos = get_node_pos(world_pos.xy, scroll_node);
+    vPos = node_pos.xyw;
 
     gl_Position = uTransform * vec4(final_pos, 0.0, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     vec2 local_pos = vPos.xy / vPos.z;
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -19,23 +19,23 @@ ImageMaskData fetch_mask_data(ivec2 addr
     RectWithSize local_rect = RectWithSize(data.xy, data.zw);
     ImageMaskData mask_data = ImageMaskData(local_rect);
     return mask_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    Layer layer = fetch_layer(cmi.layer_address, cmi.layer_address);
+    ClipScrollNode scroll_node = fetch_clip_scroll_node(cmi.scroll_node_id);
     ImageMaskData mask = fetch_mask_data(cmi.clip_data_address);
     RectWithSize local_rect = mask.local_rect;
     ImageResource res = fetch_image_resource_direct(cmi.resource_address);
 
     ClipVertexInfo vi = write_clip_tile_vertex(local_rect,
-                                               layer,
+                                               scroll_node,
                                                area);
 
     vPos = vi.local_pos;
     vLayer = res.layer;
 
     vClipMaskUv = vec3((vPos.xy / vPos.z - local_rect.p0) / local_rect.size, 0.0);
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vClipMaskUvRect = vec4(res.uv_rect.xy, res.uv_rect.zw - res.uv_rect.xy) / texture_size.xyxy;
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -53,23 +53,21 @@ ClipData fetch_clip(ivec2 address) {
     clip.bottom_right = fetch_clip_corner(address, 3.0);
 
     return clip;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
-    Layer layer = fetch_layer(cmi.layer_address, cmi.layer_address);
+    ClipScrollNode scroll_node = fetch_clip_scroll_node(cmi.scroll_node_id);
     ClipData clip = fetch_clip(cmi.clip_data_address);
     RectWithSize local_rect = clip.rect.rect;
 
-    ClipVertexInfo vi = write_clip_tile_vertex(local_rect,
-                                               layer,
-                                               area);
+    ClipVertexInfo vi = write_clip_tile_vertex(local_rect, scroll_node, area);
     vPos = vi.local_pos;
 
     vClipMode = clip.rect.mode.x;
 
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_rect);
 
     vec2 r_tl = clip.top_left.outer_inner_radius.xy;
     vec2 r_tr = clip.top_right.outer_inner_radius.xy;
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -64,24 +64,26 @@ vec4[2] fetch_from_resource_cache_2(int 
     return vec4[2](
         TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
         TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0))
     );
 }
 
 #ifdef WR_VERTEX_SHADER
 
-#define VECS_PER_LAYER              7
+#define VECS_PER_CLIP_SCROLL_NODE   5
+#define VECS_PER_LOCAL_CLIP_RECT    1
 #define VECS_PER_RENDER_TASK        3
 #define VECS_PER_PRIM_HEADER        2
 #define VECS_PER_TEXT_RUN           3
 #define VECS_PER_GRADIENT           3
 #define VECS_PER_GRADIENT_STOP      2
 
 uniform HIGHP_SAMPLER_FLOAT sampler2D sClipScrollNodes;
+uniform HIGHP_SAMPLER_FLOAT sampler2D sLocalClipRects;
 uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;
 
 // Instanced attributes
 in ivec4 aData0;
 in ivec4 aData1;
 
 // get_fetch_uv is a macro to work around a macOS Intel driver parsing bug.
 // TODO: convert back to a function once the driver issues are resolved, if ever.
@@ -138,72 +140,44 @@ vec4 fetch_from_resource_cache_1_direct(
 
 vec4 fetch_from_resource_cache_1(int address) {
     ivec2 uv = get_resource_cache_uv(address);
     return texelFetch(sResourceCache, uv, 0);
 }
 
 struct ClipScrollNode {
     mat4 transform;
-    vec4 local_clip_rect;
-    vec2 reference_frame_relative_scroll_offset;
-    vec2 scroll_offset;
     bool is_axis_aligned;
 };
 
 ClipScrollNode fetch_clip_scroll_node(int index) {
     ClipScrollNode node;
 
     // Create a UV base coord for each 8 texels.
     // This is required because trying to use an offset
     // of more than 8 texels doesn't work on some versions
     // of OSX.
-    ivec2 uv = get_fetch_uv(index, VECS_PER_LAYER);
+    ivec2 uv = get_fetch_uv(index, VECS_PER_CLIP_SCROLL_NODE);
     ivec2 uv0 = ivec2(uv.x + 0, uv.y);
 
     node.transform[0] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(0, 0));
     node.transform[1] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(1, 0));
     node.transform[2] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(2, 0));
     node.transform[3] = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(3, 0));
 
-    vec4 clip_rect = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(4, 0));
-    node.local_clip_rect = clip_rect;
-
-    vec4 offsets = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(5, 0));
-    node.reference_frame_relative_scroll_offset = offsets.xy;
-    node.scroll_offset = offsets.zw;
-
-    vec4 misc = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(6, 0));
+    vec4 misc = TEXEL_FETCH(sClipScrollNodes, uv0, 0, ivec2(4, 0));
     node.is_axis_aligned = misc.x == 0.0;
 
     return node;
 }
 
-struct Layer {
-    mat4 transform;
-    RectWithSize local_clip_rect;
-    bool is_axis_aligned;
-};
-
-Layer fetch_layer(int clip_node_id, int scroll_node_id) {
-    ClipScrollNode clip_node = fetch_clip_scroll_node(clip_node_id);
-    ClipScrollNode scroll_node = fetch_clip_scroll_node(scroll_node_id);
-
-    Layer layer;
-    layer.transform = scroll_node.transform;
-
-    vec4 local_clip_rect = clip_node.local_clip_rect;
-    local_clip_rect.xy += clip_node.reference_frame_relative_scroll_offset;
-    local_clip_rect.xy -= scroll_node.reference_frame_relative_scroll_offset;
-    local_clip_rect.xy -= scroll_node.scroll_offset;
-
-    layer.local_clip_rect = RectWithSize(local_clip_rect.xy, local_clip_rect.zw);
-    layer.is_axis_aligned = scroll_node.is_axis_aligned;
-
-    return layer;
+RectWithSize fetch_clip_chain_rect(int index) {
+    ivec2 uv = get_fetch_uv(index, VECS_PER_LOCAL_CLIP_RECT);
+    vec4 rect = TEXEL_FETCH(sLocalClipRects, uv, 0, ivec2(0, 0));
+    return RectWithSize(rect.xy, rect.zw);
 }
 
 struct RenderTaskCommonData {
     RectWithSize task_rect;
     float texture_layer_index;
 };
 
 struct RenderTaskData {
@@ -252,25 +226,29 @@ RenderTaskCommonData fetch_render_task_c
     RenderTaskCommonData data = RenderTaskCommonData(
         task_rect,
         texel1.x
     );
 
     return data;
 }
 
+#define PIC_TYPE_IMAGE          1
+#define PIC_TYPE_TEXT_SHADOW    2
+#define PIC_TYPE_BOX_SHADOW     3
+
 /*
  The dynamic picture that this brush exists on. Right now, it
  contains minimal information. In the future, it will describe
  the transform mode of primitives on this picture, among other things.
  */
 struct PictureTask {
     RenderTaskCommonData common_data;
     vec2 content_origin;
-    float rasterization_mode;
+    float pic_kind_and_raster_mode;
     vec4 color;
 };
 
 PictureTask fetch_picture_task(int address) {
     RenderTaskData task_data = fetch_render_task_data(address);
 
     PictureTask task = PictureTask(
         task_data.common_data,
@@ -396,31 +374,31 @@ Glyph fetch_glyph(int specific_prim_addr
 }
 
 struct PrimitiveInstance {
     int prim_address;
     int specific_prim_address;
     int render_task_index;
     int clip_task_index;
     int scroll_node_id;
-    int clip_node_id;
+    int clip_chain_rect_index;
     int z;
     int user_data0;
     int user_data1;
     int user_data2;
 };
 
 PrimitiveInstance fetch_prim_instance() {
     PrimitiveInstance pi;
 
     pi.prim_address = aData0.x;
     pi.specific_prim_address = pi.prim_address + VECS_PER_PRIM_HEADER;
     pi.render_task_index = aData0.y;
     pi.clip_task_index = aData0.z;
-    pi.clip_node_id = aData0.w / 65536;
+    pi.clip_chain_rect_index = aData0.w / 65536;
     pi.scroll_node_id = aData0.w % 65536;
     pi.z = aData1.x;
     pi.user_data0 = aData1.y;
     pi.user_data1 = aData1.z;
     pi.user_data2 = aData1.w;
 
     return pi;
 }
@@ -448,17 +426,17 @@ CompositeInstance fetch_composite_instan
     ci.user_data1 = aData1.y;
     ci.user_data2 = aData1.z;
     ci.user_data3 = aData1.w;
 
     return ci;
 }
 
 struct Primitive {
-    Layer layer;
+    ClipScrollNode scroll_node;
     ClipArea clip_area;
     PictureTask task;
     RectWithSize local_rect;
     RectWithSize local_clip_rect;
     int specific_prim_address;
     int user_data0;
     int user_data1;
     int user_data2;
@@ -476,23 +454,25 @@ PrimitiveGeometry fetch_primitive_geomet
                              RectWithSize(geom[1].xy, geom[1].zw));
 }
 
 Primitive load_primitive() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     Primitive prim;
 
-    prim.layer = fetch_layer(pi.clip_node_id, pi.scroll_node_id);
+    prim.scroll_node = fetch_clip_scroll_node(pi.scroll_node_id);
     prim.clip_area = fetch_clip_area(pi.clip_task_index);
     prim.task = fetch_picture_task(pi.render_task_index);
 
+    RectWithSize clip_chain_rect = fetch_clip_chain_rect(pi.clip_chain_rect_index);
+
     PrimitiveGeometry geom = fetch_primitive_geometry(pi.prim_address);
     prim.local_rect = geom.local_rect;
-    prim.local_clip_rect = geom.local_clip_rect;
+    prim.local_clip_rect = intersect_rects(clip_chain_rect, geom.local_clip_rect);
 
     prim.specific_prim_address = pi.specific_prim_address;
     prim.user_data0 = pi.user_data0;
     prim.user_data1 = pi.user_data1;
     prim.user_data2 = pi.user_data2;
     prim.z = float(pi.z);
 
     return prim;
@@ -510,60 +490,60 @@ bool ray_plane(vec3 normal, vec3 point, 
         return t >= 0.0;
     }
 
     return false;
 }
 
 // Apply the inverse transform "inv_transform"
 // to the reference point "ref" in CSS space,
-// producing a local point on a layer plane,
+// producing a local point on a ClipScrollNode plane,
 // set by a base point "a" and a normal "n".
 vec4 untransform(vec2 ref, vec3 n, vec3 a, mat4 inv_transform) {
     vec3 p = vec3(ref, -10000.0);
     vec3 d = vec3(0, 0, 1.0);
 
     float t = 0.0;
-    // get an intersection of the layer plane with Z axis vector,
+    // get an intersection of the ClipScrollNode plane with Z axis vector,
     // originated from the "ref" point
     ray_plane(n, a, p, d, t);
-    float z = p.z + d.z * t; // Z of the visible point on the layer
+    float z = p.z + d.z * t; // Z of the visible point on the ClipScrollNode
 
     vec4 r = inv_transform * vec4(ref, z, 1.0);
     return r;
 }
 
-// Given a CSS space position, transform it back into the layer space.
-vec4 get_layer_pos(vec2 pos, Layer layer) {
-    // get a point on the layer plane
-    vec4 ah = layer.transform * vec4(0.0, 0.0, 0.0, 1.0);
+// Given a CSS space position, transform it back into the ClipScrollNode space.
+vec4 get_node_pos(vec2 pos, ClipScrollNode node) {
+    // get a point on the scroll node plane
+    vec4 ah = node.transform * vec4(0.0, 0.0, 0.0, 1.0);
     vec3 a = ah.xyz / ah.w;
 
-    // get the normal to the layer plane
-    mat4 inv_transform = inverse(layer.transform);
+    // get the normal to the scroll node plane
+    mat4 inv_transform = inverse(node.transform);
     vec3 n = transpose(mat3(inv_transform)) * vec3(0.0, 0.0, 1.0);
     return untransform(pos, n, a, inv_transform);
 }
 
 // Compute a snapping offset in world space (adjusted to pixel ratio),
-// given local position on the layer and a snap rectangle.
+// given local position on the scroll_node and a snap rectangle.
 vec2 compute_snap_offset(vec2 local_pos,
-                         Layer layer,
+                         ClipScrollNode scroll_node,
                          RectWithSize snap_rect) {
     // Ensure that the snap rect is at *least* one device pixel in size.
     // TODO(gw): It's not clear to me that this is "correct". Specifically,
     //           how should it interact with sub-pixel snap rects when there
-    //           is a layer transform with scale present? But it does fix
+    //           is a scroll_node transform with scale present? But it does fix
     //           the test cases we have in Servo that are failing without it
     //           and seem better than not having this at all.
     snap_rect.size = max(snap_rect.size, vec2(1.0 / uDevicePixelRatio));
 
     // Transform the snap corners to the world space.
-    vec4 world_snap_p0 = layer.transform * vec4(snap_rect.p0, 0.0, 1.0);
-    vec4 world_snap_p1 = layer.transform * vec4(snap_rect.p0 + snap_rect.size, 0.0, 1.0);
+    vec4 world_snap_p0 = scroll_node.transform * vec4(snap_rect.p0, 0.0, 1.0);
+    vec4 world_snap_p1 = scroll_node.transform * vec4(snap_rect.p0 + snap_rect.size, 0.0, 1.0);
     // Snap bounds in world coordinates, adjusted for pixel ratio. XY = top left, ZW = bottom right
     vec4 world_snap = uDevicePixelRatio * vec4(world_snap_p0.xy, world_snap_p1.xy) /
                                           vec4(world_snap_p0.ww, world_snap_p1.ww);
     /// World offsets applied to the corners of the snap rectangle.
     vec4 snap_offsets = floor(world_snap + 0.5) - world_snap;
 
     /// Compute the position of this vertex inside the snap rectangle.
     vec2 normalized_snap_pos = (local_pos - snap_rect.p0) / snap_rect.size;
@@ -574,31 +554,31 @@ vec2 compute_snap_offset(vec2 local_pos,
 struct VertexInfo {
     vec2 local_pos;
     vec2 screen_pos;
 };
 
 VertexInfo write_vertex(RectWithSize instance_rect,
                         RectWithSize local_clip_rect,
                         float z,
-                        Layer layer,
+                        ClipScrollNode scroll_node,
                         PictureTask task,
                         RectWithSize snap_rect) {
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = instance_rect.p0 + instance_rect.size * aPosition.xy;
 
     // Clamp to the two local clip rects.
-    vec2 clamped_local_pos = clamp_rect(clamp_rect(local_pos, local_clip_rect), layer.local_clip_rect);
+    vec2 clamped_local_pos = clamp_rect(local_pos, local_clip_rect);
 
     /// Compute the snapping offset.
-    vec2 snap_offset = compute_snap_offset(clamped_local_pos, layer, snap_rect);
+    vec2 snap_offset = compute_snap_offset(clamped_local_pos, scroll_node, snap_rect);
 
     // Transform the current vertex to world space.
-    vec4 world_pos = layer.transform * vec4(clamped_local_pos, 0.0, 1.0);
+    vec4 world_pos = scroll_node.transform * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos + snap_offset -
                      task.content_origin +
                      task.common_data.task_rect.p0;
@@ -628,29 +608,25 @@ vec2 intersect_lines(vec2 p0, vec2 p1, v
     return vec2(nx / d, ny / d);
 }
 
 VertexInfo write_transform_vertex(RectWithSize local_segment_rect,
                                   RectWithSize local_prim_rect,
                                   RectWithSize local_clip_rect,
                                   vec4 clip_edge_mask,
                                   float z,
-                                  Layer layer,
+                                  ClipScrollNode scroll_node,
                                   PictureTask task) {
-    // Calculate a clip rect from local clip + layer clip.
+    // Calculate a clip rect from local_rect + local clip
     RectWithEndpoint clip_rect = to_rect_with_endpoint(local_clip_rect);
-    clip_rect.p0 = clamp_rect(clip_rect.p0, layer.local_clip_rect);
-    clip_rect.p1 = clamp_rect(clip_rect.p1, layer.local_clip_rect);
-
-    // Calculate a clip rect from local_rect + local clip + layer clip.
     RectWithEndpoint segment_rect = to_rect_with_endpoint(local_segment_rect);
     segment_rect.p0 = clamp(segment_rect.p0, clip_rect.p0, clip_rect.p1);
     segment_rect.p1 = clamp(segment_rect.p1, clip_rect.p0, clip_rect.p1);
 
-    // Calculate a clip rect from local_rect + local clip + layer clip.
+    // Calculate a clip rect from local_rect + local clip
     RectWithEndpoint prim_rect = to_rect_with_endpoint(local_prim_rect);
     prim_rect.p0 = clamp(prim_rect.p0, clip_rect.p0, clip_rect.p1);
     prim_rect.p1 = clamp(prim_rect.p1, clip_rect.p0, clip_rect.p1);
 
     // As this is a transform shader, extrude by 2 (local space) pixels
     // in each direction. This gives enough space around the edge to
     // apply distance anti-aliasing. Technically, it:
     // (a) slightly over-estimates the number of required pixels in the simple case.
@@ -661,27 +637,27 @@ VertexInfo write_transform_vertex(RectWi
     float extrude_distance = 2.0;
     local_segment_rect.p0 -= vec2(extrude_distance);
     local_segment_rect.size += vec2(2.0 * extrude_distance);
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = local_segment_rect.p0 + local_segment_rect.size * aPosition.xy;
 
     // Transform the current vertex to the world cpace.
-    vec4 world_pos = layer.transform * vec4(local_pos, 0.0, 1.0);
+    vec4 world_pos = scroll_node.transform * vec4(local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
     vec2 task_offset = task.common_data.task_rect.p0 - task.content_origin;
 
     // We want the world space coords to be perspective divided by W.
     // We also want that to apply to any interpolators. However, we
     // want a constant Z across the primitive, since we're using it
     // for draw ordering - so scale by the W coord to ensure this.
-    vec4 final_pos = vec4(world_pos.xy * uDevicePixelRatio + task_offset,
+    vec4 final_pos = vec4((device_pos + task_offset) * world_pos.w,
                           z * world_pos.w,
                           world_pos.w);
     gl_Position = uTransform * final_pos;
 
     vLocalBounds = mix(
         vec4(prim_rect.p0, prim_rect.p1),
         vec4(segment_rect.p0, segment_rect.p1),
         clip_edge_mask
@@ -693,17 +669,17 @@ VertexInfo write_transform_vertex(RectWi
 
 VertexInfo write_transform_vertex_primitive(Primitive prim) {
     return write_transform_vertex(
         prim.local_rect,
         prim.local_rect,
         prim.local_clip_rect,
         vec4(0.0),
         prim.z,
-        prim.layer,
+        prim.scroll_node,
         prim.task
     );
 }
 
 struct GlyphResource {
     vec4 uv_rect;
     float layer;
     vec2 offset;
--- a/gfx/webrender/res/ps_angle_gradient.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.glsl
@@ -18,17 +18,17 @@ varying vec2 vPos;
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.specific_prim_address);
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
-                                 prim.layer,
+                                 prim.scroll_node,
                                  prim.task,
                                  prim.local_rect);
 
     vPos = vi.local_pos - prim.local_rect.p0;
 
     vec2 start_point = gradient.start_end_point.xy;
     vec2 end_point = gradient.start_end_point.zw;
     vec2 dir = end_point - start_point;
--- a/gfx/webrender/res/ps_border_corner.glsl
+++ b/gfx/webrender/res/ps_border_corner.glsl
@@ -298,23 +298,23 @@ void main(void) {
     segment_rect.size = p1 - p0;
 
 #ifdef WR_FEATURE_TRANSFORM
     VertexInfo vi = write_transform_vertex(segment_rect,
                                            prim.local_rect,
                                            prim.local_clip_rect,
                                            vec4(1.0),
                                            prim.z,
-                                           prim.layer,
+                                           prim.scroll_node,
                                            prim.task);
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
-                                 prim.layer,
+                                 prim.scroll_node,
                                  prim.task,
                                  prim.local_rect);
 #endif
 
     vLocalPos = vi.local_pos;
     write_clip(vi.screen_pos, prim.clip_area);
 }
 #endif
--- a/gfx/webrender/res/ps_border_edge.glsl
+++ b/gfx/webrender/res/ps_border_edge.glsl
@@ -216,23 +216,23 @@ void main(void) {
     write_color1(color, style, color_flip);
 
 #ifdef WR_FEATURE_TRANSFORM
     VertexInfo vi = write_transform_vertex(segment_rect,
                                            prim.local_rect,
                                            prim.local_clip_rect,
                                            vec4(1.0),
                                            prim.z,
-                                           prim.layer,
+                                           prim.scroll_node,
                                            prim.task);
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
-                                 prim.layer,
+                                 prim.scroll_node,
                                  prim.task,
                                  prim.local_rect);
 #endif
 
     vLocalPos = vi.local_pos;
     write_clip(vi.screen_pos, prim.clip_area);
 }
 #endif
--- a/gfx/webrender/res/ps_gradient.glsl
+++ b/gfx/webrender/res/ps_gradient.glsl
@@ -67,25 +67,25 @@ void main(void) {
     }
 
 #ifdef WR_FEATURE_TRANSFORM
     VertexInfo vi = write_transform_vertex(segment_rect,
                                            prim.local_rect,
                                            prim.local_clip_rect,
                                            vec4(1.0),
                                            prim.z,
-                                           prim.layer,
+                                           prim.scroll_node,
                                            prim.task);
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy - prim.local_rect.p0) / prim.local_rect.size;
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
-                                 prim.layer,
+                                 prim.scroll_node,
                                  prim.task,
                                  prim.local_rect);
 
     vec2 f = (vi.local_pos - segment_rect.p0) / segment_rect.size;
     vLocalPos = vi.local_pos;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
--- a/gfx/webrender/res/ps_image.glsl
+++ b/gfx/webrender/res/ps_image.glsl
@@ -29,17 +29,17 @@ void main(void) {
 #ifdef WR_FEATURE_TRANSFORM
     VertexInfo vi = write_transform_vertex_primitive(prim);
     vLocalPos = vi.local_pos;
     vLocalRect = vec4(prim.local_rect.p0, prim.local_rect.p0 + prim.local_rect.size);
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
-                                 prim.layer,
+                                 prim.scroll_node,
                                  prim.task,
                                  prim.local_rect);
     vLocalPos = vi.local_pos - prim.local_rect.p0;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
     // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
deleted file mode 100644
--- a/gfx/webrender/res/ps_line.glsl
+++ /dev/null
@@ -1,254 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include shared,prim_shared
-
-varying vec4 vColor;
-flat varying int vStyle;
-flat varying float vAxisSelect;
-flat varying vec4 vParams;
-flat varying vec2 vLocalOrigin;
-
-varying vec2 vLocalPos;
-
-#ifdef WR_VERTEX_SHADER
-#define LINE_ORIENTATION_VERTICAL       0
-#define LINE_ORIENTATION_HORIZONTAL     1
-
-struct Line {
-    vec4 color;
-    float wavyLineThickness;
-    float style;
-    float orientation;
-};
-
-Line fetch_line(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    return Line(data[0], data[1].x, data[1].y, data[1].z);
-}
-
-void main(void) {
-    Primitive prim = load_primitive();
-    Line line = fetch_line(prim.specific_prim_address);
-
-    vec2 pos, size;
-
-    switch (int(line.orientation)) {
-        case LINE_ORIENTATION_HORIZONTAL:
-            vAxisSelect = 0.0;
-            pos = prim.local_rect.p0;
-            size = prim.local_rect.size;
-            break;
-        case LINE_ORIENTATION_VERTICAL:
-            vAxisSelect = 1.0;
-            pos = prim.local_rect.p0.yx;
-            size = prim.local_rect.size.yx;
-            break;
-    }
-
-    vLocalOrigin = pos;
-    vStyle = int(line.style);
-
-    switch (vStyle) {
-        case LINE_STYLE_SOLID: {
-            break;
-        }
-        case LINE_STYLE_DASHED: {
-            float dash_length = size.y * 3.0;
-            vParams = vec4(2.0 * dash_length, // period
-                           dash_length,       // dash length
-                           0.0,
-                           0.0);
-            break;
-        }
-        case LINE_STYLE_DOTTED: {
-            float diameter = size.y;
-            float period = diameter * 2.0;
-            float center_line = pos.y + 0.5 * size.y;
-            float max_x = floor(size.x / period) * period;
-            vParams = vec4(period,
-                           diameter / 2.0, // radius
-                           center_line,
-                           max_x);
-            break;
-        }
-        case LINE_STYLE_WAVY: {
-            // This logic copied from gecko to get the same results
-            float line_thickness = max(line.wavyLineThickness, 1.0);
-            // Difference in height between peaks and troughs
-            // (and since slopes are 45 degrees, the length of each slope)
-            float slope_length = size.y - line_thickness;
-            // Length of flat runs
-            float flat_length = max((line_thickness - 1.0) * 2.0, 1.0);
-
-            vParams = vec4(line_thickness / 2.0,
-                           slope_length,
-                           flat_length,
-                           size.y);
-            break;
-        }
-    }
-
-#ifdef WR_FEATURE_CACHE
-    vec2 device_origin = prim.task.common_data.task_rect.p0 +
-                         uDevicePixelRatio * (prim.local_rect.p0 - prim.task.content_origin);
-    vec2 device_size = uDevicePixelRatio * prim.local_rect.size;
-
-    vec2 device_pos = mix(device_origin,
-                          device_origin + device_size,
-                          aPosition.xy);
-
-    vColor = prim.task.color;
-    vLocalPos = mix(prim.local_rect.p0,
-                    prim.local_rect.p0 + prim.local_rect.size,
-                    aPosition.xy);
-
-    gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
-#else
-    vColor = line.color;
-
-    #ifdef WR_FEATURE_TRANSFORM
-        VertexInfo vi = write_transform_vertex_primitive(prim);
-    #else
-        VertexInfo vi = write_vertex(prim.local_rect,
-                                     prim.local_clip_rect,
-                                     prim.z,
-                                     prim.layer,
-                                     prim.task,
-                                     prim.local_rect);
-    #endif
-
-    vLocalPos = vi.local_pos;
-    write_clip(vi.screen_pos, prim.clip_area);
-#endif
-}
-#endif
-
-#ifdef WR_FRAGMENT_SHADER
-
-#define MAGIC_WAVY_LINE_AA_SNAP         0.7
-
-float det(vec2 a, vec2 b) {
-    return a.x * b.y - b.x * a.y;
-}
-
-// From: http://research.microsoft.com/en-us/um/people/hoppe/ravg.pdf
-vec2 get_distance_vector(vec2 b0, vec2 b1, vec2 b2) {
-    float a = det(b0, b2);
-    float b = 2.0 * det(b1, b0);
-    float d = 2.0 * det(b2, b1);
-
-    float f = b * d - a * a;
-    vec2 d21 = b2 - b1;
-    vec2 d10 = b1 - b0;
-    vec2 d20 = b2 - b0;
-
-    vec2 gf = 2.0 * (b *d21 + d * d10 + a * d20);
-    gf = vec2(gf.y,-gf.x);
-    vec2 pp = -f * gf / dot(gf, gf);
-    vec2 d0p = b0 - pp;
-    float ap = det(d0p, d20);
-    float bp = 2.0 * det(d10, d0p);
-
-    float t = clamp((ap + bp) / (2.0 * a + b + d), 0.0, 1.0);
-    return mix(mix(b0, b1, t), mix(b1,b2,t), t);
-}
-
-// Approximate distance from point to quadratic bezier.
-float approx_distance(vec2 p, vec2 b0, vec2 b1, vec2 b2) {
-    return length(get_distance_vector(b0 - p, b1 - p, b2 - p));
-}
-
-void main(void) {
-    float alpha = 1.0;
-
-    vec2 local_pos = vLocalPos;
-
-#ifdef WR_FEATURE_CACHE
-#else
-    #ifdef WR_FEATURE_TRANSFORM
-        alpha = init_transform_fs(vLocalPos);
-    #endif
-
-        alpha *= do_clip();
-#endif
-
-    // Find the appropriate distance to apply the step over.
-    float aa_range = compute_aa_range(local_pos);
-
-    // Select the x/y coord, depending on which axis this edge is.
-    vec2 pos = mix(local_pos.xy, local_pos.yx, vAxisSelect);
-
-    switch (vStyle) {
-        case LINE_STYLE_SOLID: {
-            break;
-        }
-        case LINE_STYLE_DASHED: {
-            // Get the main-axis position relative to closest dot or dash.
-            float x = mod(pos.x - vLocalOrigin.x, vParams.x);
-
-            // Calculate dash alpha (on/off) based on dash length
-            alpha = min(alpha, step(x, vParams.y));
-            break;
-        }
-        case LINE_STYLE_DOTTED: {
-            // Get the main-axis position relative to closest dot or dash.
-            float x = mod(pos.x - vLocalOrigin.x, vParams.x);
-
-            // Get the dot alpha
-            vec2 dot_relative_pos = vec2(x, pos.y) - vParams.yz;
-            float dot_distance = length(dot_relative_pos) - vParams.y;
-            alpha = min(alpha, distance_aa(aa_range, dot_distance));
-            // Clip off partial dots
-            alpha *= step(pos.x - vLocalOrigin.x, vParams.w);
-            break;
-        }
-        case LINE_STYLE_WAVY: {
-            vec2 normalized_local_pos = pos - vLocalOrigin.xy;
-
-            float half_line_thickness = vParams.x;
-            float slope_length = vParams.y;
-            float flat_length = vParams.z;
-            float vertical_bounds = vParams.w;
-            // Our pattern is just two slopes and two flats
-            float half_period = slope_length + flat_length;
-
-            float mid_height = vertical_bounds / 2.0;
-            float peak_offset = mid_height - half_line_thickness;
-            // Flip the wave every half period
-            float flip = -2.0 * (step(mod(normalized_local_pos.x, 2.0 * half_period), half_period) - 0.5);
-            // float flip = -1.0;
-            peak_offset *= flip;
-            float peak_height = mid_height + peak_offset;
-
-            // Convert pos to a local position within one half period
-            normalized_local_pos.x = mod(normalized_local_pos.x, half_period);
-
-            // Compute signed distance to the 3 lines that make up an arc
-            float dist1 = distance_to_line(vec2(0.0, peak_height),
-                                           vec2(1.0, -flip),
-                                           normalized_local_pos);
-            float dist2 = distance_to_line(vec2(0.0, peak_height),
-                                           vec2(0, -flip),
-                                           normalized_local_pos);
-            float dist3 = distance_to_line(vec2(flat_length, peak_height),
-                                           vec2(-1.0, -flip),
-                                           normalized_local_pos);
-            float dist = abs(max(max(dist1, dist2), dist3));
-
-            // Apply AA based on the thickness of the wave
-            alpha = distance_aa(aa_range, dist - half_line_thickness);
-
-            // Disable AA for thin lines
-            if (half_line_thickness <= 1.0) {
-                alpha = 1.0 - step(alpha, MAGIC_WAVY_LINE_AA_SNAP);
-            }
-
-            break;
-        }
-    }
-
-    oFragColor = vColor * alpha;
-}
-#endif
--- a/gfx/webrender/res/ps_radial_gradient.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.glsl
@@ -20,17 +20,17 @@ varying vec2 vPos;
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     Primitive prim = load_primitive();
     RadialGradient gradient = fetch_radial_gradient(prim.specific_prim_address);
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
-                                 prim.layer,
+                                 prim.scroll_node,
                                  prim.task,
                                  prim.local_rect);
 
     vPos = vi.local_pos - prim.local_rect.p0;
 
     vStartCenter = gradient.start_end_center.xy;
     vEndCenter = gradient.start_end_center.zw;
 
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -17,44 +17,45 @@ varying vec4 vUvClip;
 
 #define MODE_ALPHA              0
 #define MODE_SUBPX_CONST_COLOR  1
 #define MODE_SUBPX_PASS0        2
 #define MODE_SUBPX_PASS1        3
 #define MODE_SUBPX_BG_PASS0     4
 #define MODE_SUBPX_BG_PASS1     5
 #define MODE_SUBPX_BG_PASS2     6
-#define MODE_BITMAP             7
-#define MODE_COLOR_BITMAP       8
+#define MODE_SUBPX_DUAL_SOURCE  7
+#define MODE_BITMAP             8
+#define MODE_COLOR_BITMAP       9
 
 VertexInfo write_text_vertex(vec2 clamped_local_pos,
                              RectWithSize local_clip_rect,
                              float z,
-                             Layer layer,
+                             ClipScrollNode scroll_node,
                              PictureTask task,
                              RectWithSize snap_rect) {
     // Transform the current vertex to world space.
-    vec4 world_pos = layer.transform * vec4(clamped_local_pos, 0.0, 1.0);
+    vec4 world_pos = scroll_node.transform * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos -
                      task.content_origin +
                      task.common_data.task_rect.p0;
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     // For transformed subpixels, we just need to align the glyph origin to a device pixel.
-    // Only check the layer transform's translation since the scales and axes match.
-    vec2 world_snap_p0 = snap_rect.p0 + layer.transform[3].xy * uDevicePixelRatio;
+    // Only check the scroll node transform's translation since the scales and axes match.
+    vec2 world_snap_p0 = snap_rect.p0 + scroll_node.transform[3].xy * uDevicePixelRatio;
     final_pos += floor(world_snap_p0 + 0.5) - world_snap_p0;
 #elif !defined(WR_FEATURE_TRANSFORM)
-    // Compute the snapping offset only if the layer transform is axis-aligned.
-    final_pos += compute_snap_offset(clamped_local_pos, layer, snap_rect);
+    // Compute the snapping offset only if the scroll node transform is axis-aligned.
+    final_pos += compute_snap_offset(clamped_local_pos, scroll_node, snap_rect);
 #endif
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(clamped_local_pos, device_pos);
     return vi;
 }
 
@@ -68,57 +69,54 @@ void main(void) {
 
     Glyph glyph = fetch_glyph(prim.specific_prim_address,
                               glyph_index,
                               subpx_dir);
     GlyphResource res = fetch_glyph_resource(resource_address);
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     // Transform from local space to glyph space.
-    mat2 transform = mat2(prim.layer.transform) * uDevicePixelRatio;
+    mat2 transform = mat2(prim.scroll_node.transform) * uDevicePixelRatio;
 
     // Compute the glyph rect in glyph space.
     RectWithSize glyph_rect = RectWithSize(res.offset + transform * (text.offset + glyph.offset),
                                            res.uv_rect.zw - res.uv_rect.xy);
 
     // Transform the glyph rect back to local space.
     mat2 inv = inverse(transform);
     RectWithSize local_rect = transform_rect(glyph_rect, inv);
 
     // Select the corner of the glyph's local space rect that we are processing.
     vec2 local_pos = local_rect.p0 + local_rect.size * aPosition.xy;
 
-    // Calculate a combined local clip rect.
-    RectWithSize local_clip_rect = intersect_rects(prim.local_clip_rect, prim.layer.local_clip_rect);
-
     // If the glyph's local rect would fit inside the local clip rect, then select a corner from
     // the device space glyph rect to reduce overdraw of clipped pixels in the fragment shader.
     // Otherwise, fall back to clamping the glyph's local rect to the local clip rect.
-    local_pos = rect_inside_rect(local_rect, local_clip_rect) ?
+    local_pos = rect_inside_rect(local_rect, prim.local_clip_rect) ?
                     inv * (glyph_rect.p0 + glyph_rect.size * aPosition.xy) :
-                    clamp_rect(local_pos, local_clip_rect);
+                    clamp_rect(local_pos, prim.local_clip_rect);
 #else
     // Scale from glyph space to local space.
     float scale = res.scale / uDevicePixelRatio;
 
     // Compute the glyph rect in local space.
     RectWithSize glyph_rect = RectWithSize(scale * res.offset + text.offset + glyph.offset,
                                            scale * (res.uv_rect.zw - res.uv_rect.xy));
 
     // Select the corner of the glyph rect that we are processing.
     vec2 local_pos = glyph_rect.p0 + glyph_rect.size * aPosition.xy;
 
-    // Clamp to the two local clip rects.
-    local_pos = clamp_rect(clamp_rect(local_pos, prim.local_clip_rect), prim.layer.local_clip_rect);
+    // Clamp to the local clip rect.
+    local_pos = clamp_rect(local_pos, prim.local_clip_rect);
 #endif
 
     VertexInfo vi = write_text_vertex(local_pos,
                                       prim.local_clip_rect,
                                       prim.z,
-                                      prim.layer,
+                                      prim.scroll_node,
                                       prim.task,
                                       glyph_rect);
 
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     vec2 f = (transform * vi.local_pos - glyph_rect.p0) / glyph_rect.size;
     vUvClip = vec4(f, 1.0 - f);
 #else
     vec2 f = (vi.local_pos - glyph_rect.p0) / glyph_rect.size;
@@ -129,16 +127,17 @@ void main(void) {
     switch (uMode) {
         case MODE_ALPHA:
         case MODE_BITMAP:
             vMaskSwizzle = vec2(0.0, 1.0);
             vColor = text.color;
             break;
         case MODE_SUBPX_PASS1:
         case MODE_SUBPX_BG_PASS2:
+        case MODE_SUBPX_DUAL_SOURCE:
             vMaskSwizzle = vec2(1.0, 0.0);
             vColor = text.color;
             break;
         case MODE_SUBPX_CONST_COLOR:
         case MODE_SUBPX_PASS0:
         case MODE_SUBPX_BG_PASS0:
         case MODE_COLOR_BITMAP:
             vMaskSwizzle = vec2(1.0, 0.0);
@@ -165,11 +164,17 @@ void main(void) {
     vec4 mask = texture(sColor0, tc);
     mask.rgb = mask.rgb * vMaskSwizzle.x + mask.aaa * vMaskSwizzle.y;
 
     float alpha = do_clip();
 #ifdef WR_FEATURE_GLYPH_TRANSFORM
     alpha *= float(all(greaterThanEqual(vUvClip, vec4(0.0))));
 #endif
 
+#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+    vec4 alpha_mask = mask * alpha;
+    oFragColor = vColor * alpha_mask;
+    oFragBlend = alpha_mask * vColor.a;
+#else
     oFragColor = vColor * mask * alpha;
+#endif
 }
 #endif
--- a/gfx/webrender/res/ps_yuv_image.glsl
+++ b/gfx/webrender/res/ps_yuv_image.glsl
@@ -38,17 +38,17 @@ void main(void) {
 #ifdef WR_FEATURE_TRANSFORM
     VertexInfo vi = write_transform_vertex_primitive(prim);
     vLocalPos = vi.local_pos;
     vLocalRect = vec4(prim.local_rect.p0, prim.local_rect.p0 + prim.local_rect.size);
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
-                                 prim.layer,
+                                 prim.scroll_node,
                                  prim.task,
                                  prim.local_rect);
     vLocalPos = vi.local_pos - prim.local_rect.p0;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
     ImageResource y_rect = fetch_image_resource(prim.user_data0);
--- a/gfx/webrender/res/shared.glsl
+++ b/gfx/webrender/res/shared.glsl
@@ -3,16 +3,20 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifdef WR_FEATURE_TEXTURE_EXTERNAL
 // Please check https://www.khronos.org/registry/OpenGL/extensions/OES/OES_EGL_image_external_essl3.txt
 // for this extension.
 #extension GL_OES_EGL_image_external_essl3 : require
 #endif
 
+#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+#extension GL_ARB_explicit_attrib_location : require
+#endif
+
 #include base
 
 // The textureLod() doesn't support samplerExternalOES for WR_FEATURE_TEXTURE_EXTERNAL.
 // https://www.khronos.org/registry/OpenGL/extensions/OES/OES_EGL_image_external_essl3.txt
 //
 // The textureLod() doesn't support sampler2DRect for WR_FEATURE_TEXTURE_RECT, too.
 //
 // Use texture() instead.
@@ -41,17 +45,22 @@
 
 //======================================================================================
 // Fragment shader attributes and uniforms
 //======================================================================================
 #ifdef WR_FRAGMENT_SHADER
     // Uniform inputs
 
     // Fragment shader outputs
-    out vec4 oFragColor;
+    #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+        layout(location = 0, index = 0) out vec4 oFragColor;
+        layout(location = 0, index = 1) out vec4 oFragBlend;
+    #else
+        out vec4 oFragColor;
+    #endif
 #endif
 
 //======================================================================================
 // Shared shader uniforms
 //======================================================================================
 #ifdef WR_FEATURE_TEXTURE_2D
 uniform sampler2D sColor0;
 uniform sampler2D sColor1;
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/batch.rs
@@ -0,0 +1,1484 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{DeviceIntRect, DeviceIntSize, ImageKey, LayerToWorldScale};
+use api::{ExternalImageType, FilterOp, ImageRendering, LayerRect};
+use api::{SubpixelDirection, TileOffset, YuvColorSpace, YuvFormat};
+use api::{LayerToWorldTransform, WorldPixel};
+use border::{BorderCornerInstance, BorderCornerSide};
+use clip::{ClipSource, ClipStore};
+use clip_scroll_tree::{CoordinateSystemId};
+use euclid::{TypedTransform3D, vec3};
+use glyph_rasterizer::GlyphFormat;
+use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
+use gpu_types::{BrushImageKind, BrushInstance, ClipChainRectIndex};
+use gpu_types::{ClipMaskInstance, ClipScrollNodeIndex, PictureType};
+use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
+use internal_types::{FastHashMap, SourceTexture};
+use picture::{PictureCompositeMode, PictureKind, PicturePrimitive};
+use plane_split::{BspSplitter, Polygon, Splitter};
+use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
+use prim_store::{BrushPrimitive, BrushKind, DeferredResolve, PrimitiveRun};
+use render_task::{ClipWorkItem};
+use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKind};
+use render_task::{RenderTaskTree};
+use renderer::{BlendMode, ImageBufferKind};
+use renderer::BLOCKS_PER_UV_RECT;
+use resource_cache::{GlyphFetchResult, ResourceCache};
+use std::{usize, f32, i32};
+use tiling::{RenderTargetContext, RenderTargetKind};
+use util::{MatrixHelpers, TransformedRectKind};
+
+// Special sentinel value recognized by the shader. It is considered to be
+// a dummy task that doesn't mask out anything.
+// It is substituted as the clip task address whenever a primitive (or a
+// brush segment) has no clip task id of its own.
+const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(i32::MAX as u32);
+
+/// The primitive-specific half of a `BatchKind::Transformable` batch kind,
+/// where it is paired with a `TransformedRectKind`.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum TransformBatchKind {
+    TextRun(GlyphFormat),
+    Image(ImageBufferKind),
+    YuvImage(ImageBufferKind, YuvFormat, YuvColorSpace),
+    AlignedGradient,
+    AngleGradient,
+    RadialGradient,
+    BorderCorner,
+    BorderEdge,
+}
+
+/// Source kind carried by `BrushBatchKind::Image`.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum BrushImageSourceKind {
+    Alpha,
+    Color,
+    ColorAlphaMask,
+}
+
+impl BrushImageSourceKind {
+    /// Maps a render target kind onto the image source kind of the same name.
+    /// No render target kind maps onto `ColorAlphaMask`.
+    pub fn from_render_target_kind(render_target_kind: RenderTargetKind) -> Self {
+        match render_target_kind {
+            RenderTargetKind::Alpha => BrushImageSourceKind::Alpha,
+            RenderTargetKind::Color => BrushImageSourceKind::Color,
+        }
+    }
+}
+
+/// Brush-specific batch kind; this is the payload of `BatchKind::Brush`.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum BrushBatchKind {
+    Image(BrushImageSourceKind),
+    Solid,
+    Line,
+}
+
+/// The kind component of a `BatchKey`. Two batch keys are only treated as
+/// compatible when their kinds compare equal (see
+/// `BatchKey::is_compatible_with`).
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum BatchKind {
+    Composite {
+        task_id: RenderTaskId,
+        source_id: RenderTaskId,
+        backdrop_id: RenderTaskId,
+    },
+    HardwareComposite,
+    SplitComposite,
+    Blend,
+    Transformable(TransformedRectKind, TransformBatchKind),
+    Brush(BrushBatchKind),
+}
+
+/// Optional textures that can be used as a source in the shaders.
+/// Slots that are not used by the batch hold `SourceTexture::Invalid`.
+#[derive(Copy, Clone, Debug)]
+pub struct BatchTextures {
+    pub colors: [SourceTexture; 3],
+}
+
+impl BatchTextures {
+    /// Texture set in which every slot is `SourceTexture::Invalid`.
+    pub fn no_texture() -> Self {
+        let colors = [SourceTexture::Invalid; 3];
+        BatchTextures { colors }
+    }
+
+    /// Texture set with `CacheRGBA8` in slot 0 and `CacheA8` in slot 1;
+    /// slot 2 stays invalid.
+    pub fn render_target_cache() -> Self {
+        let mut textures = BatchTextures::no_texture();
+        textures.colors[0] = SourceTexture::CacheRGBA8;
+        textures.colors[1] = SourceTexture::CacheA8;
+        textures
+    }
+
+    /// Texture set with `texture` in slot 0 and the remaining slots invalid.
+    pub fn color(texture: SourceTexture) -> Self {
+        let mut textures = BatchTextures::no_texture();
+        textures.colors[0] = texture;
+        textures
+    }
+}
+
+/// A batch of primitive instances sharing one `BatchKey`, together with the
+/// bounding rects recorded for the items that were routed to it.
+#[derive(Debug)]
+pub struct AlphaPrimitiveBatch {
+    pub key: BatchKey,
+    pub instances: Vec<PrimitiveInstance>,
+    pub item_rects: Vec<DeviceIntRect>,
+}
+
+impl AlphaPrimitiveBatch {
+    /// Creates an empty batch for `key`.
+    pub fn new(key: BatchKey) -> Self {
+        let instances = Vec::new();
+        let item_rects = Vec::new();
+        AlphaPrimitiveBatch { key, instances, item_rects }
+    }
+}
+
+/// A batch of primitive instances sharing one `BatchKey`. Unlike
+/// `AlphaPrimitiveBatch`, no per-item bounding rects are kept.
+#[derive(Debug)]
+pub struct OpaquePrimitiveBatch {
+    pub key: BatchKey,
+    pub instances: Vec<PrimitiveInstance>,
+}
+
+impl OpaquePrimitiveBatch {
+    /// Creates an empty batch for `key`.
+    pub fn new(key: BatchKey) -> Self {
+        let instances = Vec::new();
+        OpaquePrimitiveBatch { key, instances }
+    }
+}
+
+/// Key identifying a batch: its kind, its blend mode and its textures.
+#[derive(Copy, Clone, Debug)]
+pub struct BatchKey {
+    pub kind: BatchKind,
+    pub blend_mode: BlendMode,
+    pub textures: BatchTextures,
+}
+
+impl BatchKey {
+    /// Bundles the three components into a key.
+    pub fn new(kind: BatchKind, blend_mode: BlendMode, textures: BatchTextures) -> Self {
+        BatchKey { kind, blend_mode, textures }
+    }
+
+    /// Returns true when an item keyed by `other` may share a batch keyed by
+    /// `self`: kind and blend mode must be equal, and every texture slot must
+    /// pass `textures_compatible`.
+    pub fn is_compatible_with(&self, other: &BatchKey) -> bool {
+        if self.kind != other.kind || self.blend_mode != other.blend_mode {
+            return false;
+        }
+
+        // Compare the texture slots pairwise, stopping at the first mismatch.
+        self.textures
+            .colors
+            .iter()
+            .zip(other.textures.colors.iter())
+            .all(|(&mine, &theirs)| textures_compatible(mine, theirs))
+    }
+}
+
+/// Two texture slots are compatible when either one is
+/// `SourceTexture::Invalid` or when both name the same texture.
+#[inline]
+fn textures_compatible(t1: SourceTexture, t2: SourceTexture) -> bool {
+    if t1 == SourceTexture::Invalid || t2 == SourceTexture::Invalid {
+        return true;
+    }
+    t1 == t2
+}
+
+/// Ordered list of batches for primitives that need blending.
+pub struct AlphaBatchList {
+    pub batches: Vec<AlphaPrimitiveBatch>,
+}
+
+impl AlphaBatchList {
+    fn new() -> Self {
+        AlphaBatchList { batches: Vec::new() }
+    }
+
+    /// Returns the instance list of a batch that an item with the given key
+    /// and bounding rect can be appended to, creating a new batch at the end
+    /// of the list when none of the most recent ten batches can be reused.
+    /// The bounding rect is recorded on the chosen batch.
+    pub fn get_suitable_batch(
+        &mut self,
+        key: BatchKey,
+        item_bounding_rect: &DeviceIntRect,
+    ) -> &mut Vec<PrimitiveInstance> {
+        let mut reusable_index = None;
+
+        match (key.kind, key.blend_mode) {
+            (BatchKind::Composite { .. }, _) => {
+                // A composite never joins an existing batch: its result can
+                // feed the input of the next composite. This could perhaps be
+                // optimized in the future.
+            }
+            (BatchKind::Transformable(_, TransformBatchKind::TextRun(_)), BlendMode::SubpixelWithBgColor) |
+            (BatchKind::Transformable(_, TransformBatchKind::TextRun(_)), BlendMode::SubpixelVariableTextColor) => {
+                for (index, candidate) in self.batches.iter().enumerate().rev().take(10) {
+                    // Subpixel text is drawn in two passes, so the overlap
+                    // test has to run against every batch, including one that
+                    // would otherwise be compatible (unlike the default case
+                    // below).
+                    let overlaps = candidate
+                        .item_rects
+                        .iter()
+                        .any(|rect| rect.intersects(item_bounding_rect));
+                    if overlaps {
+                        break;
+                    }
+
+                    if candidate.key.is_compatible_with(&key) {
+                        reusable_index = Some(index);
+                        break;
+                    }
+                }
+            }
+            _ => {
+                for (index, candidate) in self.batches.iter().enumerate().rev().take(10) {
+                    // A compatible batch is taken before any overlap test on
+                    // it; overlaps only matter for the incompatible batches
+                    // we would have to skip past to reach an older one.
+                    if candidate.key.is_compatible_with(&key) {
+                        reusable_index = Some(index);
+                        break;
+                    }
+
+                    // Stop searching once an incompatible batch overlaps.
+                    let overlaps = candidate
+                        .item_rects
+                        .iter()
+                        .any(|rect| rect.intersects(item_bounding_rect));
+                    if overlaps {
+                        break;
+                    }
+                }
+            }
+        }
+
+        let batch_index = match reusable_index {
+            Some(index) => index,
+            None => {
+                self.batches.push(AlphaPrimitiveBatch::new(key));
+                self.batches.len() - 1
+            }
+        };
+
+        let batch = &mut self.batches[batch_index];
+        batch.item_rects.push(*item_bounding_rect);
+
+        &mut batch.instances
+    }
+}
+
+/// Ordered list of batches for primitives drawn without blending.
+pub struct OpaqueBatchList {
+    pub pixel_area_threshold_for_new_batch: i32,
+    pub batches: Vec<OpaquePrimitiveBatch>,
+}
+
+impl OpaqueBatchList {
+    fn new(pixel_area_threshold_for_new_batch: i32) -> Self {
+        OpaqueBatchList {
+            batches: Vec::new(),
+            pixel_area_threshold_for_new_batch,
+        }
+    }
+
+    /// Returns the instance list of a batch compatible with `key`, appending
+    /// a new batch when no existing one qualifies.
+    pub fn get_suitable_batch(
+        &mut self,
+        key: BatchKey,
+        item_bounding_rect: &DeviceIntRect,
+    ) -> &mut Vec<PrimitiveInstance> {
+        let item_area = item_bounding_rect.size.area();
+
+        let reusable_index = if item_area > self.pixel_area_threshold_for_new_batch {
+            // An item above the area threshold is large enough to warrant
+            // breaking a batch for, so only the most recent batch is
+            // considered before falling back to a new one.
+            match self.batches.last() {
+                Some(batch) if batch.key.is_compatible_with(&key) => {
+                    Some(self.batches.len() - 1)
+                }
+                _ => None,
+            }
+        } else {
+            // Otherwise, look back through a reasonable number of batches.
+            self.batches
+                .iter()
+                .enumerate()
+                .rev()
+                .take(10)
+                .find(|&(_, batch)| batch.key.is_compatible_with(&key))
+                .map(|(index, _)| index)
+        };
+
+        let batch_index = match reusable_index {
+            Some(index) => index,
+            None => {
+                self.batches.push(OpaquePrimitiveBatch::new(key));
+                self.batches.len() - 1
+            }
+        };
+
+        &mut self.batches[batch_index].instances
+    }
+
+    fn finalize(&mut self) {
+        // Flip every opaque batch's instance array so the instances are
+        // drawn front-to-back, for maximum z-buffer efficiency.
+        // TODO(gw): Maybe we can change the batch code to
+        //           build these in reverse and avoid having
+        //           to reverse the instance array here.
+        for batch in self.batches.iter_mut() {
+            batch.instances.reverse();
+        }
+    }
+}
+
+/// Pairs the alpha and opaque batch lists and routes each batch request to
+/// one of them based on the key's blend mode.
+pub struct BatchList {
+    pub alpha_batch_list: AlphaBatchList,
+    pub opaque_batch_list: OpaqueBatchList,
+}
+
+impl BatchList {
+    /// Creates empty lists. The opaque list's new-batch threshold is set to
+    /// one quarter of the screen area.
+    pub fn new(screen_size: DeviceIntSize) -> Self {
+        let quarter_screen_area = screen_size.width * screen_size.height / 4;
+        let opaque_batch_list = OpaqueBatchList::new(quarter_screen_area);
+        let alpha_batch_list = AlphaBatchList::new();
+
+        BatchList {
+            alpha_batch_list,
+            opaque_batch_list,
+        }
+    }
+
+    /// Forwards to the opaque list for `BlendMode::None` and to the alpha
+    /// list for every other blend mode.
+    pub fn get_suitable_batch(
+        &mut self,
+        key: BatchKey,
+        item_bounding_rect: &DeviceIntRect,
+    ) -> &mut Vec<PrimitiveInstance> {
+        // Every blend mode is listed so that adding a new one forces a
+        // decision here.
+        let needs_blending = match key.blend_mode {
+            BlendMode::None => false,
+            BlendMode::PremultipliedAlpha |
+            BlendMode::PremultipliedDestOut |
+            BlendMode::SubpixelConstantTextColor(..) |
+            BlendMode::SubpixelVariableTextColor |
+            BlendMode::SubpixelWithBgColor |
+            BlendMode::SubpixelDualSource => true,
+        };
+
+        if needs_blending {
+            self.alpha_batch_list
+                .get_suitable_batch(key, item_bounding_rect)
+        } else {
+            self.opaque_batch_list
+                .get_suitable_batch(key, item_bounding_rect)
+        }
+    }
+
+    /// Finalizes the opaque list; the alpha list needs no finalization step.
+    fn finalize(&mut self) {
+        self.opaque_batch_list.finalize()
+    }
+}
+
+/// Encapsulates the logic of building batches for items that are blended.
+///
+/// `batch_list` receives the batches produced by `build`.
+// NOTE(review): `text_run_cache_prims`, `line_cache_prims` and
+// `glyph_fetch_buffer` are presumably filled while batching individual
+// primitives; confirm against `add_prim_to_batch`.
+pub struct AlphaBatcher {
+    pub batch_list: BatchList,
+    pub text_run_cache_prims: FastHashMap<SourceTexture, Vec<PrimitiveInstance>>,
+    pub line_cache_prims: Vec<PrimitiveInstance>,
+    glyph_fetch_buffer: Vec<GlyphFetchResult>,
+}
+
+impl AlphaBatcher {
+    /// Creates a batcher with empty batch lists and empty caches;
+    /// `screen_size` is forwarded to `BatchList::new`.
+    pub fn new(screen_size: DeviceIntSize) -> Self {
+        let batch_list = BatchList::new(screen_size);
+
+        AlphaBatcher {
+            batch_list,
+            text_run_cache_prims: FastHashMap::default(),
+            line_cache_prims: Vec::new(),
+            glyph_fetch_buffer: Vec::new(),
+        }
+    }
+
+    /// Batches the picture behind each task in `tasks`, then finalizes the
+    /// batch list. Every task is expected to be a picture task; any other
+    /// task kind hits `unreachable!()`.
+    pub fn build(
+        &mut self,
+        tasks: &[RenderTaskId],
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskTree,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+    ) {
+        for &task_id in tasks {
+            if let RenderTaskKind::Picture(ref pic_task) = render_tasks[task_id].kind {
+                // Resolve the task's primitive index to its picture.
+                let prim_metadata = &ctx.prim_store.cpu_metadata[pic_task.prim_index.0];
+                let pic_index = prim_metadata.cpu_prim_index;
+                let pic = &ctx.prim_store.cpu_pictures[pic_index.0];
+                self.add_pic_to_batch(
+                    pic,
+                    task_id,
+                    ctx,
+                    gpu_cache,
+                    render_tasks,
+                    deferred_resolves,
+                );
+            } else {
+                unreachable!();
+            }
+        }
+
+        self.batch_list.finalize();
+    }
+
+    /// True when neither the opaque nor the alpha list holds any batch.
+    pub fn is_empty(&self) -> bool {
+        let lists = &self.batch_list;
+        lists.opaque_batch_list.batches.is_empty() &&
+            lists.alpha_batch_list.batches.is_empty()
+    }
+
+    // Adds every primitive run of `pic` to the batch list, then emits one
+    // `SplitComposite` instance for each polygon produced by the plane
+    // splitter that the runs fed.
+    //
+    // `task_id` is the render task the picture is drawn into; its task
+    // address is stored in every instance created here.
+    //
+    // Panics (via `expect("bug")`) if a split polygon's picture has no
+    // screen rect or no render task id.
+    fn add_pic_to_batch(
+        &mut self,
+        pic: &PicturePrimitive,
+        task_id: RenderTaskId,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskTree,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+    ) {
+        let task_address = render_tasks.get_task_address(task_id);
+
+        // Even though most of the time a splitter isn't used or needed,
+        // they are cheap to construct so we will always pass one down.
+        let mut splitter = BspSplitter::new();
+
+        // Add each run in this picture to the batch.
+        for run in &pic.runs {
+            // Look up the run's scroll node to get the index of its data.
+            let scroll_node = &ctx.clip_scroll_tree.nodes[&run.clip_and_scroll.scroll_node_id];
+            let scroll_id = scroll_node.node_data_index;
+            self.add_run_to_batch(
+                run,
+                scroll_id,
+                ctx,
+                gpu_cache,
+                render_tasks,
+                task_id,
+                task_address,
+                deferred_resolves,
+                &mut splitter,
+                pic.picture_type(),
+            );
+        }
+
+        // Flush the accumulated plane splits onto the task tree.
+        // Z axis is directed at the screen, `sort` is ascending, and we need back-to-front order.
+        for poly in splitter.sort(vec3(0.0, 0.0, 1.0)) {
+            // The polygon's anchor is used as the index of its primitive.
+            let prim_index = PrimitiveIndex(poly.anchor);
+            debug!("process sorted poly {:?} {:?}", prim_index, poly.points);
+            let pp = &poly.points;
+            // Pack the four polygon points (x, y, z each, twelve floats in
+            // total) into three consecutive four-float blocks.
+            let gpu_blocks = [
+                [pp[0].x as f32, pp[0].y as f32, pp[0].z as f32, pp[1].x as f32].into(),
+                [pp[1].y as f32, pp[1].z as f32, pp[2].x as f32, pp[2].y as f32].into(),
+                [pp[2].z as f32, pp[3].x as f32, pp[3].y as f32, pp[3].z as f32].into(),
+            ];
+            let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
+            let key = BatchKey::new(
+                BatchKind::SplitComposite,
+                BlendMode::PremultipliedAlpha,
+                BatchTextures::no_texture(),
+            );
+            let pic_metadata = &ctx.prim_store.cpu_metadata[prim_index.0];
+            let pic = &ctx.prim_store.cpu_pictures[pic_metadata.cpu_prim_index.0];
+            let batch = self.batch_list.get_suitable_batch(key, pic_metadata.screen_rect.as_ref().expect("bug"));
+            let source_task_address = render_tasks.get_task_address(pic.render_task_id.expect("bug"));
+            let gpu_address = gpu_handle.as_int(gpu_cache);
+
+            // The primitive index is passed as the only non-zero trailing
+            // argument. NOTE(review): it presumably serves as the z value,
+            // as in `add_prim_to_batch`; confirm against
+            // `CompositePrimitiveInstance::new`.
+            let instance = CompositePrimitiveInstance::new(
+                task_address,
+                source_task_address,
+                RenderTaskAddress(0),
+                gpu_address,
+                0,
+                prim_index.0 as i32,
+                0,
+                0,
+            );
+
+            batch.push(PrimitiveInstance::from(instance));
+        }
+    }
+
+    // Adds every primitive of a run to the batch list, skipping primitives
+    // that were culled.
+    // TODO(gw): Restructure this so the param list isn't quite
+    //           so daunting!
+    fn add_run_to_batch(
+        &mut self,
+        run: &PrimitiveRun,
+        scroll_id: ClipScrollNodeIndex,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskTree,
+        task_id: RenderTaskId,
+        task_address: RenderTaskAddress,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+        splitter: &mut BspSplitter<f64, WorldPixel>,
+        pic_type: PictureType,
+    ) {
+        for offset in 0 .. run.count {
+            let prim_index = PrimitiveIndex(run.base_prim_index.0 + offset);
+            let metadata = &ctx.prim_store.cpu_metadata[prim_index.0];
+
+            // Primitive runs are walked in order to add items to batches,
+            // so visibility has to be checked here. Culling is currently
+            // only supported on normal (Image) picture types: a primitive
+            // without a screen rect is skipped only for those.
+            // TODO(gw): Support culling on shadow image types.
+            let is_culled = pic_type == PictureType::Image &&
+                            metadata.screen_rect.is_none();
+            if is_culled {
+                continue;
+            }
+
+            self.add_prim_to_batch(
+                metadata.clip_chain_rect_index,
+                scroll_id,
+                prim_index,
+                ctx,
+                gpu_cache,
+                render_tasks,
+                task_id,
+                task_address,
+                deferred_resolves,
+                splitter,
+                pic_type,
+            );
+        }
+    }
+
+    // Adds a primitive to a batch.
+    // It can recursively call itself in some situations, for
+    // example if it encounters a picture where the items
+    // in that picture are being drawn into the same target.
+    fn add_prim_to_batch(
+        &mut self,
+        clip_chain_rect_index: ClipChainRectIndex,
+        scroll_id: ClipScrollNodeIndex,
+        prim_index: PrimitiveIndex,
+        ctx: &RenderTargetContext,
+        gpu_cache: &mut GpuCache,
+        render_tasks: &RenderTaskTree,
+        task_id: RenderTaskId,
+        task_address: RenderTaskAddress,
+        deferred_resolves: &mut Vec<DeferredResolve>,
+        splitter: &mut BspSplitter<f64, WorldPixel>,
+        pic_type: PictureType,
+    ) {
+        let z = prim_index.0 as i32;
+        let prim_metadata = ctx.prim_store.get_metadata(prim_index);
+        let scroll_node = &ctx.node_data[scroll_id.0 as usize];
+        // TODO(gw): Calculating this for every primitive is a bit
+        //           wasteful. We should probably cache this in
+        //           the scroll node...
+        let transform_kind = scroll_node.transform.transform_kind();
+        let item_bounding_rect = &match prim_metadata.screen_rect {
+            Some(screen_rect) => screen_rect,
+            None => {
+                debug_assert_ne!(pic_type, PictureType::Image);
+                DeviceIntRect::zero()
+            }
+        };
+        let prim_cache_address = gpu_cache.get_address(&prim_metadata.gpu_location);
+        let no_textures = BatchTextures::no_texture();
+        let clip_task_address = prim_metadata
+            .clip_task_id
+            .map_or(OPAQUE_TASK_ADDRESS, |id| render_tasks.get_task_address(id));
+        let base_instance = SimplePrimitiveInstance::new(
+            prim_cache_address,
+            task_address,
+            clip_task_address,
+            clip_chain_rect_index,
+            scroll_id,
+            z,
+        );
+
+        let blend_mode = ctx.prim_store.get_blend_mode(prim_metadata, transform_kind);
+
+        match prim_metadata.prim_kind {
+            PrimitiveKind::Brush => {
+                let brush = &ctx.prim_store.cpu_brushes[prim_metadata.cpu_prim_index.0];
+                let base_instance = BrushInstance {
+                    picture_address: task_address,
+                    prim_address: prim_cache_address,
+                    clip_chain_rect_index,
+                    scroll_id,
+                    clip_task_address,
+                    z,
+                    segment_index: 0,
+                    user_data0: 0,
+                    user_data1: 0,
+                };
+
+                match brush.segment_desc {
+                    Some(ref segment_desc) => {
+                        let opaque_batch = self.batch_list.opaque_batch_list.get_suitable_batch(
+                            brush.get_batch_key(
+                                BlendMode::None
+                            ),
+                            item_bounding_rect
+                        );
+                        let alpha_batch = self.batch_list.alpha_batch_list.get_suitable_batch(
+                            brush.get_batch_key(
+                                BlendMode::PremultipliedAlpha
+                            ),
+                            item_bounding_rect
+                        );
+
+                        for (i, segment) in segment_desc.segments.iter().enumerate() {
+                            let is_inner = segment.edge_flags.is_empty();
+                            let needs_blending = !prim_metadata.opacity.is_opaque ||
+                                                 segment.clip_task_id.is_some() ||
+                                                 (!is_inner && transform_kind == TransformedRectKind::Complex);
+
+                            let clip_task_address = segment
+                                .clip_task_id
+                                .map_or(OPAQUE_TASK_ADDRESS, |id| render_tasks.get_task_address(id));
+
+                            let instance = PrimitiveInstance::from(BrushInstance {
+                                segment_index: i as i32,
+                                clip_task_address,
+                                ..base_instance
+                            });
+
+                            if needs_blending {
+                                alpha_batch.push(instance);
+                            } else {
+                                opaque_batch.push(instance);
+                            }
+                        }
+                    }
+                    None => {
+                        let batch = self.batch_list.get_suitable_batch(brush.get_batch_key(blend_mode), item_bounding_rect);
+                        batch.push(PrimitiveInstance::from(base_instance));
+                    }
+                }
+            }
+            PrimitiveKind::Border => {
+                let border_cpu =
+                    &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
+                // TODO(gw): Select correct blend mode for edges and corners!!
+                let corner_kind = BatchKind::Transformable(
+                    transform_kind,
+                    TransformBatchKind::BorderCorner,
+                );
+                let corner_key = BatchKey::new(corner_kind, blend_mode, no_textures);
+                let edge_kind = BatchKind::Transformable(
+                    transform_kind,
+                    TransformBatchKind::BorderEdge,
+                );
+                let edge_key = BatchKey::new(edge_kind, blend_mode, no_textures);
+
+                // Borrow checker workaround: scope the corner batch so its borrow of
+                // batch_list ends before the edge batch is requested below.
+                {
+                    let batch =
+                        self.batch_list.get_suitable_batch(corner_key, item_bounding_rect);
+                    for (i, instance_kind) in border_cpu.corner_instances.iter().enumerate()
+                    {
+                        let sub_index = i as i32;
+                        match *instance_kind {
+                            BorderCornerInstance::None => {}
+                            BorderCornerInstance::Single => {
+                                batch.push(base_instance.build(
+                                    sub_index,
+                                    BorderCornerSide::Both as i32,
+                                    0,
+                                ));
+                            }
+                            BorderCornerInstance::Double => {
+                                batch.push(base_instance.build(
+                                    sub_index,
+                                    BorderCornerSide::First as i32,
+                                    0,
+                                ));
+                                batch.push(base_instance.build(
+                                    sub_index,
+                                    BorderCornerSide::Second as i32,
+                                    0,
+                                ));
+                            }
+                        }
+                    }
+                }
+
+                let batch = self.batch_list.get_suitable_batch(edge_key, item_bounding_rect);
+                for border_segment in 0 .. 4 {
+                    batch.push(base_instance.build(border_segment, 0, 0));
+                }
+            }
+            PrimitiveKind::Line => {
+                let base_instance = BrushInstance {
+                    picture_address: task_address,
+                    prim_address: prim_cache_address,
+                    clip_chain_rect_index,
+                    scroll_id,
+                    clip_task_address,
+                    z,
+                    segment_index: 0,
+                    user_data0: 0,
+                    user_data1: 0,
+                };
+
+                let instance = PrimitiveInstance::from(base_instance);
+
+                match pic_type {
+                    PictureType::TextShadow => {
+                        self.line_cache_prims.push(instance);
+                    }
+                    PictureType::Image => {
+                        let kind =
+                            BatchKind::Brush(BrushBatchKind::Line);
+                        let key = BatchKey::new(kind, blend_mode, no_textures);
+                        let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+                        batch.push(instance);
+                    }
+                    PictureType::BoxShadow => unreachable!(),
+                }
+            }
+            PrimitiveKind::Image => {
+                let image_cpu = &ctx.prim_store.cpu_images[prim_metadata.cpu_prim_index.0];
+
+                let (color_texture_id, uv_address) = resolve_image(
+                    image_cpu.image_key,
+                    image_cpu.image_rendering,
+                    image_cpu.tile_offset,
+                    ctx.resource_cache,
+                    gpu_cache,
+                    deferred_resolves,
+                );
+
+                if color_texture_id == SourceTexture::Invalid {
+                    warn!("Warnings: skip a PrimitiveKind::Image at {:?}.\n", item_bounding_rect);
+                    return;
+                }
+
+                let batch_kind = match color_texture_id {
+                    SourceTexture::External(ext_image) => {
+                        match ext_image.image_type {
+                            ExternalImageType::Texture2DHandle => {
+                                TransformBatchKind::Image(ImageBufferKind::Texture2D)
+                            }
+                            ExternalImageType::Texture2DArrayHandle => {
+                                TransformBatchKind::Image(ImageBufferKind::Texture2DArray)
+                            }
+                            ExternalImageType::TextureRectHandle => {
+                                TransformBatchKind::Image(ImageBufferKind::TextureRect)
+                            }
+                            ExternalImageType::TextureExternalHandle => {
+                                TransformBatchKind::Image(ImageBufferKind::TextureExternal)
+                            }
+                            ExternalImageType::ExternalBuffer => {
+                                // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
+                                // It should go through the non-external case.
+                                panic!(
+                                    "Non-texture handle type should be handled in other way"
+                                );
+                            }
+                        }
+                    }
+                    _ => TransformBatchKind::Image(ImageBufferKind::Texture2DArray),
+                };
+
+                let textures = BatchTextures {
+                    colors: [
+                        color_texture_id,
+                        SourceTexture::Invalid,
+                        SourceTexture::Invalid,
+                    ],
+                };
+
+                let key = BatchKey::new(
+                    BatchKind::Transformable(transform_kind, batch_kind),
+                    blend_mode,
+                    textures,
+                );
+                let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+                batch.push(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
+            }
+            PrimitiveKind::TextRun => {
+                let text_cpu =
+                    &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
+                let is_shadow = pic_type == PictureType::TextShadow;
+
+                // TODO(gw): It probably makes sense to base this decision on the content
+                //           origin field in the future (once that's configurable).
+                let font_transform = if is_shadow {
+                    None
+                } else {
+                    Some(&scroll_node.transform)
+                };
+
+                let font = text_cpu.get_font(
+                    ctx.device_pixel_scale,
+                    font_transform,
+                );
+
+                let glyph_fetch_buffer = &mut self.glyph_fetch_buffer;
+                let batch_list = &mut self.batch_list;
+                let text_run_cache_prims = &mut self.text_run_cache_prims;
+
+                ctx.resource_cache.fetch_glyphs(
+                    font,
+                    &text_cpu.glyph_keys,
+                    glyph_fetch_buffer,
+                    gpu_cache,
+                    |texture_id, mut glyph_format, glyphs| {
+                        debug_assert_ne!(texture_id, SourceTexture::Invalid);
+
+                        // Ignore color and only sample alpha when shadowing.
+                        if text_cpu.is_shadow() {
+                            glyph_format = glyph_format.ignore_color();
+                        }
+
+                        let subpx_dir = match glyph_format {
+                            GlyphFormat::Bitmap |
+                            GlyphFormat::ColorBitmap => SubpixelDirection::None,
+                            _ => text_cpu.font.subpx_dir.limit_by(text_cpu.font.render_mode),
+                        };
+
+                        let batch = if is_shadow {
+                            text_run_cache_prims
+                                .entry(texture_id)
+                                .or_insert(Vec::new())
+                        } else {
+                            let textures = BatchTextures {
+                                colors: [
+                                    texture_id,
+                                    SourceTexture::Invalid,
+                                    SourceTexture::Invalid,
+                                ],
+                            };
+
+                            let kind = BatchKind::Transformable(
+                                transform_kind,
+                                TransformBatchKind::TextRun(glyph_format),
+                            );
+
+                            let blend_mode = match glyph_format {
+                                GlyphFormat::Subpixel |
+                                GlyphFormat::TransformedSubpixel => {
+                                    if text_cpu.font.bg_color.a != 0 {
+                                        BlendMode::SubpixelWithBgColor
+                                    } else if ctx.use_dual_source_blending {
+                                        BlendMode::SubpixelDualSource
+                                    } else {
+                                        BlendMode::SubpixelConstantTextColor(text_cpu.font.color.into())
+                                    }
+                                }
+                                GlyphFormat::Alpha |
+                                GlyphFormat::TransformedAlpha |
+                                GlyphFormat::Bitmap |
+                                GlyphFormat::ColorBitmap => BlendMode::PremultipliedAlpha,
+                            };
+
+                            let key = BatchKey::new(kind, blend_mode, textures);
+                            batch_list.get_suitable_batch(key, item_bounding_rect)
+                        };
+
+                        for glyph in glyphs {
+                            batch.push(base_instance.build(
+                                glyph.index_in_text_run,
+                                glyph.uv_rect_address.as_int(),
+                                subpx_dir as u32 as i32,
+                            ));
+                        }
+                    },
+                );
+            }
+            PrimitiveKind::Picture => {
+                let picture =
+                    &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
+
+                match picture.render_task_id {
+                    Some(cache_task_id) => {
+                        let cache_task_address = render_tasks.get_task_address(cache_task_id);
+                        let textures = BatchTextures::render_target_cache();
+
+                        match picture.kind {
+                            PictureKind::TextShadow { .. } => {
+                                let kind = BatchKind::Brush(
+                                    BrushBatchKind::Image(
+                                        BrushImageSourceKind::from_render_target_kind(picture.target_kind())),
+                                );
+                                let key = BatchKey::new(kind, blend_mode, textures);
+                                let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+
+                                let instance = BrushInstance {
+                                    picture_address: task_address,
+                                    prim_address: prim_cache_address,
+                                    clip_chain_rect_index,
+                                    scroll_id,
+                                    clip_task_address,
+                                    z,
+                                    segment_index: 0,
+                                    user_data0: cache_task_address.0 as i32,
+                                    user_data1: BrushImageKind::Simple as i32,
+                                };
+                                batch.push(PrimitiveInstance::from(instance));
+                            }
+                            PictureKind::BoxShadow { image_kind, .. } => {
+                                let kind = BatchKind::Brush(
+                                    BrushBatchKind::Image(
+                                        BrushImageSourceKind::from_render_target_kind(picture.target_kind())),
+                                );
+                                let key = BatchKey::new(kind, blend_mode, textures);
+                                let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+
+                                let instance = BrushInstance {
+                                    picture_address: task_address,
+                                    prim_address: prim_cache_address,
+                                    clip_chain_rect_index,
+                                    scroll_id,
+                                    clip_task_address,
+                                    z,
+                                    segment_index: 0,
+                                    user_data0: cache_task_address.0 as i32,
+                                    user_data1: image_kind as i32,
+                                };
+                                batch.push(PrimitiveInstance::from(instance));
+                            }
+                            PictureKind::Image {
+                                composite_mode,
+                                secondary_render_task_id,
+                                is_in_3d_context,
+                                reference_frame_id,
+                                real_local_rect,
+                                ..
+                            } => {
+                                // If this picture is participating in a 3D rendering context,
+                                // then don't add it to any batches here. Instead, create a polygon
+                                // for it and add it to the current plane splitter.
+                                if is_in_3d_context {
+                                    // Push into parent plane splitter.
+
+                                    let real_xf = &ctx.clip_scroll_tree.nodes[&reference_frame_id].world_content_transform;
+
+                                    let polygon = make_polygon(
+                                        real_local_rect,
+                                        &real_xf,
+                                        prim_index.0,
+                                    );
+
+                                    splitter.add(polygon);
+
+                                    return;
+                                }
+
+                                // Depending on the composite mode of the picture, we generate the
+                                // old style Composite primitive instances. In the future, we'll
+                                // remove these and pass them through the brush batching pipeline.
+                                // This will allow us to unify some of the shaders, apply clip masks
+                                // when compositing pictures, and also correctly apply pixel snapping
+                                // to picture compositing operations.
+                                let source_id = picture.render_task_id.expect("no source!?");
+
+                                match composite_mode.expect("bug: only composites here") {
+                                    PictureCompositeMode::Filter(filter) => {
+                                        match filter {
+                                            FilterOp::Blur(..) => {
+                                                let src_task_address = render_tasks.get_task_address(source_id);
+                                                let key = BatchKey::new(
+                                                    BatchKind::HardwareComposite,
+                                                    BlendMode::PremultipliedAlpha,
+                                                    BatchTextures::render_target_cache(),
+                                                );
+                                                let batch = self.batch_list.get_suitable_batch(key, &item_bounding_rect);
+                                                let instance = CompositePrimitiveInstance::new(
+                                                    task_address,
+                                                    src_task_address,
+                                                    RenderTaskAddress(0),
+                                                    item_bounding_rect.origin.x,
+                                                    item_bounding_rect.origin.y,
+                                                    z,
+                                                    item_bounding_rect.size.width,
+                                                    item_bounding_rect.size.height,
+                                                );
+
+                                                batch.push(PrimitiveInstance::from(instance));
+                                            }
+                                            FilterOp::DropShadow(offset, _, _) => {
+                                                let kind = BatchKind::Brush(
+                                                    BrushBatchKind::Image(BrushImageSourceKind::ColorAlphaMask),
+                                                );
+                                                let key = BatchKey::new(kind, blend_mode, textures);
+
+                                                let instance = BrushInstance {
+                                                    picture_address: task_address,
+                                                    prim_address: prim_cache_address,
+                                                    clip_chain_rect_index,
+                                                    scroll_id,
+                                                    clip_task_address,
+                                                    z,
+                                                    segment_index: 0,
+                                                    user_data0: cache_task_address.0 as i32,
+                                                    user_data1: BrushImageKind::Simple as i32,
+                                                };
+
+                                                {
+                                                    let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+                                                    batch.push(PrimitiveInstance::from(instance));
+                                                }
+
+                                                let secondary_id = secondary_render_task_id.expect("no secondary!?");
+                                                let render_task = &render_tasks[secondary_id];
+                                                let secondary_task_address = render_tasks.get_task_address(secondary_id);
+                                                let render_pass_index = render_task.pass_index.expect("no render_pass_index!?");
+                                                let secondary_textures = BatchTextures {
+                                                    colors: [
+                                                        SourceTexture::RenderTaskCacheRGBA8(render_pass_index),
+                                                        SourceTexture::Invalid,
+                                                        SourceTexture::Invalid,
+                                                    ],
+                                                };
+                                                let key = BatchKey::new(
+                                                    BatchKind::HardwareComposite,
+                                                    BlendMode::PremultipliedAlpha,
+                                                    secondary_textures,
+                                                );
+                                                let batch = self.batch_list.get_suitable_batch(key, &item_bounding_rect);
+                                                let device_offset = (offset * LayerToWorldScale::new(1.0) * ctx.device_pixel_scale).round().to_i32();
+                                                let instance = CompositePrimitiveInstance::new(
+                                                    task_address,
+                                                    secondary_task_address,
+                                                    RenderTaskAddress(0),
+                                                    item_bounding_rect.origin.x - device_offset.x,
+                                                    item_bounding_rect.origin.y - device_offset.y,
+                                                    z,
+                                                    item_bounding_rect.size.width,
+                                                    item_bounding_rect.size.height,
+                                                );
+
+                                                batch.push(PrimitiveInstance::from(instance));
+                                            }
+                                            _ => {
+                                                let key = BatchKey::new(
+                                                    BatchKind::Blend,
+                                                    BlendMode::PremultipliedAlpha,
+                                                    BatchTextures::no_texture(),
+                                                );
+                                                let src_task_address = render_tasks.get_task_address(source_id);
+
+                                                let (filter_mode, amount) = match filter {
+                                                    FilterOp::Blur(..) => (0, 0.0),
+                                                    FilterOp::Contrast(amount) => (1, amount),
+                                                    FilterOp::Grayscale(amount) => (2, amount),
+                                                    FilterOp::HueRotate(angle) => (3, angle),
+                                                    FilterOp::Invert(amount) => (4, amount),
+                                                    FilterOp::Saturate(amount) => (5, amount),
+                                                    FilterOp::Sepia(amount) => (6, amount),
+                                                    FilterOp::Brightness(amount) => (7, amount),
+                                                    FilterOp::Opacity(_, amount) => (8, amount),
+                                                    FilterOp::DropShadow(..) => unreachable!(),
+                                                };
+
+                                                let amount = (amount * 65535.0).round() as i32;
+                                                let batch = self.batch_list.get_suitable_batch(key, &item_bounding_rect);
+
+                                                let instance = CompositePrimitiveInstance::new(
+                                                    task_address,
+                                                    src_task_address,
+                                                    RenderTaskAddress(0),
+                                                    filter_mode,
+                                                    amount,
+                                                    z,
+                                                    0,
+                                                    0,
+                                                );
+
+                                                batch.push(PrimitiveInstance::from(instance));
+                                            }
+                                        }
+                                    }
+                                    PictureCompositeMode::MixBlend(mode) => {
+                                        let backdrop_id = secondary_render_task_id.expect("no backdrop!?");
+
+                                        let key = BatchKey::new(
+                                            BatchKind::Composite {
+                                                task_id,
+                                                source_id,
+                                                backdrop_id,
+                                            },
+                                            BlendMode::PremultipliedAlpha,
+                                            BatchTextures::no_texture(),
+                                        );
+                                        let batch = self.batch_list.get_suitable_batch(key, &item_bounding_rect);
+                                        let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
+                                        let source_task_address = render_tasks.get_task_address(source_id);
+
+                                        let instance = CompositePrimitiveInstance::new(
+                                            task_address,
+                                            source_task_address,
+                                            backdrop_task_address,
+                                            mode as u32 as i32,
+                                            0,
+                                            z,
+                                            0,
+                                            0,
+                                        );
+
+                                        batch.push(PrimitiveInstance::from(instance));
+                                    }
+                                    PictureCompositeMode::Blit => {
+                                        let src_task_address = render_tasks.get_task_address(source_id);
+                                        let key = BatchKey::new(
+                                            BatchKind::HardwareComposite,
+                                            BlendMode::PremultipliedAlpha,
+                                            BatchTextures::render_target_cache(),
+                                        );
+                                        let batch = self.batch_list.get_suitable_batch(key, &item_bounding_rect);
+                                        let instance = CompositePrimitiveInstance::new(
+                                            task_address,
+                                            src_task_address,
+                                            RenderTaskAddress(0),
+                                            item_bounding_rect.origin.x,
+                                            item_bounding_rect.origin.y,
+                                            z,
+                                            item_bounding_rect.size.width,
+                                            item_bounding_rect.size.height,
+                                        );
+
+                                        batch.push(PrimitiveInstance::from(instance));
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    None => {
+                        // If this picture is being drawn into an existing target (i.e. with
+                        // no composition operation), recurse and add to the current batch list.
+                        self.add_pic_to_batch(
+                            picture,
+                            task_id,
+                            ctx,
+                            gpu_cache,
+                            render_tasks,
+                            deferred_resolves,
+                        );
+                    }
+                }
+            }
+            PrimitiveKind::AlignedGradient => {
+                let gradient_cpu =
+                    &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
+                let kind = BatchKind::Transformable(
+                    transform_kind,
+                    TransformBatchKind::AlignedGradient,
+                );
+                let key = BatchKey::new(kind, blend_mode, no_textures);
+                let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+                for part_index in 0 .. (gradient_cpu.stops_count - 1) {
+                    batch.push(base_instance.build(part_index as i32, 0, 0));
+                }
+            }
+            PrimitiveKind::AngleGradient => {
+                let kind = BatchKind::Transformable(
+                    transform_kind,
+                    TransformBatchKind::AngleGradient,
+                );
+                let key = BatchKey::new(kind, blend_mode, no_textures);
+                let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+                batch.push(base_instance.build(0, 0, 0));
+            }
+            PrimitiveKind::RadialGradient => {
+                let kind = BatchKind::Transformable(
+                    transform_kind,
+                    TransformBatchKind::RadialGradient,
+                );
+                let key = BatchKey::new(kind, blend_mode, no_textures);
+                let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+                batch.push(base_instance.build(0, 0, 0));
+            }
+            PrimitiveKind::YuvImage => {
+                let mut textures = BatchTextures::no_texture();
+                let mut uv_rect_addresses = [0; 3];
+                let image_yuv_cpu =
+                    &ctx.prim_store.cpu_yuv_images[prim_metadata.cpu_prim_index.0];
+
+                //yuv channel
+                let channel_count = image_yuv_cpu.format.get_plane_num();
+                debug_assert!(channel_count <= 3);
+                for channel in 0 .. channel_count {
+                    let image_key = image_yuv_cpu.yuv_key[channel];
+
+                    let (texture, address) = resolve_image(
+                        image_key,
+                        image_yuv_cpu.image_rendering,
+                        None,
+                        ctx.resource_cache,
+                        gpu_cache,
+                        deferred_resolves,
+                    );
+
+                    if texture == SourceTexture::Invalid {
+                        warn!("Warnings: skip a PrimitiveKind::YuvImage at {:?}.\n", item_bounding_rect);
+                        return;
+                    }
+
+                    textures.colors[channel] = texture;
+                    uv_rect_addresses[channel] = address.as_int(gpu_cache);
+                }
+
+                let get_buffer_kind = |texture: SourceTexture| {
+                    match texture {
+                        SourceTexture::External(ext_image) => {
+                            match ext_image.image_type {
+                                ExternalImageType::Texture2DHandle => {
+                                    ImageBufferKind::Texture2D
+                                }
+                                ExternalImageType::Texture2DArrayHandle => {
+                                    ImageBufferKind::Texture2DArray
+                                }
+                                ExternalImageType::TextureRectHandle => {
+                                    ImageBufferKind::TextureRect
+                                }
+                                ExternalImageType::TextureExternalHandle => {
+                                    ImageBufferKind::TextureExternal
+                                }
+                                ExternalImageType::ExternalBuffer => {
+                                    // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
+                                    // It should go through the non-external case.
+                                    panic!("Unexpected non-texture handle type");
+                                }
+                            }
+                        }
+                        _ => ImageBufferKind::Texture2DArray,
+                    }
+                };
+
+                // All yuv textures should be the same type.
+                let buffer_kind = get_buffer_kind(textures.colors[0]);
+                assert!(
+                    textures.colors[1 .. image_yuv_cpu.format.get_plane_num()]
+                        .iter()
+                        .all(|&tid| buffer_kind == get_buffer_kind(tid))
+                );
+
+                let kind = BatchKind::Transformable(
+                    transform_kind,
+                    TransformBatchKind::YuvImage(
+                        buffer_kind,
+                        image_yuv_cpu.format,
+                        image_yuv_cpu.color_space,
+                    ),
+                );
+                let key = BatchKey::new(kind, blend_mode, textures);
+                let batch = self.batch_list.get_suitable_batch(key, item_bounding_rect);
+
+                batch.push(base_instance.build(
+                    uv_rect_addresses[0],
+                    uv_rect_addresses[1],
+                    uv_rect_addresses[2],
+                ));
+            }
+        }
+    }
+}
+
+impl BrushPrimitive {
+    fn get_batch_key(&self, blend_mode: BlendMode) -> BatchKey { // Selects the batch key from the brush kind.
+        match self.kind {
+            BrushKind::Solid { .. } => {
+                BatchKey::new(
+                    BatchKind::Brush(BrushBatchKind::Solid),
+                    blend_mode, // Solid brushes use the caller-supplied blend mode.
+                    BatchTextures::no_texture(),
+                )
+            }
+            BrushKind::Clear => {
+                BatchKey::new(
+                    BatchKind::Brush(BrushBatchKind::Solid), // Clear reuses the solid brush batch kind...
+                    BlendMode::PremultipliedDestOut, // ...but ignores `blend_mode` and always uses dest-out.
+                    BatchTextures::no_texture(),
+                )
+            }
+            BrushKind::Mask { .. } => {
+                unreachable!("bug: mask brushes not expected in normal alpha pass");
+            }
+        }
+    }
+}
+
+trait AlphaBatchHelpers { // Helper trait for choosing a primitive's blend mode.
+    fn get_blend_mode( // Returns the blend mode for the primitive described by `metadata`.
+        &self,
+        metadata: &PrimitiveMetadata,
+        transform_kind: TransformedRectKind,
+    ) -> BlendMode;
+}
+
+impl AlphaBatchHelpers for PrimitiveStore {
+    fn get_blend_mode( // Chooses between no blending and premultiplied alpha for a primitive.
+        &self,
+        metadata: &PrimitiveMetadata,
+        transform_kind: TransformedRectKind,
+    ) -> BlendMode {
+        let needs_blending = !metadata.opacity.is_opaque || metadata.clip_task_id.is_some() ||
+            transform_kind == TransformedRectKind::Complex; // Non-opaque, clip-masked, or complex transform.
+
+        match metadata.prim_kind {
+            // The TextRun's real blend mode can only be resolved once glyphs are fetched,
+            // so premultiplied alpha is returned here regardless of `needs_blending`.
+            PrimitiveKind::TextRun => BlendMode::PremultipliedAlpha,
+            PrimitiveKind::Border |
+            PrimitiveKind::Image |
+            PrimitiveKind::YuvImage |
+            PrimitiveKind::AlignedGradient |
+            PrimitiveKind::AngleGradient |
+            PrimitiveKind::RadialGradient |
+            PrimitiveKind::Line |
+            PrimitiveKind::Brush |
+            PrimitiveKind::Picture => if needs_blending { // Every other kind depends only on `needs_blending`.
+                BlendMode::PremultipliedAlpha
+            } else {
+                BlendMode::None
+            },
+        }
+    }
+}
+
+fn resolve_image( // Resolves `image_key` to a (source texture, UV rect GPU cache handle) pair.
+    image_key: ImageKey,
+    image_rendering: ImageRendering,
+    tile_offset: Option<TileOffset>,
+    resource_cache: &ResourceCache,
+    gpu_cache: &mut GpuCache,
+    deferred_resolves: &mut Vec<DeferredResolve>,
+) -> (SourceTexture, GpuCacheHandle) {
+    match resource_cache.get_image_properties(image_key) {
+        Some(image_properties) => {
+            // Check whether this is an external image that needs to be
+            // resolved by the render thread.
+            match image_properties.external_image {
+                Some(external_image) => {
+                    // This is an external texture - we will add it to
+                    // the deferred resolves list to be patched by
+                    // the render thread...
+                    let cache_handle = gpu_cache.push_deferred_per_frame_blocks(BLOCKS_PER_UV_RECT);
+                    deferred_resolves.push(DeferredResolve {
+                        image_properties,
+                        address: gpu_cache.get_address(&cache_handle),
+                    });
+
+                    (SourceTexture::External(external_image), cache_handle)
+                }
+                None => {
+                    if let Ok(cache_item) = resource_cache.get_cached_image(image_key, image_rendering, tile_offset) {
+                        (cache_item.texture_id, cache_item.uv_rect_handle)
+                    } else {
+                        // There is no usable texture entry for the image key. Just return an invalid texture here.
+                        (SourceTexture::Invalid, GpuCacheHandle::new())
+                    }
+                }
+            }
+        }
+        None => (SourceTexture::Invalid, GpuCacheHandle::new()), // No image properties for this key: report an invalid texture.
+    }
+}
+
+/// Constructs a `WorldPixel` polygon from stacking context boundaries (`rect`)
+/// transformed by `transform`. `anchor` here is an index that's going to be
+/// preserved in all the splits of the polygon.
+fn make_polygon(
+    rect: LayerRect,
+    transform: &LayerToWorldTransform,
+    anchor: usize,
+) -> Polygon<f64, WorldPixel> {
+    let mat = TypedTransform3D::row_major( // Same matrix with every component cast to f64.
+        transform.m11 as f64,
+        transform.m12 as f64,
+        transform.m13 as f64,
+        transform.m14 as f64,
+        transform.m21 as f64,
+        transform.m22 as f64,
+        transform.m23 as f64,
+        transform.m24 as f64,
+        transform.m31 as f64,
+        transform.m32 as f64,
+        transform.m33 as f64,
+        transform.m34 as f64,
+        transform.m41 as f64,
+        transform.m42 as f64,
+        transform.m43 as f64,
+        transform.m44 as f64);
+    Polygon::from_transformed_rect(rect.cast().unwrap(), mat, anchor) // NOTE(review): panics if `rect.cast()` fails.
+}
+
+/// Batcher managing draw calls into the clip mask (in the RT cache).
+#[derive(Debug)]
+pub struct ClipBatcher {
+    /// Rectangle draws: rounded rectangles, and plain rectangles in a differing coordinate system.
+    pub rectangles: Vec<ClipMaskInstance>,
+    /// Image draws apply the image masking; instances are grouped by source texture.
+    pub images: FastHashMap<SourceTexture, Vec<ClipMaskInstance>>,
+    pub border_clears: Vec<ClipMaskInstance>, // One instance (segment 0) per border-corner clip source.
+    pub borders: Vec<ClipMaskInstance>, // One instance per clip of a border corner (segments 1 and up).
+}
+
+impl ClipBatcher {
+    pub fn new() -> Self { // Creates a batcher with no queued instances.
+        ClipBatcher {
+            rectangles: Vec::new(),
+            images: FastHashMap::default(),
+            border_clears: Vec::new(),
+            borders: Vec::new(),
+        }
+    }
+
+    pub fn add( // Queues clip mask instances for every clip source of each work item in `clips`.
+        &mut self,
+        task_address: RenderTaskAddress,
+        clips: &[ClipWorkItem],
+        coordinate_system_id: CoordinateSystemId,
+        resource_cache: &ResourceCache,
+        gpu_cache: &GpuCache,
+        clip_store: &ClipStore,
+    ) {
+        let mut coordinate_system_id = coordinate_system_id; // Mutable copy; updated when a plain rectangle is emitted.
+        for work_item in clips.iter() {
+            let instance = ClipMaskInstance { // Template; per-source fields are overridden via `..instance`.
+                render_task_address: task_address,
+                scroll_node_data_index: work_item.scroll_node_data_index,
+                segment: 0,
+                clip_data_address: GpuCacheAddress::invalid(),
+                resource_address: GpuCacheAddress::invalid(),
+            };
+            let info = clip_store
+                .get_opt(&work_item.clip_sources)
+                .expect("bug: clip handle should be valid");
+
+            for &(ref source, ref handle) in &info.clips {
+                let gpu_address = gpu_cache.get_address(handle);
+
+                match *source {
+                    ClipSource::Image(ref mask) => {
+                        if let Ok(cache_item) = resource_cache.get_cached_image(mask.image, ImageRendering::Auto, None) {
+                            self.images
+                                .entry(cache_item.texture_id) // Group image masks by the texture they sample.
+                                .or_insert(Vec::new())
+                                .push(ClipMaskInstance {
+                                    clip_data_address: gpu_address,
+                                    resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
+                                    ..instance
+                                });
+                        } else {
+                            warn!("Warnings: skip a image mask. Key:{:?} Rect::{:?}.\n", mask.image, mask.rect);
+                            continue; // Mask image is not in the cache: skip this clip source.
+                        }
+                    }
+                    ClipSource::Rectangle(..) => {
+                        if work_item.coordinate_system_id != coordinate_system_id { // Only emitted when the coordinate system differs.
+                            self.rectangles.push(ClipMaskInstance {
+                                clip_data_address: gpu_address,
+                                ..instance
+                            });
+                            coordinate_system_id = work_item.coordinate_system_id;
+                        }
+                    }
+                    ClipSource::RoundedRectangle(..) => {
+                        self.rectangles.push(ClipMaskInstance { // Rounded rectangles are always emitted.
+                            clip_data_address: gpu_address,
+                            ..instance
+                        });
+                    }
+                    ClipSource::BorderCorner(ref source) => {
+                        self.border_clears.push(ClipMaskInstance { // One clear instance per corner, segment 0.
+                            clip_data_address: gpu_address,
+                            segment: 0,
+                            ..instance
+                        });
+                        for clip_index in 0 .. source.actual_clip_count { // Then one instance per clip, segments starting at 1.
+                            self.borders.push(ClipMaskInstance {
+                                clip_data_address: gpu_address,
+                                segment: 1 + clip_index as i32,
+                                ..instance
+                            })
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -4,18 +4,18 @@
 
 use api::{BorderRadius, BorderSide, BorderStyle, BorderWidths, ClipAndScrollInfo, ColorF};
 use api::{LayerPoint, LayerRect};
 use api::{LayerPrimitiveInfo, LayerSize, NormalBorder, RepeatMode};
 use clip::ClipSource;
 use ellipse::Ellipse;
 use frame_builder::FrameBuilder;
 use gpu_cache::GpuDataRequest;
-use prim_store::{BrushAntiAliasMode, BrushSegmentDescriptor, BrushSegmentKind};
-use prim_store::{BorderPrimitiveCpu, PrimitiveContainer, TexelRect};
+use prim_store::{BorderPrimitiveCpu, BrushSegment, BrushSegmentDescriptor};
+use prim_store::{BrushClipMaskKind, EdgeAaSegmentMask, PrimitiveContainer, TexelRect};
 use util::{lerp, pack_as_float};
 
 #[repr(u8)]
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BorderCornerInstance {
     None,
     Single, // Single instance needed - corner styles are same or similar.
     Double, // Different corner styles. Draw two instances, one per style.
@@ -417,94 +417,100 @@ impl FrameBuilder {
         });
         let all_edges_simple = edges.iter().all(|e| {
             *e == BorderEdgeKind::Solid || *e == BorderEdgeKind::None
         });
 
         let has_no_curve = radius.is_zero();
 
         if has_no_curve && all_corners_simple && all_edges_simple {
-            let inner_rect = LayerRect::new(
-                LayerPoint::new(
-                    info.rect.origin.x + left_len,
-                    info.rect.origin.y + top_len,
-                ),
-                LayerSize::new(
-                    info.rect.size.width - left_len - right_len,
-                    info.rect.size.height - top_len - bottom_len,
-                ),
+            let p0 = info.rect.origin;
+            let p1 = LayerPoint::new(
+                info.rect.origin.x + left_len,
+                info.rect.origin.y + top_len,
+            );
+            let p2 = LayerPoint::new(
+                info.rect.origin.x + info.rect.size.width - right_len,
+                info.rect.origin.y + info.rect.size.height - bottom_len,
+            );
+            let p3 = info.rect.bottom_right();
+
+            let segment = |x0, y0, x1, y1| BrushSegment::new(
+                LayerPoint::new(x0, y0),
+                LayerSize::new(x1-x0, y1-y0),
+                false,
+                EdgeAaSegmentMask::all() // Note: this doesn't seem right, needs revision
             );
 
             // Add a solid rectangle for each visible edge/corner combination.
             if top_edge == BorderEdgeKind::Solid {
-                let descriptor = BrushSegmentDescriptor::new(
-                    &info.rect,
-                    &inner_rect,
-                    Some(&[
-                        BrushSegmentKind::TopLeft,
-                        BrushSegmentKind::TopMid,
-                        BrushSegmentKind::TopRight
-                    ]),
-                );
+                let descriptor = BrushSegmentDescriptor {
+                    segments: vec![
+                        segment(p0.x, p0.y, p1.x, p1.y),
+                        segment(p2.x, p0.y, p3.x, p1.y),
+                        segment(p1.x, p0.y, p2.x, p1.y),
+                    ],
+                    clip_mask_kind: BrushClipMaskKind::Unknown,
+                };
+
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
                     border.top.color,
-                    Some(Box::new(descriptor)),
-                    BrushAntiAliasMode::Segment,
+                    Some(descriptor),
                 );
             }
+
             if left_edge == BorderEdgeKind::Solid {
-                let descriptor = BrushSegmentDescriptor::new(
-                    &info.rect,
-                    &inner_rect,
-                    Some(&[
-                        BrushSegmentKind::MidLeft,
-                    ]),
-                );
+                let descriptor = BrushSegmentDescriptor {
+                    segments: vec![
+                        segment(p0.x, p1.y, p1.x, p2.y),
+                    ],
+                    clip_mask_kind: BrushClipMaskKind::Unknown,
+                };
+
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
                     border.left.color,
-                    Some(Box::new(descriptor)),
-                    BrushAntiAliasMode::Segment,
+                    Some(descriptor),
                 );
             }
+
             if right_edge == BorderEdgeKind::Solid {
-                let descriptor = BrushSegmentDescriptor::new(
-                    &info.rect,
-                    &inner_rect,
-                    Some(&[
-                        BrushSegmentKind::MidRight,
-                    ]),
-                );
+                let descriptor = BrushSegmentDescriptor {
+                    segments: vec![
+                        segment(p2.x, p1.y, p3.x, p2.y),
+                    ],
+                    clip_mask_kind: BrushClipMaskKind::Unknown,
+                };
+
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
                     border.right.color,
-                    Some(Box::new(descriptor)),
-                    BrushAntiAliasMode::Segment,
+                    Some(descriptor),
                 );
             }
+
             if bottom_edge == BorderEdgeKind::Solid {
-                let descriptor = BrushSegmentDescriptor::new(
-                    &info.rect,
-                    &inner_rect,
-                    Some(&[
-                        BrushSegmentKind::BottomLeft,
-                        BrushSegmentKind::BottomMid,
-                        BrushSegmentKind::BottomRight
-                    ]),
-                );
+                let descriptor = BrushSegmentDescriptor {
+                    segments: vec![
+                        segment(p1.x, p2.y, p2.x, p3.y),
+                        segment(p2.x, p2.y, p3.x, p3.y),
+                        segment(p0.x, p2.y, p1.x, p3.y),
+                    ],
+                    clip_mask_kind: BrushClipMaskKind::Unknown,
+                };
+
                 self.add_solid_rectangle(
                     clip_and_scroll,
                     &info,
                     border.bottom.color,
-                    Some(Box::new(descriptor)),
-                    BrushAntiAliasMode::Segment,
+                    Some(descriptor),
                 );
             }
         } else {
             // Create clip masks for border corners, if required.
             let mut extra_clips = Vec::new();
             let mut corner_instances = [BorderCornerInstance::Single; 4];
 
             for (i, corner) in corners.iter().enumerate() {
@@ -923,9 +929,9 @@ impl ImageBorderSegment {
 
         ImageBorderSegment {
             geom_rect: rect,
             sub_rect,
             stretch_size: LayerSize::new(stretch_size_x, stretch_size_y),
             tile_spacing,
         }
     }
-}
\ No newline at end of file
+}
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -5,17 +5,17 @@
 use api::{ColorF, LayerPoint, LayerRect, LayerSize, LayerVector2D};
 use api::{BorderRadius, BoxShadowClipMode, LayoutSize, LayerPrimitiveInfo};
 use api::{ClipMode, ClipAndScrollInfo, ComplexClipRegion, LocalClip};
 use api::{PipelineId};
 use app_units::Au;
 use clip::ClipSource;
 use frame_builder::FrameBuilder;
 use gpu_types::BrushImageKind;
-use prim_store::{BrushAntiAliasMode, PrimitiveContainer};
+use prim_store::{PrimitiveContainer};
 use prim_store::{BrushMaskKind, BrushKind, BrushPrimitive};
 use picture::PicturePrimitive;
 use util::RectHelpers;
 use render_task::MAX_BLUR_STD_DEVIATION;
 
 // The blur shader samples BLUR_SAMPLE_SCALE * blur_radius surrounding texels.
 pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
 
@@ -131,17 +131,16 @@ impl FrameBuilder {
                 clip_and_scroll,
                 &fast_info,
                 clips,
                 PrimitiveContainer::Brush(
                     BrushPrimitive::new(BrushKind::Solid {
                             color: *color,
                         },
                         None,
-                        BrushAntiAliasMode::Primitive,
                     )
                 ),
             );
         } else {
             let blur_offset = BLUR_SAMPLE_SCALE * blur_radius;
             let mut extra_clips = vec![];
 
             let cache_key = BoxShadowCacheKey {
@@ -183,17 +182,16 @@ impl FrameBuilder {
                         height = MASK_CORNER_PADDING + corner_size.height.max(BLUR_SAMPLE_SCALE * blur_radius);
 
                         brush_prim = BrushPrimitive::new(
                             BrushKind::Mask {
                                 clip_mode: brush_clip_mode,
                                 kind: BrushMaskKind::Corner(corner_size),
                             },
                             None,
-                            BrushAntiAliasMode::Primitive,
                         );
                     } else {
                         // Create a minimal size primitive mask to blur. In this
                         // case, we ensure the size of each corner is the same,
                         // to simplify the shader logic that stretches the blurred
                         // result across the primitive.
                         image_kind = BrushImageKind::NinePatch;
                         let max_width = shadow_radius.top_left.width
@@ -221,17 +219,16 @@ impl FrameBuilder {
                                                        LayerSize::new(width, height));
 
                         brush_prim = BrushPrimitive::new(
                             BrushKind::Mask {
                                 clip_mode: brush_clip_mode,
                                 kind: BrushMaskKind::RoundedRect(clip_rect, shadow_radius),
                             },
                             None,
-                            BrushAntiAliasMode::Primitive,
                         );
                     };
 
                     // Construct a mask primitive to add to the picture.
                     let brush_rect = LayerRect::new(LayerPoint::zero(),
                                                     LayerSize::new(width, height));
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
                     let brush_prim_index = self.create_primitive(
@@ -296,27 +293,26 @@ impl FrameBuilder {
                     // with zero spread and zero offset.
                     // The size of inflation edge is determined by std deviation because large
                     // std deviation blur would be downscaled first. Thus, we need more thick
                     // edge to prevent edge get blurred after downscled.
                     let mut adjusted_blur_std_deviation = blur_radius * 0.5;
                     let mut inflate_size = 1.0;
                     while adjusted_blur_std_deviation > MAX_BLUR_STD_DEVIATION {
                         adjusted_blur_std_deviation *= 0.5;
-                        inflate_size += 1.0;
+                        inflate_size *= 2.0;
                     }
 
                     let brush_rect = brush_rect.inflate(inflate_size, inflate_size);
                     let brush_prim = BrushPrimitive::new(
                         BrushKind::Mask {
                             clip_mode: brush_clip_mode,
                             kind: BrushMaskKind::RoundedRect(clip_rect, shadow_radius),
                         },
                         None,
-                        BrushAntiAliasMode::Primitive,
                     );
                     let brush_info = LayerPrimitiveInfo::new(brush_rect);
                     let brush_prim_index = self.create_primitive(
                         &brush_info,
                         Vec::new(),
                         PrimitiveContainer::Brush(brush_prim),
                     );
 
--- a/gfx/webrender/src/clip.rs
+++ b/gfx/webrender/src/clip.rs
@@ -1,14 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, ClipMode, ComplexClipRegion, DeviceIntRect, ImageMask, ImageRendering};
-use api::{LayerPoint, LayerRect, LayerToWorldTransform, LayoutPoint, LayoutVector2D, LocalClip};
+use api::{LayerPoint, LayerRect, LayoutPoint, LayoutVector2D, LocalClip};
+use api::{DevicePixelScale, LayerToWorldTransform};
 use border::{BorderCornerClipSource, ensure_no_corner_overlap};
 use ellipse::Ellipse;
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
 use prim_store::{ClipData, ImageMaskData};
 use resource_cache::ResourceCache;
 use util::{MaxRect, MatrixHelpers, calculate_screen_bounding_rect, extract_inner_rect_safe};
 
@@ -163,64 +164,60 @@ impl ClipSources {
 
         // Depending on the complexity of the clip, we may either know the outer and/or inner
         // rect, or neither or these.  In the case of a clip-out, we currently set the mask bounds
         // to be unknown. This is conservative, but ensures correctness. In the future we can make
         // this a lot more clever with some proper region handling.
         let mut local_outer = Some(LayerRect::max_rect());
         let mut local_inner = local_outer;
         let mut can_calculate_inner_rect = true;
-        let mut can_calculate_outer_rect = true;
+        let mut can_calculate_outer_rect = false;
         for source in clips {
             match *source {
                 ClipSource::Image(ref mask) => {
                     if !mask.repeat {
+                        can_calculate_outer_rect = true;
                         local_outer = local_outer.and_then(|r| r.intersection(&mask.rect));
-                        can_calculate_inner_rect = false;
-                    } else {
-                        can_calculate_inner_rect = false;
-                        can_calculate_outer_rect = false;
-                        break;
                     }
                     local_inner = None;
                 }
                 ClipSource::Rectangle(rect) => {
+                    can_calculate_outer_rect = true;
                     local_outer = local_outer.and_then(|r| r.intersection(&rect));
                     local_inner = local_inner.and_then(|r| r.intersection(&rect));
                 }
                 ClipSource::RoundedRectangle(ref rect, ref radius, mode) => {
                     // Once we encounter a clip-out, we just assume the worst
                     // case clip mask size, for now.
                     if mode == ClipMode::ClipOut {
                         can_calculate_inner_rect = false;
-                        can_calculate_outer_rect = false;
                         break;
                     }
 
+                    can_calculate_outer_rect = true;
                     local_outer = local_outer.and_then(|r| r.intersection(rect));
 
                     let inner_rect = extract_inner_rect_safe(rect, radius);
                     local_inner = local_inner
                         .and_then(|r| inner_rect.and_then(|ref inner| r.intersection(inner)));
                 }
                 ClipSource::BorderCorner { .. } => {
                     can_calculate_inner_rect = false;
-                    can_calculate_outer_rect = false;
                     break;
                 }
             }
         }
 
         let outer = match can_calculate_outer_rect {
-            true => local_outer,
+            true => Some(local_outer.unwrap_or_else(LayerRect::zero)),
             false => None,
         };
 
         let inner = match can_calculate_inner_rect {
-            true => local_inner.unwrap_or(LayerRect::zero()),
+            true => local_inner.unwrap_or_else(LayerRect::zero),
             false => LayerRect::zero(),
         };
 
         (inner, outer)
     }
 
     pub fn update(
         &mut self,
@@ -257,34 +254,34 @@ impl ClipSources {
     /// Whether or not this ClipSources has any clips (does any clipping).
     pub fn has_clips(&self) -> bool {
         !self.clips.is_empty()
     }
 
     pub fn get_screen_bounds(
         &self,
         transform: &LayerToWorldTransform,
-        device_pixel_ratio: f32,
+        device_pixel_scale: DevicePixelScale,
     ) -> (DeviceIntRect, Option<DeviceIntRect>) {
         // If this translation isn't axis aligned or has a perspective component, don't try to
         // calculate the inner rectangle. The rectangle that we produce would include potentially
         // clipped screen area.
         // TODO(mrobinson): We should eventually try to calculate an inner region or some inner
         // rectangle so that we can do screen inner rectangle optimizations for these kind of
         // cilps.
         let can_calculate_inner_rect =
             transform.preserves_2d_axis_alignment() && !transform.has_perspective_component();
         let screen_inner_rect = if can_calculate_inner_rect {
-            calculate_screen_bounding_rect(transform, &self.local_inner_rect, device_pixel_ratio)
+            calculate_screen_bounding_rect(transform, &self.local_inner_rect, device_pixel_scale)
         } else {
             DeviceIntRect::zero()
         };
 
         let screen_outer_rect = self.local_outer_rect.map(|outer_rect|
-            calculate_screen_bounding_rect(transform, &outer_rect, device_pixel_ratio)
+            calculate_screen_bounding_rect(transform, &outer_rect, device_pixel_scale)
         );
 
         (screen_inner_rect, screen_outer_rect)
     }
 }
 
 /// Represents a local rect and a device space
 /// rectangles that are either outside or inside bounds.
--- a/gfx/webrender/src/clip_scroll_node.rs
+++ b/gfx/webrender/src/clip_scroll_node.rs
@@ -1,28 +1,28 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ClipId, DeviceIntRect, LayerPixel, LayerPoint, LayerRect, LayerSize};
-use api::{LayerToScrollTransform, LayerToWorldTransform, LayerVector2D, LayoutVector2D, PipelineId};
-use api::{ScrollClamping, ScrollEventPhase, ScrollLocation, ScrollSensitivity};
-use api::{LayoutTransform, PropertyBinding, StickyOffsetBounds, WorldPoint};
+use api::{ClipId, DeviceIntRect, DevicePixelScale, LayerPixel, LayerPoint, LayerRect, LayerSize};
+use api::{LayerToWorldTransform, LayerTransform, LayerVector2D, LayoutTransform, LayoutVector2D};
+use api::{PipelineId, PropertyBinding, ScrollClamping, ScrollEventPhase, ScrollLocation};
+use api::{ScrollSensitivity, StickyOffsetBounds, WorldPoint};
 use clip::{ClipSourcesHandle, ClipStore};
 use clip_scroll_tree::{CoordinateSystemId, TransformUpdateState};
 use euclid::SideOffsets2D;
 use geometry::ray_intersects_rect;
 use gpu_cache::GpuCache;
 use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use render_task::{ClipChain, ClipChainNode, ClipWorkItem};
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
 use spring::{DAMPING, STIFFNESS, Spring};
 use std::rc::Rc;
-use util::{MatrixHelpers, MaxRect, TransformedRectKind};
+use util::{MatrixHelpers, TransformOrOffset, TransformedRectKind};
 
 #[cfg(target_os = "macos")]
 const CAN_OVERSCROLL: bool = true;
 
 #[cfg(not(target_os = "macos"))]
 const CAN_OVERSCROLL: bool = false;
 
 #[derive(Debug)]
@@ -80,20 +80,16 @@ impl NodeType {
 }
 
 /// Contains information common among all types of ClipScrollTree nodes.
 #[derive(Debug)]
 pub struct ClipScrollNode {
     /// Viewing rectangle in the coordinate system of the parent reference frame.
     pub local_viewport_rect: LayerRect,
 
-    /// Clip rect of this node - typically the same as viewport rect, except
-    /// in overscroll cases.
-    pub local_clip_rect: LayerRect,
-
     /// The transformation for this viewport in world coordinates is the transformation for
     /// our parent reference frame, plus any accumulated scrolling offsets from nodes
     /// between our reference frame and this node. For reference frames, we also include
     /// whatever local transformation this reference frame provides. This can be combined
     /// with the local_viewport_rect to get its position in world space.
     pub world_viewport_transform: LayerToWorldTransform,
 
     /// World transform for content transformed by this node.
@@ -111,46 +107,53 @@ pub struct ClipScrollNode {
     /// The type of this node and any data associated with that node type.
     pub node_type: NodeType,
 
     /// The node in the chain of clips that are necessary to clip display items
     /// that have this ClipScrollNode as their clip parent. This will be used to
     /// generate clip tasks.
     pub clip_chain_node: ClipChain,
 
-    /// The intersected outer bounds of the clips for this node.
-    pub combined_clip_outer_bounds: DeviceIntRect,
+    /// True if this node is transformed by an invertible transform.  If not, display items
+    /// transformed by this node will not be displayed and display items not transformed by this
+    /// node will not be clipped by clips that are transformed by this node.
+    pub invertible: bool,
 
     /// The axis-aligned coordinate system id of this node.
     pub coordinate_system_id: CoordinateSystemId,
 
+    /// The transformation from the coordinate system which established our compatible coordinate
+    /// system (same coordinate system id) to us. This can change via scroll offsets and via new
+    /// reference frame transforms.
+    pub coordinate_system_relative_transform: TransformOrOffset,
+
     /// A linear ID / index of this clip-scroll node. Used as a reference to
     /// pass to shaders, to allow them to fetch a given clip-scroll node.
     pub node_data_index: ClipScrollNodeIndex,
 }
 
 impl ClipScrollNode {
     fn new(
         pipeline_id: PipelineId,
         parent_id: Option<ClipId>,
         rect: &LayerRect,
         node_type: NodeType
     ) -> Self {
         ClipScrollNode {
             local_viewport_rect: *rect,
-            local_clip_rect: *rect,
             world_viewport_transform: LayerToWorldTransform::identity(),
             world_content_transform: LayerToWorldTransform::identity(),
             parent: parent_id,
             children: Vec::new(),
             pipeline_id,
             node_type: node_type,
             clip_chain_node: None,
-            combined_clip_outer_bounds: DeviceIntRect::max_rect(),
+            invertible: true,
             coordinate_system_id: CoordinateSystemId(0),
+            coordinate_system_relative_transform: TransformOrOffset::zero(),
             node_data_index: ClipScrollNodeIndex(0),
         }
     }
 
     pub fn new_scroll_frame(
         pipeline_id: PipelineId,
         parent_id: ClipId,
         frame_rect: &LayerRect,
@@ -182,20 +185,21 @@ impl ClipScrollNode {
         frame_rect: &LayerRect,
         source_transform: Option<PropertyBinding<LayoutTransform>>,
         source_perspective: Option<LayoutTransform>,
         origin_in_parent_reference_frame: LayerVector2D,
         pipeline_id: PipelineId,
     ) -> Self {
         let identity = LayoutTransform::identity();
         let info = ReferenceFrameInfo {
-            resolved_transform: LayerToScrollTransform::identity(),
+            resolved_transform: LayerTransform::identity(),
             source_transform: source_transform.unwrap_or(PropertyBinding::Value(identity)),
             source_perspective: source_perspective.unwrap_or(identity),
             origin_in_parent_reference_frame,
+            invertible: true,
         };
         Self::new(pipeline_id, parent_id, frame_rect, NodeType::ReferenceFrame(info))
     }
 
     pub fn new_sticky_frame(
         parent_id: ClipId,
         frame_rect: LayerRect,
         sticky_frame_info: StickyFrameInfo,
@@ -259,188 +263,144 @@ impl ClipScrollNode {
         }
 
         scrolling.offset = new_offset;
         scrolling.bouncing_back = false;
         scrolling.started_bouncing_back = false;
         true
     }
 
-    pub fn update_to_empty_rect(&mut self) {
-        self.combined_clip_outer_bounds = DeviceIntRect::zero();
+    pub fn mark_uninvertible(&mut self) {
+        self.invertible = false;
         self.world_content_transform = LayerToWorldTransform::identity();
         self.world_viewport_transform = LayerToWorldTransform::identity();
         self.clip_chain_node = None;
     }
 
-    pub fn push_gpu_node_data(
-        &mut self,
-        state: &TransformUpdateState,
-        node_data: &mut Vec<ClipScrollNodeData>
-    ) {
-        if self.combined_clip_outer_bounds.is_empty() {
+    pub fn push_gpu_node_data(&mut self, node_data: &mut Vec<ClipScrollNodeData>) {
+        if !self.invertible {
             node_data.push(ClipScrollNodeData::invalid());
             return;
         }
 
-        let local_clip_rect = match self.node_type {
-            _ if self.world_content_transform.has_perspective_component() => LayerRect::max_rect(),
-            NodeType::ReferenceFrame(ref info) => {
-                info.resolved_transform.with_destination::<LayerPixel>()
-                    .inverse_rect_footprint(&state.parent_combined_viewport_rect)
-            }
-            NodeType::Clip(_) | NodeType::ScrollFrame(_) => {
-                state.parent_combined_viewport_rect
-                    .intersection(&self.local_clip_rect)
-                    .unwrap_or(LayerRect::zero())
-            }
-            NodeType::StickyFrame(ref sticky_info) => {
-                state.parent_combined_viewport_rect
-                    .translate(&-sticky_info.current_offset)
-                    .intersection(&self.local_clip_rect)
-                    .unwrap_or(LayerRect::zero())
-            }
-        };
-
         let transform_kind = if self.world_content_transform.preserves_2d_axis_alignment() {
             TransformedRectKind::AxisAligned
         } else {
             TransformedRectKind::Complex
         };
-
-        let reference_frame_relative_scroll_offset = match self.node_type {
-            NodeType::ReferenceFrame(_) => LayerVector2D::zero(),
-            NodeType::Clip(_) | NodeType::ScrollFrame(_) => state.parent_accumulated_scroll_offset,
-            NodeType::StickyFrame(ref sticky_info) =>
-                    state.parent_accumulated_scroll_offset + sticky_info.current_offset,
-        };
-
         let data = ClipScrollNodeData {
             transform: self.world_content_transform,
-            local_clip_rect,
-            reference_frame_relative_scroll_offset,
-            scroll_offset: self.scroll_offset(),
             transform_kind: transform_kind as u32 as f32,
             padding: [0.0; 3],
         };
 
         // Write the data that will be made available to the GPU for this node.
         node_data.push(data);
     }
 
     pub fn update(
         &mut self,
         state: &mut TransformUpdateState,
         next_coordinate_system_id: &mut CoordinateSystemId,
-        device_pixel_ratio: f32,
+        screen_rect: &DeviceIntRect,
+        device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         scene_properties: &SceneProperties,
     ) {
         // If any of our parents was not rendered, we are not rendered either and can just
         // quit here.
-        if state.combined_outer_clip_bounds.is_empty() {
-            self.update_to_empty_rect();
+        if !state.invertible {
+            self.mark_uninvertible();
             return;
         }
 
+        self.update_transform(state, next_coordinate_system_id, scene_properties);
+
         // If this node is a reference frame, we check if the determinant is 0, which means it
         // has a non-invertible matrix. For non-reference-frames we assume that they will
         // produce only additional translations which should be invertible.
-        if self.node_type.is_reference_frame() {
-            if self.world_content_transform.determinant() == 0.0 {
-                self.update_to_empty_rect();
+        match self.node_type {
+            NodeType::ReferenceFrame(info) if !info.invertible => {
+                self.mark_uninvertible();
                 return;
             }
+            _ => self.invertible = true,
         }
 
-        self.update_transform(state, next_coordinate_system_id, scene_properties);
         self.update_clip_work_item(
             state,
-            device_pixel_ratio,
+            screen_rect,
+            device_pixel_scale,
             clip_store,
             resource_cache,
             gpu_cache,
         );
-
-        // This indicates that we are entirely clipped out.
-        if state.combined_outer_clip_bounds.is_empty() {
-            self.update_to_empty_rect();
-            return;
-        }
-
     }
 
     pub fn update_clip_work_item(
         &mut self,
         state: &mut TransformUpdateState,
-        device_pixel_ratio: f32,
+        screen_rect: &DeviceIntRect,
+        device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
     ) {
-        let mut current_clip_chain = state.parent_clip_chain.clone();
+        let current_clip_chain = state.parent_clip_chain.clone();
+        let combined_outer_screen_rect = current_clip_chain.as_ref().map_or(
+            *screen_rect, |clip| clip.combined_outer_screen_rect,
+        );
+
         let clip_sources_handle = match self.node_type {
             NodeType::Clip(ref handle) => handle,
             _ => {
-                self.clip_chain_node = current_clip_chain;
-                self.combined_clip_outer_bounds = state.combined_outer_clip_bounds;
+                self.clip_chain_node = current_clip_chain.clone();
+                self.invertible = true;
                 return;
             }
         };
 
         let clip_sources = clip_store.get_mut(clip_sources_handle);
         clip_sources.update(gpu_cache, resource_cache);
         let (screen_inner_rect, screen_outer_rect) =
-            clip_sources.get_screen_bounds(&self.world_viewport_transform, device_pixel_ratio);
+            clip_sources.get_screen_bounds(&self.world_viewport_transform, device_pixel_scale);
+
+        // All clipping ClipScrollNodes should have outer rectangles, because they never
+        // use the BorderCorner clip type and they always have at least one non-ClipOut
+        // Rectangle ClipSource.
+        let screen_outer_rect = screen_outer_rect.expect("Clipping node didn't have outer rect.");
+        let local_outer_rect = clip_sources.local_outer_rect.expect(
+            "Clipping node didn't have outer rect."
+        );
 
         // If this clip's inner rectangle completely surrounds the existing clip
         // chain's outer rectangle, we can discard this clip entirely since it isn't
         // going to affect anything.
-        if screen_inner_rect.contains_rect(&state.combined_outer_clip_bounds) {
+        if screen_inner_rect.contains_rect(&combined_outer_screen_rect) {
             self.clip_chain_node = current_clip_chain;
-            self.combined_clip_outer_bounds = state.combined_outer_clip_bounds;
             return;
         }
 
-        let combined_outer_screen_rect = match screen_outer_rect {
-            Some(outer_rect) => {
-                // If this clips outer rectangle is completely enclosed by the clip
-                // chain's inner rectangle, then the only clip that matters from this point
-                // on is this clip. We can disconnect this clip from the parent clip chain.
-                if state.combined_inner_clip_bounds.contains_rect(&outer_rect) {
-                    current_clip_chain = None;
-                }
-                outer_rect.intersection(&state.combined_outer_clip_bounds)
-                    .unwrap_or_else(DeviceIntRect::zero)
-            }
-            None => state.combined_outer_clip_bounds,
+        let work_item = ClipWorkItem {
+            scroll_node_data_index: self.node_data_index,
+            clip_sources: clip_sources_handle.weak(),
+            coordinate_system_id: state.current_coordinate_system_id,
         };
 
-        let combined_inner_screen_rect =
-            state.combined_inner_clip_bounds.intersection(&screen_inner_rect)
-            .unwrap_or_else(DeviceIntRect::zero);
-
-        state.combined_outer_clip_bounds = combined_outer_screen_rect;
-        state.combined_inner_clip_bounds = combined_inner_screen_rect;
-        self.combined_clip_outer_bounds = combined_outer_screen_rect;
+        let clip_chain_node = ClipChainNode::new(
+            work_item,
+            self.coordinate_system_relative_transform.apply(&local_outer_rect),
+            screen_outer_rect,
+            screen_inner_rect,
+            current_clip_chain
+        );
 
-        self.clip_chain_node = Some(Rc::new(ClipChainNode {
-            work_item: ClipWorkItem {
-                scroll_node_data_index: self.node_data_index,
-                clip_sources: clip_sources_handle.weak(),
-                coordinate_system_id: state.current_coordinate_system_id,
-            },
-            screen_inner_rect,
-            combined_outer_screen_rect,
-            combined_inner_screen_rect,
-            prev: current_clip_chain,
-        }));
-
+        self.clip_chain_node = Some(Rc::new(clip_chain_node));
         state.parent_clip_chain = self.clip_chain_node.clone();
     }
 
     pub fn update_transform(
         &mut self,
         state: &mut TransformUpdateState,
         next_coordinate_system_id: &mut CoordinateSystemId,
         scene_properties: &SceneProperties,
@@ -474,16 +434,20 @@ impl ClipScrollNode {
         // whatever scrolling offset we supply as well.
         let scroll_offset = self.scroll_offset();
         self.world_content_transform = if scroll_offset != LayerVector2D::zero() {
             self.world_viewport_transform.pre_translate(scroll_offset.to_3d())
         } else {
             self.world_viewport_transform
         };
 
+        let added_offset = state.parent_accumulated_scroll_offset + sticky_offset + scroll_offset;
+        self.coordinate_system_relative_transform =
+            state.coordinate_system_relative_transform.offset(added_offset);
+
         match self.node_type {
             NodeType::StickyFrame(ref mut info) => info.current_offset = sticky_offset,
             _ => {},
         }
 
         self.coordinate_system_id = state.current_coordinate_system_id;
     }
 
@@ -495,40 +459,51 @@ impl ClipScrollNode {
     ) {
         let info = match self.node_type {
             NodeType::ReferenceFrame(ref mut info) => info,
             _ => unreachable!("Called update_transform_for_reference_frame on non-ReferenceFrame"),
         };
 
         // Resolve the transform against any property bindings.
         let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
-        info.resolved_transform = LayerToScrollTransform::create_translation(
+        info.resolved_transform = LayerTransform::create_translation(
             info.origin_in_parent_reference_frame.x,
             info.origin_in_parent_reference_frame.y,
             0.0
         ).pre_mul(&source_transform)
          .pre_mul(&info.source_perspective);
 
-        if !info.resolved_transform.preserves_2d_axis_alignment() ||
-           info.resolved_transform.has_perspective_component() {
-            state.current_coordinate_system_id = *next_coordinate_system_id;
-            next_coordinate_system_id.advance();
-        }
-        self.coordinate_system_id = state.current_coordinate_system_id;
-
         // The transformation for this viewport in world coordinates is the transformation for
         // our parent reference frame, plus any accumulated scrolling offsets from nodes
         // between our reference frame and this node. Finally, we also include
         // whatever local transformation this reference frame provides. This can be combined
         // with the local_viewport_rect to get its position in world space.
-        self.world_viewport_transform = state
-            .parent_reference_frame_transform
-            .pre_translate(state.parent_accumulated_scroll_offset.to_3d())
-            .pre_mul(&info.resolved_transform.with_destination::<LayerPixel>());
+        let relative_transform = info.resolved_transform
+            .post_translate(state.parent_accumulated_scroll_offset.to_3d());
+        self.world_viewport_transform = state.parent_reference_frame_transform
+            .pre_mul(&relative_transform.with_destination::<LayerPixel>());
         self.world_content_transform = self.world_viewport_transform;
+
+        info.invertible = relative_transform.determinant() != 0.0;
+        if !info.invertible {
+            return;
+        }
+
+        // Try to update our compatible coordinate system transform. If we cannot, start a new
+        // incompatible coordinate system.
+        match state.coordinate_system_relative_transform.update(relative_transform) {
+            Some(offset) => self.coordinate_system_relative_transform = offset,
+            None => {
+                self.coordinate_system_relative_transform = TransformOrOffset::zero();
+                state.current_coordinate_system_id = *next_coordinate_system_id;
+                next_coordinate_system_id.advance();
+            }
+        }
+
+        self.coordinate_system_id = state.current_coordinate_system_id;
     }
 
     fn calculate_sticky_offset(
         &self,
         viewport_scroll_offset: &LayerVector2D,
         viewport_rect: &LayerRect,
     ) -> LayerVector2D {
         let info = match self.node_type {
@@ -630,61 +605,49 @@ impl ClipScrollNode {
                                          &info.vertical_offset_bounds);
         sticky_offset.x = clamp_adjusted(sticky_offset.x,
                                          info.previously_applied_offset.x,
                                          &info.horizontal_offset_bounds);
 
         sticky_offset
     }
 
-    pub fn prepare_state_for_children(
-        &self,
-        state: &mut TransformUpdateState,
-        node_data: &Vec<ClipScrollNodeData>
-    ) {
-        if self.combined_clip_outer_bounds.is_empty() {
-            state.parent_combined_viewport_rect = LayerRect::zero();
-            state.combined_outer_clip_bounds = DeviceIntRect::zero();
+    pub fn prepare_state_for_children(&self, state: &mut TransformUpdateState) {
+        if !self.invertible {
+            state.invertible = false;
             state.parent_clip_chain = None;
             return;
         }
 
-        let combined_local_viewport_rect =
-            node_data[self.node_data_index.0 as usize].local_clip_rect;
-
         // The transformation we are passing is the transformation of the parent
         // reference frame and the offset is the accumulated offset of all the nodes
         // between us and the parent reference frame. If we are a reference frame,
         // we need to reset both these values.
         match self.node_type {
             NodeType::ReferenceFrame(ref info) => {
                 state.parent_reference_frame_transform = self.world_viewport_transform;
-                state.parent_combined_viewport_rect = combined_local_viewport_rect;
                 state.parent_accumulated_scroll_offset = LayerVector2D::zero();
+                state.coordinate_system_relative_transform =
+                    self.coordinate_system_relative_transform.clone();
                 let translation = -info.origin_in_parent_reference_frame;
                 state.nearest_scrolling_ancestor_viewport =
                     state.nearest_scrolling_ancestor_viewport
                        .translate(&translation);
             }
-            NodeType::Clip(..) => {
-                state.parent_combined_viewport_rect = combined_local_viewport_rect;
-            },
+            NodeType::Clip(..) => { }
             NodeType::ScrollFrame(ref scrolling) => {
-                state.parent_combined_viewport_rect =
-                        combined_local_viewport_rect.translate(&-scrolling.offset);
                 state.parent_accumulated_scroll_offset =
                     scrolling.offset + state.parent_accumulated_scroll_offset;
                 state.nearest_scrolling_ancestor_offset = scrolling.offset;
                 state.nearest_scrolling_ancestor_viewport = self.local_viewport_rect;
             }
             NodeType::StickyFrame(ref info) => {
                 // We don't translate the combined rect by the sticky offset, because sticky
                 // offsets actually adjust the node position itself, whereas scroll offsets
                 // only apply to contents inside the node.
-                state.parent_combined_viewport_rect = combined_local_viewport_rect;
                 state.parent_accumulated_scroll_offset =
                     info.current_offset + state.parent_accumulated_scroll_offset;
             }
         }
     }
 
     pub fn scrollable_size(&self) -> LayerSize {
         match self.node_type {
@@ -812,17 +775,23 @@ impl ClipScrollNode {
     pub fn is_overscrolling(&self) -> bool {
         match self.node_type {
             NodeType::ScrollFrame(ref state) => state.overscroll_amount() != LayerVector2D::zero(),
             _ => false,
         }
     }
 
     pub fn is_visible(&self) -> bool {
-        self.combined_clip_outer_bounds != DeviceIntRect::zero()
+        if !self.invertible {
+            return false;
+        }
+        match self.clip_chain_node {
+            Some(ref node) if node.combined_outer_screen_rect.is_empty() => false,
+            _ => true,
+        }
     }
 }
 
 #[derive(Copy, Clone, Debug)]
 pub struct ScrollingState {
     pub offset: LayerVector2D,
     pub spring: Spring,
     pub started_bouncing_back: bool,
@@ -893,22 +862,25 @@ impl ScrollingState {
     }
 }
 
 /// Contains information about reference frames.
 #[derive(Copy, Clone, Debug)]
 pub struct ReferenceFrameInfo {
     /// The transformation that establishes this reference frame, relative to the parent
     /// reference frame. The origin of the reference frame is included in the transformation.
-    pub resolved_transform: LayerToScrollTransform,
+    pub resolved_transform: LayerTransform,
 
     /// The source transform and perspective matrices provided by the stacking context
     /// that forms this reference frame. We maintain the property binding information
     /// here so that we can resolve the animated transform and update the tree each
     /// frame.
     pub source_transform: PropertyBinding<LayoutTransform>,
     pub source_perspective: LayoutTransform,
 
     /// The original origin of this reference frame, relative to the parent reference frame and
     /// not including the transform. This is already rolled into `resolved_transform`, but we
     /// also store it here to properly transform the viewport for sticky positioning.
     pub origin_in_parent_reference_frame: LayerVector2D,
+
+    /// True if the resolved transform is invertible.
+    pub invertible: bool,
 }
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -1,41 +1,41 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ClipId, DeviceIntRect, LayerPoint, LayerRect};
+use api::{ClipId, DeviceIntRect, DevicePixelScale, LayerPoint, LayerRect};
 use api::{LayerToWorldTransform, LayerVector2D, PipelineId, ScrollClamping, ScrollEventPhase};
 use api::{PropertyBinding, LayoutTransform, ScrollLayerState, ScrollLocation, WorldPoint};
 use clip::ClipStore;
 use clip_scroll_node::{ClipScrollNode, NodeType, ScrollingState, StickyFrameInfo};
 use gpu_cache::GpuCache;
 use gpu_types::{ClipScrollNodeIndex, ClipScrollNodeData};
 use internal_types::{FastHashMap, FastHashSet};
 use print_tree::{PrintTree, PrintTreePrinter};
 use render_task::ClipChain;
 use resource_cache::ResourceCache;
 use scene::SceneProperties;
-use util::MaxRect;
+use util::TransformOrOffset;
 
 pub type ScrollStates = FastHashMap<ClipId, ScrollingState>;
 
 /// An id that identifies coordinate systems in the ClipScrollTree. Each
 /// coordinate system has an id and those ids will be shared when the coordinates
 /// system are the same or are in the same axis-aligned space. This allows
 /// for optimizing mask generation.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct CoordinateSystemId(pub u32);
 
 impl CoordinateSystemId {
-    pub fn root() -> CoordinateSystemId {
+    pub fn root() -> Self {
         CoordinateSystemId(0)
     }
 
-    pub fn next(&self) -> CoordinateSystemId {
+    pub fn next(&self) -> Self {
         let CoordinateSystemId(id) = *self;
         CoordinateSystemId(id + 1)
     }
 
     pub fn advance(&mut self) {
         self.0 += 1;
     }
 }
@@ -64,29 +64,34 @@ pub struct ClipScrollTree {
     /// A set of pipelines which should be discarded the next time this
     /// tree is drained.
     pub pipelines_to_discard: FastHashSet<PipelineId>,
 }
 
 #[derive(Clone)]
 pub struct TransformUpdateState {
     pub parent_reference_frame_transform: LayerToWorldTransform,
-    pub parent_combined_viewport_rect: LayerRect,
     pub parent_accumulated_scroll_offset: LayerVector2D,
     pub nearest_scrolling_ancestor_offset: LayerVector2D,
     pub nearest_scrolling_ancestor_viewport: LayerRect,
     pub parent_clip_chain: ClipChain,
-    pub combined_outer_clip_bounds: DeviceIntRect,
-    pub combined_inner_clip_bounds: DeviceIntRect,
 
     /// An id for keeping track of the axis-aligned space of this node. This is used in
     /// order to track what kinds of clip optimizations can be done for a particular
     /// display list item, since optimizations can usually only be done among
     /// coordinate systems which are relatively axis aligned.
     pub current_coordinate_system_id: CoordinateSystemId,
+
+    /// Transform from the coordinate system that started this compatible coordinate system.
+    pub coordinate_system_relative_transform: TransformOrOffset,
+
+    /// True if this node is transformed by an invertible transform.  If not, display items
+    /// transformed by this node will not be displayed and display items not transformed by this
+    /// node will not be clipped by clips that are transformed by this node.
+    pub invertible: bool,
 }
 
 impl ClipScrollTree {
     pub fn new() -> Self {
         let dummy_pipeline = PipelineId::dummy();
         ClipScrollTree {
             nodes: FastHashMap::default(),
             pending_scroll_offsets: FastHashMap::default(),
@@ -193,21 +198,16 @@ impl ClipScrollTree {
         let clip_sources_handle = match node.node_type {
             NodeType::Clip(ref clip_sources_handle) => clip_sources_handle,
             _ => {
                 cache.insert(*node_id, Some(point_in_layer));
                 return true;
             }
         };
 
-        if !node.local_clip_rect.contains(&transformed_point) {
-            cache.insert(*node_id, None);
-            return false;
-        }
-
         for &(ref clip, _) in clip_store.get(&clip_sources_handle).clips() {
             if !clip.contains(&transformed_point) {
                 cache.insert(*node_id, None);
                 return false;
             }
         }
 
         cache.insert(*node_id, Some(point_in_layer));
@@ -332,66 +332,65 @@ impl ClipScrollTree {
             .get_mut(&clip_id)
             .unwrap()
             .scroll(scroll_location, phase)
     }
 
     pub fn update_tree(
         &mut self,
         screen_rect: &DeviceIntRect,
-        device_pixel_ratio: f32,
+        device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
-        pan: LayerPoint,
+        pan: WorldPoint,
         node_data: &mut Vec<ClipScrollNodeData>,
         scene_properties: &SceneProperties,
     ) {
         if self.nodes.is_empty() {
             return;
         }
 
         let root_reference_frame_id = self.root_reference_frame_id();
-        let root_viewport = self.nodes[&root_reference_frame_id].local_clip_rect;
-
         let mut state = TransformUpdateState {
             parent_reference_frame_transform: LayerToWorldTransform::create_translation(
                 pan.x,
                 pan.y,
                 0.0,
             ),
-            parent_combined_viewport_rect: root_viewport,
             parent_accumulated_scroll_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_offset: LayerVector2D::zero(),
             nearest_scrolling_ancestor_viewport: LayerRect::zero(),
             parent_clip_chain: None,
-            combined_outer_clip_bounds: *screen_rect,
-            combined_inner_clip_bounds: DeviceIntRect::max_rect(),
             current_coordinate_system_id: CoordinateSystemId::root(),
+            coordinate_system_relative_transform: TransformOrOffset::zero(),
+            invertible: true,
         };
         let mut next_coordinate_system_id = state.current_coordinate_system_id.next();
         self.update_node(
             root_reference_frame_id,
             &mut state,
             &mut next_coordinate_system_id,
-            device_pixel_ratio,
+            screen_rect,
+            device_pixel_scale,
             clip_store,
             resource_cache,
             gpu_cache,
             node_data,
             scene_properties,
         );
     }
 
     fn update_node(
         &mut self,
         layer_id: ClipId,
         state: &mut TransformUpdateState,
         next_coordinate_system_id: &mut CoordinateSystemId,
-        device_pixel_ratio: f32,
+        screen_rect: &DeviceIntRect,
+        device_pixel_scale: DevicePixelScale,
         clip_store: &mut ClipStore,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         gpu_node_data: &mut Vec<ClipScrollNodeData>,
         scene_properties: &SceneProperties,
     ) {
         // TODO(gw): This is an ugly borrow check workaround to clone these.
         //           Restructure this to avoid the clones!
@@ -403,39 +402,42 @@ impl ClipScrollTree {
             };
 
             // We set this early so that we can use it to populate the ClipChain.
             node.node_data_index = ClipScrollNodeIndex(gpu_node_data.len() as u32);
 
             node.update(
                 &mut state,
                 next_coordinate_system_id,
-                device_pixel_ratio,
+                screen_rect,
+                device_pixel_scale,
                 clip_store,
                 resource_cache,
                 gpu_cache,
                 scene_properties,
             );
 
-            node.push_gpu_node_data(&state, gpu_node_data);
+            node.push_gpu_node_data(gpu_node_data);
 
             if node.children.is_empty() {
                 return;
             }
 
-            node.prepare_state_for_children(&mut state, gpu_node_data);
+
+            node.prepare_state_for_children(&mut state);
             node.children.clone()
         };
 
-        for child_layer_id in node_children {
+        for child_node_id in node_children {
             self.update_node(
-                child_layer_id,
+                child_node_id,
                 &mut state,
                 next_coordinate_system_id,
-                device_pixel_ratio,
+                screen_rect,
+                device_pixel_scale,
                 clip_store,
                 resource_cache,
                 gpu_cache,
                 gpu_node_data,
                 scene_properties,
             );
         }
     }
@@ -561,25 +563,28 @@ impl ClipScrollTree {
                 pt.add_item(format!("sticky info: {:?}", sticky_frame_info));
             }
         }
 
         pt.add_item(format!(
             "local_viewport_rect: {:?}",
             node.local_viewport_rect
         ));
-        pt.add_item(format!("local_clip_rect: {:?}", node.local_clip_rect));
         pt.add_item(format!(
             "world_viewport_transform: {:?}",
             node.world_viewport_transform
         ));
         pt.add_item(format!(
             "world_content_transform: {:?}",
             node.world_content_transform
         ));
+        pt.add_item(format!(
+            "coordinate_system_id: {:?}",
+            node.coordinate_system_id
+        ));
 
         for child_id in &node.children {
             self.print_node(child_id, pt, clip_store);
         }
 
         pt.end_level();
     }
 
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::shader_source;
-use api::{ColorF, ImageFormat};
+use api::{ColorF, ImageDescriptor, ImageFormat};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceUintRect, DeviceUintSize};
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::{FastHashMap, RenderTargetInfo};
 use smallvec::SmallVec;
 use std::cell::RefCell;
 use std::fs::File;
 use std::io::Read;
@@ -218,31 +218,16 @@ pub fn build_shader_strings(
     let mut shared_result = String::new();
     if let Some(shared_source) = get_shader_source(base_filename, override_path) {
         parse_shader_source(shared_source, override_path, &mut shared_result);
     }
 
     vs_source.push_str(&shared_result);
     fs_source.push_str(&shared_result);
 
-    // Append legacy (.vs and .fs) files if they exist.
-    // TODO(gw): Once all shaders are ported to just use the
-    //           .glsl file, we can remove this code.
-    let vs_name = format!("{}.vs", base_filename);
-    if let Some(old_vs_source) = get_shader_source(&vs_name, override_path) {
-        vs_source.push_str(SHADER_LINE_MARKER);
-        vs_source.push_str(&old_vs_source);
-    }
-
-    let fs_name = format!("{}.fs", base_filename);
-    if let Some(old_fs_source) = get_shader_source(&fs_name, override_path) {
-        fs_source.push_str(SHADER_LINE_MARKER);
-        fs_source.push_str(&old_fs_source);
-    }
-
     (vs_source, fs_source)
 }
 
 pub trait FileWatcherHandler: Send {
     fn file_changed(&self, path: PathBuf);
 }
 
 impl VertexAttributeKind {
@@ -468,17 +453,16 @@ impl Texture {
 
     pub fn get_format(&self) -> ImageFormat {
         self.format
     }
 
     pub fn get_bpp(&self) -> u32 {
         match self.format {
             ImageFormat::A8 => 1,
-            ImageFormat::RGB8 => 3,
             ImageFormat::BGRA8 => 4,
             ImageFormat::RG8 => 2,
             ImageFormat::RGBAF32 => 16,
             ImageFormat::Invalid => unreachable!(),
         }
     }
 
     pub fn has_depth(&self) -> bool {
@@ -670,29 +654,38 @@ pub struct Device {
 
     max_texture_size: u32,
     renderer_name: String,
     cached_programs: Option<Rc<ProgramCache>>,
 
     // Frame counter. This is used to map between CPU
     // frames and GPU frames.
     frame_id: FrameId,
+
+    // GL extensions
+    extensions: Vec<String>,
 }
 
 impl Device {
     pub fn new(
         gl: Rc<gl::Gl>,
         resource_override_path: Option<PathBuf>,
         upload_method: UploadMethod,
         _file_changed_handler: Box<FileWatcherHandler>,
         cached_programs: Option<Rc<ProgramCache>>,
     ) -> Device {
         let max_texture_size = gl.get_integer_v(gl::MAX_TEXTURE_SIZE) as u32;
         let renderer_name = gl.get_string(gl::RENDERER);
 
+        let mut extensions = Vec::new();
+        let extension_count = gl.get_integer_v(gl::NUM_EXTENSIONS) as gl::GLuint;
+        for i in 0 .. extension_count {
+            extensions.push(gl.get_string_i(gl::EXTENSIONS, i));
+        }
+
         Device {
             gl,
             resource_override_path,
             // This is initialized to 1 by default, but it is reset
             // at the beginning of each frame in `Renderer::bind_frame_data`.
             device_pixel_ratio: 1.0,
             upload_method,
             inside_frame: false,
@@ -708,16 +701,17 @@ impl Device {
             bound_draw_fbo: FBOId(0),
             default_read_fbo: 0,
             default_draw_fbo: 0,
 
             max_texture_size,
             renderer_name,
             cached_programs,
             frame_id: FrameId(0),
+            extensions,
         }
     }
 
     pub fn gl(&self) -> &gl::Gl {
         &*self.gl
     }
 
     pub fn rc_gl(&self) -> &Rc<gl::Gl> {
@@ -1490,22 +1484,26 @@ impl Device {
                 gl: &*self.gl,
                 texture,
             },
             buffer,
             marker: PhantomData,
         }
     }
 
-    pub fn read_pixels(&mut self, width: i32, height: i32) -> Vec<u8> {
+    pub fn read_pixels(&mut self, desc: &ImageDescriptor) -> Vec<u8> {
+        let (_, gl_format) = gl_texture_formats_for_image_format(self.gl(), desc.format);
+        let type_ = gl_type_for_texture_format(desc.format);
+
         self.gl.read_pixels(
             0, 0,
-            width as i32, height as i32,
-            gl::RGBA,
-            gl::UNSIGNED_BYTE
+            desc.width as i32,
+            desc.height as i32,
+            gl_format,
+            type_,
         )
     }
 
     fn bind_vao_impl(&mut self, id: gl::GLuint) {
         debug_assert!(self.inside_frame);
 
         if self.bound_vao != id {
             self.bound_vao = id;
@@ -1906,30 +1904,36 @@ impl Device {
     }
     pub fn set_blend_mode_subpixel_constant_text_color(&self, color: ColorF) {
         // color is an unpremultiplied color.
         self.gl.blend_color(color.r, color.g, color.b, 1.0);
         self.gl
             .blend_func(gl::CONSTANT_COLOR, gl::ONE_MINUS_SRC_COLOR);
         self.gl.blend_equation(gl::FUNC_ADD);
     }
+    pub fn set_blend_mode_subpixel_dual_source(&self) {
+        self.gl.blend_func(gl::ONE, gl::ONE_MINUS_SRC1_COLOR);
+    }
+
+    pub fn supports_extension(&self, extension: &str) -> bool {
+        self.extensions.iter().any(|s| s == extension)
+    }
 }
 
 /// return (gl_internal_format, gl_format)
 fn gl_texture_formats_for_image_format(
     gl: &gl::Gl,
     format: ImageFormat,
 ) -> (gl::GLint, gl::GLuint) {
     match format {
         ImageFormat::A8 => if cfg!(any(target_arch = "arm", target_arch = "aarch64")) {
             (get_gl_format_bgra(gl) as gl::GLint, get_gl_format_bgra(gl))
         } else {
             (GL_FORMAT_A as gl::GLint, GL_FORMAT_A)
         },
-        ImageFormat::RGB8 => (gl::RGB as gl::GLint, gl::RGB),
         ImageFormat::BGRA8 => match gl.get_type() {
             gl::GlType::Gl => (gl::RGBA as gl::GLint, get_gl_format_bgra(gl)),
             gl::GlType::Gles => (get_gl_format_bgra(gl) as gl::GLint, get_gl_format_bgra(gl)),
         },
         ImageFormat::RGBAF32 => (gl::RGBA32F as gl::GLint, gl::RGBA),
         ImageFormat::RG8 => (gl::RG8 as gl::GLint, gl::RG),
         ImageFormat::Invalid => unreachable!(),
     }
@@ -2045,17 +2049,16 @@ impl<'a, T> TextureUploader<'a, T> {
         }
     }
 }
 
 impl<'a> UploadTarget<'a> {
     fn update_impl(&mut self, chunk: UploadChunk) {
         let (gl_format, bpp, data_type) = match self.texture.format {
             ImageFormat::A8 => (GL_FORMAT_A, 1, gl::UNSIGNED_BYTE),
-            ImageFormat::RGB8 => (gl::RGB, 3, gl::UNSIGNED_BYTE),
             ImageFormat::BGRA8 => (get_gl_format_bgra(self.gl), 4, gl::UNSIGNED_BYTE),
             ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
             ImageFormat::RGBAF32 => (gl::RGBA, 16, gl::FLOAT),
             ImageFormat::Invalid => unreachable!(),
         };
 
         let row_length = match chunk.stride {
             Some(value) => value / bpp,
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -1,28 +1,28 @@
 
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayListIter, ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion};
-use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, DocumentLayer, Epoch, FilterOp};
+use api::{DevicePixelScale, DeviceUintRect, DeviceUintSize};
+use api::{DisplayItemRef, DocumentLayer, Epoch, FilterOp};
 use api::{ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerSize, LayerVector2D, LayoutSize};
 use api::{LocalClip, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLayerState};
 use api::{ScrollLocation, ScrollPolicy, ScrollSensitivity, SpecificDisplayItem, StackingContext};
 use api::{TileOffset, TransformStyle, WorldPoint};
 use clip::ClipRegion;
 use clip_scroll_node::StickyFrameInfo;
 use clip_scroll_tree::{ClipScrollTree, ScrollStates};
 use euclid::rect;
 use frame_builder::{FrameBuilder, FrameBuilderConfig, ScrollbarInfo};
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, RenderedDocument};
-use prim_store::{BrushAntiAliasMode};
 use profiler::{GpuCacheProfileCounters, TextureCacheProfileCounters};
 use resource_cache::{FontInstanceMap,ResourceCache, TiledImageMap};
 use scene::{Scene, StackingContextHelpers, ScenePipeline, SceneProperties};
 use tiling::CompositeOps;
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Eq, Ord)]
 pub struct FrameId(pub u32);
 
@@ -101,17 +101,16 @@ impl<'a> FlattenContext<'a> {
                 if let Some(bg_color) = pipeline.background_color {
                     let root_bounds = LayerRect::new(LayerPoint::zero(), *frame_size);
                     let info = LayerPrimitiveInfo::new(root_bounds);
                     self.builder.add_solid_rectangle(
                         ClipAndScrollInfo::simple(root_reference_frame_id),
                         &info,
                         bg_color,
                         None,
-                        BrushAntiAliasMode::Primitive,
                     );
                 }
             }
         }
 
 
         self.flatten_items(
             traversal,
@@ -443,17 +442,16 @@ impl<'a> FlattenContext<'a> {
                 }
             }
             SpecificDisplayItem::Rectangle(ref info) => {
                 self.builder.add_solid_rectangle(
                     clip_and_scroll,
                     &prim_info,
                     info.color,
                     None,
-                    BrushAntiAliasMode::Primitive,
                 );
             }
             SpecificDisplayItem::ClearRectangle => {
                 self.builder.add_clear_rectangle(
                     clip_and_scroll,
                     &prim_info,
                 );
             }
@@ -925,17 +923,17 @@ impl<'a> FlattenContext<'a> {
 
 /// Frame context contains the information required to update
 /// (e.g. scroll) a renderer frame builder (`FrameBuilder`).
 pub struct FrameContext {
     window_size: DeviceUintSize,
     clip_scroll_tree: ClipScrollTree,
     pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
     id: FrameId,
-    frame_builder_config: FrameBuilderConfig,
+    pub frame_builder_config: FrameBuilderConfig,
 }
 
 impl FrameContext {
     pub fn new(config: FrameBuilderConfig) -> Self {
         FrameContext {
             window_size: DeviceUintSize::zero(),
             pipeline_epoch_map: FastHashMap::default(),
             clip_scroll_tree: ClipScrollTree::new(),
@@ -987,17 +985,17 @@ impl FrameContext {
 
     pub fn create(
         &mut self,
         old_builder: FrameBuilder,
         scene: &Scene,
         resource_cache: &mut ResourceCache,
         window_size: DeviceUintSize,
         inner_rect: DeviceUintRect,
-        device_pixel_ratio: f32,
+        device_pixel_scale: DevicePixelScale,
         output_pipelines: &FastHashSet<PipelineId>,
     ) -> FrameBuilder {
         let root_pipeline_id = match scene.root_pipeline_id {
             Some(root_pipeline_id) => root_pipeline_id,
             None => return old_builder,
         };
 
         let root_pipeline = match scene.pipelines.get(&root_pipeline_id) {
@@ -1038,19 +1036,18 @@ impl FrameContext {
             roller.builder.push_root(
                 root_pipeline_id,
                 &root_pipeline.viewport_size,
                 &root_pipeline.content_size,
                 roller.clip_scroll_tree,
             );
 
             roller.builder.setup_viewport_offset(
-                window_size,
                 inner_rect,
-                device_pixel_ratio,
+                device_pixel_scale,
                 roller.clip_scroll_tree,
             );
 
             let reference_frame_id = roller.clip_scroll_tree.root_reference_frame_id;
             let scroll_frame_id = roller.clip_scroll_tree.topmost_scrolling_node_id;
             roller.flatten_root(
                 &mut root_pipeline.display_list.iter(),
                 root_pipeline_id,
@@ -1075,31 +1072,31 @@ impl FrameContext {
     }
 
     pub fn build_rendered_document(
         &mut self,
         frame_builder: &mut FrameBuilder,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
-        device_pixel_ratio: f32,
+        device_pixel_scale: DevicePixelScale,
         layer: DocumentLayer,
-        pan: LayerPoint,
+        pan: WorldPoint,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
 		scene_properties: &SceneProperties,
     ) -> RenderedDocument {
         let frame = frame_builder.build(
             resource_cache,
             gpu_cache,
             self.id,
             &mut self.clip_scroll_tree,
             pipelines,
             self.window_size,
-            device_pixel_ratio,
+            device_pixel_scale,
             layer,
             pan,
             texture_cache_profile,
             gpu_cache_profile,
             scene_properties,
         );
 
         let nodes_bouncing_back = self.clip_scroll_tree.collect_nodes_bouncing_back();
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,35 +1,34 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderDetails, BorderDisplayItem, BuiltDisplayList};
-use api::{ClipAndScrollInfo, ClipId, ColorF, ColorU, PropertyBinding};
-use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
-use api::{DocumentLayer, ExtendMode, FontRenderMode, LayoutTransform};
-use api::{GlyphInstance, GlyphOptions, GradientStop, HitTestFlags, HitTestItem, HitTestResult};
-use api::{ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect};
-use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
-use api::{LineStyle, LocalClip, PipelineId, RepeatMode};
-use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
-use api::{PremultipliedColorF, WorldPoint, YuvColorSpace, YuvData};
+use api::{BorderDetails, BorderDisplayItem, BuiltDisplayList, ClipAndScrollInfo, ClipId, ColorF};
+use api::{ColorU, DeviceIntPoint, DevicePixelScale, DeviceUintPoint, DeviceUintRect};
+use api::{DeviceUintSize, DocumentLayer, ExtendMode, FontRenderMode, GlyphInstance, GlyphOptions};
+use api::{GradientStop, HitTestFlags, HitTestItem, HitTestResult, ImageKey, ImageRendering};
+use api::{ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect, LayerSize};
+use api::{LayerTransform, LayerVector2D, LayoutTransform, LayoutVector2D, LineOrientation};
+use api::{LineStyle, LocalClip, PipelineId, PremultipliedColorF, PropertyBinding, RepeatMode};
+use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle, WorldPoint, YuvColorSpace};
+use api::YuvData;
 use app_units::Au;
 use border::ImageBorderSegment;
 use clip::{ClipRegion, ClipSource, ClipSources, ClipStore, Contains};
 use clip_scroll_node::{ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
 use euclid::{SideOffsets2D, vec2};
 use frame::FrameId;
 use glyph_rasterizer::FontInstance;
 use gpu_cache::GpuCache;
-use gpu_types::ClipScrollNodeData;
+use gpu_types::{ClipScrollNodeData, PictureType};
 use internal_types::{FastHashMap, FastHashSet, RenderPassIndex};
-use picture::{PictureCompositeMode, PictureKind, PicturePrimitive, RasterizationSpace};
-use prim_store::{BrushAntiAliasMode, BrushKind, BrushPrimitive, TexelRect, YuvImagePrimitiveCpu};
+use picture::{ContentOrigin, PictureCompositeMode, PictureKind, PicturePrimitive};
+use prim_store::{BrushKind, BrushPrimitive, TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex, SpecificPrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{BrushSegmentDescriptor, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
 use resource_cache::ResourceCache;
 use scene::{ScenePipeline, SceneProperties};
@@ -64,20 +63,23 @@ struct StackingContext {
     transform_style: TransformStyle,
 
     /// The primitive index for the root Picture primitive
     /// that this stacking context is mapped to.
     pic_prim_index: PrimitiveIndex,
 }
 
 #[derive(Clone, Copy)]
+#[cfg_attr(feature = "capture", derive(Serialize, Deserialize))]
 pub struct FrameBuilderConfig {
     pub enable_scrollbars: bool,
     pub default_font_render_mode: FontRenderMode,
     pub debug: bool,
+    pub dual_source_blending_is_supported: bool,
+    pub dual_source_blending_is_enabled: bool,
 }
 
 #[derive(Debug)]
 pub struct HitTestingItem {
     rect: LayerRect,
     clip: LocalClip,
     tag: ItemTag,
 }
@@ -120,31 +122,31 @@ pub struct FrameBuilder {
     pub picture_stack: Vec<PrimitiveIndex>,
 
     /// A temporary stack of stacking context properties, used only
     /// during scene building.
     sc_stack: Vec<StackingContext>,
 }
 
 pub struct PrimitiveContext<'a> {
-    pub device_pixel_ratio: f32,
+    pub device_pixel_scale: DevicePixelScale,
     pub display_list: &'a BuiltDisplayList,
     pub clip_node: &'a ClipScrollNode,
     pub scroll_node: &'a ClipScrollNode,
 }
 
 impl<'a> PrimitiveContext<'a> {
     pub fn new(
-        device_pixel_ratio: f32,
+        device_pixel_scale: DevicePixelScale,
         display_list: &'a BuiltDisplayList,
         clip_node: &'a ClipScrollNode,
         scroll_node: &'a ClipScrollNode,
     ) -> Self {
         PrimitiveContext {
-            device_pixel_ratio,
+            device_pixel_scale,
             display_list,
             clip_node,
             scroll_node,
         }
     }
 }
 
 impl FrameBuilder {
@@ -160,16 +162,18 @@ impl FrameBuilder {
             prim_store: PrimitiveStore::new(),
             clip_store: ClipStore::new(),
             screen_rect: DeviceUintRect::zero(),
             background_color: None,
             config: FrameBuilderConfig {
                 enable_scrollbars: false,
                 default_font_render_mode: FontRenderMode::Mono,
                 debug: false,
+                dual_source_blending_is_enabled: true,
+                dual_source_blending_is_supported: false,
             },
         }
     }
 
     pub fn recycle(
         self,
         screen_rect: DeviceUintRect,
         background_color: Option<ColorF>,
@@ -582,56 +586,30 @@ impl FrameBuilder {
     }
 
     pub fn current_reference_frame_id(&self) -> ClipId {
         *self.reference_frame_stack.last().unwrap()
     }
 
     pub fn setup_viewport_offset(
         &mut self,
-        window_size: DeviceUintSize,
         inner_rect: DeviceUintRect,
-        device_pixel_ratio: f32,
+        device_pixel_scale: DevicePixelScale,
         clip_scroll_tree: &mut ClipScrollTree,
     ) {
-        let inner_origin = inner_rect.origin.to_f32();
-        let viewport_offset = LayerPoint::new(
-            (inner_origin.x / device_pixel_ratio).round(),
-            (inner_origin.y / device_pixel_ratio).round(),
-        );
-        let outer_size = window_size.to_f32();
-        let outer_size = LayerSize::new(
-            (outer_size.width / device_pixel_ratio).round(),
-            (outer_size.height / device_pixel_ratio).round(),
-        );
-        let clip_size = LayerSize::new(
-            outer_size.width + 2.0 * viewport_offset.x,
-            outer_size.height + 2.0 * viewport_offset.y,
-        );
-
-        let viewport_clip = LayerRect::new(
-            LayerPoint::new(-viewport_offset.x, -viewport_offset.y),
-            LayerSize::new(clip_size.width, clip_size.height),
-        );
-
+        let viewport_offset = (inner_rect.origin.to_vector().to_f32() / device_pixel_scale).round();
         let root_id = clip_scroll_tree.root_reference_frame_id();
         if let Some(root_node) = clip_scroll_tree.nodes.get_mut(&root_id) {
             if let NodeType::ReferenceFrame(ref mut info) = root_node.node_type {
-                info.resolved_transform = LayerToScrollTransform::create_translation(
+                info.resolved_transform = LayerTransform::create_translation(
                     viewport_offset.x,
                     viewport_offset.y,
                     0.0,
                 );
             }
-            root_node.local_clip_rect = viewport_clip;
-        }
-
-        let clip_id = clip_scroll_tree.topmost_scrolling_node_id();
-        if let Some(root_node) = clip_scroll_tree.nodes.get_mut(&clip_id) {
-            root_node.local_clip_rect = viewport_clip;
         }
     }
 
     pub fn push_root(
         &mut self,
         pipeline_id: PipelineId,
         viewport_size: &LayerSize,
         content_size: &LayerSize,
@@ -753,32 +731,30 @@ impl FrameBuilder {
         mem::replace(&mut self.shadow_prim_stack, shadows);
     }
 
     pub fn add_solid_rectangle(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         color: ColorF,
-        segments: Option<Box<BrushSegmentDescriptor>>,
-        aa_mode: BrushAntiAliasMode,
+        segments: Option<BrushSegmentDescriptor>,
     ) {
         if color.a == 0.0 {
             // Don't add transparent rectangles to the draw list, but do consider them for hit
             // testing. This allows specifying invisible hit testing areas.
             self.add_primitive_to_hit_testing_list(info, clip_and_scroll);
             return;
         }
 
         let prim = BrushPrimitive::new(
             BrushKind::Solid {
                 color,
             },
             segments,
-            aa_mode,
         );
 
         self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::Brush(prim),
         );
@@ -787,17 +763,16 @@ impl FrameBuilder {
     pub fn add_clear_rectangle(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
     ) {
         let prim = BrushPrimitive::new(
             BrushKind::Clear,
             None,
-            BrushAntiAliasMode::Primitive,
         );
 
         self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::Brush(prim),
         );
@@ -814,17 +789,16 @@ impl FrameBuilder {
             return;
         }
 
         let prim = BrushPrimitive::new(
             BrushKind::Solid {
                 color,
             },
             None,
-            BrushAntiAliasMode::Primitive,
         );
 
         let prim_index = self.add_primitive(
             clip_and_scroll,
             info,
             Vec::new(),
             PrimitiveContainer::Brush(prim),
         );
@@ -1580,19 +1554,20 @@ impl FrameBuilder {
     fn build_layer_screen_rects_and_cull_layers(
         &mut self,
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         profile_counters: &mut FrameProfileCounters,
-        device_pixel_ratio: f32,
+        device_pixel_scale: DevicePixelScale,
         scene_properties: &SceneProperties,
         node_data: &[ClipScrollNodeData],
+        local_rects: &mut Vec<LayerRect>,
     ) -> Option<RenderTaskId> {
         profile_scope!("cull");
 
         if self.prim_store.cpu_pictures.is_empty() {
             return None
         }
 
         // The root picture is always the first one added.
@@ -1600,17 +1575,17 @@ impl FrameBuilder {
         let root_clip_scroll_node = &clip_scroll_tree.nodes[&clip_scroll_tree.root_reference_frame_id()];
 
         let display_list = &pipelines
             .get(&root_clip_scroll_node.pipeline_id)
             .expect("No display list?")
             .display_list;
 
         let root_prim_context = PrimitiveContext::new(
-            device_pixel_ratio,
+            device_pixel_scale,
             display_list,
             root_clip_scroll_node,
             root_clip_scroll_node,
         );
 
         let mut child_tasks = Vec::new();
         self.prim_store.reset_prim_visibility();
         self.prim_store.prepare_prim_runs(
@@ -1626,32 +1601,32 @@ impl FrameBuilder {
             true,
             &mut child_tasks,
             profile_counters,
             None,
             scene_properties,
             SpecificPrimitiveIndex(0),
             &self.screen_rect.to_i32(),
             node_data,
+            local_rects,
         );
 
         let pic = &mut self.prim_store.cpu_pictures[0];
         pic.runs = prim_run_cmds;
 
         let root_render_task = RenderTask::new_picture(
             None,
             PrimitiveIndex(0),
             RenderTargetKind::Color,
-            0.0,
-            0.0,
+            ContentOrigin::Screen(DeviceIntPoint::zero()),
             PremultipliedColorF::TRANSPARENT,
             ClearMode::Transparent,
-            RasterizationSpace::Screen,
             child_tasks,
             None,
+            PictureType::Image,
         );
 
         pic.render_task_id = Some(render_tasks.add(root_render_task));
         pic.render_task_id
     }
 
     fn update_scroll_bars(&mut self, clip_scroll_tree: &ClipScrollTree, gpu_cache: &mut GpuCache) {
         static SCROLLBAR_PADDING: f32 = 8.0;
@@ -1685,19 +1660,19 @@ impl FrameBuilder {
     pub fn build(
         &mut self,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         frame_id: FrameId,
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         window_size: DeviceUintSize,
-        device_pixel_ratio: f32,
+        device_pixel_scale: DevicePixelScale,
         layer: DocumentLayer,
-        pan: LayerPoint,
+        pan: WorldPoint,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
         scene_properties: &SceneProperties,
     ) -> Frame {
         profile_scope!("build");
         debug_assert!(
             DeviceUintRect::new(DeviceUintPoint::zero(), window_size)
                 .contains_rect(&self.screen_rect)
@@ -1707,19 +1682,24 @@ impl FrameBuilder {
         profile_counters
             .total_primitives
             .set(self.prim_store.prim_count());
 
         resource_cache.begin_frame(frame_id);
         gpu_cache.begin_frame();
 
         let mut node_data = Vec::with_capacity(clip_scroll_tree.nodes.len());
+        let total_prim_runs =
+            self.prim_store.cpu_pictures.iter().fold(1, |count, ref pic| count + pic.runs.len());
+        let mut clip_chain_local_clip_rects = Vec::with_capacity(total_prim_runs);
+        clip_chain_local_clip_rects.push(LayerRect::max_rect());
+
         clip_scroll_tree.update_tree(
             &self.screen_rect.to_i32(),
-            device_pixel_ratio,
+            device_pixel_scale,
             &mut self.clip_store,
             resource_cache,
             gpu_cache,
             pan,
             &mut node_data,
             scene_properties,
         );
 
@@ -1729,19 +1709,20 @@ impl FrameBuilder {
 
         let main_render_task_id = self.build_layer_screen_rects_and_cull_layers(
             clip_scroll_tree,
             pipelines,
             resource_cache,
             gpu_cache,
             &mut render_tasks,
             &mut profile_counters,
-            device_pixel_ratio,
+            device_pixel_scale,
             scene_properties,
             &node_data,
+            &mut clip_chain_local_clip_rects,
         );
 
         let mut passes = Vec::new();
         resource_cache.block_until_all_resources_added(gpu_cache, texture_cache_profile);
 
         if let Some(main_render_task_id) = main_render_task_id {
             let mut required_pass_count = 0;
             render_tasks.max_depth(main_render_task_id, 0, &mut required_pass_count);
@@ -1757,24 +1738,27 @@ impl FrameBuilder {
             render_tasks.assign_to_passes(
                 main_render_task_id,
                 required_pass_count - 1,
                 &mut passes,
             );
         }
 
         let mut deferred_resolves = vec![];
+        let use_dual_source_blending = self.config.dual_source_blending_is_enabled &&
+                                       self.config.dual_source_blending_is_supported;
 
         for (pass_index, pass) in passes.iter_mut().enumerate() {
             let ctx = RenderTargetContext {
-                device_pixel_ratio,
+                device_pixel_scale,
                 prim_store: &self.prim_store,
                 resource_cache,
                 node_data: &node_data,
                 clip_scroll_tree,
+                use_dual_source_blending,
             };
 
             pass.build(
                 &ctx,
                 gpu_cache,
                 &mut render_tasks,
                 &mut deferred_resolves,
                 &self.clip_store,
@@ -1786,20 +1770,21 @@ impl FrameBuilder {
 
         render_tasks.build();
 
         resource_cache.end_frame();
 
         Frame {
             window_size,
             inner_rect: self.screen_rect,
-            device_pixel_ratio,
+            device_pixel_ratio: device_pixel_scale.0,
             background_color: self.background_color,
             layer,
             profile_counters,
             passes,
             node_data,
+            clip_chain_local_clip_rects,
             render_tasks,
             deferred_resolves,
             gpu_cache_updates: Some(gpu_cache_updates),
         }
     }
 }
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -20,18 +20,21 @@ use rayon::ThreadPool;
 use rayon::prelude::*;
 use std::cmp;
 use std::collections::hash_map::Entry;
 use std::hash::{Hash, Hasher};
 use std::mem;
 use std::sync::{Arc, Mutex, MutexGuard};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use texture_cache::{TextureCache, TextureCacheHandle};
+#[cfg(test)]
+use thread_profiler::register_thread_with_profiler;
 
 #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+#[cfg_attr(feature = "capture", derive(Serialize, Deserialize))]
 pub struct FontTransform {
     pub scale_x: f32,
     pub skew_x: f32,
     pub skew_y: f32,
     pub scale_y: f32,
 }
 
 // Floats don't impl Hash/Eq/Ord...
@@ -94,45 +97,38 @@ impl FontTransform {
         FontTransform::new(
             self.scale_x * scale_x,
             self.skew_x * scale_y,
             self.skew_y * scale_x,
             self.scale_y * scale_y,
         )
     }
 
-    #[allow(dead_code)]
-    pub fn inverse(&self) -> Option<Self> {
-        let det = self.determinant();
-        if det != 0.0 {
-            let inv_det = det.recip() as f32;
-            Some(FontTransform::new(
-                self.scale_y * inv_det,
-                -self.skew_x * inv_det,
-                -self.skew_y * inv_det,
-                self.scale_x * inv_det
-            ))
-        } else {
-            None
-        }
+    pub fn invert_scale(&self, x_scale: f64, y_scale: f64) -> Self {
+        self.pre_scale(x_scale.recip() as f32, y_scale.recip() as f32)
     }
 
-    #[allow(dead_code)]
-    pub fn apply(&self, x: f32, y: f32) -> (f32, f32) {
-        (self.scale_x * x + self.skew_x * y, self.skew_y * x + self.scale_y * y)
+    pub fn synthesize_italics(&self, skew_factor: f32) -> Self {
+        FontTransform::new(
+            self.scale_x,
+            self.skew_x - self.scale_x * skew_factor,
+            self.skew_y,
+            self.scale_y - self.skew_y * skew_factor,
+        )
     }
 }
 
 impl<'a> From<&'a LayerToWorldTransform> for FontTransform {
     fn from(xform: &'a LayerToWorldTransform) -> Self {
         FontTransform::new(xform.m11, xform.m21, xform.m12, xform.m22)
     }
 }
 
 #[derive(Clone, Hash, PartialEq, Eq, Debug, Ord, PartialOrd)]
+#[cfg_attr(feature = "capture", derive(Serialize, Deserialize))]
 pub struct FontInstance {
     pub font_key: FontKey,
     // The font size is in *device* pixels, not logical pixels.
     // It is stored as an Au since we need sub-pixel sizes, but
     // can't store as a f32 due to use of this type as a hash key.
     // TODO(gw): Perhaps consider having LogicalAu and DeviceAu
     //           or something similar to that.
     pub size: Au,
@@ -562,16 +558,23 @@ impl GlyphRasterizer {
                     let mut context = font_contexts.lock_context(Some(i));
                     for font_key in &fonts_to_remove {
                         context.delete_font(font_key);
                     }
                 }
             });
         }
     }
+
+    #[cfg(feature = "capture")]
+    pub fn reset(&mut self) {
+        //TODO: do any signals need to be sent to the workers?
+        self.pending_glyphs.clear();
+        self.fonts_to_remove.clear();
+    }
 }
 
 impl FontContext {
     fn add_font(&mut self, font_key: &FontKey, template: &FontTemplate) {
         match template {
             &FontTemplate::Raw(ref bytes, index) => {
                 self.add_raw_font(&font_key, bytes.clone(), index);
             }
@@ -598,25 +601,30 @@ impl GlyphRequest {
 }
 
 struct GlyphRasterJob {
     request: GlyphRequest,
     result: Option<RasterizedGlyph>,
 }
 
 #[test]
-fn raterize_200_glyphs() {
+fn rasterize_200_glyphs() {
     // This test loads a font from disc, then renders 4 requests containing
     // 50 glyphs each, deletes the font and waits for the result.
 
     use rayon::Configuration;
     use std::fs::File;
     use std::io::Read;
 
-    let workers = Arc::new(ThreadPool::new(Configuration::new()).unwrap());
+    let worker_config = Configuration::new()
+        .thread_name(|idx|{ format!("WRWorker#{}", idx) })
+        .start_handler(move |idx| {
+            register_thread_with_profiler(format!("WRWorker#{}", idx));
+        });
+    let workers = Arc::new(ThreadPool::new(worker_config).unwrap());
     let mut glyph_rasterizer = GlyphRasterizer::new(workers);
     let mut glyph_cache = GlyphCache::new();
     let mut gpu_cache = GpuCache::new();
     let mut texture_cache = TextureCache::new(2048);
 
     let mut font_file =
         File::open("../wrench/reftests/text/VeraBd.ttf").expect("Couldn't open font file");
     let mut font_data = vec![];
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{LayerVector2D, LayerRect, LayerToWorldTransform};
+use api::{LayerRect, LayerToWorldTransform};
 use gpu_cache::GpuCacheAddress;
 use render_task::RenderTaskAddress;
 
 // Contains types that must exactly match the same structures declared in GLSL.
 
 #[repr(i32)]
 #[derive(Debug, Copy, Clone)]
 pub enum BlurDirection {
@@ -42,47 +42,47 @@ pub struct ClipMaskInstance {
 pub struct PrimitiveInstance {
     data: [i32; 8],
 }
 
 pub struct SimplePrimitiveInstance {
     pub specific_prim_address: GpuCacheAddress,
     pub task_address: RenderTaskAddress,
     pub clip_task_address: RenderTaskAddress,
-    pub clip_id: ClipScrollNodeIndex,
+    pub clip_chain_rect_index: ClipChainRectIndex,
     pub scroll_id: ClipScrollNodeIndex,
     pub z_sort_index: i32,
 }
 
 impl SimplePrimitiveInstance {
     pub fn new(
         specific_prim_address: GpuCacheAddress,
         task_address: RenderTaskAddress,
         clip_task_address: RenderTaskAddress,
-        clip_id: ClipScrollNodeIndex,
+        clip_chain_rect_index: ClipChainRectIndex,
         scroll_id: ClipScrollNodeIndex,
         z_sort_index: i32,
     ) -> SimplePrimitiveInstance {
         SimplePrimitiveInstance {
             specific_prim_address,
             task_address,
             clip_task_address,
-            clip_id,
+            clip_chain_rect_index,
             scroll_id,
             z_sort_index,
         }
     }
 
     pub fn build(&self, data0: i32, data1: i32, data2: i32) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
                 self.specific_prim_address.as_int(),
                 self.task_address.0 as i32,
                 self.clip_task_address.0 as i32,
-                ((self.clip_id.0 as i32) << 16) | self.scroll_id.0 as i32,
+                ((self.clip_chain_rect_index.0 as i32) << 16) | self.scroll_id.0 as i32,
                 self.z_sort_index,
                 data0,
                 data1,
                 data2,
             ],
         }
     }
 }
@@ -145,35 +145,35 @@ impl From<CompositePrimitiveInstance> fo
 //           future, we can compress this vertex
 //           format a lot - e.g. z, render task
 //           addresses etc can reasonably become
 //           a u16 type.
 #[repr(C)]
 pub struct BrushInstance {
     pub picture_address: RenderTaskAddress,
     pub prim_address: GpuCacheAddress,
-    pub clip_id: ClipScrollNodeIndex,
+    pub clip_chain_rect_index: ClipChainRectIndex,
     pub scroll_id: ClipScrollNodeIndex,
     pub clip_task_address: RenderTaskAddress,
     pub z: i32,
-    pub segment_kind: i32,
+    pub segment_index: i32,
     pub user_data0: i32,
     pub user_data1: i32,
 }
 
 impl From<BrushInstance> for PrimitiveInstance {
     fn from(instance: BrushInstance) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
                 instance.picture_address.0 as i32,
                 instance.prim_address.as_int(),
-                ((instance.clip_id.0 as i32) << 16) | instance.scroll_id.0 as i32,
+                ((instance.clip_chain_rect_index.0 as i32) << 16) | instance.scroll_id.0 as i32,
                 instance.clip_task_address.0 as i32,
                 instance.z,
-                instance.segment_kind,
+                instance.segment_index,
                 instance.user_data0,
                 instance.user_data1,
             ]
         }
     }
 }
 
 // Defines how a brush image is stretched onto the primitive.
@@ -190,38 +190,33 @@ pub enum BrushImageKind {
 #[derive(Copy, Debug, Clone, PartialEq)]
 #[repr(C)]
 pub struct ClipScrollNodeIndex(pub u32);
 
 #[derive(Debug)]
 #[repr(C)]
 pub struct ClipScrollNodeData {
     pub transform: LayerToWorldTransform,
-
-    /// Viewport rectangle clipped against parent viewport rectangles.  This is
-    /// in the coordinate system of the node origin.  Precisely, it combines the
-    /// local clipping rectangles of all the parent nodes on the way to the root,
-    /// including those of `ClipRegion` rectangles.  The combined clip is reset to
-    /// maximum when an incompatible coordinate system is encountered.
-    pub local_clip_rect: LayerRect,
-
-    /// The scroll offset of all the nodes between us and our parent reference frame.
-    /// This is used to calculate intersections between us and content or nodes that
-    /// are also direct children of our reference frame.
-    pub reference_frame_relative_scroll_offset: LayerVector2D,
-
-    pub scroll_offset: LayerVector2D,
     pub transform_kind: f32,
     pub padding: [f32; 3],
 }
 
 impl ClipScrollNodeData {
     pub fn invalid() -> ClipScrollNodeData {
         ClipScrollNodeData {
             transform: LayerToWorldTransform::identity(),
-            local_clip_rect: LayerRect::zero(),
-            reference_frame_relative_scroll_offset: LayerVector2D::zero(),
-            scroll_offset: LayerVector2D::zero(),
             transform_kind: 0.0,
             padding: [0.0; 3],
         }
     }
 }
+
+#[derive(Copy, Debug, Clone, PartialEq)]
+#[repr(C)]
+pub struct ClipChainRectIndex(pub usize);
+
+#[derive(Copy, Debug, Clone, PartialEq)]
+#[repr(C)]
+pub enum PictureType {
+    Image = 1,
+    TextShadow = 2,
+    BoxShadow = 3,
+}
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -1,15 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ClipId, DevicePoint, DeviceUintRect, DocumentId, Epoch};
 use api::{ExternalImageData, ExternalImageId};
 use api::{ImageFormat, PipelineId};
+#[cfg(feature = "capture")]
+use api::ImageDescriptor;
 use api::DebugCommand;
 use device::TextureFilter;
 use fxhash::FxHasher;
 use profiler::BackendProfileCounters;
 use std::{usize, i32};
 use std::collections::{HashMap, HashSet};
 use std::f32;
 use std::hash::BuildHasherDefault;
@@ -52,47 +54,16 @@ pub enum SourceTexture {
     #[allow(dead_code)]
     RenderTaskCacheA8(RenderPassIndex),
     RenderTaskCacheRGBA8(RenderPassIndex),
 }
 
 pub const ORTHO_NEAR_PLANE: f32 = -1000000.0;
 pub const ORTHO_FAR_PLANE: f32 = 1000000.0;
 
-/// Optional textures that can be used as a source in the shaders.
-/// Textures that are not used by the batch are equal to TextureId::invalid().
-#[derive(Copy, Clone, Debug)]
-pub struct BatchTextures {
-    pub colors: [SourceTexture; 3],
-}
-
-impl BatchTextures {
-    pub fn no_texture() -> Self {
-        BatchTextures {
-            colors: [SourceTexture::Invalid; 3],
-        }
-    }
-
-    pub fn render_target_cache() -> Self {
-        BatchTextures {
-            colors: [
-                SourceTexture::CacheRGBA8,
-                SourceTexture::CacheA8,
-                SourceTexture::Invalid,
-            ],
-        }
-    }
-
-    pub fn color(texture: SourceTexture) -> Self {
-        BatchTextures {
-            colors: [texture, SourceTexture::Invalid, SourceTexture::Invalid],
-        }
-    }
-}
-
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub struct RenderTargetInfo {
     pub has_depth: bool,
 }
 
 #[derive(Debug)]
 pub enum TextureUpdateSource {
     External {
@@ -166,19 +137,30 @@ impl RenderedDocument {
         RenderedDocument {
             pipeline_epoch_map,
             layers_bouncing_back,
             frame,
         }
     }
 }
 
+#[cfg(feature = "capture")]
+pub struct ExternalCaptureImage {
+    pub short_path: String,
+    pub descriptor: ImageDescriptor,
+    pub external: ExternalImageData,
+}
+
 pub enum DebugOutput {
     FetchDocuments(String),
     FetchClipScrollTree(String),
+    #[cfg(feature = "capture")]
+    SaveCapture(PathBuf, Vec<ExternalCaptureImage>),
+    #[cfg(feature = "capture")]
+    LoadCapture,
 }
 
 pub enum ResultMsg {
     DebugCommand(DebugCommand),
     DebugOutput(DebugOutput),
     RefreshShader(PathBuf),
     PublishDocument(
         DocumentId,
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -43,17 +43,21 @@ they're nestable.
 #[macro_use]
 extern crate bitflags;
 #[macro_use]
 extern crate lazy_static;
 #[macro_use]
 extern crate log;
 #[macro_use]
 extern crate thread_profiler;
+#[cfg(any(feature = "debugger", feature = "capture"))]
+#[macro_use]
+extern crate serde;
 
+mod batch;
 mod border;
 mod box_shadow;
 mod clip;
 mod clip_scroll_node;
 mod clip_scroll_tree;
 mod debug_colors;
 mod debug_font_data;
 mod debug_render;
@@ -78,16 +82,17 @@ mod print_tree;
 mod profiler;
 mod query;
 mod record;
 mod render_backend;
 mod render_task;
 mod renderer;
 mod resource_cache;
 mod scene;
+mod segment;
 mod spring;
 mod texture_allocator;
 mod texture_cache;
 mod tiling;
 mod util;
 
 mod shader_source {
     include!(concat!(env!("OUT_DIR"), "/shaders.rs"));
@@ -134,19 +139,18 @@ extern crate app_units;
 extern crate bincode;
 extern crate byteorder;
 extern crate euclid;
 extern crate fxhash;
 extern crate gleam;
 extern crate num_traits;
 extern crate plane_split;
 extern crate rayon;
-#[cfg(feature = "debugger")]
-#[macro_use]
-extern crate serde_derive;
+#[cfg(feature = "capture")]
+extern crate ron;
 #[cfg(feature = "debugger")]
 extern crate serde_json;
 extern crate smallvec;
 extern crate time;
 #[cfg(feature = "debugger")]
 extern crate ws;
 #[cfg(feature = "debugger")]
 extern crate image;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -1,20 +1,20 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ColorF, ClipAndScrollInfo, FilterOp, MixBlendMode};
-use api::{device_length, DeviceIntRect, DeviceIntSize, PipelineId};
-use api::{BoxShadowClipMode, LayerPoint, LayerRect, LayerSize, LayerVector2D, Shadow};
+use api::{DeviceIntPoint, DeviceIntRect, LayerToWorldScale, PipelineId};
+use api::{BoxShadowClipMode, LayerPoint, LayerRect, LayerVector2D, Shadow};
 use api::{ClipId, PremultipliedColorF};
 use box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowCacheKey};
 use frame_builder::PrimitiveContext;
 use gpu_cache::GpuDataRequest;
-use gpu_types::BrushImageKind;
+use gpu_types::{BrushImageKind, PictureType};
 use prim_store::{PrimitiveIndex, PrimitiveRun, PrimitiveRunLocalRect};
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
 use scene::{FilterOpHelpers, SceneProperties};
 use tiling::RenderTargetKind;
 
 /*
  A picture represents a dynamically rendered image. It consists of:
 
@@ -33,23 +33,22 @@ pub enum PictureCompositeMode {
     MixBlend(MixBlendMode),
     /// Apply a CSS filter.
     Filter(FilterOp),
     /// Draw to intermediate surface, copy straight across. This
     /// is used for CSS isolation, and plane splitting.
     Blit,
 }
 
-/// Configure whether the primitives on this picture
-/// should be rasterized in screen space or local space.
-#[repr(C)]
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub enum RasterizationSpace {
-    Local = 0,
-    Screen = 1,
+/// Configure whether the content to be drawn by a picture
+/// is rasterized in local space or in screen space.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum ContentOrigin {
+    Local(LayerPoint),
+    Screen(DeviceIntPoint),
 }
 
 #[derive(Debug)]
 pub enum PictureKind {
     TextShadow {
         offset: LayerVector2D,
         color: ColorF,
         blur_radius: f32,
@@ -103,36 +102,31 @@ pub struct PicturePrimitive {
 
     // The pipeline that the primitives on this picture belong to.
     pub pipeline_id: PipelineId,
 
     // If true, apply visibility culling to primitives on this
     // picture. For text shadows and box shadows, we want to
     // unconditionally draw them.
     pub cull_children: bool,
-
-    /// Configure whether the primitives on this picture
-    /// should be rasterized in screen space or local space.
-    pub rasterization_kind: RasterizationSpace,
 }
 
 impl PicturePrimitive {
     pub fn new_text_shadow(shadow: Shadow, pipeline_id: PipelineId) -> Self {
         PicturePrimitive {
             runs: Vec::new(),
             render_task_id: None,
             kind: PictureKind::TextShadow {
                 offset: shadow.offset,
                 color: shadow.color,
                 blur_radius: shadow.blur_radius,
                 content_rect: LayerRect::zero(),
             },
             pipeline_id,
             cull_children: false,
-            rasterization_kind: RasterizationSpace::Local,
         }
     }
 
     pub fn resolve_scene_properties(&mut self, properties: &SceneProperties) -> bool {
         match self.kind {
             PictureKind::Image { ref mut composite_mode, .. } => {
                 match composite_mode {
                     &mut Some(PictureCompositeMode::Filter(ref mut filter)) => {
@@ -170,43 +164,39 @@ impl PicturePrimitive {
                 blur_regions,
                 clip_mode,
                 image_kind,
                 content_rect: LayerRect::zero(),
                 cache_key,
             },
             pipeline_id,
             cull_children: false,
-            rasterization_kind: RasterizationSpace::Local,
         }
     }
 
     pub fn new_image(
         composite_mode: Option<PictureCompositeMode>,
         is_in_3d_context: bool,
         pipeline_id: PipelineId,
         reference_frame_id: ClipId,
         frame_output_pipeline_id: Option<PipelineId>,
-    ) -> PicturePrimitive {
+    ) -> Self {
         PicturePrimitive {
             runs: Vec::new(),
             render_task_id: None,
             kind: PictureKind::Image {
                 secondary_render_task_id: None,
                 composite_mode,
                 is_in_3d_context,
                 frame_output_pipeline_id,
                 reference_frame_id,
                 real_local_rect: LayerRect::zero(),
             },
             pipeline_id,
             cull_children: true,
-            // TODO(gw): Make this configurable based on an
-            //           exposed API parameter in StackingContext.
-            rasterization_kind: RasterizationSpace::Screen,
         }
     }
 
     pub fn add_primitive(
         &mut self,
         prim_index: PrimitiveIndex,
         clip_and_scroll: ClipAndScrollInfo
     ) {
@@ -257,88 +247,91 @@ impl PicturePrimitive {
                     blur_offset,
                     blur_offset,
                 );
 
                 content_rect.translate(&offset)
             }
             PictureKind::BoxShadow { blur_radius, clip_mode, image_kind, ref mut content_rect, .. } => {
                 // We need to inflate the content rect if outset.
-                match clip_mode {
+                *content_rect = match clip_mode {
                     BoxShadowClipMode::Outset => {
-                        let blur_offset = blur_radius * BLUR_SAMPLE_SCALE;
-
-                        // If the radii are uniform, we can render just the top
-                        // left corner and mirror it across the primitive. In
-                        // this case, shift the content rect to leave room
-                        // for the blur to take effect.
                         match image_kind {
                             BrushImageKind::Mirror => {
-                                let origin = LayerPoint::new(
-                                    local_content_rect.origin.x - blur_offset,
-                                    local_content_rect.origin.y - blur_offset,
-                                );
-                                let size = LayerSize::new(
-                                    local_content_rect.size.width + blur_offset,
-                                    local_content_rect.size.height + blur_offset,
-                                );
-                                *content_rect = LayerRect::new(origin, size);
+                                let half_offset = 0.5 * blur_radius * BLUR_SAMPLE_SCALE;
+                                // If the radii are uniform, we can render just the top
+                                // left corner and mirror it across the primitive. In
+                                // this case, shift the content rect to leave room
+                                // for the blur to take effect.
+                                local_content_rect
+                                    .translate(&-LayerVector2D::new(half_offset, half_offset))
+                                    .inflate(half_offset, half_offset)
                             }
                             BrushImageKind::NinePatch | BrushImageKind::Simple => {
+                                let full_offset = blur_radius * BLUR_SAMPLE_SCALE;
                                 // For a non-uniform radii, we need to expand
                                 // the content rect on all sides for the blur.
-                                *content_rect = local_content_rect.inflate(
-                                    blur_offset,
-                                    blur_offset,
-                                );
+                                local_content_rect.inflate(
+                                    full_offset,
+                                    full_offset,
+                                )
                             }
                         }
                     }
                     BoxShadowClipMode::Inset => {
-                        *content_rect = local_content_rect;
+                        local_content_rect
                     }
-                }
+                };
 
                 prim_local_rect
             }
         }
     }
 
+    pub fn picture_type(&self) -> PictureType {
+        match self.kind {
+            PictureKind::Image { .. } => PictureType::Image,
+            PictureKind::BoxShadow { .. } => PictureType::BoxShadow,
+            PictureKind::TextShadow { .. } => PictureType::TextShadow,
+        }
+    }
+
     pub fn prepare_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         render_tasks: &mut RenderTaskTree,
         prim_screen_rect: &DeviceIntRect,
         child_tasks: Vec<RenderTaskId>,
         parent_tasks: &mut Vec<RenderTaskId>,
     ) {
+        let content_scale = LayerToWorldScale::new(1.0) * prim_context.device_pixel_scale;
+
         match self.kind {
             PictureKind::Image {
                 ref mut secondary_render_task_id,
                 composite_mode,
                 ..
             } => {
+                let content_origin = ContentOrigin::Screen(prim_screen_rect.origin);
                 match composite_mode {
                     Some(PictureCompositeMode::Filter(FilterOp::Blur(blur_radius))) => {
                         let picture_task = RenderTask::new_picture(
                             Some(prim_screen_rect.size),
                             prim_index,
                             RenderTargetKind::Color,
-                            prim_screen_rect.origin.x as f32,
-                            prim_screen_rect.origin.y as f32,
+                            content_origin,
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
-                            self.rasterization_kind,
                             child_tasks,
                             None,
+                            PictureType::Image,
                         );
 
-                        let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
-                        let blur_std_deviation = blur_radius.0 as f32;
+                        let blur_std_deviation = blur_radius * prim_context.device_pixel_scale.0;
                         let picture_task_id = render_tasks.add(picture_task);
 
                         let blur_render_task = RenderTask::new_blur(
                             blur_std_deviation,
                             picture_task_id,
                             render_tasks,
                             RenderTargetKind::Color,
                             &[],
@@ -346,34 +339,34 @@ impl PicturePrimitive {
                             PremultipliedColorF::TRANSPARENT,
                             None,
                         );
 
                         let blur_render_task_id = render_tasks.add(blur_render_task);
                         self.render_task_id = Some(blur_render_task_id);
                     }
                     Some(PictureCompositeMode::Filter(FilterOp::DropShadow(offset, blur_radius, color))) => {
+                        let screen_offset = (offset * content_scale).round().to_i32();
                         let picture_task = RenderTask::new_picture(
                             Some(prim_screen_rect.size),
                             prim_index,
                             RenderTargetKind::Color,
-                            prim_screen_rect.origin.x as f32 - offset.x,
-                            prim_screen_rect.origin.y as f32 - offset.y,
+                            ContentOrigin::Screen(prim_screen_rect.origin - screen_offset),
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
-                            self.rasterization_kind,
                             child_tasks,
                             None,
+                            PictureType::Image,
                         );
 
-                        let blur_std_deviation = blur_radius * prim_context.device_pixel_ratio;
+                        let blur_std_deviation = blur_radius * prim_context.device_pixel_scale.0;
                         let picture_task_id = render_tasks.add(picture_task);
 
                         let blur_render_task = RenderTask::new_blur(
-                            blur_std_deviation,
+                            blur_std_deviation.round(),
                             picture_task_id,
                             render_tasks,
                             RenderTargetKind::Color,
                             &[],
                             ClearMode::Transparent,
                             color.premultiplied(),
                             None,
                         );
@@ -381,23 +374,22 @@ impl PicturePrimitive {
                         *secondary_render_task_id = Some(picture_task_id);
                         self.render_task_id = Some(render_tasks.add(blur_render_task));
                     }
                     Some(PictureCompositeMode::MixBlend(..)) => {
                         let picture_task = RenderTask::new_picture(
                             Some(prim_screen_rect.size),
                             prim_index,
                             RenderTargetKind::Color,
-                            prim_screen_rect.origin.x as f32,
-                            prim_screen_rect.origin.y as f32,
+                            content_origin,
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
-                            self.rasterization_kind,
                             child_tasks,
                             None,
+                            PictureType::Image,
                         );
 
                         let readback_task_id = render_tasks.add(RenderTask::new_readback(*prim_screen_rect));
 
                         *secondary_render_task_id = Some(readback_task_id);
                         parent_tasks.push(readback_task_id);
 
                         self.render_task_id = Some(render_tasks.add(picture_task));
@@ -411,85 +403,77 @@ impl PicturePrimitive {
                         if filter.is_noop() {
                             parent_tasks.extend(child_tasks);
                             self.render_task_id = None;
                         } else {
                             let picture_task = RenderTask::new_picture(
                                 Some(prim_screen_rect.size),
                                 prim_index,
                                 RenderTargetKind::Color,
-                                prim_screen_rect.origin.x as f32,
-                                prim_screen_rect.origin.y as f32,
+                                content_origin,
                                 PremultipliedColorF::TRANSPARENT,
                                 ClearMode::Transparent,
-                                self.rasterization_kind,
                                 child_tasks,
                                 None,
+                                PictureType::Image,
                             );
 
                             self.render_task_id = Some(render_tasks.add(picture_task));
                         }
                     }
                     Some(PictureCompositeMode::Blit) => {
                         let picture_task = RenderTask::new_picture(
                             Some(prim_screen_rect.size),
                             prim_index,
                             RenderTargetKind::Color,
-                            prim_screen_rect.origin.x as f32,
-                            prim_screen_rect.origin.y as f32,
+                            content_origin,
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
-                            self.rasterization_kind,
                             child_tasks,
                             None,
+                            PictureType::Image,
                         );
 
                         self.render_task_id = Some(render_tasks.add(picture_task));
                     }
                     None => {
                         parent_tasks.extend(child_tasks);
                         self.render_task_id = None;
                     }
                 }
             }
             PictureKind::TextShadow { blur_radius, color, content_rect, .. } => {
                 // This is a shadow element. Create a render task that will
                 // render the text run to a target, and then apply a gaussian
                 // blur to that text run in order to build the actual primitive
                 // which will be blitted to the framebuffer.
 
-                let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
-
                 // TODO(gw): Rounding the content rect here to device pixels is not
                 // technically correct. Ideally we should ceil() here, and ensure that
                 // the extra part pixel in the case of fractional sizes is correctly
                 // handled. For now, just use rounding which passes the existing
                 // Gecko tests.
-                let cache_width =
-                    (content_rect.size.width * prim_context.device_pixel_ratio).round() as i32;
-                let cache_height =
-                    (content_rect.size.height * prim_context.device_pixel_ratio).round() as i32;
-                let cache_size = DeviceIntSize::new(cache_width, cache_height);
+                let cache_size = (content_rect.size * content_scale).round().to_i32();
 
                 // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
                 // "the image that would be generated by applying to the shadow a
                 // Gaussian blur with a standard deviation equal to half the blur radius."
-                let blur_std_deviation = blur_radius.0 as f32 * 0.5;
+                let device_radius = (blur_radius * prim_context.device_pixel_scale.0).round();
+                let blur_std_deviation = device_radius * 0.5;
 
                 let picture_task = RenderTask::new_picture(
                     Some(cache_size),
                     prim_index,
                     RenderTargetKind::Color,
-                    content_rect.origin.x,
-                    content_rect.origin.y,
+                    ContentOrigin::Local(content_rect.origin),
                     color.premultiplied(),
                     ClearMode::Transparent,
-                    self.rasterization_kind,
                     Vec::new(),
                     None,
+                    PictureType::TextShadow,
                 );
 
                 let picture_task_id = render_tasks.add(picture_task);
 
                 let render_task = RenderTask::new_blur(
                     blur_std_deviation,
                     picture_task_id,
                     render_tasks,
@@ -498,54 +482,48 @@ impl PicturePrimitive {
                     ClearMode::Transparent,
                     color.premultiplied(),
                     None,
                 );
 
                 self.render_task_id = Some(render_tasks.add(render_task));
             }
             PictureKind::BoxShadow { blur_radius, clip_mode, ref blur_regions, color, content_rect, cache_key, .. } => {
-                let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
-
                 // TODO(gw): Rounding the content rect here to device pixels is not
                 // technically correct. Ideally we should ceil() here, and ensure that
                 // the extra part pixel in the case of fractional sizes is correctly
                 // handled. For now, just use rounding which passes the existing
                 // Gecko tests.
-                let cache_width =
-                    (content_rect.size.width * prim_context.device_pixel_ratio).round() as i32;
-                let cache_height =
-                    (content_rect.size.height * prim_context.device_pixel_ratio).round() as i32;
-                let cache_size = DeviceIntSize::new(cache_width, cache_height);
+                let cache_size = (content_rect.size * content_scale).round().to_i32();
 
                 // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
                 // "the image that would be generated by applying to the shadow a
                 // Gaussian blur with a standard deviation equal to half the blur radius."
-                let blur_std_deviation = blur_radius.0 as f32 * 0.5;
+                let device_radius = (blur_radius * prim_context.device_pixel_scale.0).round();
+                let blur_std_deviation = device_radius * 0.5;
 
                 let blur_clear_mode = match clip_mode {
                     BoxShadowClipMode::Outset => {
                         ClearMode::One
                     }
                     BoxShadowClipMode::Inset => {
                         ClearMode::Zero
                     }
                 };
 
                 let picture_task = RenderTask::new_picture(
                     Some(cache_size),
                     prim_index,
                     RenderTargetKind::Alpha,
-                    content_rect.origin.x,
-                    content_rect.origin.y,
+                    ContentOrigin::Local(content_rect.origin),
                     color.premultiplied(),
                     ClearMode::Zero,
-                    self.rasterization_kind,
                     Vec::new(),
                     Some(cache_key),
+                    PictureType::BoxShadow,
                 );
 
                 let picture_task_id = render_tasks.add(picture_task);
 
                 let render_task = RenderTask::new_blur(
                     blur_std_deviation,
                     picture_task_id,
                     render_tasks,
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -17,17 +17,17 @@ use core_graphics::color_space::CGColorS
 use core_graphics::context::{CGContext, CGTextDrawingMode};
 use core_graphics::data_provider::CGDataProvider;
 use core_graphics::font::{CGFont, CGGlyph};
 use core_graphics::geometry::{CGAffineTransform, CGPoint, CGRect, CGSize};
 use core_text;
 use core_text::font::{CTFont, CTFontRef};
 use core_text::font_descriptor::{kCTFontDefaultOrientation, kCTFontColorGlyphsTrait};
 use gamma_lut::{ColorLut, GammaLut};
-use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
+use glyph_rasterizer::{FontInstance, FontTransform, GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::collections::hash_map::Entry;
 use std::sync::Arc;
 
 pub struct FontContext {
     cg_fonts: FastHashMap<FontKey, CGFont>,
     ct_fonts: FastHashMap<(FontKey, Au, Vec<FontVariation>), CTFont>,
     gamma_lut: GammaLut,
@@ -259,16 +259,19 @@ fn new_ct_font_with_variations(cg_font: 
     }
 }
 
 fn is_bitmap_font(ct_font: &CTFont) -> bool {
     let traits = ct_font.symbolic_traits();
     (traits & kCTFontColorGlyphsTrait) != 0
 }
 
+// Skew factor matching Gecko/CG.
+const OBLIQUE_SKEW_FACTOR: f32 = 0.25;
+
 impl FontContext {
     pub fn new() -> FontContext {
         debug!("Test for subpixel AA support: {}", supports_subpixel_aa());
 
         // Force CG to use sRGB color space to gamma correct.
         let contrast = 0.0;
         let gamma = 0.0;
 
@@ -353,17 +356,38 @@ impl FontContext {
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         self.get_ct_font(font.font_key, font.size, &font.variations)
             .and_then(|ref ct_font| {
                 let glyph = key.index as CGGlyph;
                 let bitmap = is_bitmap_font(ct_font);
                 let (x_offset, y_offset) = if bitmap { (0.0, 0.0) } else { font.get_subpx_offset(key) };
-                let metrics = get_glyph_metrics(ct_font, None, glyph, x_offset, y_offset, 0.0);
+                let transform = if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
+                    let shape = FontTransform::identity().synthesize_italics(OBLIQUE_SKEW_FACTOR);
+                    Some(CGAffineTransform {
+                        a: shape.scale_x as f64,
+                        b: -shape.skew_y as f64,
+                        c: -shape.skew_x as f64,
+                        d: shape.scale_y as f64,
+                        tx: 0.0,
+                        ty: 0.0,
+                    })
+                } else {
+                    None
+                };
+                let extra_strikes = font.get_extra_strikes(1.0);
+                let metrics = get_glyph_metrics(
+                    ct_font,
+                    transform.as_ref(),
+                    glyph,
+                    x_offset,
+                    y_offset,
+                    extra_strikes as f64,
+                );
                 if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
                     None
                 } else {
                     Some(GlyphDimensions {
                         left: metrics.rasterized_left,
                         top: metrics.rasterized_ascent,
                         width: metrics.rasterized_width as u32,
                         height: metrics.rasterized_height as u32,
@@ -449,32 +473,38 @@ impl FontContext {
         let (x_scale, y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
         let size = font.size.scale_by(y_scale as f32);
         let ct_font = match self.get_ct_font(font.font_key, size, &font.variations) {
             Some(font) => font,
             None => return None,
         };
 
         let bitmap = is_bitmap_font(&ct_font);
-        let shape = font.transform.pre_scale(y_scale.recip() as f32, y_scale.recip() as f32);
-        let transform = if bitmap || shape.is_identity() {
-            None
+        let (mut shape, (x_offset, y_offset)) = if bitmap {
+            (FontTransform::identity(), (0.0, 0.0))
         } else {
+            (font.transform.invert_scale(y_scale, y_scale), font.get_subpx_offset(key))
+        };
+        if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
+            shape = shape.synthesize_italics(OBLIQUE_SKEW_FACTOR);
+        }
+        let transform = if !shape.is_identity() {
             Some(CGAffineTransform {
                 a: shape.scale_x as f64,
                 b: -shape.skew_y as f64,
                 c: -shape.skew_x as f64,
                 d: shape.scale_y as f64,
                 tx: 0.0,
-                ty: 0.0
+                ty: 0.0,
             })
+        } else {
+            None
         };
 
         let glyph = key.index as CGGlyph;
-        let (x_offset, y_offset) = if bitmap { (0.0, 0.0) } else { font.get_subpx_offset(key) };
         let (strike_scale, pixel_step) = if bitmap { (y_scale, 1.0) } else { (x_scale, y_scale / x_scale) };
         let extra_strikes = font.get_extra_strikes(strike_scale);
         let metrics = get_glyph_metrics(
             &ct_font,
             transform.as_ref(),
             glyph,
             x_offset,
             y_offset,
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -48,17 +48,66 @@ pub struct FontContext {
 
 // FreeType resources are safe to move between threads as long as they
 // are not concurrently accessed. In our case, everything is hidden inside
 // a given FontContext so it is safe to move the latter between threads.
 unsafe impl Send for FontContext {}
 
 extern "C" {
     fn FT_GlyphSlot_Embolden(slot: FT_GlyphSlot);
-    fn FT_GlyphSlot_Oblique(slot: FT_GlyphSlot);
+}
+
+// Skew factor matching Gecko/FreeType.
+const OBLIQUE_SKEW_FACTOR: f32 = 0.2;
+
+fn get_skew_bounds(bottom: i32, top: i32) -> (f32, f32) {
+    let skew_min = ((bottom as f32 + 0.5) * OBLIQUE_SKEW_FACTOR).floor();
+    let skew_max = ((top as f32 - 0.5) * OBLIQUE_SKEW_FACTOR).ceil();
+    (skew_min, skew_max)
+}
+
+fn skew_bitmap(bitmap: &[u8], width: usize, height: usize, left: i32, top: i32) -> (Vec<u8>, usize, i32) {
+    let stride = width * 4;
+    // Calculate the skewed horizontal offsets of the bottom and top of the glyph.
+    let (skew_min, skew_max) = get_skew_bounds(top - height as i32, top);
+    // Allocate enough extra width for the min/max skew offsets.
+    let skew_width = width + (skew_max - skew_min) as usize;
+    let mut skew_buffer = vec![0u8; skew_width * height * 4];
+    for y in 0 .. height {
+        // Calculate a skew offset at the vertical center of the current row.
+        let offset = (top as f32 - y as f32 - 0.5) * OBLIQUE_SKEW_FACTOR - skew_min;
+        // Get a blend factor in 0..256 constant across all pixels in the row.
+        let blend = (offset.fract() * 256.0) as u32;
+        let src_row = y * stride;
+        let dest_row = (y * skew_width + offset.floor() as usize) * 4;
+        let mut prev_px = [0u32; 4];
+        for (src, dest) in
+            bitmap[src_row .. src_row + stride].chunks(4).zip(
+                skew_buffer[dest_row .. dest_row + stride].chunks_mut(4)
+            ) {
+            let px = [src[0] as u32, src[1] as u32, src[2] as u32, src[3] as u32];
+            // Blend current pixel with previous pixel based on blend factor.
+            let next_px = [px[0] * blend, px[1] * blend, px[2] * blend, px[3] * blend];
+            dest[0] = ((((px[0] << 8) - next_px[0]) + prev_px[0] + 128) >> 8) as u8;
+            dest[1] = ((((px[1] << 8) - next_px[1]) + prev_px[1] + 128) >> 8) as u8;
+            dest[2] = ((((px[2] << 8) - next_px[2]) + prev_px[2] + 128) >> 8) as u8;
+            dest[3] = ((((px[3] << 8) - next_px[3]) + prev_px[3] + 128) >> 8) as u8;
+            // Save the remainder for blending onto the next pixel.
+            prev_px = next_px;
+        }
+        // If the skew misaligns the final pixel, write out the remainder.
+        if blend > 0 {
+            let dest = &mut skew_buffer[dest_row + stride .. dest_row + stride + 4];
+            dest[0] = ((prev_px[0] + 128) >> 8) as u8;
+            dest[1] = ((prev_px[1] + 128) >> 8) as u8;
+            dest[2] = ((prev_px[2] + 128) >> 8) as u8;
+            dest[3] = ((prev_px[3] + 128) >> 8) as u8;
+        }
+    }
+    (skew_buffer, skew_width, left + skew_min as i32)
 }
 
 impl FontContext {
     pub fn new() -> FontContext {
         let mut lib: FT_Library = ptr::null_mut();
 
         // Using an LCD filter may add one full pixel to each side if support is built in.
         // As of FreeType 2.8.1, an LCD filter is always used regardless of settings
@@ -146,17 +195,23 @@ impl FontContext {
         }
     }
 
     fn load_glyph(&self, font: &FontInstance, glyph: &GlyphKey) -> Option<FT_GlyphSlot> {
         debug_assert!(self.faces.contains_key(&font.font_key));
         let face = self.faces.get(&font.font_key).unwrap();
 
         let mut load_flags = FT_LOAD_DEFAULT;
-        let FontInstancePlatformOptions { hinting, .. } = font.platform_options.unwrap_or_default();
+        let FontInstancePlatformOptions { mut hinting, .. } = font.platform_options.unwrap_or_default();
+        // Disable hinting if there is a non-axis-aligned transform.
+        if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) ||
+           ((font.transform.scale_x != 0.0 || font.transform.scale_y != 0.0) &&
+            (font.transform.skew_x != 0.0 || font.transform.skew_y != 0.0)) {
+            hinting = FontHinting::None;
+        }
         match (hinting, font.render_mode) {
             (FontHinting::None, _) => load_flags |= FT_LOAD_NO_HINTING,
             (FontHinting::Mono, _) => load_flags = FT_LOAD_TARGET_MONO,
             (FontHinting::Light, _) => load_flags = FT_LOAD_TARGET_LIGHT,
             (FontHinting::LCD, FontRenderMode::Subpixel) => {
                 load_flags = match font.subpx_dir {
                     SubpixelDirection::Vertical => FT_LOAD_TARGET_LCD_V,
                     _ => FT_LOAD_TARGET_LCD,
@@ -189,17 +244,20 @@ impl FontContext {
         let req_size = font.size.to_f64_px();
         let face_flags = unsafe { (*face.face).face_flags };
         let mut result = if (face_flags & (FT_FACE_FLAG_FIXED_SIZES as FT_Long)) != 0 &&
                             (face_flags & (FT_FACE_FLAG_SCALABLE as FT_Long)) == 0 &&
                             (load_flags & FT_LOAD_NO_BITMAP) == 0 {
             unsafe { FT_Set_Transform(face.face, ptr::null_mut(), ptr::null_mut()) };
             self.choose_bitmap_size(face.face, req_size * y_scale)
         } else {
-            let shape = font.transform.pre_scale(x_scale.recip() as f32, y_scale.recip() as f32);
+            let mut shape = font.transform.invert_scale(x_scale, y_scale);
+            if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
+                shape = shape.synthesize_italics(OBLIQUE_SKEW_FACTOR);
+            };
             let mut ft_shape = FT_Matrix {
                 xx: (shape.scale_x * 65536.0) as FT_Fixed,
                 xy: (shape.skew_x * -65536.0) as FT_Fixed,
                 yx: (shape.skew_y * -65536.0) as FT_Fixed,
                 yy: (shape.scale_y * 65536.0) as FT_Fixed,
             };
             unsafe {
                 FT_Set_Transform(face.face, &mut ft_shape, ptr::null_mut());
@@ -300,50 +358,52 @@ impl FontContext {
         cbox
     }
 
     fn get_glyph_dimensions_impl(
         &self,
         slot: FT_GlyphSlot,
         font: &FontInstance,
         glyph: &GlyphKey,
-        scale_bitmaps: bool,
+        transform_bitmaps: bool,
     ) -> Option<GlyphDimensions> {
         let metrics = unsafe { &(*slot).metrics };
 
-        let advance = metrics.horiAdvance as f32 / 64.0;
+        let mut advance = metrics.horiAdvance as f32 / 64.0;
         match unsafe { (*slot).format } {
             FT_Glyph_Format::FT_GLYPH_FORMAT_BITMAP => {
-                let left = unsafe { (*slot).bitmap_left };
-                let top = unsafe { (*slot).bitmap_top };
-                let width = unsafe { (*slot).bitmap.width };
-                let height = unsafe { (*slot).bitmap.rows };
-                if scale_bitmaps {
+                let mut left = unsafe { (*slot).bitmap_left };
+                let mut top = unsafe { (*slot).bitmap_top };
+                let mut width = unsafe { (*slot).bitmap.width };
+                let mut height = unsafe { (*slot).bitmap.rows };
+                if transform_bitmaps {
                     let y_size = unsafe { (*(*(*slot).face).size).metrics.y_ppem };
                     let scale = font.size.to_f32_px() / y_size as f32;
                     let x0 = left as f32 * scale;
                     let x1 = width as f32 * scale + x0;
                     let y1 = top as f32 * scale;
                     let y0 = y1 - height as f32 * scale;
-                    Some(GlyphDimensions {
-                        left: x0.round() as i32,
-                        top: y1.round() as i32,
-                        width: (x1.ceil() - x0.floor()) as u32,
-                        height: (y1.ceil() - y0.floor()) as u32,
-                        advance: advance * scale,
-                    })
-                } else {
-                    Some(GlyphDimensions {
-                        left,
-                        top,
-                        width,
-                        height,
-                        advance,
-                    })
+                    left = x0.round() as i32;
+                    top = y1.round() as i32;
+                    width = (x1.ceil() - x0.floor()) as u32;
+                    height = (y1.ceil() - y0.floor()) as u32;
+                    advance *= scale;
+                    if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
+                        let (skew_min, skew_max) = get_skew_bounds(top - height as i32, top);
+                        left += skew_min as i32;
+                        width += (skew_max - skew_min) as u32;
+                    }
                 }
+                Some(GlyphDimensions {
+                    left,
+                    top,
+                    width,
+                    height,
+                    advance,
+                })
             }
             FT_Glyph_Format::FT_GLYPH_FORMAT_OUTLINE => {
                 let cbox = self.get_bounding_box(slot, font, glyph);
                 Some(GlyphDimensions {
                     left: (cbox.xMin >> 6) as i32,
                     top: (cbox.yMax >> 6) as i32,
                     width: ((cbox.xMax - cbox.xMin) >> 6) as u32,
                     height: ((cbox.yMax - cbox.yMin) >> 6) as u32,
@@ -424,20 +484,16 @@ impl FontContext {
             let outline = &(*slot).outline;
             let mut cbox: FT_BBox = mem::uninitialized();
             FT_Outline_Get_CBox(outline, &mut cbox);
             FT_Outline_Translate(
                 outline,
                 dx - ((cbox.xMin + dx) & !63),
                 dy - ((cbox.yMin + dy) & !63),
             );
-
-            if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
-                FT_GlyphSlot_Oblique(slot);
-            }
         }
 
         if font.render_mode == FontRenderMode::Subpixel {
             let FontInstancePlatformOptions { lcd_filter, .. } = font.platform_options.unwrap_or_default();
             let filter = match lcd_filter {
                 FontLCDFilter::None => FT_LcdFilter::FT_LCD_FILTER_NONE,
                 FontLCDFilter::Default => FT_LcdFilter::FT_LCD_FILTER_DEFAULT,
                 FontLCDFilter::Light => FT_LcdFilter::FT_LCD_FILTER_LIGHT,
@@ -509,42 +565,42 @@ impl FontContext {
             "Rasterizing {:?} as {:?} with dimensions {:?}",
             key,
             font.render_mode,
             dimensions
         );
 
         let bitmap = unsafe { &(*slot).bitmap };
         let pixel_mode = unsafe { mem::transmute(bitmap.pixel_mode as u32) };
-        let (actual_width, actual_height) = match pixel_mode {
+        let (mut actual_width, actual_height) = match pixel_mode {
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD => {
                 assert!(bitmap.width % 3 == 0);
-                ((bitmap.width / 3) as i32, bitmap.rows as i32)
+                ((bitmap.width / 3) as usize, bitmap.rows as usize)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD_V => {
                 assert!(bitmap.rows % 3 == 0);
-                (bitmap.width as i32, (bitmap.rows / 3) as i32)
+                (bitmap.width as usize, (bitmap.rows / 3) as usize)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_MONO |
             FT_Pixel_Mode::FT_PIXEL_MODE_GRAY |
             FT_Pixel_Mode::FT_PIXEL_MODE_BGRA => {
-                (bitmap.width as i32, bitmap.rows as i32)
+                (bitmap.width as usize, bitmap.rows as usize)
             }
             _ => panic!("Unsupported {:?}", pixel_mode),
         };
-        let mut final_buffer = vec![0; (actual_width * actual_height * 4) as usize];
+        let mut final_buffer = vec![0u8; actual_width * actual_height * 4];
 
         // Extract the final glyph from FT format into BGRA8 format, which is
         // what WR expects.
         let subpixel_bgr = font.flags.contains(FontInstanceFlags::SUBPIXEL_BGR);
         let mut src_row = bitmap.buffer;
         let mut dest: usize = 0;
         while dest < final_buffer.len() {
             let mut src = src_row;
-            let row_end = dest + actual_width as usize * 4;
+            let row_end = dest + actual_width * 4;
             match pixel_mode {
                 FT_Pixel_Mode::FT_PIXEL_MODE_MONO => {
                     while dest < row_end {
                         // Cast the byte to signed so that we can left shift each bit into
                         // the top bit, then right shift to fill out the bits with 0s or 1s.
                         let mut byte: i8 = unsafe { *src as i8 };
                         src = unsafe { src.offset(1) };
                         let byte_end = cmp::min(row_end, dest + 8 * 4);
@@ -608,20 +664,29 @@ impl FontContext {
                 }
                 _ => panic!("Unsupported {:?}", pixel_mode),
             }
             src_row = unsafe { src_row.offset(bitmap.pitch as isize) };
             dest = row_end;
         }
 
         match format {
+            FT_Glyph_Format::FT_GLYPH_FORMAT_BITMAP => {
+                if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
+                    let (skew_buffer, skew_width, skew_left) =
+                        skew_bitmap(&final_buffer, actual_width, actual_height, left, top);
+                    final_buffer = skew_buffer;
+                    actual_width = skew_width;
+                    left = skew_left;
+                }
+            }
             FT_Glyph_Format::FT_GLYPH_FORMAT_OUTLINE => {
                 unsafe {
                     left += (*slot).bitmap_left;
-                    top += (*slot).bitmap_top - actual_height;
+                    top += (*slot).bitmap_top - height as i32;
                 }
             }
             _ => {}
         }
 
         let glyph_format = match (pixel_mode, format) {
             (FT_Pixel_Mode::FT_PIXEL_MODE_LCD, _) |
             (FT_Pixel_Mode::FT_PIXEL_MODE_LCD_V, _) => font.get_subpixel_glyph_format(),
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{FontInstanceFlags, FontKey, FontRenderMode};
 use api::{ColorU, GlyphDimensions, GlyphKey, SubpixelDirection};
 use dwrote;
 use gamma_lut::{ColorLut, GammaLut};
-use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
+use glyph_rasterizer::{FontInstance, FontTransform, GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::collections::hash_map::Entry;
 use std::sync::Arc;
 
 lazy_static! {
     static ref DEFAULT_FONT_DESCRIPTOR: dwrote::FontDescriptor = dwrote::FontDescriptor {
         family_name: "Arial".to_owned(),
         weight: dwrote::FontWeight::Regular,
@@ -81,16 +81,19 @@ fn dwrite_render_mode(
 
 fn is_bitmap_font(font: &FontInstance) -> bool {
     // If bitmaps are requested, then treat as a bitmap font to disable transforms.
     // If mono AA is requested, let that take priority over using bitmaps.
     font.render_mode != FontRenderMode::Mono &&
         font.flags.contains(FontInstanceFlags::EMBEDDED_BITMAPS)
 }
 
+// Skew factor matching Gecko/DWrite.
+const OBLIQUE_SKEW_FACTOR: f32 = 0.3;
+
 impl FontContext {
     pub fn new() -> FontContext {
         // These are the default values we use in Gecko.
         // We use a gamma value of 2.3 for gdi fonts
         // TODO: Fetch this data from Gecko itself.
         let contrast = 1.0;
         let gamma = 1.8;
         let gdi_gamma = 2.3;
@@ -155,26 +158,20 @@ impl FontContext {
             println!("");
         }
     }
 
     fn get_font_face(
         &mut self,
         font: &FontInstance,
     ) -> &dwrote::FontFace {
-        if !font.flags.intersects(FontInstanceFlags::SYNTHETIC_BOLD | FontInstanceFlags::SYNTHETIC_ITALICS) {
+        if !font.flags.contains(FontInstanceFlags::SYNTHETIC_BOLD) {
             return self.fonts.get(&font.font_key).unwrap();
         }
-        let mut sims = dwrote::DWRITE_FONT_SIMULATIONS_NONE;
-        if font.flags.contains(FontInstanceFlags::SYNTHETIC_BOLD) {
-            sims = sims | dwrote::DWRITE_FONT_SIMULATIONS_BOLD;
-        }
-        if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
-            sims = sims | dwrote::DWRITE_FONT_SIMULATIONS_OBLIQUE;
-        }
+        let sims = dwrote::DWRITE_FONT_SIMULATIONS_BOLD;
         match self.simulations.entry((font.font_key, sims)) {
             Entry::Occupied(entry) => entry.into_mut(),
             Entry::Vacant(entry) => {
                 let normal_face = self.fonts.get(&font.font_key).unwrap();
                 entry.insert(normal_face.create_font_face_with_simulations(sims))
             }
         }
     }
@@ -234,17 +231,30 @@ impl FontContext {
 
     pub fn get_glyph_dimensions(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         let size = font.size.to_f32_px();
         let bitmaps = is_bitmap_font(font);
-        let analysis = self.create_glyph_analysis(font, key, size, None, bitmaps);
+        let transform = if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
+            let shape = FontTransform::identity().synthesize_italics(OBLIQUE_SKEW_FACTOR);
+            Some(dwrote::DWRITE_MATRIX {
+                m11: shape.scale_x,
+                m12: shape.skew_y,
+                m21: shape.skew_x,
+                m22: shape.scale_y,
+                dx: 0.0,
+                dy: 0.0,
+            })
+        } else {
+            None
+        };
+        let analysis = self.create_glyph_analysis(font, key, size, transform, bitmaps);
 
         let texture_type = dwrite_texture_type(font.render_mode);
 
         let bounds = analysis.get_alpha_texture_bounds(texture_type);
 
         let width = (bounds.right - bounds.left) as u32;
         let height = (bounds.bottom - bounds.top) as u32;
 
@@ -339,29 +349,35 @@ impl FontContext {
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
         let (.., y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
         let size = (font.size.to_f64_px() * y_scale) as f32;
         let bitmaps = is_bitmap_font(font);
-        let transform = if bitmaps {
-            None
+        let (mut shape, (x_offset, y_offset)) = if bitmaps {
+            (FontTransform::identity(), (0.0, 0.0))
         } else {
-            let (x_offset, y_offset) = font.get_subpx_offset(key);
-            let shape = font.transform.pre_scale(y_scale.recip() as f32, y_scale.recip() as f32);
+            (font.transform.invert_scale(y_scale, y_scale), font.get_subpx_offset(key))
+        };
+        if font.flags.contains(FontInstanceFlags::SYNTHETIC_ITALICS) {
+            shape = shape.synthesize_italics(OBLIQUE_SKEW_FACTOR);
+        }
+        let transform = if !shape.is_identity() || (x_offset, y_offset) != (0.0, 0.0) {
             Some(dwrote::DWRITE_MATRIX {
                 m11: shape.scale_x,
                 m12: shape.skew_y,
                 m21: shape.skew_x,
                 m22: shape.scale_y,
                 dx: x_offset as f32,
                 dy: y_offset as f32,
             })
+        } else {
+            None
         };
 
         let analysis = self.create_glyph_analysis(font, key, size, transform, bitmaps);
         let texture_type = dwrite_texture_type(font.render_mode);
 
         let bounds = analysis.get_alpha_texture_bounds(texture_type);
         let width = (bounds.right - bounds.left) as u32;
         let height = (bounds.bottom - bounds.top) as u32;
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,37 +1,40 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, BuiltDisplayList, ClipAndScrollInfo, ClipId, ClipMode, ColorF, ColorU};
-use api::{ComplexClipRegion, DeviceIntRect, DevicePoint, ExtendMode, FontRenderMode};
+use api::{DeviceIntRect, DevicePixelScale, DevicePoint};
+use api::{ComplexClipRegion, ExtendMode, FontRenderMode};
 use api::{GlyphInstance, GlyphKey, GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag};
 use api::{LayerPoint, LayerRect, LayerSize, LayerToWorldTransform, LayerVector2D, LineOrientation};
 use api::{LineStyle, PipelineId, PremultipliedColorF, TileOffset, WorldToLayerTransform};
 use api::{YuvColorSpace, YuvFormat};
 use border::BorderCornerInstance;
 use clip_scroll_tree::{CoordinateSystemId, ClipScrollTree};
+use clip_scroll_node::ClipScrollNode;
 use clip::{ClipSource, ClipSourcesHandle, ClipStore};
 use frame_builder::PrimitiveContext;
 use glyph_rasterizer::{FontInstance, FontTransform};
 use internal_types::{FastHashMap};
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
-use gpu_types::ClipScrollNodeData;
-use picture::{PictureKind, PicturePrimitive, RasterizationSpace};
+use gpu_types::{ClipChainRectIndex, ClipScrollNodeData};
+use picture::{PictureKind, PicturePrimitive};
 use profiler::FrameProfileCounters;
 use render_task::{ClipChain, ClipChainNode, ClipChainNodeIter, ClipWorkItem, RenderTask};
 use render_task::{RenderTaskId, RenderTaskTree};
 use renderer::{BLOCKS_PER_UV_RECT, MAX_VERTEX_TEXTURE_WIDTH};
 use resource_cache::{ImageProperties, ResourceCache};
 use scene::{ScenePipeline, SceneProperties};
-use std::{mem, u16, usize};
+use segment::SegmentBuilder;
+use std::{mem, usize};
 use std::rc::Rc;
-use util::{MatrixHelpers, calculate_screen_bounding_rect, extract_inner_rect_safe, pack_as_float};
+use util::{MatrixHelpers, calculate_screen_bounding_rect, pack_as_float};
 use util::recycle_vec;
 
 
 const MIN_BRUSH_SPLIT_AREA: f32 = 128.0 * 128.0;
 
 #[derive(Debug)]
 pub struct PrimitiveRun {
     pub base_prim_index: PrimitiveIndex,
@@ -178,16 +181,17 @@ pub struct PrimitiveMetadata {
     pub gpu_location: GpuCacheHandle,
     pub clip_task_id: Option<RenderTaskId>,
 
     // TODO(gw): In the future, we should just pull these
     //           directly from the DL item, instead of
     //           storing them here.
     pub local_rect: LayerRect,
     pub local_clip_rect: LayerRect,
+    pub clip_chain_rect_index: ClipChainRectIndex,
     pub is_backface_visible: bool,
     pub screen_rect: Option<DeviceIntRect>,
 
     /// A tag used to identify this primitive outside of WebRender. This is
     /// used for returning useful data during hit testing.
     pub tag: Option<ItemTag>,
 }
 
@@ -214,177 +218,87 @@ impl BrushKind {
     /// Returns `true` only when this brush is the `BrushKind::Solid` variant.
     fn is_solid(&self) -> bool {
         match *self {
             BrushKind::Solid { .. } => true,
             _ => false,
         }
     }
 }
 
-#[derive(Debug, Copy, Clone)]
-#[repr(u32)]
-pub enum BrushAntiAliasMode {
-    Primitive = 0,
-    Segment = 1,
-}
-
-#[allow(dead_code)]
-#[derive(Debug, Copy, Clone)]
-#[repr(C)]
-pub enum BrushSegmentKind {
-    TopLeft = 0,
-    TopRight,
-    BottomRight,
-    BottomLeft,
-
-    TopMid,
-    MidRight,
-    BottomMid,
-    MidLeft,
-
-    Center,
+bitflags! {
+    /// Each bit of the edge AA mask is:
+    /// 0, when the edge of the primitive needs to be considered for AA
+    /// 1, when the edge of the segment needs to be considered for AA
+    ///
+    /// *Note*: the bit values have to match the shader logic in
+    /// `write_transform_vertex()` function.
+    pub struct EdgeAaSegmentMask: u8 {
+        const LEFT = 0x1;
+        const TOP = 0x2;
+        const RIGHT = 0x4;
+        const BOTTOM = 0x8;
+    }
 }
 
 /// One rectangular piece of a segmented brush primitive.
 #[derive(Debug)]
 pub struct BrushSegment {
     /// Rectangle covered by this segment; pushed to the GPU cache as part
     /// of the per-segment data.
     pub local_rect: LayerRect,
     /// Clip mask render task for this segment. `None` on construction; it is
     /// (re)assigned in `update_clip_task_for_brush`.
     pub clip_task_id: Option<RenderTaskId>,
     /// Taken from the segment builder's `has_mask`. When set, a clip mask
     /// task is requested for this segment.
+    pub may_need_clip_mask: bool,
     /// Edge AA mask bits, written into the segment's GPU block.
+    pub edge_flags: EdgeAaSegmentMask,
 }
 
 impl BrushSegment {
     /// Builds a segment whose `local_rect` is the rect at `origin` with
     /// `size`. The segment starts without a clip task (`clip_task_id` is
     /// `None`); `may_need_clip_mask` and `edge_flags` are stored as given.
-    fn new(
+    pub fn new(
         origin: LayerPoint,
         size: LayerSize,
+        may_need_clip_mask: bool,
+        edge_flags: EdgeAaSegmentMask,
     ) -> BrushSegment {
         BrushSegment {
             local_rect: LayerRect::new(origin, size),
             clip_task_id: None,
+            may_need_clip_mask,
+            edge_flags,
         }
     }
 }
 
+#[derive(Copy, Clone, Debug, PartialEq)]
 /// How clip masks are applied to the segments of a brush primitive.
+pub enum BrushClipMaskKind {
     /// Not determined yet. While a segment descriptor is in this state,
     /// `write_brush_segment_description` walks the clip list to decide.
+    Unknown,
     /// Only segments flagged `may_need_clip_mask` get a clip mask task.
+    Individual,
     /// Every segment gets a clip mask task. Chosen when a clip comes from a
     /// different coordinate system, or is a clip source the segment builder
     /// does not handle.
+    Global,
+}
+
 #[derive(Debug)]
 pub struct BrushSegmentDescriptor {
-    pub top_left_offset: LayerVector2D,
-    pub bottom_right_offset: LayerVector2D,
-    pub segments: [BrushSegment; 9],
-    pub enabled_segments: u16,
-    pub can_optimize_clip_mask: bool,
-}
-
-impl BrushSegmentDescriptor {
-    pub fn new(
-        outer_rect: &LayerRect,
-        inner_rect: &LayerRect,
-        valid_segments: Option<&[BrushSegmentKind]>,
-    ) -> BrushSegmentDescriptor {
-        let p0 = outer_rect.origin;
-        let p1 = inner_rect.origin;
-        let p2 = inner_rect.bottom_right();
-        let p3 = outer_rect.bottom_right();
-
-        let enabled_segments = match valid_segments {
-            Some(valid_segments) => {
-                valid_segments.iter().fold(
-                    0,
-                    |acc, segment| acc | 1 << *segment as u32
-                )
-            }
-            None => u16::MAX,
-        };
-
-        BrushSegmentDescriptor {
-            enabled_segments,
-            can_optimize_clip_mask: false,
-            top_left_offset: p1 - p0,
-            bottom_right_offset: p3 - p2,
-            segments: [
-                BrushSegment::new(
-                    LayerPoint::new(p0.x, p0.y),
-                    LayerSize::new(p1.x - p0.x, p1.y - p0.y),
-                ),
-                BrushSegment::new(
-                    LayerPoint::new(p2.x, p0.y),
-                    LayerSize::new(p3.x - p2.x, p1.y - p0.y),
-                ),
-                BrushSegment::new(
-                    LayerPoint::new(p2.x, p2.y),
-                    LayerSize::new(p3.x - p2.x, p3.y - p2.y),
-                ),
-                BrushSegment::new(
-                    LayerPoint::new(p0.x, p2.y),
-                    LayerSize::new(p1.x - p0.x, p3.y - p2.y),
-                ),
-                BrushSegment::new(
-                    LayerPoint::new(p1.x, p0.y),
-                    LayerSize::new(p2.x - p1.x, p1.y - p0.y),
-                ),
-                BrushSegment::new(
-                    LayerPoint::new(p2.x, p1.y),
-                    LayerSize::new(p3.x - p2.x, p2.y - p1.y),
-                ),
-                BrushSegment::new(
-                    LayerPoint::new(p1.x, p2.y),
-                    LayerSize::new(p2.x - p1.x, p3.y - p2.y),
-                ),
-                BrushSegment::new(
-                    LayerPoint::new(p0.x, p1.y),
-                    LayerSize::new(p1.x - p0.x, p2.y - p1.y),
-                ),
-                BrushSegment::new(
-                    LayerPoint::new(p1.x, p1.y),
-                    LayerSize::new(p2.x - p1.x, p2.y - p1.y),
-                ),
-            ],
-        }
-    }
+    pub segments: Vec<BrushSegment>,
+    pub clip_mask_kind: BrushClipMaskKind,
 }
 
 /// CPU-side description of a brush primitive.
 #[derive(Debug)]
 pub struct BrushPrimitive {
     /// Which kind of brush this is; selects the brush-specific GPU blocks.
     pub kind: BrushKind,
     /// Optional segmentation of the primitive. When absent, it may be built
     /// by `write_brush_segment_description` if the brush qualifies.
-    pub segment_desc: Option<Box<BrushSegmentDescriptor>>,
-    pub aa_mode: BrushAntiAliasMode,
+    pub segment_desc: Option<BrushSegmentDescriptor>,
 }
 
 impl BrushPrimitive {
     pub fn new(
         kind: BrushKind,
-        segment_desc: Option<Box<BrushSegmentDescriptor>>,
-        aa_mode: BrushAntiAliasMode,
+        segment_desc: Option<BrushSegmentDescriptor>,
     ) -> BrushPrimitive {
         BrushPrimitive {
             kind,
             segment_desc,
-            aa_mode,
         }
     }
-}
 
-impl ToGpuBlocks for BrushPrimitive {
-    fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        match self.segment_desc {
-            Some(ref segment_desc) => {
-                request.push([
-                    segment_desc.top_left_offset.x,
-                    segment_desc.top_left_offset.y,
-                    segment_desc.bottom_right_offset.x,
-                    segment_desc.bottom_right_offset.y,
-                ]);
-            }
-            None => {
-                request.push([0.0; 4]);
-            }
-        }
-        request.push([
-            self.aa_mode as u32 as f32,
-            0.0,
-            0.0,
-            0.0,
-        ]);
+    fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
+        // has to match VECS_PER_SPECIFIC_BRUSH
         match self.kind {
             BrushKind::Solid { color } => {
                 request.push(color.premultiplied());
             }
             BrushKind::Clear => {
                 // Opaque black with operator dest out
                 request.push(PremultipliedColorF::BLACK);
             }
@@ -425,18 +339,18 @@ impl ToGpuBlocks for BrushPrimitive {
 #[repr(C)]
 pub struct LinePrimitive {
     pub color: PremultipliedColorF,
     pub wavy_line_thickness: f32,
     pub style: LineStyle,
     pub orientation: LineOrientation,
 }
 
-impl ToGpuBlocks for LinePrimitive {
-    fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
+impl LinePrimitive {
+    fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
         request.push(self.color);
         request.push([
             self.wavy_line_thickness,
             pack_as_float(self.style as u32),
             pack_as_float(self.orientation as u32),
             0.0,
         ]);
     }
@@ -741,46 +655,44 @@ pub struct TextRunPrimitiveCpu {
     pub glyph_keys: Vec<GlyphKey>,
     pub glyph_gpu_blocks: Vec<GpuBlockData>,
     pub shadow_color: ColorU,
 }
 
 impl TextRunPrimitiveCpu {
     /// Returns a copy of this run's font, adjusted for rendering.
     ///
     /// The font size is scaled by the device pixel scale. If a `transform`
     /// is supplied, it is also taken into account: a transform with a
     /// perspective component or without a 2D inverse limits the render mode
     /// to `FontRenderMode::Alpha`; any other transform is quantized and
     /// stored as the font transform. Passing `None` leaves the render mode
     /// and font transform as they are on the run's font.
     pub fn get_font(
         &self,
-        device_pixel_ratio: f32,
-        transform: &LayerToWorldTransform,
-        rasterization_kind: RasterizationSpace,
+        device_pixel_scale: DevicePixelScale,
+        transform: Option<&LayerToWorldTransform>,
     ) -> FontInstance {
         let mut font = self.font.clone();
-        font.size = font.size.scale_by(device_pixel_ratio);
-        if rasterization_kind == RasterizationSpace::Screen {
+        font.size = font.size.scale_by(device_pixel_scale.0);
+        if let Some(transform) = transform {
             if transform.has_perspective_component() || !transform.has_2d_inverse() {
                 font.render_mode = font.render_mode.limit_by(FontRenderMode::Alpha);
             } else {
                 font.transform = FontTransform::from(transform).quantize();
             }
         }
         font
     }
 
     /// A text run counts as a shadow when its `shadow_color` has a non-zero
     /// alpha component.
     pub fn is_shadow(&self) -> bool {
         self.shadow_color.a != 0
     }
 
     fn prepare_for_render(
         &mut self,
         resource_cache: &mut ResourceCache,
-        device_pixel_ratio: f32,
-        transform: &LayerToWorldTransform,
+        device_pixel_scale: DevicePixelScale,
+        transform: Option<&LayerToWorldTransform>,
         display_list: &BuiltDisplayList,
         gpu_cache: &mut GpuCache,
-        rasterization_kind: RasterizationSpace,
     ) {
-        let font = self.get_font(device_pixel_ratio, transform, rasterization_kind);
+        let font = self.get_font(device_pixel_scale, transform);
 
         // Cache the glyph positions, if not in the cache already.
         // TODO(gw): In the future, remove `glyph_instances`
         //           completely, and just reference the glyphs
         //           directly from the display list.
         if self.glyph_keys.is_empty() {
             let subpx_dir = font.subpx_dir.limit_by(font.render_mode);
             let src_glyphs = display_list.get(self.glyph_range);
@@ -1093,16 +1005,17 @@ impl PrimitiveStore {
         let prim_index = self.cpu_metadata.len();
 
         let base_metadata = PrimitiveMetadata {
             clip_sources,
             gpu_location: GpuCacheHandle::new(),
             clip_task_id: None,
             local_rect: *local_rect,
             local_clip_rect: *local_clip_rect,
+            clip_chain_rect_index: ClipChainRectIndex(0),
             is_backface_visible: is_backface_visible,
             screen_rect: None,
             tag,
             opacity: PrimitiveOpacity::translucent(),
             prim_kind: PrimitiveKind::Brush,
             cpu_prim_index: SpecificPrimitiveIndex(0),
         };
 
@@ -1264,23 +1177,30 @@ impl PrimitiveStore {
                         metadata.screen_rect.as_ref().expect("bug: trying to draw an off-screen picture!?"),
                         child_tasks,
                         parent_tasks,
                     );
             }
             PrimitiveKind::TextRun => {
                 let pic = &self.cpu_pictures[pic_index.0];
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
+                // The transform only makes sense for screen space rasterization
+                let transform = match pic.kind {
+                    PictureKind::BoxShadow { .. } => None,
+                    PictureKind::TextShadow { .. } => None,
+                    PictureKind::Image { .. } => {
+                        Some(&prim_context.scroll_node.world_content_transform)
+                    },
+                };
                 text.prepare_for_render(
                     resource_cache,
-                    prim_context.device_pixel_ratio,
-                    &prim_context.scroll_node.world_content_transform,
+                    prim_context.device_pixel_scale,
+                    transform,
                     prim_context.display_list,
                     gpu_cache,
-                    pic.rasterization_kind,
                 );
             }
             PrimitiveKind::Image => {
                 let image_cpu = &mut self.cpu_images[metadata.cpu_prim_index.0];
 
                 resource_cache.request_image(
                     image_cpu.image_key,
                     image_cpu.image_rendering,
@@ -1317,23 +1237,39 @@ impl PrimitiveStore {
             PrimitiveKind::Brush |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
             PrimitiveKind::RadialGradient => {}
         }
 
         // Mark this GPU resource as required for this frame.
         if let Some(mut request) = gpu_cache.request(&mut metadata.gpu_location) {
+            // has to match VECS_PER_BRUSH_PRIM
             request.push(metadata.local_rect);
             request.push(metadata.local_clip_rect);
 
             match metadata.prim_kind {
                 PrimitiveKind::Line => {
                     let line = &self.cpu_lines[metadata.cpu_prim_index.0];
-                    line.write_gpu_blocks(request);
+                    line.write_gpu_blocks(&mut request);
+
+                    // TODO(gw): This is a bit of a hack. The Line type
+                    //           is drawn by the brush_line shader, so the
+                    //           layout here needs to conform to the same
+                    //           BrushPrimitive layout. We should tidy this
+                    //           up in the future so it's enforced that these
+                    //           types use a shared function to write out the
+                    //           GPU blocks...
+                    request.push(metadata.local_rect);
+                    request.push([
+                        EdgeAaSegmentMask::empty().bits() as f32,
+                        0.0,
+                        0.0,
+                        0.0
+                    ]);
                 }
                 PrimitiveKind::Border => {
                     let border = &self.cpu_borders[metadata.cpu_prim_index.0];
                     border.write_gpu_blocks(request);
                 }
                 PrimitiveKind::Image => {
                     let image = &self.cpu_images[metadata.cpu_prim_index.0];
                     image.write_gpu_blocks(request);
@@ -1354,167 +1290,229 @@ impl PrimitiveStore {
                     let gradient = &self.cpu_radial_gradients[metadata.cpu_prim_index.0];
                     gradient.build_gpu_blocks_for_angle_radial(prim_context.display_list, request);
                 }
                 PrimitiveKind::TextRun => {
                     let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                     text.write_gpu_blocks(&mut request);
                 }
                 PrimitiveKind::Picture => {
+                    self.cpu_pictures[metadata.cpu_prim_index.0]
+                        .write_gpu_blocks(&mut request);
+
                     // TODO(gw): This is a bit of a hack. The Picture type
                     //           is drawn by the brush_image shader, so the
                     //           layout here needs to conform to the same
                     //           BrushPrimitive layout. We should tidy this
                     //           up in the future so it's enforced that these
                     //           types use a shared function to write out the
                     //           GPU blocks...
-                    request.push([0.0; 4]);
+                    request.push(metadata.local_rect);
                     request.push([
-                        BrushAntiAliasMode::Primitive as u32 as f32,
+                        EdgeAaSegmentMask::empty().bits() as f32,
                         0.0,
                         0.0,
-                        0.0,
+                        0.0
                     ]);
-
-                    self.cpu_pictures[metadata.cpu_prim_index.0]
-                        .write_gpu_blocks(&mut request);
                 }
                 PrimitiveKind::Brush => {
                     let brush = &self.cpu_brushes[metadata.cpu_prim_index.0];
-                    brush.write_gpu_blocks(request);
+                    brush.write_gpu_blocks(&mut request);
+                    match brush.segment_desc {
+                        Some(ref segment_desc) => {
+                            for segment in &segment_desc.segments {
+                                // has to match VECS_PER_SEGMENT
+                                request.push(segment.local_rect);
+                                request.push([
+                                    segment.edge_flags.bits() as f32,
+                                    0.0,
+                                    0.0,
+                                    0.0
+                                ]);
+                            }
+                        }
+                        None => {
+                            request.push(metadata.local_rect);
+                            request.push([
+                                EdgeAaSegmentMask::empty().bits() as f32,
+                                0.0,
+                                0.0,
+                                0.0
+                            ]);
+                        }
+                    }
                 }
             }
         }
     }
 
-    fn write_brush_nine_patch_segment_description(
+    fn write_brush_segment_description(
         &mut self,
         prim_index: PrimitiveIndex,
         prim_context: &PrimitiveContext,
         clip_store: &mut ClipStore,
         node_data: &[ClipScrollNodeData],
         clips: &Vec<ClipWorkItem>,
     ) {
         debug_assert!(self.cpu_metadata[prim_index.0].prim_kind == PrimitiveKind::Brush);
 
-        if clips.len() != 1 {
-            return;
-        }
-
-        let clip_item = clips.first().unwrap();
-        if clip_item.coordinate_system_id != prim_context.scroll_node.coordinate_system_id {
-            return;
-        }
-
         let metadata = &self.cpu_metadata[prim_index.0];
         let brush = &mut self.cpu_brushes[metadata.cpu_prim_index.0];
-        if brush.segment_desc.is_some() {
-            return;
-        }
-        if !brush.kind.is_solid() {
-            return;
-        }
-        if metadata.local_rect.size.area() <= MIN_BRUSH_SPLIT_AREA {
-            return;
-        }
 
-        let local_clips = clip_store.get_opt(&clip_item.clip_sources).expect("bug");
-        let mut selected_clip = None;
-        for &(ref clip, _) in &local_clips.clips {
-            match *clip {
-                ClipSource::RoundedRectangle(rect, radii, ClipMode::Clip) => {
-                    if selected_clip.is_some() {
-                        selected_clip = None;
-                        break;
-                    }
-                    selected_clip = Some((rect, radii, clip_item.scroll_node_data_index));
+        match brush.segment_desc {
+            Some(ref segment_desc) => {
+                // If we already have a segment descriptor, only run through the
+                // clips list if we haven't already determined the mask kind.
+                if segment_desc.clip_mask_kind != BrushClipMaskKind::Unknown {
+                    return;
                 }
-                ClipSource::Rectangle(..) => {}
-                ClipSource::RoundedRectangle(_, _, ClipMode::ClipOut) |
-                ClipSource::BorderCorner(..) |
-                ClipSource::Image(..) => {
-                    selected_clip = None;
-                    break;
+            }
+            None => {
+                // If no segment descriptor built yet, see if it is a brush
+                // type that wants to be segmented.
+                if !brush.kind.is_solid() {
+                    return;
+                }
+                if metadata.local_rect.size.area() <= MIN_BRUSH_SPLIT_AREA {
+                    return;
                 }
             }
         }
 
-        if let Some((rect, radii, clip_scroll_node_data_index)) = selected_clip {
-            // If the scroll node transforms are different between the clip
-            // node and the primitive, we need to get the clip rect in the
-            // local space of the primitive, in order to generate correct
-            // local segments.
-            let local_clip_rect = if clip_scroll_node_data_index == prim_context.scroll_node.node_data_index {
-                rect
-            } else {
-                let clip_transform_data = &node_data[clip_scroll_node_data_index.0 as usize];
-                let prim_transform = &prim_context.scroll_node.world_content_transform;
+        let mut segment_builder = SegmentBuilder::new(
+            metadata.local_rect,
+            metadata.local_clip_rect
+        );
+
+        // Set to `Global` if we need a clip mask for the entire primitive.
+        // This is either because we don't handle segmenting a clip source,
+        // or we have a clip source from a different coordinate system.
+        let mut clip_mask_kind = BrushClipMaskKind::Individual;
+
+        // Segment the primitive on all the local-space clip sources
+        // that we can.
+        for clip_item in clips {
+            if clip_item.coordinate_system_id != prim_context.scroll_node.coordinate_system_id {
+                clip_mask_kind = BrushClipMaskKind::Global;
+                continue;
+            }
+
+            let local_clips = clip_store.get_opt(&clip_item.clip_sources).expect("bug");
+
+            for &(ref clip, _) in &local_clips.clips {
+                let (local_clip_rect, radius, mode) = match *clip {
+                    ClipSource::RoundedRectangle(rect, radii, clip_mode) => {
+                        (rect, Some(radii), clip_mode)
+                    }
+                    ClipSource::Rectangle(rect) => {
+                        (rect, None, ClipMode::Clip)
+                    }
+                    ClipSource::BorderCorner(..) |
+                    ClipSource::Image(..) => {
+                        // TODO(gw): We can easily extend the segment builder
+                        //           to support these clip sources in the
+                        //           future, but they are rarely used.
+                        clip_mask_kind = BrushClipMaskKind::Global;
+                        continue;
+                    }
+                };
 
-                let relative_transform = prim_transform
-                    .inverse()
-                    .unwrap_or(WorldToLayerTransform::identity())
-                    .pre_mul(&clip_transform_data.transform);
+                // If the scroll node transforms are different between the clip
+                // node and the primitive, we need to get the clip rect in the
+                // local space of the primitive, in order to generate correct
+                // local segments.
+                let local_clip_rect = if clip_item.scroll_node_data_index == prim_context.scroll_node.node_data_index {
+                    local_clip_rect
+                } else {
+                    let clip_transform_data = &node_data[clip_item.scroll_node_data_index.0 as usize];
+                    let prim_transform = &prim_context.scroll_node.world_content_transform;
+
+                    let relative_transform = prim_transform
+                        .inverse()
+                        .unwrap_or(WorldToLayerTransform::identity())
+                        .pre_mul(&clip_transform_data.transform);
+
+                    relative_transform.transform_rect(&local_clip_rect)
+                };
+
+                segment_builder.push_rect(
+                    local_clip_rect,
+                    radius,
+                    mode
+                );
+            }
+        }
 
-                relative_transform.transform_rect(&rect)
-            };
-            brush.segment_desc = create_nine_patch(
-                &metadata.local_rect,
-                &local_clip_rect,
-                &radii
-            );
+        match brush.segment_desc {
+            Some(ref mut segment_desc) => {
+                segment_desc.clip_mask_kind = clip_mask_kind;
+            }
+            None => {
+                // TODO(gw): We can probably make the allocation
+                //           patterns of this and the segment
+                //           builder significantly better, by
+                //           retaining it across primitives.
+                let mut segments = Vec::new();
+
+                segment_builder.build(|segment| {
+                    segments.push(
+                        BrushSegment::new(
+                            segment.rect.origin,
+                            segment.rect.size,
+                            segment.has_mask,
+                            segment.edge_flags,
+                        ),
+                    );
+                });
+
+                brush.segment_desc = Some(BrushSegmentDescriptor {
+                    segments,
+                    clip_mask_kind,
+                });
+            }
         }
     }
 
-    fn update_nine_patch_clip_task_for_brush(
+    fn update_clip_task_for_brush(
         &mut self,
         prim_context: &PrimitiveContext,
         prim_index: PrimitiveIndex,
         render_tasks: &mut RenderTaskTree,
         clip_store: &mut ClipStore,
         tasks: &mut Vec<RenderTaskId>,
         node_data: &[ClipScrollNodeData],
         clips: &Vec<ClipWorkItem>,
         combined_outer_rect: &DeviceIntRect,
     ) -> bool {
         if self.cpu_metadata[prim_index.0].prim_kind != PrimitiveKind::Brush {
             return false;
         }
 
-        self.write_brush_nine_patch_segment_description(
+        self.write_brush_segment_description(
             prim_index,
             prim_context,
             clip_store,
             node_data,
             clips
         );
 
         let metadata = &self.cpu_metadata[prim_index.0];
         let brush = &mut self.cpu_brushes[metadata.cpu_prim_index.0];
         let segment_desc = match brush.segment_desc {
             Some(ref mut description) => description,
             None => return false,
         };
-
-        let enabled_segments = segment_desc.enabled_segments;
-        let can_optimize_clip_mask = segment_desc.can_optimize_clip_mask;
+        let clip_mask_kind = segment_desc.clip_mask_kind;
 
-        for (i, segment) in segment_desc.segments.iter_mut().enumerate() {
-            // We only build clips for the corners. The ordering of the
-            // BrushSegmentKind enum is such that corners come first, then
-            // edges, then inner.
-            let segment_enabled = ((1 << i) & enabled_segments) != 0;
-            let create_clip_task =
-               segment_enabled &&
-               (!can_optimize_clip_mask || i <= BrushSegmentKind::BottomLeft as usize);
-
-            segment.clip_task_id = if create_clip_task {
+        for segment in &mut segment_desc.segments {
+            segment.clip_task_id = if segment.may_need_clip_mask || clip_mask_kind == BrushClipMaskKind::Global {
                 let segment_screen_rect = calculate_screen_bounding_rect(
                     &prim_context.scroll_node.world_content_transform,
                     &segment.local_rect,
-                    prim_context.device_pixel_ratio
+                    prim_context.device_pixel_scale,
                 );
 
                 combined_outer_rect.intersection(&segment_screen_rect).map(|bounds| {
                     let clip_task = RenderTask::new_mask(
                         None,
                         bounds,
                         clips.clone(),
                         prim_context.scroll_node.coordinate_system_id,
@@ -1565,29 +1563,35 @@ impl PrimitiveStore {
         let prim_coordinate_system_id = prim_context.scroll_node.coordinate_system_id;
         let transform = &prim_context.scroll_node.world_content_transform;
         let extra_clip =  {
             let metadata = &self.cpu_metadata[prim_index.0];
             let prim_clips = clip_store.get_mut(&metadata.clip_sources);
             if prim_clips.has_clips() {
                 prim_clips.update(gpu_cache, resource_cache);
                 let (screen_inner_rect, screen_outer_rect) =
-                    prim_clips.get_screen_bounds(transform, prim_context.device_pixel_ratio);
+                    prim_clips.get_screen_bounds(transform, prim_context.device_pixel_scale);
 
                 if let Some(outer) = screen_outer_rect {
                     combined_outer_rect = combined_outer_rect.and_then(|r| r.intersection(&outer));
                 }
 
                 Some(Rc::new(ClipChainNode {
                     work_item: ClipWorkItem {
                         scroll_node_data_index: prim_context.scroll_node.node_data_index,
                         clip_sources: metadata.clip_sources.weak(),
                         coordinate_system_id: prim_coordinate_system_id,
                     },
+                    // The local_clip_rect is a property of ClipChain nodes that are ClipScrollNodes.
+                    // It's used to calculate a local clipping rectangle before we reach this
+                    // point, so we can set it to zero here. It should be unused from this point
+                    // on.
+                    local_clip_rect: LayerRect::zero(),
                     screen_inner_rect,
+                    screen_outer_rect: screen_outer_rect.unwrap_or(prim_screen_rect),
                     combined_outer_screen_rect:
                         combined_outer_rect.unwrap_or_else(DeviceIntRect::zero),
                     combined_inner_screen_rect: DeviceIntRect::zero(),
                     prev: None,
                 }))
             } else {
                 None
             }
@@ -1628,18 +1632,18 @@ impl PrimitiveStore {
             // optimization of the empty mask.
             combined_inner_rect = DeviceIntRect::zero();
         }
 
         if combined_inner_rect.contains_rect(&prim_screen_rect) {
            return true;
         }
 
-        // First try to  render this primitive's mask using optimized nine-patch brush rendering.
-        if self.update_nine_patch_clip_task_for_brush(
+        // First try to render this primitive's mask using optimized brush rendering.
+        if self.update_clip_task_for_brush(
             prim_context,
             prim_index,
             render_tasks,
             clip_store,
             tasks,
             node_data,
             &clips,
             &combined_outer_rect,
@@ -1672,17 +1676,19 @@ impl PrimitiveStore {
         clip_scroll_tree: &ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         perform_culling: bool,
         parent_tasks: &mut Vec<RenderTaskId>,
         scene_properties: &SceneProperties,
         profile_counters: &mut FrameProfileCounters,
         pic_index: SpecificPrimitiveIndex,
         screen_rect: &DeviceIntRect,
+        clip_chain_rect_index: ClipChainRectIndex,
         node_data: &[ClipScrollNodeData],
+        local_rects: &mut Vec<LayerRect>,
     ) -> Option<LayerRect> {
         // Reset the visibility of this primitive.
         // Do some basic checks first, that can early out
         // without even knowing the local rect.
         let (cpu_prim_index, dependencies, cull_children, may_need_clip_mask) = {
             let metadata = &mut self.cpu_metadata[prim_index.0];
             metadata.screen_rect = None;
 
@@ -1740,16 +1746,17 @@ impl PrimitiveStore {
                 cull_children,
                 &mut child_tasks,
                 profile_counters,
                 rfid,
                 scene_properties,
                 cpu_prim_index,
                 screen_rect,
                 node_data,
+                local_rects,
             );
 
             let metadata = &mut self.cpu_metadata[prim_index.0];
 
             // Restore the dependencies (borrow check dance)
             let pic = &mut self.cpu_pictures[cpu_prim_index.0];
             pic.runs = dependencies;
 
@@ -1762,36 +1769,41 @@ impl PrimitiveStore {
         let (local_rect, unclipped_device_rect) = {
             let metadata = &mut self.cpu_metadata[prim_index.0];
             if metadata.local_rect.size.width <= 0.0 ||
                metadata.local_rect.size.height <= 0.0 {
                 warn!("invalid primitive rect {:?}", metadata.local_rect);
                 return None;
             }
 
-            let local_rect = metadata.local_rect.intersection(&metadata.local_clip_rect);
+            let local_rect = metadata.local_clip_rect.intersection(&metadata.local_rect);
             let local_rect = match local_rect {
                 Some(local_rect) => local_rect,
                 None if perform_culling => return None,
                 None => LayerRect::zero(),
             };
 
             let screen_bounding_rect = calculate_screen_bounding_rect(
                 &prim_context.scroll_node.world_content_transform,
                 &local_rect,
-                prim_context.device_pixel_ratio
+                prim_context.device_pixel_scale,
             );
 
-            let clip_bounds = &prim_context.clip_node.combined_clip_outer_bounds;
-            metadata.screen_rect = screen_bounding_rect.intersection(clip_bounds);
+            let clip_bounds = match prim_context.clip_node.clip_chain_node {
+                Some(ref node) => node.combined_outer_screen_rect,
+                None => *screen_rect,
+            };
+            metadata.screen_rect = screen_bounding_rect.intersection(&clip_bounds);
 
             if metadata.screen_rect.is_none() && perform_culling {
                 return None;
             }
 
+            metadata.clip_chain_rect_index = clip_chain_rect_index;
+
             (local_rect, screen_bounding_rect)
         };
 
         if perform_culling && may_need_clip_mask && !self.update_clip_task(
             prim_index,
             prim_context,
             &unclipped_device_rect,
             screen_rect,
@@ -1841,16 +1853,17 @@ impl PrimitiveStore {
         perform_culling: bool,
         parent_tasks: &mut Vec<RenderTaskId>,
         profile_counters: &mut FrameProfileCounters,
         original_reference_frame_id: Option<ClipId>,
         scene_properties: &SceneProperties,
         pic_index: SpecificPrimitiveIndex,
         screen_rect: &DeviceIntRect,
         node_data: &[ClipScrollNodeData],
+        local_rects: &mut Vec<LayerRect>,
     ) -> PrimitiveRunLocalRect {
         let mut result = PrimitiveRunLocalRect {
             local_rect_in_actual_parent_space: LayerRect::zero(),
             local_rect_in_original_parent_space: LayerRect::zero(),
         };
 
         for run in runs {
             // TODO(gw): Perhaps we can restructure this to not need to create
@@ -1884,22 +1897,38 @@ impl PrimitiveStore {
                 });
 
             let display_list = &pipelines
                 .get(&pipeline_id)
                 .expect("No display list?")
                 .display_list;
 
             let child_prim_context = PrimitiveContext::new(
-                parent_prim_context.device_pixel_ratio,
+                parent_prim_context.device_pixel_scale,
                 display_list,
                 clip_node,
                 scroll_node,
             );
 
+
+            let clip_chain_rect = match perform_culling {
+                true => get_local_clip_rect_for_nodes(scroll_node, clip_node),
+                false => None,
+            };
+
+            let clip_chain_rect_index = match clip_chain_rect {
+                Some(rect) if rect.is_empty() => continue,
+                Some(rect) => {
+                    local_rects.push(rect);
+                    ClipChainRectIndex(local_rects.len() - 1)
+                }
+                None => ClipChainRectIndex(0), // Index 0 means no clipping.
+            };
+
+
             for i in 0 .. run.count {
                 let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
 
                 if let Some(prim_local_rect) = self.prepare_prim_for_render(
                     prim_index,
                     &child_prim_context,
                     resource_cache,
                     gpu_cache,
@@ -1908,17 +1937,19 @@ impl PrimitiveStore {
                     clip_scroll_tree,
                     pipelines,
                     perform_culling,
                     parent_tasks,
                     scene_properties,
                     profile_counters,
                     pic_index,
                     screen_rect,
+                    clip_chain_rect_index,
                     node_data,
+                    local_rects,
                 ) {
                     profile_counters.visible_primitives.inc();
 
                     if let Some(ref matrix) = original_relative_transform {
                         let bounds = matrix.transform_rect(&prim_local_rect);
                         result.local_rect_in_original_parent_space =
                             result.local_rect_in_original_parent_space.union(&bounds);
                     }
@@ -1956,33 +1987,16 @@ impl InsideTest<ComplexClipRegion> for C
             clip.radii.top_right.height >= self.radii.top_right.height - delta_top &&
             clip.radii.bottom_left.width >= self.radii.bottom_left.width - delta_left &&
             clip.radii.bottom_left.height >= self.radii.bottom_left.height - delta_bottom &&
             clip.radii.bottom_right.width >= self.radii.bottom_right.width - delta_right &&
             clip.radii.bottom_right.height >= self.radii.bottom_right.height - delta_bottom
     }
 }
 
-fn create_nine_patch(
-    local_rect: &LayerRect,
-    local_clip_rect: &LayerRect,
-    radii: &BorderRadius
-) -> Option<Box<BrushSegmentDescriptor>> {
-    extract_inner_rect_safe(local_clip_rect, radii).map(|inner| {
-        let mut desc = BrushSegmentDescriptor::new(
-            local_rect,
-            &inner,
-            None,
-        );
-        desc.can_optimize_clip_mask = true;
-
-        Box::new(desc)
-    })
-}
-
 fn convert_clip_chain_to_clip_vector(
     clip_chain: ClipChain,
     extra_clip: ClipChain,
     combined_outer_rect: &DeviceIntRect,
     combined_inner_rect: &mut DeviceIntRect,
 ) -> Vec<ClipWorkItem> {
     // Filter out all the clip instances that don't contribute to the result.
     ClipChainNodeIter { current: extra_clip }
@@ -2002,8 +2016,34 @@ fn convert_clip_chain_to_clip_vector(
             } else {
                 DeviceIntRect::zero()
             };
 
             Some(node.work_item.clone())
         })
         .collect()
 }
+
+fn get_local_clip_rect_for_nodes(
+    scroll_node: &ClipScrollNode,
+    clip_node: &ClipScrollNode,
+) -> Option<LayerRect> {
+    let local_rect = ClipChainNodeIter { current: clip_node.clip_chain_node.clone() }.fold(
+        None,
+        |combined_local_clip_rect: Option<LayerRect>, node| {
+            if node.work_item.coordinate_system_id != scroll_node.coordinate_system_id {
+                return combined_local_clip_rect;
+            }
+
+            Some(match combined_local_clip_rect {
+                Some(combined_rect) =>
+                    combined_rect.intersection(&node.local_clip_rect).unwrap_or_else(LayerRect::zero),
+                None => node.local_clip_rect,
+            })
+        }
+    );
+
+    match local_rect {
+        Some(local_rect) =>
+            Some(scroll_node.coordinate_system_relative_transform.unapply(&local_rect)),
+        None => None,
+    }
+}
--- a/gfx/webrender/src/profiler.rs
+++ b/gfx/webrender/src/profiler.rs
@@ -309,30 +309,26 @@ impl FrameProfileCounters {
             visible_primitives: IntProfileCounter::new("Visible Primitives"),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct TextureCacheProfileCounters {
     pub pages_a8_linear: ResourceProfileCounter,
-    pub pages_rgb8_linear: ResourceProfileCounter,
     pub pages_rgba8_linear: ResourceProfileCounter,
     pub pages_rgba8_nearest: ResourceProfileCounter,
-    pub pages_rg8_linear: ResourceProfileCounter,
 }
 
 impl TextureCacheProfileCounters {
     pub fn new() -> Self {
         TextureCacheProfileCounters {
             pages_a8_linear: ResourceProfileCounter::new("Texture A8 cached pages"),
-            pages_rgb8_linear: ResourceProfileCounter::new("Texture RGB8 cached pages"),
             pages_rgba8_linear: ResourceProfileCounter::new("Texture RGBA8 cached pages (L)"),
             pages_rgba8_nearest: ResourceProfileCounter::new("Texture RGBA8 cached pages (N)"),
-            pages_rg8_linear: ResourceProfileCounter::new("Texture RG8 cached pages"),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct GpuCacheProfileCounters {
     pub allocated_rows: IntProfileCounter,
     pub allocated_blocks: IntProfileCounter,
@@ -993,20 +989,18 @@ impl Profiler {
             debug_renderer,
             true,
             &mut self.draw_state
         );
 
         Profiler::draw_counters(
             &[
                 &backend_profile.resources.texture_cache.pages_a8_linear,
-                &backend_profile.resources.texture_cache.pages_rgb8_linear,
                 &backend_profile.resources.texture_cache.pages_rgba8_linear,
                 &backend_profile.resources.texture_cache.pages_rgba8_nearest,
-                &backend_profile.resources.texture_cache.pages_rg8_linear,
                 &backend_profile.ipc.display_lists,
             ],
             debug_renderer,
             true,
             &mut self.draw_state
         );
 
         Profiler::draw_counters(
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -1,52 +1,76 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ApiMsg, BlobImageRenderer, BuiltDisplayList, DebugCommand, DeviceIntPoint};
 #[cfg(feature = "debugger")]
 use api::{BuiltDisplayListIter, SpecificDisplayItem};
-use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
+use api::{DevicePixelScale, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{DocumentId, DocumentLayer, DocumentMsg};
-use api::{IdNamespace, LayerPoint, PipelineId, RenderNotifier};
+use api::{IdNamespace, PipelineId, RenderNotifier};
 use api::channel::{MsgReceiver, PayloadReceiver, PayloadReceiverHelperMethods};
 use api::channel::{PayloadSender, PayloadSenderHelperMethods};
 #[cfg(feature = "debugger")]
 use debug_server;
 use frame::FrameContext;
 use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use gpu_cache::GpuCache;
 use internal_types::{DebugOutput, FastHashMap, FastHashSet, RenderedDocument, ResultMsg};
+#[cfg(feature = "capture")]
+use internal_types::ExternalCaptureImage;
 use profiler::{BackendProfileCounters, ResourceProfileCounters};
 use rayon::ThreadPool;
 use record::ApiRecordingReceiver;
 use resource_cache::ResourceCache;
+#[cfg(feature = "capture")]
+use resource_cache::PlainResources;
 use scene::Scene;
+#[cfg(feature = "serialize")]
+use serde::{Serialize, Serializer};
 #[cfg(feature = "debugger")]
 use serde_json;
+#[cfg(feature = "capture")]
+use std::path::PathBuf;
 use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
 use std::sync::Arc;
 use std::sync::mpsc::Sender;
 use std::u32;
 use texture_cache::TextureCache;
 use time::precise_time_ns;
 
-struct Document {
-    scene: Scene,
-    frame_ctx: FrameContext,
-    // the `Option` here is only to deal with borrow checker
-    frame_builder: Option<FrameBuilder>,
+
+#[cfg_attr(feature = "capture", derive(Clone, Serialize, Deserialize))]
+struct DocumentView {
     window_size: DeviceUintSize,
     inner_rect: DeviceUintRect,
     layer: DocumentLayer,
     pan: DeviceIntPoint,
     device_pixel_ratio: f32,
     page_zoom_factor: f32,
     pinch_zoom_factor: f32,
+}
+
+impl DocumentView {
+    fn accumulated_scale_factor(&self) -> DevicePixelScale {
+        DevicePixelScale::new(
+            self.device_pixel_ratio *
+            self.page_zoom_factor *
+            self.pinch_zoom_factor
+        )
+    }
+}
+
+struct Document {
+    scene: Scene,
+    view: DocumentView,
+    frame_ctx: FrameContext,
+    // the `Option` here is only to deal with borrow checker
+    frame_builder: Option<FrameBuilder>,
     // A set of pipelines that the caller has requested be
     // made available as output textures.
     output_pipelines: FastHashSet<PipelineId>,
     // A helper switch to prevent any frames rendering triggered by scrolling
     // messages between `SetDisplayList` and `GenerateFrame`.
     // If we allow them, then a reftest that scrolls a few layers before generating
     // the first frame would produce inconsistent rendering results, because
     // scroll events are not necessarily received in deterministic order.
@@ -63,69 +87,61 @@ impl Document {
     ) -> Self {
         let render_on_scroll = if enable_render_on_scroll {
             Some(false)
         } else {
             None
         };
         Document {
             scene: Scene::new(),
+            view: DocumentView {
+                window_size,
+                inner_rect: DeviceUintRect::new(DeviceUintPoint::zero(), window_size),
+                layer,
+                pan: DeviceIntPoint::zero(),
+                page_zoom_factor: 1.0,
+                pinch_zoom_factor: 1.0,
+                device_pixel_ratio: default_device_pixel_ratio,
+            },
             frame_ctx: FrameContext::new(config),
             frame_builder: Some(FrameBuilder::empty()),
-            window_size,
-            inner_rect: DeviceUintRect::new(DeviceUintPoint::zero(), window_size),
-            layer,
-            pan: DeviceIntPoint::zero(),
-            page_zoom_factor: 1.0,
-            pinch_zoom_factor: 1.0,
-            device_pixel_ratio: default_device_pixel_ratio,
             render_on_scroll,
             output_pipelines: FastHashSet::default(),
         }
     }
 
-    fn accumulated_scale_factor(&self) -> f32 {
-        self.device_pixel_ratio *
-        self.page_zoom_factor *
-        self.pinch_zoom_factor
-    }
-
     fn build_scene(&mut self, resource_cache: &mut ResourceCache) {
-        let accumulated_scale_factor = self.accumulated_scale_factor();
         // this code is why we have `Option`, which is never `None`
         let frame_builder = self.frame_ctx.create(
             self.frame_builder.take().unwrap(),
             &self.scene,
             resource_cache,
-            self.window_size,
-            self.inner_rect,
-            accumulated_scale_factor,
+            self.view.window_size,
+            self.view.inner_rect,
+            self.view.accumulated_scale_factor(),
             &self.output_pipelines,
         );
         self.frame_builder = Some(frame_builder);
     }
 
     fn render(
         &mut self,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         resource_profile: &mut ResourceProfileCounters,
     ) -> RenderedDocument {
-        let accumulated_scale_factor = self.accumulated_scale_factor();
-        let pan = LayerPoint::new(
-            self.pan.x as f32 / accumulated_scale_factor,
-            self.pan.y as f32 / accumulated_scale_factor,
-        );
+        let accumulated_scale_factor = self.view.accumulated_scale_factor();
+        let pan = self.view.pan.to_f32() / accumulated_scale_factor;
         self.frame_ctx.build_rendered_document(
             self.frame_builder.as_mut().unwrap(),
             resource_cache,
             gpu_cache,
             &self.scene.pipelines,
             accumulated_scale_factor,
-            self.layer,
+            self.view.layer,
             pan,
             &mut resource_profile.texture_cache,
             &mut resource_profile.gpu_cache,
             &self.scene.properties,
         )
     }
 }
 
@@ -135,16 +151,26 @@ enum DocumentOp {
     ScrolledNop,
     Scrolled(RenderedDocument),
     Rendered(RenderedDocument),
 }
 
 /// The unique id for WR resource identification.
 static NEXT_NAMESPACE_ID: AtomicUsize = ATOMIC_USIZE_INIT;
 
+#[cfg(feature = "capture")]
+#[derive(Serialize, Deserialize)]
+struct PlainRenderBackend {
+    default_device_pixel_ratio: f32,
+    enable_render_on_scroll: bool,
+    frame_config: FrameBuilderConfig,
+    documents: FastHashMap<DocumentId, DocumentView>,
+    resources: PlainResources,
+}
+
 /// The render backend is responsible for transforming high level display lists into
 /// GPU-friendly work which is then submitted to the renderer in the form of a frame::Frame.
 ///
 /// The render backend operates on its own thread.
 pub struct RenderBackend {
     api_rx: MsgReceiver<ApiMsg>,
     payload_rx: PayloadReceiver,
     payload_tx: PayloadSender,
@@ -205,44 +231,45 @@ impl RenderBackend {
         document_id: DocumentId,
         message: DocumentMsg,
         frame_counter: u32,
         profile_counters: &mut BackendProfileCounters,
     ) -> DocumentOp {
         let doc = self.documents.get_mut(&document_id).expect("No document?");
 
         match message {
+            //TODO: move view-related messages into a separate enum?
             DocumentMsg::SetPageZoom(factor) => {
-                doc.page_zoom_factor = factor.get();
+                doc.view.page_zoom_factor = factor.get();
                 DocumentOp::Nop
             }
             DocumentMsg::EnableFrameOutput(pipeline_id, enable) => {
                 if enable {
                     doc.output_pipelines.insert(pipeline_id);
                 } else {
                     doc.output_pipelines.remove(&pipeline_id);
                 }
                 DocumentOp::Nop
             }
             DocumentMsg::SetPinchZoom(factor) => {
-                doc.pinch_zoom_factor = factor.get();
+                doc.view.pinch_zoom_factor = factor.get();
                 DocumentOp::Nop
             }
             DocumentMsg::SetPan(pan) => {
-                doc.pan = pan;
+                doc.view.pan = pan;
                 DocumentOp::Nop
             }
             DocumentMsg::SetWindowParameters {
                 window_size,
                 inner_rect,
                 device_pixel_ratio,
             } => {
-                doc.window_size = window_size;
-                doc.inner_rect = inner_rect;
-                doc.device_pixel_ratio = device_pixel_ratio;
+                doc.view.window_size = window_size;
+                doc.view.inner_rect = inner_rect;
+                doc.view.device_pixel_ratio = device_pixel_ratio;
                 DocumentOp::Nop
             }
             DocumentMsg::SetDisplayList {
                 epoch,
                 pipeline_id,
                 background,
                 viewport_size,
                 content_size,
@@ -545,16 +572,44 @@ impl RenderBackend {
                         DebugCommand::FetchDocuments => {
                             let json = self.get_docs_for_debugger();
                             ResultMsg::DebugOutput(DebugOutput::FetchDocuments(json))
                         }
                         DebugCommand::FetchClipScrollTree => {
                             let json = self.get_clip_scroll_tree_for_debugger();
                             ResultMsg::DebugOutput(DebugOutput::FetchClipScrollTree(json))
                         }
+                        #[cfg(feature = "capture")]
+                        DebugCommand::SaveCapture(root) => {
+                            let deferred = self.save_capture(&root);
+                            ResultMsg::DebugOutput(DebugOutput::SaveCapture(root, deferred))
+                        },
+                        #[cfg(feature = "capture")]
+                        DebugCommand::LoadCapture(root) => {
+                            NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed);
+                            frame_counter += 1;
+                            self.load_capture(&root, &mut profile_counters);
+                            ResultMsg::DebugOutput(DebugOutput::LoadCapture)
+                        },
+                        DebugCommand::EnableDualSourceBlending(enable) => {
+                            // Set in the config used for any future documents
+                            // that are created.
+                            self.frame_config
+                                .dual_source_blending_is_enabled = enable;
+
+                            // Set for any existing documents.
+                            for (_, doc) in &mut self.documents {
+                                doc.frame_ctx
+                                   .frame_builder_config
+                                   .dual_source_blending_is_enabled = enable;
+                            }
+
+                            // We don't want to forward this message to the renderer.
+                            continue;
+                        }
                         _ => ResultMsg::DebugCommand(option),
                     };
                     self.result_tx.send(msg).unwrap();
                     self.notifier.wake_up();
                 }
                 ApiMsg::ShutDown => {
                     self.notifier.shut_down();
                     break;
@@ -712,8 +767,112 @@ impl ToDebugString for SpecificDisplayIt
             SpecificDisplayItem::StickyFrame(..) => String::from("sticky_frame"),
             SpecificDisplayItem::SetGradientStops => String::from("set_gradient_stops"),
             SpecificDisplayItem::PopStackingContext => String::from("pop_stacking_context"),
             SpecificDisplayItem::PushShadow(..) => String::from("push_shadow"),
             SpecificDisplayItem::PopAllShadows => String::from("pop_all_shadows"),
         }
     }
 }
+
+
+#[cfg(feature = "capture")]
+impl RenderBackend {
+    // Note: the mutable `self` is only needed here for resolving blob images
+    fn save_capture(&mut self, root: &PathBuf) -> Vec<ExternalCaptureImage> {
+        use ron::ser::pretty;
+        use std::fs;
+        use std::io::Write;
+
+        info!("capture: saving {}", root.to_string_lossy());
+        let (resources, deferred) = self.resource_cache.save_capture(root);
+
+        for (&id, doc) in &self.documents {
+            info!("\tdocument {:?}", id);
+            let ron = pretty::to_string(&doc.scene).unwrap();
+            let file_name = format!("scene-{}-{}.ron", (id.0).0, id.1);
+            let ron_path = root.clone().join(file_name);
+            let mut file = fs::File::create(ron_path).unwrap();
+            write!(file, "{}\n", ron).unwrap();
+        }
+
+        info!("\tbackend");
+        let serial = PlainRenderBackend {
+            default_device_pixel_ratio: self.default_device_pixel_ratio,
+            enable_render_on_scroll: self.enable_render_on_scroll,
+            frame_config: self.frame_config.clone(),
+            documents: self.documents
+                .iter()
+                .map(|(id, doc)| (*id, doc.view.clone()))
+                .collect(),
+            resources,
+        };
+
+        let ron = pretty::to_string(&serial).unwrap();
+        let ron_path = root.clone().join("backend.ron");
+        let mut file = fs::File::create(ron_path).unwrap();
+        write!(file, "{}\n", ron).unwrap();
+
+        deferred
+    }
+
+    fn load_capture(
+        &mut self,
+        root: &PathBuf,
+        profile_counters: &mut BackendProfileCounters,
+    ) {
+        use ron::de;
+        use std::fs::File;
+        use std::io::Read;
+
+        let mut string = String::new();
+        info!("capture: loading {}", root.to_string_lossy());
+
+        File::open(root.join("backend.ron"))
+            .unwrap()
+            .read_to_string(&mut string)
+            .unwrap();
+        let backend: PlainRenderBackend = de::from_str(&string)
+            .unwrap();
+
+        // Note: it would be great to have RenderBackend split explicitly
+        // into what's used before and what's used after scene building,
+        // so that, for example, we never miss anything in the code below:
+
+        self.resource_cache.load_capture(backend.resources, root);
+        self.gpu_cache = GpuCache::new();
+        self.documents.clear();
+        self.default_device_pixel_ratio = backend.default_device_pixel_ratio;
+        self.frame_config = backend.frame_config;
+        self.enable_render_on_scroll = backend.enable_render_on_scroll;
+
+        for (id, view) in backend.documents {
+            info!("\tdocument {:?}", id);
+            string.clear();
+            let file_name = format!("scene-{}-{}.ron", (id.0).0, id.1);
+            File::open(root.join(file_name))
+                .expect(&format!("Unable to open scene {:?}", id))
+                .read_to_string(&mut string)
+                .unwrap();
+            let scene: Scene = de::from_str(&string)
+                .unwrap();
+
+            let mut doc = Document {
+                scene,
+                view,
+                frame_ctx: FrameContext::new(self.frame_config.clone()),
+                frame_builder: Some(FrameBuilder::empty()),
+                output_pipelines: FastHashSet::default(),
+                render_on_scroll: None,
+            };
+
+            doc.build_scene(&mut self.resource_cache);
+            let render_doc = doc.render(
+                &mut self.resource_cache,
+                &mut self.gpu_cache,
+                &mut profile_counters.resources,
+            );
+            self.publish_document_and_notify_compositor(id, render_doc, profile_counters);
+
+            self.documents.insert(id, doc);
+        }
+    }
+}
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -1,21 +1,20 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize, DevicePixel};
-use api::{LayerPoint, LayerRect, PremultipliedColorF};
+use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+use api::{LayerRect, PremultipliedColorF};
 use box_shadow::BoxShadowCacheKey;
 use clip::{ClipSourcesWeakHandle};
 use clip_scroll_tree::CoordinateSystemId;
-use euclid::TypedSize2D;
-use gpu_types::{ClipScrollNodeIndex};
+use gpu_types::{ClipScrollNodeIndex, PictureType};
 use internal_types::RenderPassIndex;
-use picture::RasterizationSpace;
+use picture::ContentOrigin;
 use prim_store::{PrimitiveIndex};
 #[cfg(feature = "debugger")]
 use print_tree::{PrintTreePrinter};
 use std::{cmp, ops, usize, f32, i32};
 use std::rc::Rc;
 use tiling::{RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind};
 
@@ -36,22 +35,69 @@ pub struct RenderTaskTree {
     pub task_data: Vec<RenderTaskData>,
 }
 
 pub type ClipChain = Option<Rc<ClipChainNode>>;
 
 #[derive(Debug)]
 pub struct ClipChainNode {
     pub work_item: ClipWorkItem,
+    pub local_clip_rect: LayerRect,
+    pub screen_outer_rect: DeviceIntRect,
     pub screen_inner_rect: DeviceIntRect,
     pub combined_outer_screen_rect: DeviceIntRect,
     pub combined_inner_screen_rect: DeviceIntRect,
     pub prev: ClipChain,
 }
 
+impl ClipChainNode {
+    pub fn new(
+        work_item: ClipWorkItem,
+        local_clip_rect: LayerRect,
+        screen_outer_rect: DeviceIntRect,
+        screen_inner_rect: DeviceIntRect,
+        parent_chain: ClipChain,
+    ) -> ClipChainNode {
+        let mut node = ClipChainNode {
+            work_item,
+            local_clip_rect,
+            screen_outer_rect,
+            screen_inner_rect,
+            combined_outer_screen_rect: screen_outer_rect,
+            combined_inner_screen_rect: screen_inner_rect,
+            prev: None,
+        };
+        node.set_parent(parent_chain);
+        node
+    }
+
+    fn set_parent(&mut self, new_parent: ClipChain) {
+        self.prev = new_parent.clone();
+
+        let parent_node = match new_parent {
+            Some(ref parent_node) => parent_node,
+            None => return,
+        };
+
+        // If this clip's outer rectangle is completely enclosed by the clip
+        // chain's inner rectangle, then the only clip that matters from this point
+        // on is this clip. We can disconnect this clip from the parent clip chain.
+        if parent_node.combined_inner_screen_rect.contains_rect(&self.screen_outer_rect) {
+            self.prev = None;
+        }
+
+        self.combined_outer_screen_rect =
+            parent_node.combined_outer_screen_rect.intersection(&self.screen_outer_rect)
+            .unwrap_or_else(DeviceIntRect::zero);
+        self.combined_inner_screen_rect =
+            parent_node.combined_inner_screen_rect.intersection(&self.screen_inner_rect)
+            .unwrap_or_else(DeviceIntRect::zero);
+    }
+}
+
 pub struct ClipChainNodeIter {
     pub current: ClipChain,
 }
 
 impl Iterator for ClipChainNodeIter {
     type Item = Rc<ClipChainNode>;
 
     fn next(&mut self) -> ClipChain {
@@ -149,17 +195,17 @@ impl ops::IndexMut<RenderTaskId> for Ren
         &mut self.tasks[id.0 as usize]
     }
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTaskKey {
     /// Draw the alpha mask for a shared clip.
     CacheMask(ClipId),
-    CacheScaling(BoxShadowCacheKey, TypedSize2D<i32, DevicePixel>),
+    CacheScaling(BoxShadowCacheKey, DeviceIntSize),
     CacheBlur(BoxShadowCacheKey, i32),
     CachePicture(BoxShadowCacheKey),
 }
 
 #[derive(Debug)]
 pub enum RenderTaskLocation {
     Fixed,
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
@@ -178,19 +224,19 @@ pub struct CacheMaskTask {
     pub clips: Vec<ClipWorkItem>,
     pub coordinate_system_id: CoordinateSystemId,
 }
 
 #[derive(Debug)]
 pub struct PictureTask {
     pub prim_index: PrimitiveIndex,
     pub target_kind: RenderTargetKind,
-    pub content_origin: LayerPoint,
+    pub content_origin: ContentOrigin,
     pub color: PremultipliedColorF,
-    pub rasterization_kind: RasterizationSpace,
+    pub pic_type: PictureType,
 }
 
 #[derive(Debug)]
 pub struct BlurTask {
     pub blur_std_deviation: f32,
     pub target_kind: RenderTargetKind,
     pub regions: Vec<LayerRect>,
     pub color: PremultipliedColorF,
@@ -245,42 +291,41 @@ pub struct RenderTask {
     pub pass_index: Option<RenderPassIndex>,
 }
 
 impl RenderTask {
     pub fn new_picture(
         size: Option<DeviceIntSize>,
         prim_index: PrimitiveIndex,
         target_kind: RenderTargetKind,
-        content_origin_x: f32,
-        content_origin_y: f32,
+        content_origin: ContentOrigin,
         color: PremultipliedColorF,
         clear_mode: ClearMode,
-        rasterization_kind: RasterizationSpace,
         children: Vec<RenderTaskId>,
         box_shadow_cache_key: Option<BoxShadowCacheKey>,
+        pic_type: PictureType,
     ) -> Self {
         let location = match size {
             Some(size) => RenderTaskLocation::Dynamic(None, size),
             None => RenderTaskLocation::Fixed,
         };
 
         RenderTask {
             cache_key: match box_shadow_cache_key {
                 Some(key) => Some(RenderTaskKey::CachePicture(key)),
                 None => None,
             },
             children,
             location,
             kind: RenderTaskKind::Picture(PictureTask {
                 prim_index,
                 target_kind,
-                content_origin: LayerPoint::new(content_origin_x, content_origin_y),
+                content_origin,
                 color,
-                rasterization_kind,
+                pic_type,
             }),
             clear_mode,
             pass_index: None,
         }
     }
 
     pub fn new_readback(screen_rect: DeviceIntRect) -> Self {
         RenderTask {
@@ -438,21 +483,30 @@ impl RenderTask {
         //       in renderer.rs.
         // TODO(gw): Maybe there's a way to make this stuff a bit
         //           more type-safe. Although, it will always need
         //           to be kept in sync with the GLSL code anyway.
 
         let (data1, data2) = match self.kind {
             RenderTaskKind::Picture(ref task) => {
                 (
-                    [
-                        task.content_origin.x,
-                        task.content_origin.y,
-                        task.rasterization_kind as u32 as f32,
-                    ],
+                    // Note: has to match `PICTURE_TYPE_*` in shaders
+                    // TODO(gw): Instead of using the sign of the picture
+                    //           type here, we should consider encoding it
+                    //           as a set of flags that get cast here
+                    //           and in the shader. This is a bit tidier
+                    //           and allows for future expansion of flags.
+                    match task.content_origin {
+                        ContentOrigin::Local(point) => [
+                            point.x, point.y, task.pic_type as u32 as f32,
+                        ],
+                        ContentOrigin::Screen(point) => [
+                            point.x as f32, point.y as f32, -(task.pic_type as u32 as f32),
+                        ],
+                    },
                     task.color.to_array()
                 )
             }
             RenderTaskKind::CacheMask(ref task) => {
                 (
                     [
                         task.actual_rect.origin.x as f32,
                         task.actual_rect.origin.y as f32,
@@ -587,17 +641,16 @@ impl RenderTask {
     }
 
     #[cfg(feature = "debugger")]
     pub fn print_with<T: PrintTreePrinter>(&self, pt: &mut T, tree: &RenderTaskTree) -> bool {
         match self.kind {
             RenderTaskKind::Picture(ref task) => {
                 pt.new_level(format!("Picture of {:?}", task.prim_index));
                 pt.add_item(format!("kind: {:?}", task.target_kind));
-                pt.add_item(format!("space: {:?}", task.rasterization_kind));
             }
             RenderTaskKind::CacheMask(ref task) => {
                 pt.new_level(format!("CacheMask with {} clips", task.clips.len()));
                 pt.add_item(format!("rect: {:?}", task.actual_rect));
             }
             RenderTaskKind::VerticalBlur(ref task) => {
                 pt.new_level("VerticalBlur".to_owned());
                 task.print_with(pt);
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -4,48 +4,48 @@
 
 //! The webrender API.
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
-use api::{channel, BlobImageRenderer, FontRenderMode};
-use api::{ColorF, DocumentId, Epoch, PipelineId, RenderApiSender, RenderNotifier};
-use api::{DevicePixel, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
-use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, ColorU};
-use api::{ExternalImageId, ExternalImageType, ImageFormat};
-use api::{YUV_COLOR_SPACES, YUV_FORMATS};
-use api::{YuvColorSpace, YuvFormat};
+use api::{BlobImageRenderer, ColorF, ColorU, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, Epoch, ExternalImageId};
+use api::{ExternalImageType, FontRenderMode, ImageFormat, PipelineId, RenderApiSender};
+use api::{RenderNotifier, YUV_COLOR_SPACES, YUV_FORMATS, YuvColorSpace, YuvFormat, channel};
 #[cfg(not(feature = "debugger"))]
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
 use api::channel::MsgSender;
+use batch::{BatchKey, BatchKind, BatchTextures, BrushBatchKind};
+use batch::{BrushImageSourceKind, TransformBatchKind};
 use debug_colors;
 use debug_render::DebugRenderer;
 #[cfg(feature = "debugger")]
 use debug_server::{self, DebugServer};
 use device::{DepthFunction, Device, FrameId, Program, UploadMethod, Texture,
              VertexDescriptor, PBO};
 use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute,
              VertexAttributeKind};
 use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget,
              VertexUsageHint, VAO, VBO, CustomVAO};
 use device::ProgramCache;
-use euclid::{rect, TypedScale, Transform3D};
+use euclid::{rect, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_types::PrimitiveInstance;
-use internal_types::{BatchTextures, SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
+use internal_types::{SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
 use internal_types::{CacheTextureId, FastHashMap, RenderedDocument, ResultMsg, TextureUpdateOp};
 use internal_types::{DebugOutput, RenderPassIndex, RenderTargetInfo, TextureUpdateList, TextureUpdateSource};
+use picture::ContentOrigin;
 use profiler::{BackendProfileCounters, Profiler};
 use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use query::{GpuProfiler, GpuTimer};
 use rayon::Configuration as ThreadPoolConfig;
 use rayon::ThreadPool;
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
 use render_task::{RenderTaskKind, RenderTaskTree};
@@ -61,17 +61,17 @@ use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget};
 use tiling::{RenderPass, RenderPassKind, RenderTargetList};
-use tiling::{BatchKey, BatchKind, BrushBatchKind, BrushImageSourceKind, Frame, RenderTarget, ScalingInfo, TransformBatchKind};
+use tiling::{Frame, RenderTarget, ScalingInfo};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 /// Enabling this toggle would force the GPU cache scattered texture to
 /// be resized every frame, which enables GPU debuggers to see if this
 /// is performed correctly.
 const GPU_CACHE_RESIZE_TEST: bool = false;
@@ -86,16 +86,20 @@ const GPU_TAG_BRUSH_SOLID: GpuProfileTag
 const GPU_TAG_BRUSH_MASK: GpuProfileTag = GpuProfileTag {
     label: "B_Mask",
     color: debug_colors::BLACK,
 };
 const GPU_TAG_BRUSH_IMAGE: GpuProfileTag = GpuProfileTag {
     label: "B_Image",
     color: debug_colors::SILVER,
 };
+const GPU_TAG_BRUSH_LINE: GpuProfileTag = GpuProfileTag {
+    label: "Line",
+    color: debug_colors::DARKRED,
+};
 const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag {
     label: "C_Clip",
     color: debug_colors::PURPLE,
 };
 const GPU_TAG_CACHE_TEXT_RUN: GpuProfileTag = GpuProfileTag {
     label: "C_TextRun",
     color: debug_colors::MISTYROSE,
 };
@@ -106,20 +110,16 @@ const GPU_TAG_CACHE_LINE: GpuProfileTag 
 const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag {
     label: "target init",
     color: debug_colors::SLATEGREY,
 };
 const GPU_TAG_SETUP_DATA: GpuProfileTag = GpuProfileTag {
     label: "data init",
     color: debug_colors::LIGHTGREY,
 };
-const GPU_TAG_PRIM_LINE: GpuProfileTag = GpuProfileTag {
-    label: "Line",
-    color: debug_colors::DARKRED,
-};
 const GPU_TAG_PRIM_IMAGE: GpuProfileTag = GpuProfileTag {
     label: "Image",
     color: debug_colors::GREEN,
 };
 const GPU_TAG_PRIM_YUV_IMAGE: GpuProfileTag = GpuProfileTag {
     label: "YuvImage",
     color: debug_colors::DARKGREEN,
 };
@@ -193,23 +193,21 @@ impl TransformBatchKind {
                 ImageBufferKind::Texture2DArray => "Image (Array)",
             },
             TransformBatchKind::YuvImage(..) => "YuvImage",
             TransformBatchKind::AlignedGradient => "AlignedGradient",
             TransformBatchKind::AngleGradient => "AngleGradient",
             TransformBatchKind::RadialGradient => "RadialGradient",
             TransformBatchKind::BorderCorner => "BorderCorner",
             TransformBatchKind::BorderEdge => "BorderEdge",
-            TransformBatchKind::Line => "Line",
         }
     }
 
     fn gpu_sampler_tag(&self) -> GpuProfileTag {
         match *self {
-            TransformBatchKind::Line => GPU_TAG_PRIM_LINE,
             TransformBatchKind::TextRun(..) => GPU_TAG_PRIM_TEXT_RUN,
             TransformBatchKind::Image(..) => GPU_TAG_PRIM_IMAGE,
             TransformBatchKind::YuvImage(..) => GPU_TAG_PRIM_YUV_IMAGE,
             TransformBatchKind::BorderCorner => GPU_TAG_PRIM_BORDER_CORNER,
             TransformBatchKind::BorderEdge => GPU_TAG_PRIM_BORDER_EDGE,
             TransformBatchKind::AlignedGradient => GPU_TAG_PRIM_GRADIENT,
             TransformBatchKind::AngleGradient => GPU_TAG_PRIM_ANGLE_GRADIENT,
             TransformBatchKind::RadialGradient => GPU_TAG_PRIM_RADIAL_GRADIENT,
@@ -224,32 +222,34 @@ impl BatchKind {
             BatchKind::Composite { .. } => "Composite",
             BatchKind::HardwareComposite => "HardwareComposite",
             BatchKind::SplitComposite => "SplitComposite",
             BatchKind::Blend => "Blend",
             BatchKind::Brush(kind) => {
                 match kind {
                     BrushBatchKind::Image(..) => "Brush (Image)",
                     BrushBatchKind::Solid => "Brush (Solid)",
+                    BrushBatchKind::Line => "Brush (Line)",
                 }
             }
             BatchKind::Transformable(_, batch_kind) => batch_kind.debug_name(),
         }
     }
 
     fn gpu_sampler_tag(&self) -> GpuProfileTag {
         match *self {
             BatchKind::Composite { .. } => GPU_TAG_PRIM_COMPOSITE,
             BatchKind::HardwareComposite => GPU_TAG_PRIM_HW_COMPOSITE,
             BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
             BatchKind::Blend => GPU_TAG_PRIM_BLEND,
             BatchKind::Brush(kind) => {
                 match kind {
                     BrushBatchKind::Image(..) => GPU_TAG_BRUSH_IMAGE,
                     BrushBatchKind::Solid => GPU_TAG_BRUSH_SOLID,
+                    BrushBatchKind::Line => GPU_TAG_BRUSH_LINE,
                 }
             }
             BatchKind::Transformable(_, batch_kind) => batch_kind.gpu_sampler_tag(),
         }
     }
 }
 
 bitflags! {
@@ -283,18 +283,19 @@ type ShaderMode = i32;
 enum TextShaderMode {
     Alpha = 0,
     SubpixelConstantTextColor = 1,
     SubpixelPass0 = 2,
     SubpixelPass1 = 3,
     SubpixelWithBgColorPass0 = 4,
     SubpixelWithBgColorPass1 = 5,
     SubpixelWithBgColorPass2 = 6,
-    Bitmap = 7,
-    ColorBitmap = 8,
+    SubpixelDualSource = 7,
+    Bitmap = 8,
+    ColorBitmap = 9,
 }
 
 impl Into<ShaderMode> for TextShaderMode {
     fn into(self) -> i32 {
         self as i32
     }
 }
 
@@ -321,16 +322,17 @@ enum TextureSampler {
     ResourceCache,
     ClipScrollNodes,
     RenderTasks,
     Dither,
     // A special sampler that is bound to the A8 output of
     // the *first* pass. Items rendered in this target are
     // available as inputs to tasks in any subsequent pass.
     SharedCacheA8,
+    LocalClipRects
 }
 
 impl TextureSampler {
     fn color(n: usize) -> TextureSampler {
         match n {
             0 => TextureSampler::Color0,
             1 => TextureSampler::Color1,
             2 => TextureSampler::Color2,
@@ -349,16 +351,17 @@ impl Into<TextureSlot> for TextureSample
             TextureSampler::Color2 => TextureSlot(2),
             TextureSampler::CacheA8 => TextureSlot(3),
             TextureSampler::CacheRGBA8 => TextureSlot(4),
             TextureSampler::ResourceCache => TextureSlot(5),
             TextureSampler::ClipScrollNodes => TextureSlot(6),
             TextureSampler::RenderTasks => TextureSlot(7),
             TextureSampler::Dither => TextureSlot(8),
             TextureSampler::SharedCacheA8 => TextureSlot(9),
+            TextureSampler::LocalClipRects => TextureSlot(10),
         }
     }
 }
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedVertex {
     pub pos: [f32; 2],
@@ -428,17 +431,17 @@ const DESC_CLIP: VertexDescriptor = Vert
     ],
     instance_attributes: &[
         VertexAttribute {
             name: "aClipRenderTaskAddress",
             count: 1,
             kind: VertexAttributeKind::I32,
         },
         VertexAttribute {
-            name: "aClipLayerAddress",
+            name: "aScrollNodeId",
             count: 1,
             kind: VertexAttributeKind::I32,
         },
         VertexAttribute {
             name: "aClipSegment",
             count: 1,
             kind: VertexAttributeKind::I32,
         },
@@ -763,16 +766,17 @@ impl SourceTextureResolver {
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[allow(dead_code)] // SubpixelVariableTextColor is not used at the moment.
 pub enum BlendMode {
     None,
     PremultipliedAlpha,
     PremultipliedDestOut,
+    SubpixelDualSource,
     SubpixelConstantTextColor(ColorF),
     SubpixelWithBgColor,
     SubpixelVariableTextColor,
 }
 
 // Tracks the state of each row in the GPU cache texture.
 struct CacheRow {
     is_dirty: bool,
@@ -1292,16 +1296,17 @@ impl BrushShader {
         renderer_errors: &mut Vec<RendererError>,
     ) where M: Into<ShaderMode> {
         match blend_mode {
             BlendMode::None => {
                 self.opaque.bind(device, projection, mode, renderer_errors)
             }
             BlendMode::PremultipliedAlpha |
             BlendMode::PremultipliedDestOut |
+            BlendMode::SubpixelDualSource |
             BlendMode::SubpixelConstantTextColor(..) |
             BlendMode::SubpixelVariableTextColor |
             BlendMode::SubpixelWithBgColor => {
                 self.alpha.bind(device, projection, mode, renderer_errors)
             }
         }
     }
 
@@ -1486,16 +1491,17 @@ fn create_prim_shader(
                 ("sColor2", TextureSampler::Color2),
                 ("sDither", TextureSampler::Dither),
                 ("sCacheA8", TextureSampler::CacheA8),
                 ("sCacheRGBA8", TextureSampler::CacheRGBA8),
                 ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
                 ("sRenderTasks", TextureSampler::RenderTasks),
                 ("sResourceCache", TextureSampler::ResourceCache),
                 ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sLocalClipRects", TextureSampler::LocalClipRects),
             ],
         );
     }
 
     program
 }
 
 fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
@@ -1513,16 +1519,17 @@ fn create_clip_shader(name: &'static str
         device.bind_shader_samplers(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
                 ("sClipScrollNodes", TextureSampler::ClipScrollNodes),
                 ("sRenderTasks", TextureSampler::RenderTasks),
                 ("sResourceCache", TextureSampler::ResourceCache),
                 ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sLocalClipRects", TextureSampler::LocalClipRects),
             ],
         );
     }
 
     program
 }
 
 struct FileWatcher {
@@ -1565,51 +1572,51 @@ pub struct Renderer {
     pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     active_documents: Vec<(DocumentId, RenderedDocument)>,
 
     // These are "cache shaders". These shaders are used to
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
     cs_text_run: LazilyCompiledShader,
-    cs_line: LazilyCompiledShader,
     cs_blur_a8: LazilyCompiledShader,
     cs_blur_rgba8: LazilyCompiledShader,
 
     // Brush shaders
     brush_mask_corner: LazilyCompiledShader,
     brush_mask_rounded_rect: LazilyCompiledShader,
     brush_image_rgba8: BrushShader,
     brush_image_rgba8_alpha_mask: BrushShader,
     brush_image_a8: BrushShader,
     brush_solid: BrushShader,
+    brush_line: BrushShader,
 
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
     cs_clip_border: LazilyCompiledShader,
 
     // The are "primitive shaders". These shaders draw and blend
     // final results on screen. They are aware of tile boundaries.
     // Most draw directly to the framebuffer, but some use inputs
     // from the cache shaders to draw. Specifically, the box
     // shadow primitive shader stretches the box shadow cache
     // output, and the cache_image shader blits the results of
     // a cache shader (e.g. blur) to the screen.
     ps_text_run: TextShader,
+    ps_text_run_dual_source: TextShader,
     ps_image: Vec<Option<PrimitiveShader>>,
     ps_yuv_image: Vec<Option<PrimitiveShader>>,
     ps_border_corner: PrimitiveShader,
     ps_border_edge: PrimitiveShader,
     ps_gradient: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
     ps_radial_gradient: PrimitiveShader,
-    ps_line: PrimitiveShader,
 
     ps_blend: LazilyCompiledShader,
     ps_hw_composite: LazilyCompiledShader,
     ps_split_composite: LazilyCompiledShader,
     ps_composite: LazilyCompiledShader,
 
     max_texture_size: u32,
 
@@ -1624,16 +1631,17 @@ pub struct Renderer {
     last_time: u64,
 
     gpu_profile: GpuProfiler<GpuProfileTag>,
     prim_vao: VAO,
     blur_vao: VAO,
     clip_vao: VAO,
 
     node_data_texture: VertexDataTexture,
+    local_clip_rects_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
     gpu_cache_texture: CacheTexture,
 
     pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
 
     // Manages and resolves source textures IDs to real texture IDs.
     texture_resolver: SourceTextureResolver,
 
@@ -1719,16 +1727,19 @@ impl Renderer {
         let mut device = Device::new(
             gl,
             options.resource_override_path.clone(),
             options.upload_method,
             Box::new(file_watch_handler),
             options.cached_programs,
         );
 
+        let ext_dual_source_blending = !options.disable_dual_source_blending &&
+            device.supports_extension("GL_ARB_blend_func_extended");
+
         let device_max_size = device.max_texture_size();
         // 512 is the minimum that the texture cache can work with.
         // Broken GL contexts can return a max texture size of zero (See #1260). Better to
         // gracefully fail now than panic as soon as a texture is allocated.
         let min_texture_size = 512;
         if device_max_size < min_texture_size {
             println!(
                 "Device reporting insufficient max texture size ({})",
@@ -1751,24 +1762,16 @@ impl Renderer {
         let cs_text_run = try!{
             LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Primitive),
                                       "cs_text_run",
                                       &[],
                                       &mut device,
                                       options.precache_shaders)
         };
 
-        let cs_line = try!{
-            LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Primitive),
-                                      "ps_line",
-                                      &["CACHE"],
-                                      &mut device,
-                                      options.precache_shaders)
-        };
-
         let brush_mask_corner = try!{
             LazilyCompiledShader::new(ShaderKind::Brush,
                                       "brush_mask_corner",
                                       &[],
                                       &mut device,
                                       options.precache_shaders)
         };
 
@@ -1782,16 +1785,23 @@ impl Renderer {
 
         let brush_solid = try!{
             BrushShader::new("brush_solid",
                              &mut device,
                              &[],
                              options.precache_shaders)
         };
 
+        let brush_line = try!{
+            BrushShader::new("brush_line",
+                             &mut device,
+                             &[],
+                             options.precache_shaders)
+        };
+
         let brush_image_a8 = try!{
             BrushShader::new("brush_image",
                              &mut device,
                              &["ALPHA_TARGET"],
                              options.precache_shaders)
         };
 
         let brush_image_rgba8 = try!{
@@ -1843,28 +1853,28 @@ impl Renderer {
         let cs_clip_border = try!{
             LazilyCompiledShader::new(ShaderKind::ClipCache,
                                       "cs_clip_border",
                                       &[],
                                       &mut device,
                                       options.precache_shaders)
         };
 
-        let ps_line = try!{
-            PrimitiveShader::new("ps_line",
-                                 &mut device,
-                                 &[],
-                                 options.precache_shaders)
-        };
-
         let ps_text_run = try!{
             TextShader::new("ps_text_run",
                             &mut device,
                             &[],
-                           options.precache_shaders)
+                            options.precache_shaders)
+        };
+
+        let ps_text_run_dual_source = try!{
+            TextShader::new("ps_text_run",
+                            &mut device,
+                            &["DUAL_SOURCE_BLENDING"],
+                            options.precache_shaders)
         };
 
         // All image configuration.
         let mut image_features = Vec::new();
         let mut ps_image: Vec<Option<PrimitiveShader>> = Vec::new();
         // PrimitiveShader is not clonable. Use push() to initialize the vec.
         for _ in 0 .. IMAGE_BUFFER_KINDS.len() {
             ps_image.push(None);
@@ -2124,16 +2134,17 @@ impl Renderer {
         let blur_vao = device.create_vao_with_new_instances(&DESC_BLUR, &prim_vao);
         let clip_vao = device.create_vao_with_new_instances(&DESC_CLIP, &prim_vao);
 
         let texture_cache_upload_pbo = device.create_pbo();
 
         let texture_resolver = SourceTextureResolver::new(&mut device);
 
         let node_data_texture = VertexDataTexture::new(&mut device);
+        let local_clip_rects_texture = VertexDataTexture::new(&mut device);
         let render_task_texture = VertexDataTexture::new(&mut device);
 
         let gpu_cache_texture = CacheTexture::new(
             &mut device,
             options.scatter_gpu_cache_updates,
         )?;
 
         device.end_frame();
@@ -2145,16 +2156,18 @@ impl Renderer {
             (true, false) => FontRenderMode::Alpha,
             (false, _) => FontRenderMode::Mono,
         };
 
         let config = FrameBuilderConfig {
             enable_scrollbars: options.enable_scrollbars,
             default_font_render_mode,
             debug: options.debug,
+            dual_source_blending_is_enabled: true,
+            dual_source_blending_is_supported: ext_dual_source_blending,
         };
 
         let device_pixel_ratio = options.device_pixel_ratio;
         // First set the flags to default and later call set_debug_flags to ensure any
         // potential transition when enabling a flag is run.
         let debug_flags = DebugFlags::default();
         let payload_tx_for_backend = payload_tx.clone();
         let recorder = options.recorder;
@@ -2218,56 +2231,57 @@ impl Renderer {
             result_rx,
             debug_server,
             device,
             active_documents: Vec::new(),
             pending_texture_updates: Vec::new(),
             pending_gpu_cache_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
             cs_text_run,
-            cs_line,
             cs_blur_a8,
             cs_blur_rgba8,
             brush_mask_corner,
             brush_mask_rounded_rect,
             brush_image_rgba8,
             brush_image_rgba8_alpha_mask,
             brush_image_a8,
             brush_solid,
+            brush_line,
             cs_clip_rectangle,
             cs_clip_border,
             cs_clip_image,
             ps_text_run,
+            ps_text_run_dual_source,
             ps_image,
             ps_yuv_image,
             ps_border_corner,
             ps_border_edge,
             ps_gradient,
             ps_angle_gradient,
             ps_radial_gradient,
             ps_blend,
             ps_hw_composite,
             ps_split_composite,
             ps_composite,
-            ps_line,
             debug: debug_renderer,
             debug_flags,
             backend_profile_counters: BackendProfileCounters::new(),
             profile_counters: RendererProfileCounters::new(),
             profiler: Profiler::new(),
             max_texture_size: max_texture_size,
             max_recorded_profiles: options.max_recorded_profiles,
             clear_color: options.clear_color,
             enable_clear_scissor: options.enable_clear_scissor,
             last_time: 0,
             gpu_profile,
             prim_vao,
             blur_vao,
             clip_vao,
             node_data_texture,
+            local_clip_rects_texture,
             render_task_texture,
             pipeline_epoch_map: FastHashMap::default(),
             dither_matrix_texture,
             external_image_handler: None,
             output_image_handler: None,
             output_targets: FastHashMap::default(),
             cpu_profiles: VecDeque::new(),
             gpu_profiles: VecDeque::new(),
@@ -2371,16 +2385,58 @@ impl Renderer {
                 ResultMsg::RefreshShader(path) => {
                     self.pending_shader_updates.push(path);
                 }
                 ResultMsg::DebugOutput(output) => match output {
                     DebugOutput::FetchDocuments(string) |
                     DebugOutput::FetchClipScrollTree(string) => {
                         self.debug_server.send(string);
                     }
+                    #[cfg(feature = "capture")]
+                    DebugOutput::SaveCapture(path, deferred)=> {
+                        use std::fs::File;
+                        use std::io::Write;
+                        use api::ExternalImageData;
+
+                        if deferred.is_empty() {
+                            continue
+                        }
+
+                        info!("saving external images");
+                        let handler = self.external_image_handler
+                            .as_mut()
+                            .expect("Unable to lock the external image handler!");
+                        for def in deferred {
+                            let ExternalImageData { id, channel_index, .. } = def.external;
+                            let data = match handler.lock(id, channel_index).source {
+                                ExternalImageSource::RawData(data) => data.to_vec(),
+                                ExternalImageSource::NativeTexture(_gl_id) => {
+                                    //TODO: make a read FBO with this GL texture
+                                    //self.device.read_pixels(&def.descriptor);
+                                    unimplemented!()
+                                }
+                                ExternalImageSource::Invalid => {
+                                    // Create a dummy buffer...
+                                    let stride = def.descriptor.compute_stride();
+                                    let total_size = def.descriptor.height * stride;
+                                    vec![0xFF; total_size as usize]
+                                }
+                            };
+                            handler.unlock(id, channel_index);
+
+                            let full_path = format!("{}/{}",
+                                path.to_string_lossy(), def.short_path);
+                            File::create(full_path)
+                                .expect(&format!("Unable to create {}", def.short_path))
+                                .write_all(&data)
+                                .unwrap();
+                        }
+                    }
+                    #[cfg(feature = "capture")]
+                    DebugOutput::LoadCapture => {}
                 },
                 ResultMsg::DebugCommand(command) => {
                     self.handle_debug_command(command);
                 }
             }
         }
     }
 
@@ -2389,18 +2445,21 @@ impl Renderer {
         // Avoid unused param warning.
         let _ = &self.debug_server;
         String::new()
     }
 
 
     #[cfg(feature = "debugger")]
     fn get_screenshot_for_debugger(&mut self) -> String {
-        let data = self.device.read_pixels(1024, 768);
-        let screenshot = debug_server::Screenshot::new(1024, 768, data);
+        use api::ImageDescriptor;
+
+        let desc = ImageDescriptor::new(1024, 768, ImageFormat::BGRA8, true);
+        let data = self.device.read_pixels(&desc);
+        let screenshot = debug_server::Screenshot::new(desc.width, desc.height, data);
 
         serde_json::to_string(&screenshot).unwrap()
     }
 
     #[cfg(not(feature = "debugger"))]
     fn get_passes_for_debugger(&self) -> String {
         // Avoid unused param warning.
         let _ = &self.debug_server;
@@ -2462,27 +2521,27 @@ impl Renderer {
             "Vertical Blur",
             target.vertical_blurs.len(),
         );
         debug_target.add(
             debug_server::BatchKind::Cache,
             "Horizontal Blur",
             target.horizontal_blurs.len(),
         );
-        for (_, batch) in &target.text_run_cache_prims {
+        for (_, batch) in &target.alpha_batcher.text_run_cache_prims {
             debug_target.add(
                 debug_server::BatchKind::Cache,
                 "Text Shadow",
                 batch.len(),
             );
         }
         debug_target.add(
             debug_server::BatchKind::Cache,
             "Lines",
-            target.line_cache_prims.len(),
+            target.alpha_batcher.line_cache_prims.len(),
         );
 
         for batch in target
             .alpha_batcher
             .batch_list
             .opaque_batch_list
             .batches
             .iter()
@@ -2584,16 +2643,23 @@ impl Renderer {
             DebugCommand::FetchPasses => {
                 let json = self.get_passes_for_debugger();
                 self.debug_server.send(json);
             }
             DebugCommand::FetchScreenshot => {
                 let json = self.get_screenshot_for_debugger();
                 self.debug_server.send(json);
             }
+            DebugCommand::SaveCapture(_) |
+            DebugCommand::LoadCapture(_) => {
+                panic!("Capture commands are not welcome here!")
+            }
+            DebugCommand::EnableDualSourceBlending(_) => {
+                panic!("Should be handled by render backend");
+            }
         }
     }
 
     /// Set a callback for handling external images.
     pub fn set_external_image_handler(&mut self, handler: Box<ExternalImageHandler>) {
         self.external_image_handler = Some(handler);
     }
 
@@ -3048,28 +3114,28 @@ impl Renderer {
                         shader.bind(
                             &mut self.device,
                             key.blend_mode,
                             projection,
                             0,
                             &mut self.renderer_errors,
                         );
                     }
+                    BrushBatchKind::Line => {
+                        self.brush_line.bind(
+                            &mut self.device,
+                            key.blend_mode,
+                            projection,
+                            0,
+                            &mut self.renderer_errors,
+                        );
+                    }
                 }
             }
             BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
-                TransformBatchKind::Line => {
-                    self.ps_line.bind(
-                        &mut self.device,
-                        transform_kind,
-                        projection,
-                        0,
-                        &mut self.renderer_errors,
-                    );
-                }
                 TransformBatchKind::TextRun(..) => {
                     unreachable!("bug: text batches are special cased");
                 }
                 TransformBatchKind::Image(image_buffer_kind) => {
                     self.ps_image[image_buffer_kind as usize]
                         .as_mut()
                         .expect("Unsupported image shader kind")
                         .bind(
@@ -3155,29 +3221,28 @@ impl Renderer {
             // framebuffer readbacks that are needed for each
             // composite operation in this batch.
             let source = &render_tasks[source_id];
             let backdrop = &render_tasks[task_id];
             let readback = &render_tasks[backdrop_id];
 
             let (readback_rect, readback_layer) = readback.get_target_rect();
             let (backdrop_rect, _) = backdrop.get_target_rect();
-            let content_to_device_scale = TypedScale::<_, _, DevicePixel>::new(1i32);
             let backdrop_screen_origin = match backdrop.kind {
-                RenderTaskKind::Picture(ref task_info) => task_info
-                    .content_origin
-                    .to_i32()
-                    * content_to_device_scale,
+                RenderTaskKind::Picture(ref task_info) => match task_info.content_origin {
+                    ContentOrigin::Local(_) => panic!("bug: composite from a local-space rasterized picture?"),
+                    ContentOrigin::Screen(p) => p,
+                },
                 _ => panic!("bug: composite on non-picture?"),
             };
             let source_screen_origin = match source.kind {
-                RenderTaskKind::Picture(ref task_info) => task_info
-                    .content_origin
-                    .to_i32()
-                    * content_to_device_scale,
+                RenderTaskKind::Picture(ref task_info) => match task_info.content_origin {
+                    ContentOrigin::Local(_) => panic!("bug: composite from a local-space rasterized picture?"),
+                    ContentOrigin::Screen(p) => p,
+                },
                 _ => panic!("bug: composite on non-picture?"),
             };
 
             // Bind the FBO to blit the backdrop to.
             // Called per-instance in case the layer (and therefore FBO)
             // changes. The device will skip the GL call if the requested
             // target is already bound.
             let cache_draw_target = (cache_texture, readback_layer.0 as i32);
@@ -3338,43 +3403,48 @@ impl Renderer {
         self.handle_scaling(render_tasks, &target.scalings, SourceTexture::CacheRGBA8);
 
         // Draw any textrun caches for this target. For now, this
         // is only used to cache text runs that are to be blurred
         // for shadow support. In the future it may be worth
         // considering using this for (some) other text runs, since
         // it removes the overhead of submitting many small glyphs
         // to multiple tiles in the normal text run case.
-        if !target.text_run_cache_prims.is_empty() {
+        if !target.alpha_batcher.text_run_cache_prims.is_empty() {
             self.device.set_blend(true);
             self.device.set_blend_mode_premultiplied_alpha();
 
             let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_TEXT_RUN);
             self.cs_text_run
                 .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
-            for (texture_id, instances) in &target.text_run_cache_prims {
+            for (texture_id, instances) in &target.alpha_batcher.text_run_cache_prims {
                 self.draw_instanced_batch(
                     instances,
                     VertexArrayKind::Primitive,
                     &BatchTextures::color(*texture_id),
                     stats,
                 );
             }
         }
-        if !target.line_cache_prims.is_empty() {
+        if !target.alpha_batcher.line_cache_prims.is_empty() {
             // TODO(gw): Technically, we don't need blend for solid
             //           lines. We could check that here?
             self.device.set_blend(true);
             self.device.set_blend_mode_premultiplied_alpha();
 
             let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_LINE);
-            self.cs_line
-                .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
+            self.brush_line.bind(
+                &mut self.device,
+                BlendMode::PremultipliedAlpha,
+                projection,
+                0,
+                &mut self.renderer_errors,
+            );
             self.draw_instanced_batch(
-                &target.line_cache_prims,
+                &target.alpha_batcher.line_cache_prims,
                 VertexArrayKind::Primitive,
                 &BatchTextures::no_texture(),
                 stats,
             );
         }
 
         //TODO: record the pixel count for cached primitives
 
@@ -3422,16 +3492,17 @@ impl Renderer {
                 if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
                         BlendMode::None => debug_colors::BLACK,
                         BlendMode::PremultipliedAlpha => debug_colors::GREY,
                         BlendMode::PremultipliedDestOut => debug_colors::SALMON,
                         BlendMode::SubpixelConstantTextColor(..) => debug_colors::GREEN,
                         BlendMode::SubpixelVariableTextColor => debug_colors::RED,
                         BlendMode::SubpixelWithBgColor => debug_colors::BLUE,
+                        BlendMode::SubpixelDualSource => debug_colors::YELLOW,
                     }.into();
                     for item_rect in &batch.item_rects {
                         self.debug.add_rect(item_rect, color);
                     }
                 }
 
                 match batch.key.kind {
                     BatchKind::Transformable(transform_kind, TransformBatchKind::TextRun(glyph_format)) => {
@@ -3461,16 +3532,35 @@ impl Renderer {
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures,
                                     stats,
                                 );
                             }
+                            BlendMode::SubpixelDualSource => {
+                                self.device.set_blend_mode_subpixel_dual_source();
+
+                                self.ps_text_run_dual_source.bind(
+                                    &mut self.device,
+                                    glyph_format,
+                                    transform_kind,
+                                    projection,
+                                    TextShaderMode::SubpixelDualSource,
+                                    &mut self.renderer_errors,
+                                );
+
+                                self.draw_instanced_batch(
+                                    &batch.instances,
+                                    VertexArrayKind::Primitive,
+                                    &batch.key.textures,
+                                    stats,
+                                );
+                            }
                             BlendMode::SubpixelConstantTextColor(color) => {
                                 self.device.set_blend_mode_subpixel_constant_text_color(color);
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
                                     glyph_format,
                                     transform_kind,
                                     projection,
@@ -3601,17 +3691,18 @@ impl Renderer {
                                     self.device.set_blend_mode_premultiplied_alpha();
                                 }
                                 BlendMode::PremultipliedDestOut => {
                                     self.device.set_blend(true);
                                     self.device.set_blend_mode_premultiplied_dest_out();
                                 }
                                 BlendMode::SubpixelConstantTextColor(..) |
                                 BlendMode::SubpixelVariableTextColor |
-                                BlendMode::SubpixelWithBgColor => {
+                                BlendMode::SubpixelWithBgColor |
+                                BlendMode::SubpixelDualSource => {
                                     unreachable!("bug: subpx text handled earlier");
                                 }
                             }
                             prev_blend_mode = batch.key.blend_mode;
                         }
 
                         self.submit_batch(
                             &batch.key,
@@ -4013,20 +4104,27 @@ impl Renderer {
         // Now re-allocate the space for the rest of the target textures.
         for pass in &mut frame.passes {
             if let RenderPassKind::OffScreen { ref mut alpha, ref mut color } = pass.kind {
                 self.prepare_target_list(alpha, false);
                 self.prepare_target_list(color, false);
             }
         }
 
-        self.node_data_texture
-            .update(&mut self.device, &mut frame.node_data);
-        self.device
-            .bind_texture(TextureSampler::ClipScrollNodes, &self.node_data_texture.texture);
+        self.node_data_texture.update(&mut self.device, &mut frame.node_data);
+        self.device.bind_texture(TextureSampler::ClipScrollNodes, &self.node_data_texture.texture);
+
+        self.local_clip_rects_texture.update(
+            &mut self.device,