Bug 1329574 - update WR to ee97fcaa4ee8eda89f7bd67b1a8920d3382af294. r=kats?
author JerryShih <hshih@mozilla.com>
Fri, 13 Jan 2017 20:02:18 +0800
changeset 342073 76ee21f3118f5e10e0362c99e7052a68034d861a
parent 342072 1d71945a1e748f114cd71d0aa5e0ed1d88bf8055
child 342074 1351dcac4ed69f4ce5338a0ca40a4cac6b5b9955
push id 31345
push user kwierso@gmail.com
push date Fri, 10 Feb 2017 20:35:09 +0000
treeherder mozilla-central@a288fe35e494 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers kats
bugs 1329574
milestone 53.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1329574 - update WR to ee97fcaa4ee8eda89f7bd67b1a8920d3382af294. r=kats? MozReview-Commit-ID: JkmecRWImE2
README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/build.rs
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_clip_copy.fs.glsl
gfx/webrender/res/cs_clip_image.fs.glsl
gfx/webrender/res/cs_clip_image.vs.glsl
gfx/webrender/res/cs_clip_rectangle.fs.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_angle_gradient.vs.glsl
gfx/webrender/res/ps_blend.vs.glsl
gfx/webrender/res/ps_border.fs.glsl
gfx/webrender/res/ps_border.glsl
gfx/webrender/res/ps_border.vs.glsl
gfx/webrender/res/ps_box_shadow.vs.glsl
gfx/webrender/res/ps_cache_image.vs.glsl
gfx/webrender/res/ps_composite.vs.glsl
gfx/webrender/res/ps_gradient.vs.glsl
gfx/webrender/res/ps_image.vs.glsl
gfx/webrender/res/ps_rectangle.vs.glsl
gfx/webrender/res/ps_text_run.vs.glsl
gfx/webrender/res/ps_yuv_image.vs.glsl
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/mask_cache.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/record.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/scene.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_traits/src/display_item.rs
gfx/webrender_traits/src/types.rs
--- a/README.webrender
+++ b/README.webrender
@@ -46,9 +46,9 @@ 8) Build and test. You may need to make 
    features yet, just get the build working with the minimal changes.
 9) Commit the changes locally from step 7, and push everything to the
    graphics branch.
 10) Now you have an update webrender with the new features you wanted,
    so you can write gecko code against them.
 
 Yes, this is somewhat painful. It used to be worse. :)
 
-Latest Commit: 9839014c9a7269494fb40c1300f9e686738d42a3
+Latest Commit: ee97fcaa4ee8eda89f7bd67b1a8920d3382af294
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -15,27 +15,27 @@ serde_derive = ["webrender_traits/serde_
 
 [dependencies]
 app_units = "0.3"
 bincode = "0.6"
 bit-set = "0.4"
 byteorder = "0.5"
 euclid = "0.10.3"
 fnv="1.0"
-gleam = "0.2.29"
+gleam = "0.2.30"
 lazy_static = "0.2"
 log = "0.3"
 num-traits = "0.1.32"
 offscreen_gl_context = {version = "0.5", features = ["serde_serialization", "osmesa"]}
 time = "0.1"
 threadpool = "1.3.2"
 webrender_traits = {path = "../webrender_traits", default-features = false}
 bitflags = "0.7"
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = {version = "0.1.2", default-features = false}
 
 [target.'cfg(target_os = "windows")'.dependencies]
-dwrote = "0.1.1"
+dwrote = "0.1.4"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-graphics = "0.4.1"
 core-text = "2.0"
--- a/gfx/webrender/build.rs
+++ b/gfx/webrender/build.rs
@@ -33,16 +33,17 @@ fn write_shaders(glsl_files: Vec<PathBuf
 }
 
 fn main() {
     let out_dir = env::var("OUT_DIR").unwrap_or("out".to_owned());
 
     let shaders_file = Path::new(&out_dir).join("shaders.rs");
     let mut glsl_files = vec![];
 
+    println!("cargo:rerun-if-changed=res");
     let res_dir = Path::new("res");
     for entry in read_dir(res_dir).unwrap() {
         let entry = entry.unwrap();
         let path = entry.path();
         if entry.file_name().to_str().unwrap().ends_with(".glsl") {
             glsl_files.push(path.to_owned());
         }
     }
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -23,17 +23,17 @@ CacheClipInstance fetch_clip_item(int in
     cci.render_task_index = aClipRenderTaskIndex;
     cci.layer_index = aClipLayerIndex;
     cci.data_index = aClipDataIndex;
     cci.base_task_index = aClipBaseTaskIndex;
 
     return cci;
 }
 
-// The transformed vertex function that always covers the whole whole clip area,
+// The transformed vertex function that always covers the whole clip area,
 // which is the intersection of all clip instances of a given primitive
 TransformVertexInfo write_clip_tile_vertex(vec4 local_clip_rect,
                                            Layer layer,
                                            ClipArea area) {
     vec2 lp0_base = local_clip_rect.xy;
     vec2 lp1_base = local_clip_rect.xy + local_clip_rect.zw;
 
     vec2 lp0 = clamp_rect(lp0_base, layer.local_clip_rect);
--- a/gfx/webrender/res/cs_clip_copy.fs.glsl
+++ b/gfx/webrender/res/cs_clip_copy.fs.glsl
@@ -1,8 +1,8 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     float alpha = texelFetch(sCache, ivec3(vClipMaskUv), 0).a;
-    oFragColor = vec4(1.0, 1.0, 1.0, alpha);
+    oFragColor = vec4(alpha, 0.0, 0.0, 1.0);
 }
--- a/gfx/webrender/res/cs_clip_image.fs.glsl
+++ b/gfx/webrender/res/cs_clip_image.fs.glsl
@@ -7,10 +7,10 @@ void main(void) {
     vec2 local_pos = init_transform_fs(vPos, vLocalRect, alpha);
 
     bool repeat_mask = false; //TODO
     vec2 clamped_mask_uv = repeat_mask ? fract(vClipMaskUv.xy) :
         clamp(vClipMaskUv.xy, vec2(0.0, 0.0), vec2(1.0, 1.0));
     vec2 source_uv = clamped_mask_uv * vClipMaskUvRect.zw + vClipMaskUvRect.xy;
     float clip_alpha = texture(sMask, source_uv).r; //careful: texture has type A8
 
-    oFragColor = vec4(1.0, 1.0, 1.0, min(alpha, clip_alpha));
+    oFragColor = vec4(min(alpha, clip_alpha), 1.0, 1.0, 1.0);
 }
--- a/gfx/webrender/res/cs_clip_image.vs.glsl
+++ b/gfx/webrender/res/cs_clip_image.vs.glsl
@@ -28,11 +28,11 @@ void main(void) {
 
     TransformVertexInfo vi = write_clip_tile_vertex(local_rect,
                                                     layer,
                                                     area);
     vLocalRect = vi.clipped_local_rect;
     vPos = vi.local_pos;
 
     vClipMaskUv = vec3((vPos.xy / vPos.z - local_rect.xy) / local_rect.zw, 0.0);
-    vec2 texture_size = textureSize(sMask, 0);
+    vec2 texture_size = vec2(textureSize(sMask, 0));
     vClipMaskUvRect = mask.uv_rect / texture_size.xyxy;
 }
--- a/gfx/webrender/res/cs_clip_rectangle.fs.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.fs.glsl
@@ -35,10 +35,10 @@ float rounded_rect(vec2 pos) {
 
 
 void main(void) {
     float alpha = 1.f;
     vec2 local_pos = init_transform_fs(vPos, vLocalRect, alpha);
 
     float clip_alpha = rounded_rect(local_pos);
 
-    oFragColor = vec4(1.0, 1.0, 1.0, min(alpha, clip_alpha));
+    oFragColor = vec4(min(alpha, clip_alpha), 0.0, 0.0, 1.0);
 }
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -1,17 +1,19 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#ifdef GL_FRAGMENT_PRECISION_HIGH
-precision highp sampler2DArray;
-#else
-precision mediump sampler2DArray;
+#if defined(GL_ES) && GL_ES == 1
+    #ifdef GL_FRAGMENT_PRECISION_HIGH
+    precision highp sampler2DArray;
+    #else
+    precision mediump sampler2DArray;
+    #endif
 #endif
 
 #define PST_TOP_LEFT     0
 #define PST_TOP          1
 #define PST_TOP_RIGHT    2
 #define PST_RIGHT        3
 #define PST_BOTTOM_RIGHT 4
 #define PST_BOTTOM       5
@@ -21,28 +23,16 @@ precision mediump sampler2DArray;
 #define BORDER_LEFT      0
 #define BORDER_TOP       1
 #define BORDER_RIGHT     2
 #define BORDER_BOTTOM    3
 
 #define UV_NORMALIZED    uint(0)
 #define UV_PIXEL         uint(1)
 
-// Border styles as defined in webrender_traits/types.rs
-#define BORDER_STYLE_NONE         0
-#define BORDER_STYLE_SOLID        1
-#define BORDER_STYLE_DOUBLE       2
-#define BORDER_STYLE_DOTTED       3
-#define BORDER_STYLE_DASHED       4
-#define BORDER_STYLE_HIDDEN       5
-#define BORDER_STYLE_GROOVE       6
-#define BORDER_STYLE_RIDGE        7
-#define BORDER_STYLE_INSET        8
-#define BORDER_STYLE_OUTSET       9
-
 #define MAX_STOPS_PER_ANGLE_GRADIENT 8
 
 uniform sampler2DArray sCache;
 
 flat varying vec4 vClipMaskUvBounds;
 varying vec3 vClipMaskUv;
 
 #ifdef WR_VERTEX_SHADER
@@ -68,16 +58,17 @@ uniform sampler2D sResourceRects;
 // Instanced attributes
 in int aGlobalPrimId;
 in int aPrimitiveAddress;
 in int aTaskIndex;
 in int aClipTaskIndex;
 in int aLayerIndex;
 in int aElementIndex;
 in ivec2 aUserData;
+in int aZIndex;
 
 // get_fetch_uv is a macro to work around a macOS Intel driver parsing bug.
 // TODO: convert back to a function once the driver issues are resolved, if ever.
 // https://github.com/servo/webrender/pull/623
 // https://github.com/servo/servo/issues/13953
 #define get_fetch_uv(i, vpi)  ivec2(vpi * (i % (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi)), i / (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi))
 
 ivec2 get_fetch_uv_1(int index) {
@@ -226,40 +217,16 @@ Glyph fetch_glyph(int index) {
 
     ivec2 uv = get_fetch_uv_1(index);
 
     glyph.offset = texelFetchOffset(sData16, uv, 0, ivec2(0, 0));
 
     return glyph;
 }
 
-struct Border {
-    vec4 style;
-    vec4 widths;
-    vec4 colors[4];
-    vec4 radii[2];
-};
-
-Border fetch_border(int index) {
-    Border border;
-
-    ivec2 uv = get_fetch_uv_8(index);
-
-    border.style = texelFetchOffset(sData128, uv, 0, ivec2(0, 0));
-    border.widths = texelFetchOffset(sData128, uv, 0, ivec2(1, 0));
-    border.colors[0] = texelFetchOffset(sData128, uv, 0, ivec2(2, 0));
-    border.colors[1] = texelFetchOffset(sData128, uv, 0, ivec2(3, 0));
-    border.colors[2] = texelFetchOffset(sData128, uv, 0, ivec2(4, 0));
-    border.colors[3] = texelFetchOffset(sData128, uv, 0, ivec2(5, 0));
-    border.radii[0] = texelFetchOffset(sData128, uv, 0, ivec2(6, 0));
-    border.radii[1] = texelFetchOffset(sData128, uv, 0, ivec2(7, 0));
-
-    return border;
-}
-
 vec4 fetch_instance_geometry(int index) {
     ivec2 uv = get_fetch_uv_1(index);
 
     vec4 rect = texelFetchOffset(sData16, uv, 0, ivec2(0, 0));
 
     return rect;
 }
 
@@ -281,29 +248,31 @@ PrimitiveGeometry fetch_prim_geometry(in
 
 struct PrimitiveInstance {
     int global_prim_index;
     int specific_prim_index;
     int render_task_index;
     int clip_task_index;
     int layer_index;
     int sub_index;
+    int z;
     ivec2 user_data;
 };
 
 PrimitiveInstance fetch_prim_instance() {
     PrimitiveInstance pi;
 
     pi.global_prim_index = aGlobalPrimId;
     pi.specific_prim_index = aPrimitiveAddress;
     pi.render_task_index = aTaskIndex;
     pi.clip_task_index = aClipTaskIndex;
     pi.layer_index = aLayerIndex;
     pi.sub_index = aElementIndex;
     pi.user_data = aUserData;
+    pi.z = aZIndex;
 
     return pi;
 }
 
 struct CachePrimitiveInstance {
     int global_prim_index;
     int specific_prim_index;
     int render_task_index;
@@ -331,32 +300,34 @@ struct Primitive {
     ClipArea clip_area;
     vec4 local_rect;
     vec4 local_clip_rect;
     int prim_index;
     // when sending multiple primitives of the same type (e.g. border segments)
     // this index allows the vertex shader to recognize the difference
     int sub_index;
     ivec2 user_data;
+    float z;
 };
 
 Primitive load_primitive_custom(PrimitiveInstance pi) {
     Primitive prim;
 
     prim.layer = fetch_layer(pi.layer_index);
     prim.tile = fetch_tile(pi.render_task_index);
     prim.clip_area = fetch_clip_area(pi.clip_task_index);
 
     PrimitiveGeometry pg = fetch_prim_geometry(pi.global_prim_index);
     prim.local_rect = pg.local_rect;
     prim.local_clip_rect = pg.local_clip_rect;
 
     prim.prim_index = pi.specific_prim_index;
     prim.sub_index = pi.sub_index;
     prim.user_data = pi.user_data;
+    prim.z = float(pi.z);
 
     return prim;
 }
 
 Primitive load_primitive() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     return load_primitive_custom(pi);
@@ -419,16 +390,17 @@ struct Rect {
 struct VertexInfo {
     Rect local_rect;
     vec2 local_clamped_pos;
     vec2 global_clamped_pos;
 };
 
 VertexInfo write_vertex(vec4 instance_rect,
                         vec4 local_clip_rect,
+                        float z,
                         Layer layer,
                         Tile tile) {
     vec2 p0 = floor(0.5 + instance_rect.xy * uDevicePixelRatio) / uDevicePixelRatio;
     vec2 p1 = floor(0.5 + (instance_rect.xy + instance_rect.zw) * uDevicePixelRatio) / uDevicePixelRatio;
 
     vec2 local_pos = mix(p0, p1, aPosition.xy);
 
     vec2 cp0 = floor(0.5 + local_clip_rect.xy * uDevicePixelRatio) / uDevicePixelRatio;
@@ -446,32 +418,33 @@ VertexInfo write_vertex(vec4 instance_re
                              tile.screen_origin_task_origin.xy,
                              tile.screen_origin_task_origin.xy + tile.size_target_index.xy);
 
     vec4 local_clamped_pos = layer.inv_transform * vec4(clamped_pos / uDevicePixelRatio, world_pos.z, 1);
     local_clamped_pos.xyz /= local_clamped_pos.w;
 
     vec2 final_pos = clamped_pos + tile.screen_origin_task_origin.zw - tile.screen_origin_task_origin.xy;
 
-    gl_Position = uTransform * vec4(final_pos, 0, 1);
+    gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(Rect(p0, p1), local_clamped_pos.xy, clamped_pos.xy);
     return vi;
 }
 
 #ifdef WR_FEATURE_TRANSFORM
 
 struct TransformVertexInfo {
     vec3 local_pos;
     vec2 global_clamped_pos;
     vec4 clipped_local_rect;
 };
 
 TransformVertexInfo write_transform_vertex(vec4 instance_rect,
                                            vec4 local_clip_rect,
+                                           float z,
                                            Layer layer,
                                            Tile tile) {
     vec2 lp0_base = instance_rect.xy;
     vec2 lp1_base = instance_rect.xy + instance_rect.zw;
 
     vec2 lp0 = clamp_rect(clamp_rect(lp0_base, local_clip_rect),
                           layer.local_clip_rect);
     vec2 lp1 = clamp_rect(clamp_rect(lp1_base, local_clip_rect),
@@ -513,17 +486,17 @@ TransformVertexInfo write_transform_vert
                            aPosition.xy);
 
     // compute the point position in side the layer, in CSS space
     vec4 layer_pos = get_layer_pos(clamped_pos / uDevicePixelRatio, layer);
 
     // apply the task offset
     vec2 final_pos = clamped_pos + tile.screen_origin_task_origin.zw - tile.screen_origin_task_origin.xy;
 
-    gl_Position = uTransform * vec4(final_pos, 0.0, 1.0);
+    gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     return TransformVertexInfo(layer_pos.xyw, clamped_pos, clipped_local_rect);
 }
 
 #endif //WR_FEATURE_TRANSFORM
 
 struct ResourceRect {
     vec4 uv_rect;
@@ -658,11 +631,11 @@ vec2 init_transform_fs(vec3 local_pos, v
 
 float do_clip() {
     // anything outside of the mask is considered transparent
     bvec4 inside = lessThanEqual(
         vec4(vClipMaskUvBounds.xy, vClipMaskUv.xy),
         vec4(vClipMaskUv.xy, vClipMaskUvBounds.zw));
     // check for the dummy bounds, which are given to the opaque objects
     return vClipMaskUvBounds.xy == vClipMaskUvBounds.zw ? 1.0:
-        all(inside) ? textureLod(sCache, vClipMaskUv, 0.0).a : 0.0;
+        all(inside) ? textureLod(sCache, vClipMaskUv, 0.0).r : 0.0;
 }
 #endif //WR_FRAGMENT_SHADER
--- a/gfx/webrender/res/ps_angle_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.vs.glsl
@@ -4,16 +4,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.prim_index);
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
 
     vStopCount = int(prim.user_data.x);
     vPos = vi.local_clamped_pos;
 
     // Snap the start/end points to device pixel units.
     // I'm not sure this is entirely correct, but the
--- a/gfx/webrender/res/ps_blend.vs.glsl
+++ b/gfx/webrender/res/ps_blend.vs.glsl
@@ -1,25 +1,27 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 struct Blend {
     ivec4 src_id_target_id_op_amount;
+    int z;
 };
 
 Blend fetch_blend() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     Blend blend;
     blend.src_id_target_id_op_amount = ivec4(pi.user_data.x,
                                              pi.render_task_index,
                                              pi.sub_index,
                                              pi.user_data.y);
+    blend.z = pi.z;
 
     return blend;
 }
 
 void main(void) {
     Blend blend = fetch_blend();
     Tile src = fetch_tile(blend.src_id_target_id_op_amount.x);
     Tile dest = fetch_tile(blend.src_id_target_id_op_amount.y);
@@ -33,12 +35,12 @@ void main(void) {
                          aPosition.xy);
 
     vec2 texture_size = vec2(textureSize(sCache, 0));
     vec2 st0 = src.screen_origin_task_origin.zw / texture_size;
     vec2 st1 = (src.screen_origin_task_origin.zw + src.size_target_index.xy) / texture_size;
     vUv = vec3(mix(st0, st1, aPosition.xy), src.size_target_index.z);
 
     vOp = blend.src_id_target_id_op_amount.z;
-    vAmount = blend.src_id_target_id_op_amount.w / 65535.0;
+    vAmount = float(blend.src_id_target_id_op_amount.w) / 65535.0;
 
-    gl_Position = uTransform * vec4(local_pos, 0, 1);
+    gl_Position = uTransform * vec4(local_pos, blend.z, 1.0);
 }
--- a/gfx/webrender/res/ps_border.fs.glsl
+++ b/gfx/webrender/res/ps_border.fs.glsl
@@ -1,14 +1,26 @@
 #line 1
 
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+// Border styles as defined in webrender_traits/types.rs
+#define BORDER_STYLE_NONE         0
+#define BORDER_STYLE_SOLID        1
+#define BORDER_STYLE_DOUBLE       2
+#define BORDER_STYLE_DOTTED       3
+#define BORDER_STYLE_DASHED       4
+#define BORDER_STYLE_HIDDEN       5
+#define BORDER_STYLE_GROOVE       6
+#define BORDER_STYLE_RIDGE        7
+#define BORDER_STYLE_INSET        8
+#define BORDER_STYLE_OUTSET       9
+
 void discard_pixels_in_rounded_borders(vec2 local_pos) {
   float distanceFromRef = distance(vRefPoint, local_pos);
   if (vRadii.x > 0.0 && (distanceFromRef > vRadii.x || distanceFromRef < vRadii.z)) {
       discard;
   }
 }
 
 vec4 get_fragment_color(float distanceFromMixLine, float pixelsPerFragment) {
@@ -286,50 +298,100 @@ void draw_solid_border(float distanceFro
       break;
     }
     default:
       oFragColor = vHorizontalColor;
       discard_pixels_in_rounded_borders(localPos);
   }
 }
 
+vec4 draw_mixed_edge(float distance, float border_len, vec4 color, vec2 brightness_mod) {
+  float modulator = distance / border_len > 0.5 ? brightness_mod.x : brightness_mod.y;
+  return vec4(color.xyz * modulator, color.a);
+}
+
+void draw_mixed_border(float distanceFromMixLine, float distanceFromMiddle, vec2 localPos, vec2 brightness_mod) {
+  switch (vBorderPart) {
+    case PST_TOP_LEFT:
+    case PST_TOP_RIGHT:
+    case PST_BOTTOM_LEFT:
+    case PST_BOTTOM_RIGHT: {
+      // This is the conversion factor for transformations and device pixel scaling.
+      float pixelsPerFragment = length(fwidth(localPos.xy));
+      vec4 color = get_fragment_color(distanceFromMixLine, pixelsPerFragment);
+
+      float distance = distance(vRefPoint, localPos) - vRadii.z;
+      float length = vRadii.x - vRadii.z;
+      if (distanceFromMiddle < 0.0) {
+        distance = length - distance;
+      }
+
+      oFragColor = 0.0 <= distance && distance <= length ?
+        draw_mixed_edge(distance, length, color, brightness_mod) : vec4(0.0, 0.0, 0.0, 0.0);
+      break;
+    }
+    case PST_BOTTOM:
+    case PST_TOP: {
+      oFragColor = draw_mixed_edge(localPos.y - vPieceRect.y, vPieceRect.w, vVerticalColor, brightness_mod);
+      break;
+    }
+    case PST_LEFT:
+    case PST_RIGHT: {
+      oFragColor = draw_mixed_edge(localPos.x - vPieceRect.x, vPieceRect.z, vHorizontalColor, brightness_mod);
+      break;
+    }
+  }
+}
+
 // TODO: Investigate performance of this shader and see
 //       if it's worthwhile splitting it / removing branches etc.
 void main(void) {
 #ifdef WR_FEATURE_TRANSFORM
     float alpha = 0.0;
     vec2 local_pos = init_transform_fs(vLocalPos, vLocalRect, alpha);
 #else
     vec2 local_pos = vLocalPos;
 #endif
 
 #ifdef WR_FEATURE_TRANSFORM
     // TODO(gw): Support other border styles for transformed elements.
     float distance_from_mix_line = (local_pos.x - vPieceRect.x) * vPieceRect.w -
                                    (local_pos.y - vPieceRect.y) * vPieceRect.z;
     distance_from_mix_line /= vPieceRectHypotenuseLength;
+    float distance_from_middle = (local_pos.x - vLocalRect.x) +
+                                 (local_pos.y - vLocalRect.y) -
+                                 0.5 * (vLocalRect.z + vLocalRect.w);
 #else
     float distance_from_mix_line = vDistanceFromMixLine;
+    float distance_from_middle = vDistanceFromMiddle;
 #endif
 
+    vec2 brightness_mod = vec2(0.7, 1.3);
+
     switch (vBorderStyle) {
         case BORDER_STYLE_DASHED:
         case BORDER_STYLE_DOTTED:
           draw_dashed_or_dotted_border(local_pos, distance_from_mix_line);
           break;
         case BORDER_STYLE_DOUBLE:
           draw_double_border(distance_from_mix_line, local_pos);
           break;
         case BORDER_STYLE_OUTSET:
         case BORDER_STYLE_INSET:
         case BORDER_STYLE_SOLID:
         case BORDER_STYLE_NONE:
           draw_solid_border(distance_from_mix_line, local_pos);
           break;
+        case BORDER_STYLE_GROOVE:
+          draw_mixed_border(distance_from_mix_line, distance_from_middle, local_pos, brightness_mod.yx);
+          break;
+        case BORDER_STYLE_RIDGE:
+          draw_mixed_border(distance_from_mix_line, distance_from_middle, local_pos, brightness_mod.xy);
+          break;
+        case BORDER_STYLE_HIDDEN:
         default:
           discard;
-
     }
 
 #ifdef WR_FEATURE_TRANSFORM
     oFragColor *= vec4(1.0, 1.0, 1.0, alpha);
 #endif
 }
--- a/gfx/webrender/res/ps_border.glsl
+++ b/gfx/webrender/res/ps_border.glsl
@@ -23,9 +23,11 @@ flat varying vec4 vPieceRect;
 varying vec3 vLocalPos;     // The clamped position in local space.
 flat varying float vPieceRectHypotenuseLength;
 #else
 varying vec2 vLocalPos;     // The clamped position in local space.
 
 // These two are interpolated
 varying float vDistanceFromMixLine;  // This is the distance from the line where two colors
                                      // meet in border corners.
+varying float vDistanceFromMiddle;   // This is the distance from the line between the top
+                                     // left corner and the bottom right.
 #endif
--- a/gfx/webrender/res/ps_border.vs.glsl
+++ b/gfx/webrender/res/ps_border.vs.glsl
@@ -1,13 +1,37 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+struct Border {
+    vec4 style;
+    vec4 widths;
+    vec4 colors[4];
+    vec4 radii[2];
+};
+
+Border fetch_border(int index) {
+    Border border;
+
+    ivec2 uv = get_fetch_uv_8(index);
+
+    border.style = texelFetchOffset(sData128, uv, 0, ivec2(0, 0));
+    border.widths = texelFetchOffset(sData128, uv, 0, ivec2(1, 0));
+    border.colors[0] = texelFetchOffset(sData128, uv, 0, ivec2(2, 0));
+    border.colors[1] = texelFetchOffset(sData128, uv, 0, ivec2(3, 0));
+    border.colors[2] = texelFetchOffset(sData128, uv, 0, ivec2(4, 0));
+    border.colors[3] = texelFetchOffset(sData128, uv, 0, ivec2(5, 0));
+    border.radii[0] = texelFetchOffset(sData128, uv, 0, ivec2(6, 0));
+    border.radii[1] = texelFetchOffset(sData128, uv, 0, ivec2(7, 0));
+
+    return border;
+}
+
 void main(void) {
     Primitive prim = load_primitive();
     Border border = fetch_border(prim.prim_index);
     int sub_part = prim.sub_index;
 
     vec2 tl_outer = prim.local_rect.xy;
     vec2 tl_inner = tl_outer + vec2(max(border.radii[0].x, border.widths.x),
                                     max(border.radii[0].y, border.widths.y));
@@ -26,133 +50,116 @@ void main(void) {
                          prim.local_rect.y + prim.local_rect.w);
     vec2 bl_inner = bl_outer + vec2(max(border.radii[1].z, border.widths.x),
                                     -max(border.radii[1].w, border.widths.w));
 
     vec4 segment_rect;
     switch (sub_part) {
         case PST_TOP_LEFT:
             segment_rect = vec4(tl_outer, tl_inner - tl_outer);
+            vBorderStyle = int(border.style.x);
+            vHorizontalColor = border.colors[BORDER_LEFT];
+            vVerticalColor = border.colors[BORDER_TOP];
+            vRadii = vec4(border.radii[0].xy,
+                          border.radii[0].xy - border.widths.xy);
             break;
         case PST_TOP_RIGHT:
             segment_rect = vec4(tr_inner.x,
                                 tr_outer.y,
                                 tr_outer.x - tr_inner.x,
                                 tr_inner.y - tr_outer.y);
+            vBorderStyle = int(border.style.y);
+            vHorizontalColor = border.colors[BORDER_TOP];
+            vVerticalColor = border.colors[BORDER_RIGHT];
+            vRadii = vec4(border.radii[0].zw,
+                          border.radii[0].zw - border.widths.zy);
             break;
         case PST_BOTTOM_RIGHT:
             segment_rect = vec4(br_inner, br_outer - br_inner);
+            vBorderStyle = int(border.style.z);
+            vHorizontalColor = border.colors[BORDER_BOTTOM];
+            vVerticalColor = border.colors[BORDER_RIGHT];
+            vRadii = vec4(border.radii[1].xy,
+                          border.radii[1].xy - border.widths.zw);
             break;
         case PST_BOTTOM_LEFT:
             segment_rect = vec4(bl_outer.x,
                                 bl_inner.y,
                                 bl_inner.x - bl_outer.x,
                                 bl_outer.y - bl_inner.y);
+            vBorderStyle = int(border.style.w);
+            vHorizontalColor = border.colors[BORDER_BOTTOM];
+            vVerticalColor = border.colors[BORDER_LEFT];
+            vRadii = vec4(border.radii[1].zw,
+                          border.radii[1].zw - border.widths.xw);
             break;
         case PST_LEFT:
             segment_rect = vec4(tl_outer.x,
                                 tl_inner.y,
                                 border.widths.x,
                                 bl_inner.y - tl_inner.y);
+            vBorderStyle = int(border.style.x);
+            vHorizontalColor = border.colors[BORDER_LEFT];
+            vVerticalColor = border.colors[BORDER_LEFT];
+            vRadii = vec4(0.0);
             break;
         case PST_RIGHT:
             segment_rect = vec4(tr_outer.x - border.widths.z,
                                 tr_inner.y,
                                 border.widths.z,
                                 br_inner.y - tr_inner.y);
+            vBorderStyle = int(border.style.z);
+            vHorizontalColor = border.colors[BORDER_RIGHT];
+            vVerticalColor = border.colors[BORDER_RIGHT];
+            vRadii = vec4(0.0);
             break;
         case PST_BOTTOM:
             segment_rect = vec4(bl_inner.x,
                                 bl_outer.y - border.widths.w,
                                 br_inner.x - bl_inner.x,
                                 border.widths.w);
+            vBorderStyle = int(border.style.w);
+            vHorizontalColor = border.colors[BORDER_BOTTOM];
+            vVerticalColor = border.colors[BORDER_BOTTOM];
+            vRadii = vec4(0.0);
             break;
         case PST_TOP:
             segment_rect = vec4(tl_inner.x,
                                 tl_outer.y,
                                 tr_inner.x - tl_inner.x,
                                 border.widths.y);
+            vBorderStyle = int(border.style.y);
+            vHorizontalColor = border.colors[BORDER_TOP];
+            vVerticalColor = border.colors[BORDER_TOP];
+            vRadii = vec4(0.0);
             break;
     }
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
+                                                    prim.z,
                                                     prim.layer,
                                                     prim.tile);
     vLocalPos = vi.local_pos;
 
     // Local space
     vLocalRect = vi.clipped_local_rect;
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
     vLocalPos = vi.local_clamped_pos.xy;
 
     // Local space
     vLocalRect = prim.local_rect;
 #endif
 
-    switch (sub_part) {
-        case PST_LEFT:
-            vBorderStyle = int(border.style.x);
-            vHorizontalColor = border.colors[BORDER_LEFT];
-            vVerticalColor = border.colors[BORDER_LEFT];
-            vRadii = vec4(0.0);
-            break;
-        case PST_TOP_LEFT:
-            vBorderStyle = int(border.style.x);
-            vHorizontalColor = border.colors[BORDER_LEFT];
-            vVerticalColor = border.colors[BORDER_TOP];
-            vRadii = vec4(border.radii[0].xy,
-                          border.radii[0].xy - border.widths.xy);
-            break;
-        case PST_TOP:
-            vBorderStyle = int(border.style.y);
-            vHorizontalColor = border.colors[BORDER_TOP];
-            vVerticalColor = border.colors[BORDER_TOP];
-            vRadii = vec4(0.0);
-            break;
-        case PST_TOP_RIGHT:
-            vBorderStyle = int(border.style.y);
-            vHorizontalColor = border.colors[BORDER_TOP];
-            vVerticalColor = border.colors[BORDER_RIGHT];
-            vRadii = vec4(border.radii[0].zw,
-                          border.radii[0].zw - border.widths.zy);
-            break;
-        case PST_RIGHT:
-            vBorderStyle = int(border.style.z);
-            vHorizontalColor = border.colors[BORDER_RIGHT];
-            vVerticalColor = border.colors[BORDER_RIGHT];
-            vRadii = vec4(0.0);
-            break;
-        case PST_BOTTOM_RIGHT:
-            vBorderStyle = int(border.style.z);
-            vHorizontalColor = border.colors[BORDER_BOTTOM];
-            vVerticalColor = border.colors[BORDER_RIGHT];
-            vRadii = vec4(border.radii[1].xy,
-                          border.radii[1].xy - border.widths.zw);
-            break;
-        case PST_BOTTOM:
-            vBorderStyle = int(border.style.w);
-            vHorizontalColor = border.colors[BORDER_BOTTOM];
-            vVerticalColor = border.colors[BORDER_BOTTOM];
-            vRadii = vec4(0.0);
-            break;
-        case PST_BOTTOM_LEFT:
-            vBorderStyle = int(border.style.w);
-            vHorizontalColor = border.colors[BORDER_BOTTOM];
-            vVerticalColor = border.colors[BORDER_LEFT];
-            vRadii = vec4(border.radii[1].zw,
-                          border.radii[1].zw - border.widths.xw);
-            break;
-    }
-
     float x0, y0, x1, y1;
     switch (sub_part) {
         // These are the layer tile part PrimitivePart as uploaded by the tiling.rs
         case PST_TOP_LEFT:
             x0 = segment_rect.x;
             y0 = segment_rect.y;
             // These are width / heights
             x1 = segment_rect.x + segment_rect.z;
@@ -207,10 +214,13 @@ void main(void) {
     // to properly mix border colors. For transformed borders, we calculate this distance
     // in the fragment shader itself. For non-transformed borders, we can use the
     // interpolator.
 #ifdef WR_FEATURE_TRANSFORM
     vPieceRectHypotenuseLength = sqrt(pow(width, 2.0) + pow(height, 2.0));
 #else
     vDistanceFromMixLine = (vi.local_clamped_pos.x - x0) * height -
                            (vi.local_clamped_pos.y - y0) * width;
+    vDistanceFromMiddle = (vi.local_clamped_pos.x - vLocalRect.x) +
+                          (vi.local_clamped_pos.y - vLocalRect.y) -
+                          0.5 * (vLocalRect.z + vLocalRect.w);
 #endif
 }
--- a/gfx/webrender/res/ps_box_shadow.vs.glsl
+++ b/gfx/webrender/res/ps_box_shadow.vs.glsl
@@ -5,16 +5,17 @@
 
 void main(void) {
     Primitive prim = load_primitive();
     BoxShadow bs = fetch_boxshadow(prim.prim_index);
     vec4 segment_rect = fetch_instance_geometry(prim.sub_index);
 
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
 
     RenderTaskData child_task = fetch_render_task(prim.user_data.x);
     vUv.z = child_task.data1.x;
 
     // Constant offsets to inset from bilinear filtering border.
     vec2 patch_origin = child_task.data0.xy + vec2(1.0);
--- a/gfx/webrender/res/ps_cache_image.vs.glsl
+++ b/gfx/webrender/res/ps_cache_image.vs.glsl
@@ -6,16 +6,17 @@
 // Draw a cached primitive (e.g. a blurred text run) from the
 // target cache to the framebuffer, applying tile clip boundaries.
 
 void main(void) {
     Primitive prim = load_primitive();
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
 
     RenderTaskData child_task = fetch_render_task(prim.user_data.x);
     vUv.z = child_task.data1.x;
 
     vec2 texture_size = vec2(textureSize(sCache, 0));
     vec2 uv0 = child_task.data0.xy / texture_size;
--- a/gfx/webrender/res/ps_composite.vs.glsl
+++ b/gfx/webrender/res/ps_composite.vs.glsl
@@ -1,24 +1,26 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 struct Composite {
     ivec4 src0_src1_target_id_op;
+    int z;
 };
 
 Composite fetch_composite() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     Composite composite;
     composite.src0_src1_target_id_op = ivec4(pi.user_data.xy,
                                              pi.render_task_index,
                                              pi.sub_index);
+    composite.z = pi.z;
 
     return composite;
 }
 
 void main(void) {
     Composite composite = fetch_composite();
     Tile src0 = fetch_tile(composite.src0_src1_target_id_op.x);
     Tile src1 = fetch_tile(composite.src0_src1_target_id_op.y);
@@ -39,10 +41,10 @@ void main(void) {
                                  dest.screen_origin_task_origin.xy + dest.size_target_index.xy,
                                  aPosition.xy);
     vec2 f = (local_virtual_pos - src1.screen_origin_task_origin.xy) / src1.size_target_index.xy;
     vUv1 = vec3(mix(st0, st1, f), src1.size_target_index.z);
     vUv1Rect = vec4(st0, st1);
 
     vOp = composite.src0_src1_target_id_op.w;
 
-    gl_Position = uTransform * vec4(local_pos, 0, 1);
+    gl_Position = uTransform * vec4(local_pos, composite.z, 1.0);
 }
--- a/gfx/webrender/res/ps_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_gradient.vs.glsl
@@ -34,24 +34,26 @@ void main(void) {
             segment_rect.y = y0;
             segment_rect.w = y1 - y0;
             break;
     }
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
+                                                    prim.z,
                                                     prim.layer,
                                                     prim.tile);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy - prim.local_rect.xy) / prim.local_rect.zw;
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
 
     vec2 f = (vi.local_clamped_pos - segment_rect.xy) / segment_rect.zw;
     vPos = vi.local_clamped_pos;
 #endif
 
     write_clip(vi.global_clamped_pos, prim.clip_area);
--- a/gfx/webrender/res/ps_image.vs.glsl
+++ b/gfx/webrender/res/ps_image.vs.glsl
@@ -6,23 +6,25 @@
 void main(void) {
     Primitive prim = load_primitive();
     Image image = fetch_image(prim.prim_index);
     ResourceRect res = fetch_resource_rect(prim.user_data.x);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
+                                                    prim.z,
                                                     prim.layer,
                                                     prim.tile);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
     vLocalPos = vi.local_clamped_pos - vi.local_rect.p0;
 #endif
 
     write_clip(vi.global_clamped_pos, prim.clip_area);
 
     // vUv will contain how many times this image has wrapped around the image size.
--- a/gfx/webrender/res/ps_rectangle.vs.glsl
+++ b/gfx/webrender/res/ps_rectangle.vs.glsl
@@ -5,23 +5,25 @@
 
 void main(void) {
     Primitive prim = load_primitive();
     Rectangle rect = fetch_rectangle(prim.prim_index);
     vColor = rect.color;
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
+                                                    prim.z,
                                                     prim.layer,
                                                     prim.tile);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
 #endif
 
 #ifdef WR_FEATURE_CLIP
     write_clip(vi.global_clamped_pos, prim.clip_area);
 #endif
 }
--- a/gfx/webrender/res/ps_text_run.vs.glsl
+++ b/gfx/webrender/res/ps_text_run.vs.glsl
@@ -9,24 +9,26 @@ void main(void) {
     Glyph glyph = fetch_glyph(prim.sub_index);
     ResourceRect res = fetch_resource_rect(prim.user_data.x);
 
     vec4 local_rect = vec4(glyph.offset.xy, (res.uv_rect.zw - res.uv_rect.xy) / uDevicePixelRatio);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(local_rect,
                                                     prim.local_clip_rect,
+                                                    prim.z,
                                                     prim.layer,
                                                     prim.tile);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy / vi.local_pos.z - local_rect.xy) / local_rect.zw;
 #else
     VertexInfo vi = write_vertex(local_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
     vec2 f = (vi.local_clamped_pos - vi.local_rect.p0) / (vi.local_rect.p1 - vi.local_rect.p0);
 #endif
 
     write_clip(vi.global_clamped_pos, prim.clip_area);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
--- a/gfx/webrender/res/ps_yuv_image.vs.glsl
+++ b/gfx/webrender/res/ps_yuv_image.vs.glsl
@@ -3,23 +3,25 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
+                                                    prim.z,
                                                     prim.layer,
                                                     prim.tile);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
+                                 prim.z,
                                  prim.layer,
                                  prim.tile);
     vLocalPos = vi.local_clamped_pos - vi.local_rect.p0;
 #endif
 
     YuvImage image = fetch_yuv_image(prim.prim_index);
 
     vec2 y_texture_size = vec2(textureSize(sColor0, 0));
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -13,17 +13,17 @@ use super::shader_source;
 use std::collections::HashMap;
 use std::fs::File;
 use std::hash::BuildHasherDefault;
 use std::io::Read;
 use std::mem;
 use std::path::PathBuf;
 //use std::sync::mpsc::{channel, Sender};
 //use std::thread;
-use webrender_traits::{ColorF, ImageFormat};
+use webrender_traits::{ColorF, ImageFormat, DeviceIntRect};
 
 #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
 const GL_FORMAT_A: gl::GLuint = gl::RED;
 
 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
 const GL_FORMAT_A: gl::GLuint = gl::ALPHA;
 
 #[cfg(any(target_os = "windows", all(unix, not(target_os = "android"))))]
@@ -43,16 +43,21 @@ static SHADER_PREAMBLE: &'static str = "
 pub type ViewportDimensions = [u32; 2];
 
 lazy_static! {
     pub static ref MAX_TEXTURE_SIZE: gl::GLint = {
         gl::get_integer_v(gl::MAX_TEXTURE_SIZE)
     };
 }
 
+#[repr(u32)]
+pub enum DepthFunction {
+    Less = gl::LESS,
+}
+
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum TextureTarget {
     Default,
     Array,
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum TextureFilter {
@@ -66,16 +71,21 @@ pub enum VertexFormat {
     Rectangles,
     DebugFont,
     DebugColor,
     Clear,
     Blur,
     Clip,
 }
 
+enum FBOTarget {
+    Read,
+    Draw,
+}
+
 fn get_optional_shader_source(shader_name: &str, base_path: &Option<PathBuf>) -> Option<String> {
     if let Some(ref base) = *base_path {
         let shader_path = base.join(&format!("{}.glsl", shader_name));
         if shader_path.exists() {
             let mut source = String::new();
             File::open(&shader_path).unwrap().read_to_string(&mut source).unwrap();
             return Some(source);
         }
@@ -167,16 +177,17 @@ impl VertexFormat {
                 let mut offset = 0;
 
                 for &attrib in [VertexAttribute::GlobalPrimId,
                                 VertexAttribute::PrimitiveAddress,
                                 VertexAttribute::TaskIndex,
                                 VertexAttribute::ClipTaskIndex,
                                 VertexAttribute::LayerIndex,
                                 VertexAttribute::ElementIndex,
+                                VertexAttribute::ZIndex,
                                ].into_iter() {
                     gl::enable_vertex_attrib_array(attrib as gl::GLuint);
                     gl::vertex_attrib_divisor(attrib as gl::GLuint, 1);
                     gl::vertex_attrib_i_pointer(attrib as gl::GLuint,
                                                 1,
                                                 gl::INT,
                                                 instance_stride,
                                                 offset);
@@ -314,18 +325,22 @@ impl IBOId {
 
 impl UBOId {
     fn _bind(&self) {
         gl::bind_buffer(gl::UNIFORM_BUFFER, self.0);
     }
 }
 
 impl FBOId {
-    fn bind(&self) {
-        gl::bind_framebuffer(gl::FRAMEBUFFER, self.0);
+    fn bind(&self, target: FBOTarget) {
+        let target = match target {
+            FBOTarget::Read => gl::READ_FRAMEBUFFER,
+            FBOTarget::Draw => gl::DRAW_FRAMEBUFFER,
+        };
+        gl::bind_framebuffer(target, self.0);
     }
 }
 
 struct Texture {
     id: gl::GLuint,
     format: ImageFormat,
     width: u32,
     height: u32,
@@ -370,16 +385,17 @@ impl Program {
 
         gl::bind_attrib_location(self.id, VertexAttribute::GlobalPrimId as gl::GLuint, "aGlobalPrimId");
         gl::bind_attrib_location(self.id, VertexAttribute::PrimitiveAddress as gl::GLuint, "aPrimitiveAddress");
         gl::bind_attrib_location(self.id, VertexAttribute::TaskIndex as gl::GLuint, "aTaskIndex");
         gl::bind_attrib_location(self.id, VertexAttribute::ClipTaskIndex as gl::GLuint, "aClipTaskIndex");
         gl::bind_attrib_location(self.id, VertexAttribute::LayerIndex as gl::GLuint, "aLayerIndex");
         gl::bind_attrib_location(self.id, VertexAttribute::ElementIndex as gl::GLuint, "aElementIndex");
         gl::bind_attrib_location(self.id, VertexAttribute::UserData as gl::GLuint, "aUserData");
+        gl::bind_attrib_location(self.id, VertexAttribute::ZIndex as gl::GLuint, "aZIndex");
 
         gl::bind_attrib_location(self.id, ClearAttribute::Rectangle as gl::GLuint, "aClearRectangle");
 
         gl::bind_attrib_location(self.id, BlurAttribute::RenderTaskIndex as gl::GLuint, "aBlurRenderTaskIndex");
         gl::bind_attrib_location(self.id, BlurAttribute::SourceTaskIndex as gl::GLuint, "aBlurSourceTaskIndex");
         gl::bind_attrib_location(self.id, BlurAttribute::Direction as gl::GLuint, "aBlurDirection");
 
         gl::bind_attrib_location(self.id, ClipAttribute::RenderTaskIndex as gl::GLuint, "aClipRenderTaskIndex");
@@ -765,18 +781,20 @@ pub struct Capabilities {
     pub supports_multisampling: bool,
 }
 
 pub struct Device {
     // device state
     bound_textures: [TextureId; 16],
     bound_program: ProgramId,
     bound_vao: VAOId,
-    bound_fbo: FBOId,
-    default_fbo: gl::GLuint,
+    bound_read_fbo: FBOId,
+    bound_draw_fbo: FBOId,
+    default_read_fbo: gl::GLuint,
+    default_draw_fbo: gl::GLuint,
     device_pixel_ratio: f32,
 
     // HW or API capabilities
     capabilities: Capabilities,
 
     // debug
     inside_frame: bool,
 
@@ -813,18 +831,20 @@ impl Device {
             capabilities: Capabilities {
                 max_ubo_size: gl::get_integer_v(gl::MAX_UNIFORM_BLOCK_SIZE) as usize,
                 supports_multisampling: false, //TODO
             },
 
             bound_textures: [ TextureId::invalid(); 16 ],
             bound_program: ProgramId(0),
             bound_vao: VAOId(0),
-            bound_fbo: FBOId(0),
-            default_fbo: 0,
+            bound_read_fbo: FBOId(0),
+            bound_draw_fbo: FBOId(0),
+            default_read_fbo: 0,
+            default_draw_fbo: 0,
 
             textures: HashMap::with_hasher(Default::default()),
             programs: HashMap::with_hasher(Default::default()),
             vaos: HashMap::with_hasher(Default::default()),
 
             shader_preamble: shader_preamble,
 
             next_vao_id: 1,
@@ -873,18 +893,20 @@ impl Device {
     }
 
     pub fn begin_frame(&mut self, device_pixel_ratio: f32) {
         debug_assert!(!self.inside_frame);
         self.inside_frame = true;
         self.device_pixel_ratio = device_pixel_ratio;
 
         // Retrieve the currently set FBO.
-        let default_fbo = gl::get_integer_v(gl::FRAMEBUFFER_BINDING);
-        self.default_fbo = default_fbo as gl::GLuint;
+        let default_read_fbo = gl::get_integer_v(gl::READ_FRAMEBUFFER_BINDING);
+        self.default_read_fbo = default_read_fbo as gl::GLuint;
+        let default_draw_fbo = gl::get_integer_v(gl::DRAW_FRAMEBUFFER_BINDING);
+        self.default_draw_fbo = default_draw_fbo as gl::GLuint;
 
         // Texture state
         for i in 0..self.bound_textures.len() {
             self.bound_textures[i] = TextureId::invalid();
             gl::active_texture(gl::TEXTURE0 + i as gl::GLuint);
             gl::bind_texture(gl::TEXTURE_2D, 0);
         }
 
@@ -892,17 +914,18 @@ impl Device {
         self.bound_program = ProgramId(0);
         gl::use_program(0);
 
         // Vertex state
         self.bound_vao = VAOId(0);
         self.clear_vertex_array();
 
         // FBO state
-        self.bound_fbo = FBOId(self.default_fbo);
+        self.bound_read_fbo = FBOId(self.default_read_fbo);
+        self.bound_draw_fbo = FBOId(self.default_draw_fbo);
 
         // Pixel op state
         gl::pixel_store_i(gl::UNPACK_ALIGNMENT, 1);
 
         // Default is sampler 0, always
         gl::active_texture(gl::TEXTURE0);
     }
 
@@ -915,28 +938,41 @@ impl Device {
         if self.bound_textures[sampler_index] != texture_id {
             self.bound_textures[sampler_index] = texture_id;
             gl::active_texture(gl::TEXTURE0 + sampler_index as gl::GLuint);
             texture_id.bind();
             gl::active_texture(gl::TEXTURE0);
         }
     }
 
-    pub fn bind_render_target(&mut self,
-                              texture_id: Option<(TextureId, i32)>,
-                              dimensions: Option<ViewportDimensions>) {
+    pub fn bind_read_target(&mut self, texture_id: Option<(TextureId, i32)>) {
         debug_assert!(self.inside_frame);
 
-        let fbo_id = texture_id.map_or(FBOId(self.default_fbo), |texture_id| {
+        let fbo_id = texture_id.map_or(FBOId(self.default_read_fbo), |texture_id| {
             self.textures.get(&texture_id.0).unwrap().fbo_ids[texture_id.1 as usize]
         });
 
-        if self.bound_fbo != fbo_id {
-            self.bound_fbo = fbo_id;
-            fbo_id.bind();
+        if self.bound_read_fbo != fbo_id {
+            self.bound_read_fbo = fbo_id;
+            fbo_id.bind(FBOTarget::Read);
+        }
+    }
+
+    pub fn bind_draw_target(&mut self,
+                            texture_id: Option<(TextureId, i32)>,
+                            dimensions: Option<ViewportDimensions>) {
+        debug_assert!(self.inside_frame);
+
+        let fbo_id = texture_id.map_or(FBOId(self.default_draw_fbo), |texture_id| {
+            self.textures.get(&texture_id.0).unwrap().fbo_ids[texture_id.1 as usize]
+        });
+
+        if self.bound_draw_fbo != fbo_id {
+            self.bound_draw_fbo = fbo_id;
+            fbo_id.bind(FBOTarget::Draw);
         }
 
         if let Some(dimensions) = dimensions {
             gl::viewport(0, 0, dimensions[0] as gl::GLint, dimensions[1] as gl::GLint);
         }
     }
 
     pub fn bind_program(&mut self,
@@ -1080,16 +1116,20 @@ impl Device {
                                           internal_format as u32,
                                           gl_format,
                                           type_,
                                           pixels);
             }
         }
     }
 
+    pub fn get_render_target_layer_count(&self, texture_id: TextureId) -> usize {
+        self.textures[&texture_id].fbo_ids.len()
+    }
+
     pub fn create_fbo_for_texture_if_necessary(&mut self,
                                                texture_id: TextureId,
                                                layer_count: Option<i32>) {
         let texture = self.textures.get_mut(&texture_id).unwrap();
 
         match layer_count {
             Some(layer_count) => {
                 debug_assert!(layer_count > 0);
@@ -1122,16 +1162,31 @@ impl Device {
 
                 for (fbo_index, fbo_id) in texture.fbo_ids.iter().enumerate() {
                     gl::bind_framebuffer(gl::FRAMEBUFFER, fbo_id.0);
                     gl::framebuffer_texture_layer(gl::FRAMEBUFFER,
                                                   gl::COLOR_ATTACHMENT0,
                                                   texture_id.name,
                                                   0,
                                                   fbo_index as gl::GLint);
+
+                    // TODO(gw): Share depth render buffer between FBOs to
+                    //           save memory!
+                    // TODO(gw): Free these renderbuffers on exit!
+                    let renderbuffer_ids = gl::gen_renderbuffers(1);
+                    let depth_rb = renderbuffer_ids[0];
+                    gl::bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
+                    gl::renderbuffer_storage(gl::RENDERBUFFER,
+                                             gl::DEPTH_COMPONENT24,
+                                             texture.width as gl::GLsizei,
+                                             texture.height as gl::GLsizei);
+                    gl::framebuffer_renderbuffer(gl::FRAMEBUFFER,
+                                                 gl::DEPTH_ATTACHMENT,
+                                                 gl::RENDERBUFFER,
+                                                 depth_rb);
                 }
             }
             None => {
                 debug_assert!(texture.fbo_ids.len() == 0 || texture.fbo_ids.len() == 1);
                 if texture.fbo_ids.is_empty() {
                     let new_fbo = gl::gen_framebuffers(1)[0];
 
                     gl::bind_framebuffer(gl::FRAMEBUFFER, new_fbo);
@@ -1142,17 +1197,41 @@ impl Device {
                                                texture_id.name,
                                                0);
 
                     texture.fbo_ids.push(FBOId(new_fbo));
                 }
             }
         }
 
-        gl::bind_framebuffer(gl::FRAMEBUFFER, self.default_fbo);
+        // TODO(gw): Hack! Modify the code above to use the normal binding interfaces the device exposes.
+        gl::bind_framebuffer(gl::READ_FRAMEBUFFER, self.bound_read_fbo.0);
+        gl::bind_framebuffer(gl::DRAW_FRAMEBUFFER, self.bound_draw_fbo.0);
+    }
+
+    pub fn blit_render_target(&mut self,
+                              src_texture_id: TextureId,
+                              src_texture_layer: i32,
+                              dest_rect: DeviceIntRect) {
+        debug_assert!(self.inside_frame);
+
+        self.bind_read_target(Some((src_texture_id, src_texture_layer)));
+
+        let texture = self.textures.get(&src_texture_id).expect("unknown texture id!");
+
+        gl::blit_framebuffer(0,
+                             0,
+                             texture.width as gl::GLint,
+                             texture.height as gl::GLint,
+                             dest_rect.origin.x,
+                             dest_rect.origin.y,
+                             dest_rect.origin.x + dest_rect.size.width,
+                             dest_rect.origin.y + dest_rect.size.height,
+                             gl::COLOR_BUFFER_BIT,
+                             gl::LINEAR);
     }
 
     pub fn resize_texture(&mut self,
                           texture_id: TextureId,
                           new_width: u32,
                           new_height: u32,
                           format: ImageFormat,
                           filter: TextureFilter,
@@ -1160,44 +1239,44 @@ impl Device {
         debug_assert!(self.inside_frame);
 
         let (old_width, old_height) = self.get_texture_dimensions(texture_id);
 
         let temp_texture_id = self.create_texture_ids(1, TextureTarget::Default)[0];
         self.init_texture(temp_texture_id, old_width, old_height, format, filter, mode, None);
         self.create_fbo_for_texture_if_necessary(temp_texture_id, None);
 
-        self.bind_render_target(Some((texture_id, 0)), None);
+        self.bind_read_target(Some((texture_id, 0)));
         self.bind_texture(DEFAULT_TEXTURE, temp_texture_id);
 
         gl::copy_tex_sub_image_2d(temp_texture_id.target,
                                   0,
                                   0,
                                   0,
                                   0,
                                   0,
                                   old_width as i32,
                                   old_height as i32);
 
         self.deinit_texture(texture_id);
         self.init_texture(texture_id, new_width, new_height, format, filter, mode, None);
         self.create_fbo_for_texture_if_necessary(texture_id, None);
-        self.bind_render_target(Some((temp_texture_id, 0)), None);
+        self.bind_read_target(Some((temp_texture_id, 0)));
         self.bind_texture(DEFAULT_TEXTURE, texture_id);
 
         gl::copy_tex_sub_image_2d(texture_id.target,
                                   0,
                                   0,
                                   0,
                                   0,
                                   0,
                                   old_width as i32,
                                   old_height as i32);
 
-        self.bind_render_target(None, None);
+        self.bind_read_target(None);
         self.deinit_texture(temp_texture_id);
     }
 
     pub fn deinit_texture(&mut self, texture_id: TextureId) {
         debug_assert!(self.inside_frame);
 
         self.bind_texture(DEFAULT_TEXTURE, texture_id);
 
@@ -1703,17 +1782,18 @@ impl Device {
     pub fn draw_indexed_triangles_instanced_u16(&mut self,
                                                 index_count: i32,
                                                 instance_count: i32) {
         debug_assert!(self.inside_frame);
         gl::draw_elements_instanced(gl::TRIANGLES, index_count, gl::UNSIGNED_SHORT, 0, instance_count);
     }
 
     pub fn end_frame(&mut self) {
-        self.bind_render_target(None, None);
+        self.bind_draw_target(None, None);
+        self.bind_read_target(None);
 
         debug_assert!(self.inside_frame);
         self.inside_frame = false;
 
         gl::bind_texture(gl::TEXTURE_2D, 0);
         gl::use_program(0);
 
         for i in 0..self.bound_textures.len() {
@@ -1757,25 +1837,52 @@ impl Device {
             if enable {
                 gl::enable(gl::MULTISAMPLE);
             } else {
                 gl::disable(gl::MULTISAMPLE);
             }
         }
     }
 
-    pub fn clear_color(&self, c: [f32; 4]) {
-        gl::clear_color(c[0], c[1], c[2], c[3]);
-        gl::clear(gl::COLOR_BUFFER_BIT);
+    pub fn clear_target(&self,
+                        color: Option<[f32; 4]>,
+                        depth: Option<f32>) {
+        let mut clear_bits = 0;
+
+        if let Some(color) = color {
+            gl::clear_color(color[0], color[1], color[2], color[3]);
+            clear_bits |= gl::COLOR_BUFFER_BIT;
+        }
+
+        if let Some(depth) = depth {
+            gl::clear_depth(depth as f64);
+            clear_bits |= gl::DEPTH_BUFFER_BIT;
+        }
+
+        if clear_bits != 0 {
+            gl::clear(clear_bits);
+        }
+    }
+
+    pub fn enable_depth(&self) {
+        gl::enable(gl::DEPTH_TEST);
     }
 
     pub fn disable_depth(&self) {
         gl::disable(gl::DEPTH_TEST);
     }
 
+    pub fn set_depth_func(&self, depth_func: DepthFunction) {
+        gl::depth_func(depth_func as gl::GLuint);
+    }
+
+    pub fn enable_depth_write(&self) {
+        gl::depth_mask(true);
+    }
+
     pub fn disable_depth_write(&self) {
         gl::depth_mask(false);
     }
 
     pub fn disable_stencil(&self) {
         gl::disable(gl::STENCIL_TEST);
     }
 
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -43,16 +43,17 @@ struct FlattenContext<'a> {
 
 // TODO: doc
 pub struct Frame {
     pub layers: LayerMap,
     pub pipeline_epoch_map: HashMap<PipelineId, Epoch, BuildHasherDefault<FnvHasher>>,
     pub pipeline_auxiliary_lists: AuxiliaryListsMap,
     pub root_scroll_layer_id: Option<ScrollLayerId>,
     pending_scroll_offsets: HashMap<(PipelineId, ServoScrollRootId), LayerPoint>,
+    current_scroll_layer_id: Option<ScrollLayerId>,
     id: FrameId,
     debug: bool,
     frame_builder_config: FrameBuilderConfig,
     frame_builder: Option<FrameBuilder>,
 }
 
 trait DisplayListHelpers {
     fn starting_stacking_context<'a>(&'a self) -> Option<(&'a StackingContext, &'a ClipRegion)>;
@@ -204,16 +205,17 @@ impl<'a> Iterator for DisplayListTravers
 impl Frame {
     pub fn new(debug: bool, config: FrameBuilderConfig) -> Frame {
         Frame {
             pipeline_epoch_map: HashMap::with_hasher(Default::default()),
             pipeline_auxiliary_lists: HashMap::with_hasher(Default::default()),
             layers: HashMap::with_hasher(Default::default()),
             root_scroll_layer_id: None,
             pending_scroll_offsets: HashMap::new(),
+            current_scroll_layer_id: None,
             id: FrameId(0),
             debug: debug,
             frame_builder: None,
             frame_builder_config: config,
         }
     }
 
     pub fn reset(&mut self)
@@ -332,19 +334,27 @@ impl Frame {
                   cursor: WorldPoint,
                   phase: ScrollEventPhase)
                   -> bool {
         let root_scroll_layer_id = match self.root_scroll_layer_id {
             Some(root_scroll_layer_id) => root_scroll_layer_id,
             None => return false,
         };
 
-        let scroll_layer_id = match self.get_scroll_layer(&cursor, root_scroll_layer_id) {
-            Some(scroll_layer_id) => scroll_layer_id,
-            None => return false,
+        let scroll_layer_id = match (
+            phase,
+            self.get_scroll_layer(&cursor, root_scroll_layer_id),
+            self.current_scroll_layer_id) {
+            (ScrollEventPhase::Start, Some(scroll_layer_id), _) => {
+                self.current_scroll_layer_id = Some(scroll_layer_id);
+                scroll_layer_id
+            },
+            (ScrollEventPhase::Start, None, _) => return false,
+            (_, _, Some(scroll_layer_id)) => scroll_layer_id,
+            (_, _, None) => return false,
         };
 
         let scroll_root_id = match scroll_layer_id.info {
             ScrollLayerInfo::Scrollable(_, scroll_root_id) => scroll_root_id,
             ScrollLayerInfo::Fixed => unreachable!("Tried to scroll a fixed position layer."),
         };
 
         let mut scrolled_a_layer = false;
@@ -361,30 +371,30 @@ impl Frame {
 
             if layer.scrolling.started_bouncing_back && phase == ScrollEventPhase::Move(false) {
                 continue;
             }
 
             let mut delta = match scroll_location {
                 ScrollLocation::Delta(delta) => delta,
                 ScrollLocation::Start => {
-                    if layer.scrolling.offset.y.round() <= 0.0 {
+                    if layer.scrolling.offset.y.round() >= 0.0 {
                         // Nothing to do on this layer.
                         continue;
                     }
 
                     layer.scrolling.offset.y = 0.0;
                     scrolled_a_layer = true;
                     continue;
                 },
                 ScrollLocation::End => {
                     let end_pos = layer.local_viewport_rect.size.height
                                   - layer.content_size.height;
 
-                    if layer.scrolling.offset.y.round() >= end_pos {
+                    if layer.scrolling.offset.y.round() <= end_pos {
                         // Nothing to do on this layer.
                         continue;
                     }
 
                     layer.scrolling.offset.y = end_pos;
                     scrolled_a_layer = true;
                     continue;
                 }
@@ -498,17 +508,26 @@ impl Frame {
         let root_viewport = LayerRect::new(LayerPoint::zero(), root_pipeline.viewport_size);
         let layer = Layer::new(&root_viewport,
                                root_clip.main.size,
                                &LayerToScrollTransform::identity(),
                                root_pipeline_id);
         self.layers.insert(root_fixed_layer_id, layer.clone());
         self.layers.insert(root_scroll_layer_id, layer);
 
+        let background_color = root_pipeline.background_color.and_then(|color| {
+            if color.a > 0.0 {
+                Some(color)
+            } else {
+                None
+            }
+        });
+
         let mut frame_builder = FrameBuilder::new(root_pipeline.viewport_size,
+                                                  background_color,
                                                   self.debug,
                                                   self.frame_builder_config);
 
         {
             let mut context = FlattenContext {
                 scene: scene,
                 pipeline_sizes: pipeline_sizes,
                 builder: &mut frame_builder,
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -232,16 +232,17 @@ pub enum VertexAttribute {
     // instance-frequency primitive attributes
     GlobalPrimId,
     PrimitiveAddress,
     TaskIndex,
     ClipTaskIndex,
     LayerIndex,
     ElementIndex,
     UserData,
+    ZIndex,
 }
 
 #[derive(Clone, Copy, Debug)]
 pub enum ClearAttribute {
     // vertex frequency
     Position,
     // instance frequency
     Rectangle,
@@ -290,16 +291,17 @@ impl PackedColor {
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedVertex {
     pub pos: [f32; 2],
 }
 
 #[derive(Debug)]
+#[repr(C)]
 pub struct DebugFontVertex {
     pub x: f32,
     pub y: f32,
     pub color: PackedColor,
     pub u: f32,
     pub v: f32,
 }
 
@@ -310,16 +312,17 @@ impl DebugFontVertex {
             y: y,
             color: color,
             u: u,
             v: v,
         }
     }
 }
 
+#[repr(C)]
 pub struct DebugColorVertex {
     pub x: f32,
     pub y: f32,
     pub color: PackedColor,
 }
 
 impl DebugColorVertex {
     pub fn new(x: f32, y: f32, color: PackedColor) -> DebugColorVertex {
@@ -337,18 +340,21 @@ pub enum RenderTargetMode {
     SimpleRenderTarget,
     LayerRenderTarget(i32),      // Number of texture layers
 }
 
 pub enum TextureUpdateOp {
     Create(u32, u32, ImageFormat, TextureFilter, RenderTargetMode, Option<Arc<Vec<u8>>>),
     Update(u32, u32, u32, u32, Arc<Vec<u8>>, Option<u32>),
     Grow(u32, u32, ImageFormat, TextureFilter, RenderTargetMode),
+    Free
 }
 
+pub type ExternalImageUpdateList = Vec<ExternalImageId>;
+
 pub struct TextureUpdate {
     pub id: CacheTextureId,
     pub op: TextureUpdateOp,
 }
 
 pub struct TextureUpdateList {
     pub updates: Vec<TextureUpdate>,
 }
@@ -387,19 +393,18 @@ impl RendererFrame {
             pipeline_epoch_map: pipeline_epoch_map,
             layers_bouncing_back: layers_bouncing_back,
             frame: frame,
         }
     }
 }
 
 pub enum ResultMsg {
-    UpdateTextureCache(TextureUpdateList),
     RefreshShader(PathBuf),
-    NewFrame(RendererFrame, BackendProfileCounters),
+    NewFrame(RendererFrame, TextureUpdateList, ExternalImageUpdateList, BackendProfileCounters),
 }
 
 #[repr(u32)]
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum AxisDirection {
     Horizontal,
     Vertical,
 }
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -71,17 +71,17 @@ mod spring;
 mod texture_cache;
 mod tiling;
 mod util;
 
 mod shader_source {
     include!(concat!(env!("OUT_DIR"), "/shaders.rs"));
 }
 
-pub use record::{ApiRecordingReceiver, set_recording_detour};
+pub use record::{ApiRecordingReceiver, set_recording_detour, WEBRENDER_RECORDING_HEADER};
 
 mod platform {
     #[cfg(target_os="macos")]
     pub use platform::macos::font;
     #[cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))]
     pub use platform::unix::font;
     #[cfg(target_os = "windows")]
     pub use platform::windows::font;
--- a/gfx/webrender/src/mask_cache.rs
+++ b/gfx/webrender/src/mask_cache.rs
@@ -38,18 +38,16 @@ impl<'a> From<&'a ClipRegion> for ClipSo
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub struct ClipAddressRange {
     pub start: GpuStoreAddress,
     pub item_count: u32,
 }
 
-type ImageMaskIndex = u16;
-
 #[derive(Clone, Debug)]
 pub struct MaskCacheInfo {
     pub clip_range: ClipAddressRange,
     pub image: Option<(ImageMask, GpuStoreAddress)>,
     pub local_rect: Option<LayerRect>,
     pub local_inner: Option<LayerRect>,
     pub inner_rect: DeviceIntRect,
     pub outer_rect: DeviceIntRect,
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -14,32 +14,28 @@ lazy_static! {
         weight: dwrote::FontWeight::Regular,
         stretch: dwrote::FontStretch::Normal,
         style: dwrote::FontStyle::Normal,
     };
 }
 
 pub struct FontContext {
     fonts: HashMap<FontKey, dwrote::Font>,
-    gdi_interop: dwrote::GdiInterop,
-    main_display_rendering_params: dwrote::RenderingParams,
 }
 
 pub struct RasterizedGlyph {
     pub width: u32,
     pub height: u32,
     pub bytes: Vec<u8>,
 }
 
 impl FontContext {
     pub fn new() -> FontContext {
         FontContext {
             fonts: HashMap::new(),
-            gdi_interop: dwrote::GdiInterop::create(),
-            main_display_rendering_params: dwrote::RenderingParams::create_for_primary_monitor(),
         }
     }
 
     pub fn add_raw_font(&mut self, font_key: &FontKey, _: &[u8]) {
         if self.fonts.contains_key(font_key) {
             return
         }
 
@@ -64,75 +60,113 @@ impl FontContext {
                                                 glyph: u32,
                                                 render_mode: Option<FontRenderMode>)
                                                 -> (Option<GlyphDimensions>, Option<RasterizedGlyph>)
     {
         let font = self.fonts.get(&font_key).unwrap();
         let face = font.create_font_face();
         let glyph = glyph as u16;
 
-        let gm = face.get_design_glyph_metrics(&[glyph], false)[0];
-
-        let em_size = size.to_f32_px() / 16.; // (16px per em)
-        let du_per_pixel = face.metrics().designUnitsPerEm as f32 / 16.; // (once again, 16px per em)
-        let scaled_du_to_pixels = em_size / du_per_pixel;
+        let glyph = glyph as u16;
+        let advance = 0.0f32;
+        let offset = dwrote::GlyphOffset { advanceOffset: 0.0, ascenderOffset: 0.0 };
 
-        let width = (gm.advanceWidth as i32 - (gm.leftSideBearing + gm.rightSideBearing)) as f32
-            * scaled_du_to_pixels;
-        let height = (gm.advanceHeight as i32 - (gm.topSideBearing + gm.bottomSideBearing)) as f32
-            * scaled_du_to_pixels;
-        let x = (gm.leftSideBearing) as f32
-            * scaled_du_to_pixels;
-        let y = (gm.verticalOriginY - gm.topSideBearing) as f32
-            * scaled_du_to_pixels;
-
-        // Round the glyph rect outwards.  These values are in device pixels.
-        let x0_i = x.floor() as i32;
-        let y0_i = y.floor() as i32;
-        let x1_i = (x + width).ceil() as i32;
-        let y1_i = (y + height).ceil() as i32;
-        let width_u = (x1_i - x0_i) as u32;
-        let height_u = (y1_i - y0_i) as u32;
-
-        let dims = GlyphDimensions {
-            left: x0_i, top: y0_i,
-            width: width_u, height: height_u,
+        let glyph_run = dwrote::DWRITE_GLYPH_RUN {
+            fontFace: unsafe { face.as_ptr() },
+            fontEmSize: size.to_f32_px(), // size in DIPs (1/96", same as CSS pixels)
+            glyphCount: 1,
+            glyphIndices: &glyph,
+            glyphAdvances: &advance,
+            glyphOffsets: &offset,
+            isSideways: 0,
+            bidiLevel: 0,
         };
 
-        // blank glyphs are None
-        if width_u == 0 || height_u == 0 {
-            return (None, None)
+        // dwrite requires DWRITE_RENDERING_MODE_ALIASED if the texture
+        // type is DWRITE_TEXTURE_ALIASED_1x1.  If CLEARTYPE_3x1,
+        // then the other modes can be used.
+
+        // TODO(vlad): get_glyph_dimensions needs to take the render mode into account
+        // but the API doesn't give it to us right now.  Just assume subpixel.
+        let (r_mode, m_mode, tex_type) = match render_mode {
+            Some(FontRenderMode::Mono) => (dwrote::DWRITE_RENDERING_MODE_ALIASED,
+                                           dwrote::DWRITE_MEASURING_MODE_GDI_NATURAL,
+                                           dwrote::DWRITE_TEXTURE_ALIASED_1x1),
+            Some(FontRenderMode::Alpha) => (dwrote::DWRITE_RENDERING_MODE_GDI_NATURAL,
+                                            dwrote::DWRITE_MEASURING_MODE_GDI_NATURAL,
+                                            dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1),
+            Some(FontRenderMode::Subpixel) | None => (dwrote::DWRITE_RENDERING_MODE_CLEARTYPE_GDI_NATURAL,
+                                                      dwrote::DWRITE_MEASURING_MODE_GDI_NATURAL,
+                                                      dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1),
+        };
+
+        // XX use the xform to handle subpixel positioning (what skia does), I believe that keeps
+        //let xform = dwrote::DWRITE_MATRIX { m11: 1.0, m12: 0.0, m21: 0.0, m22: 1.0, dx: 0.0, dy: 0.0 };
+        let analysis = dwrote::GlyphRunAnalysis::create(&glyph_run, 1.0, None, r_mode, m_mode, 0.0, 0.0);
+        let bounds = analysis.get_alpha_texture_bounds(tex_type);
+
+        let width = (bounds.right - bounds.left) as u32;
+        let height = (bounds.bottom - bounds.top) as u32;
+        let dims = GlyphDimensions {
+            left: bounds.left,
+            top: -bounds.top,
+            width: width,
+            height: height,
+        };
+
+        // if empty, then nothing
+        if dims.width == 0 || dims.height == 0 {
+            return (None, None);
         }
 
         // if we weren't asked to rasterize, we're done
         if render_mode.is_none() {
             return (Some(dims), None);
         }
 
-        // the coords and size to draw_glyph_run are in 1/96ths of an inch
-
-        // size is in app units, which we convert to CSS pixels (1/96"), which
-        // is the same as DIPs.
-        let size_dip = size.to_f32_px();
-
-        let rt = self.gdi_interop.create_bitmap_render_target(width_u, height_u);
-        rt.set_pixels_per_dip(1.);
-        rt.draw_glyph_run(-x, y,
-                          dwrote::DWRITE_MEASURING_MODE_NATURAL,
-                          &face, size_dip,
-                          &[glyph], &[0.0],
-                          &[dwrote::GlyphOffset { advanceOffset: 0., ascenderOffset: 0. }],
-                          &self.main_display_rendering_params,
-                          &(1.0, 1.0, 1.0));
-        let bytes = rt.get_opaque_values_as_mask();
+        let pixels = analysis.create_alpha_texture(tex_type, bounds);
+        let rgba_pixels = match render_mode.unwrap() {
+            FontRenderMode::Mono => {
+                let mut rgba_pixels = vec![0; pixels.len() * 4];
+                for i in 0..pixels.len() {
+                    rgba_pixels[i*4+0] = 0xff;
+                    rgba_pixels[i*4+1] = 0xff;
+                    rgba_pixels[i*4+2] = 0xff;
+                    rgba_pixels[i*4+3] = pixels[i];
+                }
+                rgba_pixels
+            }
+            FontRenderMode::Alpha => {
+                let mut rgba_pixels = vec![0; pixels.len()/3 * 4];
+                for i in 0..pixels.len()/3 {
+                    // Average the three coverage bytes of each pixel; TODO(vlad): we likely need to do something smarter
+                    let alpha = (pixels[i*3+0] as u32 + pixels[i*3+1] as u32 + pixels[i*3+2] as u32) / 3;
+                    rgba_pixels[i*4+0] = 0xff;
+                    rgba_pixels[i*4+1] = 0xff;
+                    rgba_pixels[i*4+2] = 0xff;
+                    rgba_pixels[i*4+3] = alpha as u8;
+                }
+                rgba_pixels
+            }
+            FontRenderMode::Subpixel => {
+                let mut rgba_pixels = vec![0; pixels.len()/3 * 4];
+                for i in 0..pixels.len()/3 {
+                    rgba_pixels[i*4+0] = pixels[i*3+0];
+                    rgba_pixels[i*4+1] = pixels[i*3+1];
+                    rgba_pixels[i*4+2] = pixels[i*3+2];
+                    rgba_pixels[i*4+3] = 0xff;
+                }
+                rgba_pixels
+            }
+        };
 
         (Some(dims), Some(RasterizedGlyph {
-            width: width_u,
-            height: height_u,
-            bytes: bytes
+            width: dims.width,
+            height: dims.height,
+            bytes: rgba_pixels,
         }))
     }
 
     pub fn get_glyph_dimensions(&self,
                                 font_key: FontKey,
                                 size: Au,
                                 glyph: u32) -> Option<GlyphDimensions> {
         let (maybe_dims, _) =
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -110,16 +110,17 @@ pub struct PrimitiveMetadata {
     // that the box-shadow shader needs to run on. For
     // text-shadow, this creates a render task chain
     // that implements a 2-pass separable blur on a
     // text run.
     pub render_task: Option<RenderTask>,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct RectanglePrimitive {
     pub color: ColorF,
 }
 
 #[derive(Debug)]
 pub enum ImagePrimitiveKind {
     Image(ImageKey, ImageRendering, LayerSize),
     WebGL(WebGLContextId),
@@ -128,32 +129,34 @@ pub enum ImagePrimitiveKind {
 #[derive(Debug)]
 pub struct ImagePrimitiveCpu {
     pub kind: ImagePrimitiveKind,
     pub color_texture_id: SourceTexture,
     pub resource_address: GpuStoreAddress,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct ImagePrimitiveGpu {
     pub stretch_size: LayerSize,
     pub tile_spacing: LayerSize,
 }
 
 #[derive(Debug)]
 pub struct YuvImagePrimitiveCpu {
     pub y_key: ImageKey,
     pub u_key: ImageKey,
     pub v_key: ImageKey,
     pub y_texture_id: SourceTexture,
     pub u_texture_id: SourceTexture,
     pub v_texture_id: SourceTexture,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct YuvImagePrimitiveGpu {
     pub y_uv0: DevicePoint,
     pub y_uv1: DevicePoint,
     pub u_uv0: DevicePoint,
     pub u_uv1: DevicePoint,
     pub v_uv0: DevicePoint,
     pub v_uv1: DevicePoint,
     pub size: LayerSize,
@@ -178,32 +181,34 @@ impl YuvImagePrimitiveGpu {
 }
 
 #[derive(Debug, Clone)]
 pub struct BorderPrimitiveCpu {
     pub inner_rect: LayerRect,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct BorderPrimitiveGpu {
     pub style: [f32; 4],
     pub widths: [f32; 4],
     pub colors: [ColorF; 4],
     pub radii: [LayerSize; 4],
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub struct BoxShadowPrimitiveCacheKey {
     pub shadow_rect_size: Size2D<Au>,
     pub border_radius: Au,
     pub blur_radius: Au,
     pub inverted: bool,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct BoxShadowPrimitiveGpu {
     pub src_rect: LayerRect,
     pub bs_rect: LayerRect,
     pub color: ColorF,
     pub border_radius: f32,
     pub edge_size: f32,
     pub blur_radius: f32,
     pub inverted: f32,
@@ -213,44 +218,48 @@ pub struct BoxShadowPrimitiveGpu {
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum GradientType {
     Horizontal,
     Vertical,
     Rotated,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct GradientStop {
     color: ColorF,
     offset: f32,
     padding: [f32; 3],
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct GradientPrimitiveGpu {
     pub start_point: LayerPoint,
     pub end_point: LayerPoint,
     pub kind: f32,
     pub padding: [f32; 3],
 }
 
 #[derive(Debug)]
 pub struct GradientPrimitiveCpu {
     pub stops_range: ItemRange,
     pub kind: GradientType,
     pub reverse_stops: bool,
     pub cache_dirty: bool,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 struct InstanceRect {
     rect: LayerRect,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct TextRunPrimitiveGpu {
     pub color: ColorF,
 }
 
 #[derive(Debug, Clone)]
 pub struct TextRunPrimitiveCpu {
     pub font_key: FontKey,
     pub logical_font_size: Au,
@@ -261,28 +270,31 @@ pub struct TextRunPrimitiveCpu {
     pub glyph_indices: Vec<u32>,
     pub color_texture_id: SourceTexture,
     pub color: ColorF,
     pub render_mode: FontRenderMode,
     pub resource_address: GpuStoreAddress,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 struct GlyphPrimitive {
     offset: LayerPoint,
     padding: LayerPoint,
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 struct ClipRect {
     rect: LayerRect,
     padding: [f32; 4],
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 struct ClipCorner {
     rect: LayerRect,
     outer_radius_x: f32,
     outer_radius_y: f32,
     inner_radius_x: f32,
     inner_radius_y: f32,
 }
 
@@ -294,16 +306,17 @@ impl ClipCorner {
             outer_radius_y: outer_radius,
             inner_radius_x: inner_radius,
             inner_radius_y: inner_radius,
         }
     }
 }
 
 #[derive(Debug, Clone)]
+#[repr(C)]
 pub struct ImageMaskData {
     uv_rect: DeviceRect,
     local_rect: LayerRect,
 }
 
 #[derive(Debug, Clone)]
 pub struct ClipData {
     rect: ClipRect,
@@ -1006,16 +1019,17 @@ impl PrimitiveStore {
             self.prims_to_resolve.push(prim_index);
         }
 
         rebuild_bounding_rect
     }
 }
 
 #[derive(Clone)]
+#[repr(C)]
 pub struct GpuBlock16 {
     data: [f32; 4],
 }
 
 impl Default for GpuBlock16 {
     fn default() -> GpuBlock16 {
         GpuBlock16 {
             data: unsafe { mem::uninitialized() }
@@ -1059,16 +1073,17 @@ impl From<GlyphPrimitive> for GpuBlock16
     fn from(data: GlyphPrimitive) -> GpuBlock16 {
         unsafe {
             mem::transmute::<GlyphPrimitive, GpuBlock16>(data)
         }
     }
 }
 
 #[derive(Clone)]
+#[repr(C)]
 pub struct GpuBlock32 {
     data: [f32; 8],
 }
 
 impl Default for GpuBlock32 {
     fn default() -> GpuBlock32 {
         GpuBlock32 {
             data: unsafe { mem::uninitialized() }
@@ -1120,16 +1135,17 @@ impl From<ClipCorner> for GpuBlock32 {
     fn from(data: ClipCorner) -> GpuBlock32 {
         unsafe {
             mem::transmute::<ClipCorner, GpuBlock32>(data)
         }
     }
 }
 
 #[derive(Clone)]
+#[repr(C)]
 pub struct GpuBlock64 {
     data: [f32; 16],
 }
 
 impl Default for GpuBlock64 {
     fn default() -> GpuBlock64 {
         GpuBlock64 {
             data: unsafe { mem::uninitialized() }
@@ -1141,16 +1157,17 @@ impl From<BoxShadowPrimitiveGpu> for Gpu
     fn from(data: BoxShadowPrimitiveGpu) -> GpuBlock64 {
         unsafe {
             mem::transmute::<BoxShadowPrimitiveGpu, GpuBlock64>(data)
         }
     }
 }
 
 #[derive(Clone)]
+#[repr(C)]
 pub struct GpuBlock128 {
     data: [f32; 32],
 }
 
 impl Default for GpuBlock128 {
     fn default() -> GpuBlock128 {
         GpuBlock128 {
             data: unsafe { mem::uninitialized() }
--- a/gfx/webrender/src/record.rs
+++ b/gfx/webrender/src/record.rs
@@ -4,44 +4,49 @@
 
 use bincode::serde::serialize;
 use bincode;
 use std::mem;
 use std::any::TypeId;
 use std::fs::{File, OpenOptions};
 use std::io::Write;
 use std::ops::DerefMut;
-use std::path::PathBuf;
-use std::sync::{Arc, Mutex};
+use std::sync::Mutex;
 use webrender_traits::ApiMsg;
 use byteorder::{LittleEndian, WriteBytesExt};
 
 lazy_static! {
-    static ref WEBRENDER_RECORDING_DETOUR: Arc<Mutex<Option<Box<ApiRecordingReceiver>>>> = Arc::new(Mutex::new(None));
+    static ref WEBRENDER_RECORDING_DETOUR: Mutex<Option<Box<ApiRecordingReceiver>>> = Mutex::new(None);
 }
 
+pub static WEBRENDER_RECORDING_HEADER: u64 = 0xbeefbeefbeefbe01u64;
+static mut CURRENT_FRAME_NUMBER: u32 = 0xffffffffu32;
+
 pub trait ApiRecordingReceiver: Send {
     fn write_msg(&mut self, frame: u32, msg: &ApiMsg);
     fn write_payload(&mut self, frame: u32, data: &[u8]);
 }
 
 pub fn set_recording_detour(detour: Option<Box<ApiRecordingReceiver>>) {
     let mut recorder = WEBRENDER_RECORDING_DETOUR.lock();
     *recorder.as_mut().unwrap().deref_mut() = detour;
 }
 
 fn write_data(frame: u32, data: &[u8]) {
     let filename = format!("record/frame_{}.bin", frame);
-    let mut file = if !PathBuf::from(&filename).exists() {
+    let mut file = if unsafe { CURRENT_FRAME_NUMBER != frame } {
+        unsafe { CURRENT_FRAME_NUMBER = frame; }
+
         let mut file = File::create(filename).unwrap();
-
         let apimsg_type_id = unsafe {
             assert!(mem::size_of::<TypeId>() == mem::size_of::<u64>());
             mem::transmute::<TypeId, u64>(TypeId::of::<ApiMsg>())
         };
+
+        file.write_u64::<LittleEndian>(WEBRENDER_RECORDING_HEADER).ok();
         file.write_u64::<LittleEndian>(apimsg_type_id).ok();
         file
     } else {
         OpenOptions::new().append(true).create(false).open(filename).unwrap()
     };
     file.write_u32::<LittleEndian>(data.len() as u32).ok();
     file.write(data).ok();
 }
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -426,28 +426,25 @@ impl RenderBackend {
         self.scene.pipeline_sizes = updated_pipeline_sizes;
     }
 
     fn render(&mut self) -> RendererFrame {
         let frame = self.frame.build(&mut self.resource_cache,
                                      &self.scene.pipeline_auxiliary_lists,
                                      self.device_pixel_ratio);
 
-        let pending_update = self.resource_cache.pending_updates();
-        if !pending_update.updates.is_empty() {
-            self.result_tx.send(ResultMsg::UpdateTextureCache(pending_update)).unwrap();
-        }
-
         frame
     }
 
     fn publish_frame(&mut self,
                      frame: RendererFrame,
                      profile_counters: &mut BackendProfileCounters) {
-        let msg = ResultMsg::NewFrame(frame, profile_counters.clone());
+        let pending_update = self.resource_cache.pending_updates();
+        let pending_external_image_update = self.resource_cache.pending_external_image_updates();
+        let msg = ResultMsg::NewFrame(frame, pending_update, pending_external_image_update, profile_counters.clone());
         self.result_tx.send(msg).unwrap();
         profile_counters.reset();
     }
 
     fn publish_frame_and_notify_compositor(&mut self,
                                            frame: RendererFrame,
                                            profile_counters: &mut BackendProfileCounters) {
         self.publish_frame(frame, profile_counters);
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -6,55 +6,54 @@
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use debug_colors;
 use debug_render::DebugRenderer;
-use device::{Device, ProgramId, TextureId, VertexFormat, GpuMarker, GpuProfiler};
+use device::{DepthFunction, Device, ProgramId, TextureId, VertexFormat, GpuMarker, GpuProfiler};
 use device::{TextureFilter, VAOId, VertexUsageHint, FileWatcherHandler, TextureTarget};
 use euclid::Matrix4D;
 use fnv::FnvHasher;
 use internal_types::{CacheTextureId, RendererFrame, ResultMsg, TextureUpdateOp};
-use internal_types::{TextureUpdateList, PackedVertex, RenderTargetMode};
+use internal_types::{ExternalImageUpdateList, TextureUpdateList, PackedVertex, RenderTargetMode};
 use internal_types::{ORTHO_NEAR_PLANE, ORTHO_FAR_PLANE, SourceTexture};
 use internal_types::{BatchTextures, TextureSampler, GLContextHandleWrapper};
 use profiler::{Profiler, BackendProfileCounters};
 use profiler::{GpuProfileTag, RendererProfileTimers, RendererProfileCounters};
 use render_backend::RenderBackend;
 use std::cmp;
 use std::collections::HashMap;
 use std::f32;
 use std::hash::BuildHasherDefault;
 use std::mem;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
-use tiling::{Frame, FrameBuilderConfig, PrimitiveBatchData};
-use tiling::{BlurCommand, CacheClipInstance, ClearTile, PrimitiveInstance, RenderTarget};
+use tiling::{Frame, FrameBuilderConfig, PrimitiveBatch, PrimitiveBatchData};
+use tiling::{BlurCommand, CacheClipInstance, PrimitiveInstance, RenderTarget};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 use webrender_traits::{ColorF, Epoch, PipelineId, RenderNotifier, RenderDispatcher};
 use webrender_traits::{ExternalImageId, ImageFormat, RenderApiSender, RendererKind};
-use webrender_traits::{DeviceSize, DevicePoint, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
+use webrender_traits::{DeviceIntRect, DeviceSize, DevicePoint, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
 use webrender_traits::channel;
 use webrender_traits::VRCompositorHandler;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
 const GPU_TAG_CACHE_BOX_SHADOW: GpuProfileTag = GpuProfileTag { label: "C_BoxShadow", color: debug_colors::BLACK };
 const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag { label: "C_Clip", color: debug_colors::PURPLE };
 const GPU_TAG_CACHE_TEXT_RUN: GpuProfileTag = GpuProfileTag { label: "C_TextRun", color: debug_colors::MISTYROSE };
 const GPU_TAG_INIT: GpuProfileTag = GpuProfileTag { label: "Init", color: debug_colors::WHITE };
 const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag { label: "Target", color: debug_colors::SLATEGREY };
-const GPU_TAG_CLEAR_TILES: GpuProfileTag = GpuProfileTag { label: "Clear Tiles", color: debug_colors::BROWN };
 const GPU_TAG_PRIM_RECT: GpuProfileTag = GpuProfileTag { label: "Rect", color: debug_colors::RED };
 const GPU_TAG_PRIM_IMAGE: GpuProfileTag = GpuProfileTag { label: "Image", color: debug_colors::GREEN };
 const GPU_TAG_PRIM_YUV_IMAGE: GpuProfileTag = GpuProfileTag { label: "YuvImage", color: debug_colors::DARKGREEN };
 const GPU_TAG_PRIM_BLEND: GpuProfileTag = GpuProfileTag { label: "Blend", color: debug_colors::LIGHTBLUE };
 const GPU_TAG_PRIM_COMPOSITE: GpuProfileTag = GpuProfileTag { label: "Composite", color: debug_colors::MAGENTA };
 const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag { label: "TextRun", color: debug_colors::BLUE };
 const GPU_TAG_PRIM_GRADIENT: GpuProfileTag = GpuProfileTag { label: "Gradient", color: debug_colors::YELLOW };
 const GPU_TAG_PRIM_ANGLE_GRADIENT: GpuProfileTag = GpuProfileTag { label: "AngleGradient", color: debug_colors::POWDERBLUE };
@@ -118,17 +117,16 @@ impl VertexDataTexture {
 }
 
 const TRANSFORM_FEATURE: &'static str = "TRANSFORM";
 const SUBPIXEL_AA_FEATURE: &'static str = "SUBPIXEL_AA";
 const CLIP_FEATURE: &'static str = "CLIP";
 
 enum ShaderKind {
     Primitive,
-    Clear,
     Cache,
     ClipCache,
 }
 
 struct LazilyCompiledShader {
     id: Option<ProgramId>,
     name: &'static str,
     kind: ShaderKind,
@@ -153,19 +151,16 @@ impl LazilyCompiledShader {
         }
 
         shader
     }
 
     fn get(&mut self, device: &mut Device) -> ProgramId {
         if self.id.is_none() {
             let id = match self.kind {
-                ShaderKind::Clear => {
-                    create_clear_shader(self.name, device)
-                }
                 ShaderKind::Primitive | ShaderKind::Cache => {
                     create_prim_shader(self.name,
                                        device,
                                        &self.features)
                 }
                 ShaderKind::ClipCache => {
                     create_clip_shader(self.name, device)
                 }
@@ -270,26 +265,16 @@ fn create_clip_shader(name: &'static str
     let program_id = device.create_program_with_prefix(name,
                                                        includes,
                                                        Some(prefix));
     debug!("ClipShader {}", name);
 
     program_id
 }
 
-fn create_clear_shader(name: &'static str, device: &mut Device) -> ProgramId {
-    let includes = &[];
-    let program_id = device.create_program_with_prefix(name,
-                                                       includes,
-                                                       None);
-    debug!("ClearShader {}", name);
-
-    program_id
-}
-
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
 pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     device: Device,
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     current_frame: Option<RendererFrame>,
@@ -298,17 +283,16 @@ pub struct Renderer {
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
     cs_box_shadow: LazilyCompiledShader,
     cs_text_run: LazilyCompiledShader,
     cs_blur: LazilyCompiledShader,
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
-    cs_clip_clear: LazilyCompiledShader,
     cs_clip_copy: LazilyCompiledShader,
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
 
     // The are "primitive shaders". These shaders draw and blend
     // final results on screen. They are aware of tile boundaries.
     // Most draw directly to the framebuffer, but some use inputs
     // from the cache shaders to draw. Specifically, the box
@@ -325,35 +309,32 @@ pub struct Renderer {
     ps_gradient: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
     ps_box_shadow: PrimitiveShader,
     ps_cache_image: PrimitiveShader,
 
     ps_blend: LazilyCompiledShader,
     ps_composite: LazilyCompiledShader,
 
-    tile_clear_shader: LazilyCompiledShader,
-
     notifier: Arc<Mutex<Option<Box<RenderNotifier>>>>,
 
     enable_profiler: bool,
     clear_framebuffer: bool,
-    clear_empty_tiles: bool,
     clear_color: ColorF,
     debug: DebugRenderer,
+    render_target_debug: bool,
     backend_profile_counters: BackendProfileCounters,
     profile_counters: RendererProfileCounters,
     profiler: Profiler,
     last_time: u64,
 
     render_targets: Vec<TextureId>,
 
     gpu_profile: GpuProfiler<GpuProfileTag>,
     prim_vao_id: VAOId,
-    clear_vao_id: VAOId,
     blur_vao_id: VAOId,
     clip_vao_id: VAOId,
 
     layer_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
     prim_geom_texture: VertexDataTexture,
     data16_texture: VertexDataTexture,
     data32_texture: VertexDataTexture,
@@ -363,21 +344,23 @@ pub struct Renderer {
 
     pipeline_epoch_map: HashMap<PipelineId, Epoch, BuildHasherDefault<FnvHasher>>,
     /// Used to dispatch functions to the main thread's event loop.
     /// Required to allow GLContext sharing in some implementations like WGL.
     main_thread_dispatcher: Arc<Mutex<Option<Box<RenderDispatcher>>>>,
 
     /// A vector for fast resolves of texture cache IDs to
     /// native texture IDs. This maps to a free-list managed
-    /// by the backend thread / texture cache. Because of this,
-    /// items in this array may be None if they have been
-    /// freed by the backend thread. This saves having to
+    /// by the backend thread / texture cache. We free the
+    /// texture memory associated with a TextureId when its
+    /// texture cache ID is freed by the texture cache, but
+    /// reuse the TextureId when the texture cache's free
+    /// list reuses the texture cache ID. This saves having to
     /// use a hashmap, and allows a flat vector for performance.
-    cache_texture_id_map: Vec<Option<TextureId>>,
+    cache_texture_id_map: Vec<TextureId>,
 
     /// Optional trait object that allows the client
     /// application to provide external buffers for image data.
     external_image_handler: Option<Box<ExternalImageHandler>>,
 
     /// Map of external image IDs to native textures.
     external_images: HashMap<ExternalImageId, TextureId, BuildHasherDefault<FnvHasher>>,
 
@@ -396,17 +379,16 @@ impl Renderer {
     ///
     /// ```rust,ignore
     /// # use webrender::renderer::Renderer;
     /// # use std::path::PathBuf;
     /// let opts = webrender::RendererOptions {
     ///    device_pixel_ratio: 1.0,
     ///    resource_override_path: None,
     ///    enable_aa: false,
-    ///    enable_msaa: false,
     ///    enable_profiler: false,
     /// };
     /// let (renderer, sender) = Renderer::new(opts);
     /// ```
     pub fn new(options: RendererOptions) -> (Renderer, RenderApiSender) {
         let (api_tx, api_rx) = channel::msg_channel().unwrap();
         let (payload_tx, payload_rx) = channel::payload_channel().unwrap();
         let (result_tx, result_rx) = channel();
@@ -434,21 +416,16 @@ impl Renderer {
                                                     &mut device,
                                                     options.precache_shaders);
         let cs_blur = LazilyCompiledShader::new(ShaderKind::Cache,
                                                 "cs_blur",
                                                  &[],
                                                  &mut device,
                                                  options.precache_shaders);
 
-        let cs_clip_clear = LazilyCompiledShader::new(ShaderKind::ClipCache,
-                                                      "cs_clip_clear",
-                                                      &[],
-                                                      &mut device,
-                                                      options.precache_shaders);
         let cs_clip_copy = LazilyCompiledShader::new(ShaderKind::ClipCache,
                                                      "cs_clip_copy",
                                                      &[],
                                                      &mut device,
                                                      options.precache_shaders);
         let cs_clip_rectangle = LazilyCompiledShader::new(ShaderKind::ClipCache,
                                                           "cs_clip_rectangle",
                                                           &[],
@@ -513,22 +490,16 @@ impl Renderer {
                                                  &mut device,
                                                  options.precache_shaders);
         let ps_composite = LazilyCompiledShader::new(ShaderKind::Primitive,
                                                      "ps_composite",
                                                      &[],
                                                      &mut device,
                                                      options.precache_shaders);
 
-        let tile_clear_shader = LazilyCompiledShader::new(ShaderKind::Clear,
-                                                          "ps_clear",
-                                                          &[],
-                                                          &mut device,
-                                                          options.precache_shaders);
-
         let mut texture_cache = TextureCache::new();
 
         let white_pixels: Vec<u8> = vec![
             0xff, 0xff, 0xff, 0xff,
             0xff, 0xff, 0xff, 0xff,
             0xff, 0xff, 0xff, 0xff,
             0xff, 0xff, 0xff, 0xff,
         ];
@@ -589,17 +560,16 @@ impl Renderer {
             },
         ];
 
         let prim_vao_id = device.create_vao(VertexFormat::Triangles, mem::size_of::<PrimitiveInstance>() as i32);
         device.bind_vao(prim_vao_id);
         device.update_vao_indices(prim_vao_id, &quad_indices, VertexUsageHint::Static);
         device.update_vao_main_vertices(prim_vao_id, &quad_vertices, VertexUsageHint::Static);
 
-        let clear_vao_id = device.create_vao_with_new_instances(VertexFormat::Clear, mem::size_of::<ClearTile>() as i32, prim_vao_id);
         let blur_vao_id = device.create_vao_with_new_instances(VertexFormat::Blur, mem::size_of::<BlurCommand>() as i32, prim_vao_id);
         let clip_vao_id = device.create_vao_with_new_instances(VertexFormat::Clip, mem::size_of::<CacheClipInstance>() as i32, prim_vao_id);
 
         device.end_frame();
 
         let main_thread_dispatcher = Arc::new(Mutex::new(None));
         let backend_notifier = notifier.clone();
         let backend_main_thread_dispatcher = main_thread_dispatcher.clone();
@@ -614,16 +584,17 @@ impl Renderer {
             RendererKind::OSMesa => GLContextHandleWrapper::current_osmesa_handle(),
         };
 
         let config = FrameBuilderConfig::new(options.enable_scrollbars,
                                              options.enable_subpixel_aa);
 
         let debug = options.debug;
         let (device_pixel_ratio, enable_aa) = (options.device_pixel_ratio, options.enable_aa);
+        let render_target_debug = options.render_target_debug;
         let payload_tx_for_backend = payload_tx.clone();
         let enable_recording = options.enable_recording;
         thread::spawn(move || {
             let mut backend = RenderBackend::new(api_rx,
                                                  payload_rx,
                                                  payload_tx_for_backend,
                                                  result_tx,
                                                  device_pixel_ratio,
@@ -640,21 +611,19 @@ impl Renderer {
         });
 
         let renderer = Renderer {
             result_rx: result_rx,
             device: device,
             current_frame: None,
             pending_texture_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
-            tile_clear_shader: tile_clear_shader,
             cs_box_shadow: cs_box_shadow,
             cs_text_run: cs_text_run,
             cs_blur: cs_blur,
-            cs_clip_clear: cs_clip_clear,
             cs_clip_copy: cs_clip_copy,
             cs_clip_rectangle: cs_clip_rectangle,
             cs_clip_image: cs_clip_image,
             ps_rectangle: ps_rectangle,
             ps_rectangle_clip: ps_rectangle_clip,
             ps_text_run: ps_text_run,
             ps_text_run_subpixel: ps_text_run_subpixel,
             ps_image: ps_image,
@@ -663,28 +632,27 @@ impl Renderer {
             ps_box_shadow: ps_box_shadow,
             ps_gradient: ps_gradient,
             ps_angle_gradient: ps_angle_gradient,
             ps_cache_image: ps_cache_image,
             ps_blend: ps_blend,
             ps_composite: ps_composite,
             notifier: notifier,
             debug: debug_renderer,
+            render_target_debug: render_target_debug,
             backend_profile_counters: BackendProfileCounters::new(),
             profile_counters: RendererProfileCounters::new(),
             profiler: Profiler::new(),
             enable_profiler: options.enable_profiler,
             clear_framebuffer: options.clear_framebuffer,
-            clear_empty_tiles: options.clear_empty_tiles,
             clear_color: options.clear_color,
             last_time: 0,
             render_targets: Vec::new(),
             gpu_profile: GpuProfiler::new(),
             prim_vao_id: prim_vao_id,
-            clear_vao_id: clear_vao_id,
             blur_vao_id: blur_vao_id,
             clip_vao_id: clip_vao_id,
             layer_texture: layer_texture,
             render_task_texture: render_task_texture,
             prim_geom_texture: prim_geom_texture,
             data16_texture: data16_texture,
             data32_texture: data32_texture,
             data64_texture: data64_texture,
@@ -735,20 +703,22 @@ impl Renderer {
 
     /// Processes the result queue.
     ///
     /// Should be called before `render()`, as texture cache updates are done here.
     pub fn update(&mut self) {
         // Pull any pending results and return the most recent.
         while let Ok(msg) = self.result_rx.try_recv() {
             match msg {
-                ResultMsg::UpdateTextureCache(update_list) => {
-                    self.pending_texture_updates.push(update_list);
-                }
-                ResultMsg::NewFrame(frame, profile_counters) => {
+                ResultMsg::NewFrame(frame, texture_update_list, external_image_update_list, profile_counters) => {
+                    self.pending_texture_updates.push(texture_update_list);
+
+                    // A new frame is ready, so release the external images queued in its update list now.
+                    self.release_external_images(external_image_update_list);
+
                     self.backend_profile_counters = profile_counters;
 
                     // Update the list of available epochs for use during reftests.
                     // This is a workaround for https://github.com/servo/servo/issues/13149.
                     for (pipeline_id, epoch) in &frame.pipeline_epoch_map {
                         self.pipeline_epoch_map.insert(*pipeline_id, *epoch);
                     }
 
@@ -773,17 +743,16 @@ impl Renderer {
             &SourceTexture::WebGL(id) => TextureId::new(id),
             &SourceTexture::External(ref key) => {
                 *self.external_images
                      .get(key)
                      .expect("BUG: External image should be resolved by now!")
             }
             &SourceTexture::TextureCache(index) => {
                 self.cache_texture_id_map[index.0]
-                    .expect("BUG: Texture should exist in texture cache map!")
             }
         }
     }
 
     /// Set a callback for handling external images.
     pub fn set_external_image_handler(&mut self, handler: Box<ExternalImageHandler>) {
         self.external_image_handler = Some(handler);
     }
@@ -873,61 +842,59 @@ impl Renderer {
 
     fn update_texture_cache(&mut self) {
         let _gm = GpuMarker::new("texture cache update");
         let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
         for update_list in pending_texture_updates.drain(..) {
             for update in update_list.updates {
                 match update.op {
                     TextureUpdateOp::Create(width, height, format, filter, mode, maybe_bytes) => {
-                        // Create a new native texture, as requested by the texture cache.
-                        let texture_id = self.device
-                                             .create_texture_ids(1, TextureTarget::Default)[0];
-
                         let CacheTextureId(cache_texture_index) = update.id;
                         if self.cache_texture_id_map.len() == cache_texture_index {
-                            // It was a new texture, so add to end of the map.
-                            self.cache_texture_id_map.push(Some(texture_id));
-                        } else {
-                            // It was re-using an item from the free-list, so store
-                            // the new ID there.
-                            debug_assert!(self.cache_texture_id_map[cache_texture_index].is_none());
-                            self.cache_texture_id_map[cache_texture_index] = Some(texture_id);
+                            // Create a new native texture, as requested by the texture cache.
+                            let texture_id = self.device
+                                             .create_texture_ids(1, TextureTarget::Default)[0];
+                            self.cache_texture_id_map.push(texture_id);
                         }
+                        let texture_id = self.cache_texture_id_map[cache_texture_index];
 
                         let maybe_slice = maybe_bytes.as_ref().map(|bytes|{ bytes.as_slice() });
                         self.device.init_texture(texture_id,
                                                  width,
                                                  height,
                                                  format,
                                                  filter,
                                                  mode,
                                                  maybe_slice);
                     }
                     TextureUpdateOp::Grow(new_width,
                                           new_height,
                                           format,
                                           filter,
                                           mode) => {
-                        let texture_id = self.cache_texture_id_map[update.id.0].unwrap();
+                        let texture_id = self.cache_texture_id_map[update.id.0];
                         self.device.resize_texture(texture_id,
                                                    new_width,
                                                    new_height,
                                                    format,
                                                    filter,
                                                    mode);
                     }
                     TextureUpdateOp::Update(x, y, width, height, bytes, stride) => {
-                        let texture_id = self.cache_texture_id_map[update.id.0].unwrap();
+                        let texture_id = self.cache_texture_id_map[update.id.0];
                         self.device.update_texture(texture_id,
                                                    x,
                                                    y,
                                                    width, height, stride,
                                                    bytes.as_slice());
                     }
+                    TextureUpdateOp::Free => {
+                        let texture_id = self.cache_texture_id_map[update.id.0];
+                        self.device.deinit_texture(texture_id);
+                    }
                 }
             }
         }
     }
 
     fn add_debug_rect(&mut self,
                       p0: DeviceIntPoint,
                       p1: DeviceIntPoint,
@@ -985,61 +952,154 @@ impl Renderer {
         }
 
         self.device.update_vao_instances(vao, data, VertexUsageHint::Stream);
         self.device.draw_indexed_triangles_instanced_u16(6, data.len() as i32);
         self.profile_counters.vertices.add(6 * data.len());
         self.profile_counters.draw_calls.inc();
     }
 
+    fn submit_batch(&mut self,
+                    batch: &PrimitiveBatch,
+                    projection: &Matrix4D<f32>) {
+        let transform_kind = batch.key.flags.transform_kind();
+        let needs_clipping = batch.key.flags.needs_clipping();
+        debug_assert!(!needs_clipping || batch.key.blend_mode == BlendMode::Alpha);
+
+        let (data, marker, shader) = match &batch.data {
+            &PrimitiveBatchData::CacheImage(ref data) => {
+                let shader = self.ps_cache_image.get(&mut self.device, transform_kind);
+                (data, GPU_TAG_PRIM_CACHE_IMAGE, shader)
+            }
+            &PrimitiveBatchData::Blend(ref data) => {
+                let shader = self.ps_blend.get(&mut self.device);
+                (data, GPU_TAG_PRIM_BLEND, shader)
+            }
+            &PrimitiveBatchData::Composite(ref data) => {
+                // The composite shader only samples from sCache.
+                let shader = self.ps_composite.get(&mut self.device);
+                (data, GPU_TAG_PRIM_COMPOSITE, shader)
+            }
+            &PrimitiveBatchData::Rectangles(ref data) => {
+                let shader = if needs_clipping {
+                    self.ps_rectangle_clip.get(&mut self.device, transform_kind)
+                } else {
+                    self.ps_rectangle.get(&mut self.device, transform_kind)
+                };
+                (data, GPU_TAG_PRIM_RECT, shader)
+            }
+            &PrimitiveBatchData::Image(ref data) => {
+                let shader = self.ps_image.get(&mut self.device, transform_kind);
+                (data, GPU_TAG_PRIM_IMAGE, shader)
+            }
+            &PrimitiveBatchData::YuvImage(ref data) => {
+                let shader = self.ps_yuv_image.get(&mut self.device, transform_kind);
+                (data, GPU_TAG_PRIM_YUV_IMAGE, shader)
+            }
+            &PrimitiveBatchData::Borders(ref data) => {
+                let shader = self.ps_border.get(&mut self.device, transform_kind);
+                (data, GPU_TAG_PRIM_BORDER, shader)
+            }
+            &PrimitiveBatchData::BoxShadow(ref data) => {
+                let shader = self.ps_box_shadow.get(&mut self.device, transform_kind);
+                (data, GPU_TAG_PRIM_BOX_SHADOW, shader)
+            }
+            &PrimitiveBatchData::TextRun(ref data) => {
+                let shader = match batch.key.blend_mode {
+                    BlendMode::Subpixel(..) => self.ps_text_run_subpixel.get(&mut self.device, transform_kind),
+                    BlendMode::Alpha | BlendMode::None => self.ps_text_run.get(&mut self.device, transform_kind),
+                };
+                (data, GPU_TAG_PRIM_TEXT_RUN, shader)
+            }
+            &PrimitiveBatchData::AlignedGradient(ref data) => {
+                let shader = self.ps_gradient.get(&mut self.device, transform_kind);
+                (data, GPU_TAG_PRIM_GRADIENT, shader)
+            }
+            &PrimitiveBatchData::AngleGradient(ref data) => {
+                let shader = self.ps_angle_gradient.get(&mut self.device, transform_kind);
+                (data, GPU_TAG_PRIM_ANGLE_GRADIENT, shader)
+            }
+        };
+
+        let _gm = self.gpu_profile.add_marker(marker);
+        let vao = self.prim_vao_id;
+        self.draw_instanced_batch(data,
+                                  vao,
+                                  shader,
+                                  &batch.key.textures,
+                                  &projection);
+    }
+
     fn draw_target(&mut self,
                    render_target: Option<(TextureId, i32)>,
                    target: &RenderTarget,
                    target_size: &DeviceSize,
                    cache_texture: Option<TextureId>,
-                   should_clear: bool) {
+                   should_clear: bool,
+                   background_color: Option<ColorF>) {
+        self.device.disable_depth();
+        self.device.enable_depth_write();
+
         let dimensions = [target_size.width as u32, target_size.height as u32];
         let projection = {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
-            self.device.bind_render_target(render_target, Some(dimensions));
+            self.device.bind_draw_target(render_target, Some(dimensions));
 
             self.device.set_blend(false);
             self.device.set_blend_mode_alpha();
             if let Some(cache_texture) = cache_texture {
                 self.device.bind_texture(TextureSampler::Cache, cache_texture);
             }
 
             let (color, projection) = match render_target {
                 Some(..) => (
-                    [0.0, 0.0, 0.0, 0.0],
+                    // The clear color here is chosen specifically such that:
+                    // - The red channel is cleared to 1, so that the clip mask
+                    //   generation (which reads/writes the red channel) can
+                    //   assume that each allocated rect is opaque / non-clipped
+                    //   initially.
+                    // - The alpha channel is cleared to 0, so that visual render
+                    //   tasks can assume that pixels are transparent if not
+                    //   rendered. (This is relied on by the compositing support
+                    //   for mix-blend-mode etc).
+                    [1.0, 0.0, 0.0, 0.0],
                     Matrix4D::ortho(0.0,
                                    target_size.width,
                                    0.0,
                                    target_size.height,
                                    ORTHO_NEAR_PLANE,
                                    ORTHO_FAR_PLANE)
                 ),
                 None => (
-                    self.clear_color.to_array(),
+                    background_color.map_or(self.clear_color.to_array(), |color| {
+                        color.to_array()
+                    }),
                     Matrix4D::ortho(0.0,
                                    target_size.width,
                                    target_size.height,
                                    0.0,
                                    ORTHO_NEAR_PLANE,
                                    ORTHO_FAR_PLANE)
                 ),
             };
 
-            if should_clear {
-                self.device.clear_color(color);
-            }
+            let clear_depth = Some(1.0);
+            let clear_color = if should_clear {
+                Some(color)
+            } else {
+                None
+            };
+
+            self.device.clear_target(clear_color, clear_depth);
 
             projection
         };
 
+        self.device.disable_depth_write();
+
         // Draw any blurs for this target.
         // Blurs are rendered as a standard 2-pass
         // separable implementation.
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
@@ -1072,45 +1132,26 @@ impl Renderer {
                                       &BatchTextures::no_texture(),
                                       &projection);
         }
 
         // Draw the clip items into the tiled alpha mask.
         {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP);
             let vao = self.clip_vao_id;
-            // first, mark the target area as opaque
-            //Note: not needed if we know the target is cleared with opaque
-            self.device.set_blend(false);
-            if !target.clip_batcher.clears.is_empty() {
-                let shader = self.cs_clip_clear.get(&mut self.device);
-                self.draw_instanced_batch(&target.clip_batcher.clears,
-                                          vao,
-                                          shader,
-                                          &BatchTextures::no_texture(),
-                                          &projection);
-            }
-            // alternatively, copy the contents from another task
+            // Optionally, copy the contents from another task
             if !target.clip_batcher.copies.is_empty() {
+                self.device.set_blend(false);
                 let shader = self.cs_clip_copy.get(&mut self.device);
                 self.draw_instanced_batch(&target.clip_batcher.copies,
                                           vao,
                                           shader,
                                           &BatchTextures::no_texture(),
                                           &projection);
             }
-            // the fast path for clear + rect, which is just the rectangle without blending
-            if !target.clip_batcher.rectangles_noblend.is_empty() {
-                let shader = self.cs_clip_rectangle.get(&mut self.device);
-                self.draw_instanced_batch(&target.clip_batcher.rectangles_noblend,
-                                          vao,
-                                          shader,
-                                          &BatchTextures::no_texture(),
-                                          &projection);
-            }
             // now switch to multiplicative blending
             self.device.set_blend(true);
             self.device.set_blend_mode_multiply();
             // draw rounded cornered rectangles
             if !target.clip_batcher.rectangles.is_empty() {
                 let _gm2 = GpuMarker::new("clip rectangles");
                 let shader = self.cs_clip_rectangle.get(&mut self.device);
                 self.draw_instanced_batch(&target.clip_batcher.rectangles,
@@ -1153,21 +1194,27 @@ impl Renderer {
                                       &target.text_run_textures,
                                       &projection);
         }
 
         let _gm2 = GpuMarker::new("alpha batches");
         self.device.set_blend(false);
         let mut prev_blend_mode = BlendMode::None;
 
-        for batch in &target.alpha_batcher.batches {
-            let transform_kind = batch.key.flags.transform_kind();
-            let needs_clipping = batch.key.flags.needs_clipping();
-            debug_assert!(!needs_clipping || batch.key.blend_mode == BlendMode::Alpha);
+        self.device.set_depth_func(DepthFunction::Less);
+        self.device.enable_depth();
+        self.device.enable_depth_write();
 
+        for batch in &target.alpha_batcher.opaque_batches {
+            self.submit_batch(batch, &projection);
+        }
+
+        self.device.disable_depth_write();
+
+        for batch in &target.alpha_batcher.alpha_batches {
             if batch.key.blend_mode != prev_blend_mode {
                 match batch.key.blend_mode {
                     BlendMode::None => {
                         self.device.set_blend(false);
                     }
                     BlendMode::Alpha => {
                         self.device.set_blend(true);
                         self.device.set_blend_mode_alpha();
@@ -1175,81 +1222,20 @@ impl Renderer {
                     BlendMode::Subpixel(color) => {
                         self.device.set_blend(true);
                         self.device.set_blend_mode_subpixel(color);
                     }
                 }
                 prev_blend_mode = batch.key.blend_mode;
             }
 
-            let (data, marker, shader) = match &batch.data {
-                &PrimitiveBatchData::CacheImage(ref data) => {
-                    let shader = self.ps_cache_image.get(&mut self.device, transform_kind);
-                    (data, GPU_TAG_PRIM_CACHE_IMAGE, shader)
-                }
-                &PrimitiveBatchData::Blend(ref data) => {
-                    let shader = self.ps_blend.get(&mut self.device);
-                    (data, GPU_TAG_PRIM_BLEND, shader)
-                }
-                &PrimitiveBatchData::Composite(ref data) => {
-                    // The composite shader only samples from sCache.
-                    debug_assert!(cache_texture.is_some());
-                    let shader = self.ps_composite.get(&mut self.device);
-                    (data, GPU_TAG_PRIM_COMPOSITE, shader)
-                }
-                &PrimitiveBatchData::Rectangles(ref data) => {
-                    let shader = if needs_clipping {
-                        self.ps_rectangle_clip.get(&mut self.device, transform_kind)
-                    } else {
-                        self.ps_rectangle.get(&mut self.device, transform_kind)
-                    };
-                    (data, GPU_TAG_PRIM_RECT, shader)
-                }
-                &PrimitiveBatchData::Image(ref data) => {
-                    let shader = self.ps_image.get(&mut self.device, transform_kind);
-                    (data, GPU_TAG_PRIM_IMAGE, shader)
-                }
-                &PrimitiveBatchData::YuvImage(ref data) => {
-                    let shader = self.ps_yuv_image.get(&mut self.device, transform_kind);
-                    (data, GPU_TAG_PRIM_YUV_IMAGE, shader)
-                }
-                &PrimitiveBatchData::Borders(ref data) => {
-                    let shader = self.ps_border.get(&mut self.device, transform_kind);
-                    (data, GPU_TAG_PRIM_BORDER, shader)
-                }
-                &PrimitiveBatchData::BoxShadow(ref data) => {
-                    let shader = self.ps_box_shadow.get(&mut self.device, transform_kind);
-                    (data, GPU_TAG_PRIM_BOX_SHADOW, shader)
-                }
-                &PrimitiveBatchData::TextRun(ref data) => {
-                    let shader = match batch.key.blend_mode {
-                        BlendMode::Subpixel(..) => self.ps_text_run_subpixel.get(&mut self.device, transform_kind),
-                        BlendMode::Alpha | BlendMode::None => self.ps_text_run.get(&mut self.device, transform_kind),
-                    };
-                    (data, GPU_TAG_PRIM_TEXT_RUN, shader)
-                }
-                &PrimitiveBatchData::AlignedGradient(ref data) => {
-                    let shader = self.ps_gradient.get(&mut self.device, transform_kind);
-                    (data, GPU_TAG_PRIM_GRADIENT, shader)
-                }
-                &PrimitiveBatchData::AngleGradient(ref data) => {
-                    let shader = self.ps_angle_gradient.get(&mut self.device, transform_kind);
-                    (data, GPU_TAG_PRIM_ANGLE_GRADIENT, shader)
-                }
-            };
-
-            let _gm = self.gpu_profile.add_marker(marker);
-            let vao = self.prim_vao_id;
-            self.draw_instanced_batch(data,
-                                      vao,
-                                      shader,
-                                      &batch.key.textures,
-                                      &projection);
+            self.submit_batch(batch, &projection);
         }
 
+        self.device.disable_depth();
         self.device.set_blend(false);
     }
 
     fn update_deferred_resolves(&mut self, frame: &mut Frame) {
         // The first thing we do is run through any pending deferred
         // resolves, and use a callback to get the UV rect for this
         // custom item. Then we patch the resource_rects structure
         // here before it's uploaded to the GPU.
@@ -1258,38 +1244,50 @@ impl Renderer {
                               .as_mut()
                               .expect("Found external image, but no handler set!");
 
             for deferred_resolve in &frame.deferred_resolves {
                 GpuMarker::fire("deferred resolve");
                 let props = &deferred_resolve.image_properties;
                 let external_id = props.external_id
                                        .expect("BUG: Deferred resolves must be external images!");
-                let image = handler.get(external_id);
+                let image = handler.lock(external_id);
 
                 let texture_id = match image.source {
                     ExternalImageSource::NativeTexture(texture_id) => TextureId::new(texture_id),
                 };
 
                 self.external_images.insert(external_id, texture_id);
                 let resource_rect_index = deferred_resolve.resource_address.0 as usize;
                 let resource_rect = &mut frame.gpu_resource_rects[resource_rect_index];
                 resource_rect.uv0 = DevicePoint::new(image.u0, image.v0);
                 resource_rect.uv1 = DevicePoint::new(image.u1, image.v1);
             }
         }
     }
 
-    fn release_external_textures(&mut self) {
+    fn unlock_external_images(&mut self) {
         if !self.external_images.is_empty() {
             let handler = self.external_image_handler
                               .as_mut()
                               .expect("Found external image, but no handler set!");
 
             for (external_id, _) in self.external_images.drain() {
+                handler.unlock(external_id);
+            }
+        }
+    }
+
+    fn release_external_images(&mut self, mut pending_external_image_updates: ExternalImageUpdateList) {
+        if !pending_external_image_updates.is_empty() {
+            let handler = self.external_image_handler
+                              .as_mut()
+                              .expect("found external image updates, but no handler set!");
+
+            for external_id in pending_external_image_updates.drain(..) {
                 handler.release(external_id);
             }
         }
     }
 
     fn draw_tile_frame(&mut self,
                        frame: &mut Frame,
                        framebuffer_size: &DeviceUintSize) {
@@ -1313,25 +1311,18 @@ impl Renderer {
                                     &debug_rect.color);
             }
         }
 
         self.device.disable_depth_write();
         self.device.disable_stencil();
         self.device.set_blend(false);
 
-        let projection = Matrix4D::ortho(0.0,
-                                         framebuffer_size.width as f32,
-                                         framebuffer_size.height as f32,
-                                         0.0,
-                                         ORTHO_NEAR_PLANE,
-                                         ORTHO_FAR_PLANE);
-
         if frame.passes.is_empty() {
-            self.device.clear_color(self.clear_color.to_array());
+            self.device.clear_target(Some(self.clear_color.to_array()), Some(1.0));
         } else {
             // Add new render targets to the pool if required.
             let needed_targets = frame.passes.len() - 1;     // framebuffer doesn't need a target!
             let current_target_count = self.render_targets.len();
             if needed_targets > current_target_count {
                 let new_target_count = needed_targets - current_target_count;
                 let new_targets = self.device.create_texture_ids(new_target_count as i32,
                                                                  TextureTarget::Array);
@@ -1382,52 +1373,76 @@ impl Renderer {
                 for (target_index, target) in pass.targets.iter().enumerate() {
                     let render_target = target_id.map(|texture_id| {
                         (texture_id, target_index as i32)
                     });
                     self.draw_target(render_target,
                                      target,
                                      &size,
                                      src_id,
-                                     do_clear);
+                                     do_clear,
+                                     frame.background_color);
 
                 }
 
                 src_id = target_id;
             }
+
+            self.draw_render_target_debug(framebuffer_size);
         }
 
-        let _gm = self.gpu_profile.add_marker(GPU_TAG_CLEAR_TILES);
-
-        // Tiles with no items
-        if self.clear_empty_tiles && !frame.empty_tiles.is_empty() {
-            self.device.set_blend(false);
-            let vao = self.clear_vao_id;
-            let shader = self.tile_clear_shader.get(&mut self.device);
-            self.draw_instanced_batch(&frame.empty_tiles,
-                                      vao,
-                                      shader,
-                                      &BatchTextures::no_texture(),
-                                      &projection);
-        }
-
-        self.release_external_textures();
+        self.unlock_external_images();
     }
 
     pub fn debug_renderer<'a>(&'a mut self) -> &'a mut DebugRenderer {
         &mut self.debug
     }
 
     pub fn get_profiler_enabled(&mut self) -> bool {
         self.enable_profiler
     }
 
     pub fn set_profiler_enabled(&mut self, enabled: bool) {
         self.enable_profiler = enabled;
     }
+
+    fn draw_render_target_debug(&mut self,
+                                framebuffer_size: &DeviceUintSize) {
+        if self.render_target_debug {
+            // TODO(gw): Make the layout of the render targets a bit more sophisticated.
+            // Right now, it just draws them in one row at the bottom of the screen,
+            // with a fixed size.
+            let rt_debug_x0 = 16;
+            let rt_debug_y0 = 16;
+            let rt_debug_spacing = 16;
+            let rt_debug_size = 512;
+            let mut current_target = 0;
+
+            for texture_id in &self.render_targets {
+                let layer_count = self.device.get_render_target_layer_count(*texture_id);
+                for layer_index in 0..layer_count {
+                    let x0 = rt_debug_x0 + (rt_debug_spacing + rt_debug_size) * current_target;
+                    let y0 = rt_debug_y0;
+
+                    // If we have more targets than fit in one row on screen, exit early.
+                    if x0 > framebuffer_size.width as i32 {
+                        return;
+                    }
+
+                    let dest_rect = DeviceIntRect::new(DeviceIntPoint::new(x0, y0),
+                                                       DeviceIntSize::new(rt_debug_size, rt_debug_size));
+                    self.device.blit_render_target(*texture_id,
+                                                   layer_index as i32,
+                                                   dest_rect);
+
+                    current_target += 1;
+                }
+            }
+        }
+    }
 }
 
 pub enum ExternalImageSource {
     // TODO(gw): Work out the API for raw buffers.
     //RawData(*const u8, usize),
     NativeTexture(u32),                // Is a gl::GLuint texture handle
 }
 
@@ -1443,33 +1458,40 @@ pub enum ExternalImageSource {
 pub struct ExternalImage {
     pub u0: f32,
     pub v0: f32,
     pub u1: f32,
     pub v1: f32,
     pub source: ExternalImageSource,
 }
 
-/// Interface that an application can implement
-/// to support providing external image buffers.
+/// The interface that an application can implement to support providing
+/// external image buffers.
+/// When the application passes an external image to WR, it should keep that
+/// external image alive until the release() call.
 pub trait ExternalImageHandler {
-    fn get(&mut self, key: ExternalImageId) -> ExternalImage;
+    /// Lock the external image. After this call, WR may read the image content.
+    /// The WR client should not change the image content until the unlock()
+    /// call.
+    fn lock(&mut self, key: ExternalImageId) -> ExternalImage;
+    /// Unlock the external image. The WR should not read the image content
+    /// after this call.
+    fn unlock(&mut self, key: ExternalImageId);
+    /// Tell the WR client that it may now release this external image.
     fn release(&mut self, key: ExternalImageId);
 }
 
 #[derive(Clone, Debug)]
 pub struct RendererOptions {
     pub device_pixel_ratio: f32,
     pub resource_override_path: Option<PathBuf>,
     pub enable_aa: bool,
-    pub enable_msaa: bool,
     pub enable_profiler: bool,
     pub debug: bool,
     pub enable_recording: bool,
     pub enable_scrollbars: bool,
     pub precache_shaders: bool,
     pub renderer_kind: RendererKind,
     pub enable_subpixel_aa: bool,
-    // TODO: this option ignores the clear color (always opaque white).
-    pub clear_empty_tiles: bool,
     pub clear_framebuffer: bool,
     pub clear_color: ColorF,
+    pub render_target_debug: bool,
 }
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -1,24 +1,25 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
 use device::TextureFilter;
 use fnv::FnvHasher;
 use frame::FrameId;
-use internal_types::{FontTemplate, SourceTexture, TextureUpdateList};
+use internal_types::{ExternalImageUpdateList, FontTemplate, SourceTexture, TextureUpdateList};
 use platform::font::{FontContext, RasterizedGlyph};
 use std::cell::RefCell;
 use std::collections::{HashMap, HashSet};
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::fmt::Debug;
 use std::hash::BuildHasherDefault;
 use std::hash::Hash;
+use std::mem;
 use std::sync::{Arc, Barrier};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::{TextureCache, TextureCacheItemId};
 use webrender_traits::{Epoch, FontKey, GlyphKey, ImageKey, ImageFormat, ImageRendering};
 use webrender_traits::{FontRenderMode, ImageData, GlyphDimensions, WebGLContextId};
 use webrender_traits::{DevicePoint, DeviceIntSize};
 use webrender_traits::ExternalImageId;
@@ -202,16 +203,17 @@ pub struct ResourceCache {
 
     texture_cache: TextureCache,
 
     // TODO(gw): We should expire (parts of) this cache semi-regularly!
     cached_glyph_dimensions: HashMap<GlyphKey, Option<GlyphDimensions>, BuildHasherDefault<FnvHasher>>,
     pending_image_requests: Vec<ImageRequest>,
     glyph_cache_tx: Sender<GlyphCacheMsg>,
     glyph_cache_result_queue: Receiver<GlyphCacheResultMsg>,
+    pending_external_image_update_list: ExternalImageUpdateList,
 }
 
 impl ResourceCache {
     pub fn new(texture_cache: TextureCache,
                enable_aa: bool) -> ResourceCache {
         let (glyph_cache_tx, glyph_cache_result_queue) = spawn_glyph_cache_thread();
 
         ResourceCache {
@@ -223,16 +225,17 @@ impl ResourceCache {
             cached_glyph_dimensions: HashMap::with_hasher(Default::default()),
             texture_cache: texture_cache,
             state: State::Idle,
             enable_aa: enable_aa,
             current_frame_id: FrameId(0),
             pending_image_requests: Vec::new(),
             glyph_cache_tx: glyph_cache_tx,
             glyph_cache_result_queue: glyph_cache_result_queue,
+            pending_external_image_update_list: ExternalImageUpdateList::new(),
         }
     }
 
     pub fn add_font_template(&mut self, font_key: FontKey, template: FontTemplate) {
         // Push the new font to the glyph cache thread, and also store
         // it locally for glyph metric requests.
         self.glyph_cache_tx
             .send(GlyphCacheMsg::AddFont(font_key, template.clone()))
@@ -267,16 +270,24 @@ impl ResourceCache {
     pub fn update_image_template(&mut self,
                                  image_key: ImageKey,
                                  width: u32,
                                  height: u32,
                                  format: ImageFormat,
                                  bytes: Vec<u8>) {
         let next_epoch = match self.image_templates.get(&image_key) {
             Some(image) => {
+                // This image should not be an external image.
+                match image.data {
+                    ImageData::External(id) => {
+                        panic!("Update an external image with buffer, id={} image_key={:?}", id.0, image_key);
+                    },
+                    _ => {},
+                }
+
                 let Epoch(current_epoch) = image.epoch;
                 Epoch(current_epoch + 1)
             }
             None => {
                 Epoch(0)
             }
         };
 
@@ -289,17 +300,31 @@ impl ResourceCache {
             data: ImageData::new(bytes),
             epoch: next_epoch,
         };
 
         self.image_templates.insert(image_key, resource);
     }
 
     pub fn delete_image_template(&mut self, image_key: ImageKey) {
-        self.image_templates.remove(&image_key);
+        let value = self.image_templates.remove(&image_key);
+
+        // If the key is associated with an external image, pass the external id to the renderer for cleanup.
+        if let Some(image) = value {
+            match image.data {
+                ImageData::External(id) => {
+                    self.pending_external_image_update_list.push(id);
+                },
+                _ => {},
+            }
+
+            return;
+        }
+
+        println!("Delete the non-exist key:{:?}", image_key);
     }
 
     pub fn add_webgl_texture(&mut self, id: WebGLContextId, texture_id: SourceTexture, size: DeviceIntSize) {
         self.webgl_textures.insert(id, WebGLTexture {
             id: texture_id,
             size: size,
         });
     }
@@ -338,16 +363,20 @@ impl ResourceCache {
                                                render_mode);
         self.glyph_cache_tx.send(msg).unwrap();
     }
 
     pub fn pending_updates(&mut self) -> TextureUpdateList {
         self.texture_cache.pending_updates()
     }
 
+    pub fn pending_external_image_updates(&mut self) -> ExternalImageUpdateList {
+        mem::replace(&mut self.pending_external_image_update_list, ExternalImageUpdateList::new())
+    }
+
     pub fn get_glyphs<F>(&self,
                          font_key: FontKey,
                          size: Au,
                          glyph_indices: &[u32],
                          render_mode: FontRenderMode,
                          mut f: F) -> SourceTexture where F: FnMut(usize, DevicePoint, DevicePoint) {
         debug_assert!(self.state == State::QueryResources);
         let cache = self.cached_glyphs.as_ref().unwrap();
--- a/gfx/webrender/src/scene.rs
+++ b/gfx/webrender/src/scene.rs
@@ -5,29 +5,16 @@
 use fnv::FnvHasher;
 use std::collections::HashMap;
 use std::hash::BuildHasherDefault;
 use tiling::AuxiliaryListsMap;
 use webrender_traits::{AuxiliaryLists, BuiltDisplayList, PipelineId, Epoch, ColorF};
 use webrender_traits::{DisplayItem, SpecificDisplayItem, StackingContext};
 use webrender_traits::LayerSize;
 
-trait DisplayListHelpers {
-    fn starting_stacking_context<'a>(&'a self) -> Option<&'a StackingContext>;
-}
-
-impl DisplayListHelpers for Vec<DisplayItem> {
-    fn starting_stacking_context<'a>(&'a self) -> Option<&'a StackingContext> {
-        self.first().and_then(|item| match item.item {
-            SpecificDisplayItem::PushStackingContext(ref item) => Some(&item.stacking_context),
-            _ => None,
-        })
-    }
-}
-
 /// A representation of the layout within the display port for a given document or iframe.
 #[derive(Debug)]
 pub struct ScenePipeline {
     pub pipeline_id: PipelineId,
     pub epoch: Epoch,
     pub viewport_size: LayerSize,
     pub background_color: Option<ColorF>,
 }
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -872,16 +872,20 @@ impl TextureCache {
     pub fn free(&mut self, id: TextureCacheItemId) {
         {
             let item = self.items.get(id);
             match self.arena.texture_page_for_id(item.texture_id) {
                 Some(texture_page) => texture_page.free(&item.allocated_rect),
                 None => {
                     // This is a standalone texture allocation. Just push it back onto the free
                     // list.
+                    self.pending_updates.push(TextureUpdate {
+                        id: item.texture_id,
+                        op: TextureUpdateOp::Free,
+                    });
                     self.cache_id_list.free(item.texture_id);
                 }
             }
         }
 
         self.items.free(id)
     }
 }
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -71,17 +71,18 @@ trait AlphaBatchHelpers {
     fn add_prim_to_batch(&self,
                          prim_index: PrimitiveIndex,
                          batch: &mut PrimitiveBatch,
                          layer_index: StackingContextIndex,
                          task_index: RenderTaskIndex,
                          tile_id: TileUniqueId,
                          base_mask_task_index: RenderTaskIndex,
                          render_tasks: &RenderTaskCollection,
-                         pass_index: RenderPassIndex);
+                         pass_index: RenderPassIndex,
+                         z_sort_index: i32);
 }
 
 impl AlphaBatchHelpers for PrimitiveStore {
     fn get_batch_kind(&self, metadata: &PrimitiveMetadata) -> AlphaBatchKind {
         let batch_kind = match metadata.prim_kind {
             PrimitiveKind::Border => AlphaBatchKind::Border,
             PrimitiveKind::BoxShadow => AlphaBatchKind::BoxShadow,
             PrimitiveKind::Image => AlphaBatchKind::Image,
@@ -188,17 +189,18 @@ impl AlphaBatchHelpers for PrimitiveStor
     fn add_prim_to_batch(&self,
                          prim_index: PrimitiveIndex,
                          batch: &mut PrimitiveBatch,
                          layer_index: StackingContextIndex,
                          task_index: RenderTaskIndex,
                          tile_id: TileUniqueId,
                          base_mask_task_index: RenderTaskIndex,
                          render_tasks: &RenderTaskCollection,
-                         child_pass_index: RenderPassIndex) {
+                         child_pass_index: RenderPassIndex,
+                         z_sort_index: i32) {
         let metadata = self.get_metadata(prim_index);
         let layer_index = layer_index.0 as i32;
         let global_prim_id = prim_index.0 as i32;
         let prim_address = metadata.gpu_prim_index;
         let clip_task_key = RenderTaskKey::CacheMask(MaskCacheKey::Primitive(prim_index), tile_id);
         let clip_task_index = if metadata.clip_cache_info.is_some() &&
                                  render_tasks.has_dynamic_task(&clip_task_key, child_pass_index) {
             let cache_task_id = RenderTaskId::Dynamic(clip_task_key);
@@ -217,92 +219,99 @@ impl AlphaBatchHelpers for PrimitiveStor
                 data.push(PrimitiveInstance {
                     task_index: task_index,
                     clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
                     sub_index: 0,
                     user_data: [0, 0],
+                    z_sort_index: z_sort_index,
                 });
             }
             &mut PrimitiveBatchData::TextRun(ref mut data) => {
                 let text_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
 
                 for glyph_index in 0..metadata.gpu_data_count {
                     data.push(PrimitiveInstance {
                         task_index: task_index,
                         clip_task_index: clip_task_index,
                         layer_index: layer_index,
                         global_prim_id: global_prim_id,
                         prim_address: prim_address,
                         sub_index: metadata.gpu_data_address.0 + glyph_index,
                         user_data: [ text_cpu.resource_address.0 + glyph_index, 0 ],
+                        z_sort_index: z_sort_index,
                     });
                 }
             }
             &mut PrimitiveBatchData::Image(ref mut data) => {
                 let image_cpu = &self.cpu_images[metadata.cpu_prim_index.0];
 
                 data.push(PrimitiveInstance {
                     task_index: task_index,
                     clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
                     sub_index: 0,
                     user_data: [ image_cpu.resource_address.0, 0 ],
+                    z_sort_index: z_sort_index,
                 });
             }
             &mut PrimitiveBatchData::YuvImage(ref mut data) => {
                 data.push(PrimitiveInstance {
                     task_index: task_index,
                     clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
                     sub_index: 0,
                     user_data: [ 0, 0 ],
+                    z_sort_index: z_sort_index,
                 });
             }
             &mut PrimitiveBatchData::Borders(ref mut data) => {
                 for border_segment in 0..8 {
                     data.push(PrimitiveInstance {
                         task_index: task_index,
                         clip_task_index: clip_task_index,
                         layer_index: layer_index,
                         global_prim_id: global_prim_id,
                         prim_address: prim_address,
                         sub_index: border_segment,
                         user_data: [ 0, 0 ],
+                        z_sort_index: z_sort_index,
                     });
                 }
             }
             &mut PrimitiveBatchData::AlignedGradient(ref mut data) => {
                 for part_index in 0..(metadata.gpu_data_count - 1) {
                     data.push(PrimitiveInstance {
                         task_index: task_index,
                         clip_task_index: clip_task_index,
                         layer_index: layer_index,
                         global_prim_id: global_prim_id,
                         prim_address: prim_address,
                         sub_index: metadata.gpu_data_address.0 + part_index,
                         user_data: [ 0, 0 ],
+                        z_sort_index: z_sort_index,
                     });
                 }
             }
             &mut PrimitiveBatchData::AngleGradient(ref mut data) => {
                 data.push(PrimitiveInstance {
                     task_index: task_index,
                     clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
                     sub_index: metadata.gpu_data_address.0,
                     user_data: [ metadata.gpu_data_count, 0 ],
+                    z_sort_index: z_sort_index,
                 });
             }
             &mut PrimitiveBatchData::CacheImage(ref mut data) => {
                 // Find the render task index for the render task
                 // that this primitive depends on. Pass it to the
                 // shader so that it can sample from the cache texture
                 // at the correct location.
                 let cache_task_id = &metadata.render_task.as_ref().unwrap().id;
@@ -312,32 +321,34 @@ impl AlphaBatchHelpers for PrimitiveStor
                 data.push(PrimitiveInstance {
                     task_index: task_index,
                     clip_task_index: clip_task_index,
                     layer_index: layer_index,
                     global_prim_id: global_prim_id,
                     prim_address: prim_address,
                     sub_index: 0,
                     user_data: [ cache_task_index.0 as i32, 0 ],
+                    z_sort_index: z_sort_index,
                 });
             }
             &mut PrimitiveBatchData::BoxShadow(ref mut data) => {
                 let cache_task_id = &metadata.render_task.as_ref().unwrap().id;
                 let cache_task_index = render_tasks.get_task_index(cache_task_id,
                                                                    child_pass_index);
 
                 for rect_index in 0..metadata.gpu_data_count {
                     data.push(PrimitiveInstance {
                         task_index: task_index,
                         clip_task_index: clip_task_index,
                         layer_index: layer_index,
                         global_prim_id: global_prim_id,
                         prim_address: prim_address,
                         sub_index: metadata.gpu_data_address.0 + rect_index,
                         user_data: [ cache_task_index.0 as i32, 0 ],
+                        z_sort_index: z_sort_index,
                     });
                 }
             }
         }
     }
 }
 
 #[derive(Debug)]
@@ -499,56 +510,62 @@ impl Default for PrimitiveGeometry {
             local_rect: unsafe { mem::uninitialized() },
             local_clip_rect: unsafe { mem::uninitialized() },
         }
     }
 }
 
 struct AlphaBatchTask {
     task_id: RenderTaskId,
-    items: Vec<AlphaRenderItem>,
+    opaque_items: Vec<AlphaRenderItem>,
+    alpha_items: Vec<AlphaRenderItem>,
     tile_id: TileUniqueId,
 }
 
 /// Encapsulates the logic of building batches for items that are blended.
 pub struct AlphaBatcher {
-    pub batches: Vec<PrimitiveBatch>,
+    pub alpha_batches: Vec<PrimitiveBatch>,
+    pub opaque_batches: Vec<PrimitiveBatch>,
     tasks: Vec<AlphaBatchTask>,
 }
 
 impl AlphaBatcher {
     fn new() -> AlphaBatcher {
         AlphaBatcher {
-            batches: Vec::new(),
+            alpha_batches: Vec::new(),
+            opaque_batches: Vec::new(),
             tasks: Vec::new(),
         }
     }
 
     fn add_task(&mut self, task: AlphaBatchTask) {
         self.tasks.push(task);
     }
 
     fn build(&mut self,
              ctx: &RenderTargetContext,
              render_tasks: &RenderTaskCollection,
              child_pass_index: RenderPassIndex) {
-        let mut batches: Vec<PrimitiveBatch> = vec![];
+        let mut alpha_batches: Vec<PrimitiveBatch> = vec![];
+        let mut opaque_batches: Vec<PrimitiveBatch> = vec![];
+
         for task in &mut self.tasks {
             let task_index = render_tasks.get_static_task_index(&task.task_id);
-
-            let mut existing_batch_index = 0;
-            for item in task.items.drain(..) {
+            let mut existing_alpha_batch_index = 0;
+            let mut existing_opaque_batch_index = 0;
+
+            for item in &task.alpha_items {
                 let batch_key = match item {
-                    AlphaRenderItem::Composite(..) => {
+                    &AlphaRenderItem::Composite(..) => {
                         AlphaBatchKey::composite()
                     }
-                    AlphaRenderItem::Blend(..) => {
+                    &AlphaRenderItem::Blend(..) => {
                         AlphaBatchKey::blend()
                     }
-                    AlphaRenderItem::Primitive(sc_index, prim_index) => {
+                    &AlphaRenderItem::Primitive(sc_index, prim_index, _) => {
                         // See if this task fits into the tile UBO
                         let layer = &ctx.layer_store[sc_index.0];
                         let prim_metadata = ctx.prim_store.get_metadata(prim_index);
                         let transform_kind = layer.xf_rect.as_ref().unwrap().kind;
                         let needs_clipping = prim_metadata.clip_cache_info.is_some() ||
                                              ctx.layer_masks_tasks.get(&(task.tile_id, sc_index)).is_some();
                         let needs_blending = transform_kind == TransformedRectKind::Complex ||
                                              !prim_metadata.is_opaque ||
@@ -571,98 +588,173 @@ impl AlphaBatcher {
 
                         AlphaBatchKey::primitive(batch_kind,
                                                  flags,
                                                  blend_mode,
                                                  textures)
                     }
                 };
 
-                while existing_batch_index < batches.len() &&
-                        !batches[existing_batch_index].key.is_compatible_with(&batch_key) {
-                    existing_batch_index += 1
+                while existing_alpha_batch_index < alpha_batches.len() &&
+                        !alpha_batches[existing_alpha_batch_index].key.is_compatible_with(&batch_key) {
+                    existing_alpha_batch_index += 1
                 }
 
-                if existing_batch_index == batches.len() {
+                if existing_alpha_batch_index == alpha_batches.len() {
                     let new_batch = match item {
-                        AlphaRenderItem::Composite(..) => {
+                        &AlphaRenderItem::Composite(..) => {
                             PrimitiveBatch::composite()
                         }
-                        AlphaRenderItem::Blend(..) => {
+                        &AlphaRenderItem::Blend(..) => {
                             PrimitiveBatch::blend()
                         }
-                        AlphaRenderItem::Primitive(_, prim_index) => {
+                        &AlphaRenderItem::Primitive(_, prim_index, _) => {
                             // See if this task fits into the tile UBO
                             let prim_metadata = ctx.prim_store.get_metadata(prim_index);
                             let batch_kind = ctx.prim_store.get_batch_kind(prim_metadata);
                             PrimitiveBatch::new(batch_kind, batch_key)
                         }
                     };
-                    batches.push(new_batch)
+                    alpha_batches.push(new_batch)
                 }
 
-                let batch = &mut batches[existing_batch_index];
+                let batch = &mut alpha_batches[existing_alpha_batch_index];
                 match item {
-                    AlphaRenderItem::Composite(src0_id, src1_id, info) => {
+                    &AlphaRenderItem::Composite(src0_id, src1_id, info, z) => {
                         let ok = batch.pack_composite(render_tasks.get_static_task_index(&src0_id),
                                                       render_tasks.get_static_task_index(&src1_id),
                                                       render_tasks.get_static_task_index(&task.task_id),
-                                                      info);
+                                                      info,
+                                                      z);
                         debug_assert!(ok)
                     }
-                    AlphaRenderItem::Blend(src_id, info) => {
+                    &AlphaRenderItem::Blend(src_id, info, z) => {
                         let ok = batch.pack_blend(render_tasks.get_static_task_index(&src_id),
                                                   render_tasks.get_static_task_index(&task.task_id),
-                                                  info);
+                                                  info,
+                                                  z);
                         debug_assert!(ok)
                     }
-                    AlphaRenderItem::Primitive(sc_index, prim_index) => {
+                    &AlphaRenderItem::Primitive(sc_index, prim_index, z) => {
                         let mask_task_index = match ctx.layer_masks_tasks.get(&(task.tile_id, sc_index)) {
                             Some(ref mask_task_id) => render_tasks.get_task_index(mask_task_id, child_pass_index),
                             None => OPAQUE_TASK_INDEX,
                         };
                         ctx.prim_store.add_prim_to_batch(prim_index,
                                                          batch,
                                                          sc_index,
                                                          task_index,
                                                          task.tile_id,
                                                          mask_task_index,
                                                          render_tasks,
-                                                         child_pass_index);
+                                                         child_pass_index,
+                                                         z);
+                    }
+                }
+            }
+
+            for item in task.opaque_items.iter().rev() {
+                let batch_key = match item {
+                    &AlphaRenderItem::Composite(..) => unreachable!(),
+                    &AlphaRenderItem::Blend(..) => unreachable!(),
+                    &AlphaRenderItem::Primitive(sc_index, prim_index, _) => {
+                        // See if this task fits into the tile UBO
+                        let layer = &ctx.layer_store[sc_index.0];
+                        let prim_metadata = ctx.prim_store.get_metadata(prim_index);
+                        let transform_kind = layer.xf_rect.as_ref().unwrap().kind;
+                        let needs_clipping = prim_metadata.clip_cache_info.is_some() ||
+                                             ctx.layer_masks_tasks.get(&(task.tile_id, sc_index)).is_some();
+                        let needs_blending = transform_kind == TransformedRectKind::Complex ||
+                                             !prim_metadata.is_opaque ||
+                                             needs_clipping;
+                        let blend_mode = ctx.prim_store.get_blend_mode(needs_blending, prim_metadata);
+                        let needs_clipping_flag = if needs_clipping {
+                            NEEDS_CLIPPING
+                        } else {
+                            AlphaBatchKeyFlags::empty()
+                        };
+                        let flags = match transform_kind {
+                            TransformedRectKind::AxisAligned => AXIS_ALIGNED | needs_clipping_flag,
+                            _ => needs_clipping_flag,
+                        };
+                        let batch_kind = ctx.prim_store.get_batch_kind(prim_metadata);
+
+                        let textures = BatchTextures {
+                            colors: ctx.prim_store.get_color_textures(prim_metadata),
+                        };
+
+                        AlphaBatchKey::primitive(batch_kind,
+                                                 flags,
+                                                 blend_mode,
+                                                 textures)
+                    }
+                };
+
+                while existing_opaque_batch_index < opaque_batches.len() &&
+                        !opaque_batches[existing_opaque_batch_index].key.is_compatible_with(&batch_key) {
+                    existing_opaque_batch_index += 1
+                }
+
+                if existing_opaque_batch_index == opaque_batches.len() {
+                    let new_batch = match item {
+                        &AlphaRenderItem::Composite(..) => unreachable!(),
+                        &AlphaRenderItem::Blend(..) => unreachable!(),
+                        &AlphaRenderItem::Primitive(_, prim_index, _) => {
+                            // See if this task fits into the tile UBO
+                            let prim_metadata = ctx.prim_store.get_metadata(prim_index);
+                            let batch_kind = ctx.prim_store.get_batch_kind(prim_metadata);
+                            PrimitiveBatch::new(batch_kind, batch_key)
+                        }
+                    };
+                    opaque_batches.push(new_batch)
+                }
+
+                let batch = &mut opaque_batches[existing_opaque_batch_index];
+                match item {
+                    &AlphaRenderItem::Composite(..) => unreachable!(),
+                    &AlphaRenderItem::Blend(..) => unreachable!(),
+                    &AlphaRenderItem::Primitive(sc_index, prim_index, z) => {
+                        let mask_task_index = match ctx.layer_masks_tasks.get(&(task.tile_id, sc_index)) {
+                            Some(ref mask_task_id) => render_tasks.get_task_index(mask_task_id, child_pass_index),
+                            None => OPAQUE_TASK_INDEX,
+                        };
+                        ctx.prim_store.add_prim_to_batch(prim_index,
+                                                         batch,
+                                                         sc_index,
+                                                         task_index,
+                                                         task.tile_id,
+                                                         mask_task_index,
+                                                         render_tasks,
+                                                         child_pass_index,
+                                                         z);
                     }
                 }
             }
         }
 
-        self.batches.extend(batches.into_iter())
+        self.alpha_batches.extend(alpha_batches.into_iter());
+        self.opaque_batches.extend(opaque_batches.into_iter());
     }
 }
 
 /// Batcher managing draw calls into the clip mask (in the RT cache).
 #[derive(Debug)]
 pub struct ClipBatcher {
-    /// Clear draws initialize the target area to full opacity (1.0)
-    /// So that the following primitive can be blended with MULtiplication.
-    pub clears: Vec<CacheClipInstance>,
     /// Copy draws get the existing mask from a parent layer.
     pub copies: Vec<CacheClipInstance>,
-    /// A fast path for masks that only have clear + rectangle.
-    pub rectangles_noblend: Vec<CacheClipInstance>,
     /// Rectangle draws fill up the rectangles with rounded corners.
     pub rectangles: Vec<CacheClipInstance>,
     /// Image draws apply the image masking.
     pub images: HashMap<SourceTexture, Vec<CacheClipInstance>>,
 }
 
 impl ClipBatcher {
     fn new() -> ClipBatcher {
         ClipBatcher {
-            clears: Vec::new(),
             copies: Vec::new(),
-            rectangles_noblend: Vec::new(),
             rectangles: Vec::new(),
             images: HashMap::new(),
         }
     }
 
     fn add(&mut self,
            task_index: RenderTaskIndex,
            base_task_index: Option<RenderTaskIndex>,
@@ -672,45 +764,35 @@ impl ClipBatcher {
         // TODO: don't draw clipping instances covering the whole tile
         for &(layer_id, ref info) in clips.iter() {
             let instance = CacheClipInstance {
                 task_id: task_index.0 as i32,
                 layer_index: layer_id.0 as i32,
                 address: GpuStoreAddress(0),
                 base_task_id: 0,
             };
-            let mut start_rect_id = 0;
-            // clear/copy on the first clip only
+            // copy on the first clip only
             if info as *const _ == &clips[0].1 as *const _ {
                 if let Some(layer_task_id) = base_task_index {
                     self.copies.push(CacheClipInstance {
                         base_task_id: layer_task_id.0 as i32,
                         ..instance
                     });
-                } else if info.clip_range.item_count > 0 {
-                    // draw the first rectangle without blending in order
-                    // to avoid clearing the area first
-                    start_rect_id = 1;
-                    self.rectangles_noblend.push(CacheClipInstance {
-                        address: info.clip_range.start,
-                        ..instance
-                    })
-                } else {
-                    self.clears.push(instance);
                 }
             }
 
-            self.rectangles.extend((start_rect_id .. info.clip_range.item_count as usize)
+            self.rectangles.extend((0 .. info.clip_range.item_count as usize)
                            .map(|region_id| {
                 let offset = info.clip_range.start.0 + ((CLIP_DATA_GPU_SIZE * region_id) as i32);
                 CacheClipInstance {
                     address: GpuStoreAddress(offset),
                     ..instance
                 }
             }));
+
             if let Some((ref mask, address)) = info.image {
                 let cache_item = resource_cache.get_cached_image(mask.image, ImageRendering::Auto);
                 self.images.entry(cache_item.texture_id)
                            .or_insert(Vec::new())
                            .push(CacheClipInstance {
                     address: address,
                     ..instance
                 })
@@ -783,17 +865,18 @@ impl RenderTarget {
                 task: RenderTask,
                 ctx: &RenderTargetContext,
                 render_tasks: &RenderTaskCollection,
                 pass_index: RenderPassIndex) {
         match task.kind {
             RenderTaskKind::Alpha(info) => {
                 self.alpha_batcher.add_task(AlphaBatchTask {
                     task_id: task.id,
-                    items: info.items,
+                    opaque_items: info.opaque_items,
+                    alpha_items: info.alpha_items,
                     tile_id: info.tile_id,
                 });
             }
             RenderTaskKind::VerticalBlur(_, prim_index) => {
                 // Find the child render task that we are applying
                 // a vertical blur on.
                 // TODO(gw): Consider a simpler way for render tasks to find
                 //           their child tasks than having to construct the
@@ -828,16 +911,17 @@ impl RenderTarget {
                         self.box_shadow_cache_prims.push(PrimitiveInstance {
                             global_prim_id: prim_index.0 as i32,
                             prim_address: prim_metadata.gpu_prim_index,
                             task_index: render_tasks.get_task_index(&task.id, pass_index).0 as i32,
                             clip_task_index: 0,
                             layer_index: 0,
                             sub_index: 0,
                             user_data: [0; 2],
+                            z_sort_index: 0,        // z is disabled for rendering cache primitives
                         });
                     }
                     PrimitiveKind::TextRun => {
                         let text = &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
                         // We only cache text runs with a text-shadow (for now).
                         debug_assert!(text.blur_radius.0 != 0);
 
                         // TODO(gw): This should always be fine for now, since the texture
@@ -856,16 +940,17 @@ impl RenderTarget {
                             self.text_run_cache_prims.push(PrimitiveInstance {
                                 global_prim_id: prim_index.0 as i32,
                                 prim_address: prim_metadata.gpu_prim_index,
                                 task_index: render_tasks.get_task_index(&task.id, pass_index).0 as i32,
                                 clip_task_index: 0,
                                 layer_index: 0,
                                 sub_index: prim_metadata.gpu_data_address.0 + glyph_index,
                                 user_data: [ text.resource_address.0 + glyph_index, 0],
+                                z_sort_index: 0,        // z is disabled for rendering cache primitives
                             });
                         }
                     }
                     _ => {
                         // No other primitives make use of primitive caching yet!
                         unreachable!()
                     }
                 }
@@ -979,25 +1064,26 @@ impl RenderPass {
 #[derive(Debug, Clone)]
 pub enum RenderTaskLocation {
     Fixed(DeviceIntRect),
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
 }
 
 #[derive(Debug, Clone)]
 enum AlphaRenderItem {
-    Primitive(StackingContextIndex, PrimitiveIndex),
-    Blend(RenderTaskId, LowLevelFilterOp),
-    Composite(RenderTaskId, RenderTaskId, MixBlendMode),
+    Primitive(StackingContextIndex, PrimitiveIndex, i32),
+    Blend(RenderTaskId, LowLevelFilterOp, i32),
+    Composite(RenderTaskId, RenderTaskId, MixBlendMode, i32),
 }
 
 #[derive(Debug, Clone)]
 pub struct AlphaRenderTask {
     actual_rect: DeviceIntRect,
-    items: Vec<AlphaRenderItem>,
+    opaque_items: Vec<AlphaRenderItem>,
+    alpha_items: Vec<AlphaRenderItem>,
     tile_id: TileUniqueId,
 }
 
 #[derive(Debug, Clone)]
 pub struct CacheMaskTask {
     actual_rect: DeviceIntRect,
     base_task_id: Option<RenderTaskId>,
     clips: Vec<(StackingContextIndex, MaskCacheInfo)>,
@@ -1038,17 +1124,18 @@ impl RenderTask {
         let task_index = ctx.render_task_id_counter.fetch_add(1, Ordering::Relaxed);
 
         RenderTask {
             id: RenderTaskId::Static(RenderTaskIndex(task_index)),
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, actual_rect.size),
             kind: RenderTaskKind::Alpha(AlphaRenderTask {
                 actual_rect: actual_rect,
-                items: Vec::new(),
+                alpha_items: Vec::new(),
+                opaque_items: Vec::new(),
                 tile_id: ctx.tile_id,
             }),
         }
     }
 
     pub fn new_prim_cache(key: PrimitiveCacheKey,
                           size: DeviceIntSize,
                           prim_index: PrimitiveIndex) -> RenderTask {
@@ -1401,41 +1488,51 @@ pub struct CacheClipInstance {
 #[derive(Debug, Clone)]
 pub struct PrimitiveInstance {
     global_prim_id: i32,
     prim_address: GpuStoreAddress,
     task_index: i32,
     clip_task_index: i32,
     layer_index: i32,
     sub_index: i32,
+    z_sort_index: i32,
     user_data: [i32; 2],
 }
 
 impl PrimitiveInstance {
-    fn blend(src_task_id: i32, target_task_id: i32, op: i32, amount: i32) -> PrimitiveInstance {
+    fn blend(src_task_id: i32,
+             target_task_id: i32,
+             op: i32,
+             amount: i32,
+             z_sort_index: i32) -> PrimitiveInstance {
         PrimitiveInstance {
             global_prim_id: -1,
             prim_address: GpuStoreAddress(0),
             task_index: target_task_id,
             clip_task_index: -1,
             layer_index: -1,
             sub_index: op,
             user_data: [src_task_id, amount],
+            z_sort_index: z_sort_index,
         }
     }
 
-    fn composite(src_tasks_id: [i32; 2], target_task_id: i32, op: i32) -> PrimitiveInstance {
+    fn composite(src_tasks_id: [i32; 2],
+                 target_task_id: i32,
+                 op: i32,
+                 z_sort_index: i32) -> PrimitiveInstance {
         PrimitiveInstance {
             global_prim_id: -1,
             prim_address: GpuStoreAddress(0),
             task_index: target_task_id,
             clip_task_index: -1,
             layer_index: -1,
             sub_index: op,
             user_data: src_tasks_id,
+            z_sort_index: z_sort_index,
         }
     }
 }
 
 #[derive(Debug)]
 pub enum PrimitiveBatchData {
     Rectangles(Vec<PrimitiveInstance>),
     TextRun(Vec<PrimitiveInstance>),
@@ -1469,17 +1566,18 @@ impl PrimitiveBatch {
             key: AlphaBatchKey::composite(),
             data: PrimitiveBatchData::Composite(Vec::new()),
         }
     }
 
     fn pack_blend(&mut self,
                   src_rect_index: RenderTaskIndex,
                   target_rect_index: RenderTaskIndex,
-                  filter: LowLevelFilterOp) -> bool {
+                  filter: LowLevelFilterOp,
+                  z_sort_index: i32) -> bool {
         match &mut self.data {
             &mut PrimitiveBatchData::Blend(ref mut ubo_data) => {
                 let (filter_mode, amount) = match filter {
                     LowLevelFilterOp::Blur(..) => (0, 0.0),
                     LowLevelFilterOp::Contrast(amount) => (1, amount.to_f32_px()),
                     LowLevelFilterOp::Grayscale(amount) => (2, amount.to_f32_px()),
                     LowLevelFilterOp::HueRotate(angle) => (3, (angle as f32) / ANGLE_FLOAT_TO_FIXED),
                     LowLevelFilterOp::Invert(amount) => (4, amount.to_f32_px()),
@@ -1487,34 +1585,37 @@ impl PrimitiveBatch {
                     LowLevelFilterOp::Sepia(amount) => (6, amount.to_f32_px()),
                     LowLevelFilterOp::Brightness(amount) => (7, amount.to_f32_px()),
                     LowLevelFilterOp::Opacity(amount) => (8, amount.to_f32_px()),
                 };
 
                 ubo_data.push(PrimitiveInstance::blend(src_rect_index.0 as i32,
                                                        target_rect_index.0 as i32,
                                                        filter_mode,
-                                                       (amount * 65535.0).round() as i32));
+                                                       (amount * 65535.0).round() as i32,
+                                                       z_sort_index));
                 true
             }
             _ => false
         }
     }
 
     fn pack_composite(&mut self,
                       rect0_index: RenderTaskIndex,
                       rect1_index: RenderTaskIndex,
                       target_rect_index: RenderTaskIndex,
-                      info: MixBlendMode) -> bool {
+                      info: MixBlendMode,
+                      z_sort_index: i32) -> bool {
         match &mut self.data {
             &mut PrimitiveBatchData::Composite(ref mut ubo_data) => {
                 ubo_data.push(PrimitiveInstance::composite([rect0_index.0 as i32,
                                                             rect1_index.0 as i32],
                                                            target_rect_index.0 as i32,
-                                                           info as i32));
+                                                           info as i32,
+                                                           z_sort_index));
                 true
             }
             _ => false
         }
     }
 
     fn new(batch_kind: AlphaBatchKind,
            key: AlphaBatchKey) -> PrimitiveBatch {
@@ -1651,36 +1752,37 @@ impl FrameBuilderConfig {
             enable_scrollbars: enable_scrollbars,
             enable_subpixel_aa: enable_subpixel_aa,
         }
     }
 }
 
 pub struct FrameBuilder {
     screen_rect: LayerRect,
+    background_color: Option<ColorF>,
     prim_store: PrimitiveStore,
     cmds: Vec<PrimitiveRunCmd>,
     debug: bool,
     config: FrameBuilderConfig,
 
     layer_store: Vec<StackingContext>,
     packed_layers: Vec<PackedStackingContext>,
 
     scrollbar_prims: Vec<ScrollbarPrimitive>,
 }
 
 /// A rendering-oriented representation of frame::Frame built by the render backend
 /// and presented to the renderer.
 pub struct Frame {
     pub viewport_size: LayerSize,
+    pub background_color: Option<ColorF>,
     pub device_pixel_ratio: f32,
     pub debug_rects: Vec<DebugRect>,
     pub cache_size: DeviceSize,
     pub passes: Vec<RenderPass>,
-    pub empty_tiles: Vec<ClearTile>,
     pub profile_counters: FrameProfileCounters,
 
     pub layer_texture_data: Vec<PackedStackingContext>,
     pub render_task_data: Vec<RenderTaskData>,
     pub gpu_data16: Vec<GpuBlock16>,
     pub gpu_data32: Vec<GpuBlock32>,
     pub gpu_data64: Vec<GpuBlock64>,
     pub gpu_data128: Vec<GpuBlock128>,
@@ -1712,19 +1814,19 @@ impl LayerMasksTasks {
         }
         assert!(self.task_ids[index.0].is_none());
         self.task_ids[index.0] = Some(task_id);
     }
 }
 
 /// Some extra per-tile information stored for debugging purposes.
 #[derive(Debug)]
-enum CompiledScreenTileInfo {
-    SimpleAlpha(usize),
-    ComplexAlpha(usize, usize),
+struct CompiledScreenTileInfo {
+    cmd_count: usize,
+    prim_count: usize,
 }
 
 #[derive(Debug)]
 struct CompiledScreenTile {
     main_render_task: RenderTask,
     required_pass_count: usize,
     info: CompiledScreenTileInfo,
     unique_id: TileUniqueId,
@@ -1762,44 +1864,31 @@ enum TileCommand {
     DrawPrimitive(PrimitiveIndex),
 }
 
 #[derive(Debug)]
 struct ScreenTile {
     rect: DeviceIntRect,
     cmds: Vec<TileCommand>,
     prim_count: usize,
-    is_simple: bool,
 }
 
 impl ScreenTile {
     fn new(rect: DeviceIntRect) -> ScreenTile {
         ScreenTile {
             rect: rect,
             cmds: Vec::new(),
             prim_count: 0,
-            is_simple: true,
         }
     }
 
     #[inline(always)]
     fn push_layer(&mut self,
-                  sc_index: StackingContextIndex,
-                  layers: &[StackingContext]) {
+                  sc_index: StackingContextIndex) {
         self.cmds.push(TileCommand::PushLayer(sc_index));
-
-        let layer = &layers[sc_index.0];
-        match layer.composite_kind {
-            CompositeKind::None => {}
-            CompositeKind::Simple(..) | CompositeKind::Complex(..) => {
-                // Bail out on tiles with composites
-                // for now. This can be handled in the future!
-                self.is_simple = false;
-            }
-        }
     }
 
     #[inline(always)]
     fn push_primitive(&mut self, prim_index: PrimitiveIndex) {
         self.cmds.push(TileCommand::DrawPrimitive(prim_index));
         self.prim_count += 1;
     }
 
@@ -1815,16 +1904,17 @@ impl ScreenTile {
 
     fn compile(self, ctx: &CompileTileContext) -> Option<CompiledScreenTile> {
         if self.prim_count == 0 {
             return None;
         }
 
         let cmd_count = self.cmds.len();
         let mut actual_prim_count = 0;
+        let mut next_z = 0;
 
         let mut sc_stack = Vec::new();
         let mut current_task = RenderTask::new_alpha_batch(self.rect, ctx);
         let mut alpha_task_stack = Vec::new();
         let mut clip_info_stack = Vec::new();
         let mut clip_task_stack = Vec::new();
         let mut num_clips_to_skip = 0;
         let mut layer_masks_tasks = LayerMasksTasks::new();
@@ -1877,29 +1967,33 @@ impl ScreenTile {
                 TileCommand::PopLayer => {
                     let sc_index = sc_stack.pop().unwrap();
 
                     let layer = &ctx.layer_store[sc_index.0];
                     match layer.composite_kind {
                         CompositeKind::None => {}
                         CompositeKind::Simple(info) => {
                             let mut prev_task = alpha_task_stack.pop().unwrap();
-                            prev_task.as_alpha_batch().items.push(AlphaRenderItem::Blend(current_task.id,
-                                                                                         info));
+                            let item = AlphaRenderItem::Blend(current_task.id, info, next_z);
+                            next_z += 1;
+                            prev_task.as_alpha_batch().alpha_items.push(item);
                             prev_task.children.push(current_task);
                             current_task = prev_task;
                         }
                         CompositeKind::Complex(info) => {
                             let backdrop = alpha_task_stack.pop().unwrap();
 
                             let mut composite_task = RenderTask::new_alpha_batch(self.rect, ctx);
 
-                            composite_task.as_alpha_batch().items.push(AlphaRenderItem::Composite(backdrop.id,
-                                                                                                  current_task.id,
-                                                                                                  info));
+                            let item = AlphaRenderItem::Composite(backdrop.id,
+                                                                  current_task.id,
+                                                                  info,
+                                                                  next_z);
+                            next_z += 1;
+                            composite_task.as_alpha_batch().alpha_items.push(item);
 
                             composite_task.children.push(backdrop);
                             composite_task.children.push(current_task);
 
                             current_task = composite_task;
                         }
                     }
 
@@ -1914,33 +2008,16 @@ impl ScreenTile {
                             }
                         }
                     }
                 }
                 TileCommand::DrawPrimitive(prim_index) => {
                     let sc_index = *sc_stack.last().unwrap();
                     let prim_metadata = ctx.prim_store.get_metadata(prim_index);
 
-                    // TODO(gw): Complex tiles don't currently get
-                    // any occlusion culling!
-                    if self.is_simple {
-                        let layer = &ctx.layer_store[sc_index.0];
-
-                        let prim_bounding_rect = ctx.prim_store.get_bounding_rect(prim_index);
-
-                        // If an opaque primitive covers a tile entirely, we can discard
-                        // all primitives underneath it.
-                        if layer.xf_rect.as_ref().unwrap().kind == TransformedRectKind::AxisAligned &&
-                           prim_metadata.clip_cache_info.is_none() &&
-                           prim_metadata.is_opaque &&
-                           prim_bounding_rect.as_ref().unwrap().contains_rect(&self.rect) {
-                            current_task.as_alpha_batch().items.clear();
-                        }
-                    }
-
                     // Add a task to render the updated image mask
                     if let Some(ref clip_info) = prim_metadata.clip_cache_info {
                         let mask_opt = RenderTask::new_mask(self.rect,
                                                             clip_task_stack.last(),
                                                             MaskCacheKey::Primitive(prim_index),
                                                             (sc_index, clip_info),
                                                             &clip_info_stack,
                                                             ctx.tile_id);
@@ -1952,41 +2029,56 @@ impl ScreenTile {
                     }
 
                     // Add any dynamic render tasks needed to render this primitive
                     if let Some(ref render_task) = prim_metadata.render_task {
                         current_task.children.push(render_task.clone());
                     }
 
                     actual_prim_count += 1;
-                    current_task.as_alpha_batch().items.push(AlphaRenderItem::Primitive(sc_index, prim_index));
+
+                    let layer = &ctx.layer_store[sc_index.0];
+                    let transform_kind = layer.xf_rect.as_ref().unwrap().kind;
+                    let needs_clipping = layer.clip_cache_info.is_some() || prim_metadata.clip_cache_info.is_some();
+                    let needs_blending = transform_kind == TransformedRectKind::Complex ||
+                                         !prim_metadata.is_opaque ||
+                                         needs_clipping;
+
+                    let items = if needs_blending {
+                        &mut current_task.as_alpha_batch().alpha_items
+                    } else {
+                        &mut current_task.as_alpha_batch().opaque_items
+                    };
+                    items.push(AlphaRenderItem::Primitive(sc_index, prim_index, next_z));
+                    next_z += 1;
                 }
             }
         }
 
         debug_assert!(alpha_task_stack.is_empty());
         debug_assert!(clip_task_stack.is_empty());
 
-        let info = if self.is_simple {
-            CompiledScreenTileInfo::SimpleAlpha(actual_prim_count)
-        } else {
-            CompiledScreenTileInfo::ComplexAlpha(cmd_count, actual_prim_count)
+        let info = CompiledScreenTileInfo {
+            cmd_count: cmd_count,
+            prim_count: actual_prim_count,
         };
 
         current_task.location = RenderTaskLocation::Fixed(self.rect);
         Some(CompiledScreenTile::new(current_task, info, ctx.tile_id, layer_masks_tasks))
     }
 }
 
 impl FrameBuilder {
     pub fn new(viewport_size: LayerSize,
+               background_color: Option<ColorF>,
                debug: bool,
                config: FrameBuilderConfig) -> FrameBuilder {
         FrameBuilder {
             screen_rect: LayerRect::new(LayerPoint::zero(), viewport_size),
+            background_color: background_color,
             layer_store: Vec::new(),
             prim_store: PrimitiveStore::new(),
             cmds: Vec::new(),
             debug: debug,
             packed_layers: Vec::new(),
             scrollbar_prims: Vec::new(),
             config: config,
         }
@@ -2100,16 +2192,18 @@ impl FrameBuilder {
 
     pub fn supported_style(&mut self, border: &BorderSide) -> bool {
         match border.style {
             BorderStyle::Solid |
             BorderStyle::None |
             BorderStyle::Dotted |
             BorderStyle::Dashed |
             BorderStyle::Inset |
+            BorderStyle::Ridge |
+            BorderStyle::Groove |
             BorderStyle::Outset |
             BorderStyle::Double => {
                 return true;
             }
             _ => {
                 println!("TODO: Other border styles {:?}", border.style);
                 return false;
             }
@@ -2624,17 +2718,17 @@ impl FrameBuilder {
                     if let Some(ref clip_info) = layer.clip_cache_info {
                         clip_rect_stack.push(clip_info.outer_rect);
                     }
 
                     let tile_range = layer.tile_range.as_ref().unwrap();
                     for ly in tile_range.y0..tile_range.y1 {
                         for lx in tile_range.x0..tile_range.x1 {
                             let tile = &mut screen_tiles[(ly * x_tile_count + lx) as usize];
-                            tile.push_layer(sc_index, &self.layer_store);
+                            tile.push_layer(sc_index);
                         }
                     }
                 }
                 &PrimitiveRunCmd::PrimitiveRun(first_prim_index, prim_count) => {
                     let sc_index = layer_stack.last().unwrap();
 
                     let layer = &self.layer_store[sc_index.0];
                     if !layer.is_visible() {
@@ -2793,17 +2887,16 @@ impl FrameBuilder {
                          layer_map,
                          auxiliary_lists_map,
                          x_tile_count,
                          y_tile_count,
                          resource_cache,
                          &mut profile_counters,
                          device_pixel_ratio);
 
-        let mut empty_tiles = Vec::new();
         let mut compiled_screen_tiles = Vec::new();
         let mut max_passes_needed = 0;
 
         let mut render_tasks = {
             let mut ctx = CompileTileContext {
                 layer_store: &self.layer_store,
                 prim_store: &self.prim_store,
                 tile_id: 0,
@@ -2820,42 +2913,29 @@ impl FrameBuilder {
                                                   x_tile_count,
                                                   device_pixel_ratio);
             }
 
             // Build list of passes, target allocs that each tile needs.
             for (tile_id, screen_tile) in screen_tiles.into_iter().enumerate() {
                 ctx.tile_id = tile_id;
                 let rect = screen_tile.rect;
-                match screen_tile.compile(&ctx) {
-                    Some(compiled_screen_tile) => {
-                        max_passes_needed = cmp::max(max_passes_needed,
-                                                     compiled_screen_tile.required_pass_count);
-                        if self.debug {
-                            let (label, color) = match &compiled_screen_tile.info {
-                                &CompiledScreenTileInfo::SimpleAlpha(prim_count) => {
-                                    (format!("{}", prim_count), ColorF::new(1.0, 0.0, 1.0, 1.0))
-                                }
-                                &CompiledScreenTileInfo::ComplexAlpha(cmd_count, prim_count) => {
-                                    (format!("{}|{}", cmd_count, prim_count), ColorF::new(1.0, 0.0, 0.0, 1.0))
-                                }
-                            };
-                            debug_rects.push(DebugRect {
-                                label: label,
-                                color: color,
-                                rect: rect,
-                            });
-                        }
-                        compiled_screen_tiles.push(compiled_screen_tile);
-                    }
-                    None => {
-                        empty_tiles.push(ClearTile {
+                if let Some(compiled_screen_tile) = screen_tile.compile(&ctx) {
+                    max_passes_needed = cmp::max(max_passes_needed,
+                                                 compiled_screen_tile.required_pass_count);
+                    if self.debug {
+                        let label = format!("{}|{}", compiled_screen_tile.info.cmd_count, compiled_screen_tile.info.prim_count);
+                        let color =  ColorF::new(1.0, 0.0, 0.0, 1.0);
+                        debug_rects.push(DebugRect {
+                            label: label,
+                            color: color,
                             rect: rect,
                         });
                     }
+                    compiled_screen_tiles.push(compiled_screen_tile);
                 }
             }
 
             let static_render_task_count = ctx.render_task_id_counter.load(Ordering::SeqCst);
             RenderTaskCollection::new(static_render_task_count)
         };
 
         resource_cache.block_until_all_resources_added();
@@ -2904,21 +2984,21 @@ impl FrameBuilder {
                 profile_counters.targets.add(pass.targets.len());
             }
         }
 
         resource_cache.end_frame();
 
         Frame {
             device_pixel_ratio: device_pixel_ratio,
+            background_color: self.background_color,
             viewport_size: self.screen_rect.size,
             debug_rects: debug_rects,
             profile_counters: profile_counters,
             passes: passes,
-            empty_tiles: empty_tiles,
             cache_size: DeviceSize::new(RENDERABLE_CACHE_SIZE as f32,
                                         RENDERABLE_CACHE_SIZE as f32),
             layer_texture_data: self.packed_layers.clone(),
             render_task_data: render_tasks.render_task_data,
             gpu_data16: self.prim_store.gpu_data16.build(),
             gpu_data32: self.prim_store.gpu_data32.build(),
             gpu_data64: self.prim_store.gpu_data64.build(),
             gpu_data128: self.prim_store.gpu_data128.build(),
--- a/gfx/webrender_traits/src/display_item.rs
+++ b/gfx/webrender_traits/src/display_item.rs
@@ -43,16 +43,40 @@ impl BorderRadius {
     pub fn uniform(radius: f32) -> BorderRadius {
         BorderRadius {
             top_left: LayoutSize::new(radius, radius),
             top_right: LayoutSize::new(radius, radius),
             bottom_left: LayoutSize::new(radius, radius),
             bottom_right: LayoutSize::new(radius, radius),
         }
     }
+
+    /// Returns the shared radius when all four corners are circular
+    /// (width == height) and equal to one another, `None` otherwise.
+    ///
+    /// The candidate is built from `top_left.width` alone, so `top_left`
+    /// has to be compared against it as well: otherwise an elliptical
+    /// top-left corner such as `(r, 2r)` would be reported as uniform.
+    pub fn is_uniform(&self) -> Option<f32> {
+        let uniform_radius = LayoutSize::new(self.top_left.width, self.top_left.width);
+        if self.top_left == uniform_radius &&
+           self.top_right == uniform_radius &&
+           self.bottom_left == uniform_radius &&
+           self.bottom_right == uniform_radius {
+            Some(uniform_radius.width)
+        } else {
+            None
+        }
+    }
+
+    /// Returns `true` when every corner radius is zero in both
+    /// dimensions, i.e. when `is_uniform()` yields `Some(0.0)`.
+    pub fn is_zero(&self) -> bool {
+        self.is_uniform() == Some(0.0)
+    }
 }
 
 impl ClipRegion {
     pub fn new(rect: &LayoutRect,
                complex: Vec<ComplexClipRegion>,
                image_mask: Option<ImageMask>,
                auxiliary_lists_builder: &mut AuxiliaryListsBuilder)
                -> ClipRegion {
--- a/gfx/webrender_traits/src/types.rs
+++ b/gfx/webrender_traits/src/types.rs
@@ -480,16 +480,17 @@ pub struct ScrollLayerState {
     pub scroll_offset: LayoutPoint,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
 pub enum ScrollPolicy {
     Scrollable,
     Fixed,
 }
+known_heap_size!(0, ScrollPolicy);
 
 #[derive(Clone, Copy, Debug, Deserialize, Serialize)]
 pub enum ScrollLocation {
     /// Scroll by a certain amount.
     Delta(LayoutPoint), 
     /// Scroll to very top of element.
     Start,
     /// Scroll to very bottom of element.