Bug 1331044. Update webrender to 0bbd08a41c2d54b72639f531628ba3800e7f5319 r=gfx?
author: Jeff Muizelaar <jmuizelaar@mozilla.com>
Fri, 13 Jan 2017 19:07:18 -0500
changeset 342098 ffa08042356157483d5bd768ae1e068a7bffb611
parent 342097 66ef32af3d4154a6183ca80655a8cbae89caf4c6
child 342099 6a5da724ef90edf99628489c4d773fcded43eefb
push id: 31345
push user: kwierso@gmail.com
push date: Fri, 10 Feb 2017 20:35:09 +0000
treeherder: mozilla-central@a288fe35e494 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: gfx
bugs: 1331044
milestone: 53.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1331044. Update webrender to 0bbd08a41c2d54b72639f531628ba3800e7f5319 r=gfx?
README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_clip_copy.fs.glsl
gfx/webrender/res/cs_clip_copy.vs.glsl
gfx/webrender/res/cs_clip_image.vs.glsl
gfx/webrender/res/cs_clip_rectangle.vs.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/src/device.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/scene.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_traits/src/api.rs
gfx/webrender_traits/src/channel_mpsc.rs
gfx/webrender_traits/src/display_item.rs
gfx/webrender_traits/src/lib.rs
gfx/webrender_traits/src/types.rs
third_party/rust/error-chain/.cargo-checksum.json
third_party/rust/error-chain/.gitattributes
toolkit/library/gtest/rust/Cargo.lock
toolkit/library/rust/Cargo.lock
--- a/README.webrender
+++ b/README.webrender
@@ -54,9 +54,9 @@ 8) Build and test. You may need to make 
    features yet, just get the build working with the minimal changes.
 9) Commit the changes locally from step 7, and push everything to the
    graphics branch.
 10) Now you have an update webrender with the new features you wanted,
    so you can write gecko code against them.
 
 Yes, this is somewhat painful. It used to be worse. :)
 
-Latest Commit: ee97fcaa4ee8eda89f7bd67b1a8920d3382af294
+Latest Commit: 0bbd08a41c2d54b72639f531628ba3800e7f5319
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -1,11 +1,11 @@
 [package]
 name = "webrender"
-version = "0.11.0"
+version = "0.11.1"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 workspace = ".."
 
 [features]
 default = ["codegen", "freetype-lib"]
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -1,54 +1,91 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifdef WR_VERTEX_SHADER
 
+#define SEGMENT_ALL         0
+#define SEGMENT_CORNER_TL   1
+#define SEGMENT_CORNER_TR   2
+#define SEGMENT_CORNER_BL   3
+#define SEGMENT_CORNER_BR   4
+
 in int aClipRenderTaskIndex;
 in int aClipLayerIndex;
 in int aClipDataIndex;
-in int aClipBaseTaskIndex;
+in int aClipSegmentIndex;
 
 struct CacheClipInstance {
     int render_task_index;
     int layer_index;
     int data_index;
-    int base_task_index;
+    int segment_index;
 };
 
 CacheClipInstance fetch_clip_item(int index) {
     CacheClipInstance cci;
 
     cci.render_task_index = aClipRenderTaskIndex;
     cci.layer_index = aClipLayerIndex;
     cci.data_index = aClipDataIndex;
-    cci.base_task_index = aClipBaseTaskIndex;
+    cci.segment_index = aClipSegmentIndex;
 
     return cci;
 }
 
 // The transformed vertex function that always covers the whole clip area,
 // which is the intersection of all clip instances of a given primitive
 TransformVertexInfo write_clip_tile_vertex(vec4 local_clip_rect,
                                            Layer layer,
-                                           ClipArea area) {
+                                           ClipArea area,
+                                           int segment_index) {
     vec2 lp0_base = local_clip_rect.xy;
     vec2 lp1_base = local_clip_rect.xy + local_clip_rect.zw;
 
     vec2 lp0 = clamp_rect(lp0_base, layer.local_clip_rect);
     vec2 lp1 = clamp_rect(lp1_base, layer.local_clip_rect);
     vec4 clipped_local_rect = vec4(lp0, lp1 - lp0);
 
-    vec2 final_pos = mix(area.task_bounds.xy, area.task_bounds.zw, aPosition.xy);
+    vec2 outer_p0 = area.screen_origin_target_index.xy;
+    vec2 outer_p1 = outer_p0 + area.task_bounds.zw - area.task_bounds.xy;
+    vec2 inner_p0 = area.inner_rect.xy;
+    vec2 inner_p1 = area.inner_rect.zw;
+
+    vec2 p0, p1;
+    switch (segment_index) {
+        case SEGMENT_ALL:
+            p0 = outer_p0;
+            p1 = outer_p1;
+            break;
+        case SEGMENT_CORNER_TL:
+            p0 = outer_p0;
+            p1 = inner_p0;
+            break;
+        case SEGMENT_CORNER_BL:
+            p0 = vec2(outer_p0.x, outer_p1.y);
+            p1 = vec2(inner_p0.x, inner_p1.y);
+            break;
+        case SEGMENT_CORNER_TR:
+            p0 = vec2(outer_p1.x, outer_p1.y);
+            p1 = vec2(inner_p1.x, inner_p1.y);
+            break;
+        case SEGMENT_CORNER_BR:
+            p0 = vec2(outer_p1.x, outer_p0.y);
+            p1 = vec2(inner_p1.x, inner_p0.y);
+            break;
+    }
+
+    vec2 actual_pos = mix(p0, p1, aPosition.xy);
+
+    vec4 layer_pos = get_layer_pos(actual_pos / uDevicePixelRatio, layer);
 
     // compute the point position in side the layer, in CSS space
-    vec2 clamped_pos = final_pos + area.screen_origin_target_index.xy - area.task_bounds.xy;
-    vec4 layer_pos = get_layer_pos(clamped_pos / uDevicePixelRatio, layer);
+    vec2 vertex_pos = actual_pos + area.task_bounds.xy - area.screen_origin_target_index.xy;
 
-    gl_Position = uTransform * vec4(final_pos, 0.0, 1);
+    gl_Position = uTransform * vec4(vertex_pos, 0.0, 1);
 
-    return TransformVertexInfo(layer_pos.xyw, clamped_pos, clipped_local_rect);
+    return TransformVertexInfo(layer_pos.xyw, actual_pos, clipped_local_rect);
 }
 
 #endif //WR_VERTEX_SHADER
deleted file mode 100644
--- a/gfx/webrender/res/cs_clip_copy.fs.glsl
+++ /dev/null
@@ -1,8 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-    float alpha = texelFetch(sCache, ivec3(vClipMaskUv), 0).a;
-    oFragColor = vec4(alpha, 0.0, 0.0, 1.0);
-}
deleted file mode 100644
--- a/gfx/webrender/res/cs_clip_copy.vs.glsl
+++ /dev/null
@@ -1,19 +0,0 @@
-#line 1
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-void main(void) {
-    CacheClipInstance cci = fetch_clip_item(gl_InstanceID);
-    ClipArea area = fetch_clip_area(cci.render_task_index);
-    ClipArea source = fetch_clip_area(cci.base_task_index);
-
-    vec2 final_pos = mix(area.task_bounds.xy, area.task_bounds.zw, aPosition.xy);
-
-    gl_Position = uTransform * vec4(final_pos, 0.0, 1.0);
-
-    // convert to the source task space via the screen space
-    vec2 tuv = final_pos - area.task_bounds.xy + area.screen_origin_target_index.xy +
-        source.task_bounds.xy - source.screen_origin_target_index.xy;
-    vClipMaskUv = vec3(tuv, source.screen_origin_target_index.z);
-}
--- a/gfx/webrender/res/cs_clip_image.vs.glsl
+++ b/gfx/webrender/res/cs_clip_image.vs.glsl
@@ -23,16 +23,17 @@ void main(void) {
     CacheClipInstance cci = fetch_clip_item(gl_InstanceID);
     ClipArea area = fetch_clip_area(cci.render_task_index);
     Layer layer = fetch_layer(cci.layer_index);
     ImageMaskData mask = fetch_mask_data(cci.data_index);
     vec4 local_rect = mask.local_rect;
 
     TransformVertexInfo vi = write_clip_tile_vertex(local_rect,
                                                     layer,
-                                                    area);
+                                                    area,
+                                                    cci.segment_index);
     vLocalRect = vi.clipped_local_rect;
     vPos = vi.local_pos;
 
     vClipMaskUv = vec3((vPos.xy / vPos.z - local_rect.xy) / local_rect.zw, 0.0);
     vec2 texture_size = vec2(textureSize(sMask, 0));
     vClipMaskUvRect = mask.uv_rect / texture_size.xyxy;
 }
--- a/gfx/webrender/res/cs_clip_rectangle.vs.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.vs.glsl
@@ -60,17 +60,18 @@ void main(void) {
     CacheClipInstance cci = fetch_clip_item(gl_InstanceID);
     ClipArea area = fetch_clip_area(cci.render_task_index);
     Layer layer = fetch_layer(cci.layer_index);
     ClipData clip = fetch_clip(cci.data_index);
     vec4 local_rect = clip.rect.rect;
 
     TransformVertexInfo vi = write_clip_tile_vertex(local_rect,
                                                     layer,
-                                                    area);
+                                                    area,
+                                                    cci.segment_index);
     vLocalRect = vi.clipped_local_rect;
     vPos = vi.local_pos;
 
     vClipRect = vec4(local_rect.xy, local_rect.xy + local_rect.zw);
     vClipRadius = vec4(clip.top_left.outer_inner_radius.x,
                        clip.top_right.outer_inner_radius.x,
                        clip.bottom_right.outer_inner_radius.x,
                        clip.bottom_left.outer_inner_radius.x);
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -1,18 +1,20 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#if defined(GL_ES) && GL_ES == 1
-    #ifdef GL_FRAGMENT_PRECISION_HIGH
-    precision highp sampler2DArray;
-    #else
-    precision mediump sampler2DArray;
+#if defined(GL_ES)
+    #if GL_ES == 1
+        #ifdef GL_FRAGMENT_PRECISION_HIGH
+        precision highp sampler2DArray;
+        #else
+        precision mediump sampler2DArray;
+        #endif
     #endif
 #endif
 
 #define PST_TOP_LEFT     0
 #define PST_TOP          1
 #define PST_TOP_RIGHT    2
 #define PST_RIGHT        3
 #define PST_BOTTOM_RIGHT 4
@@ -33,17 +35,17 @@
 uniform sampler2DArray sCache;
 
 flat varying vec4 vClipMaskUvBounds;
 varying vec3 vClipMaskUv;
 
 #ifdef WR_VERTEX_SHADER
 
 #define VECS_PER_LAYER             13
-#define VECS_PER_RENDER_TASK        2
+#define VECS_PER_RENDER_TASK        3
 #define VECS_PER_PRIM_GEOM          2
 
 #define GRADIENT_HORIZONTAL     0
 #define GRADIENT_VERTICAL       1
 #define GRADIENT_ROTATED        2
 
 uniform sampler2D sLayers;
 uniform sampler2D sRenderTasks;
@@ -123,25 +125,27 @@ Layer fetch_layer(int index) {
     layer.screen_vertices[3] = texelFetchOffset(sLayers, uv1, 0, ivec2(4, 0));
 
     return layer;
 }
 
 struct RenderTaskData {
     vec4 data0;
     vec4 data1;
+    vec4 data2;
 };
 
 RenderTaskData fetch_render_task(int index) {
     RenderTaskData task;
 
     ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
 
     task.data0 = texelFetchOffset(sRenderTasks, uv, 0, ivec2(0, 0));
     task.data1 = texelFetchOffset(sRenderTasks, uv, 0, ivec2(1, 0));
+    task.data2 = texelFetchOffset(sRenderTasks, uv, 0, ivec2(2, 0));
 
     return task;
 }
 
 struct Tile {
     vec4 screen_origin_task_origin;
     vec4 size_target_index;
 };
@@ -154,28 +158,31 @@ Tile fetch_tile(int index) {
     tile.size_target_index = task.data1;
 
     return tile;
 }
 
 struct ClipArea {
     vec4 task_bounds;
     vec4 screen_origin_target_index;
+    vec4 inner_rect;
 };
 
 ClipArea fetch_clip_area(int index) {
     ClipArea area;
 
     if (index == 0x7FFFFFFF) { //special sentinel task index
         area.task_bounds = vec4(0.0, 0.0, 0.0, 0.0);
         area.screen_origin_target_index = vec4(0.0, 0.0, 0.0, 0.0);
+        area.inner_rect = vec4(0.0);
     } else {
         RenderTaskData task = fetch_render_task(index);
         area.task_bounds = task.data0;
         area.screen_origin_target_index = task.data1;
+        area.inner_rect = task.data2;
     }
 
     return area;
 }
 
 struct Gradient {
     vec4 start_end_point;
     vec4 kind;
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -263,17 +263,17 @@ impl VertexFormat {
                                           vertex_stride as gl::GLint,
                                           0);
 
                 instance.bind();
 
                 for (i, &attrib) in [ClipAttribute::RenderTaskIndex,
                                      ClipAttribute::LayerIndex,
                                      ClipAttribute::DataIndex,
-                                     ClipAttribute::BaseTaskIndex,
+                                     ClipAttribute::SegmentIndex,
                                     ].into_iter().enumerate() {
                     gl::enable_vertex_attrib_array(attrib as gl::GLuint);
                     gl::vertex_attrib_divisor(attrib as gl::GLuint, 1);
                     gl::vertex_attrib_i_pointer(attrib as gl::GLuint,
                                                 1,
                                                 gl::INT,
                                                 instance_stride,
                                                 (i * 4) as gl::GLuint);
@@ -396,17 +396,17 @@ impl Program {
 
         gl::bind_attrib_location(self.id, BlurAttribute::RenderTaskIndex as gl::GLuint, "aBlurRenderTaskIndex");
         gl::bind_attrib_location(self.id, BlurAttribute::SourceTaskIndex as gl::GLuint, "aBlurSourceTaskIndex");
         gl::bind_attrib_location(self.id, BlurAttribute::Direction as gl::GLuint, "aBlurDirection");
 
         gl::bind_attrib_location(self.id, ClipAttribute::RenderTaskIndex as gl::GLuint, "aClipRenderTaskIndex");
         gl::bind_attrib_location(self.id, ClipAttribute::LayerIndex as gl::GLuint, "aClipLayerIndex");
         gl::bind_attrib_location(self.id, ClipAttribute::DataIndex as gl::GLuint, "aClipDataIndex");
-        gl::bind_attrib_location(self.id, ClipAttribute::BaseTaskIndex as gl::GLuint, "aClipBaseTaskIndex");
+        gl::bind_attrib_location(self.id, ClipAttribute::SegmentIndex as gl::GLuint, "aClipSegmentIndex");
 
         gl::link_program(self.id);
         if gl::get_program_iv(self.id, gl::LINK_STATUS) == (0 as gl::GLint) {
             println!("Failed to link shader program: {}", gl::get_program_info_log(self.id));
             gl::detach_shader(self.id, vs_id);
             gl::detach_shader(self.id, fs_id);
             if panic_on_fail {
                 panic!("-- Program link failed - exiting --");
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -261,17 +261,17 @@ pub enum BlurAttribute {
 #[derive(Clone, Copy, Debug)]
 pub enum ClipAttribute {
     // vertex frequency
     Position,
     // instance frequency
     RenderTaskIndex,
     LayerIndex,
     DataIndex,
-    BaseTaskIndex,
+    SegmentIndex,
 }
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedColor {
     pub r: u8,
     pub g: u8,
     pub b: u8,
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -107,16 +107,17 @@ pub struct PrimitiveMetadata {
     // drawing this primitive. For instance, box shadows
     // use this to draw a portion of the box shadow to
     // a render target to reduce the number of pixels
     // that the box-shadow shader needs to run on. For
     // text-shadow, this creates a render task chain
     // that implements a 2-pass separable blur on a
     // text run.
     pub render_task: Option<RenderTask>,
+    pub clip_task: Option<RenderTask>,
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct RectanglePrimitive {
     pub color: ColorF,
 }
 
@@ -485,16 +486,17 @@ impl PrimitiveStore {
                     clip_source: clip_source,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Rectangle,
                     cpu_prim_index: SpecificPrimitiveIndex::invalid(),
                     gpu_prim_index: gpu_address,
                     gpu_data_address: GpuStoreAddress(0),
                     gpu_data_count: 0,
                     render_task: None,
+                    clip_task: None,
                 };
 
                 metadata
             }
             PrimitiveContainer::TextRun(mut text_cpu, text_gpu) => {
                 let gpu_address = self.gpu_data16.push(text_gpu);
                 let gpu_glyphs_address = self.gpu_data16.alloc(text_cpu.glyph_range.length);
                 text_cpu.resource_address = self.gpu_resource_rects.alloc(text_cpu.glyph_range.length);
@@ -504,16 +506,17 @@ impl PrimitiveStore {
                     clip_source: clip_source,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::TextRun,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_text_runs.len()),
                     gpu_prim_index: gpu_address,
                     gpu_data_address: gpu_glyphs_address,
                     gpu_data_count: text_cpu.glyph_range.length as i32,
                     render_task: None,
+                    clip_task: None,
                 };
 
                 self.cpu_text_runs.push(text_cpu);
                 metadata
             }
             PrimitiveContainer::Image(mut image_cpu, image_gpu) => {
                 image_cpu.resource_address = self.gpu_resource_rects.alloc(1);
 
@@ -524,16 +527,17 @@ impl PrimitiveStore {
                     clip_source: clip_source,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Image,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_images.len()),
                     gpu_prim_index: gpu_address,
                     gpu_data_address: GpuStoreAddress(0),
                     gpu_data_count: 0,
                     render_task: None,
+                    clip_task: None,
                 };
 
                 self.cpu_images.push(image_cpu);
                 metadata
             }
             PrimitiveContainer::YuvImage(image_cpu, image_gpu) => {
                 let gpu_address = self.gpu_data64.push(image_gpu);
 
@@ -542,16 +546,17 @@ impl PrimitiveStore {
                     clip_source: clip_source,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::YuvImage,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_yuv_images.len()),
                     gpu_prim_index: gpu_address,
                     gpu_data_address: GpuStoreAddress(0),
                     gpu_data_count: 0,
                     render_task: None,
+                    clip_task: None,
                 };
 
                 self.cpu_yuv_images.push(image_cpu);
                 metadata
             }
             PrimitiveContainer::Border(border_cpu, border_gpu) => {
                 let gpu_address = self.gpu_data128.push(border_gpu);
 
@@ -560,16 +565,17 @@ impl PrimitiveStore {
                     clip_source: clip_source,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Border,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_borders.len()),
                     gpu_prim_index: gpu_address,
                     gpu_data_address: GpuStoreAddress(0),
                     gpu_data_count: 0,
                     render_task: None,
+                    clip_task: None,
                 };
 
                 self.cpu_borders.push(border_cpu);
                 metadata
             }
             PrimitiveContainer::Gradient(gradient_cpu, gradient_gpu) => {
                 let gpu_address = self.gpu_data32.push(gradient_gpu);
                 let gpu_stops_address = self.gpu_data32.alloc(gradient_cpu.stops_range.length);
@@ -579,16 +585,17 @@ impl PrimitiveStore {
                     clip_source: clip_source,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Gradient,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_gradients.len()),
                     gpu_prim_index: gpu_address,
                     gpu_data_address: gpu_stops_address,
                     gpu_data_count: gradient_cpu.stops_range.length as i32,
                     render_task: None,
+                    clip_task: None,
                 };
 
                 self.cpu_gradients.push(gradient_cpu);
                 metadata
             }
             PrimitiveContainer::BoxShadow(box_shadow_gpu, instance_rects) => {
                 let cache_key = PrimitiveCacheKey::BoxShadow(BoxShadowPrimitiveCacheKey {
                     blur_radius: Au::from_f32_px(box_shadow_gpu.blur_radius),
@@ -623,16 +630,17 @@ impl PrimitiveStore {
                     clip_source: clip_source,
                     clip_cache_info: None,
                     prim_kind: PrimitiveKind::BoxShadow,
                     cpu_prim_index: SpecificPrimitiveIndex::invalid(),
                     gpu_prim_index: gpu_prim_address,
                     gpu_data_address: gpu_data_address,
                     gpu_data_count: instance_rects.len() as i32,
                     render_task: Some(render_task),
+                    clip_task: None,
                 };
 
                 for rect in instance_rects {
                     self.gpu_data16.push(InstanceRect {
                         rect: rect,
                     });
                 }
 
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -209,19 +209,23 @@ impl RenderBackend {
                             });
 
                             if self.scene.root_pipeline_id.is_some() {
                                 self.publish_frame_and_notify_compositor(frame, &mut profile_counters);
                                 frame_counter += 1;
                             }
                         }
                         ApiMsg::SetRootPipeline(pipeline_id) => {
+                            self.scene.set_root_pipeline_id(pipeline_id);
+
+                            if self.scene.display_lists.get(&pipeline_id).is_none() {
+                                continue;
+                            }
+
                             let frame = profile_counters.total_time.profile(|| {
-                                self.scene.set_root_pipeline_id(pipeline_id);
-
                                 self.build_scene();
                                 self.render()
                             });
 
                             // the root pipeline is guaranteed to be Some() at this point
                             self.publish_frame_and_notify_compositor(frame, &mut profile_counters);
                             frame_counter += 1;
                         }
@@ -342,19 +346,39 @@ impl RenderBackend {
                             let frame = profile_counters.total_time.profile(|| {
                                 self.render()
                             });
                             if self.scene.root_pipeline_id.is_some() {
                                 self.publish_frame_and_notify_compositor(frame, &mut profile_counters);
                                 frame_counter += 1;
                             }
                         }
+                        ApiMsg::ExternalEvent(evt) => {
+                            let notifier = self.notifier.lock();
+                            notifier.unwrap()
+                                    .as_mut()
+                                    .unwrap()
+                                    .external_event(evt);
+                        }
+                        ApiMsg::ShutDown => {
+                            let notifier = self.notifier.lock();
+                            notifier.unwrap()
+                                    .as_mut()
+                                    .unwrap()
+                                    .shut_down();
+                            break;
+                        }
                     }
                 }
                 Err(..) => {
+                    let notifier = self.notifier.lock();
+                    notifier.unwrap()
+                            .as_mut()
+                            .unwrap()
+                            .shut_down();
                     break;
                 }
             }
         }
     }
 
     fn build_scene(&mut self) {
         // Flatten the stacking context hierarchy
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -33,17 +33,17 @@ use std::sync::mpsc::{channel, Receiver,
 use std::thread;
 use texture_cache::TextureCache;
 use tiling::{Frame, FrameBuilderConfig, PrimitiveBatch, PrimitiveBatchData};
 use tiling::{BlurCommand, CacheClipInstance, PrimitiveInstance, RenderTarget};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 use webrender_traits::{ColorF, Epoch, PipelineId, RenderNotifier, RenderDispatcher};
 use webrender_traits::{ExternalImageId, ImageFormat, RenderApiSender, RendererKind};
-use webrender_traits::{DeviceIntRect, DeviceSize, DevicePoint, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
+use webrender_traits::{DeviceIntRect, DevicePoint, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
 use webrender_traits::channel;
 use webrender_traits::VRCompositorHandler;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
 const GPU_TAG_CACHE_BOX_SHADOW: GpuProfileTag = GpuProfileTag { label: "C_BoxShadow", color: debug_colors::BLACK };
 const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag { label: "C_Clip", color: debug_colors::PURPLE };
 const GPU_TAG_CACHE_TEXT_RUN: GpuProfileTag = GpuProfileTag { label: "C_TextRun", color: debug_colors::MISTYROSE };
@@ -283,17 +283,16 @@ pub struct Renderer {
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
     cs_box_shadow: LazilyCompiledShader,
     cs_text_run: LazilyCompiledShader,
     cs_blur: LazilyCompiledShader,
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
-    cs_clip_copy: LazilyCompiledShader,
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
 
     // The are "primitive shaders". These shaders draw and blend
     // final results on screen. They are aware of tile boundaries.
     // Most draw directly to the framebuffer, but some use inputs
     // from the cache shaders to draw. Specifically, the box
     // shadow primitive shader stretches the box shadow cache
@@ -416,21 +415,16 @@ impl Renderer {
                                                     &mut device,
                                                     options.precache_shaders);
         let cs_blur = LazilyCompiledShader::new(ShaderKind::Cache,
                                                 "cs_blur",
                                                  &[],
                                                  &mut device,
                                                  options.precache_shaders);
 
-        let cs_clip_copy = LazilyCompiledShader::new(ShaderKind::ClipCache,
-                                                     "cs_clip_copy",
-                                                     &[],
-                                                     &mut device,
-                                                     options.precache_shaders);
         let cs_clip_rectangle = LazilyCompiledShader::new(ShaderKind::ClipCache,
                                                           "cs_clip_rectangle",
                                                           &[],
                                                           &mut device,
                                                           options.precache_shaders);
         let cs_clip_image = LazilyCompiledShader::new(ShaderKind::ClipCache,
                                                       "cs_clip_image",
                                                       &[],
@@ -587,44 +581,43 @@ impl Renderer {
         let config = FrameBuilderConfig::new(options.enable_scrollbars,
                                              options.enable_subpixel_aa);
 
         let debug = options.debug;
         let (device_pixel_ratio, enable_aa) = (options.device_pixel_ratio, options.enable_aa);
         let render_target_debug = options.render_target_debug;
         let payload_tx_for_backend = payload_tx.clone();
         let enable_recording = options.enable_recording;
-        thread::spawn(move || {
+        thread::Builder::new().name("RenderBackend".to_string()).spawn(move || {
             let mut backend = RenderBackend::new(api_rx,
                                                  payload_rx,
                                                  payload_tx_for_backend,
                                                  result_tx,
                                                  device_pixel_ratio,
                                                  texture_cache,
                                                  enable_aa,
                                                  backend_notifier,
                                                  context_handle,
                                                  config,
                                                  debug,
                                                  enable_recording,
                                                  backend_main_thread_dispatcher,
                                                  backend_vr_compositor);
             backend.run();
-        });
+        }).unwrap();
 
         let renderer = Renderer {
             result_rx: result_rx,
             device: device,
             current_frame: None,
             pending_texture_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
             cs_box_shadow: cs_box_shadow,
             cs_text_run: cs_text_run,
             cs_blur: cs_blur,
-            cs_clip_copy: cs_clip_copy,
             cs_clip_rectangle: cs_clip_rectangle,
             cs_clip_image: cs_clip_image,
             ps_rectangle: ps_rectangle,
             ps_rectangle_clip: ps_rectangle_clip,
             ps_text_run: ps_text_run,
             ps_text_run_subpixel: ps_text_run_subpixel,
             ps_image: ps_image,
             ps_yuv_image: ps_yuv_image,
@@ -693,17 +686,17 @@ impl Renderer {
     /// Some WebVR commands such as Vsync and SubmitFrame must be called in the WebGL render thread.
     pub fn set_vr_compositor_handler(&self, creator: Box<VRCompositorHandler>) {
         let mut handler_arc = self.vr_compositor_handler.lock().unwrap();
         *handler_arc = Some(creator);
     }
 
     /// Returns the Epoch of the current frame in a pipeline.
     pub fn current_epoch(&self, pipeline_id: PipelineId) -> Option<Epoch> {
-        self.pipeline_epoch_map.get(&pipeline_id).map(|epoch| *epoch)
+        self.pipeline_epoch_map.get(&pipeline_id).cloned()
     }
 
     /// Processes the result queue.
     ///
     /// Should be called before `render()`, as texture cache updates are done here.
     pub fn update(&mut self) {
         // Pull any pending results and return the most recent.
         while let Ok(msg) = self.result_rx.try_recv() {
@@ -733,25 +726,25 @@ impl Renderer {
 
     // Get the real (OpenGL) texture ID for a given source texture.
     // For a texture cache texture, the IDs are stored in a vector
     // map for fast access. For WebGL textures, the native texture ID
     // is stored inline. When we add support for external textures,
     // we will add a callback here that is able to ask the caller
     // for the image data.
     fn resolve_source_texture(&mut self, texture_id: &SourceTexture) -> TextureId {
-        match texture_id {
-            &SourceTexture::Invalid => TextureId::invalid(),
-            &SourceTexture::WebGL(id) => TextureId::new(id),
-            &SourceTexture::External(ref key) => {
+        match *texture_id {
+            SourceTexture::Invalid => TextureId::invalid(),
+            SourceTexture::WebGL(id) => TextureId::new(id),
+            SourceTexture::External(ref key) => {
                 *self.external_images
                      .get(key)
                      .expect("BUG: External image should be resolved by now!")
             }
-            &SourceTexture::TextureCache(index) => {
+            SourceTexture::TextureCache(index) => {
                 self.cache_texture_id_map[index.0]
             }
         }
     }
 
     /// Set a callback for handling external images.
     pub fn set_external_image_handler(&mut self, handler: Box<ExternalImageHandler>) {
         self.external_image_handler = Some(handler);
@@ -939,17 +932,17 @@ impl Renderer {
 
     fn draw_instanced_batch<T>(&mut self,
                                data: &[T],
                                vao: VAOId,
                                shader: ProgramId,
                                textures: &BatchTextures,
                                projection: &Matrix4D<f32>) {
         self.device.bind_vao(vao);
-        self.device.bind_program(shader, &projection);
+        self.device.bind_program(shader, projection);
 
         for i in 0..textures.colors.len() {
             let texture_id = self.resolve_source_texture(&textures.colors[i]);
             self.device.bind_texture(TextureSampler::color(i), texture_id);
         }
 
         self.device.update_vao_instances(vao, data, VertexUsageHint::Stream);
         self.device.draw_indexed_triangles_instanced_u16(6, data.len() as i32);
@@ -1020,30 +1013,30 @@ impl Renderer {
         };
 
         let _gm = self.gpu_profile.add_marker(marker);
         let vao = self.prim_vao_id;
         self.draw_instanced_batch(data,
                                   vao,
                                   shader,
                                   &batch.key.textures,
-                                  &projection);
+                                  projection);
     }
 
     fn draw_target(&mut self,
                    render_target: Option<(TextureId, i32)>,
                    target: &RenderTarget,
-                   target_size: &DeviceSize,
+                   target_size: &DeviceUintSize,
                    cache_texture: Option<TextureId>,
                    should_clear: bool,
                    background_color: Option<ColorF>) {
         self.device.disable_depth();
         self.device.enable_depth_write();
 
-        let dimensions = [target_size.width as u32, target_size.height as u32];
+        let dimensions = [target_size.width, target_size.height];
         let projection = {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
             self.device.bind_draw_target(render_target, Some(dimensions));
 
             self.device.set_blend(false);
             self.device.set_blend_mode_alpha();
             if let Some(cache_texture) = cache_texture {
                 self.device.bind_texture(TextureSampler::Cache, cache_texture);
@@ -1055,31 +1048,31 @@ impl Renderer {
                     // - The red channel is cleared to 1, so that the clip mask
                     //   generation (which reads/writes the red channel) can
                     //   assume that each allocated rect is opaque / non-clipped
                     //   initially.
                     // - The alpha channel is cleared to 0, so that visual render
                     //   tasks can assume that pixels are transparent if not
                     //   rendered. (This is relied on by the compositing support
                     //   for mix-blend-mode etc).
-                    [1.0, 0.0, 0.0, 0.0],
+                    [1.0, 1.0, 1.0, 0.0],
                     Matrix4D::ortho(0.0,
-                                   target_size.width,
+                                   target_size.width as f32,
                                    0.0,
-                                   target_size.height,
+                                   target_size.height as f32,
                                    ORTHO_NEAR_PLANE,
                                    ORTHO_FAR_PLANE)
                 ),
                 None => (
                     background_color.map_or(self.clear_color.to_array(), |color| {
                         color.to_array()
                     }),
                     Matrix4D::ortho(0.0,
-                                   target_size.width,
-                                   target_size.height,
+                                   target_size.width as f32,
+                                   target_size.height as f32,
                                    0.0,
                                    ORTHO_NEAR_PLANE,
                                    ORTHO_FAR_PLANE)
                 ),
             };
 
             let clear_depth = Some(1.0);
             let clear_color = if should_clear {
@@ -1132,27 +1125,17 @@ impl Renderer {
                                       &BatchTextures::no_texture(),
                                       &projection);
         }
 
         // Draw the clip items into the tiled alpha mask.
         {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP);
             let vao = self.clip_vao_id;
-            // Optionally, copy the contents from another task
-            if !target.clip_batcher.copies.is_empty() {
-                self.device.set_blend(false);
-                let shader = self.cs_clip_copy.get(&mut self.device);
-                self.draw_instanced_batch(&target.clip_batcher.copies,
-                                          vao,
-                                          shader,
-                                          &BatchTextures::no_texture(),
-                                          &projection);
-            }
-            // now switch to multiplicative blending
+            // switch to multiplicative blending
             self.device.set_blend(true);
             self.device.set_blend_mode_multiply();
             // draw rounded cornered rectangles
             if !target.clip_batcher.rectangles.is_empty() {
                 let _gm2 = GpuMarker::new("clip rectangles");
                 let shader = self.cs_clip_rectangle.get(&mut self.device);
                 self.draw_instanced_batch(&target.clip_batcher.rectangles,
                                           vao,
@@ -1359,29 +1342,29 @@ impl Renderer {
             self.device.bind_texture(TextureSampler::Data128, self.data128_texture.id);
             self.device.bind_texture(TextureSampler::ResourceRects, self.resource_rects_texture.id);
 
             let mut src_id = None;
 
             for (pass_index, pass) in frame.passes.iter().enumerate() {
                 let (do_clear, size, target_id) = if pass.is_framebuffer {
                     (self.clear_framebuffer || needs_clear,
-                     DeviceSize::new(framebuffer_size.width as f32, framebuffer_size.height as f32),
+                     framebuffer_size,
                      None)
                 } else {
-                    (true, frame.cache_size, Some(self.render_targets[pass_index]))
+                    (true, &frame.cache_size, Some(self.render_targets[pass_index]))
                 };
 
                 for (target_index, target) in pass.targets.iter().enumerate() {
                     let render_target = target_id.map(|texture_id| {
                         (texture_id, target_index as i32)
                     });
                     self.draw_target(render_target,
                                      target,
-                                     &size,
+                                     size,
                                      src_id,
                                      do_clear,
                                      frame.background_color);
 
                 }
 
                 src_id = target_id;
             }
@@ -1490,8 +1473,28 @@ pub struct RendererOptions {
     pub enable_scrollbars: bool,
     pub precache_shaders: bool,
     pub renderer_kind: RendererKind,
     pub enable_subpixel_aa: bool,
     pub clear_framebuffer: bool,
     pub clear_color: ColorF,
     pub render_target_debug: bool,
 }
+
+impl Default for RendererOptions { // Supplies a baseline value for every field of RendererOptions.
+    fn default() -> RendererOptions {
+        RendererOptions {
+            device_pixel_ratio: 1.0,
+            resource_override_path: None, // no override path by default
+            enable_aa: true, // the only enable_* flag that defaults to on
+            enable_profiler: false,
+            debug: false,
+            enable_recording: false,
+            enable_scrollbars: false,
+            precache_shaders: false,
+            renderer_kind: RendererKind::Native,
+            enable_subpixel_aa: false,
+            clear_framebuffer: true,
+            clear_color: ColorF::new(1.0, 1.0, 1.0, 1.0), // all four components 1.0; presumably opaque white (RGBA order assumed, confirm against ColorF::new)
+            render_target_debug: false,
+        }
+    }
+}
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -662,17 +662,17 @@ fn is_image_opaque(format: ImageFormat, 
 fn spawn_glyph_cache_thread() -> (Sender<GlyphCacheMsg>, Receiver<GlyphCacheResultMsg>) {
     // Used for messages from resource cache -> glyph cache thread.
     let (msg_tx, msg_rx) = channel();
     // Used for returning results from glyph cache thread -> resource cache.
     let (result_tx, result_rx) = channel();
     // Used for rasterizer worker threads to send glyphs -> glyph cache thread.
     let (glyph_tx, glyph_rx) = channel();
 
-    thread::spawn(move|| {
+    thread::Builder::new().name("GlyphCache".to_string()).spawn(move|| {
         // TODO(gw): Use a heuristic to select best # of worker threads.
         let worker_count = 4;
         let thread_pool = ThreadPool::new(worker_count);
 
         let mut glyph_cache = None;
         let mut current_frame_id = FrameId(0);
 
         // Maintain a set of glyphs that have been requested this
@@ -773,12 +773,12 @@ fn spawn_glyph_cache_thread() -> (Sender
                     // that text runs get associated with by the texture cache allocator.
                     rasterized_glyphs.sort_by(|a, b| {
                         a.key.cmp(&b.key)
                     });
                     result_tx.send(GlyphCacheResultMsg::EndFrame(cache, rasterized_glyphs)).unwrap();
                 }
             }
         }
-    });
+    }).unwrap();
 
     (msg_tx, result_rx)
 }
--- a/gfx/webrender/src/scene.rs
+++ b/gfx/webrender/src/scene.rs
@@ -2,18 +2,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use fnv::FnvHasher;
 use std::collections::HashMap;
 use std::hash::BuildHasherDefault;
 use tiling::AuxiliaryListsMap;
 use webrender_traits::{AuxiliaryLists, BuiltDisplayList, PipelineId, Epoch, ColorF};
-use webrender_traits::{DisplayItem, SpecificDisplayItem, StackingContext};
-use webrender_traits::LayerSize;
+use webrender_traits::{DisplayItem, LayerSize};
 
 /// A representation of the layout within the display port for a given document or iframe.
 #[derive(Debug)]
 pub struct ScenePipeline {
     pub pipeline_id: PipelineId,
     pub epoch: Epoch,
     pub viewport_size: LayerSize,
     pub background_color: Option<ColorF>,
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -77,24 +77,24 @@ fn copy_pixels(src: &[u8],
 /// Dimensional Rectangle Bin Packing":
 ///
 ///    http://clb.demon.fi/files/RectangleBinPack.pdf
 ///
 /// This approach was chosen because of its simplicity, good performance, and easy support for
 /// dynamic texture deallocation.
 pub struct TexturePage {
     texture_id: CacheTextureId,
-    texture_size: u32,
+    texture_size: DeviceUintSize,
     free_list: FreeRectList,
     allocations: u32,
     dirty: bool,
 }
 
 impl TexturePage {
-    pub fn new(texture_id: CacheTextureId, texture_size: u32) -> TexturePage {
+    pub fn new(texture_id: CacheTextureId, texture_size: DeviceUintSize) -> TexturePage {
         let mut page = TexturePage {
             texture_id: texture_id,
             texture_size: texture_size,
             free_list: FreeRectList::new(),
             allocations: 0,
             dirty: false,
         };
         page.clear();
@@ -305,45 +305,59 @@ impl TexturePage {
         self.free_list = FreeRectList::from_slice(&free_list[..]);
         self.dirty = changed
     }
 
     pub fn clear(&mut self) {
         self.free_list = FreeRectList::new();
         self.free_list.push(&DeviceUintRect::new(
             DeviceUintPoint::zero(),
-            DeviceUintSize::new(self.texture_size, self.texture_size)));
+            self.texture_size));
         self.allocations = 0;
         self.dirty = false;
     }
 
     fn free(&mut self, rect: &DeviceUintRect) {
         debug_assert!(self.allocations > 0);
         self.allocations -= 1;
         if self.allocations == 0 {
             self.clear();
             return
         }
 
         self.free_list.push(rect);
         self.dirty = true
     }
 
-    fn grow(&mut self, new_texture_size: u32) {
-        self.free_list.push(&DeviceUintRect::new(
-            DeviceUintPoint::new(self.texture_size, 0),
-            DeviceUintSize::new(new_texture_size - self.texture_size, new_texture_size)));
-        self.free_list.push(&DeviceUintRect::new(
-            DeviceUintPoint::new(0, self.texture_size),
-            DeviceUintSize::new(self.texture_size, new_texture_size - self.texture_size)));
+    fn grow(&mut self, new_texture_size: DeviceUintSize) { // Enlarges the page and adds the newly exposed area to the free list.
+        assert!(new_texture_size.width >= self.texture_size.width); // the unsigned subtractions below rely on these two checks
+        assert!(new_texture_size.height >= self.texture_size.height);
+
+        let new_rects = [
+            DeviceUintRect::new(DeviceUintPoint::new(self.texture_size.width, 0), // strip past the old width, spanning the full new height
+                                DeviceUintSize::new(new_texture_size.width - self.texture_size.width,
+                                                    new_texture_size.height)),
+
+            DeviceUintRect::new(DeviceUintPoint::new(0, self.texture_size.height), // strip past the old height, only as wide as the old texture
+                                DeviceUintSize::new(self.texture_size.width,
+                                                    new_texture_size.height - self.texture_size.height)),
+        ];
+
+        for rect in &new_rects {
+            if rect.size.width > 0 && rect.size.height > 0 { // a strip is empty when its dimension did not grow; do not add it
+                self.free_list.push(rect);
+            }
+        }
+
         self.texture_size = new_texture_size
     }
 
     fn can_grow(&self) -> bool {
-        self.texture_size < max_texture_size()
+        self.texture_size.width < max_texture_size() || // growable while either dimension is still under the cap
+        self.texture_size.height < max_texture_size()
     }
 }
 
 /// A binning free list. Binning is important to avoid sifting through lots of small strips when
 /// allocating many texture items.
 struct FreeRectList {
     small: Vec<DeviceUintRect>,
     medium: Vec<DeviceUintRect>,
@@ -661,39 +675,40 @@ impl TextureCache {
                 let allocated_rect = DeviceUintRect::new(location, allocation_size);
                 let requested_rect = DeviceUintRect::new(
                     DeviceUintPoint::new(location.x + border_size, location.y + border_size),
                     requested_size);
 
                 let cache_item = TextureCacheItem::new(page.texture_id,
                                                        allocated_rect,
                                                        requested_rect,
-                                                       &DeviceUintSize::new(page.texture_size, page.texture_size));
+                                                       &page.texture_size);
                 *self.items.get_mut(image_id) = cache_item;
 
                 return AllocationResult {
                     item: self.items.get(image_id).clone(),
                     kind: AllocationKind::TexturePage,
                 }
             }
 
             if !page_list.is_empty() && page_list.last().unwrap().can_grow() {
                 let last_page = page_list.last_mut().unwrap();
                 // Grow the texture.
-                let texture_size = cmp::min(last_page.texture_size * 2,
-                                            max_texture_size());
+                let new_width = cmp::min(last_page.texture_size.width * 2, max_texture_size());
+                let new_height = cmp::min(last_page.texture_size.height * 2, max_texture_size());
+                let texture_size = DeviceUintSize::new(new_width, new_height);
                 self.pending_updates.push(TextureUpdate {
                     id: last_page.texture_id,
                     op: texture_grow_op(texture_size, format, mode),
                 });
                 last_page.grow(texture_size);
 
                 self.items.for_each_item(|item| {
                     if item.texture_id == last_page.texture_id {
-                        item.texture_size = DeviceUintSize::new(texture_size, texture_size);
+                        item.texture_size = texture_size;
                     }
                 });
 
                 continue;
             }
 
             // We need a new page.
             let texture_size = initial_texture_size();
@@ -885,27 +900,27 @@ impl TextureCache {
                 }
             }
         }
 
         self.items.free(id)
     }
 }
 
-fn texture_create_op(texture_size: u32, format: ImageFormat, mode: RenderTargetMode)
+fn texture_create_op(texture_size: DeviceUintSize, format: ImageFormat, mode: RenderTargetMode)
                      -> TextureUpdateOp {
-    TextureUpdateOp::Create(texture_size, texture_size, format, TextureFilter::Linear, mode, None)
+    TextureUpdateOp::Create(texture_size.width, texture_size.height, format, TextureFilter::Linear, mode, None)
 }
 
-fn texture_grow_op(texture_size: u32,
+fn texture_grow_op(texture_size: DeviceUintSize,
                    format: ImageFormat,
                    mode: RenderTargetMode)
                    -> TextureUpdateOp {
-    TextureUpdateOp::Grow(texture_size,
-                          texture_size,
+    TextureUpdateOp::Grow(texture_size.width,
+                          texture_size.height,
                           format,
                           TextureFilter::Linear,
                           mode)
 }
 
 trait FitsInside {
     fn fits_inside(&self, other: &Self) -> bool;
 }
@@ -918,23 +933,24 @@ impl FitsInside for DeviceUintSize {
 
 /// FIXME(pcwalton): Would probably be more efficient as a bit vector.
 #[derive(Clone, Copy)]
 pub struct FreeTextureLevel {
     texture_id: CacheTextureId,
 }
 
 /// Returns the number of pixels on a side we start out with for our texture atlases.
-fn initial_texture_size() -> u32 {
+fn initial_texture_size() -> DeviceUintSize { // now returns a full size rather than a single side length
     let max_hardware_texture_size = *MAX_TEXTURE_SIZE as u32;
-    if max_hardware_texture_size * max_hardware_texture_size > INITIAL_TEXTURE_AREA {
+    let initial_size = if max_hardware_texture_size * max_hardware_texture_size > INITIAL_TEXTURE_AREA {
         INITIAL_TEXTURE_SIZE
     } else {
         max_hardware_texture_size
-    }
+    };
+    DeviceUintSize::new(initial_size, initial_size) // square: the same side length is used for width and height
 }
 
 /// Returns the number of pixels on a side we're allowed to use for our texture atlases.
 fn max_texture_size() -> u32 {
     let max_hardware_texture_size = *MAX_TEXTURE_SIZE as u32;
     if max_hardware_texture_size * max_hardware_texture_size > MAX_RGBA_PIXELS_PER_TEXTURE {
         SQRT_MAX_RGBA_PIXELS_PER_TEXTURE
     } else {
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -32,30 +32,26 @@ use std::usize;
 use texture_cache::TexturePage;
 use util::{self, rect_from_points, rect_from_points_f};
 use util::{TransformedRect, TransformedRectKind, subtract_rect, pack_as_float};
 use webrender_traits::{ColorF, FontKey, ImageKey, ImageRendering, MixBlendMode};
 use webrender_traits::{BorderDisplayItem, BorderSide, BorderStyle, YuvColorSpace};
 use webrender_traits::{AuxiliaryLists, ItemRange, BoxShadowClipMode, ClipRegion};
 use webrender_traits::{PipelineId, ScrollLayerId, WebGLContextId, FontRenderMode};
 use webrender_traits::{DeviceIntRect, DeviceIntPoint, DeviceIntSize, DeviceIntLength, device_length};
-use webrender_traits::{DeviceUintSize, DeviceUintPoint, DeviceSize};
+use webrender_traits::{DeviceUintSize, DeviceUintPoint};
 use webrender_traits::{LayerRect, LayerPoint, LayerSize};
 use webrender_traits::{LayerToScrollTransform, LayerToWorldTransform, WorldToLayerTransform};
 use webrender_traits::{WorldPoint4D, ScrollLayerPixel, as_scroll_parent_rect};
 
-// Removes the clip task dependencies and instead
-// draws all the clip instances that affect a primitive
-const CLIP_TASK_COLLAPSE: bool = true;
-
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_INDEX: RenderTaskIndex = RenderTaskIndex(i32::MAX as usize);
 
-const FLOATS_PER_RENDER_TASK_INFO: usize = 8;
+const FLOATS_PER_RENDER_TASK_INFO: usize = 12;
 
 pub type LayerMap = HashMap<ScrollLayerId,
                             Layer,
                             BuildHasherDefault<FnvHasher>>;
 pub type AuxiliaryListsMap = HashMap<PipelineId,
                                      AuxiliaryLists,
                                      BuildHasherDefault<FnvHasher>>;
 
@@ -68,18 +64,16 @@ trait AlphaBatchHelpers {
                          tile_rect: &DeviceIntRect,
                          transform: &LayerToWorldTransform,
                          device_pixel_ratio: f32) -> bool;
     fn add_prim_to_batch(&self,
                          prim_index: PrimitiveIndex,
                          batch: &mut PrimitiveBatch,
                          layer_index: StackingContextIndex,
                          task_index: RenderTaskIndex,
-                         tile_id: TileUniqueId,
-                         base_mask_task_index: RenderTaskIndex,
                          render_tasks: &RenderTaskCollection,
                          pass_index: RenderPassIndex,
                          z_sort_index: i32);
 }
 
 impl AlphaBatchHelpers for PrimitiveStore {
     fn get_batch_kind(&self, metadata: &PrimitiveMetadata) -> AlphaBatchKind {
         let batch_kind = match metadata.prim_kind {
@@ -186,32 +180,29 @@ impl AlphaBatchHelpers for PrimitiveStor
         }
     }
 
     fn add_prim_to_batch(&self,
                          prim_index: PrimitiveIndex,
                          batch: &mut PrimitiveBatch,
                          layer_index: StackingContextIndex,
                          task_index: RenderTaskIndex,
-                         tile_id: TileUniqueId,
-                         base_mask_task_index: RenderTaskIndex,
                          render_tasks: &RenderTaskCollection,
                          child_pass_index: RenderPassIndex,
                          z_sort_index: i32) {
         let metadata = self.get_metadata(prim_index);
         let layer_index = layer_index.0 as i32;
         let global_prim_id = prim_index.0 as i32;
         let prim_address = metadata.gpu_prim_index;
-        let clip_task_key = RenderTaskKey::CacheMask(MaskCacheKey::Primitive(prim_index), tile_id);
-        let clip_task_index = if metadata.clip_cache_info.is_some() &&
-                                 render_tasks.has_dynamic_task(&clip_task_key, child_pass_index) {
+        let clip_task_key = RenderTaskKey::CacheMask(MaskCacheKey::Primitive(prim_index));
+        let clip_task_index = if metadata.clip_task.is_some() {
             let cache_task_id = RenderTaskId::Dynamic(clip_task_key);
             render_tasks.get_task_index(&cache_task_id, child_pass_index)
         } else {
-            base_mask_task_index
+            OPAQUE_TASK_INDEX
         };
         let task_index = task_index.0 as i32;
         let clip_task_index = clip_task_index.0 as i32;
 
         match &mut batch.data {
             &mut PrimitiveBatchData::Blend(..) |
             &mut PrimitiveBatchData::Composite(..) => unreachable!(),
 
@@ -381,31 +372,28 @@ pub enum PrimitiveFlags {
 pub struct RenderTargetIndex(usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 struct RenderPassIndex(isize);
 
 #[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
 pub struct RenderTaskIndex(usize);
 
-type TileUniqueId = usize;
-
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum MaskCacheKey {
     Primitive(PrimitiveIndex),
-    Layer(StackingContextIndex),
 }
 
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTaskKey {
     /// Draw this primitive to a cache target.
     CachePrimitive(PrimitiveCacheKey),
     /// Draw the tile alpha mask for a primitive.
-    CacheMask(MaskCacheKey, TileUniqueId),
+    CacheMask(MaskCacheKey),
     /// Apply a vertical blur pass of given radius for this primitive.
     VerticalBlur(i32, PrimitiveIndex),
     /// Apply a horizontal blur pass of given radius for this primitive.
     HorizontalBlur(i32, PrimitiveIndex),
 }
 
 #[derive(Debug, Copy, Clone)]
 pub enum RenderTaskId {
@@ -471,21 +459,16 @@ impl RenderTaskCollection {
     fn get_task_index(&self, id: &RenderTaskId, pass_index: RenderPassIndex) -> RenderTaskIndex {
         match id {
             &RenderTaskId::Static(index) => index,
             &RenderTaskId::Dynamic(key) => {
                 self.dynamic_tasks[&(key, pass_index)].index
             }
         }
     }
-
-    fn has_dynamic_task(&self, key: &RenderTaskKey, pass_index: RenderPassIndex) -> bool {
-        //TODO: remove clone
-        self.dynamic_tasks.contains_key(&(key.clone(), pass_index))
-    }
 }
 
 #[derive(Debug, Clone)]
 pub struct RenderTaskData {
     data: [f32; FLOATS_PER_RENDER_TASK_INFO],
 }
 
 impl RenderTaskData {
@@ -512,17 +495,16 @@ impl Default for PrimitiveGeometry {
         }
     }
 }
 
 struct AlphaBatchTask {
     task_id: RenderTaskId,
     opaque_items: Vec<AlphaRenderItem>,
     alpha_items: Vec<AlphaRenderItem>,
-    tile_id: TileUniqueId,
 }
 
 /// Encapsulates the logic of building batches for items that are blended.
 pub struct AlphaBatcher {
     pub alpha_batches: Vec<PrimitiveBatch>,
     pub opaque_batches: Vec<PrimitiveBatch>,
     tasks: Vec<AlphaBatchTask>,
 }
@@ -560,18 +542,17 @@ impl AlphaBatcher {
                     &AlphaRenderItem::Blend(..) => {
                         AlphaBatchKey::blend()
                     }
                     &AlphaRenderItem::Primitive(sc_index, prim_index, _) => {
                         // See if this task fits into the tile UBO
                         let layer = &ctx.layer_store[sc_index.0];
                         let prim_metadata = ctx.prim_store.get_metadata(prim_index);
                         let transform_kind = layer.xf_rect.as_ref().unwrap().kind;
-                        let needs_clipping = prim_metadata.clip_cache_info.is_some() ||
-                                             ctx.layer_masks_tasks.get(&(task.tile_id, sc_index)).is_some();
+                        let needs_clipping = prim_metadata.clip_task.is_some();
                         let needs_blending = transform_kind == TransformedRectKind::Complex ||
                                              !prim_metadata.is_opaque ||
                                              needs_clipping;
                         let blend_mode = ctx.prim_store.get_blend_mode(needs_blending, prim_metadata);
                         let needs_clipping_flag = if needs_clipping {
                             NEEDS_CLIPPING
                         } else {
                             AlphaBatchKeyFlags::empty()
@@ -629,44 +610,37 @@ impl AlphaBatcher {
                     &AlphaRenderItem::Blend(src_id, info, z) => {
                         let ok = batch.pack_blend(render_tasks.get_static_task_index(&src_id),
                                                   render_tasks.get_static_task_index(&task.task_id),
                                                   info,
                                                   z);
                         debug_assert!(ok)
                     }
                     &AlphaRenderItem::Primitive(sc_index, prim_index, z) => {
-                        let mask_task_index = match ctx.layer_masks_tasks.get(&(task.tile_id, sc_index)) {
-                            Some(ref mask_task_id) => render_tasks.get_task_index(mask_task_id, child_pass_index),
-                            None => OPAQUE_TASK_INDEX,
-                        };
                         ctx.prim_store.add_prim_to_batch(prim_index,
                                                          batch,
                                                          sc_index,
                                                          task_index,
-                                                         task.tile_id,
-                                                         mask_task_index,
                                                          render_tasks,
                                                          child_pass_index,
                                                          z);
                     }
                 }
             }
 
             for item in task.opaque_items.iter().rev() {
                 let batch_key = match item {
                     &AlphaRenderItem::Composite(..) => unreachable!(),
                     &AlphaRenderItem::Blend(..) => unreachable!(),
                     &AlphaRenderItem::Primitive(sc_index, prim_index, _) => {
                         // See if this task fits into the tile UBO
                         let layer = &ctx.layer_store[sc_index.0];
                         let prim_metadata = ctx.prim_store.get_metadata(prim_index);
                         let transform_kind = layer.xf_rect.as_ref().unwrap().kind;
-                        let needs_clipping = prim_metadata.clip_cache_info.is_some() ||
-                                             ctx.layer_masks_tasks.get(&(task.tile_id, sc_index)).is_some();
+                        let needs_clipping = prim_metadata.clip_task.is_some();
                         let needs_blending = transform_kind == TransformedRectKind::Complex ||
                                              !prim_metadata.is_opaque ||
                                              needs_clipping;
                         let blend_mode = ctx.prim_store.get_blend_mode(needs_blending, prim_metadata);
                         let needs_clipping_flag = if needs_clipping {
                             NEEDS_CLIPPING
                         } else {
                             AlphaBatchKeyFlags::empty()
@@ -707,92 +681,101 @@ impl AlphaBatcher {
                     opaque_batches.push(new_batch)
                 }
 
                 let batch = &mut opaque_batches[existing_opaque_batch_index];
                 match item {
                     &AlphaRenderItem::Composite(..) => unreachable!(),
                     &AlphaRenderItem::Blend(..) => unreachable!(),
                     &AlphaRenderItem::Primitive(sc_index, prim_index, z) => {
-                        let mask_task_index = match ctx.layer_masks_tasks.get(&(task.tile_id, sc_index)) {
-                            Some(ref mask_task_id) => render_tasks.get_task_index(mask_task_id, child_pass_index),
-                            None => OPAQUE_TASK_INDEX,
-                        };
                         ctx.prim_store.add_prim_to_batch(prim_index,
                                                          batch,
                                                          sc_index,
                                                          task_index,
-                                                         task.tile_id,
-                                                         mask_task_index,
                                                          render_tasks,
                                                          child_pass_index,
                                                          z);
                     }
                 }
             }
         }
 
         self.alpha_batches.extend(alpha_batches.into_iter());
         self.opaque_batches.extend(opaque_batches.into_iter());
     }
 }
 
 /// Batcher managing draw calls into the clip mask (in the RT cache).
 #[derive(Debug)]
 pub struct ClipBatcher {
-    /// Copy draws get the existing mask from a parent layer.
-    pub copies: Vec<CacheClipInstance>,
     /// Rectangle draws fill up the rectangles with rounded corners.
     pub rectangles: Vec<CacheClipInstance>,
     /// Image draws apply the image masking.
     pub images: HashMap<SourceTexture, Vec<CacheClipInstance>>,
 }
 
 impl ClipBatcher {
     fn new() -> ClipBatcher {
         ClipBatcher {
-            copies: Vec::new(),
             rectangles: Vec::new(),
             images: HashMap::new(),
         }
     }
 
     fn add(&mut self,
            task_index: RenderTaskIndex,
-           base_task_index: Option<RenderTaskIndex>,
            clips: &[(StackingContextIndex, MaskCacheInfo)],
-           resource_cache: &ResourceCache) {
-
-        // TODO: don't draw clipping instances covering the whole tile
+           resource_cache: &ResourceCache,
+           geometry_kind: MaskGeometryKind) {
+
         for &(layer_id, ref info) in clips.iter() {
             let instance = CacheClipInstance {
                 task_id: task_index.0 as i32,
                 layer_index: layer_id.0 as i32,
                 address: GpuStoreAddress(0),
-                base_task_id: 0,
+                segment: 0,
             };
-            // copy on the first clip only
-            if info as *const _ == &clips[0].1 as *const _ {
-                if let Some(layer_task_id) = base_task_index {
-                    self.copies.push(CacheClipInstance {
-                        base_task_id: layer_task_id.0 as i32,
-                        ..instance
-                    });
+
+            for clip_index in 0..info.clip_range.item_count as usize {
+                let offset = info.clip_range.start.0 + ((CLIP_DATA_GPU_SIZE * clip_index) as i32);
+                match geometry_kind {
+                    MaskGeometryKind::Default => {
+                        self.rectangles.push(CacheClipInstance {
+                            address: GpuStoreAddress(offset),
+                            segment: MaskSegment::All as i32,
+                            ..instance
+                        });
+                    }
+                    MaskGeometryKind::CornersOnly => {
+                        self.rectangles.extend(&[
+                            CacheClipInstance {
+                                address: GpuStoreAddress(offset),
+                                segment: MaskSegment::Corner_TopLeft as i32,
+                                ..instance
+                            },
+                            CacheClipInstance {
+                                address: GpuStoreAddress(offset),
+                                segment: MaskSegment::Corner_TopRight as i32,
+                                ..instance
+                            },
+                            CacheClipInstance {
+                                address: GpuStoreAddress(offset),
+                                segment: MaskSegment::Corner_BottomLeft as i32,
+                                ..instance
+                            },
+                            CacheClipInstance {
+                                address: GpuStoreAddress(offset),
+                                segment: MaskSegment::Corner_BottomRight as i32,
+                                ..instance
+                            },
+                        ]);
+                    }
                 }
             }
 
-            self.rectangles.extend((0 .. info.clip_range.item_count as usize)
-                           .map(|region_id| {
-                let offset = info.clip_range.start.0 + ((CLIP_DATA_GPU_SIZE * region_id) as i32);
-                CacheClipInstance {
-                    address: GpuStoreAddress(offset),
-                    ..instance
-                }
-            }));
-
             if let Some((ref mask, address)) = info.image {
                 let cache_item = resource_cache.get_cached_image(mask.image, ImageRendering::Auto);
                 self.images.entry(cache_item.texture_id)
                            .or_insert(Vec::new())
                            .push(CacheClipInstance {
                     address: address,
                     ..instance
                 })
@@ -800,25 +783,23 @@ impl ClipBatcher {
         }
     }
 }
 
 
 struct CompileTileContext<'a> {
     layer_store: &'a [StackingContext],
     prim_store: &'a PrimitiveStore,
-    tile_id: TileUniqueId,
     render_task_id_counter: AtomicUsize,
 }
 
 struct RenderTargetContext<'a> {
     layer_store: &'a [StackingContext],
     prim_store: &'a PrimitiveStore,
     resource_cache: &'a ResourceCache,
-    layer_masks_tasks: HashMap<(TileUniqueId, StackingContextIndex), RenderTaskId>,
 }
 
 /// A render target represents a number of rendering operations on a surface.
 pub struct RenderTarget {
     pub alpha_batcher: AlphaBatcher,
     pub clip_batcher: ClipBatcher,
     pub box_shadow_cache_prims: Vec<PrimitiveInstance>,
     // List of text runs to be cached to this render target.
@@ -833,27 +814,26 @@ pub struct RenderTarget {
     pub text_run_textures: BatchTextures,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurCommand>,
     pub horizontal_blurs: Vec<BlurCommand>,
     page_allocator: TexturePage,
 }
 
 impl RenderTarget {
-    fn new() -> RenderTarget {
+    fn new(size: DeviceUintSize) -> RenderTarget {
         RenderTarget {
             alpha_batcher: AlphaBatcher::new(),
             clip_batcher: ClipBatcher::new(),
             box_shadow_cache_prims: Vec::new(),
             text_run_cache_prims: Vec::new(),
             text_run_textures: BatchTextures::no_texture(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
-            page_allocator: TexturePage::new(CacheTextureId(0),
-                                             RENDERABLE_CACHE_SIZE as u32),
+            page_allocator: TexturePage::new(CacheTextureId(0), size),
         }
     }
 
     fn build(&mut self,
              ctx: &RenderTargetContext,
              render_tasks: &mut RenderTaskCollection,
              child_pass_index: RenderPassIndex) {
         self.alpha_batcher.build(ctx,
@@ -867,17 +847,16 @@ impl RenderTarget {
                 render_tasks: &RenderTaskCollection,
                 pass_index: RenderPassIndex) {
         match task.kind {
             RenderTaskKind::Alpha(info) => {
                 self.alpha_batcher.add_task(AlphaBatchTask {
                     task_id: task.id,
                     opaque_items: info.opaque_items,
                     alpha_items: info.alpha_items,
-                    tile_id: info.tile_id,
                 });
             }
             RenderTaskKind::VerticalBlur(_, prim_index) => {
                 // Find the child render task that we are applying
                 // a vertical blur on.
                 // TODO(gw): Consider a simpler way for render tasks to find
                 //           their child tasks than having to construct the
                 //           correct id here.
@@ -952,76 +931,81 @@ impl RenderTarget {
                     _ => {
                         // No other primitives make use of primitive caching yet!
                         unreachable!()
                     }
                 }
             }
             RenderTaskKind::CacheMask(ref task_info) => {
                 let task_index = render_tasks.get_task_index(&task.id, pass_index);
-                let base_task_id = task_info.base_task_id.map(|ref task_id|
-                    render_tasks.get_task_index(task_id, pass_index)
-                );
-                self.clip_batcher.add(task_index, base_task_id,
+                self.clip_batcher.add(task_index,
                                       &task_info.clips,
-                                      &ctx.resource_cache);
+                                      &ctx.resource_cache,
+                                      task_info.geometry_kind);
             }
         }
     }
 }
 
 /// A render pass represents a set of rendering operations that don't depend on one
 /// another.
 ///
 /// A render pass can have several render targets if there wasn't enough space in one
 /// target to do all of the rendering for that pass.
 pub struct RenderPass {
     pass_index: RenderPassIndex,
     pub is_framebuffer: bool,
     tasks: Vec<RenderTask>,
     pub targets: Vec<RenderTarget>,
+    size: DeviceUintSize,
 }
 
 impl RenderPass {
-    fn new(pass_index: isize, is_framebuffer: bool) -> RenderPass {
+    fn new(pass_index: isize,
+           is_framebuffer: bool,
+           size: DeviceUintSize) -> RenderPass {
         RenderPass {
             pass_index: RenderPassIndex(pass_index),
             is_framebuffer: is_framebuffer,
-            targets: vec![ RenderTarget::new() ],
+            targets: vec![ RenderTarget::new(size) ],
             tasks: vec![],
+            size: size,
         }
     }
 
     fn add_render_task(&mut self, task: RenderTask) {
         self.tasks.push(task);
     }
 
-    fn allocate_target(targets: &mut Vec<RenderTarget>, size: DeviceUintSize) -> DeviceUintPoint {
-        let existing_origin = targets.last_mut()
-                                     .unwrap()
-                                     .page_allocator.allocate(&size);
+    fn allocate_target(&mut self, alloc_size: DeviceUintSize) -> DeviceUintPoint {
+        let existing_origin = self.targets
+                                  .last_mut()
+                                  .unwrap()
+                                  .page_allocator
+                                  .allocate(&alloc_size);
         match existing_origin {
             Some(origin) => origin,
             None => {
-                let mut new_target = RenderTarget::new();
+                let mut new_target = RenderTarget::new(self.size);
                 let origin = new_target.page_allocator
-                                       .allocate(&size)
-                                       .expect("Each render task must allocate <= size of one target!");
-                targets.push(new_target);
+                                       .allocate(&alloc_size)
+                                       .expect(&format!("Each render task must allocate <= size of one target! ({:?})", alloc_size));
+                self.targets.push(new_target);
                 origin
             }
         }
     }
 
 
     fn build(&mut self,
              ctx: &RenderTargetContext,
              render_tasks: &mut RenderTaskCollection) {
         // Step through each task, adding to batches as appropriate.
-        for mut task in self.tasks.drain(..) {
+        let tasks = mem::replace(&mut self.tasks, Vec::new());
+        for mut task in tasks {
             // Find a target to assign this task to, or create a new
             // one if required.
             match task.location {
                 RenderTaskLocation::Fixed(..) => {}
                 RenderTaskLocation::Dynamic(ref mut origin, ref size) => {
                     // See if this task is a duplicate from another tile.
                     // If so, just skip adding it!
                     match task.id {
@@ -1034,17 +1018,17 @@ impl RenderPass {
                             if let Some(rect) = render_tasks.get_dynamic_allocation(self.pass_index, key) {
                                 debug_assert_eq!(rect.size, *size);
                                 continue;
                             }
                         }
                     }
 
                     let alloc_size = DeviceUintSize::new(size.width as u32, size.height as u32);
-                    let alloc_origin = Self::allocate_target(&mut self.targets, alloc_size);
+                    let alloc_origin = self.allocate_target(alloc_size);
 
                     *origin = Some((DeviceIntPoint::new(alloc_origin.x as i32,
                                                      alloc_origin.y as i32),
                                     RenderTargetIndex(self.targets.len() - 1)));
                 }
             }
 
             render_tasks.add(&task, self.pass_index);
@@ -1074,32 +1058,50 @@ enum AlphaRenderItem {
     Composite(RenderTaskId, RenderTaskId, MixBlendMode, i32),
 }
 
 #[derive(Debug, Clone)]
 pub struct AlphaRenderTask {
     actual_rect: DeviceIntRect,
     opaque_items: Vec<AlphaRenderItem>,
     alpha_items: Vec<AlphaRenderItem>,
-    tile_id: TileUniqueId,
+}
+
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+enum MaskSegment {
+    // This must match the SEGMENT_ values
+    // in clip_shared.glsl!
+    All = 0,
+    Corner_TopLeft,
+    Corner_TopRight,
+    Corner_BottomLeft,
+    Corner_BottomRight,
+}
+
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+enum MaskGeometryKind {
+    Default,        // Draw the entire rect
+    CornersOnly,    // Draw the corners (simple axis aligned mask)
+    // TODO(gw): Add more types here (e.g. 4 rectangles outside the inner rect)
 }
 
 #[derive(Debug, Clone)]
 pub struct CacheMaskTask {
     actual_rect: DeviceIntRect,
-    base_task_id: Option<RenderTaskId>,
+    inner_rect: DeviceIntRect,
     clips: Vec<(StackingContextIndex, MaskCacheInfo)>,
+    geometry_kind: MaskGeometryKind,
 }
 
 #[derive(Debug)]
 enum MaskResult {
     /// The mask is completely outside the region
     Outside,
-    /// The mask completely covers the region
-    Covering,
     /// The mask is inside and needs to be processed
     Inside(RenderTask),
 }
 
 #[derive(Debug, Clone)]
 pub enum RenderTaskKind {
     Alpha(AlphaRenderTask),
     CachePrimitive(PrimitiveIndex),
@@ -1126,73 +1128,86 @@ impl RenderTask {
         RenderTask {
             id: RenderTaskId::Static(RenderTaskIndex(task_index)),
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, actual_rect.size),
             kind: RenderTaskKind::Alpha(AlphaRenderTask {
                 actual_rect: actual_rect,
                 alpha_items: Vec::new(),
                 opaque_items: Vec::new(),
-                tile_id: ctx.tile_id,
             }),
         }
     }
 
     pub fn new_prim_cache(key: PrimitiveCacheKey,
                           size: DeviceIntSize,
                           prim_index: PrimitiveIndex) -> RenderTask {
         RenderTask {
             id: RenderTaskId::Dynamic(RenderTaskKey::CachePrimitive(key)),
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, size),
             kind: RenderTaskKind::CachePrimitive(prim_index),
         }
     }
 
     fn new_mask(actual_rect: DeviceIntRect,
-                dependent: Option<&RenderTask>,
                 mask_key: MaskCacheKey,
-                top_clip: (StackingContextIndex, &MaskCacheInfo),
-                layer_clips: &[(StackingContextIndex, MaskCacheInfo)],
-                tile_id: TileUniqueId)
+                clips: &[(StackingContextIndex, MaskCacheInfo)],
+                layers: &[StackingContext])
                 -> MaskResult {
-
-        let extra = (top_clip.0, top_clip.1.clone());
+        if clips.is_empty() {
+            return MaskResult::Outside;
+        }
 
         // We scan through the clip stack and detect if our actual rectangle
         // is in the intersection of all of all the outer bounds,
         // and if it's completely inside the intersection of all of the inner bounds.
-        let result = layer_clips.iter().chain(Some(&extra))
-                                .fold(Some((actual_rect, true)), |current, clip| {
-            current.and_then(|(rect, covering)|
-                rect.intersection(&clip.1.outer_rect)
-                    .map(|r| (r, covering & clip.1.inner_rect.contains_rect(&actual_rect))))
+        let result = clips.iter()
+                          .fold(Some(actual_rect), |current, clip| {
+            current.and_then(|rect| rect.intersection(&clip.1.outer_rect))
         });
 
         let task_rect = match result {
             None => return MaskResult::Outside,
-            Some((_, true)) => return MaskResult::Covering,
-            Some((rect, false)) => rect,
+            Some(rect) => rect,
         };
-        let clips = layer_clips.iter()
-                               .map(|lc| lc.clone())
-                               .chain(Some(extra))
-                               .collect();
+
+        let inner_rect = clips.iter()
+                              .fold(Some(task_rect), |current, clip| {
+            current.and_then(|rect| rect.intersection(&clip.1.inner_rect))
+        });
+
+        // TODO(gw): This optimization is very conservative for now.
+        //           For now, only draw optimized geometry if it is
+        //           a single aligned rect mask with rounded corners.
+        //           In the future, we'll expand this to handle the
+        //           more complex types of clip mask geometry.
+        let mut geometry_kind = MaskGeometryKind::Default;
+
+        if inner_rect.is_some() && clips.len() == 1 {
+            let (sc_index, ref clip_info) = clips[0];
+
+            if clip_info.image.is_none() &&
+               clip_info.clip_range.item_count == 1 &&
+               layers[sc_index.0].xf_rect.as_ref().unwrap().kind == TransformedRectKind::AxisAligned {
+                geometry_kind = MaskGeometryKind::CornersOnly;
+            }
+        }
+
+        let inner_rect = inner_rect.unwrap_or(DeviceIntRect::zero());
 
         MaskResult::Inside(RenderTask {
-            id: RenderTaskId::Dynamic(RenderTaskKey::CacheMask(mask_key, tile_id)),
-            children: match dependent {
-                Some(task) => vec![task.clone()],
-                None => Vec::new(),
-            },
+            id: RenderTaskId::Dynamic(RenderTaskKey::CacheMask(mask_key)),
+            children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, task_rect.size),
             kind: RenderTaskKind::CacheMask(CacheMaskTask {
                 actual_rect: task_rect,
-                base_task_id: dependent.map(|task| task.id),
-                clips: clips,
+                inner_rect: inner_rect,
+                clips: clips.to_vec(),
+                geometry_kind: geometry_kind,
             }),
         })
     }
 
     // Construct a render task to apply a blur to a primitive. For now,
     // this is only used for text runs, but we can probably extend this
     // to handle general blurs to any render task in the future.
     // The render task chain that is constructed looks like:
@@ -1258,60 +1273,76 @@ impl RenderTask {
                         task.actual_rect.origin.x as f32,
                         task.actual_rect.origin.y as f32,
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         task.actual_rect.size.width as f32,
                         task.actual_rect.size.height as f32,
                         target_index.0 as f32,
                         0.0,
+                        0.0,
+                        0.0,
+                        0.0,
+                        0.0,
                     ],
                 }
             }
             RenderTaskKind::CachePrimitive(..) => {
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
                         0.0,
                         0.0,
                         0.0,
+                        0.0,
+                        0.0,
+                        0.0,
+                        0.0,
                     ],
                 }
             }
             RenderTaskKind::CacheMask(ref task) => {
                 debug_assert_eq!(target_rect.size, task.actual_rect.size);
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         (target_rect.origin.x + target_rect.size.width) as f32,
                         (target_rect.origin.y + target_rect.size.height) as f32,
                         task.actual_rect.origin.x as f32,
                         task.actual_rect.origin.y as f32,
                         target_index.0 as f32,
                         0.0,
+                        task.inner_rect.origin.x as f32,
+                        task.inner_rect.origin.y as f32,
+                        (task.inner_rect.origin.x + task.inner_rect.size.width) as f32,
+                        (task.inner_rect.origin.y + task.inner_rect.size.height) as f32,
                     ],
                 }
             }
             RenderTaskKind::VerticalBlur(blur_radius, _) |
             RenderTaskKind::HorizontalBlur(blur_radius, _) => {
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
                         blur_radius.0 as f32,
                         0.0,
                         0.0,
+                        0.0,
+                        0.0,
+                        0.0,
+                        0.0,
                     ]
                 }
             }
         }
     }
 
     fn get_target_rect(&self) -> (DeviceIntRect, RenderTargetIndex) {
         match self.location {
@@ -1352,17 +1383,16 @@ impl RenderTask {
         *max_depth = cmp::max(*max_depth, depth);
         for child in &self.children {
             child.max_depth(depth, max_depth);
         }
     }
 }
 
 pub const SCREEN_TILE_SIZE: i32 = 256;
-pub const RENDERABLE_CACHE_SIZE: i32 = 2048;
 
 #[derive(Debug, Clone)]
 pub struct DebugRect {
     pub label: String,
     pub color: ColorF,
     pub rect: DeviceIntRect,
 }
 
@@ -1477,17 +1507,17 @@ pub struct BlurCommand {
 /// A clipping primitive drawn into the clipping mask.
 /// Could be an image or a rectangle, which defines the
 /// way `address` is treated.
 #[derive(Clone, Copy, Debug)]
 pub struct CacheClipInstance {
     task_id: i32,
     layer_index: i32,
     address: GpuStoreAddress,
-    base_task_id: i32,
+    segment: i32,
 }
 
 #[derive(Debug, Clone)]
 pub struct PrimitiveInstance {
     global_prim_id: i32,
     prim_address: GpuStoreAddress,
     task_index: i32,
     clip_task_index: i32,
@@ -1771,17 +1801,17 @@ pub struct FrameBuilder {
 
 /// A rendering-oriented representation of frame::Frame built by the render backend
 /// and presented to the renderer.
 pub struct Frame {
     pub viewport_size: LayerSize,
     pub background_color: Option<ColorF>,
     pub device_pixel_ratio: f32,
     pub debug_rects: Vec<DebugRect>,
-    pub cache_size: DeviceSize,
+    pub cache_size: DeviceUintSize,
     pub passes: Vec<RenderPass>,
     pub profile_counters: FrameProfileCounters,
 
     pub layer_texture_data: Vec<PackedStackingContext>,
     pub render_task_data: Vec<RenderTaskData>,
     pub gpu_data16: Vec<GpuBlock16>,
     pub gpu_data32: Vec<GpuBlock32>,
     pub gpu_data64: Vec<GpuBlock64>,
@@ -1791,68 +1821,41 @@ pub struct Frame {
 
     // List of textures that we don't know about yet
     // from the backend thread. The render thread
     // will use a callback to resolve these and
     // patch the data structures.
     pub deferred_resolves: Vec<DeferredResolve>,
 }
 
-#[derive(Debug)]
-struct LayerMasksTasks {
-    task_ids: Vec<Option<RenderTaskId>>,
-}
-
-impl LayerMasksTasks {
-    fn new() -> LayerMasksTasks {
-        LayerMasksTasks {
-            task_ids: Vec::new(),
-        }
-    }
-
-    fn add(&mut self, index: StackingContextIndex, task_id: RenderTaskId) {
-        while self.task_ids.len() <= index.0 {
-            self.task_ids.push(None);
-        }
-        assert!(self.task_ids[index.0].is_none());
-        self.task_ids[index.0] = Some(task_id);
-    }
-}
-
 /// Some extra per-tile information stored for debugging purposes.
 #[derive(Debug)]
 struct CompiledScreenTileInfo {
     cmd_count: usize,
     prim_count: usize,
 }
 
 #[derive(Debug)]
 struct CompiledScreenTile {
     main_render_task: RenderTask,
     required_pass_count: usize,
     info: CompiledScreenTileInfo,
-    unique_id: TileUniqueId,
-    layer_masks_tasks: LayerMasksTasks,
 }
 
 impl CompiledScreenTile {
     fn new(main_render_task: RenderTask,
-           info: CompiledScreenTileInfo,
-           unique_id: TileUniqueId,
-           layer_masks_tasks: LayerMasksTasks)
+           info: CompiledScreenTileInfo)
            -> CompiledScreenTile {
         let mut required_pass_count = 0;
         main_render_task.max_depth(0, &mut required_pass_count);
 
         CompiledScreenTile {
             main_render_task: main_render_task,
             required_pass_count: required_pass_count,
             info: info,
-            unique_id: unique_id,
-            layer_masks_tasks: layer_masks_tasks,
         }
     }
 
     fn build(self, passes: &mut Vec<RenderPass>) {
         self.main_render_task.assign_to_passes(passes.len() - 1,
                                                passes);
     }
 }
@@ -1909,20 +1912,16 @@ impl ScreenTile {
 
         let cmd_count = self.cmds.len();
         let mut actual_prim_count = 0;
         let mut next_z = 0;
 
         let mut sc_stack = Vec::new();
         let mut current_task = RenderTask::new_alpha_batch(self.rect, ctx);
         let mut alpha_task_stack = Vec::new();
-        let mut clip_info_stack = Vec::new();
-        let mut clip_task_stack = Vec::new();
-        let mut num_clips_to_skip = 0;
-        let mut layer_masks_tasks = LayerMasksTasks::new();
 
         for cmd in self.cmds {
             match cmd {
                 TileCommand::PushLayer(sc_index) => {
                     sc_stack.push(sc_index);
 
                     let layer = &ctx.layer_store[sc_index.0];
                     match layer.composite_kind {
@@ -1931,43 +1930,16 @@ impl ScreenTile {
                             let layer_rect = layer.xf_rect.as_ref().unwrap().bounding_rect;
                             let needed_rect = layer_rect.intersection(&self.rect)
                                                         .expect("bug if these don't overlap");
                             let prev_task = mem::replace(&mut current_task,
                                                          RenderTask::new_alpha_batch(needed_rect, ctx));
                             alpha_task_stack.push(prev_task);
                         }
                     }
-
-                    // Create a task for the layer mask, if needed,
-                    // i.e. if there are rounded corners or image masks for the layer.
-                    if let Some(ref clip_info) = layer.clip_cache_info {
-                        if CLIP_TASK_COLLAPSE {
-                            clip_info_stack.push((sc_index, clip_info.clone()));
-                        } else {
-                            let mask_opt = RenderTask::new_mask(self.rect,
-                                                                clip_task_stack.last(),
-                                                                MaskCacheKey::Layer(sc_index),
-                                                                (sc_index, clip_info),
-                                                                &clip_info_stack,
-                                                                ctx.tile_id);
-                            match mask_opt {
-                                MaskResult::Inside(mask_task) => {
-                                    current_task.children.push(mask_task.clone());
-                                    clip_task_stack.push(mask_task);
-                                    num_clips_to_skip = 0;
-                                }
-                                _ => num_clips_to_skip += 1,
-                            }
-                        }
-                    }
-                    // Register the layer mask task within the context
-                    if let Some(ref mask_task) = clip_task_stack.last() {
-                        layer_masks_tasks.add(sc_index, mask_task.id);
-                    }
                 }
                 TileCommand::PopLayer => {
                     let sc_index = sc_stack.pop().unwrap();
 
                     let layer = &ctx.layer_store[sc_index.0];
                     match layer.composite_kind {
                         CompositeKind::None => {}
                         CompositeKind::Simple(info) => {
@@ -1991,83 +1963,58 @@ impl ScreenTile {
                             composite_task.as_alpha_batch().alpha_items.push(item);
 
                             composite_task.children.push(backdrop);
                             composite_task.children.push(current_task);
 
                             current_task = composite_task;
                         }
                     }
-
-                    if layer.clip_cache_info.is_some() {
-                        if CLIP_TASK_COLLAPSE {
-                            clip_info_stack.pop().unwrap();
-                        } else {
-                            if num_clips_to_skip > 0 {
-                                num_clips_to_skip -= 1;
-                            } else {
-                                clip_task_stack.pop().unwrap();
-                            }
-                        }
-                    }
                 }
                 TileCommand::DrawPrimitive(prim_index) => {
                     let sc_index = *sc_stack.last().unwrap();
                     let prim_metadata = ctx.prim_store.get_metadata(prim_index);
 
-                    // Add a task to render the updated image mask
-                    if let Some(ref clip_info) = prim_metadata.clip_cache_info {
-                        let mask_opt = RenderTask::new_mask(self.rect,
-                                                            clip_task_stack.last(),
-                                                            MaskCacheKey::Primitive(prim_index),
-                                                            (sc_index, clip_info),
-                                                            &clip_info_stack,
-                                                            ctx.tile_id);
-                        match mask_opt {
-                            MaskResult::Outside => panic!("Primitive be culled by `assign_prims_to_screen_tiles` already"),
-                            MaskResult::Covering => (), //do nothing
-                            MaskResult::Inside(task) => current_task.children.push(task),
-                        }
-                    }
-
                     // Add any dynamic render tasks needed to render this primitive
                     if let Some(ref render_task) = prim_metadata.render_task {
                         current_task.children.push(render_task.clone());
                     }
+                    if let Some(ref clip_task) = prim_metadata.clip_task {
+                        current_task.children.push(clip_task.clone());
+                    }
 
                     actual_prim_count += 1;
 
                     let layer = &ctx.layer_store[sc_index.0];
                     let transform_kind = layer.xf_rect.as_ref().unwrap().kind;
-                    let needs_clipping = layer.clip_cache_info.is_some() || prim_metadata.clip_cache_info.is_some();
+                    let needs_clipping = prim_metadata.clip_task.is_some();
                     let needs_blending = transform_kind == TransformedRectKind::Complex ||
                                          !prim_metadata.is_opaque ||
                                          needs_clipping;
 
                     let items = if needs_blending {
                         &mut current_task.as_alpha_batch().alpha_items
                     } else {
                         &mut current_task.as_alpha_batch().opaque_items
                     };
                     items.push(AlphaRenderItem::Primitive(sc_index, prim_index, next_z));
                     next_z += 1;
                 }
             }
         }
 
         debug_assert!(alpha_task_stack.is_empty());
-        debug_assert!(clip_task_stack.is_empty());
 
         let info = CompiledScreenTileInfo {
             cmd_count: cmd_count,
             prim_count: actual_prim_count,
         };
 
         current_task.location = RenderTaskLocation::Fixed(self.rect);
-        Some(CompiledScreenTile::new(current_task, info, ctx.tile_id, layer_masks_tasks))
+        Some(CompiledScreenTile::new(current_task, info))
     }
 }
 
 impl FrameBuilder {
     pub fn new(viewport_size: LayerSize,
                background_color: Option<ColorF>,
                debug: bool,
                config: FrameBuilderConfig) -> FrameBuilder {
@@ -2538,16 +2485,17 @@ impl FrameBuilder {
                    resource_cache: &mut ResourceCache,
                    profile_counters: &mut FrameProfileCounters,
                    device_pixel_ratio: f32) {
         // Build layer screen rects.
         // TODO(gw): This can be done earlier once update_layer_transforms() is fixed.
 
         // TODO(gw): Remove this stack once the layers refactor is done!
         let mut layer_stack: Vec<StackingContextIndex> = Vec::new();
+        let mut clip_info_stack = Vec::new();
 
         for cmd in &self.cmds {
             match cmd {
                 &PrimitiveRunCmd::PushStackingContext(sc_index) => {
                     layer_stack.push(sc_index);
                     let layer = &mut self.layer_store[sc_index.0];
                     let packed_layer = &mut self.packed_layers[sc_index.0];
 
@@ -2615,54 +2563,105 @@ impl FrameBuilder {
                                          &packed_layer.transform,
                                          &mut self.prim_store.gpu_data32,
                                          device_pixel_ratio,
                                          auxiliary_lists);
                         if let ClipSource::Region(ClipRegion{ image_mask: Some(ref mask), .. }) = layer.clip_source {
                             resource_cache.request_image(mask.image, ImageRendering::Auto);
                             //Note: no need to add the layer for resolve, all layers get resolved
                         }
+
+                        // Remember this layer's clip info (rounded corners or image masks)
+                        // so that mask tasks built for its primitives can take it into account.
+                        clip_info_stack.push((sc_index, clip_info.clone()));
                     }
 
                 }
                 &PrimitiveRunCmd::PrimitiveRun(prim_index, prim_count) => {
                     let sc_index = layer_stack.last().unwrap();
-                    let layer = &mut self.layer_store[sc_index.0];
+                    let layer = &self.layer_store[sc_index.0];
                     if !layer.is_visible() {
                         continue;
                     }
 
                     let packed_layer = &self.packed_layers[sc_index.0];
                     let auxiliary_lists = auxiliary_lists_map.get(&layer.pipeline_id)
                                                              .expect("No auxiliary lists?");
 
                     for i in 0..prim_count {
                         let prim_index = PrimitiveIndex(prim_index.0 + i);
                         if self.prim_store.build_bounding_rect(prim_index,
                                                                screen_rect,
                                                                &packed_layer.transform,
                                                                &packed_layer.local_clip_rect,
                                                                device_pixel_ratio) {
-                            profile_counters.visible_primitives.inc();
-
                             if self.prim_store.prepare_prim_for_render(prim_index,
                                                                        resource_cache,
                                                                        &packed_layer.transform,
                                                                        device_pixel_ratio,
                                                                        auxiliary_lists) {
                                 self.prim_store.build_bounding_rect(prim_index,
                                                                     screen_rect,
                                                                     &packed_layer.transform,
                                                                     &packed_layer.local_clip_rect,
                                                                     device_pixel_ratio);
                             }
+
+                            // If the primitive is visible, consider culling it via clip rect(s).
+                            // If it is visible but has clips, create the clip task for it.
+                            if let Some(prim_bounding_rect) = self.prim_store
+                                                                  .cpu_bounding_rects[prim_index.0] {
+                                let prim_metadata = &mut self.prim_store.cpu_metadata[prim_index.0];
+                                let prim_clip_info = prim_metadata.clip_cache_info.as_ref();
+                                let mut visible = true;
+
+                                if let Some(info) = prim_clip_info {
+                                    clip_info_stack.push((*sc_index, info.clone()));
+                                }
+
+                                // Try to create a mask if we may need to.
+                                if prim_clip_info.is_some() || !clip_info_stack.is_empty() {
+                                    let mask_opt = RenderTask::new_mask(prim_bounding_rect,
+                                                                        MaskCacheKey::Primitive(prim_index),
+                                                                        &clip_info_stack,
+                                                                        &self.layer_store);
+                                    match mask_opt {
+                                        MaskResult::Outside => {
+                                            // Primitive is completely clipped out.
+                                            prim_metadata.clip_task = None;
+                                            self.prim_store.cpu_bounding_rects[prim_index.0] = None;
+                                            visible = false;
+                                        }
+                                        MaskResult::Inside(task) => {
+                                            // Got a valid clip task, so store it for this primitive.
+                                            prim_metadata.clip_task = Some(task);
+                                        }
+                                    }
+                                }
+
+                                if let Some(..) = prim_clip_info {
+                                    clip_info_stack.pop();
+                                }
+
+                                if visible {
+                                    profile_counters.visible_primitives.inc();
+                                }
+                            }
                         }
                     }
                 }
                 &PrimitiveRunCmd::PopStackingContext => {
+                    let sc_index = *layer_stack.last().unwrap();
+                    let layer = &mut self.layer_store[sc_index.0];
+                    if layer.can_contribute_to_scene() {
+                        if layer.clip_cache_info.is_some() {
+                            clip_info_stack.pop().unwrap();
+                        }
+                    }
+
                     layer_stack.pop().unwrap();
                 }
             }
         }
     }
 
     fn create_screen_tiles(&self, device_pixel_ratio: f32) -> (i32, i32, Vec<ScreenTile>) {
         let dp_size = DeviceIntSize::from_lengths(device_length(self.screen_rect.size.width as f32,
@@ -2859,29 +2858,42 @@ impl FrameBuilder {
     }
 
     pub fn build(&mut self,
                  resource_cache: &mut ResourceCache,
                  frame_id: FrameId,
                  layer_map: &LayerMap,
                  auxiliary_lists_map: &AuxiliaryListsMap,
                  device_pixel_ratio: f32) -> Frame {
-
         let mut profile_counters = FrameProfileCounters::new();
         profile_counters.total_primitives.set(self.prim_store.prim_count());
 
         resource_cache.begin_frame(frame_id);
 
         let screen_rect = DeviceIntRect::new(
             DeviceIntPoint::zero(),
             DeviceIntSize::from_lengths(device_length(self.screen_rect.size.width as f32,
                                                       device_pixel_ratio),
                                         device_length(self.screen_rect.size.height as f32,
                                                       device_pixel_ratio)));
 
+        // Pick a size for the cache render targets to be. The main requirement is that it
+        // has to be at least as large as the framebuffer size. This ensures that it will
+        // always be able to allocate the worst case render task (such as a clip mask that
+        // covers the entire screen).
+        // However, there are some extremely subtle rounding errors that occur in the
+        // reftests under OSMesa if the cache targets are exactly the size of the
+        // framebuffer. To work around this, we'll align the cache size to a multiple
+        // of the tile size. This can be removed once the tiling code is gone.
+        // TODO(gw): Remove this hack once the tiling code is sorted out!!
+        let max_dimension = cmp::max(screen_rect.size.width, screen_rect.size.height);
+        let aligned_max_dimension = (max_dimension + SCREEN_TILE_SIZE - 1) & !(SCREEN_TILE_SIZE-1);
+        let cache_size = DeviceUintSize::new(aligned_max_dimension as u32,
+                                             aligned_max_dimension as u32);
+
         let mut debug_rects = Vec::new();
 
         let (x_tile_count, y_tile_count, mut screen_tiles) = self.create_screen_tiles(device_pixel_ratio);
 
         self.update_scroll_bars(layer_map);
 
         self.cull_layers(&screen_rect,
                          layer_map,
@@ -2891,37 +2903,35 @@ impl FrameBuilder {
                          resource_cache,
                          &mut profile_counters,
                          device_pixel_ratio);
 
         let mut compiled_screen_tiles = Vec::new();
         let mut max_passes_needed = 0;
 
         let mut render_tasks = {
-            let mut ctx = CompileTileContext {
+            let ctx = CompileTileContext {
                 layer_store: &self.layer_store,
                 prim_store: &self.prim_store,
-                tile_id: 0,
 
                 // This doesn't need to be atomic right now (all the screen tiles are
                 // compiled on a single thread). However, in the future each of the
                 // compile steps below will be run on a worker thread, which will
                 // require an atomic int here anyway.
                 render_task_id_counter: AtomicUsize::new(0),
             };
 
             if !self.layer_store.is_empty() {
                 self.assign_prims_to_screen_tiles(&mut screen_tiles,
                                                   x_tile_count,
                                                   device_pixel_ratio);
             }
 
             // Build list of passes, target allocs that each tile needs.
-            for (tile_id, screen_tile) in screen_tiles.into_iter().enumerate() {
-                ctx.tile_id = tile_id;
+            for screen_tile in screen_tiles {
                 let rect = screen_tile.rect;
                 if let Some(compiled_screen_tile) = screen_tile.compile(&ctx) {
                     max_passes_needed = cmp::max(max_passes_needed,
                                                  compiled_screen_tile.required_pass_count);
                     if self.debug {
                         let label = format!("{}|{}", compiled_screen_tile.info.cmd_count, compiled_screen_tile.info.prim_count);
                         let color =  ColorF::new(1.0, 0.0, 0.0, 1.0);
                         debug_rects.push(DebugRect {
@@ -2947,38 +2957,31 @@ impl FrameBuilder {
         }
 
 
         let deferred_resolves = self.prim_store.resolve_primitives(resource_cache, device_pixel_ratio);
 
         let mut passes = Vec::new();
 
         if !compiled_screen_tiles.is_empty() {
-            let mut ctx = RenderTargetContext {
+            let ctx = RenderTargetContext {
                 layer_store: &self.layer_store,
                 prim_store: &self.prim_store,
                 resource_cache: resource_cache,
-                layer_masks_tasks: HashMap::new(),
             };
 
             // Do the allocations now, assigning each tile's tasks to a render
             // pass and target as required.
             for index in 0..max_passes_needed {
                 passes.push(RenderPass::new(index as isize,
-                                            index == max_passes_needed-1));
+                                            index == max_passes_needed-1,
+                                            cache_size));
             }
 
-            for mut compiled_screen_tile in compiled_screen_tiles {
-                // Grab the mask task indices from the compile tile and append into the context map
-                for (i, mask_task_opt) in compiled_screen_tile.layer_masks_tasks.task_ids.drain(..).enumerate() {
-                    if let Some(mask_task_id) = mask_task_opt {
-                        let key = (compiled_screen_tile.unique_id, StackingContextIndex(i));
-                        ctx.layer_masks_tasks.insert(key, mask_task_id);
-                    }
-                }
+            for compiled_screen_tile in compiled_screen_tiles {
                 compiled_screen_tile.build(&mut passes);
             }
 
             for pass in &mut passes {
                 pass.build(&ctx, &mut render_tasks);
 
                 profile_counters.passes.inc();
                 profile_counters.targets.add(pass.targets.len());
@@ -2989,18 +2992,17 @@ impl FrameBuilder {
 
         Frame {
             device_pixel_ratio: device_pixel_ratio,
             background_color: self.background_color,
             viewport_size: self.screen_rect.size,
             debug_rects: debug_rects,
             profile_counters: profile_counters,
             passes: passes,
-            cache_size: DeviceSize::new(RENDERABLE_CACHE_SIZE as f32,
-                                        RENDERABLE_CACHE_SIZE as f32),
+            cache_size: cache_size,
             layer_texture_data: self.packed_layers.clone(),
             render_task_data: render_tasks.render_task_data,
             gpu_data16: self.prim_store.gpu_data16.build(),
             gpu_data32: self.prim_store.gpu_data32.build(),
             gpu_data64: self.prim_store.gpu_data64.build(),
             gpu_data128: self.prim_store.gpu_data128.build(),
             gpu_geometry: self.prim_store.gpu_geometry.build(),
             gpu_resource_rects: self.prim_store.gpu_resource_rects.build(),
--- a/gfx/webrender_traits/src/api.rs
+++ b/gfx/webrender_traits/src/api.rs
@@ -7,16 +7,17 @@ use channel::{self, MsgSender, PayloadHe
 use offscreen_gl_context::{GLContextAttributes, GLLimits};
 use std::cell::Cell;
 use {ApiMsg, ColorF, DisplayListBuilder, Epoch};
 use {FontKey, IdNamespace, ImageFormat, ImageKey, NativeFontHandle, PipelineId};
 use {RenderApiSender, ResourceId, ScrollEventPhase, ScrollLayerState, ScrollLocation, ServoScrollRootId};
 use {GlyphKey, GlyphDimensions, ImageData, WebGLContextId, WebGLCommand};
 use {DeviceIntSize, LayoutPoint, LayoutSize, WorldPoint};
 use VRCompositorCommand;
+use ExternalEvent;
 
 impl RenderApiSender {
     pub fn new(api_sender: MsgSender<ApiMsg>,
                payload_sender: PayloadSender)
                -> RenderApiSender {
         RenderApiSender {
             api_sender: api_sender,
             payload_sender: payload_sender,
@@ -237,16 +238,25 @@ impl RenderApi {
         self.api_sender.send(msg).unwrap();
     }
 
     pub fn send_vr_compositor_command(&self, context_id: WebGLContextId, command: VRCompositorCommand) {
         let msg = ApiMsg::VRCompositorCommand(context_id, command);
         self.api_sender.send(msg).unwrap();
     }
 
+    pub fn send_external_event(&self, evt: ExternalEvent) {
+        let msg = ApiMsg::ExternalEvent(evt);
+        self.api_sender.send(msg).unwrap();
+    }
+
+    pub fn shut_down(&self) {
+        self.api_sender.send(ApiMsg::ShutDown).unwrap();
+    }
+
     #[inline]
     fn next_unique_id(&self) -> (u32, u32) {
         let IdNamespace(namespace) = self.id_namespace;
         let ResourceId(id) = self.next_id.get();
         self.next_id.set(ResourceId(id + 1));
         (namespace, id)
     }
 }
--- a/gfx/webrender_traits/src/channel_mpsc.rs
+++ b/gfx/webrender_traits/src/channel_mpsc.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use serde::{Deserialize, Serialize};
-use std::io::Error;
+use std::io::{Error, ErrorKind};
 
 use serde::{Deserializer, Serializer};
 
 use std::sync::mpsc;
 
 ///
 /// Handles the channel implementation when in process channels are enabled.
 ///
@@ -24,28 +24,30 @@ impl PayloadHelperMethods for PayloadSen
 }
 
 pub struct MsgReceiver<T> {
     rx: mpsc::Receiver<T>,
 }
 
 impl<T> MsgReceiver<T> {
     pub fn recv(&self) -> Result<T, Error> {
-        Ok(self.rx.recv().unwrap())
+        use std::io;
+        use std::error::Error;
+        self.rx.recv().map_err(|e| io::Error::new(ErrorKind::Other, e.description()))
     }
 }
 
 #[derive(Clone)]
 pub struct MsgSender<T> {
     tx: mpsc::Sender<T>,
 }
 
 impl<T> MsgSender<T> {
     pub fn send(&self, data: T) -> Result<(), Error> {
-        Ok(self.tx.send(data).unwrap())
+        self.tx.send(data).map_err(|_| Error::new(ErrorKind::Other, "cannot send on closed channel"))
     }
 }
 
 pub fn payload_channel() -> Result<(PayloadSender, PayloadReceiver), Error> {
     let (tx, rx) = mpsc::channel();
     Ok((PayloadSender { tx: tx }, PayloadReceiver { rx: rx }))
 }
 
--- a/gfx/webrender_traits/src/display_item.rs
+++ b/gfx/webrender_traits/src/display_item.rs
@@ -124,27 +124,26 @@ impl ComplexClipRegion {
     pub fn new(rect: LayoutRect, radii: BorderRadius) -> ComplexClipRegion {
         ComplexClipRegion {
             rect: rect,
             radii: radii,
         }
     }
 
     //TODO: move to `util` module?
-    /// Return a maximum aligned rectangle that is fully inside the clip region.
+    /// Return an aligned rectangle that is fully inside the clip region.
     pub fn get_inner_rect(&self) -> Option<LayoutRect> {
-        let k = 0.3; //roughly higher than `1.0 - sqrt(0.5)`
         let xl = self.rect.origin.x +
-            k * self.radii.top_left.width.max(self.radii.bottom_left.width);
+            self.radii.top_left.width.max(self.radii.bottom_left.width);
         let xr = self.rect.origin.x + self.rect.size.width -
-            k * self.radii.top_right.width.max(self.radii.bottom_right.width);
+            self.radii.top_right.width.max(self.radii.bottom_right.width);
         let yt = self.rect.origin.y +
-            k * self.radii.top_left.height.max(self.radii.top_right.height);
+            self.radii.top_left.height.max(self.radii.top_right.height);
         let yb = self.rect.origin.y + self.rect.size.height -
-            k * self.radii.bottom_left.height.max(self.radii.bottom_right.height);
+            self.radii.bottom_left.height.max(self.radii.bottom_right.height);
         if xl <= xr && yt <= yb {
             Some(LayoutRect::new(LayoutPoint::new(xl, yt), LayoutSize::new(xr-xl, yb-yt)))
         } else {
             None
         }
     }
 }
 
--- a/gfx/webrender_traits/src/lib.rs
+++ b/gfx/webrender_traits/src/lib.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #![cfg_attr(feature = "nightly", feature(nonzero))]
-#![cfg_attr(feature = "serde_derive", feature(proc_macro, rustc_attrs, structural_match))]
+#![cfg_attr(feature = "serde_derive", feature(rustc_attrs, structural_match))]
 
 extern crate app_units;
 extern crate byteorder;
 #[cfg(feature = "nightly")]
 extern crate core;
 extern crate euclid;
 extern crate gleam;
 #[macro_use]
--- a/gfx/webrender_traits/src/types.rs
+++ b/gfx/webrender_traits/src/types.rs
@@ -50,17 +50,36 @@ pub enum ApiMsg {
     TickScrollingBounce,
     TranslatePointToLayerSpace(WorldPoint, MsgSender<(LayoutPoint, PipelineId)>),
     GetScrollLayerState(MsgSender<Vec<ScrollLayerState>>),
     RequestWebGLContext(DeviceIntSize, GLContextAttributes, MsgSender<Result<(WebGLContextId, GLLimits), String>>),
     ResizeWebGLContext(WebGLContextId, DeviceIntSize),
     WebGLCommand(WebGLContextId, WebGLCommand),
     GenerateFrame,
     // WebVR commands that must be called in the WebGL render thread.
-    VRCompositorCommand(WebGLContextId, VRCompositorCommand)
+    VRCompositorCommand(WebGLContextId, VRCompositorCommand),
+    /// An opaque handle that must be passed to the render notifier. It is used by Gecko
+    /// to forward Gecko-specific messages to the render thread while preserving their
+    /// ordering relative to the other messages.
+    ExternalEvent(ExternalEvent),
+    ShutDown,
+}
+
+/// An opaque pointer-sized value.
+#[derive(Clone, Deserialize, Serialize)]
+pub struct ExternalEvent {
+    raw: usize,
+}
+
+unsafe impl Send for ExternalEvent {}
+
+impl ExternalEvent {
+    pub fn from_raw(raw: usize) -> Self { ExternalEvent { raw: raw } }
+    /// Consumes self to make it obvious that the event should be forwarded only once.
+    pub fn unwrap(self) -> usize { self.raw }
 }
 
 #[derive(Copy, Clone, Deserialize, Serialize, Debug)]
 pub struct GlyphDimensions {
     pub left: i32,
     pub top: i32,
     pub width: u32,
     pub height: u32,
@@ -419,16 +438,18 @@ pub struct RenderApiSender {
     api_sender: MsgSender<ApiMsg>,
     payload_sender: PayloadSender,
 }
 
 pub trait RenderNotifier: Send {
     fn new_frame_ready(&mut self);
     fn new_scroll_frame_ready(&mut self, composite_needed: bool);
     fn pipeline_size_changed(&mut self, pipeline_id: PipelineId, size: Option<LayoutSize>);
+    fn external_event(&mut self, _evt: ExternalEvent) { unimplemented!() }
+    fn shut_down(&mut self) {}
 }
 
 // Trait to allow dispatching functions to a specific thread or event loop.
 pub trait RenderDispatcher: Send {
     fn dispatch(&self, Box<Fn() + Send>);
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, Serialize)]
--- a/third_party/rust/error-chain/.cargo-checksum.json
+++ b/third_party/rust/error-chain/.cargo-checksum.json
@@ -1,1 +1,1 @@
-{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitattributes":"7e18693dd440d30e646b599e2088919e50f6955ea6191bed27d852e92d38d08e",".gitignore":"ed8bb3864902ddf6322e6b1d6358bcaec9b51382a5216b9526ad0987ae430b0d",".travis.yml":"d56246d6c8796c638b5012c2d7a91d9b6ec101b6a47128e2d4bfa957c1c784e8","CHANGELOG.md":"8eb613d4a417752d4d1c81e065853e5ba0a21530e0881886c2ae4ffbf0ce57cd","Cargo.toml":"9e551bbef17e031db548e1a81f52d249c94db73a194daf0fe2906bc9404b9805","README.md":"6771ca940645b2f7e7a018c8cd25b25f8bf35786e229b54fa2fded1f2d0ae411","examples/all.rs":"2e6d530e95907bde1e49bda7fde7167568d08a370ade44a153612e2d1cb899d7","examples/doc.rs":"574948eb776c3d363f5cff9a48015bab6c17828c7306dc3eb8818afa90a31a83","examples/quickstart.rs":"0cd227741ed3559c0ead90dcc643cef30b73255d9c9f15c2ee20c4a1085d6f5c","examples/size.rs":"7922acd891dfd06f1d36308a3ccdf03def2646b2f39bfd1b15cf2896247bad8f","src/error_chain.rs":"236c4feead97661b33541434ae71f32c279738a81d0d4b7ce9c50550d5d6a662","src/example_generated.rs":"edaead3c4911afd0a0870cfcab11f8835eb17447031d227bbb5d17210379f778","src/lib.rs":"14ce5d1e76185e762db2414b51411095ddd38207a6f4d9dd50d4a041e7b77d88","src/quick_error.rs":"1889b9ca1f7a5e9124275fd5da81e709d0d6bd3b06915bf320c23d4c4f083301","src/quick_main.rs":"755028c2b4305482a1ab86f8b1b68a95eac22b331c94e14d29777dc69dad1bf4","tests/quick_main.rs":"1d6a726856b954d4cffddab00602583921972ceeeb2bf7ba9ebbac6a51584b53","tests/tests.rs":"2f7ceee2f9808d0985c848d99fe967e8f0b549cf144d4d692a5c5d1c2ba7d660"},"package":"318cb3c71ee4cdea69fdc9e15c173b245ed6063e1709029e8fd32525a881120f"}
\ No newline at end of file
+{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"ed8bb3864902ddf6322e6b1d6358bcaec9b51382a5216b9526ad0987ae430b0d",".travis.yml":"d56246d6c8796c638b5012c2d7a91d9b6ec101b6a47128e2d4bfa957c1c784e8","CHANGELOG.md":"8eb613d4a417752d4d1c81e065853e5ba0a21530e0881886c2ae4ffbf0ce57cd","Cargo.toml":"9e551bbef17e031db548e1a81f52d249c94db73a194daf0fe2906bc9404b9805","README.md":"6771ca940645b2f7e7a018c8cd25b25f8bf35786e229b54fa2fded1f2d0ae411","examples/all.rs":"2e6d530e95907bde1e49bda7fde7167568d08a370ade44a153612e2d1cb899d7","examples/doc.rs":"574948eb776c3d363f5cff9a48015bab6c17828c7306dc3eb8818afa90a31a83","examples/quickstart.rs":"0cd227741ed3559c0ead90dcc643cef30b73255d9c9f15c2ee20c4a1085d6f5c","examples/size.rs":"7922acd891dfd06f1d36308a3ccdf03def2646b2f39bfd1b15cf2896247bad8f","src/error_chain.rs":"236c4feead97661b33541434ae71f32c279738a81d0d4b7ce9c50550d5d6a662","src/example_generated.rs":"edaead3c4911afd0a0870cfcab11f8835eb17447031d227bbb5d17210379f778","src/lib.rs":"14ce5d1e76185e762db2414b51411095ddd38207a6f4d9dd50d4a041e7b77d88","src/quick_error.rs":"1889b9ca1f7a5e9124275fd5da81e709d0d6bd3b06915bf320c23d4c4f083301","src/quick_main.rs":"755028c2b4305482a1ab86f8b1b68a95eac22b331c94e14d29777dc69dad1bf4","tests/quick_main.rs":"1d6a726856b954d4cffddab00602583921972ceeeb2bf7ba9ebbac6a51584b53","tests/tests.rs":"2f7ceee2f9808d0985c848d99fe967e8f0b549cf144d4d692a5c5d1c2ba7d660"},"package":"318cb3c71ee4cdea69fdc9e15c173b245ed6063e1709029e8fd32525a881120f"}
\ No newline at end of file
deleted file mode 100644
--- a/third_party/rust/error-chain/.gitattributes
+++ /dev/null
@@ -1,4 +0,0 @@
-*.rs text eol=lf
-*.lock text eol=lf
-*.txt text eol=lf
-*.toml text eol=lf
\ No newline at end of file
--- a/toolkit/library/gtest/rust/Cargo.lock
+++ b/toolkit/library/gtest/rust/Cargo.lock
@@ -496,17 +496,17 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webrender"
-version = "0.11.0"
+version = "0.11.1"
 dependencies = [
  "app_units 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-graphics 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-text 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -529,17 +529,17 @@ name = "webrender_bindings"
 version = "0.1.0"
 dependencies = [
  "app_units 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-foundation 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "gleam 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)",
  "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "webrender 0.11.0",
+ "webrender 0.11.1",
  "webrender_traits 0.11.0",
  "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webrender_traits"
 version = "0.11.0"
 dependencies = [
--- a/toolkit/library/rust/Cargo.lock
+++ b/toolkit/library/rust/Cargo.lock
@@ -483,17 +483,17 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webrender"
-version = "0.11.0"
+version = "0.11.1"
 dependencies = [
  "app_units 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bincode 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-graphics 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-text 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -516,17 +516,17 @@ name = "webrender_bindings"
 version = "0.1.0"
 dependencies = [
  "app_units 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "core-foundation 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "euclid 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "gleam 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)",
  "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "webrender 0.11.0",
+ "webrender 0.11.1",
  "webrender_traits 0.11.0",
  "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "webrender_traits"
 version = "0.11.0"
 dependencies = [