Bug 1335799 - Update webrender to 3c05fd2fccc2b51ec8b1b12cb96b7d011468766c. r=kats
author Jeff Muizelaar <jmuizelaar@mozilla.com>
Tue, 14 Feb 2017 13:34:14 -0500
changeset 372988 9cbd7126db64c03749c553c2a0707c3b8c91dc82
parent 372987 7c8cbd22d82a2029e0c6ac08206a057d8d88c1d4
child 372989 47611a305c4407c98064008592d0df04400acc3b
push id 10863
push user jlorenzo@mozilla.com
push date Mon, 06 Mar 2017 23:02:23 +0000
treeherder mozilla-aurora@0931190cd725 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers kats
bugs 1335799
milestone 54.0a1
Bug 1335799 - Update webrender to 3c05fd2fccc2b51ec8b1b12cb96b7d011468766c. r=kats MozReview-Commit-ID: 2tSt23k7kbm
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_angle_gradient.fs.glsl
gfx/webrender/res/ps_angle_gradient.glsl
gfx/webrender/res/ps_angle_gradient.vs.glsl
gfx/webrender/res/ps_blend.vs.glsl
gfx/webrender/res/ps_border.vs.glsl
gfx/webrender/res/ps_box_shadow.vs.glsl
gfx/webrender/res/ps_cache_image.vs.glsl
gfx/webrender/res/ps_composite.fs.glsl
gfx/webrender/res/ps_composite.glsl
gfx/webrender/res/ps_composite.vs.glsl
gfx/webrender/res/ps_gradient.vs.glsl
gfx/webrender/res/ps_hardware_composite.fs.glsl
gfx/webrender/res/ps_hardware_composite.glsl
gfx/webrender/res/ps_hardware_composite.vs.glsl
gfx/webrender/res/ps_image.fs.glsl
gfx/webrender/res/ps_image.glsl
gfx/webrender/res/ps_image.vs.glsl
gfx/webrender/res/ps_radial_gradient.fs.glsl
gfx/webrender/res/ps_radial_gradient.glsl
gfx/webrender/res/ps_radial_gradient.vs.glsl
gfx/webrender/res/ps_rectangle.vs.glsl
gfx/webrender/res/ps_text_run.fs.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/ps_text_run.vs.glsl
gfx/webrender/res/ps_yuv_image.fs.glsl
gfx/webrender/res/ps_yuv_image.glsl
gfx/webrender/res/ps_yuv_image.vs.glsl
gfx/webrender/src/debug_render.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_store.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/layer.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/mask_cache.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/unix/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/record.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/scene.rs
gfx/webrender/src/scroll_tree.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender_traits/Cargo.toml
gfx/webrender_traits/src/api.rs
gfx/webrender_traits/src/display_item.rs
gfx/webrender_traits/src/display_list.rs
gfx/webrender_traits/src/stacking_context.rs
gfx/webrender_traits/src/types.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -74,9 +74,9 @@ there is another crate in m-c called moz
 the same folder to store its rust dependencies. If one of the libraries that is
 required by both mozjs_sys and webrender is updated without updating the other
 project's Cargo.lock file, that results in build bustage.
 This means that any time you do this sort of manual update of packages, you need
 to make sure that mozjs_sys also has its Cargo.lock file updated if needed, hence
 the need to run the cargo update command in js/src as well. Hopefully this will
 be resolved soon.
 
-Latest Commit: bcf3c371086894f5e1d098ee60f0592abf01f6b3
+Latest Commit: 3c05fd2fccc2b51ec8b1b12cb96b7d011468766c
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -1,44 +1,46 @@
 [package]
 name = "webrender"
-version = "0.11.1"
+version = "0.15.0"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
-workspace = ".."
 
 [features]
 default = ["codegen", "freetype-lib"]
 codegen = ["webrender_traits/codegen"]
 freetype-lib = ["freetype/servo-freetype-sys"]
 serde_derive = ["webrender_traits/serde_derive"]
+profiler = ["thread_profiler/thread_profiler"]
 
 [dependencies]
 app_units = "0.3"
 bincode = "0.6"
 bit-set = "0.4"
-byteorder = "0.5"
+byteorder = "1.0"
 euclid = "0.10.3"
 fnv="1.0"
 gleam = "0.2.30"
 lazy_static = "0.2"
 log = "0.3"
 num-traits = "0.1.32"
 offscreen_gl_context = {version = "0.5", features = ["serde_serialization", "osmesa"]}
 time = "0.1"
 threadpool = "1.3.2"
 webrender_traits = {path = "../webrender_traits", default-features = false}
 bitflags = "0.7"
+gamma-lut = "0.1"
+thread_profiler = "0.1.1"
 
 [dev-dependencies]
 angle = {git = "https://github.com/servo/angle", branch = "servo"}
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.2", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
-dwrote = "0.1.5"
+dwrote = "0.1.7"
 
 [target.'cfg(target_os = "macos")'.dependencies]
-core-graphics = "0.5.0"
-core-text = "2.0"
+core-graphics = "0.6.0"
+core-text = "3.0"
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -25,34 +25,31 @@
 #define BORDER_LEFT      0
 #define BORDER_TOP       1
 #define BORDER_RIGHT     2
 #define BORDER_BOTTOM    3
 
 #define UV_NORMALIZED    uint(0)
 #define UV_PIXEL         uint(1)
 
-#define MAX_STOPS_PER_ANGLE_GRADIENT 8
-#define MAX_STOPS_PER_RADIAL_GRADIENT 8
+#define EXTEND_MODE_CLAMP  0
+#define EXTEND_MODE_REPEAT 1
 
 uniform sampler2DArray sCache;
 
 flat varying vec4 vClipMaskUvBounds;
 varying vec3 vClipMaskUv;
 
 #ifdef WR_VERTEX_SHADER
 
-#define VECS_PER_LAYER             13
-#define VECS_PER_RENDER_TASK        3
+// These have to be multiples of WR_MAX_VERTEX_TEXTURE_WIDTH
+#define VECS_PER_LAYER             16
+#define VECS_PER_RENDER_TASK        4
 #define VECS_PER_PRIM_GEOM          2
 
-#define GRADIENT_HORIZONTAL     0
-#define GRADIENT_VERTICAL       1
-#define GRADIENT_ROTATED        2
-
 uniform sampler2D sLayers;
 uniform sampler2D sRenderTasks;
 uniform sampler2D sPrimGeometry;
 
 uniform sampler2D sData16;
 uniform sampler2D sData32;
 uniform sampler2D sData64;
 uniform sampler2D sData128;
@@ -141,29 +138,33 @@ RenderTaskData fetch_render_task(int ind
 
     task.data0 = texelFetchOffset(sRenderTasks, uv, 0, ivec2(0, 0));
     task.data1 = texelFetchOffset(sRenderTasks, uv, 0, ivec2(1, 0));
     task.data2 = texelFetchOffset(sRenderTasks, uv, 0, ivec2(2, 0));
 
     return task;
 }
 
-struct Tile {
-    vec4 screen_origin_task_origin;
-    vec4 size_target_index;
+struct AlphaBatchTask {
+    vec2 screen_space_origin;
+    vec2 render_target_origin;
+    vec2 size;
+    float render_target_layer_index;
 };
 
-Tile fetch_tile(int index) {
-    RenderTaskData task = fetch_render_task(index);
+AlphaBatchTask fetch_alpha_batch_task(int index) {
+    RenderTaskData data = fetch_render_task(index);
 
-    Tile tile;
-    tile.screen_origin_task_origin = task.data0;
-    tile.size_target_index = task.data1;
+    AlphaBatchTask task;
+    task.render_target_origin = data.data0.xy;
+    task.size = data.data0.zw;
+    task.screen_space_origin = data.data1.xy;
+    task.render_target_layer_index = data.data1.z;
 
-    return tile;
+    return task;
 }
 
 struct ClipArea {
     vec4 task_bounds;
     vec4 screen_origin_target_index;
     vec4 inner_rect;
 };
 
@@ -181,26 +182,26 @@ ClipArea fetch_clip_area(int index) {
         area.inner_rect = task.data2;
     }
 
     return area;
 }
 
 struct Gradient {
     vec4 start_end_point;
-    vec4 kind;
+    vec4 extend_mode;
 };
 
 Gradient fetch_gradient(int index) {
     Gradient gradient;
 
     ivec2 uv = get_fetch_uv_2(index);
 
     gradient.start_end_point = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
-    gradient.kind = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
+    gradient.extend_mode = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
 
     return gradient;
 }
 
 struct GradientStop {
     vec4 color;
     vec4 offset;
 };
@@ -213,26 +214,26 @@ GradientStop fetch_gradient_stop(int ind
     stop.color = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
     stop.offset = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
 
     return stop;
 }
 
 struct RadialGradient {
     vec4 start_end_center;
-    vec4 start_end_radius;
+    vec4 start_end_radius_extend_mode;
 };
 
 RadialGradient fetch_radial_gradient(int index) {
     RadialGradient gradient;
 
     ivec2 uv = get_fetch_uv_2(index);
 
     gradient.start_end_center = texelFetchOffset(sData32, uv, 0, ivec2(0, 0));
-    gradient.start_end_radius = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
+    gradient.start_end_radius_extend_mode = texelFetchOffset(sData32, uv, 0, ivec2(1, 0));
 
     return gradient;
 }
 
 struct Glyph {
     vec4 offset;
 };
 
@@ -315,34 +316,34 @@ CachePrimitiveInstance fetch_cache_insta
     cpi.sub_index = pi.sub_index;
     cpi.user_data = pi.user_data;
 
     return cpi;
 }
 
 struct Primitive {
     Layer layer;
-    Tile tile;
     ClipArea clip_area;
+    AlphaBatchTask task;
     vec4 local_rect;
     vec4 local_clip_rect;
     int prim_index;
     // when sending multiple primitives of the same type (e.g. border segments)
     // this index allows the vertex shader to recognize the difference
     int sub_index;
     ivec2 user_data;
     float z;
 };
 
 Primitive load_primitive_custom(PrimitiveInstance pi) {
     Primitive prim;
 
     prim.layer = fetch_layer(pi.layer_index);
-    prim.tile = fetch_tile(pi.render_task_index);
     prim.clip_area = fetch_clip_area(pi.clip_task_index);
+    prim.task = fetch_alpha_batch_task(pi.render_task_index);
 
     PrimitiveGeometry pg = fetch_prim_geometry(pi.global_prim_index);
     prim.local_rect = pg.local_rect;
     prim.local_clip_rect = pg.local_clip_rect;
 
     prim.prim_index = pi.specific_prim_index;
     prim.sub_index = pi.sub_index;
     prim.user_data = pi.user_data;
@@ -408,69 +409,84 @@ vec2 clamp_rect(vec2 point, vec4 rect) {
 
 struct Rect {
     vec2 p0;
     vec2 p1;
 };
 
 struct VertexInfo {
     Rect local_rect;
-    vec2 local_clamped_pos;
-    vec2 global_clamped_pos;
+    vec2 local_pos;
+    vec2 screen_pos;
 };
 
 VertexInfo write_vertex(vec4 instance_rect,
                         vec4 local_clip_rect,
                         float z,
                         Layer layer,
-                        Tile tile) {
-    vec2 p0 = floor(0.5 + instance_rect.xy * uDevicePixelRatio) / uDevicePixelRatio;
-    vec2 p1 = floor(0.5 + (instance_rect.xy + instance_rect.zw) * uDevicePixelRatio) / uDevicePixelRatio;
+                        AlphaBatchTask task) {
+    // Get the min/max local space coords of the rectangle.
+    vec2 local_p0 = instance_rect.xy;
+    vec2 local_p1 = instance_rect.xy + instance_rect.zw;
 
-    vec2 local_pos = mix(p0, p1, aPosition.xy);
+    // Get the min/max coords of the local space clip rect.
+    vec2 local_clip_p0 = local_clip_rect.xy;
+    vec2 local_clip_p1 = local_clip_rect.xy + local_clip_rect.zw;
+
+    // Get the min/max coords of the layer clip rect.
+    vec2 layer_clip_p0 = layer.local_clip_rect.xy;
+    vec2 layer_clip_p1 = layer.local_clip_rect.xy + layer.local_clip_rect.zw;
 
-    vec2 cp0 = floor(0.5 + local_clip_rect.xy * uDevicePixelRatio) / uDevicePixelRatio;
-    vec2 cp1 = floor(0.5 + (local_clip_rect.xy + local_clip_rect.zw) * uDevicePixelRatio) / uDevicePixelRatio;
-    local_pos = clamp(local_pos, cp0, cp1);
+    // Select the corner of the local rect that we are processing.
+    vec2 local_pos = mix(local_p0, local_p1, aPosition.xy);
+
+    // xy = top left corner of the local rect, zw = position of current vertex.
+    vec4 local_p0_pos = vec4(local_p0, local_pos);
 
-    local_pos = clamp_rect(local_pos, layer.local_clip_rect);
+    // Clamp to the two local clip rects.
+    local_p0_pos = clamp(local_p0_pos, local_clip_p0.xyxy, local_clip_p1.xyxy);
+    local_p0_pos = clamp(local_p0_pos, layer_clip_p0.xyxy, layer_clip_p1.xyxy);
 
-    vec4 world_pos = layer.transform * vec4(local_pos, 0.0, 1.0);
+    // Transform the top corner and current vertex to world space.
+    vec4 world_p0 = layer.transform * vec4(local_p0_pos.xy, 0.0, 1.0);
+    world_p0.xyz /= world_p0.w;
+    vec4 world_pos = layer.transform * vec4(local_p0_pos.zw, 0.0, 1.0);
     world_pos.xyz /= world_pos.w;
 
-    vec2 device_pos = world_pos.xy * uDevicePixelRatio;
+    // Convert the world positions to device pixel space. xy=top left corner. zw=current vertex.
+    vec4 device_p0_pos = vec4(world_p0.xy, world_pos.xy) * uDevicePixelRatio;
+
+    // Calculate the distance to snap the vertex by (snap top left corner).
+    vec2 snap_delta = device_p0_pos.xy - floor(device_p0_pos.xy + 0.5);
 
-    vec2 clamped_pos = clamp(device_pos,
-                             tile.screen_origin_task_origin.xy,
-                             tile.screen_origin_task_origin.xy + tile.size_target_index.xy);
-
-    vec4 local_clamped_pos = layer.inv_transform * vec4(clamped_pos / uDevicePixelRatio, world_pos.z, 1);
-    local_clamped_pos.xyz /= local_clamped_pos.w;
-
-    vec2 final_pos = clamped_pos + tile.screen_origin_task_origin.zw - tile.screen_origin_task_origin.xy;
+    // Apply offsets for the render task to get correct screen location.
+    vec2 final_pos = device_p0_pos.zw -
+                     snap_delta -
+                     task.screen_space_origin +
+                     task.render_target_origin;
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
-    VertexInfo vi = VertexInfo(Rect(p0, p1), local_clamped_pos.xy, clamped_pos.xy);
+    VertexInfo vi = VertexInfo(Rect(local_p0, local_p1), local_p0_pos.zw, device_p0_pos.zw);
     return vi;
 }
 
 #ifdef WR_FEATURE_TRANSFORM
 
 struct TransformVertexInfo {
     vec3 local_pos;
-    vec2 global_clamped_pos;
+    vec2 screen_pos;
     vec4 clipped_local_rect;
 };
 
 TransformVertexInfo write_transform_vertex(vec4 instance_rect,
                                            vec4 local_clip_rect,
                                            float z,
                                            Layer layer,
-                                           Tile tile) {
+                                           AlphaBatchTask task) {
     vec2 lp0_base = instance_rect.xy;
     vec2 lp1_base = instance_rect.xy + instance_rect.zw;
 
     vec2 lp0 = clamp_rect(clamp_rect(lp0_base, local_clip_rect),
                           layer.local_clip_rect);
     vec2 lp1 = clamp_rect(clamp_rect(lp1_base, local_clip_rect),
                           layer.local_clip_rect);
 
@@ -487,42 +503,31 @@ TransformVertexInfo write_transform_vert
     vec4 t3 = layer.transform * vec4(p3, 0, 1);
 
     vec2 tp0 = t0.xy / t0.w;
     vec2 tp1 = t1.xy / t1.w;
     vec2 tp2 = t2.xy / t2.w;
     vec2 tp3 = t3.xy / t3.w;
 
     // compute a CSS space aligned bounding box
-    vec2 min_pos = min(min(tp0.xy, tp1.xy), min(tp2.xy, tp3.xy));
-    vec2 max_pos = max(max(tp0.xy, tp1.xy), max(tp2.xy, tp3.xy));
-
-    // clamp to the tile boundaries, in device space
-    vec2 min_pos_clamped = clamp(min_pos * uDevicePixelRatio,
-                                 tile.screen_origin_task_origin.xy,
-                                 tile.screen_origin_task_origin.xy + tile.size_target_index.xy);
-
-    vec2 max_pos_clamped = clamp(max_pos * uDevicePixelRatio,
-                                 tile.screen_origin_task_origin.xy,
-                                 tile.screen_origin_task_origin.xy + tile.size_target_index.xy);
+    vec2 min_pos = uDevicePixelRatio * min(min(tp0.xy, tp1.xy), min(tp2.xy, tp3.xy));
+    vec2 max_pos = uDevicePixelRatio * max(max(tp0.xy, tp1.xy), max(tp2.xy, tp3.xy));
 
     // compute the device space position of this vertex
-    vec2 clamped_pos = mix(min_pos_clamped,
-                           max_pos_clamped,
-                           aPosition.xy);
+    vec2 device_pos = mix(min_pos, max_pos, aPosition.xy);
 
     // compute the point position in side the layer, in CSS space
-    vec4 layer_pos = get_layer_pos(clamped_pos / uDevicePixelRatio, layer);
+    vec4 layer_pos = get_layer_pos(device_pos / uDevicePixelRatio, layer);
 
     // apply the task offset
-    vec2 final_pos = clamped_pos + tile.screen_origin_task_origin.zw - tile.screen_origin_task_origin.xy;
+    vec2 final_pos = device_pos - task.screen_space_origin + task.render_target_origin;
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
-    return TransformVertexInfo(layer_pos.xyw, clamped_pos, clipped_local_rect);
+    return TransformVertexInfo(layer_pos.xyw, device_pos, clipped_local_rect);
 }
 
 #endif //WR_FEATURE_TRANSFORM
 
 struct ResourceRect {
     vec4 uv_rect;
 };
 
--- a/gfx/webrender/res/ps_angle_gradient.fs.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.fs.glsl
@@ -1,39 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-float offset(int index) {
-    return vOffsets[index / 4][index % 4];
-}
-
-float linearStep(float lo, float hi, float x) {
-    float d = hi - lo;
-    float v = x - lo;
-    if (d != 0.0) {
-        v /= d;
-    }
-    return clamp(v, 0.0, 1.0);
-}
+uniform sampler2D sGradients;
 
 void main(void) {
-    float angle = atan(-vEndPoint.y + vStartPoint.y,
-                        vEndPoint.x - vStartPoint.x);
-    float sa = sin(angle);
-    float ca = cos(angle);
+    vec2 texture_size = vec2(textureSize(sGradients, 0));
 
-    float sx = vStartPoint.x * ca - vStartPoint.y * sa;
-    float ex = vEndPoint.x * ca - vEndPoint.y * sa;
-    float d = ex - sx;
-
-    float x = vPos.x * ca - vPos.y * sa;
+    // Either saturate or modulo the offset depending on repeat mode, then scale to number of
+    // gradient color entries (texture width / 2).
+    float x = mix(clamp(vOffset, 0.0, 1.0), fract(vOffset), vGradientRepeat) * 0.5 * texture_size.x;
 
-    oFragColor = mix(vColors[0],
-                     vColors[1],
-                     linearStep(sx + d * offset(0), sx + d * offset(1), x));
+    // Start at the center of first color in the nearest 2-color entry, then offset with the
+    // fractional remainder to interpolate between the colors. Rely on texture clamping when
+    // outside of valid range.
+    x = 2.0 * floor(x) + 0.5 + fract(x);
 
-    for (int i=1 ; i < vStopCount-1 ; ++i) {
-        oFragColor = mix(oFragColor,
-                         vColors[i+1],
-                         linearStep(sx + d * offset(i), sx + d * offset(i+1), x));
-    }
+    // Normalize the texture coordates so we can use texture() for bilinear filtering.
+    oFragColor = texture(sGradients, vec2(x, vGradientIndex) / texture_size);
 }
--- a/gfx/webrender/res/ps_angle_gradient.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.glsl
@@ -1,11 +1,7 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-flat varying int vStopCount;
-flat varying float vAngle;
-flat varying vec2 vStartPoint;
-flat varying vec2 vEndPoint;
-varying vec2 vPos;
-flat varying vec4 vColors[MAX_STOPS_PER_ANGLE_GRADIENT];
-flat varying vec4 vOffsets[MAX_STOPS_PER_ANGLE_GRADIENT/4];
+flat varying float vGradientIndex;
+flat varying float vGradientRepeat;
+varying float vOffset;
--- a/gfx/webrender/res/ps_angle_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.vs.glsl
@@ -6,28 +6,29 @@
 void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.prim_index);
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
-
-    vStopCount = int(prim.user_data.x);
-    vPos = vi.local_clamped_pos;
+                                 prim.task);
 
     // Snap the start/end points to device pixel units.
     // I'm not sure this is entirely correct, but the
     // old render path does this, and it is needed to
     // make the angle gradient ref tests pass. It might
     // be better to fix this higher up in DL construction
     // and not snap here?
-    vStartPoint = floor(0.5 + gradient.start_end_point.xy * uDevicePixelRatio) / uDevicePixelRatio;
-    vEndPoint = floor(0.5 + gradient.start_end_point.zw * uDevicePixelRatio) / uDevicePixelRatio;
+    vec2 start_point = floor(0.5 + gradient.start_end_point.xy * uDevicePixelRatio) / uDevicePixelRatio;
+    vec2 end_point = floor(0.5 + gradient.start_end_point.zw * uDevicePixelRatio) / uDevicePixelRatio;
+
+    vec2 dir = end_point - start_point;
+    // Normalized offset of this vertex within the gradient, before clamp/repeat.
+    vOffset = dot(vi.local_pos - start_point, dir) / dot(dir, dir);
 
-    for (int i=0 ; i < vStopCount ; ++i) {
-        GradientStop stop = fetch_gradient_stop(prim.sub_index + i);
-        vColors[i] = stop.color;
-        vOffsets[i/4][i%4] = stop.offset.x;
-    }
+    // V coordinate of gradient row in lookup texture.
+    vGradientIndex = float(prim.sub_index) + 0.5;
+
+    // Whether to repeat the gradient instead of clamping.
+    vGradientRepeat = float(int(gradient.extend_mode.x) == EXTEND_MODE_REPEAT);
 }
--- a/gfx/webrender/res/ps_blend.vs.glsl
+++ b/gfx/webrender/res/ps_blend.vs.glsl
@@ -1,46 +1,28 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-struct Blend {
-    ivec4 src_id_target_id_op_amount;
-    int z;
-};
-
-Blend fetch_blend() {
+void main(void) {
     PrimitiveInstance pi = fetch_prim_instance();
+    AlphaBatchTask dest_task = fetch_alpha_batch_task(pi.render_task_index);
+    AlphaBatchTask src_task = fetch_alpha_batch_task(pi.user_data.x);
 
-    Blend blend;
-    blend.src_id_target_id_op_amount = ivec4(pi.user_data.x,
-                                             pi.render_task_index,
-                                             pi.sub_index,
-                                             pi.user_data.y);
-    blend.z = pi.z;
-
-    return blend;
-}
-
-void main(void) {
-    Blend blend = fetch_blend();
-    Tile src = fetch_tile(blend.src_id_target_id_op_amount.x);
-    Tile dest = fetch_tile(blend.src_id_target_id_op_amount.y);
-
-    vec2 dest_origin = dest.screen_origin_task_origin.zw -
-                       dest.screen_origin_task_origin.xy +
-                       src.screen_origin_task_origin.xy;
+    vec2 dest_origin = dest_task.render_target_origin -
+                       dest_task.screen_space_origin +
+                       src_task.screen_space_origin;
 
     vec2 local_pos = mix(dest_origin,
-                         dest_origin + src.size_target_index.xy,
+                         dest_origin + src_task.size,
                          aPosition.xy);
 
     vec2 texture_size = vec2(textureSize(sCache, 0));
-    vec2 st0 = src.screen_origin_task_origin.zw / texture_size;
-    vec2 st1 = (src.screen_origin_task_origin.zw + src.size_target_index.xy) / texture_size;
-    vUv = vec3(mix(st0, st1, aPosition.xy), src.size_target_index.z);
+    vec2 st0 = src_task.render_target_origin / texture_size;
+    vec2 st1 = (src_task.render_target_origin + src_task.size) / texture_size;
+    vUv = vec3(mix(st0, st1, aPosition.xy), src_task.render_target_layer_index);
 
-    vOp = blend.src_id_target_id_op_amount.z;
-    vAmount = float(blend.src_id_target_id_op_amount.w) / 65535.0;
+    vOp = pi.sub_index;
+    vAmount = float(pi.user_data.y) / 65535.0;
 
-    gl_Position = uTransform * vec4(local_pos, blend.z, 1.0);
+    gl_Position = uTransform * vec4(local_pos, pi.z, 1.0);
 }
--- a/gfx/webrender/res/ps_border.vs.glsl
+++ b/gfx/webrender/res/ps_border.vs.glsl
@@ -133,28 +133,28 @@ void main(void) {
             break;
     }
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.tile);
+                                                    prim.task);
     vLocalPos = vi.local_pos;
 
     // Local space
     vLocalRect = vi.clipped_local_rect;
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
-    vLocalPos = vi.local_clamped_pos.xy;
+                                 prim.task);
+    vLocalPos = vi.local_pos.xy;
 
     // Local space
     vLocalRect = prim.local_rect;
 #endif
 
     float x0, y0, x1, y1;
     switch (sub_part) {
         // These are the layer tile part PrimitivePart as uploaded by the tiling.rs
@@ -212,15 +212,15 @@ void main(void) {
 
     // The fragment shader needs to calculate the distance from the bisecting line
     // to properly mix border colors. For transformed borders, we calculate this distance
     // in the fragment shader itself. For non-transformed borders, we can use the
     // interpolator.
 #ifdef WR_FEATURE_TRANSFORM
     vPieceRectHypotenuseLength = sqrt(pow(width, 2.0) + pow(height, 2.0));
 #else
-    vDistanceFromMixLine = (vi.local_clamped_pos.x - x0) * height -
-                           (vi.local_clamped_pos.y - y0) * width;
-    vDistanceFromMiddle = (vi.local_clamped_pos.x - vLocalRect.x) +
-                          (vi.local_clamped_pos.y - vLocalRect.y) -
+    vDistanceFromMixLine = (vi.local_pos.x - x0) * height -
+                           (vi.local_pos.y - y0) * width;
+    vDistanceFromMiddle = (vi.local_pos.x - vLocalRect.x) +
+                          (vi.local_pos.y - vLocalRect.y) -
                           0.5 * (vLocalRect.z + vLocalRect.w);
 #endif
 }
--- a/gfx/webrender/res/ps_box_shadow.vs.glsl
+++ b/gfx/webrender/res/ps_box_shadow.vs.glsl
@@ -7,26 +7,26 @@ void main(void) {
     Primitive prim = load_primitive();
     BoxShadow bs = fetch_boxshadow(prim.prim_index);
     vec4 segment_rect = fetch_instance_geometry(prim.sub_index);
 
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
+                                 prim.task);
 
     RenderTaskData child_task = fetch_render_task(prim.user_data.x);
     vUv.z = child_task.data1.x;
 
     // Constant offsets to inset from bilinear filtering border.
     vec2 patch_origin = child_task.data0.xy + vec2(1.0);
     vec2 patch_size_device_pixels = child_task.data0.zw - vec2(2.0);
     vec2 patch_size = patch_size_device_pixels / uDevicePixelRatio;
 
-    vUv.xy = (vi.local_clamped_pos - prim.local_rect.xy) / patch_size;
+    vUv.xy = (vi.local_pos - prim.local_rect.xy) / patch_size;
     vMirrorPoint = 0.5 * prim.local_rect.zw / patch_size;
 
     vec2 texture_size = vec2(textureSize(sCache, 0));
     vCacheUvRectCoords = vec4(patch_origin, patch_origin + patch_size_device_pixels) / texture_size.xyxy;
 
     vColor = bs.color;
 }
--- a/gfx/webrender/res/ps_cache_image.vs.glsl
+++ b/gfx/webrender/res/ps_cache_image.vs.glsl
@@ -8,21 +8,21 @@
 
 void main(void) {
     Primitive prim = load_primitive();
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
+                                 prim.task);
 
     RenderTaskData child_task = fetch_render_task(prim.user_data.x);
     vUv.z = child_task.data1.x;
 
     vec2 texture_size = vec2(textureSize(sCache, 0));
     vec2 uv0 = child_task.data0.xy / texture_size;
     vec2 uv1 = (child_task.data0.xy + child_task.data0.zw) / texture_size;
 
-    vec2 f = (vi.local_clamped_pos - prim.local_rect.xy) / prim.local_rect.zw;
+    vec2 f = (vi.local_pos - prim.local_rect.xy) / prim.local_rect.zw;
 
     vUv.xy = mix(uv0, uv1, f);
 }
--- a/gfx/webrender/res/ps_composite.fs.glsl
+++ b/gfx/webrender/res/ps_composite.fs.glsl
@@ -148,40 +148,39 @@ vec3 Color(vec3 Cb, vec3 Cs) {
 }
 
 vec3 Luminosity(vec3 Cb, vec3 Cs) {
     return SetLum(Cb, Lum(Cs));
 }
 
 void main(void) {
     vec4 Cb = texture(sCache, vUv0);
-
-    if (vUv1.x < vUv1Rect.x ||
-        vUv1.x > vUv1Rect.z ||
-        vUv1.y < vUv1Rect.y ||
-        vUv1.y > vUv1Rect.w) {
-        oFragColor = Cb;
-        return;
-    }
-
     vec4 Cs = texture(sCache, vUv1);
 
     // Return yellow if none of the branches match (shouldn't happen).
     vec4 result = vec4(1.0, 1.0, 0.0, 1.0);
 
     switch (vOp) {
         case 1:
             result.rgb = Multiply(Cb.rgb, Cs.rgb);
             break;
         case 2:
             result.rgb = Screen(Cb.rgb, Cs.rgb);
             break;
         case 3:
             result.rgb = HardLight(Cs.rgb, Cb.rgb);        // Overlay is inverse of Hardlight
             break;
+        case 4:
+            // mix-blend-mode: darken
+            result.rgb = min(Cs.rgb, Cb.rgb);
+            break;
+        case 5:
+            // mix-blend-mode: lighten
+            result.rgb = max(Cs.rgb, Cb.rgb);
+            break;
         case 6:
             result.r = ColorDodge(Cb.r, Cs.r);
             result.g = ColorDodge(Cb.g, Cs.g);
             result.b = ColorDodge(Cb.b, Cs.b);
             break;
         case 7:
             result.r = ColorBurn(Cb.r, Cs.r);
             result.g = ColorBurn(Cb.g, Cs.g);
--- a/gfx/webrender/res/ps_composite.glsl
+++ b/gfx/webrender/res/ps_composite.glsl
@@ -1,8 +1,7 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 varying vec3 vUv0;
 varying vec3 vUv1;
-flat varying vec4 vUv1Rect;
 flat varying int vOp;
--- a/gfx/webrender/res/ps_composite.vs.glsl
+++ b/gfx/webrender/res/ps_composite.vs.glsl
@@ -1,50 +1,34 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-struct Composite {
-    ivec4 src0_src1_target_id_op;
-    int z;
-};
-
-Composite fetch_composite() {
+void main(void) {
     PrimitiveInstance pi = fetch_prim_instance();
+    AlphaBatchTask dest_task = fetch_alpha_batch_task(pi.render_task_index);
+    AlphaBatchTask backdrop_task = fetch_alpha_batch_task(pi.user_data.x);
+    AlphaBatchTask src_task = fetch_alpha_batch_task(pi.user_data.y);
 
-    Composite composite;
-    composite.src0_src1_target_id_op = ivec4(pi.user_data.xy,
-                                             pi.render_task_index,
-                                             pi.sub_index);
-    composite.z = pi.z;
-
-    return composite;
-}
+    vec2 dest_origin = dest_task.render_target_origin -
+                       dest_task.screen_space_origin +
+                       src_task.screen_space_origin;
 
-void main(void) {
-    Composite composite = fetch_composite();
-    Tile src0 = fetch_tile(composite.src0_src1_target_id_op.x);
-    Tile src1 = fetch_tile(composite.src0_src1_target_id_op.y);
-    Tile dest = fetch_tile(composite.src0_src1_target_id_op.z);
-
-    vec2 local_pos = mix(dest.screen_origin_task_origin.zw,
-                         dest.screen_origin_task_origin.zw + dest.size_target_index.xy,
+    vec2 local_pos = mix(dest_origin,
+                         dest_origin + src_task.size,
                          aPosition.xy);
 
     vec2 texture_size = vec2(textureSize(sCache, 0));
-    vec2 st0 = src0.screen_origin_task_origin.zw / texture_size;
-    vec2 st1 = (src0.screen_origin_task_origin.zw + src0.size_target_index.xy) / texture_size;
-    vUv0 = vec3(mix(st0, st1, aPosition.xy), src0.size_target_index.z);
+
+    vec2 st0 = (backdrop_task.render_target_origin + vec2(0.0, backdrop_task.size.y)) / texture_size;
+    vec2 st1 = (backdrop_task.render_target_origin + vec2(backdrop_task.size.x, 0.0)) / texture_size;
+    vUv0 = vec3(mix(st0, st1, aPosition.xy), backdrop_task.render_target_layer_index);
 
-    st0 = vec2(src1.screen_origin_task_origin.zw) / texture_size;
-    st1 = vec2(src1.screen_origin_task_origin.zw + src1.size_target_index.xy) / texture_size;
-    vec2 local_virtual_pos = mix(dest.screen_origin_task_origin.xy,
-                                 dest.screen_origin_task_origin.xy + dest.size_target_index.xy,
-                                 aPosition.xy);
-    vec2 f = (local_virtual_pos - src1.screen_origin_task_origin.xy) / src1.size_target_index.xy;
-    vUv1 = vec3(mix(st0, st1, f), src1.size_target_index.z);
-    vUv1Rect = vec4(st0, st1);
+    st0 = src_task.render_target_origin / texture_size;
+    st1 = (src_task.render_target_origin + src_task.size) / texture_size;
+    vUv1 = vec3(mix(st0, st1, aPosition.xy), src_task.render_target_layer_index);
 
-    vOp = composite.src0_src1_target_id_op.w;
+    vOp = pi.sub_index;
 
-    gl_Position = uTransform * vec4(local_pos, composite.z, 1.0);
+    gl_Position = uTransform * vec4(local_pos, pi.z, 1.0);
+
 }
--- a/gfx/webrender/res/ps_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_gradient.vs.glsl
@@ -6,67 +6,57 @@
 void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.prim_index);
 
     GradientStop g0 = fetch_gradient_stop(prim.sub_index + 0);
     GradientStop g1 = fetch_gradient_stop(prim.sub_index + 1);
 
     vec4 segment_rect;
-    switch (int(gradient.kind.x)) {
-        case GRADIENT_HORIZONTAL: {
-            float x0 = mix(gradient.start_end_point.x,
-                           gradient.start_end_point.z,
-                           g0.offset.x);
-            float x1 = mix(gradient.start_end_point.x,
-                           gradient.start_end_point.z,
-                           g1.offset.x);
-            segment_rect.yw = prim.local_rect.yw;
-            segment_rect.x = x0;
-            segment_rect.z = x1 - x0;
-            } break;
-        case GRADIENT_VERTICAL: {
-            float y0 = mix(gradient.start_end_point.y,
-                           gradient.start_end_point.w,
-                           g0.offset.x);
-            float y1 = mix(gradient.start_end_point.y,
-                           gradient.start_end_point.w,
-                           g1.offset.x);
-            segment_rect.xz = prim.local_rect.xz;
-            segment_rect.y = y0;
-            segment_rect.w = y1 - y0;
-            } break;
+    vec2 axis;
+    if (gradient.start_end_point.y == gradient.start_end_point.w) {
+        float x0 = mix(gradient.start_end_point.x,
+                       gradient.start_end_point.z,
+                       g0.offset.x);
+        float x1 = mix(gradient.start_end_point.x,
+                       gradient.start_end_point.z,
+                       g1.offset.x);
+        segment_rect.yw = prim.local_rect.yw;
+        segment_rect.x = x0;
+        segment_rect.z = x1 - x0;
+        axis = vec2(1.0, 0.0);
+    } else {
+        float y0 = mix(gradient.start_end_point.y,
+                       gradient.start_end_point.w,
+                       g0.offset.x);
+        float y1 = mix(gradient.start_end_point.y,
+                       gradient.start_end_point.w,
+                       g1.offset.x);
+        segment_rect.xz = prim.local_rect.xz;
+        segment_rect.y = y0;
+        segment_rect.w = y1 - y0;
+        axis = vec2(0.0, 1.0);
     }
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.tile);
+                                                    prim.task);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy - prim.local_rect.xy) / prim.local_rect.zw;
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
+                                 prim.task);
 
-    vec2 f = (vi.local_clamped_pos - segment_rect.xy) / segment_rect.zw;
-    vPos = vi.local_clamped_pos;
+    vec2 f = (vi.local_pos - segment_rect.xy) / segment_rect.zw;
+    vPos = vi.local_pos;
 #endif
 
-    write_clip(vi.global_clamped_pos, prim.clip_area);
+    write_clip(vi.screen_pos, prim.clip_area);
 
-    switch (int(gradient.kind.x)) {
-        case GRADIENT_HORIZONTAL:
-            vColor = mix(g0.color, g1.color, f.x);
-            break;
-        case GRADIENT_VERTICAL:
-            vColor = mix(g0.color, g1.color, f.y);
-            break;
-        case GRADIENT_ROTATED:
-            vColor = vec4(1.0, 0.0, 1.0, 1.0);
-            break;
-    }
+    vColor = mix(g0.color, g1.color, dot(f, axis));
 }
copy from gfx/webrender/res/ps_composite.glsl
copy to gfx/webrender/res/ps_hardware_composite.fs.glsl
--- a/gfx/webrender/res/ps_composite.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.fs.glsl
@@ -1,8 +1,7 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-varying vec3 vUv0;
-varying vec3 vUv1;
-flat varying vec4 vUv1Rect;
-flat varying int vOp;
+void main(void) {
+    oFragColor = texture(sCache, vUv);
+}
copy from gfx/webrender/res/ps_composite.glsl
copy to gfx/webrender/res/ps_hardware_composite.glsl
--- a/gfx/webrender/res/ps_composite.glsl
+++ b/gfx/webrender/res/ps_hardware_composite.glsl
@@ -1,8 +1,5 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-varying vec3 vUv0;
-varying vec3 vUv1;
-flat varying vec4 vUv1Rect;
-flat varying int vOp;
+varying vec3 vUv;
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/ps_hardware_composite.vs.glsl
@@ -0,0 +1,25 @@
+#line 1
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+void main(void) {
+    PrimitiveInstance pi = fetch_prim_instance();
+    AlphaBatchTask dest_task = fetch_alpha_batch_task(pi.render_task_index);
+    AlphaBatchTask src_task = fetch_alpha_batch_task(pi.user_data.x);
+
+    vec2 dest_origin = dest_task.render_target_origin -
+                       dest_task.screen_space_origin +
+                       src_task.screen_space_origin;
+
+    vec2 local_pos = mix(dest_origin,
+                         dest_origin + src_task.size,
+                         aPosition.xy);
+
+    vec2 texture_size = vec2(textureSize(sCache, 0));
+    vec2 st0 = src_task.render_target_origin / texture_size;
+    vec2 st1 = (src_task.render_target_origin + src_task.size) / texture_size;
+    vUv = vec3(mix(st0, st1, aPosition.xy), src_task.render_target_layer_index);
+
+    gl_Position = uTransform * vec4(local_pos, pi.z, 1.0);
+}
--- a/gfx/webrender/res/ps_image.fs.glsl
+++ b/gfx/webrender/res/ps_image.fs.glsl
@@ -19,13 +19,17 @@ void main(void) {
 #endif
 
     alpha = min(alpha, do_clip());
 
     // We calculate the particular tile this fragment belongs to, taking into
     // account the spacing in between tiles. We only paint if our fragment does
     // not fall into that spacing.
     vec2 position_in_tile = mod(relative_pos_in_rect, vStretchSize + vTileSpacing);
+    // We clamp the texture coordinates to the half-pixel offset from the borders
+    // in order to avoid sampling outside of the texture area.
     vec2 st = vTextureOffset + ((position_in_tile / vStretchSize) * vTextureSize);
+    st = clamp(st, vStRect.xy, vStRect.zw);
+
     alpha = alpha * float(all(bvec2(step(position_in_tile, vStretchSize))));
 
-    oFragColor = vec4(1.0, 1.0, 1.0, alpha) * texture(sColor0, st);
+    oFragColor = vec4(1.0, 1.0, 1.0, alpha) * textureLod(sColor0, st, 0.0);
 }
--- a/gfx/webrender/res/ps_image.glsl
+++ b/gfx/webrender/res/ps_image.glsl
@@ -1,15 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 flat varying vec2 vTextureOffset; // Offset of this image into the texture atlas.
 flat varying vec2 vTextureSize;   // Size of the image in the texture atlas.
 flat varying vec2 vTileSpacing;   // Amount of space between tiled instances of this image.
+flat varying vec4 vStRect;       // Half-texel-inset bounds (min in .xy, max in .zw) of the valid texture area, in st-space.
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 flat varying vec4 vLocalRect;
 flat varying vec2 vStretchSize;
 #else
 varying vec2 vLocalPos;
 flat varying vec2 vStretchSize;
--- a/gfx/webrender/res/ps_image.vs.glsl
+++ b/gfx/webrender/res/ps_image.vs.glsl
@@ -8,32 +8,35 @@ void main(void) {
     Image image = fetch_image(prim.prim_index);
     ResourceRect res = fetch_resource_rect(prim.user_data.x);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.tile);
+                                                    prim.task);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
-    vLocalPos = vi.local_clamped_pos - vi.local_rect.p0;
+                                 prim.task);
+    vLocalPos = vi.local_pos - vi.local_rect.p0;
 #endif
 
-    write_clip(vi.global_clamped_pos, prim.clip_area);
+    write_clip(vi.screen_pos, prim.clip_area);
 
     // vUv will contain how many times this image has wrapped around the image size.
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
     vTextureSize = st1 - st0;
     vTextureOffset = st0;
     vTileSpacing = image.stretch_size_and_tile_spacing.zw;
     vStretchSize = image.stretch_size_and_tile_spacing.xy;
+
+    vec2 half_texel = vec2(0.5) / texture_size;
+    vStRect = vec4(min(st0, st1) + half_texel, max(st0, st1) - half_texel);
 }
--- a/gfx/webrender/res/ps_radial_gradient.fs.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.fs.glsl
@@ -1,24 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-float offset(int index) {
-    return vOffsets[index / 4][index % 4];
-}
-
-float linearStep(float lo, float hi, float x) {
-    float d = hi - lo;
-    float v = x - lo;
-    if (d != 0.0) {
-        v /= d;
-    }
-    return clamp(v, 0.0, 1.0);
-}
+uniform sampler2D sGradients;
 
 void main(void) {
     vec2 cd = vEndCenter - vStartCenter;
     vec2 pd = vPos - vStartCenter;
     float rd = vEndRadius - vStartRadius;
 
     // Solve for t in length(t * cd - pd) = vStartRadius + t * rd
     // using a quadratic equation in form of At^2 - 2Bt + C = 0
@@ -50,18 +39,22 @@ void main(void) {
             x = t0;
         } else if (vStartRadius + rd * t1 >= 0.0) {
             x = t1;
         } else {
             discard;
         }
     }
 
-    oFragColor = mix(vColors[0],
-                     vColors[1],
-                     linearStep(offset(0), offset(1), x));
+    vec2 texture_size = vec2(textureSize(sGradients, 0));
+
+    // Either saturate or modulo the offset depending on repeat mode, then scale to number of
+    // gradient color entries (texture width / 2).
+    x = mix(clamp(x, 0.0, 1.0), fract(x), vGradientRepeat) * 0.5 * texture_size.x;
 
-    for (int i=1 ; i < vStopCount-1 ; ++i) {
-        oFragColor = mix(oFragColor,
-                         vColors[i+1],
-                         linearStep(offset(i), offset(i+1), x));
-    }
+    // Start at the center of first color in the nearest 2-color entry, then offset with the
+    // fractional remainder to interpolate between the colors. Rely on texture clamping when
+    // outside of valid range.
+    x = 2.0 * floor(x) + 0.5 + fract(x);
+
+    // Normalize the texture coordinates so we can use texture() for bilinear filtering.
+    oFragColor = texture(sGradients, vec2(x, vGradientIndex) / texture_size);
 }
--- a/gfx/webrender/res/ps_radial_gradient.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.glsl
@@ -1,12 +1,11 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-flat varying int vStopCount;
+flat varying float vGradientIndex;
+flat varying float vGradientRepeat;
 flat varying vec2 vStartCenter;
 flat varying vec2 vEndCenter;
 flat varying float vStartRadius;
 flat varying float vEndRadius;
 varying vec2 vPos;
-flat varying vec4 vColors[MAX_STOPS_PER_RADIAL_GRADIENT];
-flat varying vec4 vOffsets[MAX_STOPS_PER_RADIAL_GRADIENT/4];
--- a/gfx/webrender/res/ps_radial_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.vs.glsl
@@ -6,30 +6,29 @@
 void main(void) {
     Primitive prim = load_primitive();
     RadialGradient gradient = fetch_radial_gradient(prim.prim_index);
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
+                                 prim.task);
 
-    vStopCount = int(prim.user_data.x);
-    vPos = vi.local_clamped_pos;
+    vPos = vi.local_pos;
 
     // Snap the start/end points to device pixel units.
     // I'm not sure this is entirely correct, but the
     // old render path does this, and it is needed to
     // make the angle gradient ref tests pass. It might
     // be better to fix this higher up in DL construction
     // and not snap here?
     vStartCenter = floor(0.5 + gradient.start_end_center.xy * uDevicePixelRatio) / uDevicePixelRatio;
     vEndCenter = floor(0.5 + gradient.start_end_center.zw * uDevicePixelRatio) / uDevicePixelRatio;
-    vStartRadius = gradient.start_end_radius.x;
-    vEndRadius = gradient.start_end_radius.y;
+    vStartRadius = gradient.start_end_radius_extend_mode.x;
+    vEndRadius = gradient.start_end_radius_extend_mode.y;
 
-    for (int i=0 ; i < vStopCount ; ++i) {
-        GradientStop stop = fetch_gradient_stop(prim.sub_index + i);
-        vColors[i] = stop.color;
-        vOffsets[i/4][i%4] = stop.offset.x;
-    }
+    // V coordinate of gradient row in lookup texture.
+    vGradientIndex = float(prim.sub_index) + 0.5;
+
+    // Whether to repeat the gradient instead of clamping.
+    vGradientRepeat = float(int(gradient.start_end_radius_extend_mode.z) == EXTEND_MODE_REPEAT);
 }
--- a/gfx/webrender/res/ps_rectangle.vs.glsl
+++ b/gfx/webrender/res/ps_rectangle.vs.glsl
@@ -7,23 +7,23 @@ void main(void) {
     Primitive prim = load_primitive();
     Rectangle rect = fetch_rectangle(prim.prim_index);
     vColor = rect.color;
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.tile);
+                                                    prim.task);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
+                                 prim.task);
 #endif
 
 #ifdef WR_FEATURE_CLIP
-    write_clip(vi.global_clamped_pos, prim.clip_area);
+    write_clip(vi.screen_pos, prim.clip_area);
 #endif
 }
--- a/gfx/webrender/res/ps_text_run.fs.glsl
+++ b/gfx/webrender/res/ps_text_run.fs.glsl
@@ -1,18 +1,19 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
+    vec2 tc = clamp(vUv, vUvBorder.xy, vUvBorder.zw);
 #ifdef WR_FEATURE_SUBPIXEL_AA
     //note: the blend mode is not compatible with clipping
-    oFragColor = texture(sColor0, vUv);
+    oFragColor = texture(sColor0, tc);
 #else
-    float alpha = texture(sColor0, vUv).a;
+    float alpha = texture(sColor0, tc).a;
 #ifdef WR_FEATURE_TRANSFORM
     float a = 0.0;
     init_transform_fs(vLocalPos, vLocalRect, a);
     alpha *= a;
 #endif
     vec4 color = vColor;
     alpha = min(alpha, do_clip());
     oFragColor = vec4(vColor.rgb, vColor.a * alpha);
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -1,11 +1,12 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 flat varying vec4 vColor;
 varying vec2 vUv;
+flat varying vec4 vUvBorder;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 flat varying vec4 vLocalRect;
 #endif
--- a/gfx/webrender/res/ps_text_run.vs.glsl
+++ b/gfx/webrender/res/ps_text_run.vs.glsl
@@ -11,30 +11,31 @@ void main(void) {
 
     vec4 local_rect = vec4(glyph.offset.xy, (res.uv_rect.zw - res.uv_rect.xy) / uDevicePixelRatio);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.tile);
+                                                    prim.task);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy / vi.local_pos.z - local_rect.xy) / local_rect.zw;
 #else
     VertexInfo vi = write_vertex(local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
-    vec2 f = (vi.local_clamped_pos - vi.local_rect.p0) / (vi.local_rect.p1 - vi.local_rect.p0);
+                                 prim.task);
+    vec2 f = (vi.local_pos - vi.local_rect.p0) / (vi.local_rect.p1 - vi.local_rect.p0);
 #endif
 
-    write_clip(vi.global_clamped_pos, prim.clip_area);
+    write_clip(vi.screen_pos, prim.clip_area);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
     vColor = text.color;
     vUv = mix(st0, st1, f);
+    vUvBorder = (res.uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
 }
--- a/gfx/webrender/res/ps_yuv_image.fs.glsl
+++ b/gfx/webrender/res/ps_yuv_image.fs.glsl
@@ -14,20 +14,27 @@ void main(void) {
          clamp(pos, vLocalRect.xy, vLocalRect.xy + vLocalRect.zw) - vLocalRect.xy;
 #else
     float alpha = 1.0;;
     vec2 relative_pos_in_rect = vLocalPos;
 #endif
 
     alpha = min(alpha, do_clip());
 
-    vec2 st_y = vTextureOffsetY + relative_pos_in_rect / vStretchSize * vTextureSizeY;
-    vec2 st_u = vTextureOffsetU + relative_pos_in_rect / vStretchSize * vTextureSizeUv;
-    vec2 st_v = vTextureOffsetV + relative_pos_in_rect / vStretchSize * vTextureSizeUv;
+    // We clamp the texture coordinates to the half-pixel offset from the borders
+    // in order to avoid sampling outside of the texture area.
+    vec2 st_y = vTextureOffsetY + clamp(
+        relative_pos_in_rect / vStretchSize * vTextureSizeY,
+        vHalfTexelY, vTextureSizeY - vHalfTexelY);
+    vec2 uv_offset = clamp(
+        relative_pos_in_rect / vStretchSize * vTextureSizeUv,
+        vHalfTexelUv, vTextureSizeUv - vHalfTexelUv);
+    vec2 st_u = vTextureOffsetU + uv_offset;
+    vec2 st_v = vTextureOffsetV + uv_offset;
 
-    float y = texture(sColor0, st_y).r;
-    float u = texture(sColor1, st_u).r;
-    float v = texture(sColor2, st_v).r;
+    float y = textureLod(sColor0, st_y, 0.0).r;
+    float u = textureLod(sColor1, st_u, 0.0).r;
+    float v = textureLod(sColor2, st_v, 0.0).r;
 
     // See the vertex shader for an explanation of where the constants come from.
     vec3 rgb = vYuvColorMatrix * vec3(y - 0.06275, u - 0.50196, v - 0.50196);
     oFragColor = vec4(rgb, alpha);
 }
--- a/gfx/webrender/res/ps_yuv_image.glsl
+++ b/gfx/webrender/res/ps_yuv_image.glsl
@@ -3,16 +3,18 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 flat varying vec2 vTextureOffsetY; // Offset of the y plane into the texture atlas.
 flat varying vec2 vTextureOffsetU; // Offset of the u plane into the texture atlas.
 flat varying vec2 vTextureOffsetV; // Offset of the v plane into the texture atlas.
 flat varying vec2 vTextureSizeY;   // Size of the y plane in the texture atlas.
 flat varying vec2 vTextureSizeUv;  // Size of the u and v planes in the texture atlas.
 flat varying vec2 vStretchSize;
+flat varying vec2 vHalfTexelY;     // Half of a Y-plane texel, in normalized texture coordinates.
+flat varying vec2 vHalfTexelUv;    // Half of a U/V-plane texel, in normalized texture coordinates.
 
 flat varying mat3 vYuvColorMatrix;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 flat varying vec4 vLocalRect;
 #else
 varying vec2 vLocalPos;
--- a/gfx/webrender/res/ps_yuv_image.vs.glsl
+++ b/gfx/webrender/res/ps_yuv_image.vs.glsl
@@ -5,26 +5,26 @@
 
 void main(void) {
     Primitive prim = load_primitive();
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
-                                                    prim.tile);
+                                                    prim.task);
     vLocalRect = vi.clipped_local_rect;
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
-                                 prim.tile);
-    vLocalPos = vi.local_clamped_pos - vi.local_rect.p0;
+                                 prim.task);
+    vLocalPos = vi.local_pos - vi.local_rect.p0;
 #endif
 
     YuvImage image = fetch_yuv_image(prim.prim_index);
 
     vec2 y_texture_size = vec2(textureSize(sColor0, 0));
     vec2 y_st0 = image.y_st_rect.xy / y_texture_size;
     vec2 y_st1 = image.y_st_rect.zw / y_texture_size;
 
@@ -40,16 +40,19 @@ void main(void) {
 
     // This assumes the U and V surfaces have the same size.
     vTextureSizeUv = u_st1 - u_st0;
     vTextureOffsetU = u_st0;
     vTextureOffsetV = v_st0;
 
     vStretchSize = image.size;
 
+    vHalfTexelY = vec2(0.5) / y_texture_size;
+    vHalfTexelUv = vec2(0.5) / uv_texture_size;
+
     // The constants added to the Y, U and V components are applied in the fragment shader.
     if (image.color_space == YUV_REC601) {
         // From Rec601:
         // [R]   [1.1643835616438356,  0.0,                 1.5960267857142858   ]   [Y -  16]
         // [G] = [1.1643835616438358, -0.3917622900949137, -0.8129676472377708   ] x [U - 128]
         // [B]   [1.1643835616438356,  2.017232142857143,   8.862867620416422e-17]   [V - 128]
         //
         // For the range [0,1] instead of [0,255].
@@ -67,11 +70,11 @@ void main(void) {
         // For the range [0,1] instead of [0,255]:
         vYuvColorMatrix = mat3(
             1.16438,  0.0,      1.79274,
             1.16438, -0.21325, -0.53291,
             1.16438,  2.11240,  0.0
         );
     }
 
-    write_clip(vi.global_clamped_pos, prim.clip_area);
+    write_clip(vi.screen_pos, prim.clip_area);
 
 }
--- a/gfx/webrender/src/debug_render.rs
+++ b/gfx/webrender/src/debug_render.rs
@@ -23,18 +23,18 @@ pub struct DebugRenderer {
     tri_vao: VAOId,
     line_vertices: Vec<DebugColorVertex>,
     line_vao: VAOId,
     color_program_id: ProgramId,
 }
 
 impl DebugRenderer {
     pub fn new(device: &mut Device) -> DebugRenderer {
-        let font_program_id = device.create_program("debug_font", "shared_other");
-        let color_program_id = device.create_program("debug_color", "shared_other");
+        let font_program_id = device.create_program("debug_font", "shared_other", VertexFormat::DebugFont).unwrap();
+        let color_program_id = device.create_program("debug_color", "shared_other", VertexFormat::DebugColor).unwrap();
 
         let font_vao = device.create_vao(VertexFormat::DebugFont, 32);
         let line_vao = device.create_vao(VertexFormat::DebugColor, 32);
         let tri_vao = device.create_vao(VertexFormat::DebugColor, 32);
 
         let font_texture_id = device.create_texture_ids(1, TextureTarget::Default)[0];
         device.init_texture(font_texture_id,
                             debug_font_data::BMP_WIDTH,
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -9,22 +9,23 @@ use internal_types::{PackedVertex, Rende
 use internal_types::{BlurAttribute, ClearAttribute, ClipAttribute, VertexAttribute};
 use internal_types::{DebugFontVertex, DebugColorVertex};
 //use notify::{self, Watcher};
 use super::shader_source;
 use std::collections::HashMap;
 use std::fs::File;
 use std::hash::BuildHasherDefault;
 use std::io::Read;
+use std::iter::repeat;
 use std::mem;
 use std::path::PathBuf;
 //use std::sync::mpsc::{channel, Sender};
 //use std::thread;
 use webrender_traits::{ColorF, ImageFormat};
-use webrender_traits::{DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+use webrender_traits::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintSize};
 
 #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
 const GL_FORMAT_A: gl::GLuint = gl::RED;
 
 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
 const GL_FORMAT_A: gl::GLuint = gl::ALPHA;
 
 #[cfg(any(target_os = "windows", all(unix, not(target_os = "android"))))]
@@ -36,18 +37,16 @@ const GL_FORMAT_BGRA: gl::GLuint = gl::B
 #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
 const SHADER_VERSION: &'static str = "#version 150\n";
 
 #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
 const SHADER_VERSION: &'static str = "#version 300 es\n";
 
 static SHADER_PREAMBLE: &'static str = "shared";
 
-pub type ViewportDimensions = [u32; 2];
-
 lazy_static! {
     pub static ref MAX_TEXTURE_SIZE: gl::GLint = {
         gl::get_integer_v(gl::MAX_TEXTURE_SIZE)
     };
 }
 
 #[repr(u32)]
 pub enum DepthFunction {
@@ -371,57 +370,65 @@ struct Program {
     vs_id: Option<gl::GLuint>,
     fs_id: Option<gl::GLuint>,
 }
 
 impl Program {
     fn attach_and_bind_shaders(&mut self,
                                vs_id: gl::GLuint,
                                fs_id: gl::GLuint,
-                               panic_on_fail: bool) -> bool {
+                               vertex_format: VertexFormat) -> Result<(), ShaderError> {
         gl::attach_shader(self.id, vs_id);
         gl::attach_shader(self.id, fs_id);
 
-        gl::bind_attrib_location(self.id, VertexAttribute::Position as gl::GLuint, "aPosition");
-        gl::bind_attrib_location(self.id, VertexAttribute::Color as gl::GLuint, "aColor");
-        gl::bind_attrib_location(self.id, VertexAttribute::ColorTexCoord as gl::GLuint, "aColorTexCoord");
+        match vertex_format {
+            VertexFormat::Triangles | VertexFormat::Rectangles |
+            VertexFormat::DebugFont |  VertexFormat::DebugColor => {
+                gl::bind_attrib_location(self.id, VertexAttribute::Position as gl::GLuint, "aPosition");
+                gl::bind_attrib_location(self.id, VertexAttribute::Color as gl::GLuint, "aColor");
+                gl::bind_attrib_location(self.id, VertexAttribute::ColorTexCoord as gl::GLuint, "aColorTexCoord");
 
-        gl::bind_attrib_location(self.id, VertexAttribute::GlobalPrimId as gl::GLuint, "aGlobalPrimId");
-        gl::bind_attrib_location(self.id, VertexAttribute::PrimitiveAddress as gl::GLuint, "aPrimitiveAddress");
-        gl::bind_attrib_location(self.id, VertexAttribute::TaskIndex as gl::GLuint, "aTaskIndex");
-        gl::bind_attrib_location(self.id, VertexAttribute::ClipTaskIndex as gl::GLuint, "aClipTaskIndex");
-        gl::bind_attrib_location(self.id, VertexAttribute::LayerIndex as gl::GLuint, "aLayerIndex");
-        gl::bind_attrib_location(self.id, VertexAttribute::ElementIndex as gl::GLuint, "aElementIndex");
-        gl::bind_attrib_location(self.id, VertexAttribute::UserData as gl::GLuint, "aUserData");
-        gl::bind_attrib_location(self.id, VertexAttribute::ZIndex as gl::GLuint, "aZIndex");
-
-        gl::bind_attrib_location(self.id, ClearAttribute::Rectangle as gl::GLuint, "aClearRectangle");
-
-        gl::bind_attrib_location(self.id, BlurAttribute::RenderTaskIndex as gl::GLuint, "aBlurRenderTaskIndex");
-        gl::bind_attrib_location(self.id, BlurAttribute::SourceTaskIndex as gl::GLuint, "aBlurSourceTaskIndex");
-        gl::bind_attrib_location(self.id, BlurAttribute::Direction as gl::GLuint, "aBlurDirection");
-
-        gl::bind_attrib_location(self.id, ClipAttribute::RenderTaskIndex as gl::GLuint, "aClipRenderTaskIndex");
-        gl::bind_attrib_location(self.id, ClipAttribute::LayerIndex as gl::GLuint, "aClipLayerIndex");
-        gl::bind_attrib_location(self.id, ClipAttribute::DataIndex as gl::GLuint, "aClipDataIndex");
-        gl::bind_attrib_location(self.id, ClipAttribute::SegmentIndex as gl::GLuint, "aClipSegmentIndex");
+                gl::bind_attrib_location(self.id, VertexAttribute::GlobalPrimId as gl::GLuint, "aGlobalPrimId");
+                gl::bind_attrib_location(self.id, VertexAttribute::PrimitiveAddress as gl::GLuint, "aPrimitiveAddress");
+                gl::bind_attrib_location(self.id, VertexAttribute::TaskIndex as gl::GLuint, "aTaskIndex");
+                gl::bind_attrib_location(self.id, VertexAttribute::ClipTaskIndex as gl::GLuint, "aClipTaskIndex");
+                gl::bind_attrib_location(self.id, VertexAttribute::LayerIndex as gl::GLuint, "aLayerIndex");
+                gl::bind_attrib_location(self.id, VertexAttribute::ElementIndex as gl::GLuint, "aElementIndex");
+                gl::bind_attrib_location(self.id, VertexAttribute::UserData as gl::GLuint, "aUserData");
+                gl::bind_attrib_location(self.id, VertexAttribute::ZIndex as gl::GLuint, "aZIndex");
+            }
+            VertexFormat::Clear => {
+                gl::bind_attrib_location(self.id, ClearAttribute::Position as gl::GLuint, "aPosition");
+                gl::bind_attrib_location(self.id, ClearAttribute::Rectangle as gl::GLuint, "aClearRectangle");
+            }
+            VertexFormat::Blur => {
+                gl::bind_attrib_location(self.id, BlurAttribute::Position as gl::GLuint, "aPosition");
+                gl::bind_attrib_location(self.id, BlurAttribute::RenderTaskIndex as gl::GLuint, "aBlurRenderTaskIndex");
+                gl::bind_attrib_location(self.id, BlurAttribute::SourceTaskIndex as gl::GLuint, "aBlurSourceTaskIndex");
+                gl::bind_attrib_location(self.id, BlurAttribute::Direction as gl::GLuint, "aBlurDirection");
+            }
+            VertexFormat::Clip => {
+                gl::bind_attrib_location(self.id, ClipAttribute::Position as gl::GLuint, "aPosition");
+                gl::bind_attrib_location(self.id, ClipAttribute::RenderTaskIndex as gl::GLuint, "aClipRenderTaskIndex");
+                gl::bind_attrib_location(self.id, ClipAttribute::LayerIndex as gl::GLuint, "aClipLayerIndex");
+                gl::bind_attrib_location(self.id, ClipAttribute::DataIndex as gl::GLuint, "aClipDataIndex");
+                gl::bind_attrib_location(self.id, ClipAttribute::SegmentIndex as gl::GLuint, "aClipSegmentIndex");
+            }
+        }
 
         gl::link_program(self.id);
         if gl::get_program_iv(self.id, gl::LINK_STATUS) == (0 as gl::GLint) {
-            println!("Failed to link shader program: {}", gl::get_program_info_log(self.id));
+            let error_log = gl::get_program_info_log(self.id);
+            println!("Failed to link shader program: {}", error_log);
             gl::detach_shader(self.id, vs_id);
             gl::detach_shader(self.id, fs_id);
-            if panic_on_fail {
-                panic!("-- Program link failed - exiting --");
-            }
-            false
-        } else {
-            //println!("{}", gl::get_program_info_log(self.id));
-            true
+            return Err(ShaderError::Link(error_log));
         }
+
+        Ok(())
     }
 }
 
 impl Drop for Program {
     fn drop(&mut self) {
         gl::delete_program(self.id);
     }
 }
@@ -777,16 +784,22 @@ impl FileWatcherThread {
 }
 */
 
 pub struct Capabilities {
     pub max_ubo_size: usize,
     pub supports_multisampling: bool,
 }
 
+#[derive(Clone, Debug)]
+pub enum ShaderError {
+    Compilation(String, String), // name, error message
+    Link(String), // error message
+}
+
 pub struct Device {
     // device state
     bound_textures: [TextureId; 16],
     bound_program: ProgramId,
     bound_vao: VAOId,
     bound_read_fbo: FBOId,
     bound_draw_fbo: FBOId,
     default_read_fbo: gl::GLuint,
@@ -855,19 +868,18 @@ impl Device {
 
     pub fn get_capabilities(&self) -> &Capabilities {
         &self.capabilities
     }
 
     pub fn compile_shader(name: &str,
                           source_str: &str,
                           shader_type: gl::GLenum,
-                          shader_preamble: &[String],
-                          panic_on_fail: bool)
-                          -> Option<gl::GLuint> {
+                          shader_preamble: &[String])
+                          -> Result<gl::GLuint, ShaderError> {
         debug!("compile {:?}", name);
 
         let mut s = String::new();
         s.push_str(SHADER_VERSION);
         for prefix in shader_preamble {
             s.push_str(&prefix);
         }
         s.push_str(source_str);
@@ -875,26 +887,22 @@ impl Device {
         let id = gl::create_shader(shader_type);
         let mut source = Vec::new();
         source.extend_from_slice(s.as_bytes());
         gl::shader_source(id, &[&source[..]]);
         gl::compile_shader(id);
         let log = gl::get_shader_info_log(id);
         if gl::get_shader_iv(id, gl::COMPILE_STATUS) == (0 as gl::GLint) {
             println!("Failed to compile shader: {:?}\n{}", name, log);
-            if panic_on_fail {
-                panic!("-- Shader compile failed - exiting --");
-            }
-
-            None
+            Err(ShaderError::Compilation(name.to_string(), log))
         } else {
             if !log.is_empty() {
                 println!("Warnings detected on shader: {:?}\n{}", name, log);
             }
-            Some(id)
+            Ok(id)
         }
     }
 
     pub fn begin_frame(&mut self, device_pixel_ratio: f32) {
         debug_assert!(!self.inside_frame);
         self.inside_frame = true;
         self.device_pixel_ratio = device_pixel_ratio;
 
@@ -954,30 +962,30 @@ impl Device {
         if self.bound_read_fbo != fbo_id {
             self.bound_read_fbo = fbo_id;
             fbo_id.bind(FBOTarget::Read);
         }
     }
 
     pub fn bind_draw_target(&mut self,
                             texture_id: Option<(TextureId, i32)>,
-                            dimensions: Option<ViewportDimensions>) {
+                            dimensions: Option<DeviceUintSize>) {
         debug_assert!(self.inside_frame);
 
         let fbo_id = texture_id.map_or(FBOId(self.default_draw_fbo), |texture_id| {
             self.textures.get(&texture_id.0).unwrap().fbo_ids[texture_id.1 as usize]
         });
 
         if self.bound_draw_fbo != fbo_id {
             self.bound_draw_fbo = fbo_id;
             fbo_id.bind(FBOTarget::Draw);
         }
 
         if let Some(dimensions) = dimensions {
-            gl::viewport(0, 0, dimensions[0] as gl::GLint, dimensions[1] as gl::GLint);
+            gl::viewport(0, 0, dimensions.width as gl::GLint, dimensions.height as gl::GLint);
         }
     }
 
     pub fn bind_program(&mut self,
                         program_id: ProgramId,
                         projection: &Matrix4D<f32>) {
         debug_assert!(self.inside_frame);
 
@@ -1023,19 +1031,19 @@ impl Device {
             self.textures.insert(texture_id, texture);
 
             texture_ids.push(texture_id);
         }
 
         texture_ids
     }
 
-    pub fn get_texture_dimensions(&self, texture_id: TextureId) -> (u32, u32) {
+    pub fn get_texture_dimensions(&self, texture_id: TextureId) -> DeviceUintSize {
         let texture = &self.textures[&texture_id];
-        (texture.width, texture.height)
+        DeviceUintSize::new(texture.width, texture.height)
     }
 
     fn set_texture_parameters(&mut self, target: gl::GLuint, filter: TextureFilter) {
         let filter = match filter {
             TextureFilter::Nearest => {
                 gl::NEAREST
             }
             TextureFilter::Linear => {
@@ -1106,23 +1114,32 @@ impl Device {
             RenderTargetMode::LayerRenderTarget(layer_count) => {
                 self.bind_texture(DEFAULT_TEXTURE, texture_id);
                 self.set_texture_parameters(texture_id.target, filter);
                 self.create_fbo_for_texture_if_necessary(texture_id, Some(layer_count));
             }
             RenderTargetMode::None => {
                 self.bind_texture(DEFAULT_TEXTURE, texture_id);
                 self.set_texture_parameters(texture_id.target, filter);
+                let expanded_data: Vec<u8>;
+                let actual_pixels = if pixels.is_some() &&
+                                       format == ImageFormat::A8 &&
+                                       cfg!(any(target_arch="arm", target_arch="aarch64")) {
+                    expanded_data = pixels.unwrap().iter().flat_map(|&byte| repeat(byte).take(4)).collect();
+                    Some(expanded_data.as_slice())
+                } else {
+                    pixels
+                };
                 self.upload_texture_image(texture_id.target,
                                           width,
                                           height,
                                           internal_format as u32,
                                           gl_format,
                                           type_,
-                                          pixels);
+                                          actual_pixels);
             }
         }
     }
 
     pub fn get_render_target_layer_count(&self, texture_id: TextureId) -> usize {
         self.textures[&texture_id].fbo_ids.len()
     }
 
@@ -1239,48 +1256,48 @@ impl Device {
                           texture_id: TextureId,
                           new_width: u32,
                           new_height: u32,
                           format: ImageFormat,
                           filter: TextureFilter,
                           mode: RenderTargetMode) {
         debug_assert!(self.inside_frame);
 
-        let (old_width, old_height) = self.get_texture_dimensions(texture_id);
+        let old_size = self.get_texture_dimensions(texture_id);
 
         let temp_texture_id = self.create_texture_ids(1, TextureTarget::Default)[0];
-        self.init_texture(temp_texture_id, old_width, old_height, format, filter, mode, None);
+        self.init_texture(temp_texture_id, old_size.width, old_size.height, format, filter, mode, None);
         self.create_fbo_for_texture_if_necessary(temp_texture_id, None);
 
         self.bind_read_target(Some((texture_id, 0)));
         self.bind_texture(DEFAULT_TEXTURE, temp_texture_id);
 
         gl::copy_tex_sub_image_2d(temp_texture_id.target,
                                   0,
                                   0,
                                   0,
                                   0,
                                   0,
-                                  old_width as i32,
-                                  old_height as i32);
+                                  old_size.width as i32,
+                                  old_size.height as i32);
 
         self.deinit_texture(texture_id);
         self.init_texture(texture_id, new_width, new_height, format, filter, mode, None);
         self.create_fbo_for_texture_if_necessary(texture_id, None);
         self.bind_read_target(Some((temp_texture_id, 0)));
         self.bind_texture(DEFAULT_TEXTURE, texture_id);
 
         gl::copy_tex_sub_image_2d(texture_id.target,
                                   0,
                                   0,
                                   0,
                                   0,
                                   0,
-                                  old_width as i32,
-                                  old_height as i32);
+                                  old_size.width as i32,
+                                  old_size.height as i32);
 
         self.bind_read_target(None);
         self.deinit_texture(temp_texture_id);
     }
 
     pub fn deinit_texture(&mut self, texture_id: TextureId) {
         debug_assert!(self.inside_frame);
 
@@ -1308,24 +1325,26 @@ impl Device {
         texture.format = ImageFormat::Invalid;
         texture.width = 0;
         texture.height = 0;
         texture.fbo_ids.clear();
     }
 
     pub fn create_program(&mut self,
                           base_filename: &str,
-                          include_filename: &str) -> ProgramId {
-        self.create_program_with_prefix(base_filename, &[include_filename], None)
+                          include_filename: &str,
+                          vertex_format: VertexFormat) -> Result<ProgramId, ShaderError> {
+        self.create_program_with_prefix(base_filename, &[include_filename], None, vertex_format)
     }
 
     pub fn create_program_with_prefix(&mut self,
                                       base_filename: &str,
                                       include_filenames: &[&str],
-                                      prefix: Option<String>) -> ProgramId {
+                                      prefix: Option<String>,
+                                      vertex_format: VertexFormat) -> Result<ProgramId, ShaderError> {
         debug_assert!(self.inside_frame);
 
         let pid = gl::create_program();
 
         let mut vs_name = String::from(base_filename);
         vs_name.push_str(".vs");
         let mut fs_name = String::from(base_filename);
         fs_name.push_str(".fs");
@@ -1352,25 +1371,25 @@ impl Device {
             fs_id: None,
         };
 
         let program_id = ProgramId(pid);
 
         debug_assert!(self.programs.contains_key(&program_id) == false);
         self.programs.insert(program_id, program);
 
-        self.load_program(program_id, include, true);
+        try!{ self.load_program(program_id, include, vertex_format) };
 
-        program_id
+        Ok(program_id)
     }
 
     fn load_program(&mut self,
                     program_id: ProgramId,
                     include: String,
-                    panic_on_fail: bool) {
+                    vertex_format: VertexFormat) -> Result<(), ShaderError> {
         debug_assert!(self.inside_frame);
 
         let program = self.programs.get_mut(&program_id).unwrap();
 
         let mut vs_preamble = Vec::new();
         let mut fs_preamble = Vec::new();
 
         vs_preamble.push("#define WR_VERTEX_SHADER\n".to_owned());
@@ -1383,132 +1402,124 @@ impl Device {
 
         vs_preamble.push(self.shader_preamble.to_owned());
         fs_preamble.push(self.shader_preamble.to_owned());
 
         vs_preamble.push(include.clone());
         fs_preamble.push(include);
 
         // todo(gw): store shader ids so they can be freed!
-        let vs_id = Device::compile_shader(&program.name,
-                                           &program.vs_source,
-                                           gl::VERTEX_SHADER,
-                                           &vs_preamble,
-                                           panic_on_fail);
-        let fs_id = Device::compile_shader(&program.name,
-                                           &program.fs_source,
-                                           gl::FRAGMENT_SHADER,
-                                           &fs_preamble,
-                                           panic_on_fail);
+        let vs_id = try!{ Device::compile_shader(&program.name,
+                                                 &program.vs_source,
+                                                 gl::VERTEX_SHADER,
+                                                 &vs_preamble) };
+        let fs_id = try!{ Device::compile_shader(&program.name,
+                                                 &program.fs_source,
+                                                 gl::FRAGMENT_SHADER,
+                                                 &fs_preamble) };
+
+        if let Some(vs_id) = program.vs_id {
+            gl::detach_shader(program.id, vs_id);
+        }
 
-        match (vs_id, fs_id) {
-            (Some(vs_id), None) => {
-                println!("FAILED to load fs - falling back to previous!");
+        if let Some(fs_id) = program.fs_id {
+            gl::detach_shader(program.id, fs_id);
+        }
+
+        if let Err(bind_error) = program.attach_and_bind_shaders(vs_id, fs_id, vertex_format) {
+            if let (Some(vs_id), Some(fs_id)) = (program.vs_id, program.fs_id) {
+                try! { program.attach_and_bind_shaders(vs_id, fs_id, vertex_format) };
+            } else {
+               return Err(bind_error);
+            }
+        } else {
+            if let Some(vs_id) = program.vs_id {
                 gl::delete_shader(vs_id);
             }
-            (None, Some(fs_id)) => {
-                println!("FAILED to load vs - falling back to previous!");
+
+            if let Some(fs_id) = program.fs_id {
                 gl::delete_shader(fs_id);
             }
-            (None, None) => {
-                println!("FAILED to load vs/fs - falling back to previous!");
-            }
-            (Some(vs_id), Some(fs_id)) => {
-                if let Some(vs_id) = program.vs_id {
-                    gl::detach_shader(program.id, vs_id);
-                }
 
-                if let Some(fs_id) = program.fs_id {
-                    gl::detach_shader(program.id, fs_id);
-                }
+            program.vs_id = Some(vs_id);
+            program.fs_id = Some(fs_id);
+        }
+
+        program.u_transform = gl::get_uniform_location(program.id, "uTransform");
+        program.u_device_pixel_ratio = gl::get_uniform_location(program.id, "uDevicePixelRatio");
 
-                if program.attach_and_bind_shaders(vs_id, fs_id, panic_on_fail) {
-                    if let Some(vs_id) = program.vs_id {
-                        gl::delete_shader(vs_id);
-                    }
-
-                    if let Some(fs_id) = program.fs_id {
-                        gl::delete_shader(fs_id);
-                    }
+        program_id.bind();
+        let u_color_0 = gl::get_uniform_location(program.id, "sColor0");
+        if u_color_0 != -1 {
+            gl::uniform_1i(u_color_0, TextureSampler::Color0 as i32);
+        }
+        let u_color1 = gl::get_uniform_location(program.id, "sColor1");
+        if u_color1 != -1 {
+            gl::uniform_1i(u_color1, TextureSampler::Color1 as i32);
+        }
+        let u_color_2 = gl::get_uniform_location(program.id, "sColor2");
+        if u_color_2 != -1 {
+            gl::uniform_1i(u_color_2, TextureSampler::Color2 as i32);
+        }
+        let u_mask = gl::get_uniform_location(program.id, "sMask");
+        if u_mask != -1 {
+            gl::uniform_1i(u_mask, TextureSampler::Mask as i32);
+        }
 
-                    program.vs_id = Some(vs_id);
-                    program.fs_id = Some(fs_id);
-                } else {
-                    let vs_id = program.vs_id.unwrap();
-                    let fs_id = program.fs_id.unwrap();
-                    program.attach_and_bind_shaders(vs_id, fs_id, true);
-                }
+        let u_cache = gl::get_uniform_location(program.id, "sCache");
+        if u_cache != -1 {
+            gl::uniform_1i(u_cache, TextureSampler::Cache as i32);
+        }
 
-                program.u_transform = gl::get_uniform_location(program.id, "uTransform");
-                program.u_device_pixel_ratio = gl::get_uniform_location(program.id, "uDevicePixelRatio");
+        let u_layers = gl::get_uniform_location(program.id, "sLayers");
+        if u_layers != -1 {
+            gl::uniform_1i(u_layers, TextureSampler::Layers as i32);
+        }
 
-                program_id.bind();
-                let u_color_0 = gl::get_uniform_location(program.id, "sColor0");
-                if u_color_0 != -1 {
-                    gl::uniform_1i(u_color_0, TextureSampler::Color0 as i32);
-                }
-                let u_color1 = gl::get_uniform_location(program.id, "sColor1");
-                if u_color1 != -1 {
-                    gl::uniform_1i(u_color1, TextureSampler::Color1 as i32);
-                }
-                let u_color_2 = gl::get_uniform_location(program.id, "sColor2");
-                if u_color_2 != -1 {
-                    gl::uniform_1i(u_color_2, TextureSampler::Color2 as i32);
-                }
-                let u_mask = gl::get_uniform_location(program.id, "sMask");
-                if u_mask != -1 {
-                    gl::uniform_1i(u_mask, TextureSampler::Mask as i32);
-                }
+        let u_tasks = gl::get_uniform_location(program.id, "sRenderTasks");
+        if u_tasks != -1 {
+            gl::uniform_1i(u_tasks, TextureSampler::RenderTasks as i32);
+        }
+
+        let u_prim_geom = gl::get_uniform_location(program.id, "sPrimGeometry");
+        if u_prim_geom != -1 {
+            gl::uniform_1i(u_prim_geom, TextureSampler::Geometry as i32);
+        }
 
-                let u_cache = gl::get_uniform_location(program.id, "sCache");
-                if u_cache != -1 {
-                    gl::uniform_1i(u_cache, TextureSampler::Cache as i32);
-                }
+        let u_data16 = gl::get_uniform_location(program.id, "sData16");
+        if u_data16 != -1 {
+            gl::uniform_1i(u_data16, TextureSampler::Data16 as i32);
+        }
 
-                let u_layers = gl::get_uniform_location(program.id, "sLayers");
-                if u_layers != -1 {
-                    gl::uniform_1i(u_layers, TextureSampler::Layers as i32);
-                }
+        let u_data32 = gl::get_uniform_location(program.id, "sData32");
+        if u_data32 != -1 {
+            gl::uniform_1i(u_data32, TextureSampler::Data32 as i32);
+        }
 
-                let u_tasks = gl::get_uniform_location(program.id, "sRenderTasks");
-                if u_tasks != -1 {
-                    gl::uniform_1i(u_tasks, TextureSampler::RenderTasks as i32);
-                }
+        let u_data64 = gl::get_uniform_location(program.id, "sData64");
+        if u_data64 != -1 {
+            gl::uniform_1i(u_data64, TextureSampler::Data64 as i32);
+        }
 
-                let u_prim_geom = gl::get_uniform_location(program.id, "sPrimGeometry");
-                if u_prim_geom != -1 {
-                    gl::uniform_1i(u_prim_geom, TextureSampler::Geometry as i32);
-                }
-
-                let u_data16 = gl::get_uniform_location(program.id, "sData16");
-                if u_data16 != -1 {
-                    gl::uniform_1i(u_data16, TextureSampler::Data16 as i32);
-                }
+        let u_data128 = gl::get_uniform_location(program.id, "sData128");
+        if u_data128 != -1 {
+            gl::uniform_1i(u_data128, TextureSampler::Data128    as i32);
+        }
 
-                let u_data32 = gl::get_uniform_location(program.id, "sData32");
-                if u_data32 != -1 {
-                    gl::uniform_1i(u_data32, TextureSampler::Data32 as i32);
-                }
-
-                let u_data64 = gl::get_uniform_location(program.id, "sData64");
-                if u_data64 != -1 {
-                    gl::uniform_1i(u_data64, TextureSampler::Data64 as i32);
-                }
+        let u_resource_rects = gl::get_uniform_location(program.id, "sResourceRects");
+        if u_resource_rects != -1 {
+            gl::uniform_1i(u_resource_rects, TextureSampler::ResourceRects as i32);
+        }
 
-                let u_data128 = gl::get_uniform_location(program.id, "sData128");
-                if u_data128 != -1 {
-                    gl::uniform_1i(u_data128, TextureSampler::Data128    as i32);
-                }
+        let u_gradients = gl::get_uniform_location(program.id, "sGradients");
+        if u_gradients != -1 {
+            gl::uniform_1i(u_gradients, TextureSampler::Gradients as i32);
+        }
 
-                let u_resource_rects = gl::get_uniform_location(program.id, "sResourceRects");
-                if u_resource_rects != -1 {
-                    gl::uniform_1i(u_resource_rects, TextureSampler::ResourceRects as i32);
-                }
-            }
-        }
+        Ok(())
     }
 
 /*
     pub fn refresh_shader(&mut self, path: PathBuf) {
         let mut vs_preamble_path = self.resource_path.clone();
         vs_preamble_path.push(VERTEX_SHADER_PREAMBLE);
 
         let mut fs_preamble_path = self.resource_path.clone();
@@ -1921,16 +1932,26 @@ impl Device {
         gl::blend_func(gl::CONSTANT_COLOR, gl::ONE_MINUS_SRC_COLOR);
     }
 
     pub fn set_blend_mode_multiply(&self) {
         gl::blend_func_separate(gl::ZERO, gl::SRC_COLOR,
                                 gl::ZERO, gl::SRC_ALPHA);
         gl::blend_equation(gl::FUNC_ADD);
     }
+    pub fn set_blend_mode_max(&self) {
+        gl::blend_func_separate(gl::ONE, gl::ONE,
+                                gl::ONE, gl::ONE);
+        gl::blend_equation_separate(gl::MAX, gl::FUNC_ADD);
+    }
+    pub fn set_blend_mode_min(&self) {
+        gl::blend_func_separate(gl::ONE, gl::ONE,
+                                gl::ONE, gl::ONE);
+        gl::blend_equation_separate(gl::MIN, gl::FUNC_ADD);
+    }
 }
 
 impl Drop for Device {
     fn drop(&mut self) {
         //self.file_watcher.exit();
     }
 }
 
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -1,49 +1,47 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
 use fnv::FnvHasher;
 use internal_types::{ANGLE_FLOAT_TO_FIXED, AxisDirection};
-use internal_types::{CompositionOp};
 use internal_types::{LowLevelFilterOp};
 use internal_types::{RendererFrame};
+use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use layer::Layer;
 use resource_cache::ResourceCache;
-use scene::Scene;
+use scene::{Scene, SceneProperties};
 use scroll_tree::{ScrollTree, ScrollStates};
 use std::collections::HashMap;
 use std::hash::BuildHasherDefault;
-use tiling::{AuxiliaryListsMap, FrameBuilder, FrameBuilderConfig, PrimitiveFlags};
+use tiling::{AuxiliaryListsMap, CompositeOps, PrimitiveFlags};
 use webrender_traits::{AuxiliaryLists, ClipRegion, ColorF, DisplayItem, Epoch, FilterOp};
-use webrender_traits::{LayerPoint, LayerRect, LayerSize, LayerToScrollTransform};
+use webrender_traits::{LayerPoint, LayerRect, LayerSize, LayerToScrollTransform, LayoutTransform};
 use webrender_traits::{MixBlendMode, PipelineId, ScrollEventPhase, ScrollLayerId, ScrollLayerState};
 use webrender_traits::{ScrollLocation, ScrollPolicy, ServoScrollRootId, SpecificDisplayItem};
 use webrender_traits::{StackingContext, WorldPoint};
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
 pub struct FrameId(pub u32);
 
 static DEFAULT_SCROLLBAR_COLOR: ColorF = ColorF { r: 0.3, g: 0.3, b: 0.3, a: 0.6 };
 
 struct FlattenContext<'a> {
     scene: &'a Scene,
-    pipeline_sizes: &'a mut HashMap<PipelineId, LayerSize>,
     builder: &'a mut FrameBuilder,
 }
 
 // TODO: doc
 pub struct Frame {
     pub scroll_tree: ScrollTree,
     pub pipeline_epoch_map: HashMap<PipelineId, Epoch, BuildHasherDefault<FnvHasher>>,
     pub pipeline_auxiliary_lists: AuxiliaryListsMap,
     id: FrameId,
-    debug: bool,
     frame_builder_config: FrameBuilderConfig,
     frame_builder: Option<FrameBuilder>,
 }
 
 trait DisplayListHelpers {
     fn starting_stacking_context<'a>(&'a self) -> Option<(&'a StackingContext, &'a ClipRegion)>;
 }
 
@@ -54,94 +52,81 @@ impl DisplayListHelpers for Vec<DisplayI
                 Some((&specific_item.stacking_context, &item.clip))
             },
             _ => None,
         })
     }
 }
 
 trait StackingContextHelpers {
-    fn needs_composition_operation_for_mix_blend_mode(&self) -> bool;
-    fn composition_operations(&self, auxiliary_lists: &AuxiliaryLists) -> Vec<CompositionOp>;
+    fn mix_blend_mode_for_compositing(&self) -> Option<MixBlendMode>;
+    fn filter_ops_for_compositing(&self,
+                                  auxiliary_lists: &AuxiliaryLists,
+                                  properties: &SceneProperties) -> Vec<LowLevelFilterOp>;
 }
 
 impl StackingContextHelpers for StackingContext {
-    fn needs_composition_operation_for_mix_blend_mode(&self) -> bool {
+    fn mix_blend_mode_for_compositing(&self) -> Option<MixBlendMode> {
         match self.mix_blend_mode {
-            MixBlendMode::Normal => false,
-            MixBlendMode::Multiply |
-            MixBlendMode::Screen |
-            MixBlendMode::Overlay |
-            MixBlendMode::Darken |
-            MixBlendMode::Lighten |
-            MixBlendMode::ColorDodge |
-            MixBlendMode::ColorBurn |
-            MixBlendMode::HardLight |
-            MixBlendMode::SoftLight |
-            MixBlendMode::Difference |
-            MixBlendMode::Exclusion |
-            MixBlendMode::Hue |
-            MixBlendMode::Saturation |
-            MixBlendMode::Color |
-            MixBlendMode::Luminosity => true,
+            MixBlendMode::Normal => None,
+            _ => Some(self.mix_blend_mode),
         }
     }
 
-    fn composition_operations(&self, auxiliary_lists: &AuxiliaryLists) -> Vec<CompositionOp> {
-        let mut composition_operations = vec![];
-        if self.needs_composition_operation_for_mix_blend_mode() {
-            composition_operations.push(CompositionOp::MixBlend(self.mix_blend_mode));
-        }
+    fn filter_ops_for_compositing(&self,
+                                  auxiliary_lists: &AuxiliaryLists,
+                                  properties: &SceneProperties) -> Vec<LowLevelFilterOp> {
+        let mut filters = vec![];
         for filter in auxiliary_lists.filters(&self.filters) {
             match *filter {
                 FilterOp::Blur(radius) => {
-                    composition_operations.push(CompositionOp::Filter(LowLevelFilterOp::Blur(
+                    filters.push(LowLevelFilterOp::Blur(
                         radius,
-                        AxisDirection::Horizontal)));
-                    composition_operations.push(CompositionOp::Filter(LowLevelFilterOp::Blur(
+                        AxisDirection::Horizontal));
+                    filters.push(LowLevelFilterOp::Blur(
                         radius,
-                        AxisDirection::Vertical)));
+                        AxisDirection::Vertical));
                 }
                 FilterOp::Brightness(amount) => {
-                    composition_operations.push(CompositionOp::Filter(
-                            LowLevelFilterOp::Brightness(Au::from_f32_px(amount))));
+                    filters.push(
+                            LowLevelFilterOp::Brightness(Au::from_f32_px(amount)));
                 }
                 FilterOp::Contrast(amount) => {
-                    composition_operations.push(CompositionOp::Filter(
-                            LowLevelFilterOp::Contrast(Au::from_f32_px(amount))));
+                    filters.push(
+                            LowLevelFilterOp::Contrast(Au::from_f32_px(amount)));
                 }
                 FilterOp::Grayscale(amount) => {
-                    composition_operations.push(CompositionOp::Filter(
-                            LowLevelFilterOp::Grayscale(Au::from_f32_px(amount))));
+                    filters.push(
+                            LowLevelFilterOp::Grayscale(Au::from_f32_px(amount)));
                 }
                 FilterOp::HueRotate(angle) => {
-                    composition_operations.push(CompositionOp::Filter(
+                    filters.push(
                             LowLevelFilterOp::HueRotate(f32::round(
-                                    angle * ANGLE_FLOAT_TO_FIXED) as i32)));
+                                    angle * ANGLE_FLOAT_TO_FIXED) as i32));
                 }
                 FilterOp::Invert(amount) => {
-                    composition_operations.push(CompositionOp::Filter(
-                            LowLevelFilterOp::Invert(Au::from_f32_px(amount))));
+                    filters.push(
+                            LowLevelFilterOp::Invert(Au::from_f32_px(amount)));
                 }
-                FilterOp::Opacity(amount) => {
-                    composition_operations.push(CompositionOp::Filter(
-                            LowLevelFilterOp::Opacity(Au::from_f32_px(amount))));
+                FilterOp::Opacity(ref value) => {
+                    let amount = properties.resolve_float(value, 1.0);
+                    filters.push(
+                            LowLevelFilterOp::Opacity(Au::from_f32_px(amount)));
                 }
                 FilterOp::Saturate(amount) => {
-                    composition_operations.push(CompositionOp::Filter(
-                            LowLevelFilterOp::Saturate(Au::from_f32_px(amount))));
+                    filters.push(
+                            LowLevelFilterOp::Saturate(Au::from_f32_px(amount)));
                 }
                 FilterOp::Sepia(amount) => {
-                    composition_operations.push(CompositionOp::Filter(
-                            LowLevelFilterOp::Sepia(Au::from_f32_px(amount))));
+                    filters.push(
+                            LowLevelFilterOp::Sepia(Au::from_f32_px(amount)));
                 }
             }
         }
-
-        composition_operations
+        filters
     }
 }
 
 struct DisplayListTraversal<'a> {
     pub display_list: &'a [DisplayItem],
     pub next_item_index: usize,
 }
 
@@ -186,23 +171,22 @@ impl<'a> Iterator for DisplayListTravers
 
         let item = &self.display_list[self.next_item_index];
         self.next_item_index += 1;
         Some(item)
     }
 }
 
 impl Frame {
-    pub fn new(debug: bool, config: FrameBuilderConfig) -> Frame {
+    pub fn new(config: FrameBuilderConfig) -> Frame {
         Frame {
             pipeline_epoch_map: HashMap::with_hasher(Default::default()),
             pipeline_auxiliary_lists: HashMap::with_hasher(Default::default()),
             scroll_tree: ScrollTree::new(),
             id: FrameId(0),
-            debug: debug,
             frame_builder: None,
             frame_builder_config: config,
         }
     }
 
     pub fn reset(&mut self) -> ScrollStates {
         self.pipeline_epoch_map.clear();
 
@@ -233,19 +217,21 @@ impl Frame {
                   -> bool {
         self.scroll_tree.scroll(scroll_location, cursor, phase,)
     }
 
     pub fn tick_scrolling_bounce_animations(&mut self) {
         self.scroll_tree.tick_scrolling_bounce_animations();
     }
 
-    pub fn create(&mut self,
-                  scene: &Scene,
-                  pipeline_sizes: &mut HashMap<PipelineId, LayerSize>) {
+    pub fn discard_frame_state_for_pipeline(&mut self, pipeline_id: PipelineId) {
+        self.scroll_tree.discard_frame_state_for_pipeline(pipeline_id);
+    }
+
+    pub fn create(&mut self, scene: &Scene) {
         let root_pipeline_id = match scene.root_pipeline_id {
             Some(root_pipeline_id) => root_pipeline_id,
             None => return,
         };
 
         let root_pipeline = match scene.pipeline_map.get(&root_pipeline_id) {
             Some(root_pipeline) => root_pipeline,
             None => return,
@@ -265,213 +251,224 @@ impl Frame {
         let (root_stacking_context, root_clip) = match display_list.starting_stacking_context() {
             Some(some) => some,
             None => {
                 warn!("Pipeline display list does not start with a stacking context.");
                 return;
             }
         };
 
-        // Insert global position: fixed elements layer
-        debug_assert!(self.scroll_tree.layers.is_empty());
-        let root_scroll_layer_id = ScrollLayerId::root(root_pipeline_id);
-        let root_fixed_layer_id = ScrollLayerId::create_fixed(root_pipeline_id);
-        let root_viewport = LayerRect::new(LayerPoint::zero(), root_pipeline.viewport_size);
-        let layer = Layer::new(&root_viewport,
-                               root_clip.main.size,
-                               &LayerToScrollTransform::identity(),
-                               root_pipeline_id);
-        self.scroll_tree.add_layer(layer.clone(), root_fixed_layer_id, None);
-        self.scroll_tree.add_layer(layer, root_scroll_layer_id, None);
-        self.scroll_tree.root_scroll_layer_id = Some(root_scroll_layer_id);
+        self.scroll_tree.establish_root(root_pipeline_id,
+                                        &root_pipeline.viewport_size,
+                                        &root_clip.main.size);
 
         let background_color = root_pipeline.background_color.and_then(|color| {
             if color.a > 0.0 {
                 Some(color)
             } else {
                 None
             }
         });
 
         let mut frame_builder = FrameBuilder::new(root_pipeline.viewport_size,
                                                   background_color,
-                                                  self.debug,
                                                   self.frame_builder_config);
 
         {
             let mut context = FlattenContext {
                 scene: scene,
-                pipeline_sizes: pipeline_sizes,
                 builder: &mut frame_builder,
             };
 
             let mut traversal = DisplayListTraversal::new_skipping_first(display_list);
+            let reference_frame_id = self.scroll_tree.root_reference_frame_id();
+            let topmost_scroll_layer_id = self.scroll_tree.topmost_scroll_layer_id();
+            debug_assert!(reference_frame_id != topmost_scroll_layer_id);
+
+            let viewport_rect = LayerRect::new(LayerPoint::zero(), root_pipeline.viewport_size);
+            let clip = ClipRegion::simple(&viewport_rect);
+            context.builder.push_scroll_layer(reference_frame_id,
+                                              &clip,
+                                              &LayerPoint::zero(),
+                                              &root_pipeline.viewport_size);
+            context.builder.push_scroll_layer(topmost_scroll_layer_id,
+                                              &clip,
+                                              &LayerPoint::zero(),
+                                              &root_clip.main.size);
+
             self.flatten_stacking_context(&mut traversal,
                                           root_pipeline_id,
                                           &mut context,
-                                          root_fixed_layer_id,
-                                          root_scroll_layer_id,
+                                          reference_frame_id,
+                                          topmost_scroll_layer_id,
                                           LayerToScrollTransform::identity(),
                                           0,
                                           &root_stacking_context,
                                           root_clip);
+
+            context.builder.pop_scroll_layer();
+            context.builder.pop_scroll_layer();
         }
 
         self.frame_builder = Some(frame_builder);
         self.scroll_tree.finalize_and_apply_pending_scroll_offsets(old_scrolling_states);
     }
 
     fn flatten_scroll_layer<'a>(&mut self,
                                 traversal: &mut DisplayListTraversal<'a>,
                                 pipeline_id: PipelineId,
                                 context: &mut FlattenContext,
-                                current_fixed_layer_id: ScrollLayerId,
-                                mut current_scroll_layer_id: ScrollLayerId,
+                                current_reference_frame_id: ScrollLayerId,
+                                parent_scroll_layer_id: ScrollLayerId,
                                 layer_relative_transform: LayerToScrollTransform,
                                 level: i32,
-                                clip: &LayerRect,
+                                clip: &ClipRegion,
                                 content_size: &LayerSize,
                                 new_scroll_layer_id: ScrollLayerId) {
         // Avoid doing unnecessary work for empty stacking contexts.
         if traversal.current_stacking_context_empty() {
             traversal.skip_current_stacking_context();
             return;
         }
 
-        let layer = Layer::new(&clip, *content_size, &layer_relative_transform, pipeline_id);
-        self.scroll_tree.add_layer(layer, new_scroll_layer_id, Some(current_scroll_layer_id));
-        current_scroll_layer_id = new_scroll_layer_id;
-
-        let layer_rect = LayerRect::new(LayerPoint::zero(),
-                                        LayerSize::new(content_size.width + clip.origin.x,
-                                                       content_size.height + clip.origin.y));
-        context.builder.push_layer(layer_rect,
-                                   &ClipRegion::simple(&layer_rect),
-                                   LayerToScrollTransform::identity(),
-                                   pipeline_id,
-                                   current_scroll_layer_id,
-                                   &[]);
+        let clip_rect = clip.main;
+        let layer = Layer::new(&clip_rect, *content_size, &layer_relative_transform, pipeline_id);
+        self.scroll_tree.add_layer(layer, new_scroll_layer_id, parent_scroll_layer_id);
+        context.builder.push_scroll_layer(new_scroll_layer_id,
+                                          clip,
+                                          &clip_rect.origin,
+                                          &content_size);
 
         self.flatten_items(traversal,
                            pipeline_id,
                            context,
-                           current_fixed_layer_id,
-                           current_scroll_layer_id,
+                           current_reference_frame_id,
+                           new_scroll_layer_id,
                            LayerToScrollTransform::identity(),
                            level);
 
-        context.builder.pop_layer();
+        context.builder.pop_scroll_layer();
     }
 
     fn flatten_stacking_context<'a>(&mut self,
                                     traversal: &mut DisplayListTraversal<'a>,
                                     pipeline_id: PipelineId,
                                     context: &mut FlattenContext,
-                                    current_fixed_layer_id: ScrollLayerId,
+                                    current_reference_frame_id: ScrollLayerId,
                                     current_scroll_layer_id: ScrollLayerId,
                                     layer_relative_transform: LayerToScrollTransform,
                                     level: i32,
                                     stacking_context: &StackingContext,
                                     clip_region: &ClipRegion) {
         // Avoid doing unnecessary work for empty stacking contexts.
         if traversal.current_stacking_context_empty() {
             traversal.skip_current_stacking_context();
             return;
         }
 
         let composition_operations = {
             let auxiliary_lists = self.pipeline_auxiliary_lists
                                       .get(&pipeline_id)
                                       .expect("No auxiliary lists?!");
-            stacking_context.composition_operations(auxiliary_lists)
+            CompositeOps::new(
+                stacking_context.filter_ops_for_compositing(auxiliary_lists, &context.scene.properties),
+                stacking_context.mix_blend_mode_for_compositing())
         };
 
-        // Detect composition operations that will make us invisible.
-        for composition_operation in &composition_operations {
-            match *composition_operation {
-                CompositionOp::Filter(LowLevelFilterOp::Opacity(Au(0))) => {
-                    traversal.skip_current_stacking_context();
-                    return;
-                }
-                _ => {}
-            }
+        if composition_operations.will_make_invisible() {
+            traversal.skip_current_stacking_context();
+            return;
         }
 
-        let transform = layer_relative_transform.pre_translated(stacking_context.bounds.origin.x,
-                                                                stacking_context.bounds.origin.y,
-                                                                0.0)
-                                                .pre_mul(&stacking_context.transform)
-                                                .pre_mul(&stacking_context.perspective);
+        let stacking_context_transform = context.scene
+                                                .properties
+                                                .resolve_layout_transform(&stacking_context.transform);
 
-        // Build world space transform
-        let scroll_layer_id = match stacking_context.scroll_policy {
-            ScrollPolicy::Fixed => current_fixed_layer_id,
+        let mut transform =
+            layer_relative_transform.pre_translated(stacking_context.bounds.origin.x,
+                                                    stacking_context.bounds.origin.y,
+                                                    0.0)
+                                     .pre_mul(&stacking_context_transform)
+                                     .pre_mul(&stacking_context.perspective);
+
+        let mut reference_frame_id = current_reference_frame_id;
+        let mut scroll_layer_id = match stacking_context.scroll_policy {
+            ScrollPolicy::Fixed => current_reference_frame_id,
             ScrollPolicy::Scrollable => current_scroll_layer_id,
         };
 
+        // If we have a transformation, we establish a new reference frame. This means
+        // that fixed position stacking contexts are positioned relative to us.
+        if stacking_context_transform != LayoutTransform::identity() ||
+           stacking_context.perspective != LayoutTransform::identity() {
+            scroll_layer_id = self.scroll_tree.add_reference_frame(clip_region.main,
+                                                                   transform,
+                                                                   pipeline_id,
+                                                                   scroll_layer_id);
+            reference_frame_id = scroll_layer_id;
+            transform = LayerToScrollTransform::identity();
+        }
+
         if level == 0 {
             if let Some(pipeline) = context.scene.pipeline_map.get(&pipeline_id) {
                 if let Some(bg_color) = pipeline.background_color {
 
                     // Adding a dummy layer for this rectangle in order to disable clipping.
                     let no_clip = ClipRegion::simple(&clip_region.main);
-                    context.builder.push_layer(clip_region.main,
-                                               &no_clip,
-                                               transform,
-                                               pipeline_id,
-                                               scroll_layer_id,
-                                               &composition_operations);
+                    context.builder.push_stacking_context(clip_region.main,
+                                                          transform,
+                                                          pipeline_id,
+                                                          scroll_layer_id,
+                                                          CompositeOps::empty());
 
                     //Note: we don't use the original clip region here,
                     // it's already processed by the layer we just pushed.
                     context.builder.add_solid_rectangle(&clip_region.main,
                                                         &no_clip,
                                                         &bg_color,
                                                         PrimitiveFlags::None);
 
-                    context.builder.pop_layer();
+                    context.builder.pop_stacking_context();
                 }
             }
         }
 
          // TODO(gw): Int with overflow etc
-        context.builder.push_layer(clip_region.main,
-                                   &clip_region,
-                                   transform,
-                                   pipeline_id,
-                                   scroll_layer_id,
-                                   &composition_operations);
+        context.builder.push_stacking_context(clip_region.main,
+                                              transform,
+                                              pipeline_id,
+                                              scroll_layer_id,
+                                              composition_operations);
 
         self.flatten_items(traversal,
                            pipeline_id,
                            context,
-                           current_fixed_layer_id,
-                           current_scroll_layer_id,
+                           reference_frame_id,
+                           scroll_layer_id,
                            transform,
                            level);
 
         if level == 0 && self.frame_builder_config.enable_scrollbars {
             let scrollbar_rect = LayerRect::new(LayerPoint::zero(), LayerSize::new(10.0, 70.0));
             context.builder.add_solid_rectangle(
                 &scrollbar_rect,
                 &ClipRegion::simple(&scrollbar_rect),
                 &DEFAULT_SCROLLBAR_COLOR,
-                PrimitiveFlags::Scrollbar(self.scroll_tree.root_scroll_layer_id.unwrap(), 4.0));
+                PrimitiveFlags::Scrollbar(self.scroll_tree.topmost_scroll_layer_id, 4.0));
         }
 
-        context.builder.pop_layer();
+        context.builder.pop_stacking_context();
     }
 
     fn flatten_iframe<'a>(&mut self,
                           pipeline_id: PipelineId,
                           bounds: &LayerRect,
                           context: &mut FlattenContext,
                           current_scroll_layer_id: ScrollLayerId,
                           layer_relative_transform: LayerToScrollTransform) {
-        context.pipeline_sizes.insert(pipeline_id, bounds.size);
 
         let pipeline = match context.scene.pipeline_map.get(&pipeline_id) {
             Some(pipeline) => pipeline,
             None => return,
         };
 
         let display_list = context.scene.display_lists.get(&pipeline_id);
         let display_list = match display_list {
@@ -484,49 +481,64 @@ impl Frame {
             None => {
                 warn!("Pipeline display list does not start with a stacking context.");
                 return;
             }
         };
 
         self.pipeline_epoch_map.insert(pipeline_id, pipeline.epoch);
 
-        let iframe_rect = &LayerRect::new(LayerPoint::zero(), bounds.size);
+        let iframe_rect = LayerRect::new(LayerPoint::zero(), bounds.size);
         let transform = layer_relative_transform.pre_translated(bounds.origin.x,
                                                                 bounds.origin.y,
                                                                 0.0);
-
-        let iframe_fixed_layer_id = ScrollLayerId::create_fixed(pipeline_id);
-        let iframe_scroll_layer_id = ScrollLayerId::root(pipeline_id);
+        let iframe_reference_frame_id =
+            self.scroll_tree.add_reference_frame(iframe_rect,
+                                                 transform,
+                                                 pipeline_id,
+                                                 current_scroll_layer_id);
+        let iframe_scroll_layer_id = ScrollLayerId::root_scroll_layer(pipeline_id);
+        let layer = Layer::new(&LayerRect::new(LayerPoint::zero(), iframe_rect.size),
+                               iframe_clip.main.size,
+                               &LayerToScrollTransform::identity(),
+                               pipeline_id);
+        self.scroll_tree.add_layer(layer.clone(),
+                                   iframe_scroll_layer_id,
+                                   iframe_reference_frame_id);
 
-        let layer = Layer::new(iframe_rect,
-                               iframe_clip.main.size,
-                               &transform,
-                               pipeline_id);
-        self.scroll_tree.add_layer(layer.clone(), iframe_fixed_layer_id, None);
-        self.scroll_tree.add_layer(layer, iframe_scroll_layer_id, Some(current_scroll_layer_id));
+        context.builder.push_scroll_layer(iframe_reference_frame_id,
+                                          iframe_clip,
+                                          &LayerPoint::zero(),
+                                          &iframe_rect.size);
+        context.builder.push_scroll_layer(iframe_scroll_layer_id,
+                                          iframe_clip,
+                                          &LayerPoint::zero(),
+                                          &iframe_clip.main.size);
 
         let mut traversal = DisplayListTraversal::new_skipping_first(display_list);
 
         self.flatten_stacking_context(&mut traversal,
                                       pipeline_id,
                                       context,
-                                      iframe_fixed_layer_id,
+                                      iframe_reference_frame_id,
                                       iframe_scroll_layer_id,
                                       LayerToScrollTransform::identity(),
                                       0,
                                       &iframe_stacking_context,
                                       iframe_clip);
+
+        context.builder.pop_scroll_layer();
+        context.builder.pop_scroll_layer();
     }
 
     fn flatten_items<'a>(&mut self,
                          traversal: &mut DisplayListTraversal<'a>,
                          pipeline_id: PipelineId,
                          context: &mut FlattenContext,
-                         current_fixed_layer_id: ScrollLayerId,
+                         current_reference_frame_id: ScrollLayerId,
                          current_scroll_layer_id: ScrollLayerId,
                          layer_relative_transform: LayerToScrollTransform,
                          level: i32) {
         while let Some(item) = traversal.next() {
             match item.item {
                 SpecificDisplayItem::WebGL(ref info) => {
                     context.builder.add_webgl_rectangle(item.rect,
                                                         &item.clip, info.context_id);
@@ -549,39 +561,42 @@ impl Frame {
                 }
                 SpecificDisplayItem::Text(ref text_info) => {
                     context.builder.add_text(item.rect,
                                              &item.clip,
                                              text_info.font_key,
                                              text_info.size,
                                              text_info.blur_radius,
                                              &text_info.color,
-                                             text_info.glyphs);
+                                             text_info.glyphs,
+                                             text_info.glyph_options);
                 }
                 SpecificDisplayItem::Rectangle(ref info) => {
                     context.builder.add_solid_rectangle(&item.rect,
                                                         &item.clip,
                                                         &info.color,
                                                         PrimitiveFlags::None);
                 }
                 SpecificDisplayItem::Gradient(ref info) => {
                     context.builder.add_gradient(item.rect,
                                                  &item.clip,
                                                  info.start_point,
                                                  info.end_point,
-                                                 info.stops);
+                                                 info.stops,
+                                                 info.extend_mode);
                 }
                 SpecificDisplayItem::RadialGradient(ref info) => {
                     context.builder.add_radial_gradient(item.rect,
                                                         &item.clip,
                                                         info.start_center,
                                                         info.start_radius,
                                                         info.end_center,
                                                         info.end_radius,
-                                                        info.stops);
+                                                        info.stops,
+                                                        info.extend_mode);
                 }
                 SpecificDisplayItem::BoxShadow(ref box_shadow_info) => {
                     context.builder.add_box_shadow(&box_shadow_info.box_bounds,
                                                    &item.clip,
                                                    &box_shadow_info.offset,
                                                    &box_shadow_info.color,
                                                    box_shadow_info.blur_radius,
                                                    box_shadow_info.spread_radius,
@@ -590,32 +605,32 @@ impl Frame {
                 }
                 SpecificDisplayItem::Border(ref info) => {
                     context.builder.add_border(item.rect, &item.clip, info);
                 }
                 SpecificDisplayItem::PushStackingContext(ref info) => {
                     self.flatten_stacking_context(traversal,
                                                   pipeline_id,
                                                   context,
-                                                  current_fixed_layer_id,
+                                                  current_reference_frame_id,
                                                   current_scroll_layer_id,
                                                   layer_relative_transform,
                                                   level + 1,
                                                   &info.stacking_context,
                                                   &item.clip);
                 }
                 SpecificDisplayItem::PushScrollLayer(ref info) => {
                     self.flatten_scroll_layer(traversal,
                                               pipeline_id,
                                               context,
-                                              current_fixed_layer_id,
+                                              current_reference_frame_id,
                                               current_scroll_layer_id,
                                               layer_relative_transform,
                                               level,
-                                              &item.rect,
+                                              &item.clip,
                                               &info.content_size,
                                               info.id);
                 }
                 SpecificDisplayItem::Iframe(ref info) => {
                     self.flatten_iframe(info.pipeline_id,
                                         &item.rect,
                                         context,
                                         current_scroll_layer_id,
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/frame_builder.rs
@@ -0,0 +1,1223 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use app_units::Au;
+use batch_builder::BorderSideHelpers;
+use frame::FrameId;
+use gpu_store::GpuStoreAddress;
+use internal_types::{HardwareCompositeOp, SourceTexture};
+use mask_cache::{ClipSource, MaskCacheInfo};
+use prim_store::{BorderPrimitiveCpu, BorderPrimitiveGpu, BoxShadowPrimitiveGpu};
+use prim_store::{GradientPrimitiveCpu, GradientPrimitiveGpu, ImagePrimitiveCpu, ImagePrimitiveGpu};
+use prim_store::{ImagePrimitiveKind, PrimitiveContainer, PrimitiveGeometry, PrimitiveIndex};
+use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu, RadialGradientPrimitiveGpu};
+use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu, TextRunPrimitiveGpu};
+use prim_store::{YuvImagePrimitiveCpu, YuvImagePrimitiveGpu};
+use profiler::FrameProfileCounters;
+use render_task::{AlphaRenderItem, MaskCacheKey, MaskResult, RenderTask, RenderTaskIndex};
+use render_task::RenderTaskLocation;
+use resource_cache::ResourceCache;
+use scroll_tree::ScrollTree;
+use std::{cmp, f32, i32, mem, usize};
+use tiling::{AuxiliaryListsMap, CompositeOps, Frame, PackedLayer, PackedLayerIndex};
+use tiling::{PrimitiveFlags, PrimitiveRunCmd, RenderPass, RenderTargetContext};
+use tiling::{RenderTaskCollection, ScrollbarPrimitive, ScrollLayer, ScrollLayerIndex};
+use tiling::{StackingContext, StackingContextIndex};
+use util::{self, pack_as_float, rect_from_points_f, subtract_rect, TransformedRect};
+use util::TransformedRectKind;
+use webrender_traits::{as_scroll_parent_rect, BorderDisplayItem, BorderSide, BorderStyle};
+use webrender_traits::{BoxShadowClipMode, ClipRegion, ColorF, device_length, DeviceIntPoint};
+use webrender_traits::{DeviceIntRect, DeviceIntSize, DeviceUintSize, ExtendMode, FontKey};
+use webrender_traits::{FontRenderMode, GlyphOptions, ImageKey, ImageRendering, ItemRange};
+use webrender_traits::{LayerPoint, LayerRect, LayerSize, LayerToScrollTransform, PipelineId};
+use webrender_traits::{ScrollLayerId, ScrollLayerPixel, WebGLContextId, YuvColorSpace};
+
+/// Flags that configure how a `FrameBuilder` builds a frame.
+///
+/// The type is `Copy`, so it is handed around by value.
+#[derive(Clone, Copy)]
+pub struct FrameBuilderConfig {
+    /// When set, the frame flattener adds a scrollbar rectangle to the
+    /// top-level stacking context.
+    pub enable_scrollbars: bool,
+    /// NOTE(review): presumably toggles subpixel anti-aliasing for text; the
+    /// consumer of this flag is outside the visible part of the file.
+    pub enable_subpixel_aa: bool,
+    /// NOTE(review): debug switch; its consumer is outside the visible part of
+    /// the file.
+    pub debug: bool,
+}
+
+impl FrameBuilderConfig {
+    /// Packs the three flags into a config value, unchanged.
+    pub fn new(enable_scrollbars: bool, enable_subpixel_aa: bool, debug: bool) -> Self {
+        FrameBuilderConfig {
+            debug: debug,
+            enable_subpixel_aa: enable_subpixel_aa,
+            enable_scrollbars: enable_scrollbars,
+        }
+    }
+}
+
+/// Collects primitives, stacking contexts and scroll layers, together with the
+/// linear command list that records the order in which they were pushed.
+pub struct FrameBuilder {
+    /// Rect at the layer-space origin sized to the viewport passed to `new`.
+    screen_rect: LayerRect,
+    /// Optional background color, stored as passed to `new`.
+    background_color: Option<ColorF>,
+    /// Storage for every primitive registered through `add_primitive`.
+    prim_store: PrimitiveStore,
+    /// Command stream: push/pop markers for stacking contexts and scroll
+    /// layers, interleaved with runs of consecutive primitives.
+    cmds: Vec<PrimitiveRunCmd>,
+    /// Configuration flags, stored as passed to `new`.
+    config: FrameBuilderConfig,
+
+    /// One entry per pushed stacking context, addressed by `StackingContextIndex`.
+    stacking_context_store: Vec<StackingContext>,
+    /// One entry per pushed scroll layer, addressed by `ScrollLayerIndex`.
+    scroll_layer_store: Vec<ScrollLayer>,
+    /// One slot per stacking context and per scroll layer, addressed by
+    /// `PackedLayerIndex`; slots are created empty.
+    packed_layers: Vec<PackedLayer>,
+
+    /// NOTE(review): not written in the visible part of the file; presumably
+    /// tracks primitives added with `PrimitiveFlags::Scrollbar` - confirm.
+    scrollbar_prims: Vec<ScrollbarPrimitive>,
+
+    /// A stack of scroll layers used during building to properly parent new scroll layers.
+    scroll_layer_stack: Vec<ScrollLayerIndex>,
+}
+
+impl FrameBuilder {
+    /// Creates an empty builder for a frame covering `viewport_size`.
+    ///
+    /// The screen rect is anchored at the layer-space origin. Every store, the
+    /// command list and the scroll layer stack start out empty.
+    pub fn new(viewport_size: LayerSize,
+               background_color: Option<ColorF>,
+               config: FrameBuilderConfig) -> FrameBuilder {
+        let screen_rect = LayerRect::new(LayerPoint::zero(), viewport_size);
+
+        FrameBuilder {
+            screen_rect: screen_rect,
+            background_color: background_color,
+            config: config,
+            prim_store: PrimitiveStore::new(),
+            cmds: Vec::new(),
+            stacking_context_store: Vec::new(),
+            scroll_layer_store: Vec::new(),
+            packed_layers: Vec::new(),
+            scrollbar_prims: Vec::new(),
+            scroll_layer_stack: Vec::new(),
+        }
+    }
+
+    /// Stores a primitive and records it in the command stream.
+    ///
+    /// The clip region is kept as a clip source only when it is complex;
+    /// otherwise the primitive is registered with `ClipSource::NoClip`. When
+    /// the most recent command is already a primitive run, that run grows by
+    /// one; otherwise a new run of length one is appended.
+    ///
+    /// Returns the index of the stored primitive.
+    ///
+    /// Panics if the command list is empty.
+    fn add_primitive(&mut self,
+                     rect: &LayerRect,
+                     clip_region: &ClipRegion,
+                     container: PrimitiveContainer) -> PrimitiveIndex {
+        let clip_source = match clip_region.is_complex() {
+            true => ClipSource::Region(clip_region.clone()),
+            false => ClipSource::NoClip,
+        };
+        let clip_info = MaskCacheInfo::new(&clip_source,
+                                           false,
+                                           &mut self.prim_store.gpu_data32);
+        let geometry = PrimitiveGeometry {
+            local_rect: *rect,
+            local_clip_rect: clip_region.main,
+        };
+
+        let prim_index = self.prim_store.add_primitive(geometry,
+                                                       Box::new(clip_source),
+                                                       clip_info,
+                                                       container);
+
+        // Try to grow the trailing run. The `unwrap` relies on at least one
+        // command having been pushed before any primitive is added.
+        let extended_existing_run = match *self.cmds.last_mut().unwrap() {
+            PrimitiveRunCmd::PrimitiveRun(run_start, ref mut run_len) => {
+                debug_assert!(run_start.0 + *run_len == prim_index.0);
+                *run_len += 1;
+                true
+            }
+            PrimitiveRunCmd::PushStackingContext(..) |
+            PrimitiveRunCmd::PopStackingContext |
+            PrimitiveRunCmd::PushScrollLayer(..) |
+            PrimitiveRunCmd::PopScrollLayer => false,
+        };
+
+        if !extended_existing_run {
+            self.cmds.push(PrimitiveRunCmd::PrimitiveRun(prim_index, 1));
+        }
+
+        prim_index
+    }
+
+    /// Opens a new stacking context.
+    ///
+    /// Reserves an empty packed layer, appends the stacking context to the
+    /// store, and emits a `PushStackingContext` command that refers to the new
+    /// store entry.
+    pub fn push_stacking_context(&mut self,
+                                 rect: LayerRect,
+                                 transform: LayerToScrollTransform,
+                                 pipeline_id: PipelineId,
+                                 scroll_layer_id: ScrollLayerId,
+                                 composite_ops: CompositeOps) {
+        // The packed layer slot is filled in later; only its index is fixed here.
+        let packed_layer_index = PackedLayerIndex(self.packed_layers.len());
+        self.packed_layers.push(PackedLayer::empty());
+
+        let new_context = StackingContext {
+            local_rect: rect,
+            local_transform: transform,
+            scroll_layer_id: scroll_layer_id,
+            pipeline_id: pipeline_id,
+            xf_rect: None,
+            composite_ops: composite_ops,
+            packed_layer_index: packed_layer_index,
+        };
+        let stacking_context_index = StackingContextIndex(self.stacking_context_store.len());
+        self.stacking_context_store.push(new_context);
+
+        self.cmds.push(PrimitiveRunCmd::PushStackingContext(stacking_context_index));
+    }
+
+    /// Appends a `PopStackingContext` command, the counterpart of the
+    /// `PushStackingContext` command emitted by `push_stacking_context`.
+    pub fn pop_stacking_context(&mut self) {
+        self.cmds.push(PrimitiveRunCmd::PopStackingContext);
+    }
+
+    /// Opens a new scroll layer together with an implicit stacking context.
+    ///
+    /// The layer is parented to the scroll layer on top of the builder's
+    /// stack; when the stack is empty the new layer is recorded as its own
+    /// parent. The implicit stacking context starts at the origin and is
+    /// sized `content_size` plus the `iframe_origin` offset.
+    pub fn push_scroll_layer(&mut self,
+                             scroll_layer_id: ScrollLayerId,
+                             clip_region: &ClipRegion,
+                             iframe_origin: &LayerPoint,
+                             content_size: &LayerSize) {
+        let scroll_layer_index = ScrollLayerIndex(self.scroll_layer_store.len());
+        let packed_layer_index = PackedLayerIndex(self.packed_layers.len());
+        let parent_index = match self.scroll_layer_stack.last() {
+            Some(&top_of_stack) => top_of_stack,
+            None => scroll_layer_index,
+        };
+
+        let clip_source = ClipSource::Region(clip_region.clone());
+        // `true`: needs an extra clip for the clip rectangle.
+        let clip_info = MaskCacheInfo::new(&clip_source,
+                                           true,
+                                           &mut self.prim_store.gpu_data32);
+
+        let new_layer = ScrollLayer {
+            scroll_layer_id: scroll_layer_id,
+            parent_index: parent_index,
+            clip_source: clip_source,
+            clip_cache_info: clip_info,
+            xf_rect: None,
+            packed_layer_index: packed_layer_index,
+        };
+        self.scroll_layer_store.push(new_layer);
+        self.packed_layers.push(PackedLayer::empty());
+        self.cmds.push(PrimitiveRunCmd::PushScrollLayer(scroll_layer_index));
+
+        // A fake stacking context is pushed here because primitives that are
+        // direct children of this scroll layer need to be adjusted by its
+        // scroll offset. Eventually we should be able to remove this.
+        let fake_context_size = LayerSize::new(content_size.width + iframe_origin.x,
+                                               content_size.height + iframe_origin.y);
+        let fake_context_rect = LayerRect::new(LayerPoint::zero(), fake_context_size);
+        self.push_stacking_context(fake_context_rect,
+                                   LayerToScrollTransform::identity(),
+                                   scroll_layer_id.pipeline_id,
+                                   scroll_layer_id,
+                                   CompositeOps::empty());
+
+        self.scroll_layer_stack.push(scroll_layer_index);
+    }
+
+    /// Closes the scroll layer opened by the matching `push_scroll_layer` call.
+    pub fn pop_scroll_layer(&mut self) {
+        // The implicit stacking context opened by `push_scroll_layer` has to be
+        // closed before the scroll layer itself.
+        self.pop_stacking_context();
+
+        let close_layer = PrimitiveRunCmd::PopScrollLayer;
+        self.cmds.push(close_layer);
+
+        let _ = self.scroll_layer_stack.pop();
+    }
+
+    /// Adds a solid-colored rectangle primitive.
+    ///
+    /// Fully transparent rectangles (`color.a == 0.0`) are dropped. When `flags`
+    /// marks the rectangle as a scrollbar, the new primitive is also registered in
+    /// `scrollbar_prims`.
+    pub fn add_solid_rectangle(&mut self,
+                               rect: &LayerRect,
+                               clip_region: &ClipRegion,
+                               color: &ColorF,
+                               flags: PrimitiveFlags) {
+        if color.a == 0.0 {
+            return;
+        }
+
+        let container = PrimitiveContainer::Rectangle(RectanglePrimitive {
+            color: *color,
+        });
+        let prim_index = self.add_primitive(rect, clip_region, container);
+
+        // `PrimitiveFlags::None` needs no extra bookkeeping.
+        if let PrimitiveFlags::Scrollbar(scroll_layer_id, border_radius) = flags {
+            self.scrollbar_prims.push(ScrollbarPrimitive {
+                prim_index: prim_index,
+                scroll_layer_id: scroll_layer_id,
+                border_radius: border_radius,
+            });
+        }
+    }
+
+    /// Reports whether the style of `border` is one this builder can render.
+    ///
+    /// Logs a TODO line to stdout and returns `false` for any other style.
+    pub fn supported_style(&mut self, border: &BorderSide) -> bool {
+        let is_supported = match border.style {
+            BorderStyle::None |
+            BorderStyle::Solid |
+            BorderStyle::Double |
+            BorderStyle::Dotted |
+            BorderStyle::Dashed |
+            BorderStyle::Groove |
+            BorderStyle::Ridge |
+            BorderStyle::Inset |
+            BorderStyle::Outset => true,
+            _ => false,
+        };
+
+        if !is_supported {
+            println!("TODO: Other border styles {:?}", border.style);
+        }
+
+        is_supported
+    }
+
+    /// Adds a border around `rect`.
+    ///
+    /// Nothing is added (and a message is printed) when any of the four sides
+    /// uses a style rejected by `supported_style`. When all four sides are solid,
+    /// share one color and the radius is zero, the border is emitted as four solid
+    /// rectangles (top, left, right, bottom); otherwise a single border primitive
+    /// is added.
+    pub fn add_border(&mut self,
+                      rect: LayerRect,
+                      clip_region: &ClipRegion,
+                      border: &BorderDisplayItem) {
+        let radius = &border.radius;
+        let left = &border.left;
+        let right = &border.right;
+        let top = &border.top;
+        let bottom = &border.bottom;
+
+        if !self.supported_style(left) || !self.supported_style(right) ||
+           !self.supported_style(top) || !self.supported_style(bottom) {
+            println!("Unsupported border style, not rendering border");
+            return;
+        }
+
+        // These colors are used during inset/outset scaling.
+        let left_color      = left.border_color(1.0, 2.0/3.0, 0.3, 0.7);
+        let top_color       = top.border_color(1.0, 2.0/3.0, 0.3, 0.7);
+        let right_color     = right.border_color(2.0/3.0, 1.0, 0.7, 0.3);
+        let bottom_color    = bottom.border_color(2.0/3.0, 1.0, 0.7, 0.3);
+
+        // For each corner, `*_outer` is the corner of `rect` itself and `*_inner` is
+        // that corner moved towards the interior by the larger of the corner radius
+        // and the adjacent border width, per axis.
+        let tl_outer = LayerPoint::new(rect.origin.x, rect.origin.y);
+        let tl_inner = tl_outer + LayerPoint::new(radius.top_left.width.max(left.width),
+                                                  radius.top_left.height.max(top.width));
+
+        let tr_outer = LayerPoint::new(rect.origin.x + rect.size.width, rect.origin.y);
+        let tr_inner = tr_outer + LayerPoint::new(-radius.top_right.width.max(right.width),
+                                                  radius.top_right.height.max(top.width));
+
+        let bl_outer = LayerPoint::new(rect.origin.x, rect.origin.y + rect.size.height);
+        let bl_inner = bl_outer + LayerPoint::new(radius.bottom_left.width.max(left.width),
+                                                  -radius.bottom_left.height.max(bottom.width));
+
+        let br_outer = LayerPoint::new(rect.origin.x + rect.size.width,
+                                       rect.origin.y + rect.size.height);
+        let br_inner = br_outer - LayerPoint::new(radius.bottom_right.width.max(right.width),
+                                                  radius.bottom_right.height.max(bottom.width));
+
+        // The border shader is quite expensive. For simple borders, we can just draw
+        // the border with a few rectangles. This generally gives better batching, and
+        // a GPU win in fragment shader time.
+        // More importantly, the software (OSMesa) implementation we run tests on is
+        // particularly slow at running our complex border shader, compared to the
+        // rectangle shader. This has the effect of making some of our tests time
+        // out more often on CI (the actual cause is simply too many Servo processes and
+        // threads being run on CI at once).
+        // TODO(gw): Detect some more simple cases and handle those with simpler shaders too.
+        // TODO(gw): Consider whether it's only worth doing this for large rectangles (since
+        //           it takes a little more CPU time to handle multiple rectangles compared
+        //           to a single border primitive).
+        if left.style == BorderStyle::Solid {
+            let same_color = left_color == top_color &&
+                             left_color == right_color &&
+                             left_color == bottom_color;
+            let same_style = left.style == top.style &&
+                             left.style == right.style &&
+                             left.style == bottom.style;
+
+            if same_color && same_style && radius.is_zero() {
+                // Top and bottom strips span the full width; the left and right
+                // strips fill the remaining height between them.
+                let rects = [
+                    LayerRect::new(rect.origin,
+                                   LayerSize::new(rect.size.width, top.width)),
+                    LayerRect::new(LayerPoint::new(tl_outer.x, tl_inner.y),
+                                   LayerSize::new(left.width,
+                                                  rect.size.height - top.width - bottom.width)),
+                    LayerRect::new(tr_inner,
+                                   LayerSize::new(right.width,
+                                                  rect.size.height - top.width - bottom.width)),
+                    LayerRect::new(LayerPoint::new(bl_outer.x, bl_inner.y),
+                                   LayerSize::new(rect.size.width, bottom.width))
+                ];
+
+                // All four colors are equal here, so `top_color` stands for any of them.
+                for rect in &rects {
+                    self.add_solid_rectangle(rect,
+                                             clip_region,
+                                             &top_color,
+                                             PrimitiveFlags::None);
+                }
+
+                return;
+            }
+        }
+
+        // Note: while similar to `ComplexClipRegion::get_inner_rect()` in spirit,
+        // this code is a bit more complex and therefore cannot be merged with it.
+        let inner_rect = rect_from_points_f(tl_inner.x.max(bl_inner.x),
+                                            tl_inner.y.max(tr_inner.y),
+                                            tr_inner.x.min(br_inner.x),
+                                            bl_inner.y.min(br_inner.y));
+
+        let prim_cpu = BorderPrimitiveCpu {
+            inner_rect: LayerRect::from_untyped(&inner_rect),
+        };
+
+        // Per-side arrays are ordered left, top, right, bottom; radii are ordered
+        // top-left, top-right, bottom-right, bottom-left.
+        let prim_gpu = BorderPrimitiveGpu {
+            colors: [ left_color, top_color, right_color, bottom_color ],
+            widths: [ left.width, top.width, right.width, bottom.width ],
+            style: [
+                pack_as_float(left.style as u32),
+                pack_as_float(top.style as u32),
+                pack_as_float(right.style as u32),
+                pack_as_float(bottom.style as u32),
+            ],
+            radii: [
+                radius.top_left,
+                radius.top_right,
+                radius.bottom_right,
+                radius.bottom_left,
+            ],
+        };
+
+        self.add_primitive(&rect,
+                           clip_region,
+                           PrimitiveContainer::Border(prim_cpu, prim_gpu));
+    }
+
+    /// Adds a linear gradient primitive covering `rect`.
+    ///
+    /// Clamped gradients whose line is horizontal or vertical are emitted as
+    /// aligned gradients; everything else becomes an angle gradient. For angle
+    /// gradients the line is normalized so that start < end (by x, then by y),
+    /// and `reverse_stops` records whether the endpoints were swapped.
+    pub fn add_gradient(&mut self,
+                        rect: LayerRect,
+                        clip_region: &ClipRegion,
+                        start_point: LayerPoint,
+                        end_point: LayerPoint,
+                        stops: ItemRange,
+                        extend_mode: ExtendMode) {
+        // Fast path for clamped, axis-aligned gradients.
+        let on_axis = start_point.x == end_point.x || start_point.y == end_point.y;
+        let aligned = extend_mode == ExtendMode::Clamp && on_axis;
+
+        // A gradient specified in reverse, with its stops also supplied in reverse,
+        // should render the same as the forward one. That needs a reference
+        // orientation for the gradient line; somewhat arbitrarily it is start < end.
+        // Aligned gradients already render identically in both orientations, so
+        // they are never reversed.
+        let runs_backwards = if start_point.x == end_point.x {
+            start_point.y > end_point.y
+        } else {
+            start_point.x > end_point.x
+        };
+        let reverse_stops = !aligned && runs_backwards;
+
+        // To get reftests matching exactly with reversed start/end points, the
+        // gradient line itself has to be reversed in those cases.
+        let (line_start, line_end) = match reverse_stops {
+            true => (end_point, start_point),
+            false => (start_point, end_point),
+        };
+
+        let gpu_data = GradientPrimitiveGpu {
+            start_point: line_start,
+            end_point: line_end,
+            extend_mode: pack_as_float(extend_mode as u32),
+            padding: [0.0, 0.0, 0.0],
+        };
+
+        let cpu_data = GradientPrimitiveCpu {
+            stops_range: stops,
+            extend_mode: extend_mode,
+            reverse_stops: reverse_stops,
+            cache_dirty: true,
+        };
+
+        let container = match aligned {
+            true => PrimitiveContainer::AlignedGradient(cpu_data, gpu_data),
+            false => PrimitiveContainer::AngleGradient(cpu_data, gpu_data),
+        };
+
+        self.add_primitive(&rect, clip_region, container);
+    }
+
+    /// Adds a radial gradient primitive covering `rect`, described by a start
+    /// circle and an end circle (center plus radius each).
+    pub fn add_radial_gradient(&mut self,
+                               rect: LayerRect,
+                               clip_region: &ClipRegion,
+                               start_center: LayerPoint,
+                               start_radius: f32,
+                               end_center: LayerPoint,
+                               end_radius: f32,
+                               stops: ItemRange,
+                               extend_mode: ExtendMode) {
+        let gpu_data = RadialGradientPrimitiveGpu {
+            start_center: start_center,
+            end_center: end_center,
+            start_radius: start_radius,
+            end_radius: end_radius,
+            extend_mode: pack_as_float(extend_mode as u32),
+            padding: [0.0],
+        };
+
+        let cpu_data = RadialGradientPrimitiveCpu {
+            stops_range: stops,
+            extend_mode: extend_mode,
+            cache_dirty: true,
+        };
+
+        let container = PrimitiveContainer::RadialGradient(cpu_data, gpu_data);
+        self.add_primitive(&rect, clip_region, container);
+    }
+
+    /// Adds a run of text as one or more text-run primitives.
+    ///
+    /// Nothing is added when the text is fully transparent, has a non-positive
+    /// font size, or contains no glyphs. Unblurred text is split into runs of at
+    /// most 8 glyphs each; blurred text (a text shadow) is emitted as a single run
+    /// covering the whole glyph range and is always rendered in alpha mode.
+    pub fn add_text(&mut self,
+                    rect: LayerRect,
+                    clip_region: &ClipRegion,
+                    font_key: FontKey,
+                    size: Au,
+                    blur_radius: Au,
+                    color: &ColorF,
+                    glyph_range: ItemRange,
+                    glyph_options: Option<GlyphOptions>) {
+        if color.a == 0.0 {
+            return
+        }
+
+        if size.0 <= 0 {
+            return
+        }
+
+        // An empty glyph range produces no primitives. Bail out early: for blurred
+        // text `glyphs_per_run` equals the range length, so an empty range would
+        // otherwise make the run-count computation below underflow and divide by
+        // zero.
+        if glyph_range.length == 0 {
+            return
+        }
+
+        let (render_mode, glyphs_per_run) = if blur_radius == Au(0) {
+            // TODO(gw): Use a proper algorithm to select
+            // whether this item should be rendered with
+            // subpixel AA!
+            let render_mode = if self.config.enable_subpixel_aa {
+                FontRenderMode::Subpixel
+            } else {
+                FontRenderMode::Alpha
+            };
+
+            (render_mode, 8)
+        } else {
+            // TODO(gw): Support breaking up text shadow when
+            // the size of the text run exceeds the dimensions
+            // of the render target texture.
+            (FontRenderMode::Alpha, glyph_range.length)
+        };
+
+        // Ceiling division: the last run may hold fewer than `glyphs_per_run` glyphs.
+        let text_run_count = (glyph_range.length + glyphs_per_run - 1) / glyphs_per_run;
+        for run_index in 0..text_run_count {
+            let start = run_index * glyphs_per_run;
+            let end = cmp::min(start + glyphs_per_run, glyph_range.length);
+            let sub_range = ItemRange {
+                start: glyph_range.start + start,
+                length: end - start,
+            };
+
+            let prim_cpu = TextRunPrimitiveCpu {
+                font_key: font_key,
+                logical_font_size: size,
+                blur_radius: blur_radius,
+                glyph_range: sub_range,
+                cache_dirty: true,
+                glyph_instances: Vec::new(),
+                color_texture_id: SourceTexture::Invalid,
+                color: *color,
+                render_mode: render_mode,
+                glyph_options: glyph_options,
+                resource_address: GpuStoreAddress(0),
+            };
+
+            let prim_gpu = TextRunPrimitiveGpu {
+                color: *color,
+            };
+
+            // Every run shares the full text rectangle and clip region.
+            self.add_primitive(&rect,
+                               clip_region,
+                               PrimitiveContainer::TextRun(prim_cpu, prim_gpu));
+        }
+    }
+
+    /// Adds a box shadow for `box_bounds`.
+    ///
+    /// Fully transparent shadows are dropped. A shadow with no blur, no spread and
+    /// no clip mode is drawn as a plain solid rectangle. Otherwise the shadow
+    /// rectangle is `box_bounds` translated by `box_offset` and inflated by
+    /// `spread_radius`; it is emitted either as solid rectangles (when there is no
+    /// soft edge at all) or as a box shadow primitive.
+    pub fn add_box_shadow(&mut self,
+                          box_bounds: &LayerRect,
+                          clip_region: &ClipRegion,
+                          box_offset: &LayerPoint,
+                          color: &ColorF,
+                          blur_radius: f32,
+                          spread_radius: f32,
+                          border_radius: f32,
+                          clip_mode: BoxShadowClipMode) {
+        if color.a == 0.0 {
+            return;
+        }
+
+        // Fast path.
+        // NOTE(review): this draws at `box_bounds` without applying `box_offset` --
+        // confirm that is intended.
+        let is_plain_box = blur_radius == 0.0 &&
+                           spread_radius == 0.0 &&
+                           clip_mode == BoxShadowClipMode::None;
+        if is_plain_box {
+            self.add_solid_rectangle(box_bounds, clip_region, color, PrimitiveFlags::None);
+            return;
+        }
+
+        let bs_rect = box_bounds.translate(box_offset)
+                                .inflate(spread_radius, spread_radius);
+
+        let outer_edge = 2.0 * blur_radius;
+        let inner_edge = outer_edge.max(border_radius);
+        let edge_size = outer_edge + inner_edge;
+        let outer_rect = bs_rect.inflate(outer_edge, outer_edge);
+
+        // `instance_rects` receives the pieces left after subtracting one rectangle
+        // from the other; which rectangle is subtracted depends on the clip mode.
+        let mut instance_rects = Vec::new();
+        let (prim_rect, inverted) = match clip_mode {
+            BoxShadowClipMode::Inset => {
+                subtract_rect(box_bounds, &bs_rect, &mut instance_rects);
+                (*box_bounds, 1.0)
+            }
+            BoxShadowClipMode::Outset | BoxShadowClipMode::None => {
+                subtract_rect(&outer_rect, box_bounds, &mut instance_rects);
+                (outer_rect, 0.0)
+            }
+        };
+
+        if edge_size != 0.0 {
+            let gpu_data = BoxShadowPrimitiveGpu {
+                src_rect: *box_bounds,
+                bs_rect: bs_rect,
+                color: *color,
+                blur_radius: blur_radius,
+                border_radius: border_radius,
+                edge_size: edge_size,
+                inverted: inverted,
+            };
+
+            let container = PrimitiveContainer::BoxShadow(gpu_data, instance_rects);
+            self.add_primitive(&prim_rect, clip_region, container);
+            return;
+        }
+
+        // No soft edge: the leftover pieces can be drawn as solid rectangles.
+        for piece in &instance_rects {
+            self.add_solid_rectangle(piece, clip_region, color, PrimitiveFlags::None);
+        }
+    }
+
+    /// Adds an image primitive that displays the WebGL context `context_id`,
+    /// stretched over `rect` with no tile spacing.
+    pub fn add_webgl_rectangle(&mut self,
+                               rect: LayerRect,
+                               clip_region: &ClipRegion,
+                               context_id: WebGLContextId) {
+        let gpu_data = ImagePrimitiveGpu {
+            stretch_size: rect.size,
+            tile_spacing: LayerSize::zero(),
+        };
+
+        let cpu_data = ImagePrimitiveCpu {
+            kind: ImagePrimitiveKind::WebGL(context_id),
+            color_texture_id: SourceTexture::Invalid,
+            resource_address: GpuStoreAddress(0),
+        };
+
+        let container = PrimitiveContainer::Image(cpu_data, gpu_data);
+        self.add_primitive(&rect, clip_region, container);
+    }
+
+    /// Adds an image primitive for `image_key` covering `rect`, using the given
+    /// stretch size and tile spacing.
+    pub fn add_image(&mut self,
+                     rect: LayerRect,
+                     clip_region: &ClipRegion,
+                     stretch_size: &LayerSize,
+                     tile_spacing: &LayerSize,
+                     image_key: ImageKey,
+                     image_rendering: ImageRendering) {
+        let gpu_data = ImagePrimitiveGpu {
+            stretch_size: *stretch_size,
+            tile_spacing: *tile_spacing,
+        };
+
+        let image_kind = ImagePrimitiveKind::Image(image_key, image_rendering, *tile_spacing);
+        let cpu_data = ImagePrimitiveCpu {
+            kind: image_kind,
+            color_texture_id: SourceTexture::Invalid,
+            resource_address: GpuStoreAddress(0),
+        };
+
+        let container = PrimitiveContainer::Image(cpu_data, gpu_data);
+        self.add_primitive(&rect, clip_region, container);
+    }
+
+    /// Adds a YUV image primitive covering `rect`, built from three separate
+    /// plane images (Y, U and V) and a color space.
+    pub fn add_yuv_image(&mut self,
+                         rect: LayerRect,
+                         clip_region: &ClipRegion,
+                         y_image_key: ImageKey,
+                         u_image_key: ImageKey,
+                         v_image_key: ImageKey,
+                         color_space: YuvColorSpace) {
+        // The plane textures start out invalid.
+        let cpu_data = YuvImagePrimitiveCpu {
+            y_key: y_image_key,
+            y_texture_id: SourceTexture::Invalid,
+            u_key: u_image_key,
+            u_texture_id: SourceTexture::Invalid,
+            v_key: v_image_key,
+            v_texture_id: SourceTexture::Invalid,
+        };
+        let gpu_data = YuvImagePrimitiveGpu::new(rect.size, color_space);
+
+        let container = PrimitiveContainer::YuvImage(cpu_data, gpu_data);
+        self.add_primitive(&rect, clip_region, container);
+    }
+
+    /// Works out what layers and their primitives contribute in screen space
+    /// (bounding rectangles and resources).
+    ///
+    /// All of the work is delegated to `LayerRectCalculationAndCullingPass`; this
+    /// method only wraps it in the "cull" profiling scope.
+    fn build_layer_screen_rects_and_cull_layers(&mut self,
+                                                screen_rect: &DeviceIntRect,
+                                                scroll_tree: &ScrollTree,
+                                                auxiliary_lists_map: &AuxiliaryListsMap,
+                                                resource_cache: &mut ResourceCache,
+                                                profile_counters: &mut FrameProfileCounters,
+                                                device_pixel_ratio: f32) {
+        profile_scope!("cull");
+
+        LayerRectCalculationAndCullingPass::create_and_run(
+            self,
+            screen_rect,
+            scroll_tree,
+            auxiliary_lists_map,
+            resource_cache,
+            profile_counters,
+            device_pixel_ratio);
+    }
+
+    /// Repositions every registered scrollbar primitive to match the current
+    /// scroll state of its scroll layer.
+    ///
+    /// A scrollbar whose layer cannot scroll vertically gets an empty clip rect.
+    /// Otherwise it is placed along the right edge of the layer's viewport, at a
+    /// vertical position interpolated from the current scroll offset, and its clip
+    /// source is refreshed (a complex clip when it has a border radius).
+    fn update_scroll_bars(&mut self, scroll_tree: &ScrollTree) {
+        let distance_from_edge = 8.0;
+
+        for scrollbar_prim in &self.scrollbar_prims {
+            let geom_slot = scrollbar_prim.prim_index.0 as i32;
+            let mut geom = (*self.prim_store.gpu_geometry.get(GpuStoreAddress(geom_slot))).clone();
+            let scroll_layer = &scroll_tree.layers[&scrollbar_prim.scroll_layer_id];
+
+            let scrollable_distance = scroll_layer.scrollable_height();
+
+            if scrollable_distance <= 0.0 {
+                // Nothing to scroll: give the scrollbar an empty clip rect.
+                geom.local_clip_rect.size = LayerSize::zero();
+            } else {
+                let viewport = &scroll_layer.local_viewport_rect;
+                let offset_y = scroll_layer.scrolling.offset.y;
+
+                // Interpolation factor derived from the (negated) scroll offset.
+                let f = -offset_y / scrollable_distance;
+
+                let min_y = viewport.origin.y -
+                            offset_y +
+                            distance_from_edge;
+
+                let max_y = viewport.origin.y +
+                            viewport.size.height -
+                            offset_y -
+                            geom.local_rect.size.height -
+                            distance_from_edge;
+
+                geom.local_rect.origin.x = viewport.origin.x +
+                                           viewport.size.width -
+                                           geom.local_rect.size.width -
+                                           distance_from_edge;
+
+                geom.local_rect.origin.y = util::lerp(min_y, max_y, f);
+                geom.local_clip_rect = geom.local_rect;
+
+                let clip_source = if scrollbar_prim.border_radius == 0.0 {
+                    ClipSource::NoClip
+                } else {
+                    ClipSource::Complex(geom.local_rect, scrollbar_prim.border_radius)
+                };
+                self.prim_store.set_clip_source(scrollbar_prim.prim_index, clip_source);
+            }
+
+            // Store the updated geometry back into the GPU geometry store.
+            *self.prim_store.gpu_geometry.get_mut(GpuStoreAddress(geom_slot)) = geom;
+        }
+    }
+
+    /// Walks the recorded command list and builds the tree of render tasks.
+    ///
+    /// Returns the root alpha-batch task together with the number of task indices
+    /// handed out by this function. Stacking contexts that are not visible
+    /// contribute nothing, and neither do primitives without a CPU bounding rect.
+    fn build_render_task(&self) -> (RenderTask, usize) {
+        profile_scope!("build_render_task");
+
+        // `next_z` is assigned to each render item in the order items are emitted.
+        let mut next_z = 0;
+        let mut next_task_index = RenderTaskIndex(0);
+
+        // Stack of the stacking contexts that are currently open.
+        let mut sc_stack = Vec::new();
+        let mut current_task = RenderTask::new_alpha_batch(next_task_index,
+                                                           DeviceIntPoint::zero(),
+                                                           RenderTaskLocation::Fixed);
+        next_task_index.0 += 1;
+        // Tasks suspended while a nested task (created for a composite op) is being filled.
+        let mut alpha_task_stack = Vec::new();
+
+        for cmd in &self.cmds {
+            match *cmd {
+                PrimitiveRunCmd::PushStackingContext(stacking_context_index) => {
+                    let stacking_context = &self.stacking_context_store[stacking_context_index.0];
+                    // Pushed even when invisible, so the matching pop stays balanced.
+                    sc_stack.push(stacking_context_index);
+
+                    if !stacking_context.is_visible() {
+                        continue;
+                    }
+
+                    // Each composite operation gets its own nested alpha-batch task,
+                    // sized to the stacking context's bounding rect. The task being
+                    // replaced is parked on `alpha_task_stack` until the context is popped.
+                    let composite_count = stacking_context.composite_ops.count();
+                    for _ in 0..composite_count {
+                        let stacking_context_rect =
+                            stacking_context.xf_rect.as_ref().unwrap().bounding_rect;
+                        let location = RenderTaskLocation::Dynamic(None, stacking_context_rect.size);
+                        let new_task = RenderTask::new_alpha_batch(next_task_index,
+                                                                   stacking_context_rect.origin,
+                                                                   location);
+                        next_task_index.0 += 1;
+                        let prev_task = mem::replace(&mut current_task, new_task);
+                        alpha_task_stack.push(prev_task);
+                    }
+                }
+                PrimitiveRunCmd::PopStackingContext => {
+                    let stacking_context_index = sc_stack.pop().unwrap();
+                    let stacking_context = &self.stacking_context_store[stacking_context_index.0];
+
+                    if !stacking_context.is_visible() {
+                        continue;
+                    }
+
+                    // For every filter: resume the parked parent task, give it a blend
+                    // item referring to the finished task, and attach the finished
+                    // task as a child of the parent.
+                    for filter in &stacking_context.composite_ops.filters {
+                        let mut prev_task = alpha_task_stack.pop().unwrap();
+                        let item = AlphaRenderItem::Blend(stacking_context_index,
+                                                          current_task.id,
+                                                          *filter,
+                                                          next_z);
+                        next_z += 1;
+                        prev_task.as_alpha_batch().alpha_items.push(item);
+                        prev_task.children.push(current_task);
+                        current_task = prev_task;
+                    }
+                    if let Some(mix_blend_mode) = stacking_context.composite_ops.mix_blend_mode {
+                        match HardwareCompositeOp::from_mix_blend_mode(mix_blend_mode) {
+                            // The blend mode maps to a hardware composite operation.
+                            Some(op) => {
+                                let mut prev_task = alpha_task_stack.pop().unwrap();
+                                let item = AlphaRenderItem::HardwareComposite(stacking_context_index,
+                                                                              current_task.id,
+                                                                              op,
+                                                                              next_z);
+                                next_z += 1;
+                                prev_task.as_alpha_batch().alpha_items.push(item);
+                                prev_task.children.push(current_task);
+                                current_task = prev_task;
+                            }
+                            // No hardware equivalent: add a readback task over the
+                            // stacking context's bounding rect and composite using both
+                            // it and the finished task.
+                            None => {
+                                let stacking_context_rect =
+                                    stacking_context.xf_rect.as_ref().unwrap().bounding_rect;
+                                let readback_task =
+                                    RenderTask::new_readback(stacking_context_index, stacking_context_rect);
+
+                                let mut prev_task = alpha_task_stack.pop().unwrap();
+                                let item = AlphaRenderItem::Composite(stacking_context_index,
+                                                                      readback_task.id,
+                                                                      current_task.id,
+                                                                      mix_blend_mode,
+                                                                      next_z);
+                                next_z += 1;
+                                prev_task.as_alpha_batch().alpha_items.push(item);
+                                prev_task.children.push(current_task);
+                                prev_task.children.push(readback_task);
+                                current_task = prev_task;
+                            }
+                        }
+                    }
+                }
+                PrimitiveRunCmd::PrimitiveRun(first_prim_index, prim_count) => {
+                    let stacking_context_index = *sc_stack.last().unwrap();
+                    let stacking_context = &self.stacking_context_store[stacking_context_index.0];
+
+                    if !stacking_context.is_visible() {
+                        continue;
+                    }
+
+                    for i in 0..prim_count {
+                        let prim_index = PrimitiveIndex(first_prim_index.0 + i);
+
+                        // Primitives without a CPU bounding rect are skipped.
+                        if self.prim_store.cpu_bounding_rects[prim_index.0].is_some() {
+                            let prim_metadata = self.prim_store.get_metadata(prim_index);
+
+                            // Add any dynamic render tasks needed to render this primitive
+                            if let Some(ref render_task) = prim_metadata.render_task {
+                                current_task.children.push(render_task.clone());
+                            }
+                            if let Some(ref clip_task) = prim_metadata.clip_task {
+                                current_task.children.push(clip_task.clone());
+                            }
+
+                            // A primitive goes into the opaque list only when its
+                            // stacking context's transform kind is not complex, the
+                            // primitive is opaque, and it has no clip task.
+                            let transform_kind = stacking_context.xf_rect.as_ref().unwrap().kind;
+                            let needs_clipping = prim_metadata.clip_task.is_some();
+                            let needs_blending = transform_kind == TransformedRectKind::Complex ||
+                                                 !prim_metadata.is_opaque ||
+                                                 needs_clipping;
+
+                            let items = if needs_blending {
+                                &mut current_task.as_alpha_batch().alpha_items
+                            } else {
+                                &mut current_task.as_alpha_batch().opaque_items
+                            };
+                            items.push(AlphaRenderItem::Primitive(stacking_context_index,
+                                                                  prim_index,
+                                                                  next_z));
+                            next_z += 1;
+                        }
+                    }
+                }
+                // Scroll layer commands create no render tasks or items here.
+                PrimitiveRunCmd::PushScrollLayer(_) | PrimitiveRunCmd::PopScrollLayer => { }
+            }
+        }
+
+        // Every task parked for a composite operation must have been resumed by now.
+        debug_assert!(alpha_task_stack.is_empty());
+        (current_task, next_task_index.0)
+    }
+
+    /// Builds the `Frame` for `frame_id` from the current state of this builder.
+    ///
+    /// The work happens in a fixed order between `resource_cache.begin_frame()` and
+    /// `resource_cache.end_frame()`: layer screen rects are computed and layers are
+    /// culled, the render task tree is built, the call blocks until all resources
+    /// have been added to the cache, clip caches and primitives are resolved, and
+    /// the render tasks are assigned to passes which are then built.
+    ///
+    /// `device_pixel_ratio` is used to convert the screen size into the device rect
+    /// and is forwarded to culling and to primitive resolution.
+    pub fn build(&mut self,
+                 resource_cache: &mut ResourceCache,
+                 frame_id: FrameId,
+                 scroll_tree: &ScrollTree,
+                 auxiliary_lists_map: &AuxiliaryListsMap,
+                 device_pixel_ratio: f32) -> Frame {
+        profile_scope!("build");
+
+        let mut profile_counters = FrameProfileCounters::new();
+        profile_counters.total_primitives.set(self.prim_store.prim_count());
+
+        resource_cache.begin_frame(frame_id);
+
+        // Convert the builder's screen size into a `DeviceIntRect` anchored at the origin.
+        let screen_rect = DeviceIntRect::new(
+            DeviceIntPoint::zero(),
+            DeviceIntSize::from_lengths(device_length(self.screen_rect.size.width as f32,
+                                                      device_pixel_ratio),
+                                        device_length(self.screen_rect.size.height as f32,
+                                                      device_pixel_ratio)));
+
+        // Pick a size for the cache render targets to be. The main requirement is that it
+        // has to be at least as large as the framebuffer size. This ensures that it will
+        // always be able to allocate the worst case render task (such as a clip mask that
+        // covers the entire screen).
+        let cache_size = DeviceUintSize::new(cmp::max(1024, screen_rect.size.width as u32),
+                                             cmp::max(1024, screen_rect.size.height as u32));
+
+        self.update_scroll_bars(scroll_tree);
+
+        self.build_layer_screen_rects_and_cull_layers(&screen_rect,
+                                                      scroll_tree,
+                                                      auxiliary_lists_map,
+                                                      resource_cache,
+                                                      &mut profile_counters,
+                                                      device_pixel_ratio);
+
+        let (main_render_task, static_render_task_count) = self.build_render_task();
+        let mut render_tasks = RenderTaskCollection::new(static_render_task_count);
+
+        // `max_depth` receives the counter by mutable reference and is expected to leave
+        // the number of passes this task tree needs in it.
+        let mut required_pass_count = 0;
+        main_render_task.max_depth(0, &mut required_pass_count);
+
+        resource_cache.block_until_all_resources_added();
+
+        // Resolve the clip cache of every scroll layer that has clip cache info.
+        for scroll_layer in self.scroll_layer_store.iter() {
+            if let Some(ref clip_info) = scroll_layer.clip_cache_info {
+                self.prim_store.resolve_clip_cache(clip_info, resource_cache);
+            }
+        }
+
+        let deferred_resolves = self.prim_store.resolve_primitives(resource_cache,
+                                                                   device_pixel_ratio);
+
+        let mut passes = Vec::new();
+
+        // Do the allocations now, assigning each tile's tasks to a render
+        // pass and target as required.
+        // The second argument to `RenderPass::new` is true only for the last pass index.
+        for index in 0..required_pass_count {
+            passes.push(RenderPass::new(index as isize,
+                                        index == required_pass_count-1,
+                                        cache_size));
+        }
+
+        // NOTE(review): `passes.len() - 1` underflows if `required_pass_count` is 0; this
+        // relies on `max_depth` always reporting at least one pass — confirm.
+        main_render_task.assign_to_passes(passes.len() - 1, &mut passes);
+
+        for pass in &mut passes {
+            // A fresh context borrowing the stores and the resource cache is created
+            // for each pass.
+            let ctx = RenderTargetContext {
+                stacking_context_store: &self.stacking_context_store,
+                prim_store: &self.prim_store,
+                resource_cache: resource_cache,
+            };
+
+            pass.build(&ctx, &mut render_tasks);
+
+            profile_counters.passes.inc();
+            profile_counters.targets.add(pass.targets.len());
+        }
+
+        resource_cache.end_frame();
+
+        // The packed layers are cloned and each GPU store is built into the frame;
+        // `viewport_size` is the builder's own screen size, not the device rect above.
+        Frame {
+            device_pixel_ratio: device_pixel_ratio,
+            background_color: self.background_color,
+            viewport_size: self.screen_rect.size,
+            profile_counters: profile_counters,
+            passes: passes,
+            cache_size: cache_size,
+            layer_texture_data: self.packed_layers.clone(),
+            render_task_data: render_tasks.render_task_data,
+            gpu_data16: self.prim_store.gpu_data16.build(),
+            gpu_data32: self.prim_store.gpu_data32.build(),
+            gpu_data64: self.prim_store.gpu_data64.build(),
+            gpu_data128: self.prim_store.gpu_data128.build(),
+            gpu_geometry: self.prim_store.gpu_geometry.build(),
+            gpu_gradient_data: self.prim_store.gpu_gradient_data.build(),
+            gpu_resource_rects: self.prim_store.gpu_resource_rects.build(),
+            deferred_resolves: deferred_resolves,
+        }
+    }
+}
+
+/// State for one walk over the frame builder's command list.
+///
+/// `create_and_run` constructs it and calls `run`, which dispatches each
+/// `PrimitiveRunCmd` to a handler. The handlers update packed layer transforms
+/// and screen rects, and cull and prepare the primitives of each primitive run.
+struct LayerRectCalculationAndCullingPass<'a> {
+    /// Builder whose commands are walked and whose stores are updated.
+    frame_builder: &'a mut FrameBuilder,
+    /// Screen rect that layer and primitive bounds are tested against.
+    screen_rect: &'a DeviceIntRect,
+    /// Source of the per-layer transforms and viewport rects.
+    scroll_tree: &'a ScrollTree,
+    /// Auxiliary lists, looked up by pipeline id.
+    auxiliary_lists_map: &'a AuxiliaryListsMap,
+    /// Cache that image masks are requested from and primitives are prepared with.
+    resource_cache: &'a mut ResourceCache,
+    /// Counters; `visible_primitives` is incremented for each primitive kept.
+    profile_counters: &'a mut FrameProfileCounters,
+    device_pixel_ratio: f32,
+    /// Stacking contexts currently open (pushed/popped by the matching commands).
+    stacking_context_stack: Vec<StackingContextIndex>,
+    /// Scroll layers currently open (pushed/popped by the matching commands).
+    scroll_layer_stack: Vec<ScrollLayerIndex>,
+
+    /// A cached clip info stack, which should handle the most common situation,
+    /// which is that we are using the same clip info stack that we were using
+    /// previously.
+    current_clip_stack: Vec<(PackedLayerIndex, MaskCacheInfo)>,
+
+    /// The scroll layer that `current_clip_stack` was last rebuilt for, or `None`
+    /// if it has not been built yet.
+    current_clip_stack_scroll_layer: Option<ScrollLayerIndex>
+}
+
+impl<'a> LayerRectCalculationAndCullingPass<'a> {
+    /// Builds a pass around the supplied frame state and runs it immediately.
+    ///
+    /// The pass starts with empty stacking context and scroll layer stacks and
+    /// with no cached clip stack; it is dropped as soon as `run` returns.
+    fn create_and_run(frame_builder: &'a mut FrameBuilder,
+                      screen_rect: &'a DeviceIntRect,
+                      scroll_tree: &'a ScrollTree,
+                      auxiliary_lists_map: &'a AuxiliaryListsMap,
+                      resource_cache: &'a mut ResourceCache,
+                      profile_counters: &'a mut FrameProfileCounters,
+                      device_pixel_ratio: f32) {
+        LayerRectCalculationAndCullingPass {
+            // Borrowed inputs, forwarded unchanged.
+            frame_builder: frame_builder,
+            screen_rect: screen_rect,
+            scroll_tree: scroll_tree,
+            auxiliary_lists_map: auxiliary_lists_map,
+            resource_cache: resource_cache,
+            profile_counters: profile_counters,
+            device_pixel_ratio: device_pixel_ratio,
+
+            // Traversal state, initially empty.
+            stacking_context_stack: Vec::new(),
+            scroll_layer_stack: Vec::new(),
+            current_clip_stack: Vec::new(),
+            current_clip_stack_scroll_layer: None,
+        }.run();
+    }
+
+    /// Walks the frame builder's command list in order, dispatching each command
+    /// to its handler.
+    ///
+    /// The command list is moved out of the frame builder for the duration of the
+    /// walk (leaving an empty `Vec` behind) so the handlers can borrow the frame
+    /// builder mutably, and is put back once the walk is done.
+    fn run(&mut self) {
+        let commands = mem::replace(&mut self.frame_builder.cmds, Vec::new());
+
+        for cmd in &commands {
+            match *cmd {
+                PrimitiveRunCmd::PushStackingContext(index) => {
+                    self.handle_push_stacking_context(index)
+                }
+                PrimitiveRunCmd::PopStackingContext => {
+                    self.stacking_context_stack.pop();
+                }
+                PrimitiveRunCmd::PushScrollLayer(index) => {
+                    self.handle_push_scroll_layer(index)
+                }
+                PrimitiveRunCmd::PopScrollLayer => self.handle_pop_scroll_layer(),
+                PrimitiveRunCmd::PrimitiveRun(first_prim, count) => {
+                    self.handle_primitive_run(first_prim, count)
+                }
+            }
+        }
+
+        // Hand the commands back; the placeholder `Vec` is dropped by the assignment.
+        self.frame_builder.cmds = commands;
+    }
+
+    /// Handles a `PushScrollLayer` command.
+    ///
+    /// Pushes the layer onto the scroll layer stack, refreshes the transform and
+    /// inverse transform of its packed layer from the scroll tree and, when the
+    /// layer's non-empty viewport intersects the screen, stores its screen
+    /// vertices, local clip rect and transformed rect. If the layer has clip cache
+    /// info, that info is updated and any image mask of the clip source is
+    /// requested from the resource cache.
+    fn handle_push_scroll_layer(&mut self, scroll_layer_index: ScrollLayerIndex) {
+        self.scroll_layer_stack.push(scroll_layer_index);
+
+        let scroll_layer = &mut self.frame_builder.scroll_layer_store[scroll_layer_index.0];
+        let packed_layer_index = scroll_layer.packed_layer_index;
+        let scroll_tree_layer = &self.scroll_tree.layers[&scroll_layer.scroll_layer_id];
+        let packed_layer = &mut self.frame_builder.packed_layers[packed_layer_index.0];
+
+        packed_layer.transform = scroll_tree_layer.world_viewport_transform;
+        // NOTE(review): `unwrap` panics if the viewport transform is not invertible.
+        packed_layer.inv_transform = packed_layer.transform.inverse().unwrap();
+
+        // The combined local viewport rect, shifted by the layer's scroll offset.
+        let local_rect = &scroll_tree_layer.combined_local_viewport_rect
+                                           .translate(&scroll_tree_layer.scrolling.offset);
+        if !local_rect.is_empty() {
+            let layer_xf_rect = TransformedRect::new(local_rect,
+                                                     &packed_layer.transform,
+                                                     self.device_pixel_ratio);
+
+            // NOTE(review): `xf_rect` is only written when the layer is on screen; nothing
+            // here clears a value left over from an earlier run — confirm it is reset
+            // elsewhere.
+            if layer_xf_rect.bounding_rect.intersects(&self.screen_rect) {
+                packed_layer.screen_vertices = layer_xf_rect.vertices.clone();
+                packed_layer.local_clip_rect = *local_rect;
+                scroll_layer.xf_rect = Some(layer_xf_rect);
+            }
+        }
+
+        // Layers without clip cache info need no further work.
+        let clip_info = match scroll_layer.clip_cache_info {
+            Some(ref mut clip_info) => clip_info,
+            None => return,
+        };
+
+        let pipeline_id = scroll_layer.scroll_layer_id.pipeline_id;
+        let auxiliary_lists = self.auxiliary_lists_map.get(&pipeline_id)
+                                                       .expect("No auxiliary lists?");
+        clip_info.update(&scroll_layer.clip_source,
+                         &packed_layer.transform,
+                         &mut self.frame_builder.prim_store.gpu_data32,
+                         self.device_pixel_ratio,
+                         auxiliary_lists);
+
+        if let Some(mask) = scroll_layer.clip_source.image_mask() {
+            // We don't add the image mask for resolution, because layer masks are resolved later.
+            self.resource_cache.request_image(mask.image, ImageRendering::Auto);
+        }
+    }
+
+    /// Handles a `PushStackingContext` command.
+    ///
+    /// Pushes the context onto the stacking context stack and refreshes the
+    /// transform and inverse transform of its packed layer. If the context can
+    /// contribute to the scene, its local rect is intersected with the scroll
+    /// layer's viewport (brought into the context's local space) and, when the
+    /// result is on screen, the screen vertices, local clip rect and transformed
+    /// rect are stored.
+    fn handle_push_stacking_context(&mut self, stacking_context_index: StackingContextIndex) {
+        self.stacking_context_stack.push(stacking_context_index);
+
+        let stacking_context = &mut self.frame_builder
+                                        .stacking_context_store[stacking_context_index.0];
+        let packed_layer = &mut self.frame_builder
+                                    .packed_layers[stacking_context.packed_layer_index.0];
+        let scroll_layer = &self.scroll_tree.layers[&stacking_context.scroll_layer_id];
+        // Combine the scroll layer's world content transform with the stacking
+        // context's own local transform via `pre_mul`.
+        packed_layer.transform = scroll_layer.world_content_transform
+                                             .with_source::<ScrollLayerPixel>()
+                                             .pre_mul(&stacking_context.local_transform);
+        // NOTE(review): `unwrap` panics if the combined transform is not invertible.
+        packed_layer.inv_transform = packed_layer.transform.inverse().unwrap();
+
+        // The transform above is updated even for contexts that are skipped here.
+        if !stacking_context.can_contribute_to_scene() {
+            return;
+        }
+
+        // NOTE(review): `unwrap` panics if the local transform is not invertible.
+        let inv_layer_transform = stacking_context.local_transform.inverse().unwrap();
+        let local_viewport_rect = as_scroll_parent_rect(&scroll_layer.combined_local_viewport_rect);
+        let viewport_rect = inv_layer_transform.transform_rect(&local_viewport_rect);
+        let layer_local_rect = stacking_context.local_rect.intersection(&viewport_rect);
+
+        // `None` means the context's local rect does not overlap the viewport at all.
+        if let Some(layer_local_rect) = layer_local_rect {
+            let layer_xf_rect = TransformedRect::new(&layer_local_rect,
+                                                     &packed_layer.transform,
+                                                     self.device_pixel_ratio);
+
+            // NOTE(review): `xf_rect` is only written on this path; confirm that a value
+            // from an earlier run is cleared elsewhere.
+            if layer_xf_rect.bounding_rect.intersects(&self.screen_rect) {
+                packed_layer.screen_vertices = layer_xf_rect.vertices.clone();
+                packed_layer.local_clip_rect = layer_local_rect;
+                stacking_context.xf_rect = Some(layer_xf_rect);
+            }
+        }
+    }
+
+    /// Makes `current_clip_stack` describe the clips that apply to
+    /// `scroll_layer_index`, rebuilding it only when it was last built for a
+    /// different scroll layer.
+    ///
+    /// The rebuild walks from the given layer up through `parent_index` links
+    /// until it reaches a layer that is its own parent, collecting the packed layer
+    /// index and a clone of the clip info of every layer whose clip info is
+    /// masking. The collected entries are then reversed, so the outermost ancestor
+    /// comes first.
+    fn rebuild_clip_info_stack_if_necessary(&mut self, mut scroll_layer_index: ScrollLayerIndex) {
+        // Fast path: the cached stack already belongs to this scroll layer.
+        if self.current_clip_stack_scroll_layer == Some(scroll_layer_index) {
+            return;
+        }
+
+        // TODO(mrobinson): If we notice that this process is expensive, we can special-case
+        // more common situations, such as moving from a child or a parent.
+        self.current_clip_stack_scroll_layer = Some(scroll_layer_index);
+        self.current_clip_stack.clear();
+
+        loop {
+            let layer = &self.frame_builder.scroll_layer_store[scroll_layer_index.0];
+
+            if let Some(ref info) = layer.clip_cache_info {
+                if info.is_masking() {
+                    self.current_clip_stack.push((layer.packed_layer_index, info.clone()));
+                }
+            }
+
+            // A layer that is its own parent terminates the walk.
+            let parent_index = layer.parent_index;
+            if parent_index == scroll_layer_index {
+                break;
+            }
+            scroll_layer_index = parent_index;
+        }
+
+        // Entries were gathered innermost-first; flip them to outermost-first.
+        self.current_clip_stack.reverse();
+    }
+
+    /// Handles a `PrimitiveRun` command covering `prim_count` primitives starting
+    /// at `prim_index`.
+    ///
+    /// Ensures the cached clip stack matches the current scroll layer, then, if the
+    /// current stacking context is visible, builds each primitive's bounding rect,
+    /// prepares the primitive for rendering, and creates its clip mask task when
+    /// any clips apply. Primitives the mask reports as completely clipped out lose
+    /// their bounding rect; all others are counted as visible.
+    ///
+    /// Panics if either the scroll layer stack or the stacking context stack is
+    /// empty.
+    fn handle_primitive_run(&mut self, prim_index: PrimitiveIndex, prim_count: usize) {
+        let scroll_layer_index = *self.scroll_layer_stack.last().unwrap();
+        self.rebuild_clip_info_stack_if_necessary(scroll_layer_index);
+
+        let stacking_context_index = self.stacking_context_stack.last().unwrap();
+        let stacking_context = &self.frame_builder.stacking_context_store[stacking_context_index.0];
+        if !stacking_context.is_visible() {
+            return;
+        }
+
+        let packed_layer_index = stacking_context.packed_layer_index;
+        let packed_layer = &self.frame_builder.packed_layers[packed_layer_index.0];
+        let auxiliary_lists = self.auxiliary_lists_map.get(&stacking_context.pipeline_id)
+                                                      .expect("No auxiliary lists?");
+
+        for i in 0..prim_count {
+            let prim_index = PrimitiveIndex(prim_index.0 + i);
+            // Primitives for which `build_bounding_rect` returns false are skipped entirely.
+            if self.frame_builder.prim_store.build_bounding_rect(prim_index,
+                                                                 self.screen_rect,
+                                                                 &packed_layer.transform,
+                                                                 &packed_layer.local_clip_rect,
+                                                                 self.device_pixel_ratio) {
+                // When preparation returns true the bounding rect is built a second time
+                // (presumably because preparation can change the primitive's geometry —
+                // confirm in `prepare_prim_for_render`).
+                if self.frame_builder.prim_store.prepare_prim_for_render(prim_index,
+                                                                         self.resource_cache,
+                                                                         &packed_layer.transform,
+                                                                         self.device_pixel_ratio,
+                                                                         auxiliary_lists) {
+                    self.frame_builder.prim_store.build_bounding_rect(prim_index,
+                                                                      self.screen_rect,
+                                                                      &packed_layer.transform,
+                                                                      &packed_layer.local_clip_rect,
+                                                                      self.device_pixel_ratio);
+                }
+
+                // If the primitive is visible, consider culling it via clip rect(s).
+                // If it is visible but has clips, create the clip task for it.
+                let prim_bounding_rect =
+                    match self.frame_builder.prim_store.cpu_bounding_rects[prim_index.0] {
+                    Some(rect) => rect,
+                    _ => continue,
+                };
+
+                let prim_metadata = &mut self.frame_builder.prim_store.cpu_metadata[prim_index.0];
+                let prim_clip_info = prim_metadata.clip_cache_info.as_ref();
+                let mut visible = true;
+
+                // Temporarily add the primitive's own clip on top of the scroll layer
+                // clips; it is popped again below.
+                if let Some(info) = prim_clip_info {
+                    self.current_clip_stack.push((packed_layer_index, info.clone()));
+                }
+
+                // Try to create a mask if we may need to.
+                if !self.current_clip_stack.is_empty() {
+                    // If the primitive doesn't have a specific clip, key the task ID off the
+                    // scroll layer. This means that two primitives which are only clipped
+                    // by the scroll layer stack can share clip masks during render task
+                    // assignment to targets.
+                    let (mask_key, mask_rect) = match prim_clip_info {
+                        Some(..) => (MaskCacheKey::Primitive(prim_index), prim_bounding_rect),
+                        None => {
+                            let scroll_layer =
+                                &self.frame_builder.scroll_layer_store[scroll_layer_index.0];
+                            // NOTE(review): `unwrap` panics if the scroll layer has no
+                            // `xf_rect` — confirm it is always set when this is reached.
+                            (MaskCacheKey::ScrollLayer(scroll_layer_index),
+                             scroll_layer.xf_rect.as_ref().unwrap().bounding_rect)
+                        }
+                    };
+                    let mask_opt =
+                        RenderTask::new_mask(mask_rect, mask_key, &self.current_clip_stack);
+                    match mask_opt {
+                        MaskResult::Outside => { // Primitive is completely clipped out.
+                            prim_metadata.clip_task = None;
+                            self.frame_builder.prim_store.cpu_bounding_rects[prim_index.0] = None;
+                            visible = false;
+                        }
+                        MaskResult::Inside(task) => prim_metadata.clip_task = Some(task),
+                    }
+                }
+
+                // Undo the temporary push so the cached stack only holds scroll layer clips.
+                if prim_clip_info.is_some() {
+                    self.current_clip_stack.pop();
+                }
+
+                if visible {
+                    self.profile_counters.visible_primitives.inc();
+                }
+            }
+        }
+    }
+
+    /// Handles a `PopScrollLayer` command by dropping the top entry of the scroll
+    /// layer stack (a no-op if the stack is already empty).
+    fn handle_pop_scroll_layer(&mut self) {
+        self.scroll_layer_stack.pop();
+    }
+}
--- a/gfx/webrender/src/gpu_store.rs
+++ b/gfx/webrender/src/gpu_store.rs
@@ -1,47 +1,75 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use renderer::MAX_VERTEX_TEXTURE_WIDTH;
+use device::TextureFilter;
+use std::marker::PhantomData;
 use std::mem;
+use webrender_traits::ImageFormat;
 
 #[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
 pub struct GpuStoreAddress(pub i32);
 
+/// Describes the texture a `GpuStore` is laid out for: its image format, its
+/// width and its filter. The provided methods derive, from those, how items of
+/// a given type map onto texels and rows.
+pub trait GpuStoreLayout {
+    /// Image format of the backing texture.
+    fn image_format() -> ImageFormat;
+
+    /// Width of the backing texture (presumably in texels — confirm with implementors).
+    fn texture_width() -> usize;
+
+    /// Filter used for the backing texture.
+    fn texture_filter() -> TextureFilter;
+
+    /// Size of one texel in bytes: 4 for `RGBA8`, 16 for `RGBAF32`.
+    ///
+    /// Any other image format is treated as unreachable and panics.
+    fn texel_size() -> usize {
+        let format = Self::image_format();
+        match format {
+            ImageFormat::RGBA8 => 4,
+            ImageFormat::RGBAF32 => 16,
+            _ => unreachable!(),
+        }
+    }
+
+    /// Number of texels one item of type `T` occupies, i.e. `size_of::<T>()`
+    /// divided by `texel_size()`.
+    ///
+    /// Debug builds assert that the item size is a whole multiple of the texel size.
+    fn texels_per_item<T>() -> usize {
+        let bytes_per_item = mem::size_of::<T>();
+        let bytes_per_texel = Self::texel_size();
+        debug_assert!(bytes_per_item % bytes_per_texel == 0);
+        bytes_per_item / bytes_per_texel
+    }
+
+    /// Number of whole items of type `T` that fit in one row of the texture.
+    fn items_per_row<T>() -> usize {
+        let row_width = Self::texture_width();
+        let item_width = Self::texels_per_item::<T>();
+        row_width / item_width
+    }
+}
+
 /// A CPU-side buffer storing content to be uploaded to the GPU.
+///
+/// `T` is the item type held in `data`. `L` is a `GpuStoreLayout` type that is
+/// only carried as `PhantomData`; no value of it is stored.
-pub struct GpuStore<T> {
+pub struct GpuStore<T, L> {
     data: Vec<T>,
+    // Zero-sized marker recording the layout type `L`.
+    layout: PhantomData<L>,
     // TODO(gw): Could store this intrusively inside
     // the data array free slots.
     //free_list: Vec<GpuStoreAddress>,
 }
 
-impl<T: Clone + Default> GpuStore<T> {
-    pub fn new() -> GpuStore<T> {
+impl<T: Clone + Default, L: GpuStoreLayout> GpuStore<T, L> {
+    pub fn new() -> GpuStore<T, L> {
         GpuStore {
             data: Vec::new(),
+            layout: PhantomData,
             //free_list: Vec::new(),
         }
     }
 
     pub fn push<E>(&mut self, data: E) -> GpuStoreAddress where T: From<E> {
         let address = GpuStoreAddress(self.data.len() as i32);
         self.data.push(T::from(data));
         address
     }
 
     // TODO(gw): Change this to do incremental updates, which means
     // there is no need to copy all this data during every scroll!
     pub fn build(&self) -> Vec<T> {
-        let item_size = mem::size_of::<T>();
-        debug_assert!(item_size % 16 == 0);
-        let vecs_per_item = item_size / 16;
-        let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / vecs_per_item;
+        let items_per_row = L::items_per_row::<T>();
 
         let mut items = self.data.clone();
 
         // Extend the data array to be a multiple of the row size.
         // This ensures memory safety when the array is passed to
         // OpenGL to upload to the GPU.
         while items.len() % items_per_row != 0 {
             items.push(T::default());
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -13,19 +13,22 @@ use offscreen_gl_context::{ColorAttachme
 use profiler::BackendProfileCounters;
 use std::collections::{HashMap, HashSet};
 use std::f32;
 use std::hash::BuildHasherDefault;
 use std::{i32, usize};
 use std::path::PathBuf;
 use std::sync::Arc;
 use tiling;
+use renderer::BlendMode;
 use webrender_traits::{Epoch, ColorF, PipelineId, DeviceIntSize};
-use webrender_traits::{ImageFormat, MixBlendMode, NativeFontHandle};
+use webrender_traits::{ImageFormat, NativeFontHandle, MixBlendMode};
 use webrender_traits::{ExternalImageId, ScrollLayerId, WebGLCommand};
+use webrender_traits::{ImageData};
+use webrender_traits::{DeviceUintRect};
 
 // An ID for a texture that is owned by the
 // texture cache module. This can include atlases
 // or standalone textures allocated via the
 // texture cache (e.g. if an image is too large
 // to be added to an atlas). The texture cache
 // manages the allocation and freeing of these
 // IDs, and the rendering thread maintains a
@@ -185,16 +188,17 @@ pub enum TextureSampler {
     Data16,
     Data32,
     Data64,
     Data128,
     Layers,
     RenderTasks,
     Geometry,
     ResourceRects,
+    Gradients,
 }
 
 impl TextureSampler {
     pub fn color(n: usize) -> TextureSampler {
         match n {
             0 => TextureSampler::Color0,
             1 => TextureSampler::Color1,
             2 => TextureSampler::Color2,
@@ -264,16 +268,19 @@ pub enum ClipAttribute {
     Position,
     // instance frequency
     RenderTaskIndex,
     LayerIndex,
     DataIndex,
     SegmentIndex,
 }
 
+// A packed RGBA8 color ordered for vertex data or similar.
+// Use PackedTexel instead if intending to upload to a texture.
+
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedColor {
     pub r: u8,
     pub g: u8,
     pub b: u8,
     pub a: u8,
 }
@@ -284,16 +291,39 @@ impl PackedColor {
             r: (0.5 + color.r * COLOR_FLOAT_TO_FIXED).floor() as u8,
             g: (0.5 + color.g * COLOR_FLOAT_TO_FIXED).floor() as u8,
             b: (0.5 + color.b * COLOR_FLOAT_TO_FIXED).floor() as u8,
             a: (0.5 + color.a * COLOR_FLOAT_TO_FIXED).floor() as u8,
         }
     }
 }
 
+/// A packed 8-bit-per-channel color whose fields are declared in B, G, R, A order.
+///
+/// RGBA8 textures currently pack texels in BGRA format for upload.
+/// `PackedTexel` abstracts away this difference from `PackedColor`.
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+pub struct PackedTexel {
+    pub b: u8,
+    pub g: u8,
+    pub r: u8,
+    pub a: u8,
+}
+
+impl PackedTexel {
+    /// Converts a float color into a packed texel.
+    ///
+    /// Each channel is computed as `floor(0.5 + channel * COLOR_FLOAT_TO_FIXED)`
+    /// and then cast to `u8`.
+    pub fn from_color(color: &ColorF) -> PackedTexel {
+        // assumes `ColorF` channels are `f32` — TODO confirm
+        let to_fixed = |channel: f32| (0.5 + channel * COLOR_FLOAT_TO_FIXED).floor() as u8;
+
+        PackedTexel {
+            b: to_fixed(color.b),
+            g: to_fixed(color.g),
+            r: to_fixed(color.r),
+            a: to_fixed(color.a),
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedVertex {
     pub pos: [f32; 2],
 }
 
 #[derive(Debug)]
 #[repr(C)]
@@ -337,20 +367,45 @@ impl DebugColorVertex {
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum RenderTargetMode {
     None,
     SimpleRenderTarget,
     LayerRenderTarget(i32),      // Number of texture layers
 }
 
 pub enum TextureUpdateOp {
-    Create(u32, u32, ImageFormat, TextureFilter, RenderTargetMode, Option<Arc<Vec<u8>>>),
-    Update(u32, u32, u32, u32, Arc<Vec<u8>>, Option<u32>),
-    Grow(u32, u32, ImageFormat, TextureFilter, RenderTargetMode),
-    Free
+    // Carries the dimensions, format, filter and render target mode of a texture,
+    // plus optional initial image data.
+    Create {
+        width: u32,
+        height: u32,
+        format: ImageFormat,
+        filter: TextureFilter,
+        mode: RenderTargetMode,
+        data: Option<ImageData>,
+    },
+    // Carries a sub-region (position and size) together with the bytes for it.
+    Update {
+        page_pos_x: u32,    // the texture page position which we want to upload
+        page_pos_y: u32,
+        width: u32,
+        height: u32,
+        data: Arc<Vec<u8>>,
+        stride: Option<u32>,
+    },
+    // Like `Update`, but names an external image by `id` instead of carrying bytes
+    // (presumably the data is fetched from the external image handler — confirm).
+    UpdateForExternalBuffer {
+        rect: DeviceUintRect,
+        id: ExternalImageId,
+        stride: Option<u32>,
+    },
+    // Carries the same description as `Create`, without initial data.
+    Grow {
+        width: u32,
+        height: u32,
+        format: ImageFormat,
+        filter: TextureFilter,
+        mode: RenderTargetMode,
+    },
+    Free,
 }
 
 pub type ExternalImageUpdateList = Vec<ExternalImageId>;
 
 pub struct TextureUpdate {
     pub id: CacheTextureId,
     pub op: TextureUpdateOp,
 }
@@ -430,12 +485,32 @@ pub enum LowLevelFilterOp {
     HueRotate(i32),
     Invert(Au),
     Opacity(Au),
     Saturate(Au),
     Sepia(Au),
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum CompositionOp {
-    MixBlend(MixBlendMode),
-    Filter(LowLevelFilterOp),
-}
\ No newline at end of file
+/// Composite operations that have a direct `BlendMode` equivalent
+/// (see `to_blend_mode`).
+pub enum HardwareCompositeOp {
+    Multiply,
+    Max,
+    Min,
+}
+
+impl HardwareCompositeOp {
+    /// Maps a mix-blend mode to its composite op: `Multiply` to `Multiply`,
+    /// `Lighten` to `Max` and `Darken` to `Min`. Every other mode yields `None`.
+    pub fn from_mix_blend_mode(mix_blend_mode: MixBlendMode) -> Option<HardwareCompositeOp> {
+        let op = match mix_blend_mode {
+            MixBlendMode::Multiply => HardwareCompositeOp::Multiply,
+            MixBlendMode::Lighten => HardwareCompositeOp::Max,
+            MixBlendMode::Darken => HardwareCompositeOp::Min,
+            _ => return None,
+        };
+        Some(op)
+    }
+
+    /// Returns the `BlendMode` variant of the same name as this op.
+    pub fn to_blend_mode(&self) -> BlendMode {
+        match *self {
+            HardwareCompositeOp::Multiply => BlendMode::Multiply,
+            HardwareCompositeOp::Max => BlendMode::Max,
+            HardwareCompositeOp::Min => BlendMode::Min,
+        }
+    }
+}
--- a/gfx/webrender/src/layer.rs
+++ b/gfx/webrender/src/layer.rs
@@ -114,17 +114,26 @@ impl Layer {
         self.scrolling.bouncing_back = false;
         self.scrolling.started_bouncing_back = false;
         return true;
     }
 
     pub fn update_transform(&mut self,
                             parent_world_transform: &ScrollToWorldTransform,
                             parent_viewport_rect: &ScrollLayerRect) {
-        let inv_transform = self.local_transform.inverse().unwrap();
+        let inv_transform = match self.local_transform.inverse() {
+            Some(transform) => transform,
+            None => {
+                // If a transform function causes the current transformation matrix of an object
+                // to be non-invertible, the object and its content do not get displayed.
+                self.combined_local_viewport_rect = LayerRect::zero();
+                return;
+            }
+        };
+
         let parent_viewport_rect_in_local_space = inv_transform.transform_rect(parent_viewport_rect)
                                                                .translate(&-self.scrolling.offset);
         let local_viewport_rect = self.local_viewport_rect.translate(&-self.scrolling.offset);
         let viewport_rect = parent_viewport_rect_in_local_space.intersection(&local_viewport_rect)
                                                                .unwrap_or(LayerRect::zero());
 
         self.combined_local_viewport_rect = viewport_rect;
         self.world_viewport_transform = parent_world_transform.pre_mul(&self.local_transform);
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -43,46 +43,50 @@
 //! [notifier]: struct.Renderer.html#method.set_render_notifier
 
 #[macro_use]
 extern crate lazy_static;
 #[macro_use]
 extern crate log;
 #[macro_use]
 extern crate bitflags;
+#[macro_use]
+extern crate thread_profiler;
 
 mod batch_builder;
 mod debug_colors;
 mod debug_font_data;
 mod debug_render;
 mod device;
 mod frame;
+mod frame_builder;
 mod freelist;
 mod geometry;
 mod gpu_store;
 mod internal_types;
 mod layer;
 mod mask_cache;
 mod prim_store;
 mod profiler;
 mod record;
 mod render_backend;
+mod render_task;
 mod resource_cache;
 mod scene;
 mod scroll_tree;
 mod spring;
 mod texture_cache;
 mod tiling;
 mod util;
 
 mod shader_source {
     include!(concat!(env!("OUT_DIR"), "/shaders.rs"));
 }
 
-pub use record::{ApiRecordingReceiver, set_recording_detour, WEBRENDER_RECORDING_HEADER};
+pub use record::{ApiRecordingReceiver, BinaryRecorder, WEBRENDER_RECORDING_HEADER};
 
 mod platform {
     #[cfg(target_os="macos")]
     pub use platform::macos::font;
     #[cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))]
     pub use platform::unix::font;
     #[cfg(target_os = "windows")]
     pub use platform::windows::font;
@@ -122,10 +126,13 @@ extern crate gleam;
 extern crate num_traits;
 //extern crate notify;
 extern crate time;
 extern crate webrender_traits;
 extern crate offscreen_gl_context;
 extern crate byteorder;
 extern crate threadpool;
 
+#[cfg(any(target_os="macos", target_os="windows"))]
+extern crate gamma_lut;
+
 pub use renderer::{ExternalImage, ExternalImageSource, ExternalImageHandler};
 pub use renderer::{Renderer, RendererOptions};
--- a/gfx/webrender/src/mask_cache.rs
+++ b/gfx/webrender/src/mask_cache.rs
@@ -1,136 +1,144 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use gpu_store::{GpuStore, GpuStoreAddress};
+use gpu_store::GpuStoreAddress;
 use prim_store::{ClipData, GpuBlock32, PrimitiveStore};
 use prim_store::{CLIP_DATA_GPU_SIZE, MASK_DATA_GPU_SIZE};
-use util::{rect_from_points_f, TransformedRect};
+use renderer::VertexDataStore;
+use util::{MatrixHelpers, TransformedRect};
 use webrender_traits::{AuxiliaryLists, BorderRadius, ClipRegion, ComplexClipRegion, ImageMask};
 use webrender_traits::{DeviceIntRect, DeviceIntSize, LayerRect, LayerToWorldTransform};
 
-const MAX_COORD: f32 = 1.0e+16;
-
 #[derive(Clone, Debug)]
 pub enum ClipSource {
     NoClip,
     Complex(LayerRect, f32),
     Region(ClipRegion),
 }
 
 impl ClipSource {
-    pub fn to_rect(&self) -> Option<LayerRect> {
+    pub fn image_mask(&self) -> Option<ImageMask> {
         match self {
             &ClipSource::NoClip => None,
-            &ClipSource::Complex(rect, _) => Some(rect),
-            &ClipSource::Region(ref region) => Some(region.main),
-        }
-    }
-}
-impl<'a> From<&'a ClipRegion> for ClipSource {
-    fn from(clip_region: &'a ClipRegion) -> ClipSource {
-        if clip_region.is_complex() {
-            ClipSource::Region(clip_region.clone())
-        } else {
-            ClipSource::NoClip
+            &ClipSource::Complex(..) => None,
+            &ClipSource::Region(ref region) => region.image_mask,
         }
     }
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub struct ClipAddressRange {
     pub start: GpuStoreAddress,
-    pub item_count: u32,
+    item_count: u32,
 }
 
 #[derive(Clone, Debug)]
 pub struct MaskCacheInfo {
     pub clip_range: ClipAddressRange,
+    pub effective_clip_count: u32,
     pub image: Option<(ImageMask, GpuStoreAddress)>,
     pub local_rect: Option<LayerRect>,
     pub local_inner: Option<LayerRect>,
     pub inner_rect: DeviceIntRect,
     pub outer_rect: DeviceIntRect,
+    pub is_aligned: bool,
 }
 
 impl MaskCacheInfo {
     /// Create a new mask cache info. It allocates the GPU store data but leaves
     /// it unitialized for the following `update()` call to deal with.
     pub fn new(source: &ClipSource,
-               clip_store: &mut GpuStore<GpuBlock32>)
+               extra_clip: bool,
+               clip_store: &mut VertexDataStore<GpuBlock32>)
                -> Option<MaskCacheInfo> {
         let (image, clip_range) = match source {
             &ClipSource::NoClip => return None,
-            &ClipSource::Complex(..) => (
-                None,
+            &ClipSource::Complex(..) => {
+                (None,
                 ClipAddressRange {
                     start: clip_store.alloc(CLIP_DATA_GPU_SIZE),
                     item_count: 1,
-                }
-            ),
-            &ClipSource::Region(ref region) => (
-                region.image_mask.map(|info|
-                    (info, clip_store.alloc(MASK_DATA_GPU_SIZE))
-                ),
+                })
+            },
+            &ClipSource::Region(ref region) => {
+                let count = region.complex.length + if extra_clip {1} else {0};
+                (region.image_mask.map(|info|
+                    (info, clip_store.alloc(MASK_DATA_GPU_SIZE))),
                 ClipAddressRange {
-                    start: if region.complex.length > 0 {
-                        clip_store.alloc(CLIP_DATA_GPU_SIZE * region.complex.length)
+                    start: if count > 0 {
+                        clip_store.alloc(CLIP_DATA_GPU_SIZE * count)
                     } else {
                         GpuStoreAddress(0)
                     },
-                    item_count: region.complex.length as u32,
-                }
-            ),
+                    item_count: count as u32,
+                })
+            },
         };
 
         Some(MaskCacheInfo {
             clip_range: clip_range,
+            effective_clip_count: clip_range.item_count,
             image: image,
             local_rect: None,
             local_inner: None,
             inner_rect: DeviceIntRect::zero(),
             outer_rect: DeviceIntRect::zero(),
+            is_aligned: true,
         })
     }
 
     pub fn update(&mut self,
                   source: &ClipSource,
                   transform: &LayerToWorldTransform,
-                  clip_store: &mut GpuStore<GpuBlock32>,
+                  clip_store: &mut VertexDataStore<GpuBlock32>,
                   device_pixel_ratio: f32,
                   aux_lists: &AuxiliaryLists) {
 
+        self.is_aligned = transform.can_losslessly_transform_and_perspective_project_a_2d_rect();
+
         if self.local_rect.is_none() {
             let mut local_rect;
             let mut local_inner: Option<LayerRect>;
             match source {
                 &ClipSource::NoClip => unreachable!(),
                 &ClipSource::Complex(rect, radius) => {
                     let slice = clip_store.get_slice_mut(self.clip_range.start, CLIP_DATA_GPU_SIZE);
                     let data = ClipData::uniform(rect, radius);
                     PrimitiveStore::populate_clip_data(slice, data);
                     debug_assert_eq!(self.clip_range.item_count, 1);
                     local_rect = Some(rect);
                     local_inner = ComplexClipRegion::new(rect, BorderRadius::uniform(radius))
                                                     .get_inner_rect();
                 }
                 &ClipSource::Region(ref region) => {
-                    local_rect = Some(LayerRect::from_untyped(&rect_from_points_f(-MAX_COORD, -MAX_COORD, MAX_COORD, MAX_COORD)));
+                    local_rect = Some(region.main);
                     local_inner = match region.image_mask {
                         Some(ref mask) if !mask.repeat => {
                             local_rect = local_rect.and_then(|r| r.intersection(&mask.rect));
                             None
                         },
                         Some(_) => None,
                         None => local_rect,
                     };
+
                     let clips = aux_lists.complex_clip_regions(&region.complex);
-                    assert_eq!(self.clip_range.item_count, clips.len() as u32);
+                    self.effective_clip_count = if !self.is_aligned && self.clip_range.item_count > clips.len() as u32 {
+                        // we have an extra clip rect coming from the transformed layer
+                        assert_eq!(self.clip_range.item_count, clips.len() as u32 + 1);
+                        let address = GpuStoreAddress(self.clip_range.start.0 + (CLIP_DATA_GPU_SIZE * clips.len()) as i32);
+                        let slice = clip_store.get_slice_mut(address, CLIP_DATA_GPU_SIZE);
+                        PrimitiveStore::populate_clip_data(slice, ClipData::uniform(region.main, 0.0));
+                        self.clip_range.item_count
+                    } else {
+                        clips.len() as u32
+                    };
+
                     let slice = clip_store.get_slice_mut(self.clip_range.start, CLIP_DATA_GPU_SIZE * clips.len());
                     for (clip, chunk) in clips.iter().zip(slice.chunks_mut(CLIP_DATA_GPU_SIZE)) {
                         let data = ClipData::from_clip_region(clip);
                         PrimitiveStore::populate_clip_data(chunk, data);
                         local_rect = local_rect.and_then(|r| r.intersection(&clip.rect));
                         local_inner = local_inner.and_then(|r| clip.get_inner_rect()
                                                                    .and_then(|ref inner| r.intersection(&inner)));
                     }
@@ -149,9 +157,16 @@ impl MaskCacheInfo {
             let transformed = TransformedRect::new(inner_rect,
                                                    &transform,
                                                    device_pixel_ratio);
             transformed.inner_rect
         } else {
             DeviceIntRect::new(self.outer_rect.origin, DeviceIntSize::zero())
         }
     }
+
+    /// Check if this `MaskCacheInfo` actually carries any masks. `effective_clip_count`
+    /// can change during the `update` call depending on the transformation, so the mask may
+    /// appear to be empty.
+    pub fn is_masking(&self) -> bool {
+        self.image.is_some() || self.effective_clip_count != 0
+    }
 }
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -11,22 +11,23 @@ use core_graphics::data_provider::CGData
 use core_graphics::font::{CGFont, CGGlyph};
 use core_graphics::geometry::{CGPoint, CGSize, CGRect};
 use core_text::font::CTFont;
 use core_text::font_descriptor::kCTFontDefaultOrientation;
 use core_text;
 use std::collections::HashMap;
 use std::collections::hash_map::Entry;
 use webrender_traits::{ColorU, FontKey, FontRenderMode, GlyphDimensions};
-
-pub type NativeFontHandle = CGFont;
+use webrender_traits::{GlyphKey, GlyphOptions, SubpixelPoint};
+use gamma_lut::{GammaLut, Color as ColorLut};
 
 pub struct FontContext {
     cg_fonts: HashMap<FontKey, CGFont>,
     ct_fonts: HashMap<(FontKey, Au), CTFont>,
+    gamma_lut: GammaLut,
 }
 
 pub struct RasterizedGlyph {
     pub width: u32,
     pub height: u32,
     pub bytes: Vec<u8>,
 }
 
@@ -64,42 +65,69 @@ fn supports_subpixel_aa() -> bool {
     cg_context.set_rgb_fill_color(1.0, 1.0, 1.0, 1.0);
     let point = CGPoint {x: -1., y: 0.};
     let glyph = '|' as CGGlyph;
     ct_font.draw_glyphs(&[glyph], &[point], cg_context.clone());
     let data = cg_context.data();
     data[0] != data[1] || data[1] != data[2]
 }
 
-fn get_glyph_metrics(ct_font: &CTFont, glyph: CGGlyph) -> GlyphMetrics {
+fn get_glyph_metrics(ct_font: &CTFont,
+                     glyph: CGGlyph,
+                     subpixel_point: &SubpixelPoint) -> GlyphMetrics {
     let bounds = ct_font.get_bounding_rects_for_glyphs(kCTFontDefaultOrientation, &[glyph]);
 
-    let rasterized_left = bounds.origin.x.floor() as i32;
-    let rasterized_width =
-        (bounds.origin.x - (rasterized_left as f64) + bounds.size.width).ceil() as u32;
-    let rasterized_descent = (-bounds.origin.y).ceil() as i32;
-    let rasterized_ascent = (bounds.size.height + bounds.origin.y).ceil() as i32;
-    let rasterized_height = (rasterized_descent + rasterized_ascent) as u32;
+    let (x_offset, y_offset) = subpixel_point.to_f64();
+
+    // First round out to pixel boundaries
+    // CG Origin is bottom left
+    let mut left = bounds.origin.x.floor() as i32;
+    let mut bottom = bounds.origin.y.floor() as i32;
+    let mut right = (bounds.origin.x
+                    + bounds.size.width
+                    + x_offset).ceil() as i32;
+    let mut top = (bounds.origin.y
+                  + bounds.size.height
+                  + y_offset).ceil() as i32;
 
-    GlyphMetrics {
-        rasterized_ascent: rasterized_ascent,
-        rasterized_descent: rasterized_descent,
-        rasterized_left: rasterized_left,
-        rasterized_width: rasterized_width,
-        rasterized_height: rasterized_height,
-    }
+    // Expand the bounds by 1 pixel, to give CG room for anti-aliasing.
+    // Note that this outset is to allow room for LCD smoothed glyphs. However, the correct outset
+    // is not currently known, as CG dilates the outlines by some percentage.
+    // This is taken from Skia.
+    left -= 1;
+    bottom -= 1;
+    right += 1;
+    top += 1;
+
+    let width = right - left;
+    let height = top - bottom;
+
+    let metrics = GlyphMetrics {
+        rasterized_left: left,
+        rasterized_width: width as u32,
+        rasterized_height: height as u32,
+        rasterized_ascent: top,
+        rasterized_descent: -bottom,
+    };
+
+    metrics
 }
 
 impl FontContext {
     pub fn new() -> FontContext {
         debug!("Test for subpixel AA support: {}", supports_subpixel_aa());
 
+        // Force CG to use sRGB color space to gamma correct.
+        let contrast = 0.0;
+        let gamma = 0.0;
+
         FontContext {
             cg_fonts: HashMap::new(),
             ct_fonts: HashMap::new(),
+            gamma_lut: GammaLut::new(contrast, gamma, gamma),
         }
     }
 
     pub fn add_raw_font(&mut self, font_key: &FontKey, bytes: &[u8]) {
         if self.cg_fonts.contains_key(font_key) {
             return
         }
 
@@ -134,62 +162,81 @@ impl FontContext {
                         size.to_f64_px());
                 entry.insert(ct_font.clone());
                 Some(ct_font)
             }
         }
     }
 
     pub fn get_glyph_dimensions(&mut self,
-                                font_key: FontKey,
-                                size: Au,
-                                character: u32) -> Option<GlyphDimensions> {
-        self.get_ct_font(font_key, size).and_then(|ref ct_font| {
-            let glyph = character as CGGlyph;
-            let metrics = get_glyph_metrics(ct_font, glyph);
+                                key: &GlyphKey) -> Option<GlyphDimensions> {
+        self.get_ct_font(key.font_key, key.size).and_then(|ref ct_font| {
+            let glyph = key.index as CGGlyph;
+            let metrics = get_glyph_metrics(ct_font, glyph, &key.subpixel_point);
             if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
                 None
             } else {
                 Some(GlyphDimensions {
                     left: metrics.rasterized_left,
                     top: metrics.rasterized_ascent,
                     width: metrics.rasterized_width as u32,
                     height: metrics.rasterized_height as u32,
                 })
             }
         })
     }
 
+    // Assumes the pixels here are linear values from CG
+    fn gamma_correct_pixels(&self, pixels: &mut Vec<u8>, width: usize,
+                            height: usize, render_mode: FontRenderMode,
+                            color: ColorU) {
+        // Then convert back to gamma corrected values.
+        let color_lut = ColorLut::new(color.r,
+                                     color.g,
+                                     color.b,
+                                     color.a);
+        match render_mode {
+            FontRenderMode::Alpha => {
+                self.gamma_lut.preblend_grayscale_bgra(pixels, width,
+                                                       height, color_lut);
+            },
+            FontRenderMode::Subpixel => {
+                self.gamma_lut.preblend_bgra(pixels, width, height, color_lut);
+            },
+            _ => {} // Again, leave mono untouched since only the alpha matters.
+        }
+    }
+
     #[allow(dead_code)]
     fn print_glyph_data(&mut self, data: &Vec<u8>, width: usize, height: usize) {
         // Rust doesn't have step_by support on stable :(
+        println!("Width is: {:?} height: {:?}", width, height);
         for i in 0..height {
             let current_height = i * width * 4;
 
             for pixel in data[current_height .. current_height + (width * 4)].chunks(4) {
                 let b = pixel[0];
                 let g = pixel[1];
                 let r = pixel[2];
                 let a = pixel[3];
                 print!("({}, {}, {}, {}) ", r, g, b, a);
             }
             println!("");
         }
     }
 
     pub fn rasterize_glyph(&mut self,
-                           font_key: FontKey,
-                           size: Au,
-                           color: ColorU,
-                           character: u32,
-                           render_mode: FontRenderMode) -> Option<RasterizedGlyph> {
-        match self.get_ct_font(font_key, size) {
+                           key: &GlyphKey,
+                           render_mode: FontRenderMode,
+                           _glyph_options: Option<GlyphOptions>)
+                           -> Option<RasterizedGlyph> {
+        match self.get_ct_font(key.font_key, key.size) {
             Some(ref ct_font) => {
-                let glyph = character as CGGlyph;
-                let metrics = get_glyph_metrics(ct_font, glyph);
+                let glyph = key.index as CGGlyph;
+                let metrics = get_glyph_metrics(ct_font, glyph, &key.subpixel_point);
                 if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
                     return Some(RasterizedGlyph::blank())
                 }
 
                 let context_flags = match render_mode {
                     FontRenderMode::Subpixel => kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipFirst,
                     FontRenderMode::Alpha | FontRenderMode::Mono => kCGImageAlphaPremultipliedLast,
                 };
@@ -228,24 +275,31 @@ impl FontContext {
                     FontRenderMode::Alpha => (true, false),
                     FontRenderMode::Mono => (false, false),
                 };
 
                 // These are always true in Gecko, even for non-AA fonts
                 cg_context.set_allows_font_subpixel_positioning(true);
                 cg_context.set_should_subpixel_position_fonts(true);
 
+                // Don't quantize because we're doing it already.
+                cg_context.set_allows_font_subpixel_quantization(false);
+                cg_context.set_should_subpixel_quantize_fonts(false);
+
                 cg_context.set_allows_font_smoothing(smooth);
                 cg_context.set_should_smooth_fonts(smooth);
                 cg_context.set_allows_antialiasing(antialias);
                 cg_context.set_should_antialias(antialias);
 
+                let (x_offset, y_offset) = key.subpixel_point.to_f64();
+
+                // CG Origin is bottom left, WR is top left. Need -y offset
                 let rasterization_origin = CGPoint {
-                    x: -metrics.rasterized_left as f64,
-                    y: metrics.rasterized_descent as f64,
+                    x: -metrics.rasterized_left as f64 + x_offset,
+                    y: metrics.rasterized_descent as f64 - y_offset,
                 };
 
                 // Always draw black text on a white background
                 // Fill the background
                 cg_context.set_rgb_fill_color(1.0, 1.0, 1.0, 1.0);
                 let rect = CGRect {
                     origin: CGPoint {
                         x: 0.0,
@@ -260,16 +314,24 @@ impl FontContext {
 
                 // Set the text color
                 cg_context.set_rgb_fill_color(0.0, 0.0, 0.0, 1.0);
                 cg_context.set_text_drawing_mode(CGTextDrawingMode::CGTextFill);
                 ct_font.draw_glyphs(&[glyph], &[rasterization_origin], cg_context.clone());
 
                 let mut rasterized_pixels = cg_context.data().to_vec();
 
+                // Convert to linear space for subpixel AA.
+                // We explicitly do not do this for grayscale AA
+                if render_mode == FontRenderMode::Subpixel {
+                    self.gamma_lut.coregraphics_convert_to_linear_bgra(&mut rasterized_pixels,
+                                                                       metrics.rasterized_width as usize,
+                                                                       metrics.rasterized_height as usize);
+                }
+
                 // We need to invert the pixels back since right now
                 // transparent pixels are actually opaque white.
                 for i in 0..metrics.rasterized_height {
                     let current_height = (i * metrics.rasterized_width * 4) as usize;
                     let end_row = current_height + (metrics.rasterized_width as usize * 4);
 
                     for mut pixel in rasterized_pixels[current_height .. end_row].chunks_mut(4) {
                         pixel[0] = 255 - pixel[0];
@@ -282,16 +344,22 @@ impl FontContext {
                                 assert_eq!(pixel[0], pixel[1]);
                                 assert_eq!(pixel[0], pixel[2]);
                                 pixel[0]
                             }
                         }; // end match
                     } // end row
                 } // end height
 
+                self.gamma_correct_pixels(&mut rasterized_pixels,
+                                          metrics.rasterized_width as usize,
+                                          metrics.rasterized_height as usize,
+                                          render_mode,
+                                          key.color);
+
                 Some(RasterizedGlyph {
                     width: metrics.rasterized_width,
                     height: metrics.rasterized_height,
                     bytes: rasterized_pixels,
                 })
             }
             None => {
                 return Some(RasterizedGlyph::blank());
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -1,14 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
-use webrender_traits::{FontKey, ColorU, FontRenderMode, GlyphDimensions, NativeFontHandle};
+use webrender_traits::{FontKey, FontRenderMode, GlyphDimensions};
+use webrender_traits::{NativeFontHandle, GlyphOptions};
+use webrender_traits::{GlyphKey};
 
 use freetype::freetype::{FT_Render_Mode, FT_Pixel_Mode};
 use freetype::freetype::{FT_Done_FreeType, FT_Library_SetLcdFilter};
 use freetype::freetype::{FT_Library, FT_Set_Char_Size};
 use freetype::freetype::{FT_Face, FT_Long, FT_UInt, FT_F26Dot6};
 use freetype::freetype::{FT_Init_FreeType, FT_Load_Glyph, FT_Render_Glyph};
 use freetype::freetype::{FT_New_Memory_Face, FT_GlyphSlot, FT_LcdFilter};
 
@@ -35,17 +37,16 @@ fn float_to_fixed(before: usize, f: f64)
 }
 
 fn float_to_fixed_ft(f: f64) -> i32 {
     float_to_fixed(6, f)
 }
 
 impl FontContext {
     pub fn new() -> FontContext {
-//        let _pf = util::ProfileScope::new("  FontContext::new");
         let mut lib: FT_Library = ptr::null_mut();
         unsafe {
             let result = FT_Init_FreeType(&mut lib);
             if !result.succeeded() {
                 panic!("Unable to initialize FreeType library {:?}", result);
             }
 
             // TODO(gw): Check result of this to determine if freetype build supports subpixel.
@@ -106,46 +107,43 @@ impl FontContext {
                 assert!(!slot_ptr.is_null());
                 return Some(slot_ptr);
             }
         }
 
         None
     }
 
-    pub fn get_glyph_dimensions(&self,
-                                font_key: FontKey,
-                                size: Au,
-                                character: u32) -> Option<GlyphDimensions> {
-        self.load_glyph(font_key, size, character).and_then(|slot| {
+     pub fn get_glyph_dimensions(&mut self,
+                                 key: &GlyphKey) -> Option<GlyphDimensions> {
+        self.load_glyph(key.font_key, key.size, key.index).and_then(|slot| {
             let metrics = unsafe { &(*slot).metrics };
             if metrics.width == 0 || metrics.height == 0 {
                 None
             } else {
                 Some(GlyphDimensions {
                     left: (metrics.horiBearingX >> 6) as i32,
                     top: (metrics.horiBearingY >> 6) as i32,
                     width: (metrics.width >> 6) as u32,
                     height: (metrics.height >> 6) as u32,
                 })
             }
         })
     }
 
     pub fn rasterize_glyph(&mut self,
-                           font_key: FontKey,
-                           size: Au,
-                           color: ColorU,
-                           character: u32,
-                           render_mode: FontRenderMode) -> Option<RasterizedGlyph> {
+                           key: &GlyphKey,
+                           render_mode: FontRenderMode,
+                           _glyph_options: Option<GlyphOptions>)
+                           -> Option<RasterizedGlyph> {
         let mut glyph = None;
 
-        if let Some(slot) = self.load_glyph(font_key,
-                                            size,
-                                            character) {
+        if let Some(slot) = self.load_glyph(key.font_key,
+                                            key.size,
+                                            key.index) {
             let render_mode = match render_mode {
                 FontRenderMode::Mono => FT_Render_Mode::FT_RENDER_MODE_MONO,
                 FontRenderMode::Alpha => FT_Render_Mode::FT_RENDER_MODE_NORMAL,
                 FontRenderMode::Subpixel => FT_Render_Mode::FT_RENDER_MODE_LCD,
             };
 
             let result = unsafe { FT_Render_Glyph(slot, render_mode) };
 
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -1,41 +1,118 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use app_units::Au;
 use std::collections::HashMap;
-use webrender_traits::{FontKey, ColorU, FontRenderMode, GlyphDimensions};
+use webrender_traits::{FontKey, FontRenderMode, GlyphDimensions};
+use webrender_traits::{GlyphKey, GlyphOptions};
+use gamma_lut::{GammaLut, Color as ColorLut};
 
 use dwrote;
 
 lazy_static! {
     static ref DEFAULT_FONT_DESCRIPTOR: dwrote::FontDescriptor = dwrote::FontDescriptor {
         family_name: "Arial".to_owned(),
         weight: dwrote::FontWeight::Regular,
         stretch: dwrote::FontStretch::Normal,
         style: dwrote::FontStyle::Normal,
     };
 }
 
 pub struct FontContext {
     fonts: HashMap<FontKey, dwrote::FontFace>,
+    gamma_lut: GammaLut,
+    gdi_gamma_lut: GammaLut,
 }
 
 pub struct RasterizedGlyph {
     pub width: u32,
     pub height: u32,
     pub bytes: Vec<u8>,
 }
 
+fn dwrite_texture_type(render_mode: FontRenderMode) ->
+                       dwrote::DWRITE_TEXTURE_TYPE {
+    match render_mode {
+        FontRenderMode::Mono => dwrote::DWRITE_TEXTURE_ALIASED_1x1 ,
+        FontRenderMode::Alpha |
+        FontRenderMode::Subpixel => dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1,
+    }
+}
+
+fn dwrite_measure_mode(render_mode: FontRenderMode, options: Option<GlyphOptions>) ->
+                       dwrote::DWRITE_MEASURING_MODE {
+    if let Some(GlyphOptions{ force_gdi_rendering: true, .. }) = options {
+        return dwrote::DWRITE_MEASURING_MODE_GDI_CLASSIC;
+    }
+
+    match render_mode {
+        FontRenderMode::Mono => dwrote::DWRITE_MEASURING_MODE_GDI_NATURAL,
+        FontRenderMode::Alpha |
+        FontRenderMode::Subpixel => dwrote::DWRITE_MEASURING_MODE_NATURAL,
+    }
+}
+
+fn dwrite_render_mode(font_face: &dwrote::FontFace,
+                      render_mode: FontRenderMode,
+                      em_size: f32,
+                      measure_mode: dwrote::DWRITE_MEASURING_MODE,
+                      options: Option<GlyphOptions>) ->
+                      dwrote::DWRITE_RENDERING_MODE {
+    if let Some(GlyphOptions{ force_gdi_rendering: true, .. }) = options {
+        return dwrote::DWRITE_RENDERING_MODE_GDI_CLASSIC;
+    }
+
+    let dwrite_render_mode = match render_mode {
+        FontRenderMode::Mono => dwrote::DWRITE_RENDERING_MODE_ALIASED,
+        FontRenderMode::Alpha |
+        FontRenderMode::Subpixel => {
+            font_face.get_recommended_rendering_mode_default_params(em_size,
+                                                                    1.0,
+                                                                    measure_mode)
+        },
+    };
+
+    if dwrite_render_mode  == dwrote::DWRITE_RENDERING_MODE_OUTLINE {
+        // Outline mode is not supported
+        return dwrote::DWRITE_RENDERING_MODE_CLEARTYPE_NATURAL_SYMMETRIC;
+    }
+
+    dwrite_render_mode
+}
+
+fn get_glyph_dimensions_with_analysis(analysis: dwrote::GlyphRunAnalysis,
+                                      texture_type: dwrote::DWRITE_TEXTURE_TYPE)
+                                      -> GlyphDimensions {
+    let bounds = analysis.get_alpha_texture_bounds(texture_type);
+
+    let width = (bounds.right - bounds.left) as u32;
+    let height = (bounds.bottom - bounds.top) as u32;
+    assert!(width > 0 && height > 0);
+    GlyphDimensions {
+        left: bounds.left,
+        top: -bounds.top,
+        width: width,
+        height: height,
+    }
+}
+
 impl FontContext {
     pub fn new() -> FontContext {
+        // These are the default values we use in Gecko.
+        // We use a gamma value of 2.3 for gdi fonts
+        // TODO: Fetch this data from Gecko itself.
+        let contrast = 1.0;
+        let gamma = 1.8;
+        let gdi_gamma = 2.3;
         FontContext {
             fonts: HashMap::new(),
+            gamma_lut: GammaLut::new(contrast, gamma, gamma),
+            gdi_gamma_lut: GammaLut::new(contrast, gdi_gamma, gdi_gamma),
         }
     }
 
     pub fn add_raw_font(&mut self, font_key: &FontKey, data: &[u8]) {
         if self.fonts.contains_key(font_key) {
             return
         }
 
@@ -55,138 +132,165 @@ impl FontContext {
         }
 
         let system_fc = dwrote::FontCollection::system();
         let font = system_fc.get_font_from_descriptor(&font_handle).unwrap();
         let face = font.create_font_face();
         self.fonts.insert((*font_key).clone(), face);
     }
 
-    fn get_glyph_dimensions_and_maybe_rasterize(&self,
-                                                font_key: FontKey,
-                                                size: Au,
-                                                glyph: u32,
-                                                render_mode: Option<FontRenderMode>)
-                                                -> (Option<GlyphDimensions>, Option<RasterizedGlyph>)
-    {
-        let face = self.fonts.get(&font_key).unwrap();
-        let glyph = glyph as u16;
+    // Assumes RGB format from dwrite, which is 3 bytes per pixel as dwrite
+    // doesn't output an alpha value via GlyphRunAnalysis::CreateAlphaTexture
+    #[allow(dead_code)]
+    fn print_glyph_data(&self, data: &Vec<u8>, width: usize, height: usize) {
+        // Rust doesn't have step_by support on stable :(
+        for i in 0..height {
+            let current_height = i * width * 3;
 
-        let glyph = glyph as u16;
+            for pixel in data[current_height .. current_height + (width * 3)].chunks(3) {
+                let r = pixel[0];
+                let g = pixel[1];
+                let b = pixel[2];
+                print!("({}, {}, {}) ", r, g, b, );
+            }
+            println!("");
+        }
+    }
+
+    fn create_glyph_analysis(&self, key: &GlyphKey,
+                            render_mode: FontRenderMode,
+                            options: Option<GlyphOptions>) ->
+                            dwrote::GlyphRunAnalysis {
+        let face = self.fonts.get(&key.font_key).unwrap();
+        let glyph = key.index as u16;
         let advance = 0.0f32;
         let offset = dwrote::GlyphOffset { advanceOffset: 0.0, ascenderOffset: 0.0 };
 
         let glyph_run = dwrote::DWRITE_GLYPH_RUN {
             fontFace: unsafe { face.as_ptr() },
-            fontEmSize: size.to_f32_px(), // size in DIPs (1/96", same as CSS pixels)
+            fontEmSize: key.size.to_f32_px(), // size in DIPs (1/96", same as CSS pixels)
             glyphCount: 1,
             glyphIndices: &glyph,
             glyphAdvances: &advance,
             glyphOffsets: &offset,
             isSideways: 0,
             bidiLevel: 0,
         };
 
-        // dwrite requires DWRITE_RENDERING_MODE_ALIASED if the texture
-        // type is DWRITE_TEXTURE_ALIASED_1x1.  If CLEARTYPE_3x1,
-        // then the other modes can be used.
+        let dwrite_measure_mode = dwrite_measure_mode(render_mode, options);
+        let dwrite_render_mode = dwrite_render_mode(face,
+                                                    render_mode,
+                                                    key.size.to_f32_px(),
+                                                    dwrite_measure_mode,
+                                                    options);
 
-        // TODO(vlad): get_glyph_dimensions needs to take the render mode into account
-        // but the API doesn't give it to us right now.  Just assume subpixel.
-        let (r_mode, m_mode, tex_type) = match render_mode {
-            Some(FontRenderMode::Mono) => (dwrote::DWRITE_RENDERING_MODE_ALIASED,
-                                           dwrote::DWRITE_MEASURING_MODE_GDI_NATURAL,
-                                           dwrote::DWRITE_TEXTURE_ALIASED_1x1),
-            Some(FontRenderMode::Alpha) => (dwrote::DWRITE_RENDERING_MODE_GDI_NATURAL,
-                                            dwrote::DWRITE_MEASURING_MODE_GDI_NATURAL,
-                                            dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1),
-            Some(FontRenderMode::Subpixel) | None => (dwrote::DWRITE_RENDERING_MODE_CLEARTYPE_GDI_NATURAL,
-                                                      dwrote::DWRITE_MEASURING_MODE_GDI_NATURAL,
-                                                      dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1),
-        };
+        let (x_offset, y_offset) = key.subpixel_point.to_f64();
+        let transform = Some(
+                        dwrote::DWRITE_MATRIX { m11: 1.0, m12: 0.0, m21: 0.0, m22: 1.0,
+                                                dx: x_offset as f32, dy: y_offset as f32 }
+                        );
 
-        // XX use the xform to handle subpixel positioning (what skia does), I believe that keeps
-        //let xform = dwrote::DWRITE_MATRIX { m11: 1.0, m12: 0.0, m21: 0.0, m22: 1.0, dx: 0.0, dy: 0.0 };
-        let analysis = dwrote::GlyphRunAnalysis::create(&glyph_run, 1.0, None, r_mode, m_mode, 0.0, 0.0);
-        let bounds = analysis.get_alpha_texture_bounds(tex_type);
+        dwrote::GlyphRunAnalysis::create(&glyph_run, 1.0, transform,
+                                         dwrite_render_mode,
+                                         dwrite_measure_mode,
+                                         0.0, 0.0)
+    }
 
-        let width = (bounds.right - bounds.left) as u32;
-        let height = (bounds.bottom - bounds.top) as u32;
-        let dims = GlyphDimensions {
-            left: bounds.left,
-            top: -bounds.top,
-            width: width,
-            height: height,
-        };
-
-        // if empty, then nothing
-        if dims.width == 0 || dims.height == 0 {
-            return (None, None);
-        }
+    // TODO: Pipe GlyphOptions into glyph_dimensions too; Subpixel is probably not the right default.
+    pub fn get_glyph_dimensions(&self, key: &GlyphKey) -> Option<GlyphDimensions> {
+        let render_mode = FontRenderMode::Subpixel;
+        let texture_type = dwrite_texture_type(render_mode);
+        let analysis = self.create_glyph_analysis(key, render_mode, None);
+        let bounds = analysis.get_alpha_texture_bounds(texture_type);
 
-        // if we weren't asked to rasterize, we're done
-        if render_mode.is_none() {
-            return (Some(dims), None);
-        }
+        // Glyphs with empty bounds have no dimensions: return `None` instead of letting the
+        // shared helper assert on them.
+        if bounds.right <= bounds.left || bounds.bottom <= bounds.top { return None; }
+        Some(get_glyph_dimensions_with_analysis(analysis, texture_type))
+    }
 
-        let pixels = analysis.create_alpha_texture(tex_type, bounds);
-        let rgba_pixels = match render_mode.unwrap() {
+    // DWRITE gives us values in RGB. WR doesn't really touch it after. Note, CG returns in BGR
+    // TODO: Decide whether all fonts should return RGB or BGR
+    fn convert_to_rgba(&self, pixels: &Vec<u8>, render_mode: FontRenderMode) -> Vec<u8> {
+        match render_mode {
             FontRenderMode::Mono => {
-                let mut rgba_pixels = vec![0; pixels.len() * 4];
+                let mut rgba_pixels: Vec<u8> = vec![0; pixels.len() * 4];
                 for i in 0..pixels.len() {
-                    rgba_pixels[i*4+0] = 0xff;
-                    rgba_pixels[i*4+1] = 0xff;
-                    rgba_pixels[i*4+2] = 0xff;
+                    rgba_pixels[i*4+0] = pixels[i];
+                    rgba_pixels[i*4+1] = pixels[i];
+                    rgba_pixels[i*4+2] = pixels[i];
                     rgba_pixels[i*4+3] = pixels[i];
                 }
                 rgba_pixels
             }
             FontRenderMode::Alpha => {
-                let mut rgba_pixels = vec![0; pixels.len()/3 * 4];
-                for i in 0..pixels.len()/3 {
+                let length = pixels.len() / 3;
+                let mut rgba_pixels: Vec<u8> = vec![0; length * 4];
+                for i in 0..length {
                     // TODO(vlad): we likely need to do something smarter
-                    let alpha = (pixels[i*3+0] as u32 + pixels[i*3+0] as u32 + pixels[i*3+0] as u32) / 3;
-                    rgba_pixels[i*4+0] = 0xff;
-                    rgba_pixels[i*4+1] = 0xff;
-                    rgba_pixels[i*4+2] = 0xff;
-                    rgba_pixels[i*4+3] = alpha as u8;
+                    // This is what skia does
+                    let alpha = ((pixels[i*3+0] as u32 +
+                                pixels[i*3+1] as u32 +
+                                pixels[i*3+2] as u32)
+                                / 3) as u8;
+
+                    rgba_pixels[i*4+0] = alpha;
+                    rgba_pixels[i*4+1] = alpha;
+                    rgba_pixels[i*4+2] = alpha;
+                    rgba_pixels[i*4+3] = alpha;
                 }
                 rgba_pixels
             }
             FontRenderMode::Subpixel => {
-                let mut rgba_pixels = vec![0; pixels.len()/3 * 4];
-                for i in 0..pixels.len()/3 {
+                let length = pixels.len() / 3;
+                let mut rgba_pixels: Vec<u8> = vec![0; length * 4];
+                for i in 0..length {
                     rgba_pixels[i*4+0] = pixels[i*3+0];
                     rgba_pixels[i*4+1] = pixels[i*3+1];
                     rgba_pixels[i*4+2] = pixels[i*3+2];
                     rgba_pixels[i*4+3] = 0xff;
                 }
                 rgba_pixels
             }
-        };
-
-        (Some(dims), Some(RasterizedGlyph {
-            width: dims.width,
-            height: dims.height,
-            bytes: rgba_pixels,
-        }))
-    }
-
-    pub fn get_glyph_dimensions(&self,
-                                font_key: FontKey,
-                                size: Au,
-                                glyph: u32) -> Option<GlyphDimensions> {
-        let (maybe_dims, _) =
-            self.get_glyph_dimensions_and_maybe_rasterize(font_key, size, glyph, None);
-        maybe_dims
+        }
     }
 
     pub fn rasterize_glyph(&mut self,
-                           font_key: FontKey,
-                           size: Au,
-                           color: ColorU,
-                           glyph: u32,
-                           render_mode: FontRenderMode) -> Option<RasterizedGlyph> {
-        let (_, maybe_glyph) =
-            self.get_glyph_dimensions_and_maybe_rasterize(font_key, size, glyph, Some(render_mode));
-        maybe_glyph
+                           key: &GlyphKey,
+                           render_mode: FontRenderMode,
+                           glyph_options: Option<GlyphOptions>)
+                           -> Option<RasterizedGlyph> {
+        // Rasterizes the glyph described by `key` into 4-byte-per-pixel data.
+        // Returns `None` when the glyph covers no pixels.
+        let analysis = self.create_glyph_analysis(key, render_mode, glyph_options);
+        let texture_type = dwrite_texture_type(render_mode);
+
+        let bounds = analysis.get_alpha_texture_bounds(texture_type);
+        // Empty bounds mean there is nothing to rasterize; bail out before
+        // asking dwrite for a zero-sized texture.
+        if bounds.right <= bounds.left || bounds.bottom <= bounds.top {
+            return None;
+        }
+        let width = (bounds.right - bounds.left) as usize;
+        let height = (bounds.bottom - bounds.top) as usize;
+
+        let mut pixels = analysis.create_alpha_texture(texture_type, bounds);
+
+        // Forced-GDI text is corrected with its own gamma table.
+        let lut_correction = match glyph_options {
+            Some(GlyphOptions { force_gdi_rendering: true, .. }) => &self.gdi_gamma_lut,
+            _ => &self.gamma_lut,
+        };
+
+        lut_correction.preblend_rgb(&mut pixels, width, height,
+                                    ColorLut::new(key.color.r,
+                                                  key.color.g,
+                                                  key.color.b,
+                                                  key.color.a));
+
+        let rgba_pixels = self.convert_to_rgba(&pixels, render_mode);
+
+        Some(RasterizedGlyph {
+            width: width as u32,
+            height: height as u32,
+            bytes: rgba_pixels,
+        })
     }
 }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,29 +1,31 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
-use euclid::Size2D;
-use gpu_store::{GpuStore, GpuStoreAddress};
-use internal_types::SourceTexture;
+use euclid::{Point2D, Size2D};
+use gpu_store::GpuStoreAddress;
+use internal_types::{SourceTexture, PackedTexel};
 use mask_cache::{ClipSource, MaskCacheInfo};
+use renderer::{VertexDataStore, GradientDataStore};
+use render_task::{RenderTask, RenderTaskLocation};
 use resource_cache::{ImageProperties, ResourceCache};
 use std::mem;
 use std::usize;
-use tiling::{RenderTask, RenderTaskLocation};
 use util::TransformedRect;
 use webrender_traits::{AuxiliaryLists, ColorF, ImageKey, ImageRendering, YuvColorSpace};
 use webrender_traits::{ClipRegion, ComplexClipRegion, ItemRange, GlyphKey};
 use webrender_traits::{FontKey, FontRenderMode, WebGLContextId};
 use webrender_traits::{device_length, DeviceIntRect, DeviceIntSize};
 use webrender_traits::{DeviceRect, DevicePoint, DeviceSize};
 use webrender_traits::{LayerRect, LayerSize, LayerPoint};
-use webrender_traits::LayerToWorldTransform;
+use webrender_traits::{LayerToWorldTransform, GlyphInstance, GlyphOptions};
+use webrender_traits::{ExtendMode, GradientStop};
 
 pub const CLIP_DATA_GPU_SIZE: usize = 5;
 pub const MASK_DATA_GPU_SIZE: usize = 1;
 
 /// Stores two coordinates in texel space. The coordinates
 /// are stored in texel coordinates because the texture atlas
 /// may grow. Storing them as texel coords and normalizing
 /// the UVs in the vertex shader means nothing needs to be
@@ -70,17 +72,18 @@ pub struct PrimitiveIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
 pub enum PrimitiveKind {
     Rectangle,
     TextRun,
     Image,
     YuvImage,
     Border,
-    Gradient,
+    AlignedGradient,
+    AngleGradient,
     RadialGradient,
     BoxShadow,
 }
 
 /// Geometry description for simple rectangular primitives, uploaded to the GPU.
 #[derive(Debug, Clone)]
 pub struct PrimitiveGeometry {
     pub local_rect: LayerRect,
@@ -211,65 +214,166 @@ pub struct BoxShadowPrimitiveGpu {
     pub bs_rect: LayerRect,
     pub color: ColorF,
     pub border_radius: f32,
     pub edge_size: f32,
     pub blur_radius: f32,
     pub inverted: f32,
 }
 
-#[repr(u32)]
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum GradientType {
-    Horizontal,
-    Vertical,
-    Rotated,
-}
-
 #[derive(Debug, Clone)]
 #[repr(C)]
-pub struct GradientStop {
+pub struct GradientStopGpu {
     color: ColorF,
     offset: f32,
     padding: [f32; 3],
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct GradientPrimitiveGpu {
     pub start_point: LayerPoint,
     pub end_point: LayerPoint,
-    pub kind: f32,
+    pub extend_mode: f32,
     pub padding: [f32; 3],
 }
 
 #[derive(Debug)]
 pub struct GradientPrimitiveCpu {
     pub stops_range: ItemRange,
-    pub kind: GradientType,
+    pub extend_mode: ExtendMode,
     pub reverse_stops: bool,
     pub cache_dirty: bool,
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct RadialGradientPrimitiveGpu {
     pub start_center: LayerPoint,
     pub end_center: LayerPoint,
     pub start_radius: f32,
     pub end_radius: f32,
-    pub padding: [f32; 2],
+    pub extend_mode: f32,
+    pub padding: [f32; 1],
 }
 
 #[derive(Debug)]
 pub struct RadialGradientPrimitiveCpu {
     pub stops_range: ItemRange,
+    pub extend_mode: ExtendMode,
     pub cache_dirty: bool,
 }
 
+// The number of entries in a gradient data table.
+pub const GRADIENT_DATA_RESOLUTION: usize = 128;
+
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+// An entry in a gradient data table representing a segment of the gradient color space.
+pub struct GradientDataEntry {
+    pub start_color: PackedTexel, // color at the start of the segment
+    pub end_color: PackedTexel, // color at the end of the segment
+}
+
+#[repr(C)]
+// A table of gradient entries, with two colors per entry, that specify the start and end color
+// within the segment of the gradient space represented by that entry. To look up a gradient result,
+// first the entry index is calculated to determine which two colors to interpolate between, then
+// the offset within that entry bucket is used to interpolate between the two colors in that entry.
+// This layout preserves hard stops, as the end color for a given entry can differ from the start
+// color for the following entry, despite them being adjacent. Colors are stored in BGRA8
+// format for texture upload.
+pub struct GradientData {
+    pub colors: [GradientDataEntry; GRADIENT_DATA_RESOLUTION],
+}
+
+impl Default for GradientData {
+    fn default() -> GradientData { // every entry is defined up front; never uninitialized memory
+        let clear = PackedTexel::from_color(&ColorF::new(0.0, 0.0, 0.0, 0.0));
+        let entry = GradientDataEntry { start_color: clear, end_color: clear };
+        GradientData { colors: [entry; GRADIENT_DATA_RESOLUTION] }
+    }
+}
+
+// Clones the table by copying the whole `colors` array by value.
+impl Clone for GradientData {
+    fn clone(&self) -> GradientData {
+        let colors = self.colors;
+        GradientData { colors: colors }
+    }
+}
+
+impl GradientData {
+    // Generate a color ramp between the start and end indexes from a start color to an end color.
+    // Returns the index the next ramp should start from: end_idx, or start_idx if the range is empty.
+    fn fill_colors(&mut self, start_idx: usize, end_idx: usize, start_color: &ColorF, end_color: &ColorF) -> usize {
+        if start_idx >= end_idx {
+            return start_idx;
+        }
+
+        // Calculate the color difference for individual steps in the ramp.
+        let inv_steps = 1.0 / (end_idx - start_idx) as f32;
+        let step_r = (end_color.r - start_color.r) * inv_steps;
+        let step_g = (end_color.g - start_color.g) * inv_steps;
+        let step_b = (end_color.b - start_color.b) * inv_steps;
+        let step_a = (end_color.a - start_color.a) * inv_steps;
+
+        let mut cur_color = *start_color;
+        let mut cur_packed_color = PackedTexel::from_color(&cur_color);
+
+        // Walk the ramp writing start and end colors for each entry.
+        for entry in &mut self.colors[start_idx..end_idx] {
+            entry.start_color = cur_packed_color;
+
+            cur_color.r += step_r;
+            cur_color.g += step_g;
+            cur_color.b += step_b;
+            cur_color.a += step_a;
+            cur_packed_color = PackedTexel::from_color(&cur_color);
+            entry.end_color = cur_packed_color;
+        }
+
+        end_idx
+    }
+
+    // Compute an entry index based on a gradient stop offset (clamped to the 0.0..1.0 range).
+    #[inline]
+    fn get_index(offset: f32) -> usize {
+        (offset.max(0.0).min(1.0) * GRADIENT_DATA_RESOLUTION as f32).round() as usize
+    }
+
+    // Build the gradient data from the supplied stops, reversing them if necessary.
+    fn build(&mut self, src_stops: &[GradientStop], reverse_stops: bool) {
+        let mut cur_idx = 0usize;
+        // Seed with the color of the first stop that will be processed (the last one when
+        // reversed), so entries before it are filled solid instead of ramping from the far end.
+        let seed = if reverse_stops { src_stops.last() } else { src_stops.first() };
+        let mut cur_color = seed.map_or(ColorF::new(0.0, 0.0, 0.0, 0.0), |src| src.color);
+
+        if reverse_stops {
+            // If the gradient is reversed, then ensure the stops are processed in reverse order
+            // and that the offsets are inverted.
+            for src in src_stops.iter().rev() {
+                cur_idx = self.fill_colors(cur_idx, Self::get_index(1.0 - src.offset),
+                                           &cur_color, &src.color);
+                cur_color = src.color;
+            }
+        } else {
+            for src in src_stops {
+                cur_idx = self.fill_colors(cur_idx, Self::get_index(src.offset),
+                                           &cur_color, &src.color);
+                cur_color = src.color;
+            }
+        }
+
+        // Fill out any remaining entries in the gradient.
+        self.fill_colors(cur_idx, GRADIENT_DATA_RESOLUTION, &cur_color, &cur_color);
+    }
+}
+
 #[derive(Debug, Clone)]
 #[repr(C)]
 struct InstanceRect {
     rect: LayerRect,
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
@@ -280,21 +384,22 @@ pub struct TextRunPrimitiveGpu {
 #[derive(Debug, Clone)]
 pub struct TextRunPrimitiveCpu {
     pub font_key: FontKey,
     pub logical_font_size: Au,
     pub blur_radius: Au,
     pub glyph_range: ItemRange,
     pub cache_dirty: bool,
     // TODO(gw): Maybe make this an Arc for sharing with resource cache
-    pub glyph_indices: Vec<u32>,
+    pub glyph_instances: Vec<GlyphInstance>,
     pub color_texture_id: SourceTexture,
     pub color: ColorF,
     pub render_mode: FontRenderMode,
     pub resource_address: GpuStoreAddress,
+    pub glyph_options: Option<GlyphOptions>,
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 struct GlyphPrimitive {
     offset: LayerPoint,
     padding: LayerPoint,
 }
@@ -423,63 +528,66 @@ impl ClipData {
 
 #[derive(Debug)]
 pub enum PrimitiveContainer {
     Rectangle(RectanglePrimitive),
     TextRun(TextRunPrimitiveCpu, TextRunPrimitiveGpu),
     Image(ImagePrimitiveCpu, ImagePrimitiveGpu),
     YuvImage(YuvImagePrimitiveCpu, YuvImagePrimitiveGpu),
     Border(BorderPrimitiveCpu, BorderPrimitiveGpu),
-    Gradient(GradientPrimitiveCpu, GradientPrimitiveGpu),
+    AlignedGradient(GradientPrimitiveCpu, GradientPrimitiveGpu),
+    AngleGradient(GradientPrimitiveCpu, GradientPrimitiveGpu),
     RadialGradient(RadialGradientPrimitiveCpu, RadialGradientPrimitiveGpu),
     BoxShadow(BoxShadowPrimitiveGpu, Vec<LayerRect>),
 }
 
 pub struct PrimitiveStore {
     // CPU side information only
     pub cpu_bounding_rects: Vec<Option<DeviceIntRect>>,
     pub cpu_text_runs: Vec<TextRunPrimitiveCpu>,
     pub cpu_images: Vec<ImagePrimitiveCpu>,
     pub cpu_yuv_images: Vec<YuvImagePrimitiveCpu>,
     pub cpu_gradients: Vec<GradientPrimitiveCpu>,
     pub cpu_radial_gradients: Vec<RadialGradientPrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
     pub cpu_borders: Vec<BorderPrimitiveCpu>,
 
     // Gets uploaded directly to GPU via vertex texture
-    pub gpu_geometry: GpuStore<PrimitiveGeometry>,
-    pub gpu_data16: GpuStore<GpuBlock16>,
-    pub gpu_data32: GpuStore<GpuBlock32>,
-    pub gpu_data64: GpuStore<GpuBlock64>,
-    pub gpu_data128: GpuStore<GpuBlock128>,
+    pub gpu_geometry: VertexDataStore<PrimitiveGeometry>,
+    pub gpu_data16: VertexDataStore<GpuBlock16>,
+    pub gpu_data32: VertexDataStore<GpuBlock32>,
+    pub gpu_data64: VertexDataStore<GpuBlock64>,
+    pub gpu_data128: VertexDataStore<GpuBlock128>,
+    pub gpu_gradient_data: GradientDataStore,
 
     // Resolved resource rects.
-    pub gpu_resource_rects: GpuStore<TexelRect>,
+    pub gpu_resource_rects: VertexDataStore<TexelRect>,
 
     // General
     prims_to_resolve: Vec<PrimitiveIndex>,
 }
 
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
             cpu_bounding_rects: Vec::new(),
             cpu_text_runs: Vec::new(),
             cpu_images: Vec::new(),
             cpu_yuv_images: Vec::new(),
             cpu_gradients: Vec::new(),
             cpu_radial_gradients: Vec::new(),
             cpu_borders: Vec::new(),
-            gpu_geometry: GpuStore::new(),
-            gpu_data16: GpuStore::new(),
-            gpu_data32: GpuStore::new(),
-            gpu_data64: GpuStore::new(),
-            gpu_data128: GpuStore::new(),
-            gpu_resource_rects: GpuStore::new(),
+            gpu_geometry: VertexDataStore::new(),
+            gpu_data16: VertexDataStore::new(),
+            gpu_data32: VertexDataStore::new(),
+            gpu_data64: VertexDataStore::new(),
+            gpu_data128: VertexDataStore::new(),
+            gpu_gradient_data: GradientDataStore::new(),
+            gpu_resource_rects: VertexDataStore::new(),
             prims_to_resolve: Vec::new(),
         }
     }
 
     pub fn populate_clip_data(data: &mut [GpuBlock32], clip: ClipData) {
         data[0] = GpuBlock32::from(clip.rect);
         data[1] = GpuBlock32::from(clip.top_left);
         data[2] = GpuBlock32::from(clip.top_right);
@@ -591,49 +699,72 @@ impl PrimitiveStore {
                     gpu_data_count: 0,
                     render_task: None,
                     clip_task: None,
                 };
 
                 self.cpu_borders.push(border_cpu);
                 metadata
             }
-            PrimitiveContainer::Gradient(gradient_cpu, gradient_gpu) => {
+            PrimitiveContainer::AlignedGradient(gradient_cpu, gradient_gpu) => {
                 let gpu_address = self.gpu_data32.push(gradient_gpu);
                 let gpu_stops_address = self.gpu_data32.alloc(gradient_cpu.stops_range.length);
 
                 let metadata = PrimitiveMetadata {
+                    // TODO: calculate if the gradient is actually opaque
                     is_opaque: false,
                     clip_source: clip_source,
                     clip_cache_info: clip_info,
-                    prim_kind: PrimitiveKind::Gradient,
+                    prim_kind: PrimitiveKind::AlignedGradient,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_gradients.len()),
                     gpu_prim_index: gpu_address,
                     gpu_data_address: gpu_stops_address,
                     gpu_data_count: gradient_cpu.stops_range.length as i32,
                     render_task: None,
                     clip_task: None,
                 };
 
                 self.cpu_gradients.push(gradient_cpu);
                 metadata
             }
+            PrimitiveContainer::AngleGradient(gradient_cpu, gradient_gpu) => {
+                let gpu_address = self.gpu_data32.push(gradient_gpu);
+                let gpu_gradient_address = self.gpu_gradient_data.alloc(1);
+
+                let metadata = PrimitiveMetadata {
+                    // TODO: calculate if the gradient is actually opaque
+                    is_opaque: false,
+                    clip_source: clip_source,
+                    clip_cache_info: clip_info,
+                    prim_kind: PrimitiveKind::AngleGradient,
+                    cpu_prim_index: SpecificPrimitiveIndex(self.cpu_gradients.len()),
+                    gpu_prim_index: gpu_address,
+                    gpu_data_address: gpu_gradient_address,
+                    gpu_data_count: 1,
+                    render_task: None,
+                    clip_task: None,
+                };
+
+                self.cpu_gradients.push(gradient_cpu);
+                metadata
+            }
             PrimitiveContainer::RadialGradient(radial_gradient_cpu, radial_gradient_gpu) => {
                 let gpu_address = self.gpu_data32.push(radial_gradient_gpu);
-                let gpu_stops_address = self.gpu_data32.alloc(radial_gradient_cpu.stops_range.length);
+                let gpu_gradient_address = self.gpu_gradient_data.alloc(1);
 
                 let metadata = PrimitiveMetadata {
+                    // TODO: calculate if the gradient is actually opaque
                     is_opaque: false,
                     clip_source: clip_source,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::RadialGradient,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_radial_gradients.len()),
                     gpu_prim_index: gpu_address,
-                    gpu_data_address: gpu_stops_address,
-                    gpu_data_count: radial_gradient_cpu.stops_range.length as i32,
+                    gpu_data_address: gpu_gradient_address,
+                    gpu_data_count: 1,
                     render_task: None,
                     clip_task: None,
                 };
 
                 self.cpu_radial_gradients.push(radial_gradient_cpu);
                 metadata
             }
             PrimitiveContainer::BoxShadow(box_shadow_gpu, instance_rects) => {
@@ -688,17 +819,17 @@ impl PrimitiveStore {
             }
         };
 
         self.cpu_metadata.push(metadata);
 
         PrimitiveIndex(prim_index)
     }
 
-    fn resolve_clip_cache_internal(gpu_data32: &mut GpuStore<GpuBlock32>,
+    fn resolve_clip_cache_internal(gpu_data32: &mut VertexDataStore<GpuBlock32>,
                                    clip_info: &MaskCacheInfo,
                                    resource_cache: &ResourceCache) {
         if let Some((ref mask, gpu_address)) = clip_info.image {
             let cache_item = resource_cache.get_cached_image(mask.image, ImageRendering::Auto);
             let mask_data = gpu_data32.get_slice_mut(gpu_address, MASK_DATA_GPU_SIZE);
             mask_data[0] = GpuBlock32::from(ImageMaskData {
                 uv_rect: DeviceRect::new(cache_item.uv0,
                                          DeviceSize::new(cache_item.uv1.x - cache_item.uv0.x,
@@ -712,41 +843,46 @@ impl PrimitiveStore {
                               clip_info: &MaskCacheInfo,
                               resource_cache: &ResourceCache) {
         Self::resolve_clip_cache_internal(&mut self.gpu_data32, clip_info, resource_cache)
     }
 
     pub fn resolve_primitives(&mut self,
                               resource_cache: &ResourceCache,
                               device_pixel_ratio: f32) -> Vec<DeferredResolve> {
+        profile_scope!("resolve_primitives");
         let mut deferred_resolves = Vec::new();
 
         for prim_index in self.prims_to_resolve.drain(..) {
             let metadata = &mut self.cpu_metadata[prim_index.0];
             if let Some(ref clip_info) = metadata.clip_cache_info {
                 Self::resolve_clip_cache_internal(&mut self.gpu_data32, clip_info, resource_cache);
             }
 
             match metadata.prim_kind {
                 PrimitiveKind::Rectangle |
                 PrimitiveKind::Border |
                 PrimitiveKind::BoxShadow |
-                PrimitiveKind::Gradient |
+                PrimitiveKind::AlignedGradient |
+                PrimitiveKind::AngleGradient |
                 PrimitiveKind::RadialGradient=> {}
                 PrimitiveKind::TextRun => {
                     let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
+
                     let font_size_dp = text.logical_font_size.scale_by(device_pixel_ratio);
 
                     let dest_rects = self.gpu_resource_rects.get_slice_mut(text.resource_address,
                                                                            text.glyph_range.length);
+
                     let texture_id = resource_cache.get_glyphs(text.font_key,
                                                                font_size_dp,
                                                                text.color,
-                                                               &text.glyph_indices,
-                                                               text.render_mode, |index, uv0, uv1| {
+                                                               &text.glyph_instances,
+                                                               text.render_mode,
+                                                               text.glyph_options, |index, uv0, uv1| {
                         let dest_rect = &mut dest_rects[index];
                         dest_rect.uv0 = uv0;
                         dest_rect.uv1 = uv1;
                     });
 
                     text.color_texture_id = texture_id;
                 }
                 PrimitiveKind::Image => {
@@ -817,20 +953,16 @@ impl PrimitiveStore {
                     }
                 }
             }
         }
 
         deferred_resolves
     }
 
-    pub fn get_bounding_rect(&self, index: PrimitiveIndex) -> &Option<DeviceIntRect> {
-        &self.cpu_bounding_rects[index.0]
-    }
-
     pub fn set_clip_source(&mut self, index: PrimitiveIndex, source: ClipSource) {
         let metadata = &mut self.cpu_metadata[index.0];
         let (rect, is_complex) = match source {
             ClipSource::NoClip => (None, false),
             ClipSource::Complex(rect, radius) => (Some(rect), radius > 0.0),
             ClipSource::Region(ref region) => (Some(region.main), region.is_complex()),
         };
         if let Some(rect) = rect {
@@ -914,62 +1046,70 @@ impl PrimitiveStore {
                 let edge_size = box_shadow_gpu.edge_size.ceil() * device_pixel_ratio;
                 let edge_size = edge_size as i32 + 2;   // Account for bilinear filtering
                 let cache_size = DeviceIntSize::new(edge_size, edge_size);
                 let location = RenderTaskLocation::Dynamic(None, cache_size);
                 metadata.render_task.as_mut().unwrap().location = location;
             }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
+
                 let font_size_dp = text.logical_font_size.scale_by(device_pixel_ratio);
+                let src_glyphs = auxiliary_lists.glyph_instances(&text.glyph_range);
                 prim_needs_resolve = true;
 
                 if text.cache_dirty {
                     rebuild_bounding_rect = true;
                     text.cache_dirty = false;
 
                     debug_assert!(metadata.gpu_data_count == text.glyph_range.length as i32);
-                    debug_assert!(text.glyph_indices.is_empty());
-                    let src_glyphs = auxiliary_lists.glyph_instances(&text.glyph_range);
+                    debug_assert!(text.glyph_instances.is_empty());
+
                     let dest_glyphs = self.gpu_data16.get_slice_mut(metadata.gpu_data_address,
                                                                     text.glyph_range.length);
                     let mut glyph_key = GlyphKey::new(text.font_key,
                                                       font_size_dp,
                                                       text.color,
-                                                      src_glyphs[0].index);
+                                                      src_glyphs[0].index,
+                                                      src_glyphs[0].point,
+                                                      text.render_mode);
                     let mut local_rect = LayerRect::zero();
                     let mut actual_glyph_count = 0;
 
                     for src in src_glyphs {
                         glyph_key.index = src.index;
+                        glyph_key.subpixel_point.set_offset(src.point, text.render_mode);
 
                         let dimensions = match resource_cache.get_glyph_dimensions(&glyph_key) {
                             None => continue,
                             Some(dimensions) => dimensions,
                         };
 
                         // TODO(gw): Check for this and ensure platforms return None in this case!!!
                         debug_assert!(dimensions.width > 0 && dimensions.height > 0);
 
-                        let x = src.x + dimensions.left as f32 / device_pixel_ratio;
-                        let y = src.y - dimensions.top as f32 / device_pixel_ratio;
+                        let x = src.point.x + dimensions.left as f32 / device_pixel_ratio;
+                        let y = src.point.y - dimensions.top as f32 / device_pixel_ratio;
 
                         let width = dimensions.width as f32 / device_pixel_ratio;
                         let height = dimensions.height as f32 / device_pixel_ratio;
 
                         let local_glyph_rect = LayerRect::new(LayerPoint::new(x, y),
                                                               LayerSize::new(width, height));
                         local_rect = local_rect.union(&local_glyph_rect);
 
                         dest_glyphs[actual_glyph_count] = GpuBlock16::from(GlyphPrimitive {
                             padding: LayerPoint::zero(),
                             offset: local_glyph_rect.origin,
                         });
 
-                        text.glyph_indices.push(src.index);
+                        text.glyph_instances.push(GlyphInstance {
+                            index: src.index,
+                            point: Point2D::new(src.point.x, src.point.y),
+                        });
 
                         actual_glyph_count += 1;
                     }
 
                     // Expand the rectangle of the text run by the blur radius.
                     let local_rect = local_rect.inflate(text.blur_radius.to_f32_px(),
                                                         text.blur_radius.to_f32_px());
 
@@ -995,18 +1135,19 @@ impl PrimitiveStore {
                     metadata.gpu_data_count = actual_glyph_count as i32;
                     metadata.render_task = render_task;
                     self.gpu_geometry.get_mut(GpuStoreAddress(prim_index.0 as i32)).local_rect = local_rect;
                 }
 
                 resource_cache.request_glyphs(text.font_key,
                                               font_size_dp,
                                               text.color,
-                                              &text.glyph_indices,
-                                              text.render_mode);
+                                              &text.glyph_instances,
+                                              text.render_mode,
+                                              text.glyph_options);
             }
             PrimitiveKind::Image => {
                 let image_cpu = &mut self.cpu_images[metadata.cpu_prim_index.0];
 
                 prim_needs_resolve = true;
                 match image_cpu.kind {
                     ImagePrimitiveKind::Image(image_key, image_rendering, tile_spacing) => {
                         resource_cache.request_image(image_key, image_rendering);
@@ -1029,63 +1170,51 @@ impl PrimitiveStore {
 
                 resource_cache.request_image(image_cpu.y_key, ImageRendering::Auto);
                 resource_cache.request_image(image_cpu.u_key, ImageRendering::Auto);
                 resource_cache.request_image(image_cpu.v_key, ImageRendering::Auto);
 
                 // TODO(nical): Currently assuming no tile_spacing for yuv images.
                 metadata.is_opaque = true;
             }
-            PrimitiveKind::Gradient => {
+            PrimitiveKind::AlignedGradient => {
                 let gradient = &mut self.cpu_gradients[metadata.cpu_prim_index.0];
                 if gradient.cache_dirty {
                     let src_stops = auxiliary_lists.gradient_stops(&gradient.stops_range);
 
                     debug_assert!(metadata.gpu_data_count == gradient.stops_range.length as i32);
                     let dest_stops = self.gpu_data32.get_slice_mut(metadata.gpu_data_address,
                                                                    gradient.stops_range.length);
 
-                    if gradient.reverse_stops {
-                        for (src, dest) in src_stops.iter().rev().zip(dest_stops.iter_mut()) {
-                            *dest = GpuBlock32::from(GradientStop {
-                                offset: 1.0 - src.offset,
-                                color: src.color,
-                                padding: [0.0; 3],
-                            });
-                        }
-                    } else {
-                        for (src, dest) in src_stops.iter().zip(dest_stops.iter_mut()) {
-                            *dest = GpuBlock32::from(GradientStop {
-                                offset: src.offset,
-                                color: src.color,
-                                padding: [0.0; 3],
-                            });
-                        }
+                    for (src, dest) in src_stops.iter().zip(dest_stops.iter_mut()) {
+                        *dest = GpuBlock32::from(GradientStopGpu {
+                            offset: src.offset,
+                            color: src.color,
+                            padding: [0.0; 3],
+                        });
                     }
 
                     gradient.cache_dirty = false;
                 }
             }
+            PrimitiveKind::AngleGradient => {
+                let gradient = &mut self.cpu_gradients[metadata.cpu_prim_index.0];
+                if gradient.cache_dirty {
+                    let src_stops = auxiliary_lists.gradient_stops(&gradient.stops_range);
+                    let dest_gradient = self.gpu_gradient_data.get_mut(metadata.gpu_data_address);
+                    dest_gradient.build(src_stops, gradient.reverse_stops);
+                    gradient.cache_dirty = false;
+                }
+            }
             PrimitiveKind::RadialGradient => {
                 let gradient = &mut self.cpu_radial_gradients[metadata.cpu_prim_index.0];
                 if gradient.cache_dirty {
                     let src_stops = auxiliary_lists.gradient_stops(&gradient.stops_range);
-
-                    debug_assert!(metadata.gpu_data_count == gradient.stops_range.length as i32);
-                    let dest_stops = self.gpu_data32.get_slice_mut(metadata.gpu_data_address,
-                                                                   gradient.stops_range.length);
-
-                    for (src, dest) in src_stops.iter().zip(dest_stops.iter_mut()) {
-                        *dest = GpuBlock32::from(GradientStop {
-                            offset: src.offset,
-                            color: src.color,
-                            padding: [0.0; 3],
-                        });
-                    }
-
+                    let dest_gradient = self.gpu_gradient_data.get_mut(metadata.gpu_data_address);
+                    dest_gradient.build(src_stops, false);
                     gradient.cache_dirty = false;
                 }
             }
         }
 
         if prim_needs_resolve {
             self.prims_to_resolve.push(prim_index);
         }
@@ -1165,20 +1294,20 @@ impl Default for GpuBlock32 {
 impl From<GradientPrimitiveGpu> for GpuBlock32 {
     fn from(data: GradientPrimitiveGpu) -> GpuBlock32 {
         unsafe {
             mem::transmute::<GradientPrimitiveGpu, GpuBlock32>(data)
         }
     }
 }
 
-impl From<GradientStop> for GpuBlock32 {
-    fn from(data: GradientStop) -> GpuBlock32 {
+impl From<GradientStopGpu> for GpuBlock32 {
+    fn from(data: GradientStopGpu) -> GpuBlock32 {
         unsafe {
-            mem::transmute::<GradientStop, GpuBlock32>(data)
+            mem::transmute::<GradientStopGpu, GpuBlock32>(data)
         }
     }
 }
 
 impl From<RadialGradientPrimitiveGpu> for GpuBlock32 {
     fn from(data: RadialGradientPrimitiveGpu) -> GpuBlock32 {
         unsafe {
             mem::transmute::<RadialGradientPrimitiveGpu, GpuBlock32>(data)
--- a/gfx/webrender/src/record.rs
+++ b/gfx/webrender/src/record.rs
@@ -1,86 +1,104 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use bincode::serde::serialize;
 use bincode;
+use std::fmt::Debug;
 use std::mem;
 use std::any::TypeId;
-use std::fs::{File, OpenOptions};
+use std::fs::File;
 use std::io::Write;
-use std::ops::DerefMut;
-use std::sync::Mutex;
+use std::path::PathBuf;
 use webrender_traits::ApiMsg;
 use byteorder::{LittleEndian, WriteBytesExt};
 
-lazy_static! {
-    static ref WEBRENDER_RECORDING_DETOUR: Mutex<Option<Box<ApiRecordingReceiver>>> = Mutex::new(None);
-}
+/// Magic value that `BinaryRecorder::new` writes at the very start of a recording file.
+pub static WEBRENDER_RECORDING_HEADER: u64 = 0xbeefbeefbeefbe01u64;
 
-pub static WEBRENDER_RECORDING_HEADER: u64 = 0xbeefbeefbeefbe01u64;
-static mut CURRENT_FRAME_NUMBER: u32 = 0xffffffffu32;
-
-pub trait ApiRecordingReceiver: Send {
+/// Receiver for recorded API messages (`write_msg`) and their raw payloads
+/// (`write_payload`); `frame` is the frame counter supplied by the caller.
+pub trait ApiRecordingReceiver: Send + Debug {
     fn write_msg(&mut self, frame: u32, msg: &ApiMsg);
     fn write_payload(&mut self, frame: u32, data: &[u8]);
 }
 
-pub fn set_recording_detour(detour: Option<Box<ApiRecordingReceiver>>) {
-    let mut recorder = WEBRENDER_RECORDING_DETOUR.lock();
-    *recorder.as_mut().unwrap().deref_mut() = detour;
+/// `ApiRecordingReceiver` that streams a recording into one file: the header
+/// written by `new`, followed by length-prefixed records.
+#[derive(Debug)]
+pub struct BinaryRecorder {
+    file: File,
 }
 
-fn write_data(frame: u32, data: &[u8]) {
-    let filename = format!("record/frame_{}.bin", frame);
-    let mut file = if unsafe { CURRENT_FRAME_NUMBER != frame } {
-        unsafe { CURRENT_FRAME_NUMBER = frame; }
+impl BinaryRecorder {
+    /// Creates (or truncates) `dest` and writes the recording header: the
+    /// `WEBRENDER_RECORDING_HEADER` magic followed by the `TypeId` of `ApiMsg`,
+    /// both as little-endian `u64`s.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `dest` cannot be created, or if `TypeId` is not 64 bits wide.
+    pub fn new(dest: &PathBuf) -> BinaryRecorder {
+        let mut file = File::create(dest).unwrap();
 
-        let mut file = File::create(filename).unwrap();
+        // write the header
         let apimsg_type_id = unsafe {
             assert!(mem::size_of::<TypeId>() == mem::size_of::<u64>());
             mem::transmute::<TypeId, u64>(TypeId::of::<ApiMsg>())
         };
-
         file.write_u64::<LittleEndian>(WEBRENDER_RECORDING_HEADER).ok();
         file.write_u64::<LittleEndian>(apimsg_type_id).ok();
-        file
-    } else {
-        OpenOptions::new().append(true).create(false).open(filename).unwrap()
-    };
-    file.write_u32::<LittleEndian>(data.len() as u32).ok();
-    file.write(data).ok();
+
+        BinaryRecorder {
+            file: file,
+        }
+    }
+
+    /// Appends one record: `data.len()` as a little-endian `u32`, then the bytes.
+    /// I/O errors are ignored (`.ok()`), like the header writes in `new`.
+    fn write_length_and_data(&mut self, data: &[u8]) {
+        self.file.write_u32::<LittleEndian>(data.len() as u32).ok();
+        // `write_all`, not `write`: a single `write` call may store only part
+        // of `data`, which would desynchronise every following record.
+        self.file.write_all(data).ok();
+    }
 }
 
-pub fn write_msg(frame: u32, msg: &ApiMsg) {
+impl ApiRecordingReceiver for BinaryRecorder {
+    /// Serializes `msg` with bincode and appends it as one record, but only if
+    /// `should_record_msg` accepts it. The frame number is not stored.
+    fn write_msg(&mut self, _: u32, msg: &ApiMsg) {
+        if should_record_msg(msg) {
+            let buf = serialize(msg, bincode::SizeLimit::Infinite).unwrap();
+            self.write_length_and_data(&buf);
+        }
+    }
+
+    /// Appends a zero `u32` marker followed by `data` as a length-prefixed
+    /// record. The frame number is not stored.
+    fn write_payload(&mut self, _: u32, data: &[u8]) {
+        // signal payload with a 0 length
+        self.file.write_u32::<LittleEndian>(0).ok();
+        self.write_length_and_data(data);
+    }
+}
+
+/// Returns `true` for the `ApiMsg` variants that are written to a recording;
+/// every other variant is skipped.
+pub fn should_record_msg(msg: &ApiMsg) -> bool {
     match msg {
         &ApiMsg::AddRawFont(..) |
         &ApiMsg::AddNativeFont(..) |
         &ApiMsg::AddImage(..) |
+        &ApiMsg::GenerateFrame(..) |
         &ApiMsg::UpdateImage(..) |
-        &ApiMsg::DeleteImage(..)|
+        &ApiMsg::DeleteImage(..) |
         &ApiMsg::SetRootDisplayList(..) |
         &ApiMsg::SetRootPipeline(..) |
         &ApiMsg::Scroll(..) |
         &ApiMsg::TickScrollingBounce |
-        &ApiMsg::WebGLCommand(..) => {
-            let mut recorder = WEBRENDER_RECORDING_DETOUR.lock();
-            if let Some(ref mut recorder) = recorder.as_mut().unwrap().as_mut() {
-                recorder.write_msg(frame, &msg);
-            } else {
-                let buff = serialize(msg, bincode::SizeLimit::Infinite).unwrap();
-                write_data(frame, &buff);
-            }
-       }
-       _ => {}
+        &ApiMsg::WebGLCommand(..) =>
+            true,
+        _ => false
     }
 }
-
-pub fn write_payload(frame: u32, data: &[u8]) {
-    let mut recorder = WEBRENDER_RECORDING_DETOUR.lock();
-    if let Some(ref mut recorder) = recorder.as_mut().unwrap().as_mut() {
-        recorder.write_payload(frame, data);
-    } else {
-        write_data(frame, &[]); //signal the payload
-        write_data(frame, data);
-    }
-}
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -1,31 +1,32 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use byteorder::{LittleEndian, ReadBytesExt};
 use frame::Frame;
+use frame_builder::FrameBuilderConfig;
 use internal_types::{FontTemplate, GLContextHandleWrapper, GLContextWrapper};
 use internal_types::{SourceTexture, ResultMsg, RendererFrame};
 use profiler::BackendProfileCounters;
-use record;
+use record::ApiRecordingReceiver;
 use resource_cache::ResourceCache;
 use scene::Scene;
 use std::collections::HashMap;
-use std::fs;
 use std::io::{Cursor, Read};
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::Sender;
 use texture_cache::TextureCache;
+use thread_profiler::register_thread_with_profiler;
+use threadpool::ThreadPool;
 use webrender_traits::{ApiMsg, AuxiliaryLists, BuiltDisplayList, IdNamespace, ImageData};
-use webrender_traits::{RenderNotifier, RenderDispatcher, WebGLCommand, WebGLContextId};
+use webrender_traits::{PipelineId, RenderNotifier, RenderDispatcher, WebGLCommand, WebGLContextId};
 use webrender_traits::channel::{PayloadHelperMethods, PayloadReceiver, PayloadSender, MsgReceiver};
 use webrender_traits::{VRCompositorCommand, VRCompositorHandler};
-use tiling::FrameBuilderConfig;
 use offscreen_gl_context::GLContextDispatcher;
 
 /// The render backend is responsible for transforming high level display lists into
 /// GPU-friendly work which is then submitted to the renderer in the form of a frame::Frame.
 ///
 /// The render backend operates on its own thread.
 pub struct RenderBackend {
     api_rx: MsgReceiver<ApiMsg>,
@@ -40,77 +41,76 @@ pub struct RenderBackend {
 
     scene: Scene,
     frame: Frame,
 
     notifier: Arc<Mutex<Option<Box<RenderNotifier>>>>,
     webrender_context_handle: Option<GLContextHandleWrapper>,
     webgl_contexts: HashMap<WebGLContextId, GLContextWrapper>,
     current_bound_webgl_context_id: Option<WebGLContextId>,
-    enable_recording: bool,
+    recorder: Option<Box<ApiRecordingReceiver>>,
     main_thread_dispatcher: Arc<Mutex<Option<Box<RenderDispatcher>>>>,
 
     next_webgl_id: usize,
 
     vr_compositor_handler: Arc<Mutex<Option<Box<VRCompositorHandler>>>>
 }
 
 impl RenderBackend {
     pub fn new(api_rx: MsgReceiver<ApiMsg>,
                payload_rx: PayloadReceiver,
                payload_tx: PayloadSender,
                result_tx: Sender<ResultMsg>,
                device_pixel_ratio: f32,
                texture_cache: TextureCache,
                enable_aa: bool,
+               workers: Arc<Mutex<ThreadPool>>,
                notifier: Arc<Mutex<Option<Box<RenderNotifier>>>>,
                webrender_context_handle: Option<GLContextHandleWrapper>,
                config: FrameBuilderConfig,
-               debug: bool,
-               enable_recording:bool,
+               recorder: Option<Box<ApiRecordingReceiver>>,
                main_thread_dispatcher: Arc<Mutex<Option<Box<RenderDispatcher>>>>,
                vr_compositor_handler: Arc<Mutex<Option<Box<VRCompositorHandler>>>>) -> RenderBackend {
 
-        let resource_cache = ResourceCache::new(texture_cache,
-                                                enable_aa);
+        let resource_cache = ResourceCache::new(texture_cache, workers, enable_aa);
+
+        register_thread_with_profiler("Backend".to_string());
 
         RenderBackend {
             api_rx: api_rx,
             payload_rx: payload_rx,
             payload_tx: payload_tx,
             result_tx: result_tx,
             device_pixel_ratio: device_pixel_ratio,
             resource_cache: resource_cache,
             scene: Scene::new(),
-            frame: Frame::new(debug, config),
+            frame: Frame::new(config),
             next_namespace_id: IdNamespace(1),
             notifier: notifier,
             webrender_context_handle: webrender_context_handle,
             webgl_contexts: HashMap::new(),
             current_bound_webgl_context_id: None,
-            enable_recording:enable_recording,
+            recorder: recorder,
             main_thread_dispatcher: main_thread_dispatcher,
             next_webgl_id: 0,
             vr_compositor_handler: vr_compositor_handler
         }
     }
 
     pub fn run(&mut self) {
         let mut profile_counters = BackendProfileCounters::new();
         let mut frame_counter: u32 = 0;
-        if self.enable_recording {
-            fs::create_dir("record").ok();
-        }
 
         loop {
             let msg = self.api_rx.recv();
+            profile_scope!("handle_msg");
             match msg {
                 Ok(msg) => {
-                    if self.enable_recording {
-                        record::write_msg(frame_counter, &msg);
+                    if let Some(ref mut r) = self.recorder {
+                        r.write_msg(frame_counter, &msg);
                     }
                     match msg {
                         ApiMsg::AddRawFont(id, bytes) => {
                             profile_counters.font_templates.inc(bytes.len());
                             self.resource_cache
                                 .add_font_template(id, FontTemplate::Raw(Arc::new(bytes)));
                         }
                         ApiMsg::AddNativeFont(id, native_font_handle) => {
@@ -145,17 +145,19 @@ impl RenderBackend {
 
                             sender.send(result).unwrap();
                         }
                         ApiMsg::SetRootDisplayList(background_color,
                                                    epoch,
                                                    pipeline_id,
                                                    viewport_size,
                                                    display_list_descriptor,
-                                                   auxiliary_lists_descriptor) => {
+                                                   auxiliary_lists_descriptor,
+                                                   preserve_frame_state) => {
+                            profile_scope!("SetRootDisplayList");
                             let mut leftover_auxiliary_data = vec![];
                             let mut auxiliary_data;
                             loop {
                                 auxiliary_data = self.payload_rx.recv().unwrap();
                                 {
                                     let mut payload_reader = Cursor::new(&auxiliary_data[..]);
                                     let payload_epoch =
                                         payload_reader.read_u32::<LittleEndian>().unwrap();
@@ -163,53 +165,58 @@ impl RenderBackend {
                                         break
                                     }
                                 }
                                 leftover_auxiliary_data.push(auxiliary_data)
                             }
                             for leftover_auxiliary_data in leftover_auxiliary_data {
                                 self.payload_tx.send_vec(leftover_auxiliary_data).unwrap()
                             }
-                            if self.enable_recording {
-                                record::write_payload(frame_counter, &auxiliary_data);
+                            if let Some(ref mut r) = self.recorder {
+                                r.write_payload(frame_counter, &auxiliary_data);
                             }
 
                             let mut auxiliary_data = Cursor::new(&mut auxiliary_data[4..]);
                             let mut built_display_list_data =
                                 vec![0; display_list_descriptor.size()];
                             auxiliary_data.read_exact(&mut built_display_list_data[..]).unwrap();
                             let built_display_list =
                                 BuiltDisplayList::from_data(built_display_list_data,
                                                             display_list_descriptor);
-
                             let mut auxiliary_lists_data =
                                 vec![0; auxiliary_lists_descriptor.size()];
                             auxiliary_data.read_exact(&mut auxiliary_lists_data[..]).unwrap();
                             let auxiliary_lists =
                                 AuxiliaryLists::from_data(auxiliary_lists_data,
                                                           auxiliary_lists_descriptor);
 
+                            if !preserve_frame_state {
+                                self.discard_frame_state_for_pipeline(pipeline_id);
+                            }
+
                             self.scene.set_root_display_list(pipeline_id,
                                                              epoch,
                                                              built_display_list,
                                                              background_color,
                                                              viewport_size,
                                                              auxiliary_lists);
                             self.build_scene();
                         }
                         ApiMsg::SetRootPipeline(pipeline_id) => {
+                            profile_scope!("SetRootPipeline");
                             self.scene.set_root_pipeline_id(pipeline_id);
 
                             if self.scene.display_lists.get(&pipeline_id).is_none() {
                                 continue;
                             }
 
                             self.build_scene();
                         }
                         ApiMsg::Scroll(delta, cursor, move_phase) => {
+                            profile_scope!("Scroll");
                             let frame = profile_counters.total_time.profile(|| {
                                 if self.frame.scroll(delta, cursor, move_phase) {
                                     Some(self.render())
                                 } else {
                                     None
                                 }
                             });
 
@@ -217,16 +224,17 @@ impl RenderBackend {
                                 Some(frame) => {
                                     self.publish_frame(frame, &mut profile_counters);
                                     self.notify_compositor_of_new_scroll_frame(true)
                                 }
                                 None => self.notify_compositor_of_new_scroll_frame(false),
                             }
                         }
                         ApiMsg::ScrollLayersWithScrollId(origin, pipeline_id, scroll_root_id) => {
+                            profile_scope!("ScrollLayersWithScrollId");
                             let frame = profile_counters.total_time.profile(|| {
                                 if self.frame.scroll_layers(origin, pipeline_id, scroll_root_id) {
                                     Some(self.render())
                                 } else {
                                     None
                                 }
                             });
 
@@ -235,27 +243,29 @@ impl RenderBackend {
                                     self.publish_frame(frame, &mut profile_counters);
                                     self.notify_compositor_of_new_scroll_frame(true)
                                 }
                                 None => self.notify_compositor_of_new_scroll_frame(false),
                             }
 
                         }
                         ApiMsg::TickScrollingBounce => {
+                            profile_scope!("TickScrollingBounce");
                             let frame = profile_counters.total_time.profile(|| {
                                 self.frame.tick_scrolling_bounce_animations();
                                 self.render()
                             });
 
                             self.publish_frame_and_notify_compositor(frame, &mut profile_counters);
                         }
                         ApiMsg::TranslatePointToLayerSpace(..) => {
                             panic!("unused api - remove from webrender_traits");
                         }
                         ApiMsg::GetScrollLayerState(tx) => {
+                            profile_scope!("GetScrollLayerState");
                             tx.send(self.frame.get_scroll_layer_state())
                               .unwrap()
                         }
                         ApiMsg::RequestWebGLContext(size, attributes, tx) => {
                             if let Some(ref wrapper) = self.webrender_context_handle {
                                 let dispatcher: Option<Box<GLContextDispatcher>> = if cfg!(target_os = "windows") {
                                     Some(Box::new(WebRenderGLDispatcher {
                                         dispatcher: self.main_thread_dispatcher.clone()
@@ -312,17 +322,34 @@ impl RenderBackend {
                             ctx.make_current();
                             ctx.apply_command(command);
                             self.current_bound_webgl_context_id = Some(context_id);
                         },
 
                         ApiMsg::VRCompositorCommand(context_id, command) => {
                             self.handle_vr_compositor_command(context_id, command);
                         }
-                        ApiMsg::GenerateFrame => {
+                        ApiMsg::GenerateFrame(property_bindings) => {
+                            profile_scope!("GenerateFrame");
+
+                            // Ideally, when there are property bindings present,
+                            // we won't need to rebuild the entire frame here.
+                            // However, to avoid conflicts with the ongoing work to
+                            // refactor how scroll roots + transforms work, this
+                            // just rebuilds the frame if there are animated property
+                            // bindings present for now.
+                            // TODO(gw): Once the scrolling / reference frame changes
+                            //           are completed, optimize the internals of
+                            //           animated properties to not require a full
+                            //           rebuild of the frame!
+                            if let Some(property_bindings) = property_bindings {
+                                self.scene.properties.set_properties(property_bindings);
+                                self.build_scene();
+                            }
+
                             let frame = profile_counters.total_time.profile(|| {
                                 self.render()
                             });
                             if self.scene.root_pipeline_id.is_some() {
                                 self.publish_frame_and_notify_compositor(frame, &mut profile_counters);
                                 frame_counter += 1;
                             }
                         }
@@ -350,20 +377,22 @@ impl RenderBackend {
                             .unwrap()
                             .shut_down();
                     break;
                 }
             }
         }
     }
 
+    fn discard_frame_state_for_pipeline(&mut self, pipeline_id: PipelineId) {
+        self.frame.discard_frame_state_for_pipeline(pipeline_id);
+    }
+
     fn build_scene(&mut self) {
         // Flatten the stacking context hierarchy
-        let mut new_pipeline_sizes = HashMap::new();
-
         if let Some(id) = self.current_bound_webgl_context_id {
             self.webgl_contexts[&id].unbind();
             self.current_bound_webgl_context_id = None;
         }
 
         // When running in OSMesa mode with texture sharing,
         // a flush is required on any GL contexts to ensure
         // that read-back from the shared texture returns
@@ -372,62 +401,17 @@ impl RenderBackend {
         // context at the start of a render frame should
         // incur minimal cost.
         for (_, webgl_context) in &self.webgl_contexts {
             webgl_context.make_current();
             webgl_context.apply_command(WebGLCommand::Flush);
             webgl_context.unbind();
         }
 
-        self.frame.create(&self.scene, &mut new_pipeline_sizes);
-
-        let mut updated_pipeline_sizes = HashMap::new();
-
-        for (pipeline_id, old_size) in self.scene.pipeline_sizes.drain() {
-            let new_size = new_pipeline_sizes.remove(&pipeline_id);
-
-            match new_size {
-                Some(new_size) => {
-                    // Exists in both old and new -> check if size changed
-                    if new_size != old_size {
-                        let mut notifier = self.notifier.lock();
-                        notifier.as_mut()
-                                .unwrap()
-                                .as_mut()
-                                .unwrap()
-                                .pipeline_size_changed(pipeline_id, Some(new_size));
-                    }
-
-                    // Re-insert
-                    updated_pipeline_sizes.insert(pipeline_id, new_size);
-                }
-                None => {
-                    // Was existing, not in current frame anymore
-                        let mut notifier = self.notifier.lock();
-                        notifier.as_mut()
-                                .unwrap()
-                                .as_mut()
-                                .unwrap()
-                                .pipeline_size_changed(pipeline_id, None);
-                }
-            }
-        }
-
-        // Any remaining items are new pipelines
-        for (pipeline_id, new_size) in new_pipeline_sizes.drain() {
-            let mut notifier = self.notifier.lock();
-            notifier.as_mut()
-                    .unwrap()
-                    .as_mut()
-                    .unwrap()
-                    .pipeline_size_changed(pipeline_id, Some(new_size));
-            updated_pipeline_sizes.insert(pipeline_id, new_size);
-        }
-
-        self.scene.pipeline_sizes = updated_pipeline_sizes;
+        self.frame.create(&self.scene);
     }
 
     fn render(&mut self) -> RendererFrame {
         let frame = self.frame.build(&mut self.resource_cache,
                                      &self.scene.pipeline_auxiliary_lists,
                                      self.device_pixel_ratio);
 
         frame
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/render_task.rs
@@ -0,0 +1,436 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use internal_types::{HardwareCompositeOp, LowLevelFilterOp};
+use mask_cache::MaskCacheInfo;
+use prim_store::{PrimitiveCacheKey, PrimitiveIndex};
+use std::{cmp, f32, i32, mem, usize};
+use tiling::{PackedLayerIndex, RenderPass, RenderTargetIndex, ScrollLayerIndex};
+use tiling::StackingContextIndex;
+use webrender_traits::{DeviceIntLength, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+use webrender_traits::MixBlendMode;
+
+const FLOATS_PER_RENDER_TASK_INFO: usize = 16;
+
+/// Newtype around a `usize` index naming a render task.
+/// Carried by `RenderTaskId::Static`.
+#[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
+pub struct RenderTaskIndex(pub usize);
+
+/// Hashable key naming a dynamically allocated render task.
+/// Carried by `RenderTaskId::Dynamic`; each constructor on `RenderTask`
+/// other than `new_alpha_batch` builds one of these variants.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum RenderTaskKey {
+    /// Draw this primitive to a cache target.
+    CachePrimitive(PrimitiveCacheKey),
+    /// Draw the alpha mask for a primitive.
+    CacheMask(MaskCacheKey),
+    /// Apply a vertical blur pass of given radius for this primitive.
+    VerticalBlur(i32, PrimitiveIndex),
+    /// Apply a horizontal blur pass of given radius for this primitive.
+    HorizontalBlur(i32, PrimitiveIndex),
+    /// Allocate a block of space in target for framebuffer copy.
+    CopyFramebuffer(StackingContextIndex),
+}
+
+/// Names the owner of a cached mask task: either a single primitive or a
+/// scroll layer. Used as the payload of `RenderTaskKey::CacheMask`.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+pub enum MaskCacheKey {
+    Primitive(PrimitiveIndex),
+    ScrollLayer(ScrollLayerIndex),
+}
+
+/// Identifier of a render task.
+///
+/// `Static` wraps a plain index (used by `RenderTask::new_alpha_batch`);
+/// `Dynamic` wraps a `RenderTaskKey` (used by the cache, mask, blur and
+/// readback constructors).
+#[derive(Debug, Copy, Clone)]
+pub enum RenderTaskId {
+    Static(RenderTaskIndex),
+    Dynamic(RenderTaskKey),
+}
+
+
+/// Where a render task draws.
+///
+/// * `Fixed` - `RenderTask::get_target_rect` reports a zero rect on target
+///   index 0 for these tasks.
+/// * `Dynamic(allocation, size)` - a task of the given `size` whose origin
+///   and target index are `None` until assigned; `get_target_rect` panics
+///   if it is called while the allocation is still `None`.
+#[derive(Debug, Clone)]
+pub enum RenderTaskLocation {
+    Fixed,
+    Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
+}
+
+/// One entry in the `opaque_items` / `alpha_items` lists of an
+/// `AlphaRenderTask`.
+///
+/// NOTE(review): the trailing `i32` on every variant looks like a z / ordering
+/// value - confirm against the code that constructs these items.
+#[derive(Debug, Clone)]
+pub enum AlphaRenderItem {
+    Primitive(StackingContextIndex, PrimitiveIndex, i32),
+    Blend(StackingContextIndex, RenderTaskId, LowLevelFilterOp, i32),
+    Composite(StackingContextIndex, RenderTaskId, RenderTaskId, MixBlendMode, i32),
+    HardwareComposite(StackingContextIndex, RenderTaskId, HardwareCompositeOp, i32),
+}
+
+/// Payload of `RenderTaskKind::Alpha`: two item lists plus the screen origin
+/// that `RenderTask::write_task_data` writes out for the task.
+#[derive(Debug, Clone)]
+pub struct AlphaRenderTask {
+    // Written into slots 4 and 5 of the task data block.
+    screen_origin: DeviceIntPoint,
+    pub opaque_items: Vec<AlphaRenderItem>,
+    pub alpha_items: Vec<AlphaRenderItem>,
+}
+
+/// Which part of a mask to draw. `#[repr(C)]` with `All` pinned to 0, so the
+/// remaining variants take the values 1 through 4 in declaration order.
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+pub enum MaskSegment {
+    // This must match the SEGMENT_ values
+    // in clip_shared.glsl!
+    All = 0,
+    Corner_TopLeft,
+    Corner_TopRight,
+    Corner_BottomLeft,
+    Corner_BottomRight,
+}
+
+/// Geometry used to draw a cached mask. `RenderTask::new_mask` picks
+/// `CornersOnly` only for a single aligned clip without an image whose
+/// `effective_clip_count` is 1 and whose inner rect is non-empty; every
+/// other case uses `Default`.
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+pub enum MaskGeometryKind {
+    Default,        // Draw the entire rect
+    CornersOnly,    // Draw the corners (simple axis aligned mask)
+    // TODO(gw): Add more types here (e.g. 4 rectangles outside the inner rect)
+}
+
+/// Payload of `RenderTaskKind::CacheMask`, built by `RenderTask::new_mask`.
+#[derive(Debug, Clone)]
+pub struct CacheMaskTask {
+    // The requested rect intersected with the outer rect of every clip.
+    actual_rect: DeviceIntRect,
+    // `actual_rect` further intersected with the inner rect of every clip,
+    // or a zero rect when that intersection is empty.
+    inner_rect: DeviceIntRect,
+    // Copy of the clip list handed to `new_mask`.
+    pub clips: Vec<(PackedLayerIndex, MaskCacheInfo)>,
+    pub geometry_kind: MaskGeometryKind,
+}
+
+/// Outcome of `RenderTask::new_mask`.
+#[derive(Debug)]
+pub enum MaskResult {
+    /// The mask is completely outside the region
+    /// (`new_mask` also returns this when the clip list is empty).
+    Outside,
+    /// The mask is inside and needs to be processed
+    Inside(RenderTask),
+}
+
+/// Fixed-size block of `FLOATS_PER_RENDER_TASK_INFO` floats describing one
+/// render task. `RenderTask::write_task_data` produces one per task.
+#[derive(Debug, Clone)]
+pub struct RenderTaskData {
+    pub data: [f32; FLOATS_PER_RENDER_TASK_INFO],
+}
+
+impl RenderTaskData {
+    /// Returns a block with every float set to `0.0`.
+    ///
+    /// This previously returned `mem::uninitialized()` memory. The derived
+    /// `Clone` and `Debug` impls read every element, and reading
+    /// uninitialized floats is undefined behaviour, so the block is now
+    /// zero-filled. Zero is also what `write_task_data` stores in unused
+    /// slots.
+    pub fn empty() -> RenderTaskData {
+        RenderTaskData {
+            data: [0.0; FLOATS_PER_RENDER_TASK_INFO],
+        }
+    }
+}
+
+impl Default for RenderTaskData {
+    /// Same as `RenderTaskData::empty()`: an all-zero block.
+    fn default() -> RenderTaskData {
+        RenderTaskData::empty()
+    }
+}
+
+/// What a `RenderTask` does, together with the data specific to that kind.
+/// `RenderTask::write_task_data` has one arm per variant (the two blur
+/// variants share an arm).
+#[derive(Debug, Clone)]
+pub enum RenderTaskKind {
+    Alpha(AlphaRenderTask),
+    CachePrimitive(PrimitiveIndex),
+    CacheMask(CacheMaskTask),
+    VerticalBlur(DeviceIntLength, PrimitiveIndex),
+    HorizontalBlur(DeviceIntLength, PrimitiveIndex),
+    Readback(DeviceIntRect),
+}
+
+/// A node in a tree of render tasks: the task itself plus the child tasks
+/// stored in `children`.
+// TODO(gw): Consider storing these in a separate array and having
+//           primitives hold indices - this could avoid cloning
+//           when adding them as child tasks to tiles.
+#[derive(Debug, Clone)]
+pub struct RenderTask {
+    pub id: RenderTaskId,
+    pub location: RenderTaskLocation,
+    // `assign_to_passes` places each child one pass before its parent.
+    pub children: Vec<RenderTask>,
+    pub kind: RenderTaskKind,
+}
+
+impl RenderTask {
+    /// Creates a childless alpha-batch task with empty opaque and alpha item
+    /// lists. The task gets a static id built from `task_index` and is placed
+    /// at `location`.
+    pub fn new_alpha_batch(task_index: RenderTaskIndex,
+                           screen_origin: DeviceIntPoint,
+                           location: RenderTaskLocation) -> RenderTask {
+        let alpha_task = AlphaRenderTask {
+            screen_origin: screen_origin,
+            opaque_items: Vec::new(),
+            alpha_items: Vec::new(),
+        };
+
+        RenderTask {
+            id: RenderTaskId::Static(task_index),
+            location: location,
+            children: Vec::new(),
+            kind: RenderTaskKind::Alpha(alpha_task),
+        }
+    }
+
+    /// Creates a childless task that draws primitive `prim_index` into a
+    /// not-yet-allocated dynamic target of the given `size`, identified by
+    /// the cache `key`.
+    pub fn new_prim_cache(key: PrimitiveCacheKey,
+                          size: DeviceIntSize,
+                          prim_index: PrimitiveIndex) -> RenderTask {
+        let task_id = RenderTaskId::Dynamic(RenderTaskKey::CachePrimitive(key));
+        let unallocated = RenderTaskLocation::Dynamic(None, size);
+
+        RenderTask {
+            id: task_id,
+            location: unallocated,
+            children: Vec::new(),
+            kind: RenderTaskKind::CachePrimitive(prim_index),
+        }
+    }
+
+    /// Creates a childless readback task for `screen_rect`. The dynamic
+    /// target is sized to `screen_rect.size` and the task id is keyed on the
+    /// stacking context index `key`.
+    pub fn new_readback(key: StackingContextIndex,
+                    screen_rect: DeviceIntRect) -> RenderTask {
+        let task_id = RenderTaskId::Dynamic(RenderTaskKey::CopyFramebuffer(key));
+        let unallocated = RenderTaskLocation::Dynamic(None, screen_rect.size);
+
+        RenderTask {
+            id: task_id,
+            location: unallocated,
+            children: Vec::new(),
+            kind: RenderTaskKind::Readback(screen_rect),
+        }
+    }
+
+    /// Builds the cache-mask task for `actual_rect` under the given clips.
+    ///
+    /// Returns `MaskResult::Outside` when `clips` is empty or when
+    /// `actual_rect` does not intersect the outer rect of every clip.
+    /// Otherwise returns `MaskResult::Inside` with a task covering that
+    /// intersection.
+    pub fn new_mask(actual_rect: DeviceIntRect,
+                    mask_key: MaskCacheKey,
+                    clips: &[(PackedLayerIndex, MaskCacheInfo)])
+                    -> MaskResult {
+        if clips.is_empty() {
+            return MaskResult::Outside;
+        }
+
+        // Shrink the requested rect to the intersection of all the outer
+        // bounds. As soon as the intersection becomes empty the mask is
+        // entirely outside the clipped region.
+        let mut task_rect = actual_rect;
+        for &(_, ref clip_info) in clips {
+            task_rect = match task_rect.intersection(&clip_info.outer_rect) {
+                Some(rect) => rect,
+                None => return MaskResult::Outside,
+            };
+        }
+
+        // Shrink further to the intersection of all the inner bounds.
+        // An empty result is allowed here and handled below.
+        let mut inner_rect = Some(task_rect);
+        for &(_, ref clip_info) in clips {
+            inner_rect = match inner_rect {
+                Some(rect) => rect.intersection(&clip_info.inner_rect),
+                None => break,
+            };
+        }
+
+        // TODO(gw): This optimization is very conservative for now.
+        //           For now, only draw optimized geometry if it is
+        //           a single aligned rect mask with rounded corners.
+        //           In the future, we'll expand this to handle the
+        //           more complex types of clip mask geometry.
+        let corners_only = inner_rect.is_some() &&
+                           clips.len() == 1 &&
+                           clips[0].1.image.is_none() &&
+                           clips[0].1.effective_clip_count == 1 &&
+                           clips[0].1.is_aligned;
+        let geometry_kind = if corners_only {
+            MaskGeometryKind::CornersOnly
+        } else {
+            MaskGeometryKind::Default
+        };
+
+        let mask_task = CacheMaskTask {
+            actual_rect: task_rect,
+            inner_rect: inner_rect.unwrap_or(DeviceIntRect::zero()),
+            clips: clips.to_vec(),
+            geometry_kind: geometry_kind,
+        };
+
+        MaskResult::Inside(RenderTask {
+            id: RenderTaskId::Dynamic(RenderTaskKey::CacheMask(mask_key)),
+            location: RenderTaskLocation::Dynamic(None, task_rect.size),
+            children: Vec::new(),
+            kind: RenderTaskKind::CacheMask(mask_task),
+        })
+    }
+
+    // Build the task chain that blurs a primitive. Only text runs use this
+    // at the moment, but it could be extended to blur any render task.
+    // The returned chain looks like:
+    //
+    //    PrimitiveCacheTask: Draw the text run.
+    //           ^
+    //           |
+    //    VerticalBlurTask: Apply the separable vertical blur to the primitive.
+    //           ^
+    //           |
+    //    HorizontalBlurTask: Apply the separable horizontal blur to the vertical blur.
+    //           |
+    //           +---- This is stored as the input task to the primitive shader.
+    //
+    // Both blur targets are the primitive size padded by the blur radius on
+    // every side.
+    pub fn new_blur(key: PrimitiveCacheKey,
+                    size: DeviceIntSize,
+                    blur_radius: DeviceIntLength,
+                    prim_index: PrimitiveIndex) -> RenderTask {
+        let padding = 2 * blur_radius.0;
+        let blur_target_size = size + DeviceIntSize::new(padding, padding);
+
+        let vertical_pass = RenderTask {
+            id: RenderTaskId::Dynamic(RenderTaskKey::VerticalBlur(blur_radius.0, prim_index)),
+            location: RenderTaskLocation::Dynamic(None, blur_target_size),
+            children: vec![RenderTask::new_prim_cache(key, size, prim_index)],
+            kind: RenderTaskKind::VerticalBlur(blur_radius, prim_index),
+        };
+
+        RenderTask {
+            id: RenderTaskId::Dynamic(RenderTaskKey::HorizontalBlur(blur_radius.0, prim_index)),
+            location: RenderTaskLocation::Dynamic(None, blur_target_size),
+            children: vec![vertical_pass],
+            kind: RenderTaskKind::HorizontalBlur(blur_radius, prim_index),
+        }
+    }
+
+    /// Returns the `AlphaRenderTask` payload of this task for mutation.
+    ///
+    /// Hits `unreachable!()` (a panic) if the task is of any other kind. The
+    /// other kinds are listed explicitly rather than with `_`, so adding a
+    /// variant to `RenderTaskKind` forces this match to be revisited.
+    pub fn as_alpha_batch<'a>(&'a mut self) -> &'a mut AlphaRenderTask {
+        match self.kind {
+            RenderTaskKind::Alpha(ref mut task) => task,
+            RenderTaskKind::CachePrimitive(..) |
+            RenderTaskKind::CacheMask(..) |
+            RenderTaskKind::VerticalBlur(..) |
+            RenderTaskKind::Readback(..) |
+            RenderTaskKind::HorizontalBlur(..) => unreachable!(),
+        }
+    }
+
+    // Pack the data specific to this kind of render task into a
+    // fixed block of FLOATS_PER_RENDER_TASK_INFO (16) floats that
+    // is provided to the GPU shaders via a vertex texture.
+    // Slots a given task kind does not use are written as 0.0.
+    // Panics (via get_target_rect) if a dynamic task has not been
+    // allocated a target yet.
+    pub fn write_task_data(&self) -> RenderTaskData {
+        let (target_rect, target_index) = self.get_target_rect();
+
+        // NOTE: The ordering and layout of these structures are
+        //       required to match both the GPU structures declared
+        //       in prim_shared.glsl, and also the uses in submit_batch()
+        //       in renderer.rs.
+        // TODO(gw): Maybe there's a way to make this stuff a bit
+        //           more type-safe. Although, it will always need
+        //           to be kept in sync with the GLSL code anyway.
+
+        match self.kind {
+            RenderTaskKind::Alpha(ref task) => {
+                RenderTaskData {
+                    data: [
+                        target_rect.origin.x as f32,
+                        target_rect.origin.y as f32,
+                        target_rect.size.width as f32,
+                        target_rect.size.height as f32,
+                        task.screen_origin.x as f32,
+                        task.screen_origin.y as f32,
+                        target_index.0 as f32,
+                        0.0,
+                        0.0,
+                        0.0,
+                        0.0,
+                        0.0,
+                        0.0, 0.0, 0.0, 0.0,
+                    ],
+                }
+            }
+            RenderTaskKind::CachePrimitive(..) => {
+                RenderTaskData {
+                    data: [
+                        target_rect.origin.x as f32,
+                        target_rect.origin.y as f32,
+                        target_rect.size.width as f32,
+                        target_rect.size.height as f32,
+                        target_index.0 as f32,
+                        0.0,
+                        0.0,
+                        0.0,
+                        0.0, 0.0, 0.0, 0.0,
+                        0.0, 0.0, 0.0, 0.0,
+                    ],
+                }
+            }
+            // Unlike the other kinds, the mask task stores its rects as
+            // (min x, min y, max x, max y) rather than origin + size.
+            RenderTaskKind::CacheMask(ref task) => {
+                RenderTaskData {
+                    data: [
+                        target_rect.origin.x as f32,
+                        target_rect.origin.y as f32,
+                        (target_rect.origin.x + target_rect.size.width) as f32,
+                        (target_rect.origin.y + target_rect.size.height) as f32,
+                        task.actual_rect.origin.x as f32,
+                        task.actual_rect.origin.y as f32,
+                        target_index.0 as f32,
+                        0.0,
+                        task.inner_rect.origin.x as f32,
+                        task.inner_rect.origin.y as f32,
+                        (task.inner_rect.origin.x + task.inner_rect.size.width) as f32,
+                        (task.inner_rect.origin.y + task.inner_rect.size.height) as f32,
+                        0.0, 0.0, 0.0, 0.0,
+                    ],
+                }
+            }
+            RenderTaskKind::VerticalBlur(blur_radius, _) |
+            RenderTaskKind::HorizontalBlur(blur_radius, _) => {
+                RenderTaskData {
+                    data: [
+                        target_rect.origin.x as f32,
+                        target_rect.origin.y as f32,
+                        target_rect.size.width as f32,
+                        target_rect.size.height as f32,
+                        target_index.0 as f32,
+                        blur_radius.0 as f32,
+                        0.0,
+                        0.0,
+                        0.0, 0.0, 0.0, 0.0,
+                        0.0, 0.0, 0.0, 0.0,
+                    ]
+                }
+            }
+            RenderTaskKind::Readback(..) => {
+                RenderTaskData {
+                    data: [
+                        target_rect.origin.x as f32,
+                        target_rect.origin.y as f32,
+                        target_rect.size.width as f32,
+                        target_rect.size.height as f32,
+                        target_index.0 as f32,
+                        0.0,
+                        0.0,
+                        0.0,
+                        0.0, 0.0, 0.0, 0.0,
+                        0.0, 0.0, 0.0, 0.0,
+                    ]
+                }
+            }
+        }
+    }
+
+    /// Returns the rect this task occupies in its render target, together
+    /// with the index of that target.
+    ///
+    /// Fixed tasks report a zero rect on target 0. Dynamic tasks must already
+    /// have been given an origin and target index; otherwise this panics.
+    fn get_target_rect(&self) -> (DeviceIntRect, RenderTargetIndex) {
+        match self.location {
+            RenderTaskLocation::Dynamic(allocation, size) => {
+                let (origin, target_index) =
+                    allocation.expect("Should have been allocated by now!");
+                let rect = DeviceIntRect::new(origin, size);
+                (rect, target_index)
+            }
+            RenderTaskLocation::Fixed => {
+                (DeviceIntRect::zero(), RenderTargetIndex(0))
+            }
+        }
+    }
+
+    /// Consumes this task tree and adds every task to `passes`: this task
+    /// goes into `passes[pass_index]`, and each child is assigned recursively
+    /// to the pass before its parent.
+    ///
+    /// `pass_index` must be large enough for the whole subtree. A task with
+    /// children at `pass_index == 0` underflows in `pass_index - 1`, and an
+    /// index past the end of `passes` panics on the final lookup.
+    /// NOTE(review): callers presumably size `passes` from `max_depth` -
+    /// confirm at the call site.
+    pub fn assign_to_passes(mut self, pass_index: usize, passes: &mut Vec<RenderPass>) {
+        // Children are drained first, so `self` is pushed without them.
+        for child in self.children.drain(..) {
+            child.assign_to_passes(pass_index - 1,
+                                   passes);
+        }
+
+        // Sanity check - can be relaxed if needed
+        // (fixed tasks must land in the last pass, dynamic ones before it).
+        match self.location {
+            RenderTaskLocation::Fixed => {
+                debug_assert!(pass_index == passes.len() - 1);
+            }
+            RenderTaskLocation::Dynamic(..) => {
+                debug_assert!(pass_index < passes.len() - 1);
+            }
+        }
+
+        let pass = &mut passes[pass_index];
+        pass.add_render_task(self);
+    }
+
+    /// Walks this task tree and raises `*max_depth` to the deepest level
+    /// reached. `depth` is the level of the parent, so the root is called
+    /// with 0 and counts as level 1.
+    pub fn max_depth(&self, depth: usize, max_depth: &mut usize) {
+        let own_depth = depth + 1;
+        if own_depth > *max_depth {
+            *max_depth = own_depth;
+        }
+
+        for child in self.children.iter() {
+            child.max_depth(own_depth, max_depth);
+        }
+    }
+}
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -7,176 +7,235 @@
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use debug_colors;
 use debug_render::DebugRenderer;
 use device::{DepthFunction, Device, ProgramId, TextureId, VertexFormat, GpuMarker, GpuProfiler};
-use device::{TextureFilter, VAOId, VertexUsageHint, FileWatcherHandler, TextureTarget};
+use device::{TextureFilter, VAOId, VertexUsageHint, FileWatcherHandler, TextureTarget, ShaderError};
 use euclid::Matrix4D;
 use fnv::FnvHasher;
+use frame_builder::FrameBuilderConfig;
+use gpu_store::{GpuStore, GpuStoreLayout};
 use internal_types::{CacheTextureId, RendererFrame, ResultMsg, TextureUpdateOp};
 use internal_types::{ExternalImageUpdateList, TextureUpdateList, PackedVertex, RenderTargetMode};
 use internal_types::{ORTHO_NEAR_PLANE, ORTHO_FAR_PLANE, SourceTexture};
 use internal_types::{BatchTextures, TextureSampler, GLContextHandleWrapper};
+use prim_store::GradientData;
 use profiler::{Profiler, BackendProfileCounters};
 use profiler::{GpuProfileTag, RendererProfileTimers, RendererProfileCounters};
+use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
+use render_task::RenderTaskData;
+use std;
 use std::cmp;
 use std::collections::HashMap;
 use std::f32;
 use std::hash::BuildHasherDefault;
+use std::marker::PhantomData;
 use std::mem;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
-use tiling::{Frame, FrameBuilderConfig, PrimitiveBatch, PrimitiveBatchData};
-use tiling::{BlurCommand, CacheClipInstance, PrimitiveInstance, RenderTarget};
+use threadpool::ThreadPool;
+use tiling::{AlphaBatchKind, BlurCommand, Frame, PrimitiveBatch, PrimitiveBatchData};
+use tiling::{CacheClipInstance, PrimitiveInstance, RenderTarget};
 use time::precise_time_ns;
+use thread_profiler::{register_thread_with_profiler, write_profile};
 use util::TransformedRectKind;
 use webrender_traits::{ColorF, Epoch, PipelineId, RenderNotifier, RenderDispatcher};
-use webrender_traits::{ExternalImageId, ImageFormat, RenderApiSender, RendererKind};
+use webrender_traits::{ExternalImageId, ImageData, ImageFormat, RenderApiSender, RendererKind};
 use webrender_traits::{DeviceIntRect, DevicePoint, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
 use webrender_traits::ImageDescriptor;
 use webrender_traits::channel;
 use webrender_traits::VRCompositorHandler;
 
-pub const VERTEX_TEXTURE_POOL: usize = 5;
+pub const GPU_DATA_TEXTURE_POOL: usize = 5;
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
 const GPU_TAG_CACHE_BOX_SHADOW: GpuProfileTag = GpuProfileTag { label: "C_BoxShadow", color: debug_colors::BLACK };
 const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag { label: "C_Clip", color: debug_colors::PURPLE };
 const GPU_TAG_CACHE_TEXT_RUN: GpuProfileTag = GpuProfileTag { label: "C_TextRun", color: debug_colors::MISTYROSE };
 const GPU_TAG_INIT: GpuProfileTag = GpuProfileTag { label: "Init", color: debug_colors::WHITE };
 const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag { label: "Target", color: debug_colors::SLATEGREY };
 const GPU_TAG_PRIM_RECT: GpuProfileTag = GpuProfileTag { label: "Rect", color: debug_colors::RED };
 const GPU_TAG_PRIM_IMAGE: GpuProfileTag = GpuProfileTag { label: "Image", color: debug_colors::GREEN };
 const GPU_TAG_PRIM_YUV_IMAGE: GpuProfileTag = GpuProfileTag { label: "YuvImage", color: debug_colors::DARKGREEN };
 const GPU_TAG_PRIM_BLEND: GpuProfileTag = GpuProfileTag { label: "Blend", color: debug_colors::LIGHTBLUE };
+const GPU_TAG_PRIM_HW_COMPOSITE: GpuProfileTag = GpuProfileTag { label: "HwComposite", color: debug_colors::DODGERBLUE };
 const GPU_TAG_PRIM_COMPOSITE: GpuProfileTag = GpuProfileTag { label: "Composite", color: debug_colors::MAGENTA };
 const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag { label: "TextRun", color: debug_colors::BLUE };
 const GPU_TAG_PRIM_GRADIENT: GpuProfileTag = GpuProfileTag { label: "Gradient", color: debug_colors::YELLOW };
 const GPU_TAG_PRIM_ANGLE_GRADIENT: GpuProfileTag = GpuProfileTag { label: "AngleGradient", color: debug_colors::POWDERBLUE };
 const GPU_TAG_PRIM_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag { label: "RadialGradient", color: debug_colors::LIGHTPINK };
 const GPU_TAG_PRIM_BOX_SHADOW: GpuProfileTag = GpuProfileTag { label: "BoxShadow", color: debug_colors::CYAN };
 const GPU_TAG_PRIM_BORDER: GpuProfileTag = GpuProfileTag { label: "Border", color: debug_colors::ORANGE };
 const GPU_TAG_PRIM_CACHE_IMAGE: GpuProfileTag = GpuProfileTag { label: "CacheImage", color: debug_colors::SILVER };
 const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag { label: "Blur", color: debug_colors::VIOLET };
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum BlendMode {
     None,
     Alpha,
+
+    Multiply,
+    Max,
+    Min,
+
     // Use the color of the text itself as a constant color blend factor.
     Subpixel(ColorF),
 }
 
-struct VertexDataTexture {
+struct GpuDataTexture<L> {
     id: TextureId,
+    layout: PhantomData<L>,
 }
 
-impl VertexDataTexture {
-    fn new(device: &mut Device) -> VertexDataTexture {
+impl<L: GpuStoreLayout> GpuDataTexture<L> {
+    fn new(device: &mut Device) -> GpuDataTexture<L> {
         let id = device.create_texture_ids(1, TextureTarget::Default)[0];
 
-        VertexDataTexture {
+        GpuDataTexture {
             id: id,
+            layout: PhantomData,
         }
     }
 
     fn init<T: Default>(&mut self,
                         device: &mut Device,
                         data: &mut Vec<T>) {
         if data.is_empty() {
             return;
         }
 
-        let item_size = mem::size_of::<T>();
-        debug_assert!(item_size % 16 == 0);
-        let vecs_per_item = item_size / 16;
-
-        let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / vecs_per_item;
+        let items_per_row = L::items_per_row::<T>();
 
         // Extend the data array to be a multiple of the row size.
         // This ensures memory safety when the array is passed to
         // OpenGL to upload to the GPU.
         while data.len() % items_per_row != 0 {
             data.push(T::default());
         }
 
-        let width = items_per_row * vecs_per_item;
         let height = data.len() / items_per_row;
 
         device.init_texture(self.id,
-                            width as u32,
+                            L::texture_width() as u32,
                             height as u32,
-                            ImageFormat::RGBAF32,
-                            TextureFilter::Nearest,
+                            L::image_format(),
+                            L::texture_filter(),
                             RenderTargetMode::None,
                             Some(unsafe { mem::transmute(data.as_slice()) } ));
     }
 }
 
+/// Layout marker for vertex data textures: RGBAF32 texels, a row width of
+/// `MAX_VERTEX_TEXTURE_WIDTH` texels, nearest filtering.
+pub struct VertexDataTextureLayout {}
+
+impl GpuStoreLayout for VertexDataTextureLayout {
+    fn image_format() -> ImageFormat {
+        ImageFormat::RGBAF32
+    }
+
+    fn texture_width() -> usize {
+        MAX_VERTEX_TEXTURE_WIDTH
+    }
+
+    fn texture_filter() -> TextureFilter {
+        TextureFilter::Nearest
+    }
+}
+
+// GPU texture / CPU-side store pair that share the vertex data layout.
+type VertexDataTexture = GpuDataTexture<VertexDataTextureLayout>;
+pub type VertexDataStore<T> = GpuStore<T, VertexDataTextureLayout>;
+
+/// Layout marker for gradient data textures: RGBA8 texels with linear
+/// filtering.
+pub struct GradientDataTextureLayout {}
+
+impl GpuStoreLayout for GradientDataTextureLayout {
+    fn image_format() -> ImageFormat {
+        ImageFormat::RGBA8
+    }
+
+    // Row width is the size of one `GradientData` divided by the texel size.
+    // NOTE(review): `texel_size()` is not defined in this impl; presumably it
+    // is a provided method of `GpuStoreLayout` - confirm in gpu_store.rs.
+    fn texture_width() -> usize {
+        mem::size_of::<GradientData>() / Self::texel_size()
+    }
+
+    fn texture_filter() -> TextureFilter {
+        TextureFilter::Linear
+    }
+}
+
+// GPU texture / CPU-side store pair that share the gradient data layout.
+type GradientDataTexture = GpuDataTexture<GradientDataTextureLayout>;
+pub type GradientDataStore = GpuStore<GradientData, GradientDataTextureLayout>;
+
 const TRANSFORM_FEATURE: &'static str = "TRANSFORM";
 const SUBPIXEL_AA_FEATURE: &'static str = "SUBPIXEL_AA";
 const CLIP_FEATURE: &'static str = "CLIP";
 
 enum ShaderKind {
     Primitive,
-    Cache,
+    Cache(VertexFormat),
     ClipCache,
 }
 
 struct LazilyCompiledShader {
     id: Option<ProgramId>,
     name: &'static str,
     kind: ShaderKind,
     features: Vec<&'static str>,
 }
 
 impl LazilyCompiledShader {
     fn new(kind: ShaderKind,
            name: &'static str,
            features: &[&'static str],
            device: &mut Device,
-           precache: bool) -> LazilyCompiledShader {
+           precache: bool) -> Result<LazilyCompiledShader, ShaderError> {
         let mut shader = LazilyCompiledShader {
             id: None,
             name: name,
             kind: kind,
             features: features.to_vec(),
         };
 
         if precache {
-            shader.get(device);
+            try!{ shader.get(device) };
         }
 
-        shader
+        Ok(shader)
     }
 
-    fn get(&mut self, device: &mut Device) -> ProgramId {
+    fn get(&mut self, device: &mut Device) -> Result<ProgramId, ShaderError> {
         if self.id.is_none() {
-            let id = match self.kind {
-                ShaderKind::Primitive | ShaderKind::Cache => {
-                    create_prim_shader(self.name,
-                                       device,
-                                       &self.features)
-                }
-                ShaderKind::ClipCache => {
-                    create_clip_shader(self.name, device)
+            let id = try!{
+                match self.kind {
+                    ShaderKind::Primitive => {
+                        create_prim_shader(self.name,
+                                           device,
+                                           &self.features,
+                                           VertexFormat::Triangles)
+                    }
+                    ShaderKind::Cache(format) => {
+                        create_prim_shader(self.name,
+                                           device,
+                                           &self.features,
+                                           format)
+                    }
+                    ShaderKind::ClipCache => {
+                        create_clip_shader(self.name, device)
+                    }
                 }
             };
             self.id = Some(id);
         }
 
-        self.id.unwrap()
+        Ok(self.id.unwrap())
     }
 }
 
 struct PrimitiveShader {
     simple: LazilyCompiledShader,
     transform: LazilyCompiledShader,
 }
 
@@ -203,124 +262,127 @@ fn _get_ubo_max_len<T>(max_ubo_size: usi
     //           whether this clamping actually hurts performance!
     cmp::min(max_items, 1024)
 }
 
 impl PrimitiveShader {
     /// Builds the `simple` and `transform` variants of the shader `name`.
     ///
     /// Both variants are `ShaderKind::Primitive`. The `transform` variant
     /// uses a copy of `features` with `TRANSFORM_FEATURE` pushed onto it.
     /// `precache` is forwarded unchanged to `LazilyCompiledShader::new`.
     /// An error from either construction is returned immediately via `try!`.
     fn new(name: &'static str,
            device: &mut Device,
            features: &[&'static str],
-           precache: bool) -> PrimitiveShader {
-        let simple = LazilyCompiledShader::new(ShaderKind::Primitive,
-                                               name,
-                                               features,
-                                               device,
-                                               precache);
+           precache: bool) -> Result<PrimitiveShader, ShaderError> {
+        let simple = try!{
+            LazilyCompiledShader::new(ShaderKind::Primitive,
+                                      name,
+                                      features,
+                                      device,
+                                      precache)
+        };
 
         // Same feature list plus the transform feature for the second variant.
         let mut transform_features = features.to_vec();
         transform_features.push(TRANSFORM_FEATURE);
 
-        let transform = LazilyCompiledShader::new(ShaderKind::Primitive,
-                                                  name,
-                                                  &transform_features,
-                                                  device,
-                                                  precache);
+        let transform = try!{
+            LazilyCompiledShader::new(ShaderKind::Primitive,
+                                      name,
+                                      &transform_features,
+                                      device,
+                                      precache)
+        };
 
-        PrimitiveShader {
+        Ok(PrimitiveShader {
             simple: simple,
             transform: transform,
-        }
+        })
     }
 
     /// Returns the program id of the variant matching `transform_kind`:
     /// `simple` for `AxisAligned`, `transform` for `Complex`. The result of
     /// the underlying `LazilyCompiledShader::get` is passed through as is.
     fn get(&mut self,
            device: &mut Device,
-           transform_kind: TransformedRectKind) -> ProgramId {
+           transform_kind: TransformedRectKind) -> Result<ProgramId, ShaderError> {
         match transform_kind {
             TransformedRectKind::AxisAligned => self.simple.get(device),
             TransformedRectKind::Complex => self.transform.get(device),
         }
     }
 }
 
 /// Creates the program `name` through `Device::create_program_with_prefix`.
 ///
 /// The prefix handed to the device is a block of `#define` lines:
 /// `WR_MAX_VERTEX_TEXTURE_WIDTH` set to `MAX_VERTEX_TEXTURE_WIDTH`, followed
 /// by one `WR_FEATURE_<feature>` line per entry in `features`. The include
 /// list is `["prim_shared"]` and `vertex_format` is forwarded unchanged.
 /// Returns whatever the device call returns.
 fn create_prim_shader(name: &'static str,
                       device: &mut Device,
-                      features: &[&'static str]) -> ProgramId {
+                      features: &[&'static str],
+                      vertex_format: VertexFormat) -> Result<ProgramId, ShaderError> {
     let mut prefix = format!("#define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n",
                               MAX_VERTEX_TEXTURE_WIDTH);
 
     // One newline-terminated #define per requested feature.
     for feature in features {
         prefix.push_str(&format!("#define WR_FEATURE_{}\n", feature));
     }
 
-    let includes = &["prim_shared"];
-    let program_id = device.create_program_with_prefix(name,
-                                                       includes,
-                                                       Some(prefix));
     debug!("PrimShader {}", name);
 
-    program_id
+    let includes = &["prim_shared"];
+    device.create_program_with_prefix(name, includes, Some(prefix), vertex_format)
 }
 
-fn create_clip_shader(name: &'static str, device: &mut Device) -> ProgramId {
+fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<ProgramId, ShaderError> {
     // Creates the clip program `name` through
     // Device::create_program_with_prefix, using the include list
     // ["prim_shared", "clip_shared"] and VertexFormat::Clip. The prefix
     // defines WR_MAX_VERTEX_TEXTURE_WIDTH and WR_FEATURE_TRANSFORM.
     // Returns whatever the device call returns.
     //
     // NOTE(review): the string literal below spans two source lines without
     // a trailing backslash. The prefix therefore contains the escaped \n,
     // then a raw newline and the next line's leading spaces, before
     // "#define WR_FEATURE_TRANSFORM". It also has no trailing newline,
     // unlike the prefix built in create_prim_shader. Confirm the shader
     // source assembly tolerates both.
     let prefix = format!("#define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n
                           #define WR_FEATURE_TRANSFORM",
                           MAX_VERTEX_TEXTURE_WIDTH);
 
-    let includes = &["prim_shared", "clip_shared"];
-    let program_id = device.create_program_with_prefix(name,
-                                                       includes,
-                                                       Some(prefix));
     debug!("ClipShader {}", name);
 
-    program_id
+    let includes = &["prim_shared", "clip_shared"];
+    device.create_program_with_prefix(name, includes, Some(prefix), VertexFormat::Clip)
 }
 
-struct VertexTextures {
+struct GpuDataTextures {
     // One set of per-frame GPU data textures. Each field is initialised from
     // a buffer on the Frame and bound to its own TextureSampler slot in
     // init_frame.
     /// Initialised from `frame.layer_texture_data`; bound to `TextureSampler::Layers`.
     layer_texture: VertexDataTexture,
     /// Initialised from `frame.render_task_data`; bound to `TextureSampler::RenderTasks`.
     render_task_texture: VertexDataTexture,
     /// Initialised from `frame.gpu_geometry`; bound to `TextureSampler::Geometry`.
     prim_geom_texture: VertexDataTexture,
     /// Initialised from `frame.gpu_data16`; bound to `TextureSampler::Data16`.
     data16_texture: VertexDataTexture,
     /// Initialised from `frame.gpu_data32`; bound to `TextureSampler::Data32`.
     data32_texture: VertexDataTexture,
     /// Initialised from `frame.gpu_data64`; bound to `TextureSampler::Data64`.
     data64_texture: VertexDataTexture,
     /// Initialised from `frame.gpu_data128`; bound to `TextureSampler::Data128`.
     data128_texture: VertexDataTexture,
     /// Initialised from `frame.gpu_resource_rects`; bound to `TextureSampler::ResourceRects`.
     resource_rects_texture: VertexDataTexture,
     /// Initialised from `frame.gpu_gradient_data`; bound to `TextureSampler::Gradients`.
+    gradient_data_texture: GradientDataTexture,
 }
 
-impl VertexTextures {
-    fn new(device: &mut Device) -> VertexTextures {
-        VertexTextures {
+impl GpuDataTextures {
     /// Creates every texture in the set by calling the respective
     /// `new(device)` constructor; no frame data is involved yet.
+    fn new(device: &mut Device) -> GpuDataTextures {
+        GpuDataTextures {
             layer_texture: VertexDataTexture::new(device),
             render_task_texture: VertexDataTexture::new(device),
             prim_geom_texture: VertexDataTexture::new(device),
             data16_texture: VertexDataTexture::new(device),
             data32_texture: VertexDataTexture::new(device),
             data64_texture: VertexDataTexture::new(device),
             data128_texture: VertexDataTexture::new(device),
             resource_rects_texture: VertexDataTexture::new(device),
+            gradient_data_texture: GradientDataTexture::new(device),
         }
     }
 
     /// Prepares the set for `frame`: first calls `init` on each texture with
     /// the matching per-frame buffer (passed as `&mut`), then binds each
     /// texture id to its `TextureSampler` slot on `device`.
     fn init_frame(&mut self, device: &mut Device, frame: &mut Frame) {
         self.data16_texture.init(device, &mut frame.gpu_data16);
         self.data32_texture.init(device, &mut frame.gpu_data32);
         self.data64_texture.init(device, &mut frame.gpu_data64);
         self.data128_texture.init(device, &mut frame.gpu_data128);
         self.prim_geom_texture.init(device, &mut frame.gpu_geometry);
         self.resource_rects_texture.init(device, &mut frame.gpu_resource_rects);
         self.layer_texture.init(device, &mut frame.layer_texture_data);
         self.render_task_texture.init(device, &mut frame.render_task_data);
+        self.gradient_data_texture.init(device, &mut frame.gpu_gradient_data);
 
         // All textures are initialised; bind each one to its sampler slot.
         device.bind_texture(TextureSampler::Layers, self.layer_texture.id);
         device.bind_texture(TextureSampler::RenderTasks, self.render_task_texture.id);
         device.bind_texture(TextureSampler::Geometry, self.prim_geom_texture.id);
         device.bind_texture(TextureSampler::Data16, self.data16_texture.id);
         device.bind_texture(TextureSampler::Data32, self.data32_texture.id);
         device.bind_texture(TextureSampler::Data64, self.data64_texture.id);
         device.bind_texture(TextureSampler::Data128, self.data128_texture.id);
         device.bind_texture(TextureSampler::ResourceRects, self.resource_rects_texture.id);
+        device.bind_texture(TextureSampler::Gradients, self.gradient_data_texture.id);
     }
 }
 
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
 pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     device: Device,
@@ -356,16 +418,17 @@ pub struct Renderer {
     ps_border: PrimitiveShader,
     ps_gradient: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
     ps_radial_gradient: PrimitiveShader,
     ps_box_shadow: PrimitiveShader,
     ps_cache_image: PrimitiveShader,
 
     ps_blend: LazilyCompiledShader,
+    ps_hw_composite: LazilyCompiledShader,
     ps_composite: LazilyCompiledShader,
 
     notifier: Arc<Mutex<Option<Box<RenderNotifier>>>>,
 
     enable_profiler: bool,
     clear_framebuffer: bool,
     clear_color: ColorF,
     debug: DebugRenderer,
@@ -377,18 +440,18 @@ pub struct Renderer {
 
     render_targets: Vec<TextureId>,
 
     gpu_profile: GpuProfiler<GpuProfileTag>,
     prim_vao_id: VAOId,
     blur_vao_id: VAOId,
     clip_vao_id: VAOId,
 
-    vt_index: usize,
-    vertex_textures: [VertexTextures; VERTEX_TEXTURE_POOL],
+    gdt_index: usize,
+    gpu_data_textures: [GpuDataTextures; GPU_DATA_TEXTURE_POOL],
 
     pipeline_epoch_map: HashMap<PipelineId, Epoch, BuildHasherDefault<FnvHasher>>,
     /// Used to dispatch functions to the main thread's event loop.
     /// Required to allow GLContext sharing in some implementations like WGL.
     main_thread_dispatcher: Arc<Mutex<Option<Box<RenderDispatcher>>>>,
 
     /// A vector for fast resolves of texture cache IDs to
     /// native texture IDs. This maps to a free-list managed
@@ -407,16 +470,30 @@ pub struct Renderer {
     /// Map of external image IDs to native textures.
     external_images: HashMap<ExternalImageId, TextureId, BuildHasherDefault<FnvHasher>>,
 
     // Optional trait object that handles WebVR commands.
     // Some WebVR commands such as SubmitFrame must be synced with the WebGL render thread.
     vr_compositor_handler: Arc<Mutex<Option<Box<VRCompositorHandler>>>>
 }
 
+#[derive(Debug)]
 /// Error returned by `Renderer::new` when initialisation fails.
+pub enum InitError {
     /// A shader could not be created; wraps the underlying `ShaderError`.
+    Shader(ShaderError),
     /// An `std::io::Error` raised during initialisation, e.g. when spawning
     /// the "RenderBackend" thread.
+    Thread(std::io::Error),
+}
+
+impl From<ShaderError> for InitError {
     /// Wraps the shader error in `InitError::Shader`, so `try!` can convert
     /// it in a function that returns `InitError`.
+    fn from(err: ShaderError) -> Self { InitError::Shader(err) }
+}
+
+impl From<std::io::Error> for InitError {
     /// Wraps the I/O error in `InitError::Thread`, so `try!` can convert it
     /// in a function that returns `InitError`.
+    fn from(err: std::io::Error) -> Self { InitError::Thread(err) }
+}
+
 impl Renderer {
     /// Initializes webrender and creates a Renderer and RenderApiSender.
     ///
     /// # Examples
     /// Initializes a Renderer with some reasonable values. For more information see
     /// [RendererOptions][rendereroptions].
     /// [rendereroptions]: struct.RendererOptions.html
     ///
@@ -426,121 +503,182 @@ impl Renderer {
     /// let opts = webrender::RendererOptions {
     ///    device_pixel_ratio: 1.0,
     ///    resource_override_path: None,
     ///    enable_aa: false,
     ///    enable_profiler: false,
     /// };
     /// let (renderer, sender) = Renderer::new(opts).unwrap();
     /// ```
-    pub fn new(options: RendererOptions) -> (Renderer, RenderApiSender) {
-        let (api_tx, api_rx) = channel::msg_channel().unwrap();
-        let (payload_tx, payload_rx) = channel::payload_channel().unwrap();
+    pub fn new(mut options: RendererOptions) -> Result<(Renderer, RenderApiSender), InitError> {
+        let (api_tx, api_rx) = try!{ channel::msg_channel() };
+        let (payload_tx, payload_rx) = try!{ channel::payload_channel() };
         let (result_tx, result_rx) = channel();
 
+        register_thread_with_profiler("Compositor".to_owned());
+
         let notifier = Arc::new(Mutex::new(None));
 
         let file_watch_handler = FileWatcher {
             result_tx: result_tx.clone(),
             notifier: notifier.clone(),
         };
 
         let mut device = Device::new(options.resource_override_path.clone(),
                                      Box::new(file_watch_handler));
         // device-pixel ratio doesn't matter here - we are just creating resources.
         device.begin_frame(1.0);
 
-        let cs_box_shadow = LazilyCompiledShader::new(ShaderKind::Cache,
-                                                      "cs_box_shadow",
-                                                      &[],
-                                                      &mut device,
-                                                      options.precache_shaders);
-        let cs_text_run = LazilyCompiledShader::new(ShaderKind::Cache,
-                                                    "cs_text_run",
-                                                    &[],
-                                                    &mut device,
-                                                    options.precache_shaders);
-        let cs_blur = LazilyCompiledShader::new(ShaderKind::Cache,
-                                                "cs_blur",
-                                                 &[],
-                                                 &mut device,
-                                                 options.precache_shaders);
+        let cs_box_shadow = try!{
+            LazilyCompiledShader::new(ShaderKind::Cache(VertexFormat::Triangles),
+                                      "cs_box_shadow",
+                                      &[],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
+
+        let cs_text_run = try!{
+            LazilyCompiledShader::new(ShaderKind::Cache(VertexFormat::Triangles),
+                                      "cs_text_run",
+                                      &[],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
+
+        let cs_blur = try!{
+            LazilyCompiledShader::new(ShaderKind::Cache(VertexFormat::Blur),
+                                     "cs_blur",
+                                      &[],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
+
+        let cs_clip_rectangle = try!{
+            LazilyCompiledShader::new(ShaderKind::ClipCache,
+                                      "cs_clip_rectangle",
+                                      &[],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
 
-        let cs_clip_rectangle = LazilyCompiledShader::new(ShaderKind::ClipCache,
-                                                          "cs_clip_rectangle",
-                                                          &[],
-                                                          &mut device,
-                                                          options.precache_shaders);
-        let cs_clip_image = LazilyCompiledShader::new(ShaderKind::ClipCache,
-                                                      "cs_clip_image",
-                                                      &[],
-                                                      &mut device,
-                                                      options.precache_shaders);
+        let cs_clip_image = try!{
+            LazilyCompiledShader::new(ShaderKind::ClipCache,
+                                      "cs_clip_image",
+                                      &[],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
+
+        let ps_rectangle = try!{
+            PrimitiveShader::new("ps_rectangle",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
+
+        let ps_rectangle_clip = try!{
+            PrimitiveShader::new("ps_rectangle",
+                                 &mut device,
+                                 &[ CLIP_FEATURE ],
+                                 options.precache_shaders)
+        };
+
+        let ps_text_run = try!{
+            PrimitiveShader::new("ps_text_run",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
+
+        let ps_text_run_subpixel = try!{
+            PrimitiveShader::new("ps_text_run",
+                                 &mut device,
+                                 &[ SUBPIXEL_AA_FEATURE ],
+                                 options.precache_shaders)
+        };
 
-        let ps_rectangle = PrimitiveShader::new("ps_rectangle",
-                                                &mut device,
-                                                &[],
-                                                options.precache_shaders);
-        let ps_rectangle_clip = PrimitiveShader::new("ps_rectangle",
-                                                     &mut device,
-                                                     &[ CLIP_FEATURE ],
-                                                     options.precache_shaders);
-        let ps_text_run = PrimitiveShader::new("ps_text_run",
-                                               &mut device,
-                                               &[],
-                                               options.precache_shaders);
-        let ps_text_run_subpixel = PrimitiveShader::new("ps_text_run",
-                                                        &mut device,
-                                                        &[ SUBPIXEL_AA_FEATURE ],
-                                                        options.precache_shaders);
-        let ps_image = PrimitiveShader::new("ps_image",
-                                            &mut device,
-                                            &[],
-                                            options.precache_shaders);
-        let ps_yuv_image = PrimitiveShader::new("ps_yuv_image",
-                                                &mut device,
-                                                &[],
-                                                options.precache_shaders);
-        let ps_border = PrimitiveShader::new("ps_border",
-                                             &mut device,
-                                             &[],
-                                             options.precache_shaders);
+        let ps_image = try!{
+            PrimitiveShader::new("ps_image",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
+
+        let ps_yuv_image = try!{
+            PrimitiveShader::new("ps_yuv_image",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
+
+        let ps_border = try!{
+            PrimitiveShader::new("ps_border",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
+
+        let ps_box_shadow = try!{
+            PrimitiveShader::new("ps_box_shadow",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
+
+        let ps_gradient = try!{
+            PrimitiveShader::new("ps_gradient",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
 
-        let ps_box_shadow = PrimitiveShader::new("ps_box_shadow",
-                                                 &mut device,
-                                                 &[],
-                                                 options.precache_shaders);
+        let ps_angle_gradient = try!{
+            PrimitiveShader::new("ps_angle_gradient",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
+
+        let ps_radial_gradient = try!{
+            PrimitiveShader::new("ps_radial_gradient",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
+
+        let ps_cache_image = try!{
+            PrimitiveShader::new("ps_cache_image",
+                                 &mut device,
+                                 &[],
+                                 options.precache_shaders)
+        };
 
-        let ps_gradient = PrimitiveShader::new("ps_gradient",
-                                               &mut device,
-                                               &[],
-                                               options.precache_shaders);
-        let ps_angle_gradient = PrimitiveShader::new("ps_angle_gradient",
-                                                     &mut device,
-                                                     &[],
-                                                     options.precache_shaders);
-        let ps_radial_gradient = PrimitiveShader::new("ps_radial_gradient",
-                                                      &mut device,
-                                                      &[],
-                                                      options.precache_shaders);
-        let ps_cache_image = PrimitiveShader::new("ps_cache_image",
-                                                  &mut device,
-                                                  &[],
-                                                  options.precache_shaders);
+        let ps_blend = try!{
+            LazilyCompiledShader::new(ShaderKind::Primitive,
+                                     "ps_blend",
+                                     &[],
+                                     &mut device,
+                                     options.precache_shaders)
+        };
 
-        let ps_blend = LazilyCompiledShader::new(ShaderKind::Primitive,
-                                                 "ps_blend",
-                                                 &[],
-                                                 &mut device,
-                                                 options.precache_shaders);
-        let ps_composite = LazilyCompiledShader::new(ShaderKind::Primitive,
-                                                     "ps_composite",
-                                                     &[],
-                                                     &mut device,
-                                                     options.precache_shaders);
+        let ps_composite = try!{
+            LazilyCompiledShader::new(ShaderKind::Primitive,
+                                      "ps_composite",
+                                      &[],
+                                      &mut device,
+                                      options.precache_shaders)
+        };
+
+        let ps_hw_composite = try!{
+            LazilyCompiledShader::new(ShaderKind::Primitive,
+                                     "ps_hardware_composite",
+                                     &[],
+                                     &mut device,
+                                     options.precache_shaders)
+        };
 
         let mut texture_cache = TextureCache::new();
 
         let white_pixels: Vec<u8> = vec![
             0xff, 0xff, 0xff, 0xff,
             0xff, 0xff, 0xff, 0xff,
             0xff, 0xff, 0xff, 0xff,
             0xff, 0xff, 0xff, 0xff,
@@ -555,38 +693,38 @@ impl Renderer {
                              ImageDescriptor {
                                 width: 2,
                                 height: 2,
                                 stride: None,
                                 format: ImageFormat::RGBA8,
                                 is_opaque: false,
                              },
                              TextureFilter::Linear,
-                             Arc::new(white_pixels));
+                             ImageData::Raw(Arc::new(white_pixels)));
 
         let dummy_mask_image_id = texture_cache.new_item_id();
         texture_cache.insert(dummy_mask_image_id,
                              ImageDescriptor {
                                 width: 2,
                                 height: 2,
                                 stride: None,
                                 format: ImageFormat::A8,
                                 is_opaque: false,
                              },
                              TextureFilter::Linear,
-                             Arc::new(mask_pixels));
+                             ImageData::Raw(Arc::new(mask_pixels)));
 
         let debug_renderer = DebugRenderer::new(&mut device);
 
-        let vertex_textures = [
-            VertexTextures::new(&mut device),
-            VertexTextures::new(&mut device),
-            VertexTextures::new(&mut device),
-            VertexTextures::new(&mut device),
-            VertexTextures::new(&mut device),
+        let gpu_data_textures = [
+            GpuDataTextures::new(&mut device),
+            GpuDataTextures::new(&mut device),
+            GpuDataTextures::new(&mut device),
+            GpuDataTextures::new(&mut device),
+            GpuDataTextures::new(&mut device),
         ];
 
         let x0 = 0.0;
         let y0 = 0.0;
         let x1 = 1.0;
         let y1 = 1.0;
 
         // TODO(gw): Consider separate VBO for quads vs border corners if VS ever shows up in profile!
@@ -626,40 +764,44 @@ impl Renderer {
         // We need a reference to the webrender context from the render backend in order to share
         // texture ids
         let context_handle = match options.renderer_kind {
             RendererKind::Native => GLContextHandleWrapper::current_native_handle(),
             RendererKind::OSMesa => GLContextHandleWrapper::current_osmesa_handle(),
         };
 
         let config = FrameBuilderConfig::new(options.enable_scrollbars,
-                                             options.enable_subpixel_aa);
+                                             options.enable_subpixel_aa,
+                                             options.debug);
 
-        let debug = options.debug;
         let (device_pixel_ratio, enable_aa) = (options.device_pixel_ratio, options.enable_aa);
         let render_target_debug = options.render_target_debug;
         let payload_tx_for_backend = payload_tx.clone();
-        let enable_recording = options.enable_recording;
-        thread::Builder::new().name("RenderBackend".to_string()).spawn(move || {
+        let recorder = options.recorder;
+        let workers = options.workers.take().unwrap_or_else(||{
+            // TODO(gw): Use a heuristic to select best # of worker threads.
+            Arc::new(Mutex::new(ThreadPool::new_with_name("WebRender:Worker".to_string(), 4)))
+        });
+        try!{ thread::Builder::new().name("RenderBackend".to_string()).spawn(move || {
             let mut backend = RenderBackend::new(api_rx,
                                                  payload_rx,
                                                  payload_tx_for_backend,
                                                  result_tx,
                                                  device_pixel_ratio,
                                                  texture_cache,
                                                  enable_aa,
+                                                 workers,
                                                  backend_notifier,
                                                  context_handle,
                                                  config,
-                                                 debug,
-                                                 enable_recording,
+                                                 recorder,
                                                  backend_main_thread_dispatcher,
                                                  backend_vr_compositor);
             backend.run();
-        }).unwrap();
+        })};
 
         let renderer = Renderer {
             result_rx: result_rx,
             device: device,
             current_frame: None,
             pending_texture_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
             cs_box_shadow: cs_box_shadow,
@@ -675,44 +817,45 @@ impl Renderer {
             ps_yuv_image: ps_yuv_image,
             ps_border: ps_border,
             ps_box_shadow: ps_box_shadow,
             ps_gradient: ps_gradient,
             ps_angle_gradient: ps_angle_gradient,
             ps_radial_gradient: ps_radial_gradient,
             ps_cache_image: ps_cache_image,
             ps_blend: ps_blend,
+            ps_hw_composite: ps_hw_composite,
             ps_composite: ps_composite,
             notifier: notifier,
             debug: debug_renderer,
             render_target_debug: render_target_debug,
             backend_profile_counters: BackendProfileCounters::new(),
             profile_counters: RendererProfileCounters::new(),
             profiler: Profiler::new(),
             enable_profiler: options.enable_profiler,
             clear_framebuffer: options.clear_framebuffer,
             clear_color: options.clear_color,
             last_time: 0,
             render_targets: Vec::new(),
             gpu_profile: GpuProfiler::new(),
             prim_vao_id: prim_vao_id,
             blur_vao_id: blur_vao_id,
             clip_vao_id: clip_vao_id,
-            vt_index: 0,
-            vertex_textures: vertex_textures,
+            gdt_index: 0,
+            gpu_data_textures: gpu_data_textures,
             pipeline_epoch_map: HashMap::with_hasher(Default::default()),
             main_thread_dispatcher: main_thread_dispatcher,
             cache_texture_id_map: Vec::new(),
             external_image_handler: None,
             external_images: HashMap::with_hasher(Default::default()),
             vr_compositor_handler: vr_compositor
         };
 
         let sender = RenderApiSender::new(api_tx, payload_tx);
-        (renderer, sender)
+        Ok((renderer, sender))
     }
 
     /// Sets the new RenderNotifier.
     ///
     /// The RenderNotifier will be called when processing of e.g. a (scrolling) frame is done,
     /// and therefore the screen should be updated.
     pub fn set_render_notifier(&self, notifier: Box<RenderNotifier>) {
         let mut notifier_arc = self.notifier.lock().unwrap();
@@ -746,16 +889,18 @@ impl Renderer {
     /// Returns the current `pipeline_epoch_map` and leaves a fresh, empty
     /// map in its place, so each call only yields entries recorded since
     /// the previous call.
     pub fn flush_rendered_epochs(&mut self) -> HashMap<PipelineId, Epoch, BuildHasherDefault<FnvHasher>> {
         return mem::replace(&mut self.pipeline_epoch_map, HashMap::with_hasher(Default::default()));
     }
 
     /// Processes the result queue.
     ///
     /// Should be called before `render()`, as texture cache updates are done here.
     pub fn update(&mut self) {
+        profile_scope!("update");
+
         // Pull any pending results and return the most recent.
         while let Ok(msg) = self.result_rx.try_recv() {
             match msg {
                 ResultMsg::NewFrame(frame, texture_update_list, external_image_update_list, profile_counters) => {
                     self.pending_texture_updates.push(texture_update_list);
 
                     // When a new frame is ready, we could start to update all pending external image requests here.
                     self.release_external_images(external_image_update_list);
@@ -803,16 +948,18 @@ impl Renderer {
         self.external_image_handler = Some(handler);
     }
 
     /// Renders the current frame.
     ///
     /// A Frame is supplied by calling [set_root_stacking_context()][newframe].
     /// [newframe]: ../../webrender_traits/struct.RenderApi.html#method.set_root_stacking_context
     pub fn render(&mut self, framebuffer_size: DeviceUintSize) {
+        profile_scope!("render");
+
         if let Some(mut frame) = self.current_frame.take() {
             if let Some(ref mut frame) = frame.frame {
                 let mut profile_timers = RendererProfileTimers::new();
 
                 // Block CPU waiting for last frame's GPU profiles to arrive.
                 // In general this shouldn't block unless heavily GPU limited.
                 if let Some(samples) = self.gpu_profile.build_samples() {
                     profile_timers.gpu_samples = samples;
@@ -887,107 +1034,116 @@ impl Renderer {
 */
 
     fn update_texture_cache(&mut self) {
         let _gm = GpuMarker::new("texture cache update");
         let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
         for update_list in pending_texture_updates.drain(..) {
             for update in update_list.updates {
                 match update.op {
-                    TextureUpdateOp::Create(width, height, format, filter, mode, maybe_bytes) => {
+                    TextureUpdateOp::Create { width, height, format, filter, mode, data } => { // `data` is the optional initial image
                         let CacheTextureId(cache_texture_index) = update.id;
                         if self.cache_texture_id_map.len() == cache_texture_index {
                             // Create a new native texture, as requested by the texture cache.
                             let texture_id = self.device
-                                             .create_texture_ids(1, TextureTarget::Default)[0];
+                                                 .create_texture_ids(1, TextureTarget::Default)[0];
                             self.cache_texture_id_map.push(texture_id);
                         }
                         let texture_id = self.cache_texture_id_map[cache_texture_index];
 
-                        let maybe_slice = maybe_bytes.as_ref().map(|bytes|{ bytes.as_slice() });
-                        self.device.init_texture(texture_id,
-                                                 width,
-                                                 height,
-                                                 format,
-                                                 filter,
-                                                 mode,
-                                                 maybe_slice);
+                        if let Some(image) = data { // initial contents were supplied with the request
+                            match image {
+                                ImageData::Raw(raw) => { // bytes are carried inside the update itself
+                                    self.device.init_texture(texture_id,
+                                                             width,
+                                                             height,
+                                                             format,
+                                                             filter,
+                                                             mode,
+                                                             Some(raw.as_slice()));
+                                }
+                                ImageData::ExternalBuffer(id) => { // bytes are obtained through the external image handler
+                                    let handler = self.external_image_handler
+                                                      .as_mut()
+                                                      .expect("Found external image, but no handler set!");
+
+                                    match handler.lock(id).source { // paired with the unlock(id) after this match
+                                        ExternalImageSource::RawData(raw) => {
+                                            self.device.init_texture(texture_id,
+                                                                     width,
+                                                                     height,
+                                                                     format,
+                                                                     filter,
+                                                                     mode,
+                                                                     Some(raw));
+                                        }
+                                        _ => panic!("No external buffer found"), // any source other than RawData is rejected
+                                    };
+                                    handler.unlock(id);
+                                }
+                                _ => { // every other ImageData variant panics
+                                    panic!("No suitable image buffer for TextureUpdateOp::Create.");
+                                }
+                            }
+                        } else { // no initial data: init_texture is called with None
+                            self.device.init_texture(texture_id,
+                                                     width,
+                                                     height,
+                                                     format,
+                                                     filter,
+                                                     mode,
+                                                     None);
+                        }
                     }
-                    TextureUpdateOp::Grow(new_width,
-                                          new_height,
-                                          format,
-                                          filter,
-                                          mode) => {
+                    TextureUpdateOp::Grow { width, height, format, filter, mode } => { // resize the texture already mapped to this cache id
                         let texture_id = self.cache_texture_id_map[update.id.0];
                         self.device.resize_texture(texture_id,
-                                                   new_width,
-                                                   new_height,
+                                                   width,
+                                                   height,
                                                    format,
                                                    filter,
                                                    mode);
                     }
-                    TextureUpdateOp::Update(x, y, width, height, bytes, stride) => {
+                    TextureUpdateOp::Update { page_pos_x, page_pos_y, width, height, data, stride } => { // position, size, stride and bytes go to update_texture
                         let texture_id = self.cache_texture_id_map[update.id.0];
                         self.device.update_texture(texture_id,
-                                                   x,
-                                                   y,
+                                                   page_pos_x,
+                                                   page_pos_y,
                                                    width, height, stride,
-                                                   bytes.as_slice());
+                                                   data.as_slice());
+                    }
+                    TextureUpdateOp::UpdateForExternalBuffer { rect, id, stride } => { // like Update, but the bytes come from the external image handler
+                        let handler = self.external_image_handler
+                                          .as_mut()
+                                          .expect("Found external image, but no handler set!");
+                        let device = &mut self.device; // borrowed as its own field; `handler` already borrows another field of self
+                        let cached_id = self.cache_texture_id_map[update.id.0];
+
+                        match handler.lock(id).source { // paired with the unlock(id) after this match
+                            ExternalImageSource::RawData(data) => {
+                                device.update_texture(cached_id,
+                                                      rect.origin.x,
+                                                      rect.origin.y,
+                                                      rect.size.width,
+                                                      rect.size.height,
+                                                      stride, data);
+                            }
+                            _ => panic!("No external buffer found"), // any source other than RawData is rejected
+                        };
+                        handler.unlock(id);
                     }
                     TextureUpdateOp::Free => {
                         let texture_id = self.cache_texture_id_map[update.id.0];
                         self.device.deinit_texture(texture_id);
                     }
                 }
             }
         }
     }
 
-    fn add_debug_rect(&mut self,
-                      p0: DeviceIntPoint,
-                      p1: DeviceIntPoint,
-                      label: &str,
-                      c: &ColorF) {
-        let tile_x0 = p0.x;
-        let tile_y0 = p0.y;
-        let tile_x1 = p1.x;
-        let tile_y1 = p1.y;
-
-        self.debug.add_line(tile_x0,
-                            tile_y0,
-                            c,
-                            tile_x1,
-                            tile_y0,
-                            c);
-        self.debug.add_line(tile_x0,
-                            tile_y1,
-                            c,
-                            tile_x1,
-                            tile_y1,
-                            c);
-        self.debug.add_line(tile_x0,
-                            tile_y0,
-                            c,
-                            tile_x0,
-                            tile_y1,
-                            c);
-        self.debug.add_line(tile_x1,
-                            tile_y0,
-                            c,
-                            tile_x1,
-                            tile_y1,
-                            c);
-        if label.len() > 0 {
-            self.debug.add_text((tile_x0 as f32 + tile_x1 as f32) * 0.5,
-                                (tile_y0 as f32 + tile_y1 as f32) * 0.5,
-                                label,
-                                c);
-        }
-    }
-
     fn draw_instanced_batch<T>(&mut self,
                                data: &[T],
                                vao: VAOId,
                                shader: ProgramId,
                                textures: &BatchTextures,
                                projection: &Matrix4D<f32>) {
         self.device.bind_vao(vao);
         self.device.bind_program(shader, projection);
@@ -1000,103 +1156,179 @@ impl Renderer {
         self.device.update_vao_instances(vao, data, VertexUsageHint::Stream);
         self.device.draw_indexed_triangles_instanced_u16(6, data.len() as i32);
         self.profile_counters.vertices.add(6 * data.len());
         self.profile_counters.draw_calls.inc();
     }
 
     fn submit_batch(&mut self,
                     batch: &PrimitiveBatch,
-                    projection: &Matrix4D<f32>) {
+                    projection: &Matrix4D<f32>,
+                    render_task_data: &Vec<RenderTaskData>, // looked up by index in the Composite arm
+                    cache_texture: Option<TextureId>, // unwrapped in the Composite arm
+                    render_target: Option<(TextureId, i32)>, // None selects the y-flipped readback below
+                    target_dimensions: DeviceUintSize) { // used for the y-flip and when re-binding render_target
         let transform_kind = batch.key.flags.transform_kind();
         let needs_clipping = batch.key.flags.needs_clipping();
         debug_assert!(!needs_clipping || batch.key.blend_mode == BlendMode::Alpha);
 
-        let (data, marker, shader) = match &batch.data {
-            &PrimitiveBatchData::CacheImage(ref data) => {
-                let shader = self.ps_cache_image.get(&mut self.device, transform_kind);
-                (data, GPU_TAG_PRIM_CACHE_IMAGE, shader)
-            }
-            &PrimitiveBatchData::Blend(ref data) => {
-                let shader = self.ps_blend.get(&mut self.device);
-                (data, GPU_TAG_PRIM_BLEND, shader)
-            }
-            &PrimitiveBatchData::Composite(ref data) => {
-                // The composite shader only samples from sCache.
-                let shader = self.ps_composite.get(&mut self.device);
-                (data, GPU_TAG_PRIM_COMPOSITE, shader)
-            }
-            &PrimitiveBatchData::Rectangles(ref data) => {
-                let shader = if needs_clipping {
-                    self.ps_rectangle_clip.get(&mut self.device, transform_kind)
-                } else {
-                    self.ps_rectangle.get(&mut self.device, transform_kind)
+        match batch.data {
+            PrimitiveBatchData::Instances(ref data) => { // instanced primitives: GPU tag and shader are picked from batch.key.kind
+                let (marker, shader) = match batch.key.kind {
+                    AlphaBatchKind::Composite => unreachable!(), // handled by the PrimitiveBatchData::Composite arm below
+                    AlphaBatchKind::HardwareComposite => {
+                        let shader = self.ps_hw_composite.get(&mut self.device);
+                        (GPU_TAG_PRIM_HW_COMPOSITE, shader)
+                    }
+                    AlphaBatchKind::Blend => {
+                        let shader = self.ps_blend.get(&mut self.device);
+                        (GPU_TAG_PRIM_BLEND, shader)
+                    }
+                    AlphaBatchKind::Rectangle => {
+                        let shader = if needs_clipping {
+                            self.ps_rectangle_clip.get(&mut self.device, transform_kind)
+                        } else {
+                            self.ps_rectangle.get(&mut self.device, transform_kind)
+                        };
+                        (GPU_TAG_PRIM_RECT, shader)
+                    }
+                    AlphaBatchKind::TextRun => {
+                        let shader = match batch.key.blend_mode {
+                            BlendMode::Subpixel(..) => self.ps_text_run_subpixel.get(&mut self.device, transform_kind),
+                            BlendMode::Alpha | BlendMode::None => self.ps_text_run.get(&mut self.device, transform_kind),
+                            _ => unreachable!(), // only Subpixel, Alpha and None are expected for text runs
+                        };
+                        (GPU_TAG_PRIM_TEXT_RUN, shader)
+                    }
+                    AlphaBatchKind::Image => {
+                        let shader = self.ps_image.get(&mut self.device, transform_kind);
+                        (GPU_TAG_PRIM_IMAGE, shader)
+                    }
+                    AlphaBatchKind::YuvImage => {
+                        let shader = self.ps_yuv_image.get(&mut self.device, transform_kind);
+                        (GPU_TAG_PRIM_YUV_IMAGE, shader)
+                    }
+                    AlphaBatchKind::Border => {
+                        let shader = self.ps_border.get(&mut self.device, transform_kind);
+                        (GPU_TAG_PRIM_BORDER, shader)
+                    }
+                    AlphaBatchKind::AlignedGradient => {
+                        let shader = self.ps_gradient.get(&mut self.device, transform_kind);
+                        (GPU_TAG_PRIM_GRADIENT, shader)
+                    }
+                    AlphaBatchKind::AngleGradient => {
+                        let shader = self.ps_angle_gradient.get(&mut self.device, transform_kind);
+                        (GPU_TAG_PRIM_ANGLE_GRADIENT, shader)
+                    }
+                    AlphaBatchKind::RadialGradient => {
+                        let shader = self.ps_radial_gradient.get(&mut self.device, transform_kind);
+                        (GPU_TAG_PRIM_RADIAL_GRADIENT, shader)
+                    }
+                    AlphaBatchKind::BoxShadow => {
+                        let shader = self.ps_box_shadow.get(&mut self.device, transform_kind);
+                        (GPU_TAG_PRIM_BOX_SHADOW, shader)
+                    }
+                    AlphaBatchKind::CacheImage => {
+                        let shader = self.ps_cache_image.get(&mut self.device, transform_kind);
+                        (GPU_TAG_PRIM_CACHE_IMAGE, shader)
+                    }
                 };
-                (data, GPU_TAG_PRIM_RECT, shader)
-            }
-            &PrimitiveBatchData::Image(ref data) => {
-                let shader = self.ps_image.get(&mut self.device, transform_kind);
-                (data, GPU_TAG_PRIM_IMAGE, shader)
-            }
-            &PrimitiveBatchData::YuvImage(ref data) => {
-                let shader = self.ps_yuv_image.get(&mut self.device, transform_kind);
-                (data, GPU_TAG_PRIM_YUV_IMAGE, shader)
+
+                let shader = shader.unwrap(); // every get() above is fallible; a failure panics here
+
+                let _gm = self.gpu_profile.add_marker(marker);
+                let vao = self.prim_vao_id;
+                self.draw_instanced_batch(data,
+                                          vao,
+                                          shader,
+                                          &batch.key.textures,
+                                          projection);
             }
-            &PrimitiveBatchData::Borders(ref data) => {
-                let shader = self.ps_border.get(&mut self.device, transform_kind);
-                (data, GPU_TAG_PRIM_BORDER, shader)
-            }
-            &PrimitiveBatchData::BoxShadow(ref data) => {
-                let shader = self.ps_box_shadow.get(&mut self.device, transform_kind);
-                (data, GPU_TAG_PRIM_BOX_SHADOW, shader)
-            }
-            &PrimitiveBatchData::TextRun(ref data) => {
-                let shader = match batch.key.blend_mode {
-                    BlendMode::Subpixel(..) => self.ps_text_run_subpixel.get(&mut self.device, transform_kind),
-                    BlendMode::Alpha | BlendMode::None => self.ps_text_run.get(&mut self.device, transform_kind),
+            PrimitiveBatchData::Composite(ref instance) => { // a single instance; a backdrop readback is done before drawing it
+                let _gm = self.gpu_profile.add_marker(GPU_TAG_PRIM_COMPOSITE);
+                let vao = self.prim_vao_id;
+                let shader = self.ps_composite.get(&mut self.device).unwrap();
+
+                // TODO(gw): This code branch is all a bit hacky. We rely
+                // on pulling specific values from the render target data
+                // and also cloning the single primitive instance to be
+                // able to pass to draw_instanced_batch(). We should
+                // think about a cleaner way to achieve this!
+
+                // Before submitting the composite batch, do the
+                // framebuffer readbacks that are needed for each
+                // composite operation in this batch.
+                let cache_texture_id = cache_texture.unwrap(); // panics if the caller passed no cache texture
+                let cache_texture_dimensions = self.device.get_texture_dimensions(cache_texture_id);
+
+                let backdrop = &render_task_data[instance.task_index as usize]; // indexed by the instance's own task_index
+                let readback = &render_task_data[instance.user_data[0] as usize]; // user_data[0] holds the readback task index
+                let source = &render_task_data[instance.user_data[1] as usize]; // user_data[1] holds the source task index
+
+                // Bind the FBO to blit the backdrop to.
+                // Called per-instance in case the layer (and therefore FBO)
+                // changes. The device will skip the GL call if the requested
+                // target is already bound.
+                let cache_draw_target = (cache_texture_id, readback.data[4] as i32); // readback.data[4] supplies the layer component
+                self.device.bind_draw_target(Some(cache_draw_target), Some(cache_texture_dimensions));
+
+                let src_x = backdrop.data[0] - backdrop.data[4] + source.data[4]; // NOTE(review): data[] layout is not visible here - confirm against RenderTaskData
+                let src_y = backdrop.data[1] - backdrop.data[5] + source.data[5];
+
+                let dest_x = readback.data[0]; // readback.data[0..4] is read as x, y, width, height
+                let dest_y = readback.data[1];
+
+                let width = readback.data[2];
+                let height = readback.data[3];
+
+                // Need to invert the y coordinates when reading back from
+                // the framebuffer.
+                let y0 = if render_target.is_some() { // texture target: source y is used as-is
+                    src_y as i32
+                } else { // render_target is None: flip y within target_dimensions
+                    target_dimensions.height as i32 - height as i32 - src_y as i32
                 };
-                (data, GPU_TAG_PRIM_TEXT_RUN, shader)
+
+                let src = DeviceIntRect::new(DeviceIntPoint::new(src_x as i32,
+                                                                 y0),
+                                             DeviceIntSize::new(width as i32, height as i32));
+                let dest = DeviceIntRect::new(DeviceIntPoint::new(dest_x as i32,
+                                                                  dest_y as i32),
+                                              DeviceIntSize::new(width as i32, height as i32));
+
+                self.device.blit_render_target(render_target, // reads from render_target while the cache target is bound for drawing
+                                               Some(src),
+                                               dest);
+
+                // Restore draw target to current pass render target + layer.
+                self.device.bind_draw_target(render_target, Some(target_dimensions));
+
+                self.draw_instanced_batch(&[instance.clone()], // one-element slice built from a clone, as the TODO above notes
+                                          vao,
+                                          shader,
+                                          &batch.key.textures,
+                                          projection);
             }
-            &PrimitiveBatchData::AlignedGradient(ref data) => {
-                let shader = self.ps_gradient.get(&mut self.device, transform_kind);
-                (data, GPU_TAG_PRIM_GRADIENT, shader)
-            }
-            &PrimitiveBatchData::AngleGradient(ref data) => {
-                let shader = self.ps_angle_gradient.get(&mut self.device, transform_kind);
-                (data, GPU_TAG_PRIM_ANGLE_GRADIENT, shader)
-            }
-            &PrimitiveBatchData::RadialGradient(ref data) => {
-                let shader = self.ps_radial_gradient.get(&mut self.device, transform_kind);
-                (data, GPU_TAG_PRIM_RADIAL_GRADIENT, shader)
-            }
-        };
-
-        let _gm = self.gpu_profile.add_marker(marker);
-        let vao = self.prim_vao_id;
-        self.draw_instanced_batch(data,
-                                  vao,
-                                  shader,
-                                  &batch.key.textures,
-                                  projection);
+        }
     }
 
     fn draw_target(&mut self,
                    render_target: Option<(TextureId, i32)>,
                    target: &RenderTarget,
-                   target_size: &DeviceUintSize,
+                   target_size: DeviceUintSize,
                    cache_texture: Option<TextureId>,
                    should_clear: bool,
-                   background_color: Option<ColorF>) {
+                   background_color: Option<ColorF>,
+                   render_task_data: &Vec<RenderTaskData>) {
         self.device.disable_depth();
         self.device.enable_depth_write();
 
-        let dimensions = [target_size.width, target_size.height];
         let projection = {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
-            self.device.bind_draw_target(render_target, Some(dimensions));
+            self.device.bind_draw_target(render_target, Some(target_size));
 
             self.device.set_blend(false);
             self.device.set_blend_mode_alpha();
             if let Some(cache_texture) = cache_texture {
                 self.device.bind_texture(TextureSampler::Cache, cache_texture);
             }
 
             let (color, projection) = match render_target {
@@ -1151,17 +1383,17 @@ impl Renderer {
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
         if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_BLUR);
             let vao = self.blur_vao_id;
 
             self.device.set_blend(false);
-            let shader = self.cs_blur.get(&mut self.device);
+            let shader = self.cs_blur.get(&mut self.device).unwrap();
 
             self.draw_instanced_batch(&target.vertical_blurs,
                                       vao,
                                       shader,
                                       &BatchTextures::no_texture(),
                                       &projection);
             self.draw_instanced_batch(&target.horizontal_blurs,
                                       vao,
@@ -1170,17 +1402,17 @@ impl Renderer {
                                       &projection);
         }
 
         // Draw any box-shadow caches for this target.
         if !target.box_shadow_cache_prims.is_empty() {
             self.device.set_blend(false);
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_BOX_SHADOW);
             let vao = self.prim_vao_id;
-            let shader = self.cs_box_shadow.get(&mut self.device);
+            let shader = self.cs_box_shadow.get(&mut self.device).unwrap();
             self.draw_instanced_batch(&target.box_shadow_cache_prims,
                                       vao,
                                       shader,
                                       &BatchTextures::no_texture(),
                                       &projection);
         }
 
         // Draw the clip items into the tiled alpha mask.
@@ -1188,29 +1420,29 @@ impl Renderer {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_CLIP);
             let vao = self.clip_vao_id;
             // switch to multiplicative blending
             self.device.set_blend(true);
             self.device.set_blend_mode_multiply();
             // draw rounded cornered rectangles
             if !target.clip_batcher.rectangles.is_empty() {
                 let _gm2 = GpuMarker::new("clip rectangles");
-                let shader = self.cs_clip_rectangle.get(&mut self.device);
+                let shader = self.cs_clip_rectangle.get(&mut self.device).unwrap();
                 self.draw_instanced_batch(&target.clip_batcher.rectangles,
                                           vao,
                                           shader,
                                           &BatchTextures::no_texture(),
                                           &projection);
             }
             // draw image masks
             for (mask_texture_id, items) in target.clip_batcher.images.iter() {
                 let _gm2 = GpuMarker::new("clip images");
                 let texture_id = self.resolve_source_texture(mask_texture_id);
                 self.device.bind_texture(TextureSampler::Mask, texture_id);
-                let shader = self.cs_clip_image.get(&mut self.device);
+                let shader = self.cs_clip_image.get(&mut self.device).unwrap();
                 self.draw_instanced_batch(items,
                                           vao,
                                           shader,
                                           &BatchTextures::no_texture(),
                                           &projection);
             }
         }
 
@@ -1221,17 +1453,17 @@ impl Renderer {
         // it removes the overhead of submitting many small glyphs
         // to multiple tiles in the normal text run case.
         if !target.text_run_cache_prims.is_empty() {
             self.device.set_blend(true);
             self.device.set_blend_mode_alpha();
 
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_TEXT_RUN);
             let vao = self.prim_vao_id;
-            let shader = self.cs_text_run.get(&mut self.device);
+            let shader = self.cs_text_run.get(&mut self.device).unwrap();
 
             self.draw_instanced_batch(&target.text_run_cache_prims,
                                       vao,
                                       shader,
                                       &target.text_run_textures,
                                       &projection);
         }
 
@@ -1239,40 +1471,62 @@ impl Renderer {
         self.device.set_blend(false);
         let mut prev_blend_mode = BlendMode::None;
 
         self.device.set_depth_func(DepthFunction::Less);
         self.device.enable_depth();
         self.device.enable_depth_write();
 
         for batch in &target.alpha_batcher.opaque_batches {
-            self.submit_batch(batch, &projection);
+            self.submit_batch(batch,
+                              &projection,
+                              render_task_data,
+                              cache_texture,
+                              render_target,
+                              target_size);
         }
 
         self.device.disable_depth_write();
 
         for batch in &target.alpha_batcher.alpha_batches {
             if batch.key.blend_mode != prev_blend_mode {
                 match batch.key.blend_mode {
                     BlendMode::None => {
                         self.device.set_blend(false);
                     }
+                    BlendMode::Multiply => {
+                        self.device.set_blend(true);
+                        self.device.set_blend_mode_multiply();
+                    }
+                    BlendMode::Max => {
+                        self.device.set_blend(true);
+                        self.device.set_blend_mode_max();
+                    }
+                    BlendMode::Min => {
+                        self.device.set_blend(true);
+                        self.device.set_blend_mode_min();
+                    }
                     BlendMode::Alpha => {
                         self.device.set_blend(true);
                         self.device.set_blend_mode_alpha();
                     }
                     BlendMode::Subpixel(color) => {
                         self.device.set_blend(true);
                         self.device.set_blend_mode_subpixel(color);
                     }
                 }
                 prev_blend_mode = batch.key.blend_mode;
             }
 
-            self.submit_batch(batch, &projection);
+            self.submit_batch(batch,
+                              &projection,
+                              render_task_data,
+                              cache_texture,
+                              render_target,
+                              target_size);
         }
 
         self.device.disable_depth();
         self.device.set_blend(false);
     }
 
     fn update_deferred_resolves(&mut self, frame: &mut Frame) {
         // The first thing we do is run through any pending deferred
@@ -1288,16 +1542,17 @@ impl Renderer {
                 GpuMarker::fire("deferred resolve");
                 let props = &deferred_resolve.image_properties;
                 let external_id = props.external_id
                                        .expect("BUG: Deferred resolves must be external images!");
                 let image = handler.lock(external_id);
 
                 let texture_id = match image.source {
                     ExternalImageSource::NativeTexture(texture_id) => TextureId::new(texture_id),
+                    _ => panic!("No native texture found."),
                 };
 
                 self.external_images.insert(external_id, texture_id);
                 let resource_rect_index = deferred_resolve.resource_address.0 as usize;
                 let resource_rect = &mut frame.gpu_resource_rects[resource_rect_index];
                 resource_rect.uv0 = DevicePoint::new(image.u0, image.v0);
                 resource_rect.uv1 = DevicePoint::new(image.u1, image.v1);
             }
@@ -1337,26 +1592,16 @@ impl Renderer {
         // Some tests use a restricted viewport smaller than the main screen size.
         // Ensure we clear the framebuffer in these tests.
         // TODO(gw): Find a better solution for this?
         let viewport_size = DeviceIntSize::new((frame.viewport_size.width * frame.device_pixel_ratio) as i32,
                                                (frame.viewport_size.height * frame.device_pixel_ratio) as i32);
         let needs_clear = viewport_size.width < framebuffer_size.width as i32 ||
                           viewport_size.height < framebuffer_size.height as i32;
 
-        {
-            let _gm2 = GpuMarker::new("debug rectangles");
-            for debug_rect in frame.debug_rects.iter().rev() {
-                self.add_debug_rect(debug_rect.rect.origin,
-                                    debug_rect.rect.bottom_right(),
-                                    &debug_rect.label,
-                                    &debug_rect.color);
-            }
-        }
-
         self.device.disable_depth_write();
         self.device.disable_stencil();
         self.device.set_blend(false);
 
         if frame.passes.is_empty() {
             self.device.clear_target(Some(self.clear_color.to_array()), Some(1.0));
         } else {
             // Add new render targets to the pool if required.
@@ -1379,18 +1624,18 @@ impl Renderer {
                                          RenderTargetMode::LayerRenderTarget(pass.targets.len() as i32),
                                          None);
             }
 
             // TODO(gw): This is a hack / workaround for #728.
             // We should find a better way to implement these updates rather
             // than wasting this extra memory, but for now it removes a large
             // number of driver stalls.
-            self.vertex_textures[self.vt_index].init_frame(&mut self.device, frame);
-            self.vt_index = (self.vt_index + 1) % VERTEX_TEXTURE_POOL;
+            self.gpu_data_textures[self.gdt_index].init_frame(&mut self.device, frame);
+            self.gdt_index = (self.gdt_index + 1) % GPU_DATA_TEXTURE_POOL;
 
             let mut src_id = None;
 
             for (pass_index, pass) in frame.passes.iter().enumerate() {
                 let (do_clear, size, target_id) = if pass.is_framebuffer {
                     (self.clear_framebuffer || needs_clear,
                      framebuffer_size,
                      None)
@@ -1399,20 +1644,21 @@ impl Renderer {
                 };
 
                 for (target_index, target) in pass.targets.iter().enumerate() {
                     let render_target = target_id.map(|texture_id| {
                         (texture_id, target_index as i32)
                     });
                     self.draw_target(render_target,
                                      target,
-                                     size,
+                                     *size,
                                      src_id,
                                      do_clear,
-                                     frame.background_color);
+                                     frame.background_color,
+                                     &frame.render_task_data);
 
                 }
 
                 src_id = target_id;
             }
 
             self.draw_render_target_debug(framebuffer_size);
         }
@@ -1427,16 +1673,20 @@ impl Renderer {
     pub fn get_profiler_enabled(&mut self) -> bool {
         self.enable_profiler
     }
 
     pub fn set_profiler_enabled(&mut self, enabled: bool) {
         self.enable_profiler = enabled;
     }
 
+    pub fn save_cpu_profile(&self, filename: &str) {
+        write_profile(filename);
+    }
+
     fn draw_render_target_debug(&mut self,
                                 framebuffer_size: &DeviceUintSize) {
         if self.render_target_debug {
             // TODO(gw): Make the layout of the render targets a bit more sophisticated.
             // Right now, it just draws them in one row at the bottom of the screen,
             // with a fixed size.
             let rt_debug_x0 = 16;
             let rt_debug_y0 = 16;
@@ -1463,37 +1713,36 @@ impl Renderer {
 
                     current_target += 1;
                 }
             }
         }
     }
 }
 
-pub enum ExternalImageSource {
-    // TODO(gw): Work out the API for raw buffers.
-    //RawData(*const u8, usize),
-    NativeTexture(u32),                // Is a gl::GLuint texture handle
+pub enum ExternalImageSource<'a> {
+    RawData(&'a [u8]),      // raw buffers.
+    NativeTexture(u32),     // Is a gl::GLuint texture handle
 }
 
 /// The data that an external client should provide about
 /// an external image. The timestamp is used to test if
 /// the renderer should upload new texture data this
 /// frame. For instance, if providing video frames, the
 /// application could call wr.render() whenever a new
 /// video frame is ready. If the callback increments
 /// the returned timestamp for a given image, the renderer
 /// will know to re-upload the image data to the GPU.
 /// Note that the UV coords are supplied in texel-space!
-pub struct ExternalImage {
+pub struct ExternalImage<'a> {
     pub u0: f32,
     pub v0: f32,
     pub u1: f32,
     pub v1: f32,
-    pub source: ExternalImageSource,
+    pub source: ExternalImageSource<'a>,
 }
 
 /// The interfaces that an application can implement to support providing
 /// external image buffers.
 /// When the application passes an external image to WR, it should keep that
 /// external image alive until the release() call.
 pub trait ExternalImageHandler {
     /// Lock the external image. Then, WR could start to read the image content.
@@ -1502,44 +1751,46 @@ pub trait ExternalImageHandler {
     fn lock(&mut self, key: ExternalImageId) -> ExternalImage;
     /// Unlock the external image. The WR should not read the image content
     /// after this call.
     fn unlock(&mut self, key: ExternalImageId);
     /// Tell the WR client that it could start to release this external image.
     fn release(&mut self, key: ExternalImageId);
 }
 
-#[derive(Clone, Debug)]
+#[derive(Debug)]
 pub struct RendererOptions {
     pub device_pixel_ratio: f32,
     pub resource_override_path: Option<PathBuf>,
     pub enable_aa: bool,
     pub enable_profiler: bool,
     pub debug: bool,
-    pub enable_recording: bool,
     pub enable_scrollbars: bool,
     pub precache_shaders: bool,
     pub renderer_kind: RendererKind,
     pub enable_subpixel_aa: bool,
     pub clear_framebuffer: bool,
     pub clear_color: ColorF,
     pub render_target_debug: bool,
+    pub workers: Option<Arc<Mutex<ThreadPool>>>,
+    pub recorder: Option<Box<ApiRecordingReceiver>>,
 }
 
 impl Default for RendererOptions {
     fn default() -> RendererOptions {
         RendererOptions {
             device_pixel_ratio: 1.0,
             resource_override_path: None,
             enable_aa: true,
             enable_profiler: false,
             debug: false,
-            enable_recording: false,
             enable_scrollbars: false,
             precache_shaders: false,
             renderer_kind: RendererKind::Native,
             enable_subpixel_aa: false,
             clear_framebuffer: true,
             clear_color: ColorF::new(1.0, 1.0, 1.0, 1.0),
             render_target_debug: false,
+            workers: None,
+            recorder: None,
         }
     }
 }
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -10,38 +10,40 @@ use internal_types::{ExternalImageUpdate
 use platform::font::{FontContext, RasterizedGlyph};
 use std::cell::RefCell;
 use std::collections::{HashMap, HashSet};
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::fmt::Debug;
 use std::hash::BuildHasherDefault;
 use std::hash::Hash;
 use std::mem;
-use std::sync::{Arc, Barrier};
+use std::sync::{Arc, Barrier, Mutex};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::{TextureCache, TextureCacheItemId};
+use thread_profiler::register_thread_with_profiler;
 use webrender_traits::{Epoch, FontKey, GlyphKey, ImageKey, ImageFormat, ImageRendering};
 use webrender_traits::{FontRenderMode, ImageData, GlyphDimensions, WebGLContextId};
 use webrender_traits::{DevicePoint, DeviceIntSize, ImageDescriptor, ColorF};
-use webrender_traits::ExternalImageId;
+use webrender_traits::{ExternalImageId, GlyphOptions, GlyphInstance};
 use threadpool::ThreadPool;
+use euclid::Point2D;
 
 thread_local!(pub static FONT_CONTEXT: RefCell<FontContext> = RefCell::new(FontContext::new()));
 
 type GlyphCache = ResourceClassCache<RenderedGlyphKey, Option<TextureCacheItemId>>;
 
 /// Message sent from the resource cache to the glyph cache thread.
 enum GlyphCacheMsg {
     /// Begin the frame - pass ownership of the glyph cache to the thread.
     BeginFrame(FrameId, GlyphCache),
     /// Add a new font.
     AddFont(FontKey, FontTemplate),
     /// Request glyphs for a text run.
-    RequestGlyphs(FontKey, Au, ColorF, Vec<u32>, FontRenderMode),
+    RequestGlyphs(FontKey, Au, ColorF, Vec<GlyphInstance>, FontRenderMode, Option<GlyphOptions>),
     /// Finished requesting glyphs. Reply with new glyphs.
     EndFrame,
 }
 
 /// Results sent from the glyph cache thread back to the main resource cache.
 enum GlyphCacheResultMsg {
     /// Return the glyph cache, and a list of newly rasterized glyphs.
     EndFrame(GlyphCache, Vec<GlyphRasterJob>),
@@ -61,27 +63,32 @@ pub struct CacheItem {
     pub uv0: DevicePoint,
     pub uv1: DevicePoint,
 }
 
 #[derive(Clone, Hash, PartialEq, Eq, Debug, Ord, PartialOrd)]
 pub struct RenderedGlyphKey {
     pub key: GlyphKey,
     pub render_mode: FontRenderMode,
+    pub glyph_options: Option<GlyphOptions>,
 }
 
 impl RenderedGlyphKey {
     pub fn new(font_key: FontKey,
                size: Au,
                color: ColorF,
                index: u32,
-               render_mode: FontRenderMode) -> RenderedGlyphKey {
+               point: Point2D<f32>,
+               render_mode: FontRenderMode,
+               glyph_options: Option<GlyphOptions>) -> RenderedGlyphKey {
         RenderedGlyphKey {
-            key: GlyphKey::new(font_key, size, color, index),
+            key: GlyphKey::new(font_key, size, color, index,
+                               point, render_mode),
             render_mode: render_mode,
+            glyph_options: glyph_options,
         }
     }
 }
 
 pub struct ImageProperties {
     pub descriptor: ImageDescriptor,
     pub external_id: Option<ExternalImageId>,
 }
@@ -201,18 +208,19 @@ pub struct ResourceCache {
     pending_image_requests: Vec<ImageRequest>,
     glyph_cache_tx: Sender<GlyphCacheMsg>,
     glyph_cache_result_queue: Receiver<GlyphCacheResultMsg>,
     pending_external_image_update_list: ExternalImageUpdateList,
 }
 
 impl ResourceCache {
     pub fn new(texture_cache: TextureCache,
+               workers: Arc<Mutex<ThreadPool>>,
                enable_aa: bool) -> ResourceCache {
-        let (glyph_cache_tx, glyph_cache_result_queue) = spawn_glyph_cache_thread();
+        let (glyph_cache_tx, glyph_cache_result_queue) = spawn_glyph_cache_thread(workers);
 
         ResourceCache {
             cached_glyphs: Some(ResourceClassCache::new()),
             cached_images: ResourceClassCache::new(),
             webgl_textures: HashMap::with_hasher(Default::default()),
             font_templates: HashMap::with_hasher(Default::default()),
             image_templates: HashMap::with_hasher(Default::default()),
             cached_glyph_dimensions: HashMap::with_hasher(Default::default()),
@@ -252,17 +260,17 @@ impl ResourceCache {
     pub fn update_image_template(&mut self,
                                  image_key: ImageKey,
                                  descriptor: ImageDescriptor,
                                  bytes: Vec<u8>) {
         let next_epoch = match self.image_templates.get(&image_key) {
             Some(image) => {
                 // This image should not be an external image.
                 match image.data {
-                    ImageData::External(id) => {
+                    ImageData::ExternalHandle(id) => {
                         panic!("Update an external image with buffer, id={} image_key={:?}", id.0, image_key);
                     },
                     _ => {},
                 }
 
                 let Epoch(current_epoch) = image.epoch;
                 Epoch(current_epoch + 1)
             }
@@ -281,17 +289,17 @@ impl ResourceCache {
     }
 
     pub fn delete_image_template(&mut self, image_key: ImageKey) {
         let value = self.image_templates.remove(&image_key);
 
         // If the key is associated to an external image, pass the external id to renderer for cleanup.
         if let Some(image) = value {
             match image.data {
-                ImageData::External(id) => {
+                ImageData::ExternalHandle(id) => {
                     self.pending_external_image_update_list.push(id);
                 },
                 _ => {},
             }
 
             return;
         }
 
@@ -322,57 +330,64 @@ impl ResourceCache {
             rendering: rendering,
         });
     }
 
     pub fn request_glyphs(&mut self,
                           key: FontKey,
                           size: Au,
                           color: ColorF,
-                          glyph_indices: &[u32],
-                          render_mode: FontRenderMode) {
+                          glyph_instances: &[GlyphInstance],
+                          render_mode: FontRenderMode,
+                          glyph_options: Option<GlyphOptions>) {
         debug_assert!(self.state == State::AddResources);
         let render_mode = self.get_glyph_render_mode(render_mode);
         // Immediately request that the glyph cache thread start
         // rasterizing glyphs from this request if they aren't
         // already cached.
         let msg = GlyphCacheMsg::RequestGlyphs(key,
                                                size,
                                                color,
-                                               glyph_indices.to_vec(),
-                                               render_mode);
+                                               glyph_instances.to_vec(),
+                                               render_mode,
+                                               glyph_options);
         self.glyph_cache_tx.send(msg).unwrap();
     }
 
     pub fn pending_updates(&mut self) -> TextureUpdateList {
         self.texture_cache.pending_updates()
     }
 
     pub fn pending_external_image_updates(&mut self) -> ExternalImageUpdateList {
         mem::replace(&mut self.pending_external_image_update_list, ExternalImageUpdateList::new())
     }
 
     pub fn get_glyphs<F>(&self,
                          font_key: FontKey,
                          size: Au,
                          color: ColorF,
-                         glyph_indices: &[u32],
+                         glyph_instances: &[GlyphInstance],
                          render_mode: FontRenderMode,
+                         glyph_options: Option<GlyphOptions>,
                          mut f: F) -> SourceTexture where F: FnMut(usize, DevicePoint, DevicePoint) {
         debug_assert!(self.state == State::QueryResources);
         let cache = self.cached_glyphs.as_ref().unwrap();
         let render_mode = self.get_glyph_render_mode(render_mode);
         let mut glyph_key = RenderedGlyphKey::new(font_key,
                                                   size,
                                                   color,
                                                   0,
-                                                  render_mode);
+                                                  Point2D::new(0.0, 0.0),
+                                                  render_mode,
+                                                  glyph_options);
         let mut texture_id = None;
-        for (loop_index, glyph_index) in glyph_indices.iter().enumerate() {
-            glyph_key.key.index = *glyph_index;
+        for (loop_index, glyph_instance) in glyph_instances.iter().enumerate() {
+            glyph_key.key.index = glyph_instance.index;
+            glyph_key.key.subpixel_point.set_offset(glyph_instance.point, render_mode);
+
             let image_id = cache.get(&glyph_key, self.current_frame_id);
             let cache_item = image_id.map(|image_id| self.texture_cache.get(image_id));
             if let Some(cache_item) = cache_item {
                 let uv0 = DevicePoint::new(cache_item.pixel_rect.top_left.x as f32,
                                            cache_item.pixel_rect.top_left.y as f32);
                 let uv1 = DevicePoint::new(cache_item.pixel_rect.bottom_right.x as f32,
                                            cache_item.pixel_rect.bottom_right.y as f32);
                 f(loop_index, uv0, uv1);
@@ -399,19 +414,17 @@ impl ResourceCache {
                             font_context.add_raw_font(&glyph_key.font_key, &**bytes);
                         }
                         FontTemplate::Native(ref native_font_handle) => {
                             font_context.add_native_font(&glyph_key.font_key,
                                                          (*native_font_handle).clone());
                         }
                     }
 
-                    dimensions = font_context.get_glyph_dimensions(glyph_key.font_key,
-                                                                   glyph_key.size,
-                                                                   glyph_key.index);
+                    dimensions = font_context.get_glyph_dimensions(glyph_key);
                 });
 
                 *entry.insert(dimensions)
             }
         }
     }
 
     #[inline]
@@ -433,18 +446,19 @@ impl ResourceCache {
                                   item.pixel_rect.bottom_right.y as f32),
         }
     }
 
     pub fn get_image_properties(&self, image_key: ImageKey) -> ImageProperties {
         let image_template = &self.image_templates[&image_key];
 
         let external_id = match image_template.data {
-            ImageData::External(id) => Some(id),
-            ImageData::Raw(..) => None,
+            ImageData::ExternalHandle(id) => Some(id),
+            // Raw and ExternalBuffer data both go through the resource cache.
+            ImageData::Raw(..) | ImageData::ExternalBuffer(..) => None,
         };
 
         ImageProperties {
             descriptor: image_template.descriptor,
             external_id: external_id,
         }
     }
 
@@ -469,16 +483,18 @@ impl ResourceCache {
         debug_assert!(self.state == State::Idle);
         self.state = State::AddResources;
         self.current_frame_id = frame_id;
         let glyph_cache = self.cached_glyphs.take().unwrap();
         self.glyph_cache_tx.send(GlyphCacheMsg::BeginFrame(frame_id, glyph_cache)).ok();
     }
 
     pub fn block_until_all_resources_added(&mut self) {
+        profile_scope!("block_until_all_resources_added");
+
         debug_assert!(self.state == State::AddResources);
         self.state = State::QueryResources;
 
         // Tell the glyph cache thread that all glyphs have been requested
         // and block, waiting for any pending glyphs to be rasterized. In the
         // future, we will expand this to have a timeout. If the glyph rasterizing
         // takes longer than the timeout, then we will select the best glyphs
         // available in the cache, render with those, and then re-render at
@@ -502,17 +518,17 @@ impl ResourceCache {
                                                           ImageDescriptor {
                                                               width: glyph.width,
                                                               height: glyph.height,
                                                               stride: None,
                                                               format: ImageFormat::RGBA8,
                                                               is_opaque: false,
                                                           },
                                                           TextureFilter::Linear,
-                                                          Arc::new(glyph.bytes));
+                                                          ImageData::Raw(Arc::new(glyph.bytes)));
                                 Some(image_id)
                             } else {
                                 None
                             }
                         });
 
                         cache.insert(job.key, image_id, self.current_frame_id);
                     }
@@ -521,50 +537,51 @@ impl ResourceCache {
                     break;
                 }
             }
         }
 
         for request in self.pending_image_requests.drain(..) {
             let cached_images = &mut self.cached_images;
             let image_template = &self.image_templates[&request.key];
+            let image_data = image_template.data.clone();
 
             match image_template.data {
-                ImageData::External(..) => {}
-                ImageData::Raw(ref bytes) => {
+                ImageData::ExternalHandle(..) => {
+                    // An external handle doesn't need to update the texture_cache.
+                }
+                ImageData::Raw(..) | ImageData::ExternalBuffer(..) => {
                     match cached_images.entry(request.clone(), self.current_frame_id) {
                         Occupied(entry) => {
                             let image_id = entry.get().texture_cache_id;
 
                             if entry.get().epoch != image_template.epoch {
-                                // TODO: Can we avoid the clone of the bytes here?
                                 self.texture_cache.update(image_id,
                                                           image_template.descriptor,
-                                                          bytes.clone());
+                                                          image_data);
 
                                 // Update the cached epoch
                                 *entry.into_mut() = CachedImageInfo {
                                     texture_cache_id: image_id,
                                     epoch: image_template.epoch,
                                 };
                             }
                         }
                         Vacant(entry) => {
                             let image_id = self.texture_cache.new_item_id();
 
                             let filter = match request.rendering {
                                 ImageRendering::Pixelated => TextureFilter::Nearest,
                                 ImageRendering::Auto | ImageRendering::CrispEdges => TextureFilter::Linear,
                             };
 
-                            // TODO: Can we avoid the clone of the bytes here?
                             self.texture_cache.insert(image_id,
                                                       image_template.descriptor,
                                                       filter,
-                                                      bytes.clone());
+                                                      image_data);
 
                             entry.insert(CachedImageInfo {
                                 texture_cache_id: image_id,
                                 epoch: image_template.epoch,
                             });
                         }
                     }
                 }
@@ -603,59 +620,74 @@ impl Resource for Option<TextureCacheIte
 }
 
 impl Resource for CachedImageInfo {
     fn texture_cache_item_id(&self) -> Option<TextureCacheItemId> {
         Some(self.texture_cache_id)
     }
 }
 
-fn spawn_glyph_cache_thread() -> (Sender<GlyphCacheMsg>, Receiver<GlyphCacheResultMsg>) {
+fn spawn_glyph_cache_thread(workers: Arc<Mutex<ThreadPool>>) -> (Sender<GlyphCacheMsg>, Receiver<GlyphCacheResultMsg>) {
+    let worker_count = {
+        workers.lock().unwrap().max_count()
+    };
     // Used for messages from resource cache -> glyph cache thread.
     let (msg_tx, msg_rx) = channel();
     // Used for returning results from glyph cache thread -> resource cache.
     let (result_tx, result_rx) = channel();
     // Used for rasterizer worker threads to send glyphs -> glyph cache thread.
     let (glyph_tx, glyph_rx) = channel();
 
     thread::Builder::new().name("GlyphCache".to_string()).spawn(move|| {
-        // TODO(gw): Use a heuristic to select best # of worker threads.
-        let worker_count = 4;
-        let thread_pool = ThreadPool::new(worker_count);
-
         let mut glyph_cache = None;
         let mut current_frame_id = FrameId(0);
 
+        register_thread_with_profiler("GlyphCache".to_string());
+
+        let barrier = Arc::new(Barrier::new(worker_count));
+        for i in 0..worker_count {
+            let barrier = barrier.clone();
+            workers.lock().unwrap().execute(move || {
+                register_thread_with_profiler(format!("Glyph Worker {}", i));
+                barrier.wait();
+            });
+        }
+
         // Maintain a set of glyphs that have been requested this
         // frame. This ensures the glyph thread won't rasterize
         // the same glyph more than once in a frame. This is required
         // because the glyph cache hash table is not updated
         // until the glyph cache is passed back to the resource
         // cache which is able to add the items to the texture cache.
         let mut pending_glyphs = HashSet::new();
 
         while let Ok(msg) = msg_rx.recv() {
+            profile_scope!("handle_msg");
             match msg {
                 GlyphCacheMsg::BeginFrame(frame_id, cache) => {
+                    profile_scope!("BeginFrame");
+
                     // We are beginning a new frame. Take ownership of the glyph
                     // cache hash map, so we can easily see which glyph requests
                     // actually need to be rasterized.
                     current_frame_id = frame_id;
                     glyph_cache = Some(cache);
                 }
                 GlyphCacheMsg::AddFont(font_key, font_template) => {
+                    profile_scope!("AddFont");
+
                     // Add a new font to the font context in each worker thread.
                     // Use a barrier to ensure that each worker in the pool handles
                     // one of these messages, to ensure that the new font gets
                     // added to each worker thread.
                     let barrier = Arc::new(Barrier::new(worker_count));
                     for _ in 0..worker_count {
                         let barrier = barrier.clone();
                         let font_template = font_template.clone();
-                        thread_pool.execute(move || {
+                        workers.lock().unwrap().execute(move || {
                             FONT_CONTEXT.with(|font_context| {
                                 let mut font_context = font_context.borrow_mut();
                                 match font_template {
                                     FontTemplate::Raw(ref bytes) => {
                                         font_context.add_raw_font(&font_key, &**bytes);
                                     }
                                     FontTemplate::Native(ref native_font_handle) => {
                                         font_context.add_native_font(&font_key,
@@ -663,50 +695,55 @@ fn spawn_glyph_cache_thread() -> (Sender
                                     }
                                 }
                             });
 
                             barrier.wait();
                         });
                     }
                 }
-                GlyphCacheMsg::RequestGlyphs(key, size, color, indices, render_mode) => {
+                GlyphCacheMsg::RequestGlyphs(key, size, color, glyph_instances, render_mode, glyph_options) => {
+                    profile_scope!("RequestGlyphs");
+
                     // Request some glyphs for a text run.
                     // For any glyph that isn't currently in the cache,
                     // immediately push a job to the worker thread pool
                     // to start rasterizing this glyph now!
                     let glyph_cache = glyph_cache.as_mut().unwrap();
 
-                    for glyph_index in indices {
+                    for glyph_instance in glyph_instances {
                         let glyph_key = RenderedGlyphKey::new(key,
                                                               size,
                                                               color,
-                                                              glyph_index,
-                                                              render_mode);
+                                                              glyph_instance.index,
+                                                              glyph_instance.point,
+                                                              render_mode,
+                                                              glyph_options);
 
                         glyph_cache.mark_as_needed(&glyph_key, current_frame_id);
                         if !glyph_cache.contains_key(&glyph_key) &&
                            !pending_glyphs.contains(&glyph_key) {
                             let glyph_tx = glyph_tx.clone();
                             pending_glyphs.insert(glyph_key.clone());
-                            thread_pool.execute(move || {
+                            workers.lock().unwrap().execute(move || {
+                                profile_scope!("glyph");
                                 FONT_CONTEXT.with(move |font_context| {
                                     let mut font_context = font_context.borrow_mut();
-                                    let result = font_context.rasterize_glyph(glyph_key.key.font_key,
-                                                                              glyph_key.key.size,
-                                                                              glyph_key.key.color,
-                                                                              glyph_key.key.index,
-                                                                              render_mode);
+                                    let result = font_context.rasterize_glyph(&glyph_key.key,
+                                                                              render_mode,
+                                                                              glyph_options);
                                     glyph_tx.send((glyph_key, result)).unwrap();
                                 });
                             });
                         }
                     }
                 }
                 GlyphCacheMsg::EndFrame => {
+                    profile_scope!("EndFrame");
+
                     // The resource cache has finished requesting glyphs. Block
                     // on completion of any pending glyph rasterizing jobs, and then
                     // return the list of new glyphs to the resource cache.
                     let cache = glyph_cache.take().unwrap();
                     let mut rasterized_glyphs = Vec::new();
                     while !pending_glyphs.is_empty() {
                         let (key, glyph) = glyph_rx.recv()
                                                    .expect("BUG: Should be glyphs pending!");
--- a/gfx/webrender/src/scene.rs
+++ b/gfx/webrender/src/scene.rs
@@ -2,44 +2,108 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use fnv::FnvHasher;
 use std::collections::HashMap;
 use std::hash::BuildHasherDefault;
 use tiling::AuxiliaryListsMap;
 use webrender_traits::{AuxiliaryLists, BuiltDisplayList, PipelineId, Epoch, ColorF};
-use webrender_traits::{DisplayItem, LayerSize};
+use webrender_traits::{DisplayItem, DynamicProperties, LayerSize, LayoutTransform};
+use webrender_traits::{PropertyBinding, PropertyBindingId};
+
+/// Stores a map of the animated property bindings for the current display list. These
+/// can be used to animate the transform and/or opacity of a display list without
+/// re-submitting the display list itself.
+pub struct SceneProperties {
+    transform_properties: HashMap<PropertyBindingId, LayoutTransform>,
+    float_properties: HashMap<PropertyBindingId, f32>,
+}
+
+impl SceneProperties {
+    pub fn new() -> SceneProperties {
+        SceneProperties {
+            transform_properties: HashMap::with_hasher(Default::default()),
+            float_properties: HashMap::with_hasher(Default::default()),
+        }
+    }
+
+    /// Set the current property list for this display list.
+    pub fn set_properties(&mut self, properties: DynamicProperties) {
+        self.transform_properties.clear();
+        self.float_properties.clear();
+
+        for property in properties.transforms {
+            self.transform_properties.insert(property.key.id, property.value);
+        }
+
+        for property in properties.floats {
+            self.float_properties.insert(property.key.id, property.value);
+        }
+    }
+
+    /// Get the current value for a transform property.
+    pub fn resolve_layout_transform(&self, property: &PropertyBinding<LayoutTransform>) -> LayoutTransform {
+        match *property {
+            PropertyBinding::Value(matrix) => matrix,
+            PropertyBinding::Binding(ref key) => {
+                self.transform_properties
+                    .get(&key.id)
+                    .cloned()
+                    .unwrap_or_else(|| {
+                        warn!("Property binding {:?} has an invalid value.", key);
+                        LayoutTransform::identity()
+                    })
+            }
+        }
+    }
+
+    /// Get the current value for a float property.
+    pub fn resolve_float(&self, property: &PropertyBinding<f32>, default_value: f32) -> f32 {
+        match *property {
+            PropertyBinding::Value(value) => value,
+            PropertyBinding::Binding(ref key) => {
+                self.float_properties
+                    .get(&key.id)
+                    .cloned()
+                    .unwrap_or_else(|| {
+                        warn!("Property binding {:?} has an invalid value.", key);
+                        default_value
+                    })
+            }
+        }
+    }
+}
 
 /// A representation of the layout within the display port for a given document or iframe.
 #[derive(Debug)]
 pub struct ScenePipeline {
     pub pipeline_id: PipelineId,
     pub epoch: Epoch,
     pub viewport_size: LayerSize,
     pub background_color: Option<ColorF>,
 }
 
 /// A complete representation of the layout bundling visible pipelines together.
 pub struct Scene {
     pub root_pipeline_id: Optio