Bug 1410893 - Update webrender to commit 4b8493d6bdc64d2d83202ac15b06b0d4b14c6e76. r=jrmuizel
author Kartikaya Gupta <kgupta@mozilla.com>
Fri, 27 Oct 2017 08:51:39 -0400
changeset 388716 55f33f1364a73afb5840b524ac1b6252ff95ae9f
parent 388715 88a41df87dacd767dbbf392ec624917a4e3881d6
child 388717 0eb02890fae4944cbefc46863b4e83abbff8c343
push id 54235
push user kgupta@mozilla.com
push date Fri, 27 Oct 2017 13:59:53 +0000
treeherder autoland@a9eb465811d0 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1410893
milestone 58.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1410893 - Update webrender to commit 4b8493d6bdc64d2d83202ac15b06b0d4b14c6e76. r=jrmuizel MozReview-Commit-ID: DoUZXZtRyDY
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_mask.glsl
gfx/webrender/res/cs_blur.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/cs_text_run.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_angle_gradient.glsl
gfx/webrender/res/ps_border_corner.glsl
gfx/webrender/res/ps_border_edge.glsl
gfx/webrender/res/ps_cache_image.glsl
gfx/webrender/res/ps_gradient.glsl
gfx/webrender/res/ps_image.glsl
gfx/webrender/res/ps_line.glsl
gfx/webrender/res/ps_radial_gradient.glsl
gfx/webrender/res/ps_rectangle.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/ps_yuv_image.glsl
gfx/webrender/res/shared.glsl
gfx/webrender/src/border.rs
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/clip_scroll_tree.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/glyph_cache.rs
gfx/webrender/src/glyph_rasterizer.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/unix/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender/tests/angle_shader_validation.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_api/src/font.rs
gfx/webrender_bindings/Cargo.toml
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-d741f472dd3d6c3441646f7bf4e714c71bea39b7
+4b8493d6bdc64d2d83202ac15b06b0d4b14c6e76
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -1,11 +1,11 @@
 [package]
 name = "webrender"
-version = "0.53.0"
+version = "0.53.1"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 
 [features]
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -1,81 +1,127 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-varying vec2 vLocalPos;
-flat varying vec4 vLocalRect;
+#ifdef WR_VERTEX_SHADER
 
-#ifdef WR_VERTEX_SHADER
+void brush_vs(
+    int prim_address,
+    vec2 local_pos,
+    RectWithSize local_rect,
+    ivec2 user_data
+);
+
+// Whether this brush is being drawn on a Picture
+// task (new) or an alpha batch task (legacy).
+// Can be removed once everything uses pictures.
+#define BRUSH_FLAG_USES_PICTURE     (1 << 0)
 
 struct BrushInstance {
     int picture_address;
     int prim_address;
+    int layer_address;
+    int clip_address;
+    int z;
+    int flags;
+    ivec2 user_data;
 };
 
 BrushInstance load_brush() {
 	BrushInstance bi;
 
     bi.picture_address = aData0.x;
     bi.prim_address = aData0.y;
+    bi.layer_address = aData0.z;
+    bi.clip_address = aData0.w;
+    bi.z = aData1.x;
+    bi.flags = aData1.y;
+    bi.user_data = aData1.zw;
 
     return bi;
 }
 
-/*
- The dynamic picture that this brush exists on. Right now, it
- contains minimal information. In the future, it will describe
- the transform mode of primitives on this picture, among other things.
- */
-struct PictureTask {
-    RectWithSize target_rect;
-};
-
-PictureTask fetch_picture_task(int index) {
-    ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
-
-    vec4 target_rect = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
-
-    PictureTask task = PictureTask(RectWithSize(target_rect.xy, target_rect.zw));
-
-    return task;
-}
-
 void main(void) {
     // Load the brush instance from vertex attributes.
     BrushInstance brush = load_brush();
 
-    // Fetch the dynamic picture that we are drawing on.
-    PictureTask pic_task = fetch_picture_task(brush.picture_address);
-
     // Load the geometry for this brush. For now, this is simply the
     // local rect of the primitive. In the future, this will support
     // loading segment rects, and other rect formats (glyphs).
     PrimitiveGeometry geom = fetch_primitive_geometry(brush.prim_address);
 
-    // Write the (p0,p1) form of the primitive rect and the local position
-    // of this vertex. Specific brush shaders can use this information to
-    // interpolate texture coordinates etc.
-    vLocalRect = vec4(geom.local_rect.p0, geom.local_rect.p0 + geom.local_rect.size);
+    vec2 device_pos, local_pos;
+    RectWithSize local_rect = geom.local_rect;
+
+    if ((brush.flags & BRUSH_FLAG_USES_PICTURE) != 0) {
+        // Fetch the dynamic picture that we are drawing on.
+        PictureTask pic_task = fetch_picture_task(brush.picture_address);
+
+        // Right now - pictures only support local positions. In the future, this
+        // will be expanded to support transform picture types (the common kind).
+        device_pos = pic_task.target_rect.p0 + aPosition.xy * pic_task.target_rect.size;
+        local_pos = aPosition.xy * pic_task.target_rect.size / uDevicePixelRatio;
+
+        // Write the final position transformed by the orthographic device-pixel projection.
+        gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
+    } else {
+        AlphaBatchTask alpha_task = fetch_alpha_batch_task(brush.picture_address);
+        Layer layer = fetch_layer(brush.layer_address);
+        ClipArea clip_area = fetch_clip_area(brush.clip_address);
 
-    // Right now - pictures only support local positions. In the future, this
-    // will be expanded to support transform picture types (the common kind).
-    vec2 pos = pic_task.target_rect.p0 + aPosition.xy * pic_task.target_rect.size;
-    vLocalPos = aPosition.xy * pic_task.target_rect.size / uDevicePixelRatio;
+        // Write the normal vertex information out.
+        // TODO(gw): Support transform types in brushes. For now,
+        //           the old cache image shader didn't support
+        //           them yet anyway, so we're not losing any
+        //           existing functionality.
+        VertexInfo vi = write_vertex(
+            geom.local_rect,
+            geom.local_clip_rect,
+            float(brush.z),
+            layer,
+            alpha_task,
+            geom.local_rect
+        );
+
+        local_pos = vi.local_pos;
+
+        // For brush instances in the alpha pass, always write
+        // out clip information.
+        // TODO(gw): It's possible that we might want alpha
+        //           shaders that don't clip in the future,
+        //           but it's reasonable to assume that one
+        //           implies the other, for now.
+#ifdef WR_FEATURE_ALPHA_PASS
+        write_clip(
+            vi.screen_pos,
+            clip_area
+        );
+#endif
+    }
 
     // Run the specific brush VS code to write interpolators.
-    brush_vs(brush.prim_address, vLocalRect);
-
-    // Write the final position transformed by the orthographic device-pixel projection.
-    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+    brush_vs(
+        brush.prim_address + VECS_PER_PRIM_HEADER,
+        local_pos,
+        local_rect,
+        brush.user_data
+    );
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
+
+vec4 brush_fs();
+
 void main(void) {
     // Run the specific brush FS code to output the color.
-    vec4 color = brush_fs(vLocalPos, vLocalRect);
+    vec4 color = brush_fs();
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    // Apply the clip mask
+    color *= do_clip();
+#endif
 
     // TODO(gw): Handle pre-multiply common code here as required.
     oFragColor = color;
 }
 #endif
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/brush_image.glsl
@@ -0,0 +1,62 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,prim_shared,brush
+
+varying vec3 vUv;
+flat varying vec4 vUvBounds;
+
+#if defined WR_FEATURE_ALPHA_TARGET
+flat varying vec4 vColor;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+void brush_vs(
+    int prim_address,
+    vec2 local_pos,
+    RectWithSize local_rect,
+    ivec2 user_data
+) {
+    // TODO(gw): For now, this brush_image shader is only
+    //           being used to draw items from the intermediate
+    //           surface cache (render tasks). In the future
+    //           we can expand this to support items from
+    //           the normal texture cache and unify this
+    //           with the normal image shader.
+    BlurTask task = fetch_blur_task(user_data.x);
+    vUv.z = task.render_target_layer_index;
+
+#if defined WR_FEATURE_COLOR_TARGET
+    vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
+#else
+    vec2 texture_size = vec2(textureSize(sColor1, 0).xy);
+    vColor = task.color;
+#endif
+
+    vec2 uv0 = task.target_rect.p0;
+    vec2 uv1 = (task.target_rect.p0 + task.target_rect.size);
+
+    vec2 f = (local_pos - local_rect.p0) / local_rect.size;
+
+    vUv.xy = mix(uv0 / texture_size,
+                 uv1 / texture_size,
+                 f);
+
+    vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec4 brush_fs() {
+    vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
+
+#if defined WR_FEATURE_COLOR_TARGET
+    vec4 color = texture(sColor0, vec3(uv, vUv.z));
+#else
+    vec4 color = vColor * texture(sColor1, vec3(uv, vUv.z)).r;
+#endif
+
+    return color;
+}
+#endif
--- a/gfx/webrender/res/brush_mask.glsl
+++ b/gfx/webrender/res/brush_mask.glsl
@@ -1,63 +1,77 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include shared,prim_shared,ellipse
+#include shared,prim_shared,ellipse,brush
 
 flat varying float vClipMode;
 flat varying vec4 vClipCenter_Radius_TL;
 flat varying vec4 vClipCenter_Radius_TR;
 flat varying vec4 vClipCenter_Radius_BR;
 flat varying vec4 vClipCenter_Radius_BL;
+flat varying vec4 vLocalRect;
+varying vec2 vLocalPos;
 
 #ifdef WR_VERTEX_SHADER
 
 struct BrushPrimitive {
     float clip_mode;
     vec2 radius_tl;
     vec2 radius_tr;
     vec2 radius_br;
     vec2 radius_bl;
 };
 
 BrushPrimitive fetch_brush_primitive(int address) {
     vec4 data[3] = fetch_from_resource_cache_3(address);
     return BrushPrimitive(data[0].x, data[1].xy, data[1].zw, data[2].xy, data[2].zw);
 }
 
-void brush_vs(int prim_address, vec4 prim_rect) {
+void brush_vs(
+    int prim_address,
+    vec2 local_pos,
+    RectWithSize local_rect,
+    ivec2 user_data
+) {
     // Load the specific primitive.
-    BrushPrimitive prim = fetch_brush_primitive(prim_address + 2);
+    BrushPrimitive prim = fetch_brush_primitive(prim_address);
 
     // Write clip parameters
     vClipMode = prim.clip_mode;
 
+    // TODO(gw): In the future, when brush primitives may be segment rects
+    //           we need to account for that here, and differentiate between
+    //           the segment rect (geometry) amd the primitive rect (which
+    //           defines where the clip radii are relative to).
+    vec4 prim_rect = vec4(local_rect.p0, local_rect.p0 + local_rect.size);
+
     vClipCenter_Radius_TL = vec4(prim_rect.xy + prim.radius_tl, prim.radius_tl);
     vClipCenter_Radius_TR = vec4(prim_rect.zy + vec2(-prim.radius_tr.x, prim.radius_tr.y), prim.radius_tr);
     vClipCenter_Radius_BR = vec4(prim_rect.zw - prim.radius_br, prim.radius_br);
     vClipCenter_Radius_BL = vec4(prim_rect.xw + vec2(prim.radius_bl.x, -prim.radius_bl.y), prim.radius_bl);
+
+    vLocalRect = prim_rect;
+    vLocalPos = local_pos;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
-vec4 brush_fs(vec2 local_pos, vec4 local_rect) {
+vec4 brush_fs() {
     // TODO(gw): The mask code below is super-inefficient. Once we
     // start using primitive segments in brush shaders, this can
     // be made much faster.
     float d = 0.0;
     // Check if in valid clip region.
-    if (local_pos.x >= local_rect.x && local_pos.x < local_rect.z &&
-        local_pos.y >= local_rect.y && local_pos.y < local_rect.w) {
+    if (vLocalPos.x >= vLocalRect.x && vLocalPos.x < vLocalRect.z &&
+        vLocalPos.y >= vLocalRect.y && vLocalPos.y < vLocalRect.w) {
         // Apply ellipse clip on each corner.
-        d = rounded_rect(local_pos,
+        d = rounded_rect(vLocalPos,
                          vClipCenter_Radius_TL,
                          vClipCenter_Radius_TR,
                          vClipCenter_Radius_BR,
                          vClipCenter_Radius_BL);
     }
 
     return vec4(mix(d, 1.0 - d, vClipMode));
 }
 #endif
-
-#include brush
--- a/gfx/webrender/res/cs_blur.glsl
+++ b/gfx/webrender/res/cs_blur.glsl
@@ -24,23 +24,23 @@ in vec4 aBlurRegion;
 
 void main(void) {
     RenderTaskData task = fetch_render_task(aBlurRenderTaskAddress);
     RenderTaskData src_task = fetch_render_task(aBlurSourceTaskAddress);
 
     vec4 src_rect = src_task.data0;
     vec4 target_rect = task.data0;
 
-#if defined WR_FEATURE_COLOR
+#if defined WR_FEATURE_COLOR_TARGET
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
 #else
     vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
 #endif
     vUv.z = src_task.data1.x;
-    vBlurRadius = 3 * int(task.data1.y);
+    vBlurRadius = int(3.0 * task.data1.y);
     vSigma = task.data1.y;
 
     switch (aBlurDirection) {
         case DIR_HORIZONTAL:
             vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
             break;
         case DIR_VERTICAL:
             vOffsetScale = vec2(0.0, 1.0 / texture_size.y);
@@ -64,17 +64,17 @@ void main(void) {
     vUv.xy = mix(uv0, uv1, aPosition.xy);
 
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
-#if defined WR_FEATURE_COLOR
+#if defined WR_FEATURE_COLOR_TARGET
 #define SAMPLE_TYPE vec4
 #define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
 #else
 #define SAMPLE_TYPE float
 #define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
 #endif
 
 // TODO(gw): Write a fast path blur that handles smaller blur radii
@@ -101,17 +101,17 @@ void main(void) {
     gauss_coefficient.y = exp(-0.5 / (vSigma * vSigma));
     gauss_coefficient.z = gauss_coefficient.y * gauss_coefficient.y;
 
     float gauss_coefficient_sum = 0.0;
     SAMPLE_TYPE avg_color = original_color * gauss_coefficient.x;
     gauss_coefficient_sum += gauss_coefficient.x;
     gauss_coefficient.xy *= gauss_coefficient.yz;
 
-    for (int i=1 ; i <= vBlurRadius/2 ; ++i) {
+    for (int i=1 ; i <= vBlurRadius ; ++i) {
         vec2 offset = vOffsetScale * float(i);
 
         vec2 st0 = clamp(vUv.xy - offset, vUvRect.xy, vUvRect.zw);
         avg_color += SAMPLE_TEXTURE(vec3(st0, vUv.z)) * gauss_coefficient.x;
 
         vec2 st1 = clamp(vUv.xy + offset, vUvRect.xy, vUvRect.zw);
         avg_color += SAMPLE_TEXTURE(vec3(st1, vUv.z)) * gauss_coefficient.x;
 
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -51,11 +51,11 @@ void main(void) {
 
     bool repeat_mask = false; //TODO
     vec2 clamped_mask_uv = repeat_mask ? fract(vClipMaskUv.xy) :
         clamp(vClipMaskUv.xy, vec2(0.0, 0.0), vec2(1.0, 1.0));
     vec2 source_uv = clamp(clamped_mask_uv * vClipMaskUvRect.zw + vClipMaskUvRect.xy,
         vClipMaskUvInnerRect.xy, vClipMaskUvInnerRect.zw);
     float clip_alpha = texture(sColor0, vec3(source_uv, vLayer)).r; //careful: texture has type A8
 
-    oFragColor = vec4(min(alpha, clip_alpha), 1.0, 1.0, 1.0);
+    oFragColor = vec4(alpha * clip_alpha, 1.0, 1.0, 1.0);
 }
 #endif
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -22,49 +22,40 @@ ClipRect fetch_clip_rect(ivec2 address) 
     return ClipRect(RectWithSize(data[0].xy, data[0].zw), data[1]);
 }
 
 struct ClipCorner {
     RectWithSize rect;
     vec4 outer_inner_radius;
 };
 
-ClipCorner fetch_clip_corner(ivec2 address) {
+// index is of type float instead of int because using an int led to shader
+// miscompilations with a macOS 10.12 Intel driver.
+ClipCorner fetch_clip_corner(ivec2 address, float index) {
+    address += ivec2(2 + 2 * int(index), 0);
     vec4 data[2] = fetch_from_resource_cache_2_direct(address);
     return ClipCorner(RectWithSize(data[0].xy, data[0].zw), data[1]);
 }
 
 struct ClipData {
     ClipRect rect;
     ClipCorner top_left;
     ClipCorner top_right;
     ClipCorner bottom_left;
     ClipCorner bottom_right;
 };
 
 ClipData fetch_clip(ivec2 address) {
     ClipData clip;
 
     clip.rect = fetch_clip_rect(address);
-
-    // Read the corners in groups of two texels, and adjust the read address
-    // before every read.
-    // The address adjustment is done inside this function, and not by passing
-    // the corner index to fetch_clip_corner and computing the correct address
-    // there, because doing so was hitting a driver bug on certain Intel macOS
-    // drivers which creates wrong results when doing arithmetic with integer
-    // variables (under certain, unknown, circumstances).
-    address.x += 2;
-    clip.top_left = fetch_clip_corner(address);
-    address.x += 2;
-    clip.top_right = fetch_clip_corner(address);
-    address.x += 2;
-    clip.bottom_left = fetch_clip_corner(address);
-    address.x += 2;
-    clip.bottom_right = fetch_clip_corner(address);
+    clip.top_left = fetch_clip_corner(address, 0.0);
+    clip.top_right = fetch_clip_corner(address, 1.0);
+    clip.bottom_left = fetch_clip_corner(address, 2.0);
+    clip.bottom_right = fetch_clip_corner(address, 3.0);
 
     return clip;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
     Layer layer = fetch_layer(cmi.layer_address);
@@ -106,16 +97,16 @@ void main(void) {
     vec2 local_pos = init_transform_fs(vPos, alpha);
 
     float clip_alpha = rounded_rect(local_pos,
                                     vClipCenter_Radius_TL,
                                     vClipCenter_Radius_TR,
                                     vClipCenter_Radius_BR,
                                     vClipCenter_Radius_BL);
 
-    float combined_alpha = min(alpha, clip_alpha);
+    float combined_alpha = alpha * clip_alpha;
 
     // Select alpha or inverse alpha depending on clip in/out.
     float final_alpha = mix(combined_alpha, 1.0 - combined_alpha, vClipMode);
 
     oFragColor = vec4(final_alpha, 0.0, 0.0, 1.0);
 }
 #endif
--- a/gfx/webrender/res/cs_text_run.glsl
+++ b/gfx/webrender/res/cs_text_run.glsl
@@ -1,64 +1,56 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#define PRIMITIVE_HAS_PICTURE_TASK
+
 #include shared,prim_shared
 
 varying vec3 vUv;
 flat varying vec4 vColor;
 
 #ifdef WR_VERTEX_SHADER
 // Draw a text run to a cache target. These are always
 // drawn un-transformed. These are used for effects such
 // as text-shadow.
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
     int resource_address = prim.user_data1;
-    int picture_address = prim.user_data2;
-
-    // Fetch the owning picture for this primitive. This allows the code
-    // below to normalize the glyph offsets relative to the original text
-    // shadow rect, which is the union of all elements that make up this
-    // text shadow. This allows the text shadow to be rendered at an
-    // arbitrary location in a render target (provided by the render
-    // task render_target_origin field).
-    PrimitiveGeometry shadow_geom = fetch_primitive_geometry(picture_address);
-    Picture pic = fetch_picture(picture_address + VECS_PER_PRIM_HEADER);
 
     Glyph glyph = fetch_glyph(prim.specific_prim_address,
                               glyph_index,
                               text.subpx_dir);
 
     GlyphResource res = fetch_glyph_resource(resource_address);
 
     // Glyphs size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
     vec2 size = (res.uv_rect.zw - res.uv_rect.xy) * res.scale;
     vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
-    vec2 origin = prim.task.render_target_origin +
-                  uDevicePixelRatio * (local_pos + pic.offset - shadow_geom.local_rect.p0);
+    vec2 origin = prim.task.target_rect.p0 +
+                  uDevicePixelRatio * (local_pos - prim.task.content_origin);
     vec4 local_rect = vec4(origin, size);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
     vec2 pos = mix(local_rect.xy,
                    local_rect.xy + local_rect.zw,
                    aPosition.xy);
 
     vUv = vec3(mix(st0, st1, aPosition.xy), res.layer);
-    vColor = pic.color;
+    vColor = prim.task.color;
 
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float a = texture(sColor0, vUv).a;
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -189,16 +189,57 @@ RenderTaskData fetch_render_task(int ind
 
     task.data0 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
     task.data1 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(1, 0));
     task.data2 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(2, 0));
 
     return task;
 }
 
+/*
+ The dynamic picture that this brush exists on. Right now, it
+ contains minimal information. In the future, it will describe
+ the transform mode of primitives on this picture, among other things.
+ */
+struct PictureTask {
+    RectWithSize target_rect;
+    float render_target_layer_index;
+    vec2 content_origin;
+    vec4 color;
+};
+
+PictureTask fetch_picture_task(int address) {
+    RenderTaskData task_data = fetch_render_task(address);
+
+    return PictureTask(
+        RectWithSize(task_data.data0.xy, task_data.data0.zw),
+        task_data.data1.x,
+        task_data.data1.yz,
+        task_data.data2
+    );
+}
+
+struct BlurTask {
+    RectWithSize target_rect;
+    float render_target_layer_index;
+    float blur_radius;
+    vec4 color;
+};
+
+BlurTask fetch_blur_task(int address) {
+    RenderTaskData task_data = fetch_render_task(address);
+
+    return BlurTask(
+        RectWithSize(task_data.data0.xy, task_data.data0.zw),
+        task_data.data1.x,
+        task_data.data1.y,
+        task_data.data2
+    );
+}
+
 struct AlphaBatchTask {
     vec2 screen_space_origin;
     vec2 render_target_origin;
     vec2 size;
     float render_target_layer_index;
 };
 
 AlphaBatchTask fetch_alpha_batch_task(int index) {
@@ -354,17 +395,21 @@ CompositeInstance fetch_composite_instan
     ci.user_data1 = aData1.y;
 
     return ci;
 }
 
 struct Primitive {
     Layer layer;
     ClipArea clip_area;
+#ifdef PRIMITIVE_HAS_PICTURE_TASK
+    PictureTask task;
+#else
     AlphaBatchTask task;
+#endif
     RectWithSize local_rect;
     RectWithSize local_clip_rect;
     int specific_prim_address;
     int user_data0;
     int user_data1;
     int user_data2;
     float z;
 };
@@ -382,17 +427,21 @@ PrimitiveGeometry fetch_primitive_geomet
 
 Primitive load_primitive() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     Primitive prim;
 
     prim.layer = fetch_layer(pi.layer_index);
     prim.clip_area = fetch_clip_area(pi.clip_task_index);
+#ifdef PRIMITIVE_HAS_PICTURE_TASK
+    prim.task = fetch_picture_task(pi.render_task_index);
+#else
     prim.task = fetch_alpha_batch_task(pi.render_task_index);
+#endif
 
     PrimitiveGeometry geom = fetch_primitive_geometry(pi.prim_address);
     prim.local_rect = geom.local_rect;
     prim.local_clip_rect = geom.local_clip_rect;
 
     prim.specific_prim_address = pi.specific_prim_address;
     prim.user_data0 = pi.user_data0;
     prim.user_data1 = pi.user_data1;
@@ -654,27 +703,16 @@ struct Rectangle {
     vec4 color;
 };
 
 Rectangle fetch_rectangle(int address) {
     vec4 data = fetch_from_resource_cache_1(address);
     return Rectangle(data);
 }
 
-struct Picture {
-    vec4 color;
-    vec2 offset;
-    float blur_radius;
-};
-
-Picture fetch_picture(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    return Picture(data[0], data[1].xy, data[1].z);
-}
-
 struct TextRun {
     vec4 color;
     vec2 offset;
     int subpx_dir;
 };
 
 TextRun fetch_text_run(int address) {
     vec4 data[2] = fetch_from_resource_cache_2(address);
--- a/gfx/webrender/res/ps_angle_gradient.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.glsl
@@ -58,22 +58,11 @@ void main(void) {
     }
 
     float offset = dot(pos - vStartPoint, vScaledDir);
 
     vec4 color = sample_gradient(vGradientAddress,
                                  offset,
                                  vGradientRepeat);
 
-    // Un-premultiply the color from sampling the gradient.
-    if (color.a > 0.0) {
-        color.rgb /= color.a;
-
-        // Apply the clip mask
-        color.a = min(color.a, do_clip());
-
-        // Pre-multiply the result.
-        color.rgb *= color.a;
-    }
-
-    oFragColor = color;
+    oFragColor = color * do_clip();
 }
 #endif
--- a/gfx/webrender/res/ps_border_corner.glsl
+++ b/gfx/webrender/res/ps_border_corner.glsl
@@ -105,20 +105,20 @@ void write_color(vec4 color0, vec4 color
         case SIDE_FIRST:
             color0.a = 0.0;
             break;
         case SIDE_SECOND:
             color1.a = 0.0;
             break;
     }
 
-    vColor00 = vec4(clamp(color0.rgb * modulate.x, vec3(0.0), vec3(1.0)), color0.a);
-    vColor01 = vec4(clamp(color0.rgb * modulate.y, vec3(0.0), vec3(1.0)), color0.a);
-    vColor10 = vec4(clamp(color1.rgb * modulate.z, vec3(0.0), vec3(1.0)), color1.a);
-    vColor11 = vec4(clamp(color1.rgb * modulate.w, vec3(0.0), vec3(1.0)), color1.a);
+    vColor00 = vec4(clamp(color0.rgb * modulate.x, vec3(0.0), vec3(color0.a)), color0.a);
+    vColor01 = vec4(clamp(color0.rgb * modulate.y, vec3(0.0), vec3(color0.a)), color0.a);
+    vColor10 = vec4(clamp(color1.rgb * modulate.z, vec3(0.0), vec3(color1.a)), color1.a);
+    vColor11 = vec4(clamp(color1.rgb * modulate.w, vec3(0.0), vec3(color1.a)), color1.a);
 }
 
 int select_style(int color_select, vec2 fstyle) {
     ivec2 style = ivec2(fstyle);
 
     switch (color_select) {
         case SIDE_BOTH:
         {
@@ -317,17 +317,17 @@ void main(void) {
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
     alpha = 0.0;
     vec2 local_pos = init_transform_fs(vLocalPos, alpha);
 #else
     vec2 local_pos = vLocalPos;
 #endif
 
-    alpha = min(alpha, do_clip());
+    alpha *= do_clip();
 
     float aa_range = compute_aa_range(local_pos);
 
     float distance_for_color;
     float color_mix_factor;
 
     // Only apply the clip AA if inside the clip region. This is
     // necessary for correctness when the border width is greater
@@ -394,11 +394,11 @@ void main(void) {
     vec4 color1 = mix(vColor10, vColor11, color_mix_factor);
 
     // Select color based on side of line. Get distance from the
     // reference line, and then apply AA along the edge.
     float ld = distance_to_line(vColorEdgeLine.xy, vColorEdgeLine.zw, local_pos);
     float m = distance_aa(aa_range, -ld);
     vec4 color = mix(color0, color1, m);
 
-    oFragColor = color * vec4(1.0, 1.0, 1.0, alpha);
+    oFragColor = color * alpha;
 }
 #endif
--- a/gfx/webrender/res/ps_border_edge.glsl
+++ b/gfx/webrender/res/ps_border_edge.glsl
@@ -70,17 +70,17 @@ void write_color0(vec4 color, float styl
             modulate = flip ? vec2(0.7, 1.3) : vec2(1.3, 0.7);
             break;
         }
         default:
             modulate = vec2(1.0);
             break;
     }
 
-    vColor0 = vec4(color.rgb * modulate.x, color.a);
+    vColor0 = vec4(min(color.rgb * modulate.x, vec3(color.a)), color.a);
 }
 
 void write_color1(vec4 color, float style, bool flip) {
     vec2 modulate;
 
     switch (int(style)) {
         case BORDER_STYLE_GROOVE:
         {
@@ -92,17 +92,17 @@ void write_color1(vec4 color, float styl
             modulate = flip ? vec2(0.7, 1.3) : vec2(1.3, 0.7);
             break;
         }
         default:
             modulate = vec2(1.0);
             break;
     }
 
-    vColor1 = vec4(color.rgb * modulate.y, color.a);
+    vColor1 = vec4(min(color.rgb * modulate.y, vec3(color.a)), color.a);
 }
 
 void write_clip_params(float style,
                        float border_width,
                        float edge_length,
                        float edge_offset,
                        float center_line) {
     // x = offset
@@ -245,17 +245,17 @@ void main(void) {
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
     alpha = 0.0;
     vec2 local_pos = init_transform_fs(vLocalPos, alpha);
 #else
     vec2 local_pos = vLocalPos;
 #endif
 
-    alpha = min(alpha, do_clip());
+    alpha *= do_clip();
 
     // Find the appropriate distance to apply the step over.
     float aa_range = compute_aa_range(local_pos);
 
     // Applies the math necessary to draw a style: double
     // border. In the case of a solid border, the vertex
     // shader sets interpolator values that make this have
     // no effect.
@@ -290,11 +290,11 @@ void main(void) {
     // Get the dot alpha
     vec2 dot_relative_pos = vec2(x, pos.x) - vClipParams.zw;
     float dot_distance = length(dot_relative_pos) - vClipParams.z;
     float dot_alpha = distance_aa(aa_range, dot_distance);
 
     // Select between dot/dash alpha based on clip mode.
     alpha = min(alpha, mix(dash_alpha, dot_alpha, vClipSelect));
 
-    oFragColor = color * vec4(1.0, 1.0, 1.0, alpha);
+    oFragColor = color * alpha;
 }
 #endif
deleted file mode 100644
--- a/gfx/webrender/res/ps_cache_image.glsl
+++ /dev/null
@@ -1,76 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include shared,prim_shared
-
-varying vec3 vUv;
-flat varying vec4 vUvBounds;
-
-#if defined WR_FEATURE_ALPHA
-flat varying vec4 vColor;
-#endif
-
-#ifdef WR_VERTEX_SHADER
-// Draw a cached primitive (e.g. a blurred text run) from the
-// target cache to the framebuffer, applying tile clip boundaries.
-
-void main(void) {
-    Primitive prim = load_primitive();
-
-    VertexInfo vi = write_vertex(prim.local_rect,
-                                 prim.local_clip_rect,
-                                 prim.z,
-                                 prim.layer,
-                                 prim.task,
-                                 prim.local_rect);
-
-    RenderTaskData child_task = fetch_render_task(prim.user_data1);
-    vUv.z = child_task.data1.x;
-
-#if defined WR_FEATURE_COLOR
-    vec2 texture_size = vec2(textureSize(sColor0, 0).xy);
-#else
-    Picture pic = fetch_picture(prim.specific_prim_address);
-
-    vec2 texture_size = vec2(textureSize(sColor1, 0).xy);
-    vColor = pic.color;
-#endif
-    vec2 uv0 = child_task.data0.xy;
-    vec2 uv1 = (child_task.data0.xy + child_task.data0.zw);
-
-    vec2 f = (vi.local_pos - prim.local_rect.p0) / prim.local_rect.size;
-
-    vUv.xy = mix(uv0 / texture_size,
-                 uv1 / texture_size,
-                 f);
-    vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy;
-
-    write_clip(vi.screen_pos, prim.clip_area);
-}
-#endif
-
-#ifdef WR_FRAGMENT_SHADER
-void main(void) {
-    vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
-
-#if defined WR_FEATURE_COLOR
-    vec4 color = texture(sColor0, vec3(uv, vUv.z));
-#else
-    vec4 color = vColor * texture(sColor1, vec3(uv, vUv.z)).r;
-#endif
-
-    // Un-premultiply the color from sampling the gradient.
-    if (color.a > 0.0) {
-        color.rgb /= color.a;
-
-        // Apply the clip mask
-        color.a = min(color.a, do_clip());
-
-        // Pre-multiply the result.
-        color.rgb *= color.a;
-    }
-
-    oFragColor = color;
-}
-#endif
--- a/gfx/webrender/res/ps_gradient.glsl
+++ b/gfx/webrender/res/ps_gradient.glsl
@@ -102,12 +102,12 @@ void main(void) {
 #ifdef WR_FEATURE_TRANSFORM
     float alpha = 0.0;
     vec2 local_pos = init_transform_fs(vLocalPos, alpha);
 #else
     float alpha = 1.0;
     vec2 local_pos = vPos;
 #endif
 
-    alpha = min(alpha, do_clip());
+    alpha *= do_clip();
     oFragColor = dither(vColor * alpha);
 }
 #endif
--- a/gfx/webrender/res/ps_image.glsl
+++ b/gfx/webrender/res/ps_image.glsl
@@ -94,17 +94,17 @@ void main(void) {
     vec2 upper_bound_mask = step(vLocalRect.zw, pos);
     vec2 relative_pos_in_rect = clamp(pos, vLocalRect.xy, vLocalRect.zw) - vLocalRect.xy;
 #else
     float alpha = 1.0;
     vec2 relative_pos_in_rect = vLocalPos;
     vec2 upper_bound_mask = vec2(0.0);
 #endif
 
-    alpha = min(alpha, do_clip());
+    alpha *= do_clip();
 
     // We calculate the particular tile this fragment belongs to, taking into
     // account the spacing in between tiles. We only paint if our fragment does
     // not fall into that spacing.
     // If the pixel is at the local rectangle upper bound, we force the current
     // tile upper bound in order to avoid wrapping.
     vec2 position_in_tile = mix(
         mod(relative_pos_in_rect, vStretchSize + vTileSpacing),
--- a/gfx/webrender/res/ps_line.glsl
+++ b/gfx/webrender/res/ps_line.glsl
@@ -1,12 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#ifdef WR_FEATURE_CACHE
+    #define PRIMITIVE_HAS_PICTURE_TASK
+#endif
+
 #include shared,prim_shared
 
 varying vec4 vColor;
 flat varying int vStyle;
 flat varying float vAxisSelect;
 flat varying vec4 vParams;
 flat varying vec2 vLocalOrigin;
 
@@ -17,23 +21,24 @@ varying vec2 vLocalPos;
 #endif
 
 #ifdef WR_VERTEX_SHADER
 #define LINE_ORIENTATION_VERTICAL       0
 #define LINE_ORIENTATION_HORIZONTAL     1
 
 struct Line {
     vec4 color;
+    float wavyLineThickness; // stroke thickness used by LINE_STYLE_WAVY; main() clamps it to >= 1.0
     float style;
     float orientation;
 };
 
 Line fetch_line(int address) {
     vec4 data[2] = fetch_from_resource_cache_2(address);
-    return Line(data[0], data[1].x, data[1].y);
+    return Line(data[0], data[1].x, data[1].y, data[1].z); // positional ctor: color, wavyLineThickness, style, orientation
 }
 
 void main(void) {
     Primitive prim = load_primitive();
     Line line = fetch_line(prim.specific_prim_address);
 
     vec2 pos, size;
 
@@ -53,69 +58,61 @@ void main(void) {
     vLocalOrigin = pos;
     vStyle = int(line.style);
 
     switch (vStyle) {
         case LINE_STYLE_SOLID: {
             break;
         }
         case LINE_STYLE_DASHED: {
-            // y = dash on + off length
-            // z = dash length
-            // w = center line of edge cross-axis (for dots only)
-            float desired_dash_length = size.y * 3.0;
-            // Consider half total length since there is an equal on/off for each dash.
-            float dash_count = 1.0 + ceil(size.x / desired_dash_length);
-            float dash_length = size.x / dash_count;
-            vParams = vec4(2.0 * dash_length,
-                           dash_length,
+            float dash_length = size.y * 3.0;
+            vParams = vec4(2.0 * dash_length, // period
+                           dash_length,       // dash length
                            0.0,
                            0.0);
             break;
         }
         case LINE_STYLE_DOTTED: {
             float diameter = size.y;
-            float radius = 0.5 * diameter;
-            float dot_count = ceil(0.5 * size.x / diameter);
-            float empty_space = size.x - dot_count * diameter;
-            float distance_between_centers = diameter + empty_space / dot_count;
+            float period = diameter * 2.0;
             float center_line = pos.y + 0.5 * size.y;
-            vParams = vec4(distance_between_centers,
-                           radius,
+            float max_x = floor(size.x / period) * period;
+            vParams = vec4(period,
+                           diameter / 2.0, // radius
                            center_line,
-                           0.0);
+                           max_x);
             break;
         }
         case LINE_STYLE_WAVY: {
-            // Choose some arbitrary values to scale thickness,
-            // wave period etc.
-            // TODO(gw): Tune these to get closer to what Gecko uses.
-            float thickness = 0.15 * size.y;
-            vParams = vec4(thickness,
-                           size.y * 0.5,
-                           size.y * 0.75,
-                           size.y * 0.5);
+            // This logic is copied from Gecko to get the same results.
+            float line_thickness = max(line.wavyLineThickness, 1.0);
+            // Difference in height between peaks and troughs
+            // (and since slopes are 45 degrees, the length of each slope)
+            float slope_length = size.y - line_thickness;
+            // Length of flat runs
+            float flat_length = max((line_thickness - 1.0) * 2.0, 1.0);
+
+            vParams = vec4(line_thickness / 2.0,
+                           slope_length,
+                           flat_length,
+                           size.y);
             break;
         }
     }
 
 #ifdef WR_FEATURE_CACHE
-    int picture_address = prim.user_data0;
-    PrimitiveGeometry picture_geom = fetch_primitive_geometry(picture_address);
-    Picture pic = fetch_picture(picture_address + VECS_PER_PRIM_HEADER);
-
-    vec2 device_origin = prim.task.render_target_origin +
-                         uDevicePixelRatio * (prim.local_rect.p0 + pic.offset - picture_geom.local_rect.p0);
+    vec2 device_origin = prim.task.target_rect.p0 +
+                         uDevicePixelRatio * (prim.local_rect.p0 - prim.task.content_origin);
     vec2 device_size = uDevicePixelRatio * prim.local_rect.size;
 
     vec2 device_pos = mix(device_origin,
                           device_origin + device_size,
                           aPosition.xy);
 
-    vColor = pic.color;
+    vColor = prim.task.color;
     vLocalPos = mix(prim.local_rect.p0,
                     prim.local_rect.p0 + prim.local_rect.size,
                     aPosition.xy);
 
     gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
 #else
     vColor = line.color;
 
@@ -137,16 +134,19 @@ void main(void) {
 
     vLocalPos = vi.local_pos;
     write_clip(vi.screen_pos, prim.clip_area);
 #endif
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
+
+#define MAGIC_WAVY_LINE_AA_SNAP         0.7
+
 float det(vec2 a, vec2 b) {
     return a.x * b.y - b.x * a.y;
 }
 
 // From: http://research.microsoft.com/en-us/um/people/hoppe/ravg.pdf
 vec2 get_distance_vector(vec2 b0, vec2 b1, vec2 b2) {
     float a = det(b0, b2);
     float b = 2.0 * det(b1, b0);
@@ -210,49 +210,60 @@ void main(void) {
         case LINE_STYLE_DOTTED: {
             // Get the main-axis position relative to closest dot or dash.
             float x = mod(pos.x - vLocalOrigin.x, vParams.x);
 
             // Get the dot alpha
             vec2 dot_relative_pos = vec2(x, pos.y) - vParams.yz;
             float dot_distance = length(dot_relative_pos) - vParams.y;
             alpha = min(alpha, distance_aa(aa_range, dot_distance));
+            // Clip off partial dots
+            alpha *= step(pos.x - vLocalOrigin.x, vParams.w);
             break;
         }
         case LINE_STYLE_WAVY: {
             vec2 normalized_local_pos = pos - vLocalOrigin.xy;
 
-            float y0 = vParams.y;
-            float dy = vParams.z;
-            float dx = vParams.w;
+            float half_line_thickness = vParams.x;
+            float slope_length = vParams.y;
+            float flat_length = vParams.z;
+            float vertical_bounds = vParams.w;
+            // Our pattern is just two slopes and two flats
+            float half_period = slope_length + flat_length;
 
-            // Flip the position of the bezier center points each
-            // wave period.
-            dy *= step(mod(normalized_local_pos.x, 4.0 * dx), 2.0 * dx) * 2.0 - 1.0;
+            float mid_height = vertical_bounds / 2.0;
+            float peak_offset = mid_height - half_line_thickness;
+            // Flip the wave every half period
+            float flip = -2.0 * (step(mod(normalized_local_pos.x, 2.0 * half_period), half_period) - 0.5);
+            // flip is -1.0 in the first half of each period and +1.0 in the second.
+            peak_offset *= flip;
+            float peak_height = mid_height + peak_offset;
 
-            // Convert pos to a local position within one wave period.
-            normalized_local_pos.x = dx + mod(normalized_local_pos.x, 2.0 * dx);
+            // Convert pos to a local position within one half period
+            normalized_local_pos.x = mod(normalized_local_pos.x, half_period);
 
-            // Evaluate SDF to the first bezier.
-            vec2 b0_0 = vec2(0.0 * dx,  y0);
-            vec2 b1_0 = vec2(1.0 * dx,  y0 - dy);
-            vec2 b2_0 = vec2(2.0 * dx,  y0);
-            float d1 = approx_distance(normalized_local_pos, b0_0, b1_0, b2_0);
+            // Compute signed distance to the 3 lines that make up an arc
+            float dist1 = distance_to_line(vec2(0.0, peak_height),
+                                           vec2(1.0, -flip),
+                                           normalized_local_pos);
+            float dist2 = distance_to_line(vec2(0.0, peak_height),
+                                           vec2(0, -flip),
+                                           normalized_local_pos);
+            float dist3 = distance_to_line(vec2(flat_length, peak_height),
+                                           vec2(-1.0, -flip),
+                                           normalized_local_pos);
+            float dist = abs(max(max(dist1, dist2), dist3));
 
-            // Evaluate SDF to the second bezier.
-            vec2 b0_1 = vec2(2.0 * dx,  y0);
-            vec2 b1_1 = vec2(3.0 * dx,  y0 + dy);
-            vec2 b2_1 = vec2(4.0 * dx,  y0);
-            float d2 = approx_distance(normalized_local_pos, b0_1, b1_1, b2_1);
+            // Apply AA based on the thickness of the wave
+            alpha = distance_aa(aa_range, dist - half_line_thickness);
 
-            // SDF union - this is needed to avoid artifacts where the
-            // bezier curves join.
-            float d = min(d1, d2);
+            // Disable AA for thin lines
+            if (half_line_thickness <= 1.0) {
+                alpha = 1.0 - step(alpha, MAGIC_WAVY_LINE_AA_SNAP);
+            }
 
-            // Apply AA based on the thickness of the wave.
-            alpha = distance_aa(aa_range, d - vParams.x);
             break;
         }
     }
 
     oFragColor = vColor * vec4(1.0, 1.0, 1.0, alpha);
 }
 #endif
--- a/gfx/webrender/res/ps_radial_gradient.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.glsl
@@ -105,22 +105,11 @@ void main(void) {
             discard;
         }
     }
 
     vec4 color = sample_gradient(vGradientAddress,
                                  offset,
                                  vGradientRepeat);
 
-    // Un-premultiply the color from sampling the gradient.
-    if (color.a > 0.0) {
-        color.rgb /= color.a;
-
-        // Apply the clip mask
-        color.a = min(color.a, do_clip());
-
-        // Pre-multiply the result.
-        color.rgb *= color.a;
-    }
-
-    oFragColor = color;
+    oFragColor = color * do_clip();
 }
 #endif
--- a/gfx/webrender/res/ps_rectangle.glsl
+++ b/gfx/webrender/res/ps_rectangle.glsl
@@ -42,13 +42,13 @@ void main(void) {
 void main(void) {
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
     alpha = 0.0;
     init_transform_fs(vLocalPos, alpha);
 #endif
 
 #ifdef WR_FEATURE_CLIP
-    alpha = min(alpha, do_clip());
+    alpha *= do_clip();
 #endif
-    oFragColor = vColor * vec4(1.0, 1.0, 1.0, alpha);
+    oFragColor = vColor * alpha;
 }
 #endif
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -8,16 +8,22 @@ flat varying vec4 vColor;
 varying vec3 vUv;
 flat varying vec4 vUvBorder;
 
 #ifdef WR_FEATURE_TRANSFORM
 varying vec3 vLocalPos;
 #endif
 
 #ifdef WR_VERTEX_SHADER
+
+#define MODE_ALPHA          0
+#define MODE_SUBPX_PASS0    1
+#define MODE_SUBPX_PASS1    2
+#define MODE_COLOR_BITMAP   3
+
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
     int resource_address = prim.user_data1;
 
     Glyph glyph = fetch_glyph(prim.specific_prim_address,
@@ -48,54 +54,42 @@ void main(void) {
                                  prim.layer,
                                  prim.task,
                                  local_rect);
     vec2 f = (vi.local_pos - local_rect.p0) / local_rect.size;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
+    switch (uMode) {
+        case MODE_ALPHA:
+        case MODE_SUBPX_PASS1:
+            vColor = text.color;
+            break;
+        case MODE_SUBPX_PASS0:
+        case MODE_COLOR_BITMAP:
+            vColor = vec4(text.color.a);
+            break;
+    }
+
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
-    vColor = vec4(text.color.rgb * text.color.a, text.color.a);
     vUv = vec3(mix(st0, st1, f), res.layer);
     vUvBorder = (res.uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
-
-#define MODE_ALPHA          0
-#define MODE_SUBPX_PASS0    1
-#define MODE_SUBPX_PASS1    2
-
 void main(void) {
     vec3 tc = vec3(clamp(vUv.xy, vUvBorder.xy, vUvBorder.zw), vUv.z);
     vec4 color = texture(sColor0, tc);
 
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
     init_transform_fs(vLocalPos, alpha);
 #endif
     alpha *= do_clip();
 
-    // TODO(gw): It would be worth profiling this and seeing
-    //           if we should instead handle the mode via
-    //           a combination of mix() etc. Branching on
-    //           a uniform is probably fast in most GPUs now though?
-    vec4 modulate_color = vec4(0.0);
-    switch (uMode) {
-        case MODE_ALPHA:
-            modulate_color = alpha * vColor;
-            break;
-        case MODE_SUBPX_PASS0:
-            modulate_color = vec4(alpha) * vColor.a;
-            break;
-        case MODE_SUBPX_PASS1:
-            modulate_color = alpha * vColor;
-            break;
-    }
-
-    oFragColor = color * modulate_color;
+    oFragColor = color * vColor * alpha;
 }
 #endif
--- a/gfx/webrender/res/ps_yuv_image.glsl
+++ b/gfx/webrender/res/ps_yuv_image.glsl
@@ -157,17 +157,17 @@ void main(void) {
     // We clamp the texture coordinate calculation here to the local rectangle boundaries,
     // which makes the edge of the texture stretch instead of repeat.
     vec2 relative_pos_in_rect = clamp(pos, vLocalRect.xy, vLocalRect.zw) - vLocalRect.xy;
 #else
     float alpha = 1.0;;
     vec2 relative_pos_in_rect = vLocalPos;
 #endif
 
-    alpha = min(alpha, do_clip());
+    alpha *= do_clip();
 
     // We clamp the texture coordinates to the half-pixel offset from the borders
     // in order to avoid sampling outside of the texture area.
     vec2 st_y = vTextureOffsetY + clamp(
         relative_pos_in_rect / vStretchSize * vTextureSizeY,
         vHalfTexelY, vTextureSizeY - vHalfTexelY);
 #ifndef WR_FEATURE_INTERLEAVED_Y_CB_CR
     vec2 uv_offset = clamp(
--- a/gfx/webrender/res/shared.glsl
+++ b/gfx/webrender/res/shared.glsl
@@ -22,32 +22,32 @@
 // In normal case, we use textureLod(). We haven't used the lod yet. So, we always pass 0.0 now.
 #define TEX_SAMPLE(sampler, tex_coord) textureLod(sampler, tex_coord, 0.0)
 #endif
 
 //======================================================================================
 // Vertex shader attributes and uniforms
 //======================================================================================
 #ifdef WR_VERTEX_SHADER
+    // A generic uniform that shaders can optionally use to configure
+    // an operation mode for this batch.
+    uniform int uMode;
+
     // Uniform inputs
     uniform mat4 uTransform;       // Orthographic projection
     uniform float uDevicePixelRatio;
 
     // Attribute inputs
     in vec3 aPosition;
 #endif
 
 //======================================================================================
 // Fragment shader attributes and uniforms
 //======================================================================================
 #ifdef WR_FRAGMENT_SHADER
-    // A generic uniform that shaders can optionally use to configure
-    // an operation mode for this batch.
-    uniform int uMode;
-
     // Uniform inputs
 
     // Fragment shader outputs
     out vec4 oFragColor;
 #endif
 
 //======================================================================================
 // Shared shader uniforms
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -238,20 +238,20 @@ impl FrameBuilder {
     ) {
         let radius = &border.radius;
         let left = &border.left;
         let right = &border.right;
         let top = &border.top;
         let bottom = &border.bottom;
 
         // These colors are used during inset/outset scaling.
-        let left_color = left.border_color(1.0, 2.0 / 3.0, 0.3, 0.7);
-        let top_color = top.border_color(1.0, 2.0 / 3.0, 0.3, 0.7);
-        let right_color = right.border_color(2.0 / 3.0, 1.0, 0.7, 0.3);
-        let bottom_color = bottom.border_color(2.0 / 3.0, 1.0, 0.7, 0.3);
+        let left_color = left.border_color(1.0, 2.0 / 3.0, 0.3, 0.7).premultiplied();
+        let top_color = top.border_color(1.0, 2.0 / 3.0, 0.3, 0.7).premultiplied();
+        let right_color = right.border_color(2.0 / 3.0, 1.0, 0.7, 0.3).premultiplied();
+        let bottom_color = bottom.border_color(2.0 / 3.0, 1.0, 0.7, 0.3).premultiplied();
 
         let prim_cpu = BorderPrimitiveCpu {
             corner_instances,
 
             // TODO(gw): In the future, we will build these on demand
             //           from the deserialized display list, rather
             //           than creating it immediately.
             gpu_blocks: [
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/box_shadow.rs
@@ -0,0 +1,307 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorF, LayerPoint, LayerRect, LayerSize, LayerVector2D};
+use api::{BorderRadius, BoxShadowClipMode, LayoutSize, LayerPrimitiveInfo};
+use api::{ClipMode, ComplexClipRegion, LocalClip, ClipAndScrollInfo};
+use clip::ClipSource;
+use frame_builder::FrameBuilder;
+use prim_store::{PrimitiveContainer, RectanglePrimitive, BrushPrimitive};
+use picture::PicturePrimitive;
+use util::RectHelpers;
+
+// The blur shader samples BLUR_SAMPLE_SCALE * blur_radius surrounding texels.
+pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
+// `add_box_shadow` emits a clipped solid rectangle when blur_radius == 0.0, otherwise a blurred picture wrapping a brush.
+impl FrameBuilder {
+    pub fn add_box_shadow(
+        &mut self,
+        clip_and_scroll: ClipAndScrollInfo,
+        prim_info: &LayerPrimitiveInfo,
+        box_offset: &LayerVector2D,
+        color: &ColorF,
+        blur_radius: f32,
+        spread_radius: f32,
+        border_radius: BorderRadius,
+        clip_mode: BoxShadowClipMode,
+    ) {
+        if color.a == 0.0 {
+            return;
+        }
+        // Inset shadows negate the spread, so a positive spread shrinks the shadow rect via `inflate` below.
+        let spread_amount = match clip_mode {
+            BoxShadowClipMode::Outset => {
+                spread_radius
+            }
+            BoxShadowClipMode::Inset => {
+                -spread_radius
+            }
+        };
+
+        let shadow_radius = adjust_border_radius_for_box_shadow(
+            border_radius,
+            spread_amount,
+        );
+        let shadow_rect = prim_info.rect
+                                   .translate(box_offset)
+                                   .inflate(spread_amount, spread_amount);
+        // No blur: the shadow is a solid rectangle shaped entirely by clips.
+        if blur_radius == 0.0 {
+            let mut clips = Vec::new();
+
+            let fast_info = match clip_mode {
+                BoxShadowClipMode::Outset => {
+                    // TODO(gw): Add a fast path for ClipOut + zero border radius!
+                    clips.push(ClipSource::RoundedRectangle(
+                        prim_info.rect,
+                        border_radius,
+                        ClipMode::ClipOut
+                    ));
+
+                    LayerPrimitiveInfo::with_clip(
+                        shadow_rect,
+                        LocalClip::RoundedRect(
+                            shadow_rect,
+                            ComplexClipRegion::new(
+                                shadow_rect,
+                                shadow_radius,
+                                ClipMode::Clip,
+                            ),
+                        ),
+                    )
+                }
+                BoxShadowClipMode::Inset => {
+                    clips.push(ClipSource::RoundedRectangle(
+                        shadow_rect,
+                        shadow_radius,
+                        ClipMode::ClipOut
+                    ));
+
+                    LayerPrimitiveInfo::with_clip(
+                        prim_info.rect,
+                        LocalClip::RoundedRect(
+                            prim_info.rect,
+                            ComplexClipRegion::new(
+                                prim_info.rect,
+                                border_radius,
+                                ClipMode::Clip
+                            ),
+                        ),
+                    )
+                }
+            };
+
+            self.add_primitive(
+                clip_and_scroll,
+                &fast_info,
+                clips,
+                PrimitiveContainer::Rectangle(RectanglePrimitive {
+                    color: *color,
+                }),
+            );
+        } else {
+            let blur_offset = 2.0 * blur_radius;
+            let mut extra_clips = vec![];
+            let mut blur_regions = vec![];
+
+            match clip_mode {
+                BoxShadowClipMode::Outset => {
+                    let brush_prim = BrushPrimitive {
+                        clip_mode: ClipMode::Clip,
+                        radius: shadow_radius,
+                    };
+                    // Origin (blur_offset, blur_offset) equals shadow_rect's offset inside the inflated pic_rect below.
+                    let brush_rect = LayerRect::new(LayerPoint::new(blur_offset, blur_offset),
+                                                    shadow_rect.size);
+
+                    let brush_info = LayerPrimitiveInfo::new(brush_rect);
+
+                    let brush_prim_index = self.create_primitive(
+                        clip_and_scroll,
+                        &brush_info,
+                        Vec::new(),
+                        PrimitiveContainer::Brush(brush_prim),
+                    );
+
+                    let pic_rect = shadow_rect.inflate(blur_offset, blur_offset);
+                    let blur_range = BLUR_SAMPLE_SCALE * blur_radius;
+
+                    let size = pic_rect.size;
+                    // Per-corner extent: max(blur radius, corner radius) scaled by BLUR_SAMPLE_SCALE.
+                    let tl = LayerSize::new(
+                        blur_radius.max(border_radius.top_left.width),
+                        blur_radius.max(border_radius.top_left.height)
+                    ) * BLUR_SAMPLE_SCALE;
+                    let tr = LayerSize::new(
+                        blur_radius.max(border_radius.top_right.width),
+                        blur_radius.max(border_radius.top_right.height)
+                    ) * BLUR_SAMPLE_SCALE;
+                    let br = LayerSize::new(
+                        blur_radius.max(border_radius.bottom_right.width),
+                        blur_radius.max(border_radius.bottom_right.height)
+                    ) * BLUR_SAMPLE_SCALE;
+                    let bl = LayerSize::new(
+                        blur_radius.max(border_radius.bottom_left.width),
+                        blur_radius.max(border_radius.bottom_left.height)
+                    ) * BLUR_SAMPLE_SCALE;
+
+                    let max_width = tl.width.max(tr.width.max(bl.width.max(br.width)));
+                    let max_height = tl.height.max(tr.height.max(bl.height.max(br.height)));
+
+                    // Apply a conservative test that if any of the blur regions below
+                    // will overlap, we won't bother applying the region optimization
+                    // and will just blur the entire thing. This should only happen
+                    // in rare cases, where either the blur radius or border radius
+                    // is very large, in which case there's no real point in trying
+                    // to only blur a small region anyway.
+                    if max_width < 0.5 * size.width && max_height < 0.5 * size.height {
+                        blur_regions.push(LayerRect::from_floats(0.0, 0.0, tl.width, tl.height));
+                        blur_regions.push(LayerRect::from_floats(size.width - tr.width, 0.0, size.width, tr.height));
+                        blur_regions.push(LayerRect::from_floats(size.width - br.width, size.height - br.height, size.width, size.height));
+                        blur_regions.push(LayerRect::from_floats(0.0, size.height - bl.height, bl.width, size.height));
+                        // Edge strips between the corners: left, right, top, bottom (assuming from_floats takes x0, y0, x1, y1).
+                        blur_regions.push(LayerRect::from_floats(0.0, tl.height, blur_range, size.height - bl.height));
+                        blur_regions.push(LayerRect::from_floats(size.width - blur_range, tr.height, size.width, size.height - br.height));
+                        blur_regions.push(LayerRect::from_floats(tl.width, 0.0, size.width - tr.width, blur_range));
+                        blur_regions.push(LayerRect::from_floats(bl.width, size.height - blur_range, size.width - br.width, size.height));
+                    }
+
+                    let mut pic_prim = PicturePrimitive::new_box_shadow(
+                        blur_radius,
+                        *color,
+                        blur_regions,
+                        BoxShadowClipMode::Outset,
+                    );
+
+                    pic_prim.add_primitive(
+                        brush_prim_index,
+                        &brush_rect,
+                        clip_and_scroll
+                    );
+
+                    pic_prim.build();
+
+                    extra_clips.push(ClipSource::RoundedRectangle(
+                        prim_info.rect,
+                        border_radius,
+                        ClipMode::ClipOut,
+                    ));
+
+                    let pic_info = LayerPrimitiveInfo::new(pic_rect);
+
+                    self.add_primitive(
+                        clip_and_scroll,
+                        &pic_info,
+                        extra_clips,
+                        PrimitiveContainer::Picture(pic_prim),
+                    );
+                }
+                BoxShadowClipMode::Inset => {
+                    let brush_prim = BrushPrimitive {
+                        clip_mode: ClipMode::ClipOut,
+                        radius: shadow_radius,
+                    };
+                    // Re-express shadow_rect relative to prim_info.rect's origin, shifted by the blur margin.
+                    let mut brush_rect = shadow_rect;
+                    brush_rect.origin.x = brush_rect.origin.x - prim_info.rect.origin.x + blur_offset;
+                    brush_rect.origin.y = brush_rect.origin.y - prim_info.rect.origin.y + blur_offset;
+
+                    let brush_info = LayerPrimitiveInfo::new(brush_rect);
+
+                    let brush_prim_index = self.create_primitive(
+                        clip_and_scroll,
+                        &brush_info,
+                        Vec::new(),
+                        PrimitiveContainer::Brush(brush_prim),
+                    );
+
+                    let pic_rect = prim_info.rect.inflate(blur_offset, blur_offset);
+
+                    // TODO(gw): Apply minimal blur regions for inset box shadows.
+
+                    let mut pic_prim = PicturePrimitive::new_box_shadow(
+                        blur_radius,
+                        *color,
+                        blur_regions,
+                        BoxShadowClipMode::Inset,
+                    );
+
+                    pic_prim.add_primitive(
+                        brush_prim_index,
+                        &prim_info.rect,
+                        clip_and_scroll
+                    );
+
+                    pic_prim.build();
+
+                    extra_clips.push(ClipSource::RoundedRectangle(
+                        prim_info.rect,
+                        border_radius,
+                        ClipMode::Clip,
+                    ));
+
+                    let pic_info = LayerPrimitiveInfo::with_clip_rect(pic_rect, prim_info.rect);
+
+                    self.add_primitive(
+                        clip_and_scroll,
+                        &pic_info,
+                        extra_clips,
+                        PrimitiveContainer::Picture(pic_prim),
+                    );
+                }
+            }
+        }
+    }
+}
+// Returns `radius` with `spread_amount` applied to each of the four corners via adjust_corner_for_box_shadow.
+fn adjust_border_radius_for_box_shadow(
+    radius: BorderRadius,
+    spread_amount: f32,
+) -> BorderRadius {
+    BorderRadius {
+        top_left: adjust_corner_for_box_shadow(
+            radius.top_left,
+            spread_amount,
+        ),
+        top_right: adjust_corner_for_box_shadow(
+            radius.top_right,
+            spread_amount,
+        ),
+        bottom_right: adjust_corner_for_box_shadow(
+            radius.bottom_right,
+            spread_amount,
+        ),
+        bottom_left: adjust_corner_for_box_shadow(
+            radius.bottom_left,
+            spread_amount,
+        ),
+    }
+}
+// Applies `spread_amount` to a corner's width and height independently via adjust_radius_for_box_shadow.
+fn adjust_corner_for_box_shadow(
+    corner: LayoutSize,
+    spread_amount: f32,
+) -> LayoutSize {
+    LayoutSize::new(
+        adjust_radius_for_box_shadow(
+            corner.width,
+            spread_amount
+        ),
+        adjust_radius_for_box_shadow(
+            corner.height,
+            spread_amount
+        ),
+    )
+}
+// Adds `spread_amount` to a positive radius, clamping the result at 0.0; a radius that is not > 0.0 stays 0.0.
+fn adjust_radius_for_box_shadow(
+    border_radius: f32,
+    spread_amount: f32,
+) -> f32 {
+    if border_radius > 0.0 {
+        (border_radius + spread_amount).max(0.0)
+    } else {
+        0.0
+    }
+}
--- a/gfx/webrender/src/clip_scroll_tree.rs
+++ b/gfx/webrender/src/clip_scroll_tree.rs
@@ -508,33 +508,37 @@ impl ClipScrollTree {
 
     fn print_node<T: PrintTreePrinter>(&self, id: &ClipId, pt: &mut T, clip_store: &ClipStore) {
         let node = self.nodes.get(id).unwrap();
 
         match node.node_type {
             NodeType::Clip(ref info) => {
                 pt.new_level("Clip".to_owned());
 
+                pt.add_item(format!("id: {:?}", id));
                 let clips = clip_store.get(&info.clip_sources).clips();
                 pt.new_level(format!("Clip Sources [{}]", clips.len()));
                 for source in clips {
                     pt.add_item(format!("{:?}", source));
                 }
                 pt.end_level();
             }
             NodeType::ReferenceFrame(ref info) => {
                 pt.new_level(format!("ReferenceFrame {:?}", info.transform));
+                pt.add_item(format!("id: {:?}", id));
             }
             NodeType::ScrollFrame(scrolling_info) => {
                 pt.new_level(format!("ScrollFrame"));
+                pt.add_item(format!("id: {:?}", id));
                 pt.add_item(format!("scrollable_size: {:?}", scrolling_info.scrollable_size));
                 pt.add_item(format!("scroll.offset: {:?}", scrolling_info.offset));
             }
             NodeType::StickyFrame(sticky_frame_info, sticky_offset) => {
                 pt.new_level(format!("StickyFrame"));
+                pt.add_item(format!("id: {:?}", id));
                 pt.add_item(format!("sticky info: {:?}", sticky_frame_info));
                 pt.add_item(format!("sticky offset: {:?}", sticky_offset));
             }
         }
 
         pt.add_item(format!(
             "local_viewport_rect: {:?}",
             node.local_viewport_rect
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -1,65 +1,53 @@
 
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayListIter, ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion};
-use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, Epoch, FilterOp, HitTestFlags};
-use api::{HitTestResult, ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
+use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, Epoch, FilterOp};
+use api::{ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutSize, LayoutTransform};
 use api::{LocalClip, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLayerState};
 use api::{ScrollLocation, ScrollPolicy, ScrollSensitivity, SpecificDisplayItem, StackingContext};
 use api::{ClipMode, TileOffset, TransformStyle, WorldPoint};
 use clip::ClipRegion;
 use clip_scroll_tree::{ClipScrollTree, ScrollStates};
 use euclid::rect;
 use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, RendererFrame};
 use profiler::{GpuCacheProfileCounters, TextureCacheProfileCounters};
-use resource_cache::{ResourceCache, TiledImageMap};
+use resource_cache::{FontInstanceMap,ResourceCache, TiledImageMap};
 use scene::{Scene, StackingContextHelpers, ScenePipeline};
-use tiling::{CompositeOps, PrimitiveFlags};
+use tiling::{CompositeOps, Frame, PrimitiveFlags};
 use util::{subtract_rect, ComplexClipRegionHelpers};
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Eq, Ord)]
 pub struct FrameId(pub u32);
 
 static DEFAULT_SCROLLBAR_COLOR: ColorF = ColorF {
     r: 0.3,
     g: 0.3,
     b: 0.3,
     a: 0.6,
 };
 
 struct FlattenContext<'a> {
     scene: &'a Scene,
-    builder: &'a mut FrameBuilder,
-    resource_cache: &'a ResourceCache,
+    builder: FrameBuilder,
+    clip_scroll_tree: &'a mut ClipScrollTree,
+    font_instances: FontInstanceMap,
     tiled_image_map: TiledImageMap,
+    pipeline_epochs: Vec<(PipelineId, Epoch)>,
     replacements: Vec<(ClipId, ClipId)>,
 }
 
 impl<'a> FlattenContext<'a> {
-    fn new(
-        scene: &'a Scene,
-        builder: &'a mut FrameBuilder,
-        resource_cache: &'a ResourceCache,
-    ) -> FlattenContext<'a> {
-        FlattenContext {
-            scene,
-            builder,
-            resource_cache,
-            tiled_image_map: resource_cache.get_tiled_image_map(),
-            replacements: Vec::new(),
-        }
-    }
-
     /// Since WebRender still handles fixed position and reference frame content internally
     /// we need to apply this table of id replacements only to the id that affects the
     /// position of a node. We can eventually remove this when clients start handling
     /// reference frames themselves. This method applies these replacements.
     fn apply_scroll_frame_id_replacement(&self, id: ClipId) -> ClipId {
         match self.replacements.last() {
             Some(&(to_replace, replacement)) if to_replace == id => replacement,
             _ => id,
@@ -78,493 +66,423 @@ impl<'a> FlattenContext<'a> {
         self.scene
             .pipelines
             .get(&pipeline_id)
             .expect("No display list?")
             .display_list
             .get(complex_clips)
             .collect()
     }
-}
+
+    fn flatten_root(
+        &mut self,
+        traversal: &mut BuiltDisplayListIter<'a>,
+        pipeline_id: PipelineId,
+        content_size: &LayoutSize,
+    ) {
+        self.builder.push_stacking_context(
+            &LayerVector2D::zero(),
+            pipeline_id,
+            CompositeOps::default(),
+            TransformStyle::Flat,
+            true,
+            true,
+        );
+
+        // We do this here, rather than above because we want any of the top-level
+        // stacking contexts in the display list to be treated like root stacking contexts.
+        // FIXME(mrobinson): Currently only the first one will, which for the moment is
+        // sufficient for all our use cases.
+        self.builder.notify_waiting_for_root_stacking_context();
 
-// TODO: doc
-pub struct Frame {
-    pub clip_scroll_tree: ClipScrollTree,
-    pub pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
-    id: FrameId,
-    frame_builder_config: FrameBuilderConfig,
-    pub frame_builder: Option<FrameBuilder>,
-}
+        // For the root pipeline, there's no need to add a full screen rectangle
+        // here, as it's handled by the framebuffer clear.
+        let clip_id = ClipId::root_scroll_node(pipeline_id);
+        if self.scene.root_pipeline_id != Some(pipeline_id) {
+            if let Some(pipeline) = self.scene.pipelines.get(&pipeline_id) {
+                if let Some(bg_color) = pipeline.background_color {
+                    let root_bounds = LayerRect::new(LayerPoint::zero(), *content_size);
+                    let info = LayerPrimitiveInfo::new(root_bounds);
+                    self.builder.add_solid_rectangle(
+                        ClipAndScrollInfo::simple(clip_id),
+                        &info,
+                        &bg_color,
+                        PrimitiveFlags::None,
+                    );
+                }
+            }
+        }
 
-impl Frame {
-    pub fn new(config: FrameBuilderConfig) -> Frame {
-        Frame {
-            pipeline_epoch_map: FastHashMap::default(),
-            clip_scroll_tree: ClipScrollTree::new(),
-            id: FrameId(0),
-            frame_builder: None,
-            frame_builder_config: config,
+
+        self.flatten_items(traversal, pipeline_id, LayerVector2D::zero());
+
+        if self.builder.config.enable_scrollbars {
+            let scrollbar_rect = LayerRect::new(LayerPoint::zero(), LayerSize::new(10.0, 70.0));
+            let info = LayerPrimitiveInfo::new(scrollbar_rect);
+
+            self.builder.add_solid_rectangle(
+                ClipAndScrollInfo::simple(clip_id),
+                &info,
+                &DEFAULT_SCROLLBAR_COLOR,
+                PrimitiveFlags::Scrollbar(self.clip_scroll_tree.topmost_scrolling_node_id(), 4.0),
+            );
         }
+
+        self.builder.pop_stacking_context();
     }
 
-    pub fn reset(&mut self) -> ScrollStates {
-        self.pipeline_epoch_map.clear();
-
-        // Advance to the next frame.
-        self.id.0 += 1;
-
-        self.clip_scroll_tree.drain()
-    }
-
-    pub fn get_scroll_node_state(&self) -> Vec<ScrollLayerState> {
-        self.clip_scroll_tree.get_scroll_node_state()
-    }
-
-    /// Returns true if the node actually changed position or false otherwise.
-    pub fn scroll_node(&mut self, origin: LayerPoint, id: ClipId, clamp: ScrollClamping) -> bool {
-        self.clip_scroll_tree.scroll_node(origin, id, clamp)
-    }
+    fn flatten_items(
+        &mut self,
+        traversal: &mut BuiltDisplayListIter<'a>,
+        pipeline_id: PipelineId,
+        reference_frame_relative_offset: LayerVector2D,
+    ) {
+        loop {
+            let subtraversal = {
+                let item = match traversal.next() {
+                    Some(item) => item,
+                    None => break,
+                };
 
-    /// Returns true if any nodes actually changed position or false otherwise.
-    pub fn scroll(
-        &mut self,
-        scroll_location: ScrollLocation,
-        cursor: WorldPoint,
-        phase: ScrollEventPhase,
-    ) -> bool {
-        self.clip_scroll_tree.scroll(scroll_location, cursor, phase)
-    }
+                if SpecificDisplayItem::PopStackingContext == *item.item() {
+                    return;
+                }
 
-    pub fn hit_test(&mut self,
-                    pipeline_id: Option<PipelineId>,
-                    point: WorldPoint,
-                    flags: HitTestFlags)
-                    -> HitTestResult {
-        if let Some(ref builder) = self.frame_builder {
-            builder.hit_test(&self.clip_scroll_tree, pipeline_id, point, flags)
-        } else {
-            HitTestResult::default()
+                self.flatten_item(item, pipeline_id, reference_frame_relative_offset)
+            };
+
+            // If flatten_item created a sub-traversal, we need `traversal` to have the
+            // same state as the completed subtraversal, so we reinitialize it here.
+            if let Some(subtraversal) = subtraversal {
+                *traversal = subtraversal;
+            }
         }
     }
 
-    pub fn tick_scrolling_bounce_animations(&mut self) {
-        self.clip_scroll_tree.tick_scrolling_bounce_animations();
-    }
-
-    pub fn discard_frame_state_for_pipeline(&mut self, pipeline_id: PipelineId) {
-        self.clip_scroll_tree
-            .discard_frame_state_for_pipeline(pipeline_id);
-    }
-
-    pub fn create(
+    fn flatten_clip(
         &mut self,
-        scene: &Scene,
-        resource_cache: &mut ResourceCache,
-        window_size: DeviceUintSize,
-        inner_rect: DeviceUintRect,
-        device_pixel_ratio: f32,
-    ) {
-        let root_pipeline_id = match scene.root_pipeline_id {
-            Some(root_pipeline_id) => root_pipeline_id,
-            None => return,
-        };
-
-        let root_pipeline = match scene.pipelines.get(&root_pipeline_id) {
-            Some(root_pipeline) => root_pipeline,
-            None => return,
-        };
-
-        if window_size.width == 0 || window_size.height == 0 {
-            error!("ERROR: Invalid window dimensions! Please call api.set_window_size()");
-        }
-
-        let old_scrolling_states = self.reset();
-
-        self.pipeline_epoch_map
-            .insert(root_pipeline_id, root_pipeline.epoch);
-
-        let background_color = root_pipeline
-            .background_color
-            .and_then(|color| if color.a > 0.0 { Some(color) } else { None });
-
-        let mut frame_builder = FrameBuilder::new(
-            self.frame_builder.take(),
-            window_size,
-            background_color,
-            self.frame_builder_config,
-        );
-
-        {
-            let mut context = FlattenContext::new(scene, &mut frame_builder, resource_cache);
-
-            context.builder.push_root(
-                root_pipeline_id,
-                &root_pipeline.viewport_size,
-                &root_pipeline.content_size,
-                &mut self.clip_scroll_tree,
-            );
-
-            context.builder.setup_viewport_offset(
-                window_size,
-                inner_rect,
-                device_pixel_ratio,
-                &mut self.clip_scroll_tree,
-            );
-
-            self.flatten_root(
-                &mut root_pipeline.display_list.iter(),
-                root_pipeline_id,
-                &mut context,
-                &root_pipeline.content_size,
-            );
-        }
-
-        self.frame_builder = Some(frame_builder);
-        self.clip_scroll_tree
-            .finalize_and_apply_pending_scroll_offsets(old_scrolling_states);
-    }
-
-    pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
-        self.pipeline_epoch_map.insert(pipeline_id, epoch);
-    }
-
-    fn flatten_clip<'a>(
-        &mut self,
-        context: &mut FlattenContext,
         pipeline_id: PipelineId,
         parent_id: &ClipId,
         new_clip_id: &ClipId,
         clip_region: ClipRegion,
     ) {
-        context.builder.add_clip_node(
+        self.builder.add_clip_node(
             *new_clip_id,
             *parent_id,
             pipeline_id,
             clip_region,
-            &mut self.clip_scroll_tree,
+            self.clip_scroll_tree,
         );
     }
 
-    fn flatten_scroll_frame<'a>(
+    fn flatten_scroll_frame(
         &mut self,
-        context: &mut FlattenContext,
         pipeline_id: PipelineId,
         parent_id: &ClipId,
         new_scroll_frame_id: &ClipId,
         frame_rect: &LayerRect,
         content_rect: &LayerRect,
         clip_region: ClipRegion,
         scroll_sensitivity: ScrollSensitivity,
     ) {
         let clip_id = self.clip_scroll_tree.generate_new_clip_id(pipeline_id);
-        context.builder.add_clip_node(
+        self.builder.add_clip_node(
             clip_id,
             *parent_id,
             pipeline_id,
             clip_region,
-            &mut self.clip_scroll_tree,
+            self.clip_scroll_tree,
         );
 
-        context.builder.add_scroll_frame(
+        self.builder.add_scroll_frame(
             *new_scroll_frame_id,
             clip_id,
             pipeline_id,
             &frame_rect,
             &content_rect.size,
             scroll_sensitivity,
-            &mut self.clip_scroll_tree,
+            self.clip_scroll_tree,
         );
     }
 
-    fn flatten_stacking_context<'a>(
+    fn flatten_stacking_context(
         &mut self,
         traversal: &mut BuiltDisplayListIter<'a>,
         pipeline_id: PipelineId,
-        context: &mut FlattenContext,
         context_scroll_node_id: ClipId,
         mut reference_frame_relative_offset: LayerVector2D,
         bounds: &LayerRect,
         stacking_context: &StackingContext,
         filters: ItemRange<FilterOp>,
         is_backface_visible: bool,
     ) {
         // Avoid doing unnecessary work for empty stacking contexts.
         if traversal.current_stacking_context_empty() {
             traversal.skip_current_stacking_context();
             return;
         }
 
         let composition_operations = {
             // TODO(optimization?): self.traversal.display_list()
-            let display_list = &context
+            let display_list = &self
                 .scene
                 .pipelines
                 .get(&pipeline_id)
                 .expect("No display list?!")
                 .display_list;
             CompositeOps::new(
                 stacking_context.filter_ops_for_compositing(
                     display_list,
                     filters,
-                    &context.scene.properties,
+                    &self.scene.properties,
                 ),
                 stacking_context.mix_blend_mode_for_compositing(),
             )
         };
 
         if stacking_context.scroll_policy == ScrollPolicy::Fixed {
-            context.replacements.push((
+            self.replacements.push((
                 context_scroll_node_id,
-                context.builder.current_reference_frame_id(),
+                self.builder.current_reference_frame_id(),
             ));
         }
 
         // If we have a transformation, we establish a new reference frame. This means
         // that fixed position stacking contexts are positioned relative to us.
         let is_reference_frame =
             stacking_context.transform.is_some() || stacking_context.perspective.is_some();
         if is_reference_frame {
             let transform = stacking_context.transform.as_ref();
-            let transform = context.scene.properties.resolve_layout_transform(transform);
+            let transform = self.scene.properties.resolve_layout_transform(transform);
             let perspective = stacking_context
                 .perspective
                 .unwrap_or_else(LayoutTransform::identity);
             let origin = reference_frame_relative_offset + bounds.origin.to_vector();
             let transform = LayerToScrollTransform::create_translation(origin.x, origin.y, 0.0)
                 .pre_mul(&transform)
                 .pre_mul(&perspective);
 
             let reference_frame_bounds = LayerRect::new(LayerPoint::zero(), bounds.size);
-            let mut clip_id = context.apply_scroll_frame_id_replacement(context_scroll_node_id);
-            clip_id = context.builder.push_reference_frame(
+            let mut clip_id = self.apply_scroll_frame_id_replacement(context_scroll_node_id);
+            clip_id = self.builder.push_reference_frame(
                 Some(clip_id),
                 pipeline_id,
                 &reference_frame_bounds,
                 &transform,
                 origin,
                 false,
-                &mut self.clip_scroll_tree,
+                self.clip_scroll_tree,
             );
-            context.replacements.push((context_scroll_node_id, clip_id));
+            self.replacements.push((context_scroll_node_id, clip_id));
             reference_frame_relative_offset = LayerVector2D::zero();
         } else {
             reference_frame_relative_offset = LayerVector2D::new(
                 reference_frame_relative_offset.x + bounds.origin.x,
                 reference_frame_relative_offset.y + bounds.origin.y,
             );
         }
 
-        context.builder.push_stacking_context(
+        self.builder.push_stacking_context(
             &reference_frame_relative_offset,
             pipeline_id,
             composition_operations,
             stacking_context.transform_style,
             is_backface_visible,
             false,
         );
 
         self.flatten_items(
             traversal,
             pipeline_id,
-            context,
             reference_frame_relative_offset,
         );
 
         if stacking_context.scroll_policy == ScrollPolicy::Fixed {
-            context.replacements.pop();
+            self.replacements.pop();
         }
 
         if is_reference_frame {
-            context.replacements.pop();
-            context.builder.pop_reference_frame();
+            self.replacements.pop();
+            self.builder.pop_reference_frame();
         }
 
-        context.builder.pop_stacking_context();
+        self.builder.pop_stacking_context();
     }
 
-    fn flatten_iframe<'a>(
+    fn flatten_iframe(
         &mut self,
         pipeline_id: PipelineId,
         parent_id: ClipId,
         bounds: &LayerRect,
         local_clip: &LocalClip,
-        context: &mut FlattenContext,
         reference_frame_relative_offset: LayerVector2D,
     ) {
-        let pipeline = match context.scene.pipelines.get(&pipeline_id) {
+        let pipeline = match self.scene.pipelines.get(&pipeline_id) {
             Some(pipeline) => pipeline,
             None => return,
         };
 
         let mut clip_region = ClipRegion::create_for_clip_node_with_local_clip(local_clip);
         clip_region.origin += reference_frame_relative_offset;
         let parent_pipeline_id = parent_id.pipeline_id();
         let clip_id = self.clip_scroll_tree
             .generate_new_clip_id(parent_pipeline_id);
-        context.builder.add_clip_node(
+        self.builder.add_clip_node(
             clip_id,
             parent_id,
             parent_pipeline_id,
             clip_region,
-            &mut self.clip_scroll_tree,
+            self.clip_scroll_tree,
         );
 
-        self.pipeline_epoch_map.insert(pipeline_id, pipeline.epoch);
+        self.pipeline_epochs.push((pipeline_id, pipeline.epoch));
 
         let iframe_rect = LayerRect::new(LayerPoint::zero(), bounds.size);
         let origin = reference_frame_relative_offset + bounds.origin.to_vector();
         let transform = LayerToScrollTransform::create_translation(origin.x, origin.y, 0.0);
-        let iframe_reference_frame_id = context.builder.push_reference_frame(
+        let iframe_reference_frame_id = self.builder.push_reference_frame(
             Some(clip_id),
             pipeline_id,
             &iframe_rect,
             &transform,
             origin,
             true,
-            &mut self.clip_scroll_tree,
+            self.clip_scroll_tree,
         );
 
-        context.builder.add_scroll_frame(
+        self.builder.add_scroll_frame(
             ClipId::root_scroll_node(pipeline_id),
             iframe_reference_frame_id,
             pipeline_id,
             &iframe_rect,
             &pipeline.content_size,
             ScrollSensitivity::ScriptAndInputEvents,
-            &mut self.clip_scroll_tree,
+            self.clip_scroll_tree,
         );
 
         self.flatten_root(
             &mut pipeline.display_list.iter(),
             pipeline_id,
-            context,
             &pipeline.content_size,
         );
 
-        context.builder.pop_reference_frame();
+        self.builder.pop_reference_frame();
     }
 
-    fn flatten_item<'a, 'b>(
-        &mut self,
+    fn flatten_item<'b>(
+        &'b mut self,
         item: DisplayItemRef<'a, 'b>,
         pipeline_id: PipelineId,
-        context: &mut FlattenContext,
         reference_frame_relative_offset: LayerVector2D,
     ) -> Option<BuiltDisplayListIter<'a>> {
         let mut clip_and_scroll = item.clip_and_scroll();
 
         let unreplaced_scroll_id = clip_and_scroll.scroll_node_id;
         clip_and_scroll.scroll_node_id =
-            context.apply_scroll_frame_id_replacement(clip_and_scroll.scroll_node_id);
+            self.apply_scroll_frame_id_replacement(clip_and_scroll.scroll_node_id);
 
         let prim_info = item.get_layer_primitive_info(&reference_frame_relative_offset);
         match *item.item() {
             SpecificDisplayItem::Image(ref info) => {
-                if let Some(tiling) = context.tiled_image_map.get(&info.image_key) {
-                    // The image resource is tiled. We have to generate an image primitive
-                    // for each tile.
-                    self.decompose_image(
-                        clip_and_scroll,
-                        &mut context.builder,
-                        &prim_info,
-                        info,
-                        tiling.image_size,
-                        tiling.tile_size as u32,
-                    );
-                } else {
-                    context.builder.add_image(
-                        clip_and_scroll,
-                        &prim_info,
-                        &info.stretch_size,
-                        &info.tile_spacing,
-                        None,
-                        info.image_key,
-                        info.image_rendering,
-                        None,
-                    );
+                match self.tiled_image_map.get(&info.image_key).cloned() {
+                    Some(tiling) => {
+                        // The image resource is tiled. We have to generate an image primitive
+                        // for each tile.
+                        self.decompose_image(
+                            clip_and_scroll,
+                            &prim_info,
+                            info,
+                            tiling.image_size,
+                            tiling.tile_size as u32,
+                        );
+                    }
+                    None => {
+                        self.builder.add_image(
+                            clip_and_scroll,
+                            &prim_info,
+                            &info.stretch_size,
+                            &info.tile_spacing,
+                            None,
+                            info.image_key,
+                            info.image_rendering,
+                            None,
+                        );
+                    }
                 }
             }
             SpecificDisplayItem::YuvImage(ref info) => {
-                context.builder.add_yuv_image(
+                self.builder.add_yuv_image(
                     clip_and_scroll,
                     &prim_info,
                     info.yuv_data,
                     info.color_space,
                     info.image_rendering,
                 );
             }
             SpecificDisplayItem::Text(ref text_info) => {
-                match context.resource_cache.get_font_instance(text_info.font_key) {
+                let instance_map = self.font_instances
+                    .read()
+                    .unwrap();
+                match instance_map.get(&text_info.font_key) {
                     Some(instance) => {
-                        context.builder.add_text(
+                        self.builder.add_text(
                             clip_and_scroll,
                             reference_frame_relative_offset,
                             &prim_info,
                             instance,
                             &text_info.color,
                             item.glyphs(),
                             item.display_list().get(item.glyphs()).count(),
                             text_info.glyph_options,
                         );
                     }
                     None => {
                         warn!("Unknown font instance key: {:?}", text_info.font_key);
                     }
                 }
             }
             SpecificDisplayItem::Rectangle(ref info) => {
-                if !try_to_add_rectangle_splitting_on_clip(
-                    context,
+                if !self.try_to_add_rectangle_splitting_on_clip(
                     &prim_info,
                     &info.color,
                     &clip_and_scroll,
                 ) {
-                    context.builder.add_solid_rectangle(
+                    self.builder.add_solid_rectangle(
                         clip_and_scroll,
                         &prim_info,
                         &info.color,
                         PrimitiveFlags::None,
                     );
                 }
             }
             SpecificDisplayItem::Line(ref info) => {
-                let prim_info = LayerPrimitiveInfo {
-                    rect: LayerRect::zero(),
-                    local_clip: *item.local_clip(),
-                    is_backface_visible: prim_info.is_backface_visible,
-                    tag: prim_info.tag,
-                };
-
-                context.builder.add_line(
+                self.builder.add_line(
                     clip_and_scroll,
                     &prim_info,
-                    info.baseline,
-                    info.start,
-                    info.end,
+                    info.wavy_line_thickness,
                     info.orientation,
-                    info.width,
                     &info.color,
                     info.style,
                 );
             }
             SpecificDisplayItem::Gradient(ref info) => {
-                context.builder.add_gradient(
+                self.builder.add_gradient(
                     clip_and_scroll,
                     &prim_info,
                     info.gradient.start_point,
                     info.gradient.end_point,
                     item.gradient_stops(),
                     item.display_list().get(item.gradient_stops()).count(),
                     info.gradient.extend_mode,
                     info.tile_size,
                     info.tile_spacing,
                 );
             }
             SpecificDisplayItem::RadialGradient(ref info) => {
-                context.builder.add_radial_gradient(
+                self.builder.add_radial_gradient(
                     clip_and_scroll,
                     &prim_info,
                     info.gradient.start_center,
                     info.gradient.start_radius,
                     info.gradient.end_center,
                     info.gradient.end_radius,
                     info.gradient.ratio_xy,
                     item.gradient_stops(),
@@ -574,97 +492,93 @@ impl Frame {
                 );
             }
             SpecificDisplayItem::BoxShadow(ref box_shadow_info) => {
                 let bounds = box_shadow_info
                     .box_bounds
                     .translate(&reference_frame_relative_offset);
                 let mut prim_info = prim_info.clone();
                 prim_info.rect = bounds;
-                context.builder.add_box_shadow(
+                self.builder.add_box_shadow(
                     clip_and_scroll,
                     &prim_info,
                     &box_shadow_info.offset,
                     &box_shadow_info.color,
                     box_shadow_info.blur_radius,
                     box_shadow_info.spread_radius,
                     box_shadow_info.border_radius,
                     box_shadow_info.clip_mode,
                 );
             }
             SpecificDisplayItem::Border(ref info) => {
-                context.builder.add_border(
+                self.builder.add_border(
                     clip_and_scroll,
                     &prim_info,
                     info,
                     item.gradient_stops(),
                     item.display_list().get(item.gradient_stops()).count(),
                 );
             }
             SpecificDisplayItem::PushStackingContext(ref info) => {
                 let mut subtraversal = item.sub_iter();
                 self.flatten_stacking_context(
                     &mut subtraversal,
                     pipeline_id,
-                    context,
                     unreplaced_scroll_id,
                     reference_frame_relative_offset,
                     &item.rect(),
                     &info.stacking_context,
                     item.filters(),
                     prim_info.is_backface_visible,
                 );
                 return Some(subtraversal);
             }
             SpecificDisplayItem::Iframe(ref info) => {
                 self.flatten_iframe(
                     info.pipeline_id,
                     clip_and_scroll.scroll_node_id,
                     &item.rect(),
                     &item.local_clip(),
-                    context,
                     reference_frame_relative_offset,
                 );
             }
             SpecificDisplayItem::Clip(ref info) => {
-                let complex_clips = context.get_complex_clips(pipeline_id, item.complex_clip().0);
+                let complex_clips = self.get_complex_clips(pipeline_id, item.complex_clip().0);
                 let mut clip_region = ClipRegion::create_for_clip_node(
                     *item.local_clip().clip_rect(),
                     complex_clips,
                     info.image_mask,
                 );
                 clip_region.origin += reference_frame_relative_offset;
 
                 self.flatten_clip(
-                    context,
                     pipeline_id,
                     &clip_and_scroll.scroll_node_id,
                     &info.id,
                     clip_region,
                 );
             }
             SpecificDisplayItem::ScrollFrame(ref info) => {
-                let complex_clips = context.get_complex_clips(pipeline_id, item.complex_clip().0);
+                let complex_clips = self.get_complex_clips(pipeline_id, item.complex_clip().0);
                 let mut clip_region = ClipRegion::create_for_clip_node(
                     *item.local_clip().clip_rect(),
                     complex_clips,
                     info.image_mask,
                 );
                 clip_region.origin += reference_frame_relative_offset;
 
                 // Just use clip rectangle as the frame rect for this scroll frame.
                 // This is useful when calculating scroll extents for the
                 // ClipScrollNode::scroll(..) API as well as for properly setting sticky
                 // positioning offsets.
                 let frame_rect = item.local_clip()
                     .clip_rect()
                     .translate(&reference_frame_relative_offset);
                 let content_rect = item.rect().translate(&reference_frame_relative_offset);
                 self.flatten_scroll_frame(
-                    context,
                     pipeline_id,
                     &clip_and_scroll.scroll_node_id,
                     &info.id,
                     &frame_rect,
                     &content_rect,
                     clip_region,
                     info.scroll_sensitivity,
                 );
@@ -683,141 +597,115 @@ impl Frame {
             SpecificDisplayItem::SetGradientStops => {}
 
             SpecificDisplayItem::PopStackingContext => {
                 unreachable!("Should have returned in parent method.")
             }
             SpecificDisplayItem::PushShadow(shadow) => {
                 let mut prim_info = prim_info.clone();
                 prim_info.rect = LayerRect::zero();
-                context
-                    .builder
+                self.builder
                     .push_shadow(shadow, clip_and_scroll, &prim_info);
             }
             SpecificDisplayItem::PopAllShadows => {
-                context.builder.pop_all_shadows();
+                self.builder.pop_all_shadows();
             }
         }
         None
     }
 
-    fn flatten_root<'a>(
+    /// Try to optimize the rendering of a solid rectangle that is clipped by a single
+    /// rounded rectangle, by only masking the parts of the rectangle that intersect
+    /// the rounded parts of the clip. This is pretty simple now, so has a lot of
+    /// potential for further optimizations.
+    fn try_to_add_rectangle_splitting_on_clip(
         &mut self,
-        traversal: &mut BuiltDisplayListIter<'a>,
-        pipeline_id: PipelineId,
-        context: &mut FlattenContext,
-        content_size: &LayoutSize,
-    ) {
-        context.builder.push_stacking_context(
-            &LayerVector2D::zero(),
-            pipeline_id,
-            CompositeOps::default(),
-            TransformStyle::Flat,
-            true,
-            true,
-        );
-
-        // We do this here, rather than above because we want any of the top-level
-        // stacking contexts in the display list to be treated like root stacking contexts.
-        // FIXME(mrobinson): Currently only the first one will, which for the moment is
-        // sufficient for all our use cases.
-        context.builder.notify_waiting_for_root_stacking_context();
-
-        // For the root pipeline, there's no need to add a full screen rectangle
-        // here, as it's handled by the framebuffer clear.
-        let clip_id = ClipId::root_scroll_node(pipeline_id);
-        if context.scene.root_pipeline_id != Some(pipeline_id) {
-            if let Some(pipeline) = context.scene.pipelines.get(&pipeline_id) {
-                if let Some(bg_color) = pipeline.background_color {
-                    let root_bounds = LayerRect::new(LayerPoint::zero(), *content_size);
-                    let info = LayerPrimitiveInfo::new(root_bounds);
-                    context.builder.add_solid_rectangle(
-                        ClipAndScrollInfo::simple(clip_id),
-                        &info,
-                        &bg_color,
-                        PrimitiveFlags::None,
-                    );
-                }
-            }
+        info: &LayerPrimitiveInfo,
+        color: &ColorF,
+        clip_and_scroll: &ClipAndScrollInfo,
+    ) -> bool {
+        // If this rectangle is not opaque, splitting the rectangle up
+        // into an inner opaque region just ends up hurting batching and
+        // doing more work than necessary.
+        if color.a != 1.0 {
+            return false;
         }
 
+        let inner_unclipped_rect = match &info.local_clip {
+            &LocalClip::Rect(_) => return false,
+            &LocalClip::RoundedRect(_, ref region) => {
+                if region.mode == ClipMode::ClipOut {
+                    return false;
+                }
+                region.get_inner_rect_full()
+            }
+        };
+        let inner_unclipped_rect = match inner_unclipped_rect {
+            Some(rect) => rect,
+            None => return false,
+        };
 
-        self.flatten_items(traversal, pipeline_id, context, LayerVector2D::zero());
+        // The inner rectangle is not clipped by its assigned clipping node, so we can
+        // let it be clipped by the parent of the clipping node, which may result in
+        // less masking in some cases.
+        let mut clipped_rects = Vec::new();
+        subtract_rect(&info.rect, &inner_unclipped_rect, &mut clipped_rects);
 
-        if self.frame_builder_config.enable_scrollbars {
-            let scrollbar_rect = LayerRect::new(LayerPoint::zero(), LayerSize::new(10.0, 70.0));
-            let info = LayerPrimitiveInfo::new(scrollbar_rect);
+        let prim_info = LayerPrimitiveInfo {
+            rect: inner_unclipped_rect,
+            local_clip: LocalClip::from(*info.local_clip.clip_rect()),
+            is_backface_visible: info.is_backface_visible,
+            tag: None,
+        };
 
-            context.builder.add_solid_rectangle(
-                ClipAndScrollInfo::simple(clip_id),
+        self.builder.add_solid_rectangle(
+            *clip_and_scroll,
+            &prim_info,
+            color,
+            PrimitiveFlags::None,
+        );
+
+        for clipped_rect in &clipped_rects {
+            let mut info = info.clone();
+            info.rect = *clipped_rect;
+            self.builder.add_solid_rectangle(
+                *clip_and_scroll,
                 &info,
-                &DEFAULT_SCROLLBAR_COLOR,
-                PrimitiveFlags::Scrollbar(self.clip_scroll_tree.topmost_scrolling_node_id(), 4.0),
+                color,
+                PrimitiveFlags::None,
             );
         }
-
-        context.builder.pop_stacking_context();
-    }
-
-    fn flatten_items<'a>(
-        &mut self,
-        traversal: &mut BuiltDisplayListIter<'a>,
-        pipeline_id: PipelineId,
-        context: &mut FlattenContext,
-        reference_frame_relative_offset: LayerVector2D,
-    ) {
-        loop {
-            let subtraversal = {
-                let item = match traversal.next() {
-                    Some(item) => item,
-                    None => break,
-                };
-
-                if SpecificDisplayItem::PopStackingContext == *item.item() {
-                    return;
-                }
-
-                self.flatten_item(item, pipeline_id, context, reference_frame_relative_offset)
-            };
-
-            // If flatten_item created a sub-traversal, we need `traversal` to have the
-            // same state as the completed subtraversal, so we reinitialize it here.
-            if let Some(subtraversal) = subtraversal {
-                *traversal = subtraversal;
-            }
-        }
+        true
     }
 
     /// Decomposes an image display item that is repeated into an image per individual repetition.
     /// We need to do this when we are unable to perform the repetition in the shader,
     /// for example if the image is tiled.
     ///
     /// In all of the "decompose" methods below, we independently handle horizontal and vertical
     /// decomposition. This lets us generate the minimum number of primitives by, for example,
     /// decomposing the repetition horizontally while repeating vertically in the shader (for
     /// an image where the width is too big but the height is not).
     ///
     /// decompose_image and decompose_image_row handle image repetitions while decompose_tiled_image
     /// takes care of the decomposition required by the internal tiling of the image.
     fn decompose_image(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
-        builder: &mut FrameBuilder,
         prim_info: &LayerPrimitiveInfo,
         info: &ImageDisplayItem,
         image_size: DeviceUintSize,
         tile_size: u32,
     ) {
         let no_vertical_tiling = image_size.height <= tile_size;
         let no_vertical_spacing = info.tile_spacing.height == 0.0;
         let item_rect = prim_info.rect;
         if no_vertical_tiling && no_vertical_spacing {
             self.decompose_image_row(
                 clip_and_scroll,
-                builder,
                 prim_info,
                 info,
                 image_size,
                 tile_size,
             );
             return;
         }
 
@@ -831,41 +719,38 @@ impl Frame {
                 item_rect.size.width,
                 info.stretch_size.height,
             ).intersection(&item_rect)
             {
                 let mut prim_info = prim_info.clone();
                 prim_info.rect = row_rect;
                 self.decompose_image_row(
                     clip_and_scroll,
-                    builder,
                     &prim_info,
                     info,
                     image_size,
                     tile_size,
                 );
             }
         }
     }
 
     fn decompose_image_row(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
-        builder: &mut FrameBuilder,
         prim_info: &LayerPrimitiveInfo,
         info: &ImageDisplayItem,
         image_size: DeviceUintSize,
         tile_size: u32,
     ) {
         let no_horizontal_tiling = image_size.width <= tile_size;
         let no_horizontal_spacing = info.tile_spacing.width == 0.0;
         if no_horizontal_tiling && no_horizontal_spacing {
             self.decompose_tiled_image(
                 clip_and_scroll,
-                builder,
                 prim_info,
                 info,
                 image_size,
                 tile_size,
             );
             return;
         }
 
@@ -880,30 +765,28 @@ impl Frame {
                 info.stretch_size.width,
                 item_rect.size.height,
             ).intersection(&item_rect)
             {
                 let mut prim_info = prim_info.clone();
                 prim_info.rect = decomposed_rect;
                 self.decompose_tiled_image(
                     clip_and_scroll,
-                    builder,
                     &prim_info,
                     info,
                     image_size,
                     tile_size,
                 );
             }
         }
     }
 
     fn decompose_tiled_image(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
-        builder: &mut FrameBuilder,
         prim_info: &LayerPrimitiveInfo,
         info: &ImageDisplayItem,
         image_size: DeviceUintSize,
         tile_size: u32,
     ) {
         // The image resource is tiled. We have to generate an image primitive
         // for each tile.
         // We need to do this because the image is broken up into smaller tiles in the texture
@@ -968,32 +851,30 @@ impl Frame {
         // Zero means the image size is a multiple of the tile size.
         let leftover =
             DeviceUintSize::new(image_size.width % tile_size, image_size.height % tile_size);
 
         for ty in 0 .. num_tiles_y {
             for tx in 0 .. num_tiles_x {
                 self.add_tile_primitive(
                     clip_and_scroll,
-                    builder,
                     prim_info,
                     info,
                     TileOffset::new(tx, ty),
                     stretched_tile_size,
                     1.0,
                     1.0,
                     shader_repeat_x,
                     shader_repeat_y,
                 );
             }
             if leftover.width != 0 {
                 // Tiles on the right edge that are smaller than the tile size.
                 self.add_tile_primitive(
                     clip_and_scroll,
-                    builder,
                     prim_info,
                     info,
                     TileOffset::new(num_tiles_x, ty),
                     stretched_tile_size,
                     (leftover.width as f32) / tile_size_f32,
                     1.0,
                     shader_repeat_x,
                     shader_repeat_y,
@@ -1001,50 +882,47 @@ impl Frame {
             }
         }
 
         if leftover.height != 0 {
             for tx in 0 .. num_tiles_x {
                 // Tiles on the bottom edge that are smaller than the tile size.
                 self.add_tile_primitive(
                     clip_and_scroll,
-                    builder,
                     prim_info,
                     info,
                     TileOffset::new(tx, num_tiles_y),
                     stretched_tile_size,
                     1.0,
                     (leftover.height as f32) / tile_size_f32,
                     shader_repeat_x,
                     shader_repeat_y,
                 );
             }
 
             if leftover.width != 0 {
                 // Finally, the bottom-right tile with a "leftover" size.
                 self.add_tile_primitive(
                     clip_and_scroll,
-                    builder,
                     prim_info,
                     info,
                     TileOffset::new(num_tiles_x, num_tiles_y),
                     stretched_tile_size,
                     (leftover.width as f32) / tile_size_f32,
                     (leftover.height as f32) / tile_size_f32,
                     shader_repeat_x,
                     shader_repeat_y,
                 );
             }
         }
     }
 
     fn add_tile_primitive(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
-        builder: &mut FrameBuilder,
         prim_info: &LayerPrimitiveInfo,
         info: &ImageDisplayItem,
         tile_offset: TileOffset,
         stretched_tile_size: LayerSize,
         tile_ratio_width: f32,
         tile_ratio_height: f32,
         shader_repeat_x: bool,
         shader_repeat_y: bool,
@@ -1081,117 +959,200 @@ impl Frame {
             assert_eq!(tile_offset.y, 0);
             prim_rect.size.height = prim_info.rect.size.height;
         }
 
         // Fix up the primitive's rect if it overflows the original item rect.
         if let Some(prim_rect) = prim_rect.intersection(&prim_info.rect) {
             let mut prim_info = prim_info.clone();
             prim_info.rect = prim_rect;
-            builder.add_image(
+            self.builder.add_image(
                 clip_and_scroll,
                 &prim_info,
                 &stretched_size,
                 &info.tile_spacing,
                 None,
                 info.image_key,
                 info.image_rendering,
                 Some(tile_offset),
             );
         }
     }
+}
+
+/// Frame context contains the information required to update
+/// (e.g. scroll) a renderer frame builder (`FrameBuilder`).
+pub struct FrameContext {
+    clip_scroll_tree: ClipScrollTree,
+    pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
+    id: FrameId,
+    frame_builder_config: FrameBuilderConfig,
+}
+
+impl FrameContext {
+    pub fn new(config: FrameBuilderConfig) -> Self {
+        FrameContext {
+            pipeline_epoch_map: FastHashMap::default(),
+            clip_scroll_tree: ClipScrollTree::new(),
+            id: FrameId(0),
+            frame_builder_config: config,
+        }
+    }
+
+    pub fn reset(&mut self) -> ScrollStates {
+        self.pipeline_epoch_map.clear();
+
+        // Advance to the next frame.
+        self.id.0 += 1;
+
+        self.clip_scroll_tree.drain()
+    }
+
+    pub fn get_clip_scroll_tree(&self) -> &ClipScrollTree {
+        &self.clip_scroll_tree
+    }
+
+    pub fn get_scroll_node_state(&self) -> Vec<ScrollLayerState> {
+        self.clip_scroll_tree.get_scroll_node_state()
+    }
+
+    /// Returns true if the node actually changed position or false otherwise.
+    pub fn scroll_node(&mut self, origin: LayerPoint, id: ClipId, clamp: ScrollClamping) -> bool {
+        self.clip_scroll_tree.scroll_node(origin, id, clamp)
+    }
+
+    /// Returns true if any nodes actually changed position or false otherwise.
+    pub fn scroll(
+        &mut self,
+        scroll_location: ScrollLocation,
+        cursor: WorldPoint,
+        phase: ScrollEventPhase,
+    ) -> bool {
+        self.clip_scroll_tree.scroll(scroll_location, cursor, phase)
+    }
+
+    pub fn tick_scrolling_bounce_animations(&mut self) {
+        self.clip_scroll_tree.tick_scrolling_bounce_animations();
+    }
+
+    pub fn discard_frame_state_for_pipeline(&mut self, pipeline_id: PipelineId) {
+        self.clip_scroll_tree
+            .discard_frame_state_for_pipeline(pipeline_id);
+    }
+
+    pub fn create(
+        &mut self,
+        old_builder: Option<FrameBuilder>,
+        scene: &Scene,
+        resource_cache: &mut ResourceCache,
+        window_size: DeviceUintSize,
+        inner_rect: DeviceUintRect,
+        device_pixel_ratio: f32,
+    ) -> Option<FrameBuilder> {
+        let root_pipeline_id = match scene.root_pipeline_id {
+            Some(root_pipeline_id) => root_pipeline_id,
+            None => return old_builder,
+        };
+
+        let root_pipeline = match scene.pipelines.get(&root_pipeline_id) {
+            Some(root_pipeline) => root_pipeline,
+            None => return old_builder,
+        };
+
+        if window_size.width == 0 || window_size.height == 0 {
+            error!("ERROR: Invalid window dimensions! Please call api.set_window_size()");
+        }
+
+        let old_scrolling_states = self.reset();
+
+        self.pipeline_epoch_map
+            .insert(root_pipeline_id, root_pipeline.epoch);
+
+        let background_color = root_pipeline
+            .background_color
+            .and_then(|color| if color.a > 0.0 { Some(color) } else { None });
+
+        let frame_builder = {
+            let mut roller = FlattenContext {
+                scene,
+                builder: FrameBuilder::new(
+                    old_builder,
+                    window_size,
+                    background_color,
+                    self.frame_builder_config,
+                ),
+                clip_scroll_tree: &mut self.clip_scroll_tree,
+                font_instances: resource_cache.get_font_instances(),
+                tiled_image_map: resource_cache.get_tiled_image_map(),
+                pipeline_epochs: Vec::new(),
+                replacements: Vec::new(),
+            };
+
+            roller.builder.push_root(
+                root_pipeline_id,
+                &root_pipeline.viewport_size,
+                &root_pipeline.content_size,
+                roller.clip_scroll_tree,
+            );
+
+            roller.builder.setup_viewport_offset(
+                window_size,
+                inner_rect,
+                device_pixel_ratio,
+                roller.clip_scroll_tree,
+            );
+
+            roller.flatten_root(
+                &mut root_pipeline.display_list.iter(),
+                root_pipeline_id,
+                &root_pipeline.content_size,
+            );
+
+            self.pipeline_epoch_map.extend(roller.pipeline_epochs.drain(..));
+            roller.builder
+        };
+
+        self.clip_scroll_tree
+            .finalize_and_apply_pending_scroll_offsets(old_scrolling_states);
+        Some(frame_builder)
+    }
+
+    pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
+        self.pipeline_epoch_map.insert(pipeline_id, epoch);
+    }
+
+    fn get_renderer_frame_impl(&self, frame: Option<Frame>) -> RendererFrame {
+        let nodes_bouncing_back = self.clip_scroll_tree.collect_nodes_bouncing_back();
+        RendererFrame::new(self.pipeline_epoch_map.clone(), nodes_bouncing_back, frame)
+    }
 
     pub fn build_renderer_frame(
         &mut self,
+        frame_builder: &mut FrameBuilder,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         device_pixel_ratio: f32,
         pan: LayerPoint,
         output_pipelines: &FastHashSet<PipelineId>,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
     ) -> RendererFrame {
-        let mut frame_builder = self.frame_builder.take();
-        let frame = frame_builder.as_mut().map(|builder| {
-            builder.build(
-                resource_cache,
-                gpu_cache,
-                self.id,
-                &mut self.clip_scroll_tree,
-                pipelines,
-                device_pixel_ratio,
-                pan,
-                output_pipelines,
-                texture_cache_profile,
-                gpu_cache_profile,
-            )
-        });
-        self.frame_builder = frame_builder;
+        let frame = frame_builder.build(
+            resource_cache,
+            gpu_cache,
+            self.id,
+            &mut self.clip_scroll_tree,
+            pipelines,
+            device_pixel_ratio,
+            pan,
+            output_pipelines,
+            texture_cache_profile,
+            gpu_cache_profile,
+        );
 
-        let nodes_bouncing_back = self.clip_scroll_tree.collect_nodes_bouncing_back();
-        RendererFrame::new(self.pipeline_epoch_map.clone(), nodes_bouncing_back, frame)
-    }
-}
-
-/// Try to optimize the rendering of a solid rectangle that is clipped by a single
-/// rounded rectangle, by only masking the parts of the rectangle that intersect
-/// the rounded parts of the clip. This is pretty simple now, so has a lot of
-/// potential for further optimizations.
-fn try_to_add_rectangle_splitting_on_clip(
-    context: &mut FlattenContext,
-    info: &LayerPrimitiveInfo,
-    color: &ColorF,
-    clip_and_scroll: &ClipAndScrollInfo,
-) -> bool {
-    // If this rectangle is not opaque, splitting the rectangle up
-    // into an inner opaque region just ends up hurting batching and
-    // doing more work than necessary.
-    if color.a != 1.0 {
-        return false;
+        self.get_renderer_frame_impl(Some(frame))
     }
 
-    let inner_unclipped_rect = match &info.local_clip {
-        &LocalClip::Rect(_) => return false,
-        &LocalClip::RoundedRect(_, ref region) => {
-            if region.mode == ClipMode::ClipOut {
-                return false;
-            }
-            region.get_inner_rect_full()
-        }
-    };
-    let inner_unclipped_rect = match inner_unclipped_rect {
-        Some(rect) => rect,
-        None => return false,
-    };
-
-    // The inner rectangle is not clipped by its assigned clipping node, so we can
-    // let it be clipped by the parent of the clipping node, which may result in
-    // less masking some cases.
-    let mut clipped_rects = Vec::new();
-    subtract_rect(&info.rect, &inner_unclipped_rect, &mut clipped_rects);
-
-    let prim_info = LayerPrimitiveInfo {
-        rect: inner_unclipped_rect,
-        local_clip: LocalClip::from(*info.local_clip.clip_rect()),
-        is_backface_visible: info.is_backface_visible,
-        tag: None,
-    };
-
-    context.builder.add_solid_rectangle(
-        *clip_and_scroll,
-        &prim_info,
-        color,
-        PrimitiveFlags::None,
-    );
-
-    for clipped_rect in &clipped_rects {
-        let mut info = info.clone();
-        info.rect = *clipped_rect;
-        context.builder.add_solid_rectangle(
-            *clip_and_scroll,
-            &info,
-            color,
-            PrimitiveFlags::None,
-        );
+    pub fn get_renderer_frame(&self) -> RendererFrame {
+        self.get_renderer_frame_impl(None)
     }
-    true
 }
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{BorderDetails, BorderDisplayItem, BorderRadius, BoxShadowClipMode, BuiltDisplayList};
-use api::{ClipMode, ComplexClipRegion, ClipAndScrollInfo, ClipId, ColorF, LayoutSize};
+use api::{BorderDetails, BorderDisplayItem, BuiltDisplayList};
+use api::{ClipAndScrollInfo, ClipId, ColorF};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
 use api::{ExtendMode, FilterOp, FontInstance, FontRenderMode};
 use api::{GlyphInstance, GlyphOptions, GradientStop, HitTestFlags, HitTestItem, HitTestResult};
 use api::{ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerPixel, LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
 use api::{LineStyle, LocalClip, PipelineId, RepeatMode};
 use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
 use api::{WorldPixel, WorldPoint, YuvColorSpace, YuvData, device_length};
@@ -18,35 +18,33 @@ use clip::{ClipRegion, ClipSource, ClipS
 use clip_scroll_node::{ClipInfo, ClipScrollNode, NodeType};
 use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId};
 use euclid::{SideOffsets2D, TypedTransform3D, vec2, vec3};
 use frame::FrameId;
 use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet, HardwareCompositeOp};
 use picture::{PicturePrimitive};
 use plane_split::{BspSplitter, Polygon, Splitter};
-use prim_store::{BrushPrimitive, TexelRect, YuvImagePrimitiveCpu};
+use prim_store::{TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{AlphaRenderItem, ClearMode, ClipChain, RenderTask, RenderTaskId, RenderTaskLocation};
 use render_task::RenderTaskTree;
 use resource_cache::ResourceCache;
 use scene::ScenePipeline;
 use std::{mem, usize, f32, i32};
 use tiling::{ClipScrollGroup, ClipScrollGroupIndex, CompositeOps, Frame};
 use tiling::{ContextIsolation, RenderTargetKind, StackingContextIndex};
 use tiling::{PackedLayer, PackedLayerIndex, PrimitiveFlags, PrimitiveRunCmd, RenderPass};
 use tiling::{RenderTargetContext, ScrollbarPrimitive, StackingContext};
 use util::{self, pack_as_float, RectHelpers, recycle_vec};
-
-// The blur shader samples BLUR_SAMPLE_SCALE * blur_radius surrounding texels.
-const BLUR_SAMPLE_SCALE: f32 = 3.0;
+use box_shadow::BLUR_SAMPLE_SCALE;
 
 /// Construct a polygon from stacking context boundaries.
 /// `anchor` here is an index that's going to be preserved in all the
 /// splits of the polygon.
 fn make_polygon(
     stacking_context: &StackingContext,
     node: &ClipScrollNode,
     anchor: usize,
@@ -99,24 +97,25 @@ impl HitTestingItem {
             clip: info.local_clip,
             tag: tag,
         }
     }
 }
 
 pub struct HitTestingRun(Vec<HitTestingItem>, ClipAndScrollInfo);
 
+/// A builder structure for `RendererFrame`
 pub struct FrameBuilder {
     screen_size: DeviceUintSize,
     background_color: Option<ColorF>,
     prim_store: PrimitiveStore,
     pub clip_store: ClipStore,
     cmds: Vec<PrimitiveRunCmd>,
     hit_testing_runs: Vec<HitTestingRun>,
-    config: FrameBuilderConfig,
+    pub config: FrameBuilderConfig,
 
     stacking_context_store: Vec<StackingContext>,
     clip_scroll_group_store: Vec<ClipScrollGroup>,
     // Note: value here is meant to be `ClipScrollGroupIndex`,
     // but we already have `ClipAndScrollInfo` in the key
     clip_scroll_group_indices: FastHashMap<ClipAndScrollInfo, usize>,
     packed_layers: Vec<PackedLayer>,
 
@@ -173,21 +172,21 @@ impl<'a> PrimitiveContext<'a> {
             clip_id,
             display_list,
         }
     }
 }
 
 impl FrameBuilder {
     pub fn new(
-        previous: Option<FrameBuilder>,
+        previous: Option<Self>,
         screen_size: DeviceUintSize,
         background_color: Option<ColorF>,
         config: FrameBuilderConfig,
-    ) -> FrameBuilder {
+    ) -> Self {
         match previous {
             Some(prev) => FrameBuilder {
                 stacking_context_store: recycle_vec(prev.stacking_context_store),
                 clip_scroll_group_store: recycle_vec(prev.clip_scroll_group_store),
                 clip_scroll_group_indices: FastHashMap::default(),
                 cmds: recycle_vec(prev.cmds),
                 hit_testing_runs: recycle_vec(prev.hit_testing_runs),
                 packed_layers: recycle_vec(prev.packed_layers),
@@ -223,17 +222,17 @@ impl FrameBuilder {
                 has_root_stacking_context: false,
             },
         }
     }
 
     /// Create a primitive and add it to the prim store. This method doesn't
     /// add the primitive to the draw list, so can be used for creating
     /// sub-primitives.
-    fn create_primitive(
+    pub fn create_primitive(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
         mut clip_sources: Vec<ClipSource>,
         container: PrimitiveContainer,
     ) -> PrimitiveIndex {
         if !self.clip_scroll_group_indices.contains_key(&clip_and_scroll) {
             let group_id = self.create_clip_scroll_group(&clip_and_scroll);
@@ -584,20 +583,18 @@ impl FrameBuilder {
         let mut shadows = mem::replace(&mut self.shadow_prim_stack, Vec::new());
         for (prim_index, pending_primitives) in shadows.drain(..) {
             {
                 // By now, the local rect of the text shadow has been calculated. It
                 // is calculated as the items in the shadow are added. It's now
                 // safe to offset the local rect by the offset of the shadow, which
                 // is then used when blitting the shadow to the final location.
                 let metadata = &mut self.prim_store.cpu_metadata[prim_index.0];
-                let prim = &self.prim_store.cpu_pictures[metadata.cpu_prim_index.0];
-                let shadow = prim.as_text_shadow();
-
-                metadata.local_rect = metadata.local_rect.translate(&shadow.offset);
+                let prim = &mut self.prim_store.cpu_pictures[metadata.cpu_prim_index.0];
+                metadata.local_rect = prim.build();
             }
 
             // Push any fast-path shadows now
             for (prim_index, clip_and_scroll) in pending_primitives {
                 self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
             }
         }
 
@@ -645,36 +642,23 @@ impl FrameBuilder {
             }
         }
     }
 
     pub fn add_line(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
-        baseline: f32,
-        start: f32,
-        end: f32,
+        wavy_line_thickness: f32,
         orientation: LineOrientation,
-        width: f32,
         color: &ColorF,
         style: LineStyle,
     ) {
-        let new_rect = match orientation {
-            LineOrientation::Horizontal => LayerRect::new(
-                LayerPoint::new(start, baseline),
-                LayerSize::new(end - start, width),
-            ),
-            LineOrientation::Vertical => LayerRect::new(
-                LayerPoint::new(baseline, start),
-                LayerSize::new(width, end - start),
-            ),
-        };
-
         let line = LinePrimitive {
+            wavy_line_thickness,
             color: *color,
             style: style,
             orientation: orientation,
         };
 
         let mut fast_shadow_prims = Vec::new();
         for (idx, &(shadow_prim_index, _)) in self.shadow_prim_stack.iter().enumerate() {
             let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
@@ -684,59 +668,56 @@ impl FrameBuilder {
                 fast_shadow_prims.push((idx, shadow.clone()));
             }
         }
 
         for (idx, shadow) in fast_shadow_prims {
             let mut line = line.clone();
             line.color = shadow.color;
             let mut info = info.clone();
-            info.rect = new_rect.translate(&shadow.offset);
+            info.rect = info.rect.translate(&shadow.offset);
             let prim_index = self.create_primitive(
                 clip_and_scroll,
                 &info,
                 Vec::new(),
                 PrimitiveContainer::Line(line),
             );
             self.shadow_prim_stack[idx].1.push((prim_index, clip_and_scroll));
         }
 
-        let mut info = info.clone();
-        info.rect = new_rect;
         let prim_index = self.create_primitive(
             clip_and_scroll,
             &info,
             Vec::new(),
             PrimitiveContainer::Line(line),
         );
 
         if color.a > 0.0 {
             if self.shadow_prim_stack.is_empty() {
                 self.add_primitive_to_hit_testing_list(&info, clip_and_scroll);
                 self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
             } else {
-                self.pending_shadow_contents.push((prim_index, clip_and_scroll, info));
+                self.pending_shadow_contents.push((prim_index, clip_and_scroll, *info));
             }
         }
 
         for &(shadow_prim_index, _) in &self.shadow_prim_stack {
             let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
             debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Picture);
             let picture =
                 &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
             let blur_radius = picture.as_text_shadow().blur_radius;
 
             // Only run real blurs here (fast path zero blurs are handled above).
             if blur_radius > 0.0 {
-                let shadow_rect = new_rect.inflate(
-                    blur_radius,
-                    blur_radius,
+                picture.add_primitive(
+                    prim_index,
+                    &info.rect,
+                    clip_and_scroll,
                 );
-                shadow_metadata.local_rect = shadow_metadata.local_rect.union(&shadow_rect);
-                picture.add_primitive(prim_index, clip_and_scroll);
             }
         }
     }
 
     pub fn add_border(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
@@ -1238,250 +1219,21 @@ impl FrameBuilder {
             let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
             debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::Picture);
             let picture_prim =
                 &mut self.prim_store.cpu_pictures[shadow_metadata.cpu_prim_index.0];
 
             // Only run real blurs here (fast path zero blurs are handled above).
             let blur_radius = picture_prim.as_text_shadow().blur_radius;
             if blur_radius > 0.0 {
-                let shadow_rect = rect.inflate(
-                    blur_radius,
-                    blur_radius,
+                picture_prim.add_primitive(
+                    prim_index,
+                    &rect,
+                    clip_and_scroll,
                 );
-                shadow_metadata.local_rect = shadow_metadata.local_rect.union(&shadow_rect);
-                picture_prim.add_primitive(prim_index, clip_and_scroll);
-            }
-        }
-    }
-
-    pub fn add_box_shadow(
-        &mut self,
-        clip_and_scroll: ClipAndScrollInfo,
-        prim_info: &LayerPrimitiveInfo,
-        box_offset: &LayerVector2D,
-        color: &ColorF,
-        blur_radius: f32,
-        spread_radius: f32,
-        border_radius: BorderRadius,
-        clip_mode: BoxShadowClipMode,
-    ) {
-        if color.a == 0.0 {
-            return;
-        }
-
-        let spread_amount = match clip_mode {
-            BoxShadowClipMode::Outset => {
-                spread_radius
-            }
-            BoxShadowClipMode::Inset => {
-                -spread_radius
-            }
-        };
-
-        let shadow_radius = adjust_border_radius_for_box_shadow(
-            border_radius,
-            spread_amount,
-            spread_radius
-        );
-        let shadow_rect = prim_info.rect
-                                   .translate(box_offset)
-                                   .inflate(spread_amount, spread_amount);
-
-        if blur_radius == 0.0 {
-            let mut clips = Vec::new();
-
-            let fast_info = match clip_mode {
-                BoxShadowClipMode::Outset => {
-                    // TODO(gw): Add a fast path for ClipOut + zero border radius!
-                    clips.push(ClipSource::RoundedRectangle(
-                        prim_info.rect,
-                        border_radius,
-                        ClipMode::ClipOut
-                    ));
-
-                    LayerPrimitiveInfo::with_clip(
-                        shadow_rect,
-                        LocalClip::RoundedRect(
-                            shadow_rect,
-                            ComplexClipRegion::new(
-                                shadow_rect,
-                                shadow_radius,
-                                ClipMode::Clip,
-                            ),
-                        ),
-                    )
-                }
-                BoxShadowClipMode::Inset => {
-                    clips.push(ClipSource::RoundedRectangle(
-                        shadow_rect,
-                        shadow_radius,
-                        ClipMode::ClipOut
-                    ));
-
-                    LayerPrimitiveInfo::with_clip(
-                        prim_info.rect,
-                        LocalClip::RoundedRect(
-                            prim_info.rect,
-                            ComplexClipRegion::new(
-                                prim_info.rect,
-                                border_radius,
-                                ClipMode::Clip
-                            ),
-                        ),
-                    )
-                }
-            };
-
-            self.add_primitive(
-                clip_and_scroll,
-                &fast_info,
-                clips,
-                PrimitiveContainer::Rectangle(RectanglePrimitive {
-                    color: *color,
-                }),
-            );
-        } else {
-            let blur_offset = 2.0 * blur_radius;
-            let mut extra_clips = vec![];
-            let mut blur_regions = vec![];
-
-            match clip_mode {
-                BoxShadowClipMode::Outset => {
-                    let brush_prim = BrushPrimitive {
-                        clip_mode: ClipMode::Clip,
-                        radius: shadow_radius,
-                    };
-
-                    let brush_rect = LayerRect::new(LayerPoint::new(blur_offset, blur_offset),
-                                                    shadow_rect.size);
-
-                    let brush_info = LayerPrimitiveInfo::new(brush_rect);
-
-                    let brush_prim_index = self.create_primitive(
-                        clip_and_scroll,
-                        &brush_info,
-                        Vec::new(),
-                        PrimitiveContainer::Brush(brush_prim),
-                    );
-
-                    let pic_rect = shadow_rect.inflate(blur_offset, blur_offset);
-                    let blur_range = BLUR_SAMPLE_SCALE * blur_radius;
-
-                    let size = pic_rect.size;
-
-                    let tl = LayerSize::new(
-                        blur_radius.max(border_radius.top_left.width),
-                        blur_radius.max(border_radius.top_left.height)
-                    ) * BLUR_SAMPLE_SCALE;
-                    let tr = LayerSize::new(
-                        blur_radius.max(border_radius.top_right.width),
-                        blur_radius.max(border_radius.top_right.height)
-                    ) * BLUR_SAMPLE_SCALE;
-                    let br = LayerSize::new(
-                        blur_radius.max(border_radius.bottom_right.width),
-                        blur_radius.max(border_radius.bottom_right.height)
-                    ) * BLUR_SAMPLE_SCALE;
-                    let bl = LayerSize::new(
-                        blur_radius.max(border_radius.bottom_left.width),
-                        blur_radius.max(border_radius.bottom_left.height)
-                    ) * BLUR_SAMPLE_SCALE;
-
-                    let max_width = tl.width.max(tr.width.max(bl.width.max(br.width)));
-                    let max_height = tl.height.max(tr.height.max(bl.height.max(br.height)));
-
-                    // Apply a conservative test that if any of the blur regions below
-                    // will overlap, we won't bother applying the region optimization
-                    // and will just blur the entire thing. This should only happen
-                    // in rare cases, where either the blur radius or border radius
-                    // is very large, in which case there's no real point in trying
-                    // to only blur a small region anyway.
-                    if max_width < 0.5 * size.width && max_height < 0.5 * size.height {
-                        blur_regions.push(LayerRect::from_floats(0.0, 0.0, tl.width, tl.height));
-                        blur_regions.push(LayerRect::from_floats(size.width - tr.width, 0.0, size.width, tr.height));
-                        blur_regions.push(LayerRect::from_floats(size.width - br.width, size.height - br.height, size.width, size.height));
-                        blur_regions.push(LayerRect::from_floats(0.0, size.height - bl.height, bl.width, size.height));
-
-                        blur_regions.push(LayerRect::from_floats(0.0, tl.height, blur_range, size.height - bl.height));
-                        blur_regions.push(LayerRect::from_floats(size.width - blur_range, tr.height, size.width, size.height - br.height));
-                        blur_regions.push(LayerRect::from_floats(tl.width, 0.0, size.width - tr.width, blur_range));
-                        blur_regions.push(LayerRect::from_floats(bl.width, size.height - blur_range, size.width - br.width, size.height));
-                    }
-
-                    let mut pic_prim = PicturePrimitive::new_box_shadow(
-                        blur_radius,
-                        *color,
-                        blur_regions,
-                        BoxShadowClipMode::Outset,
-                    );
-
-                    pic_prim.add_primitive(brush_prim_index, clip_and_scroll);
-
-                    extra_clips.push(ClipSource::RoundedRectangle(
-                        prim_info.rect,
-                        border_radius,
-                        ClipMode::ClipOut,
-                    ));
-
-                    let pic_info = LayerPrimitiveInfo::new(pic_rect);
-
-                    self.add_primitive(
-                        clip_and_scroll,
-                        &pic_info,
-                        extra_clips,
-                        PrimitiveContainer::Picture(pic_prim),
-                    );
-                }
-                BoxShadowClipMode::Inset => {
-                    let brush_prim = BrushPrimitive {
-                        clip_mode: ClipMode::ClipOut,
-                        radius: shadow_radius,
-                    };
-
-                    let mut brush_rect = shadow_rect;
-                    brush_rect.origin.x = brush_rect.origin.x - prim_info.rect.origin.x + blur_offset;
-                    brush_rect.origin.y = brush_rect.origin.y - prim_info.rect.origin.y + blur_offset;
-
-                    let brush_info = LayerPrimitiveInfo::new(brush_rect);
-
-                    let brush_prim_index = self.create_primitive(
-                        clip_and_scroll,
-                        &brush_info,
-                        Vec::new(),
-                        PrimitiveContainer::Brush(brush_prim),
-                    );
-
-                    let pic_rect = prim_info.rect.inflate(blur_offset, blur_offset);
-
-                    // TODO(gw): Apply minimal blur regions for inset box shadows.
-
-                    let mut pic_prim = PicturePrimitive::new_box_shadow(
-                        blur_radius,
-                        *color,
-                        blur_regions,
-                        BoxShadowClipMode::Inset,
-                    );
-
-                    pic_prim.add_primitive(brush_prim_index, clip_and_scroll);
-
-                    extra_clips.push(ClipSource::RoundedRectangle(
-                        prim_info.rect,
-                        border_radius,
-                        ClipMode::Clip,
-                    ));
-
-                    let pic_info = LayerPrimitiveInfo::with_clip_rect(pic_rect, prim_info.rect);
-
-                    self.add_primitive(
-                        clip_and_scroll,
-                        &pic_info,
-                        extra_clips,
-                        PrimitiveContainer::Picture(pic_prim),
-                    );
-                }
             }
         }
     }
 
     pub fn add_image(
         &mut self,
         clip_and_scroll: ClipAndScrollInfo,
         info: &LayerPrimitiveInfo,
@@ -2101,34 +1853,37 @@ impl FrameBuilder {
 
                     for filter in &stacking_context.composite_ops.filters {
                         let mut prev_task = alpha_task_stack.pop().unwrap();
                         let screen_origin = current_task.as_alpha_batch().screen_origin;
                         let current_task_id = render_tasks.add(current_task);
                         match *filter {
                             FilterOp::Blur(blur_radius) => {
                                 let blur_radius = device_length(blur_radius, device_pixel_ratio);
+                                let blur_std_deviation = blur_radius.0 as f32;
+                                let inflate_size = blur_std_deviation * BLUR_SAMPLE_SCALE;
                                 render_tasks.get_mut(current_task_id)
-                                            .inflate(blur_radius.0);
+                                            .inflate(inflate_size as i32);
                                 let blur_render_task = RenderTask::new_blur(
-                                    blur_radius,
+                                    blur_std_deviation,
                                     current_task_id,
                                     render_tasks,
                                     RenderTargetKind::Color,
                                     &[],
                                     ClearMode::Transparent,
+                                    ColorF::new(0.0, 0.0, 0.0, 0.0),
                                 );
                                 let blur_render_task_id = render_tasks.add(blur_render_task);
                                 let item = AlphaRenderItem::HardwareComposite(
                                     stacking_context_index,
                                     blur_render_task_id,
                                     HardwareCompositeOp::PremultipliedAlpha,
                                     DeviceIntPoint::new(
-                                        screen_origin.x - blur_radius.0,
-                                        screen_origin.y - blur_radius.0,
+                                        screen_origin.x - inflate_size as i32,
+                                        screen_origin.y - inflate_size as i32,
                                     ),
                                     next_z,
                                 );
                                 prev_task.as_alpha_batch_mut().items.push(item);
                                 prev_task.children.push(blur_render_task_id);
                                 current_task = prev_task;
                             }
                             _ => {
@@ -2391,72 +2146,8 @@ impl FrameBuilder {
             passes,
             layer_texture_data: self.packed_layers.clone(),
             render_tasks,
             deferred_resolves,
             gpu_cache_updates: Some(gpu_cache_updates),
         }
     }
 }
-
-fn adjust_border_radius_for_box_shadow(
-    radius: BorderRadius,
-    spread_amount: f32,
-    spread_radius: f32,
-) -> BorderRadius {
-    BorderRadius {
-        top_left: adjust_corner_for_box_shadow(
-            radius.top_left,
-            spread_radius,
-            spread_amount,
-        ),
-        top_right: adjust_corner_for_box_shadow(
-            radius.top_right,
-            spread_radius,
-            spread_amount,
-        ),
-        bottom_right: adjust_corner_for_box_shadow(
-            radius.bottom_right,
-            spread_radius,
-            spread_amount,
-        ),
-        bottom_left: adjust_corner_for_box_shadow(
-            radius.bottom_left,
-            spread_radius,
-            spread_amount,
-        ),
-    }
-}
-
-fn adjust_corner_for_box_shadow(
-    corner: LayoutSize,
-    spread_amount: f32,
-    spread_radius: f32,
-) -> LayoutSize {
-    LayoutSize::new(
-        adjust_radius_for_box_shadow(
-            corner.width,
-            spread_radius,
-            spread_amount
-        ),
-        adjust_radius_for_box_shadow(
-            corner.height,
-            spread_radius,
-            spread_amount
-        ),
-    )
-}
-
-fn adjust_radius_for_box_shadow(
-    border_radius: f32,
-    spread_amount: f32,
-    spread_radius: f32,
-) -> f32 {
-    // Adjust the shadow box radius as per:
-    // https://drafts.csswg.org/css-backgrounds-3/#shadow-shape
-    let sharpness_scale = if border_radius < spread_radius {
-        let r = border_radius / spread_amount;
-        1.0 + (r - 1.0) * (r - 1.0) * (r - 1.0)
-    } else {
-        1.0
-    };
-    (border_radius + spread_amount * sharpness_scale).max(0.0)
-}
--- a/gfx/webrender/src/glyph_cache.rs
+++ b/gfx/webrender/src/glyph_cache.rs
@@ -1,24 +1,26 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{DevicePoint, DeviceUintSize, FontInstance, GlyphKey};
+use glyph_rasterizer::GlyphFormat;
 use internal_types::FastHashMap;
 use resource_cache::ResourceClassCache;
 use std::sync::Arc;
 use texture_cache::TextureCacheHandle;
 
 pub struct CachedGlyphInfo {
     pub texture_cache_handle: TextureCacheHandle,
     pub glyph_bytes: Arc<Vec<u8>>,
     pub size: DeviceUintSize,
     pub offset: DevicePoint,
     pub scale: f32,
+    pub format: GlyphFormat,
 }
 
 pub type GlyphKeyCache = ResourceClassCache<GlyphKey, Option<CachedGlyphInfo>>;
 
 pub struct GlyphCache {
     pub glyph_key_caches: FastHashMap<FontInstance, GlyphKeyCache>,
 }
 
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -1,34 +1,62 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #[cfg(test)]
-use api::{ColorF, IdNamespace, LayoutPoint};
-use api::{DevicePoint, DeviceUintSize, FontInstance};
-use api::{FontKey, FontTemplate, FontRenderMode, ColorU};
-use api::{GlyphDimensions, GlyphKey, SubpixelDirection};
+use api::{ColorF, IdNamespace, LayoutPoint, SubpixelDirection};
+use api::{DevicePoint, DeviceUintSize, FontInstance, FontRenderMode};
+use api::{FontKey, FontTemplate, GlyphDimensions, GlyphKey};
 use api::{ImageData, ImageDescriptor, ImageFormat};
 #[cfg(test)]
 use app_units::Au;
 use device::TextureFilter;
 use glyph_cache::{CachedGlyphInfo, GlyphCache};
 use gpu_cache::GpuCache;
 use internal_types::FastHashSet;
-use platform::font::{FontContext, RasterizedGlyph};
+use platform::font::FontContext;
 use profiler::TextureCacheProfileCounters;
 use rayon::ThreadPool;
 use rayon::prelude::*;
 use std::collections::hash_map::Entry;
 use std::mem;
 use std::sync::{Arc, Mutex, MutexGuard};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use texture_cache::{TextureCache, TextureCacheHandle};
 
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum GlyphFormat {
+    Mono,
+    Alpha,
+    Subpixel,
+    ColorBitmap,
+}
+
+impl From<FontRenderMode> for GlyphFormat {
+    fn from(render_mode: FontRenderMode) -> GlyphFormat {
+        match render_mode {
+            FontRenderMode::Mono => GlyphFormat::Mono,
+            FontRenderMode::Alpha => GlyphFormat::Alpha,
+            FontRenderMode::Subpixel => GlyphFormat::Subpixel,
+            FontRenderMode::Bitmap => GlyphFormat::ColorBitmap,
+        }
+    }
+}
+
+pub struct RasterizedGlyph {
+    pub top: f32,
+    pub left: f32,
+    pub width: u32,
+    pub height: u32,
+    pub scale: f32,
+    pub format: GlyphFormat,
+    pub bytes: Vec<u8>,
+}
+
 pub struct FontContexts {
     // These worker are mostly accessed from their corresponding worker threads.
     // The goal is that there should be no noticeable contention on the muteces.
     worker_contexts: Vec<Mutex<FontContext>>,
 
     // This worker should be accessed by threads that don't belong to thre thread pool
     // (in theory that's only the render backend thread so no contention expected either).
     shared_context: Mutex<FontContext>,
@@ -140,36 +168,17 @@ impl GlyphRasterizer {
         }
     }
 
     pub fn delete_font(&mut self, font_key: FontKey) {
         self.fonts_to_remove.push(font_key);
     }
 
     pub fn prepare_font(&self, font: &mut FontInstance) {
-        // In alpha/mono mode, the color of the font is irrelevant.
-        // Forcing it to black in those cases saves rasterizing glyphs
-        // of different colors when not needed.
-        match font.render_mode {
-            FontRenderMode::Mono | FontRenderMode::Bitmap => {
-                font.color = ColorU::new(255, 255, 255, 255);
-                // Subpixel positioning is disabled in mono and bitmap modes.
-                font.subpx_dir = SubpixelDirection::None;
-            }
-            FontRenderMode::Alpha => {
-                font.color = ColorU::new(255, 255, 255, 255);
-            }
-            FontRenderMode::Subpixel => {
-                // In subpixel mode, we only actually need the color if preblending
-                // is used in the font backend.
-                if !FontContext::has_gamma_correct_subpixel_aa() {
-                    font.color = ColorU::new(255, 255, 255, 255);
-                }
-            }
-        }
+        FontContext::prepare_font(font);
     }
 
     pub fn request_glyphs(
         &mut self,
         glyph_cache: &mut GlyphCache,
         font: FontInstance,
         glyph_keys: &[GlyphKey],
         texture_cache: &mut TextureCache,
@@ -340,16 +349,17 @@ impl GlyphRasterizer {
                         gpu_cache,
                     );
                     Some(CachedGlyphInfo {
                         texture_cache_handle,
                         glyph_bytes,
                         size: DeviceUintSize::new(glyph.width, glyph.height),
                         offset: DevicePoint::new(glyph.left, glyph.top),
                         scale: glyph.scale,
+                        format: glyph.format,
                     })
                 } else {
                     None
                 });
 
             let glyph_key_cache = glyph_cache.get_glyph_key_cache_for_font_mut(job.request.font);
 
             glyph_key_cache.insert(job.request.key, Ok(glyph_info));
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -135,42 +135,48 @@ impl From<CompositePrimitiveInstance> fo
                 instance.data1,
                 0,
                 0,
             ],
         }
     }
 }
 
+// Whether this brush is being drawn on a Picture
+// task (new) or an alpha batch task (legacy).
+// Can be removed once everything uses pictures.
+pub const BRUSH_FLAG_USES_PICTURE: i32 = (1 << 0);
+
+// TODO(gw): While we are converting things over, we
+//           need to have the instance be the same
+//           size as an old PrimitiveInstance. In the
+//           future, we can compress this vertex
+//           format a lot - e.g. z, render task
+//           addresses etc can reasonably become
+//           a u16 type.
 #[repr(C)]
 pub struct BrushInstance {
-    picture_address: RenderTaskAddress,
-    prim_address: GpuCacheAddress,
-}
-
-impl BrushInstance {
-    pub fn new(
-        picture_address: RenderTaskAddress,
-        prim_address: GpuCacheAddress
-    ) -> BrushInstance {
-        BrushInstance {
-            picture_address,
-            prim_address,
-        }
-    }
+    pub picture_address: RenderTaskAddress,
+    pub prim_address: GpuCacheAddress,
+    pub layer_address: PackedLayerAddress,
+    pub clip_task_address: RenderTaskAddress,
+    pub z: i32,
+    pub flags: i32,
+    pub user_data0: i32,
+    pub user_data1: i32,
 }
 
 impl From<BrushInstance> for PrimitiveInstance {
     fn from(instance: BrushInstance) -> PrimitiveInstance {
         PrimitiveInstance {
             data: [
                 instance.picture_address.0 as i32,
                 instance.prim_address.as_int(),
-                0,
-                0,
-                0,
-                0,
-                0,
-                0,
+                instance.layer_address.0,
+                instance.clip_task_address.0 as i32,
+                instance.z,
+                instance.flags,
+                instance.user_data0,
+                instance.user_data1,
             ]
         }
     }
 }
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -152,17 +152,17 @@ pub struct RendererFrame {
     pub frame: Option<tiling::Frame>,
 }
 
 impl RendererFrame {
     pub fn new(
         pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
         layers_bouncing_back: FastHashSet<ClipId>,
         frame: Option<tiling::Frame>,
-    ) -> RendererFrame {
+    ) -> Self {
         RendererFrame {
             pipeline_epoch_map,
             layers_bouncing_back,
             frame,
         }
     }
 }
 
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -45,16 +45,17 @@ extern crate bitflags;
 #[macro_use]
 extern crate lazy_static;
 #[macro_use]
 extern crate log;
 #[macro_use]
 extern crate thread_profiler;
 
 mod border;
+mod box_shadow;
 mod clip;
 mod clip_scroll_node;
 mod clip_scroll_tree;
 mod debug_colors;
 mod debug_font_data;
 mod debug_render;
 #[cfg(feature = "debugger")]
 mod debug_server;
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -1,17 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ColorF, ClipAndScrollInfo, device_length, DeviceIntSize};
 use api::{BoxShadowClipMode, LayerRect, Shadow};
+use box_shadow::BLUR_SAMPLE_SCALE;
 use frame_builder::PrimitiveContext;
 use gpu_cache::GpuDataRequest;
-use prim_store::{PrimitiveIndex, PrimitiveMetadata};
+use prim_store::PrimitiveIndex;
 use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
 use tiling::RenderTargetKind;
 
 /*
  A picture represents a dynamically rendered image. It consists of:
 
  * A number of primitives that are drawn onto the picture.
  * A composite operation describing how to composite this
@@ -40,148 +41,189 @@ pub enum PictureKind {
     },
 }
 
 #[derive(Debug)]
 pub struct PicturePrimitive {
     pub prim_runs: Vec<PrimitiveRun>,
     pub render_task_id: Option<RenderTaskId>,
     pub kind: PictureKind,
+    pub content_rect: LayerRect,
 
     // TODO(gw): Add a mode that specifies if this
     //           picture should be rasterized in
     //           screen-space or local-space.
 }
 
 impl PicturePrimitive {
     pub fn new_text_shadow(shadow: Shadow) -> PicturePrimitive {
         PicturePrimitive {
             prim_runs: Vec::new(),
             render_task_id: None,
+            content_rect: LayerRect::zero(),
             kind: PictureKind::TextShadow {
                 shadow,
             },
         }
     }
 
     pub fn new_box_shadow(
         blur_radius: f32,
         color: ColorF,
         blur_regions: Vec<LayerRect>,
         clip_mode: BoxShadowClipMode,
     ) -> PicturePrimitive {
         PicturePrimitive {
             prim_runs: Vec::new(),
             render_task_id: None,
+            content_rect: LayerRect::zero(),
             kind: PictureKind::BoxShadow {
                 blur_radius,
-                color,
+                color: color.premultiplied(),
                 blur_regions,
                 clip_mode,
             },
         }
     }
 
     pub fn as_text_shadow(&self) -> &Shadow {
         match self.kind {
             PictureKind::TextShadow { ref shadow } => shadow,
             PictureKind::BoxShadow { .. } => panic!("bug: not a text shadow")
         }
     }
 
     pub fn add_primitive(
         &mut self,
         prim_index: PrimitiveIndex,
+        local_rect: &LayerRect,
         clip_and_scroll: ClipAndScrollInfo
     ) {
+        // TODO(gw): Accumulating the primitive local rect
+        //           into the content rect here is fine, for now.
+        //           The only way pictures are currently used,
+        //           all the items added to a picture are known
+        //           to be in the same local space. Once we start
+        //           using pictures for other uses, we will need
+        //           to consider the space of a primitive in order
+        //           to build a correct content rect!
+        self.content_rect = self.content_rect.union(local_rect);
+
         if let Some(ref mut run) = self.prim_runs.last_mut() {
             if run.clip_and_scroll == clip_and_scroll &&
                run.prim_index.0 + run.count == prim_index.0 {
                 run.count += 1;
                 return;
             }
         }
 
         self.prim_runs.push(PrimitiveRun {
             prim_index,
             count: 1,
             clip_and_scroll,
         });
     }
 
+    pub fn build(&mut self) -> LayerRect {
+        match self.kind {
+            PictureKind::TextShadow { ref shadow } => {
+                let blur_offset = shadow.blur_radius * BLUR_SAMPLE_SCALE;
+
+                self.content_rect = self.content_rect.inflate(
+                    blur_offset,
+                    blur_offset,
+                );
+
+                self.content_rect.translate(&shadow.offset)
+            }
+            PictureKind::BoxShadow { blur_radius, .. } => {
+                // TODO(gw): The 2.0 here should actually be BLUR_SAMPLE_SCALE.
+                //           I'm leaving it as is for now, to avoid having to
+                //           change the code in box_shadow.rs. As I work on
+                //           the box shadow optimizations, I'll fix this up.
+                let blur_offset = blur_radius * 2.0;
+
+                self.content_rect = self.content_rect.inflate(
+                    blur_offset,
+                    blur_offset,
+                );
+
+                self.content_rect
+            }
+        }
+    }
+
     pub fn prepare_for_render(
         &mut self,
         prim_index: PrimitiveIndex,
-        prim_metadata: &PrimitiveMetadata,
         prim_context: &PrimitiveContext,
         render_tasks: &mut RenderTaskTree,
     ) {
         // This is a shadow element. Create a render task that will
         // render the text run to a target, and then apply a gaussian
         // blur to that text run in order to build the actual primitive
         // which will be blitted to the framebuffer.
         let cache_width =
-            (prim_metadata.local_rect.size.width * prim_context.device_pixel_ratio).ceil() as i32;
+            (self.content_rect.size.width * prim_context.device_pixel_ratio).ceil() as i32;
         let cache_height =
-            (prim_metadata.local_rect.size.height * prim_context.device_pixel_ratio).ceil() as i32;
+            (self.content_rect.size.height * prim_context.device_pixel_ratio).ceil() as i32;
         let cache_size = DeviceIntSize::new(cache_width, cache_height);
 
-        let (blur_radius, target_kind, blur_regions, clear_mode) = match self.kind {
+        let (blur_radius, target_kind, blur_regions, clear_mode, color) = match self.kind {
             PictureKind::TextShadow { ref shadow } => {
                 let dummy: &[LayerRect] = &[];
-                (shadow.blur_radius, RenderTargetKind::Color, dummy, ClearMode::Transparent)
+                (shadow.blur_radius,
+                 RenderTargetKind::Color,
+                 dummy,
+                 ClearMode::Transparent,
+                 shadow.color)
             }
-            PictureKind::BoxShadow { blur_radius, clip_mode, ref blur_regions, .. } => {
+            PictureKind::BoxShadow { blur_radius, clip_mode, ref blur_regions, color, .. } => {
                 let clear_mode = match clip_mode {
                     BoxShadowClipMode::Outset => ClearMode::One,
                     BoxShadowClipMode::Inset => ClearMode::Zero,
                 };
-                (blur_radius, RenderTargetKind::Alpha, blur_regions.as_slice(), clear_mode)
+                (blur_radius,
+                 RenderTargetKind::Alpha,
+                 blur_regions.as_slice(),
+                 clear_mode,
+                 color)
             }
         };
         let blur_radius = device_length(blur_radius, prim_context.device_pixel_ratio);
 
+        // Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
+        // "the image that would be generated by applying to the shadow a
+        // Gaussian blur with a standard deviation equal to half the blur radius."
+        let blur_std_deviation = blur_radius.0 as f32 * 0.5;
+
         let picture_task = RenderTask::new_picture(
             cache_size,
             prim_index,
             target_kind,
+            self.content_rect.origin,
+            color,
         );
         let picture_task_id = render_tasks.add(picture_task);
         let render_task = RenderTask::new_blur(
-            blur_radius,
+            blur_std_deviation,
             picture_task_id,
             render_tasks,
             target_kind,
             blur_regions,
             clear_mode,
+            color,
         );
         self.render_task_id = Some(render_tasks.add(render_task));
     }
 
-    pub fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        match self.kind {
-            PictureKind::TextShadow { ref shadow } => {
-                request.push(shadow.color);
-                request.push([
-                    shadow.offset.x,
-                    shadow.offset.y,
-                    shadow.blur_radius,
-                    0.0,
-                ]);
-            }
-            PictureKind::BoxShadow { blur_radius, color, .. } => {
-                request.push(color);
-                request.push([
-                    0.0,
-                    0.0,
-                    blur_radius,
-                    0.0,
-                ]);
-            }
-        }
+    pub fn write_gpu_blocks(&self, mut _request: GpuDataRequest) {
+        // TODO(gw): We'll need to write the GPU blocks
+        //           here specific to a brush primitive
+        //           once we start drawing pictures as brushes!
     }
 
     pub fn target_kind(&self) -> RenderTargetKind {
         match self.kind {
             PictureKind::TextShadow { .. } => RenderTargetKind::Color,
             PictureKind::BoxShadow { .. } => RenderTargetKind::Alpha,
         }
     }
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ColorU, FontKey, FontRenderMode, GlyphDimensions};
 use api::{FontInstance, FontVariation, NativeFontHandle};
-use api::GlyphKey;
+use api::{GlyphKey, SubpixelDirection};
 use app_units::Au;
 use core_foundation::array::{CFArray, CFArrayRef};
 use core_foundation::base::TCFType;
 use core_foundation::dictionary::{CFDictionary, CFDictionaryRef};
 use core_foundation::number::{CFNumber, CFNumberRef};
 use core_foundation::string::{CFString, CFStringRef};
 use core_graphics::base::{kCGImageAlphaNoneSkipFirst, kCGImageAlphaPremultipliedFirst, kCGImageAlphaPremultipliedLast};
 use core_graphics::base::kCGBitmapByteOrder32Little;
@@ -17,53 +17,32 @@ use core_graphics::color_space::CGColorS
 use core_graphics::context::{CGContext, CGTextDrawingMode};
 use core_graphics::data_provider::CGDataProvider;
 use core_graphics::font::{CGFont, CGFontRef, CGGlyph};
 use core_graphics::geometry::{CGPoint, CGRect, CGSize};
 use core_text;
 use core_text::font::{CTFont, CTFontRef};
 use core_text::font_descriptor::{kCTFontDefaultOrientation, kCTFontColorGlyphsTrait};
 use gamma_lut::{Color as ColorLut, GammaLut};
+use glyph_rasterizer::{GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::collections::hash_map::Entry;
 use std::ptr;
 use std::sync::Arc;
 
 pub struct FontContext {
     cg_fonts: FastHashMap<FontKey, CGFont>,
     ct_fonts: FastHashMap<(FontKey, Au, Vec<FontVariation>), CTFont>,
     gamma_lut: GammaLut,
 }
 
 // core text is safe to use on multiple threads and non-shareable resources are
 // all hidden inside their font context.
 unsafe impl Send for FontContext {}
 
-pub struct RasterizedGlyph {
-    pub top: f32,
-    pub left: f32,
-    pub width: u32,
-    pub height: u32,
-    pub scale: f32,
-    pub bytes: Vec<u8>,
-}
-
-impl RasterizedGlyph {
-    pub fn blank() -> RasterizedGlyph {
-        RasterizedGlyph {
-            top: 0.0,
-            left: 0.0,
-            width: 0,
-            height: 0,
-            scale: 1.0,
-            bytes: vec![],
-        }
-    }
-}
-
 struct GlyphMetrics {
     rasterized_left: i32,
     rasterized_descent: i32,
     rasterized_ascent: i32,
     rasterized_width: u32,
     rasterized_height: u32,
     advance: f32,
 }
@@ -89,16 +68,24 @@ fn supports_subpixel_aa() -> bool {
     cg_context.set_rgb_fill_color(1.0, 1.0, 1.0, 1.0);
     let point = CGPoint { x: -1., y: 0. };
     let glyph = '|' as CGGlyph;
     ct_font.draw_glyphs(&[glyph], &[point], cg_context.clone());
     let data = cg_context.data();
     data[0] != data[1] || data[1] != data[2]
 }
 
+fn should_use_white_on_black(color: ColorU) -> bool {
+    let r = color.r as f32 / 255.0;
+    let g = color.g as f32 / 255.0;
+    let b = color.b as f32 / 255.0;
+    // These thresholds were determined on macOS 10.12 by observing what Core Graphics (CG) does.
+    r >= 0.333 && g >= 0.333 && b >= 0.333 && r + g + b >= 2.0
+}
+
 fn get_glyph_metrics(
     ct_font: &CTFont,
     glyph: CGGlyph,
     x_offset: f64,
     y_offset: f64,
 ) -> GlyphMetrics {
     let bounds = ct_font.get_bounding_rects_for_glyphs(kCTFontDefaultOrientation, &[glyph]);
 
@@ -429,35 +416,51 @@ impl FontContext {
             Some(ref ct_font) => {
                 let traits = ct_font.symbolic_traits();
                 (traits & kCTFontColorGlyphsTrait) != 0
             }
             None => false,
         }
     }
 
-    pub fn has_gamma_correct_subpixel_aa() -> bool {
-        true
+    pub fn prepare_font(font: &mut FontInstance) {
+        match font.render_mode {
+            FontRenderMode::Mono | FontRenderMode::Bitmap => {
+                // In mono/bitmap modes the color of the font is irrelevant.
+                font.color = ColorU::new(255, 255, 255, 255);
+                // Subpixel positioning is disabled in mono and bitmap modes.
+                font.subpx_dir = SubpixelDirection::None;
+            }
+            FontRenderMode::Alpha => {
+                font.color = if font.platform_options.unwrap_or_default().font_smoothing &&
+                                should_use_white_on_black(font.color) {
+                    ColorU::new(255, 255, 255, 255)
+                } else {
+                    ColorU::new(0, 0, 0, 255)
+                };
+            }
+            FontRenderMode::Subpixel => {}
+        }
     }
 
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
         let ct_font = match self.get_ct_font(font.font_key, font.size, &font.variations) {
             Some(font) => font,
-            None => return Some(RasterizedGlyph::blank()),
+            None => return None,
         };
 
         let glyph = key.index as CGGlyph;
         let (x_offset, y_offset) = font.get_subpx_offset(key);
         let metrics = get_glyph_metrics(&ct_font, glyph, x_offset, y_offset);
         if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
-            return Some(RasterizedGlyph::blank());
+            return None;
         }
 
         let context_flags = match font.render_mode {
             FontRenderMode::Subpixel => {
                 kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipFirst
             }
             FontRenderMode::Alpha | FontRenderMode::Mono => {
                 kCGImageAlphaPremultipliedLast
@@ -492,23 +495,39 @@ impl FontContext {
         // 2) white text on transparent black - channels == alpha
         //
         // If we draw grayscale/mono on an opaque background
         // the RGB channels are the alpha values from transparent backgrounds
         // with the alpha set as opaque.
         // At the end of all this, WR expects individual RGB channels and ignores alpha
         // for subpixel AA.
         // For alpha/mono, WR ignores all channels other than alpha.
-        // Also note that WR expects text to be black bg with white text, so invert
-        // when we draw the glyphs.
-        let (antialias, smooth, bg_color) = match font.render_mode {
-            FontRenderMode::Subpixel => (true, true, 1.0),
-            FontRenderMode::Alpha => (true, false, 1.0),
-            FontRenderMode::Bitmap => (true, false, 0.0),
-            FontRenderMode::Mono => (false, false, 1.0),
+        // Also note that WR expects text to be white text on black bg, so invert
+        // when we draw the glyphs as black on white.
+        //
+        // Unless platform_options.font_smoothing is false, the grayscale AA'd version
+        // of the glyph will actually be rasterized with subpixel AA. The color channels
+        // will then be converted to luminance in gamma_correct_pixels to produce the
+        // final grayscale AA. This ensures that the dilation of the glyph from grayscale
+        // AA more closely resembles the dilation from subpixel AA in the general case.
+        let use_white_on_black = should_use_white_on_black(font.color);
+        let use_font_smoothing = font.platform_options.unwrap_or_default().font_smoothing;
+        let (antialias, smooth, text_color, bg_color, bg_alpha, invert) = match font.render_mode {
+            FontRenderMode::Subpixel => if use_white_on_black {
+                (true, true, 1.0, 0.0, 1.0, false)
+            } else {
+                (true, true, 0.0, 1.0, 1.0, true)
+            },
+            FontRenderMode::Alpha => if use_font_smoothing && use_white_on_black {
+                (true, use_font_smoothing, 1.0, 0.0, 1.0, false)
+            } else {
+                (true, use_font_smoothing, 0.0, 1.0, 1.0, true)
+            },
+            FontRenderMode::Bitmap => (true, false, 0.0, 0.0, 0.0, false),
+            FontRenderMode::Mono => (false, false, 0.0, 1.0, 1.0, true),
         };
 
         // These are always true in Gecko, even for non-AA fonts
         cg_context.set_allows_font_subpixel_positioning(true);
         cg_context.set_should_subpixel_position_fonts(true);
 
         // Don't quantize because we're doing it already.
         cg_context.set_allows_font_subpixel_quantization(false);
@@ -522,75 +541,78 @@ impl FontContext {
         // CG Origin is bottom left, WR is top left. Need -y offset
         let rasterization_origin = CGPoint {
             x: -metrics.rasterized_left as f64 + x_offset,
             y: metrics.rasterized_descent as f64 - y_offset,
         };
 
         // Always draw black text on a white background
         // Fill the background
-        cg_context.set_rgb_fill_color(bg_color, bg_color, bg_color, bg_color);
+        cg_context.set_rgb_fill_color(bg_color, bg_color, bg_color, bg_alpha);
         let rect = CGRect {
             origin: CGPoint { x: 0.0, y: 0.0 },
             size: CGSize {
                 width: metrics.rasterized_width as f64,
                 height: metrics.rasterized_height as f64,
             },
         };
         cg_context.fill_rect(rect);
 
         // Set the text color
-        cg_context.set_rgb_fill_color(0.0, 0.0, 0.0, 1.0);
+        cg_context.set_rgb_fill_color(text_color, text_color, text_color, 1.0);
         cg_context.set_text_drawing_mode(CGTextDrawingMode::CGTextFill);
         ct_font.draw_glyphs(&[glyph], &[rasterization_origin], cg_context.clone());
 
         let mut rasterized_pixels = cg_context.data().to_vec();
 
         if font.render_mode != FontRenderMode::Bitmap {
             // Convert to linear space for subpixel AA.
             // We explicitly do not do this for grayscale AA
-            if font.render_mode == FontRenderMode::Subpixel {
+            if smooth {
                 self.gamma_lut.coregraphics_convert_to_linear_bgra(
                     &mut rasterized_pixels,
                     metrics.rasterized_width as usize,
                     metrics.rasterized_height as usize,
                 );
             }
 
-            // We need to invert the pixels back since right now
-            // transparent pixels are actually opaque white.
             for i in 0 .. metrics.rasterized_height {
                 let current_height = (i * metrics.rasterized_width * 4) as usize;
                 let end_row = current_height + (metrics.rasterized_width as usize * 4);
 
                 for pixel in rasterized_pixels[current_height .. end_row].chunks_mut(4) {
-                    pixel[0] = 255 - pixel[0];
-                    pixel[1] = 255 - pixel[1];
-                    pixel[2] = 255 - pixel[2];
+                    if invert {
+                        pixel[0] = 255 - pixel[0];
+                        pixel[1] = 255 - pixel[1];
+                        pixel[2] = 255 - pixel[2];
+                    }
 
                     pixel[3] = match font.render_mode {
                         FontRenderMode::Subpixel => 255,
                         _ => {
                             pixel[0]
                         }
                     }; // end match
                 } // end row
             } // end height
 
-            self.gamma_correct_pixels(
-                &mut rasterized_pixels,
-                metrics.rasterized_width as usize,
-                metrics.rasterized_height as usize,
-                font.render_mode,
-                font.color,
-            );
+            if smooth {
+                self.gamma_correct_pixels(
+                    &mut rasterized_pixels,
+                    metrics.rasterized_width as usize,
+                    metrics.rasterized_height as usize,
+                    font.render_mode,
+                    font.color,
+                );
+            }
         }
 
         Some(RasterizedGlyph {
             left: metrics.rasterized_left as f32,
             top: metrics.rasterized_ascent as f32,
             width: metrics.rasterized_width,
             height: metrics.rasterized_height,
             scale: 1.0,
+            format: GlyphFormat::from(font.render_mode),
             bytes: rasterized_pixels,
         })
     }
 }
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -1,28 +1,29 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{FontInstance, FontKey, FontRenderMode, GlyphDimensions};
 use api::{FontInstancePlatformOptions, FontLCDFilter, FontHinting};
-use api::{NativeFontHandle, SubpixelDirection, GlyphKey};
+use api::{NativeFontHandle, SubpixelDirection, GlyphKey, ColorU};
 use api::{FONT_FORCE_AUTOHINT, FONT_NO_AUTOHINT, FONT_EMBEDDED_BITMAP};
 use api::{FONT_EMBOLDEN, FONT_VERTICAL_LAYOUT, FONT_SUBPIXEL_BGR};
 use freetype::freetype::{FT_BBox, FT_Outline_Translate, FT_Pixel_Mode, FT_Render_Mode};
 use freetype::freetype::{FT_Done_Face, FT_Error, FT_Get_Char_Index, FT_Int32};
 use freetype::freetype::{FT_Done_FreeType, FT_Library_SetLcdFilter, FT_Pos};
 use freetype::freetype::{FT_F26Dot6, FT_Face, FT_Glyph_Format, FT_Long, FT_UInt};
 use freetype::freetype::{FT_GlyphSlot, FT_LcdFilter, FT_New_Memory_Face};
 use freetype::freetype::{FT_Init_FreeType, FT_Load_Glyph, FT_Render_Glyph};
 use freetype::freetype::{FT_Library, FT_Outline_Get_CBox, FT_Set_Char_Size, FT_Select_Size};
 use freetype::freetype::{FT_LOAD_COLOR, FT_LOAD_DEFAULT, FT_LOAD_FORCE_AUTOHINT};
 use freetype::freetype::{FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH, FT_LOAD_NO_AUTOHINT};
 use freetype::freetype::{FT_LOAD_NO_BITMAP, FT_LOAD_NO_HINTING, FT_LOAD_VERTICAL_LAYOUT};
 use freetype::freetype::{FT_FACE_FLAG_SCALABLE, FT_FACE_FLAG_FIXED_SIZES, FT_Err_Cannot_Render_Glyph};
+use glyph_rasterizer::{GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::{cmp, mem, ptr, slice};
 use std::sync::Arc;
 
 // These constants are not present in the freetype
 // bindings due to bindgen not handling the way
 // the macros are defined.
 //const FT_LOAD_TARGET_NORMAL: FT_UInt = 0 << 16;
@@ -44,25 +45,16 @@ pub struct FontContext {
     lcd_extra_pixels: i64,
 }
 
 // FreeType resources are safe to move between threads as long as they
 // are not concurrently accessed. In our case, everything is hidden inside
 // a given FontContext so it is safe to move the latter between threads.
 unsafe impl Send for FontContext {}
 
-pub struct RasterizedGlyph {
-    pub top: f32,
-    pub left: f32,
-    pub width: u32,
-    pub height: u32,
-    pub scale: f32,
-    pub bytes: Vec<u8>,
-}
-
 extern "C" {
     fn FT_GlyphSlot_Embolden(slot: FT_GlyphSlot);
     fn FT_GlyphSlot_Oblique(slot: FT_GlyphSlot);
 }
 
 impl FontContext {
     pub fn new() -> FontContext {
         let mut lib: FT_Library = ptr::null_mut();
@@ -373,19 +365,29 @@ impl FontContext {
             if (best_dist < 0.0 && dist >= best_dist) || dist.abs() <= best_dist {
                 best_dist = dist;
                 best_size = i;
             }
         }
         unsafe { FT_Select_Size(face, best_size) }
     }
 
-    pub fn has_gamma_correct_subpixel_aa() -> bool {
-        // We don't do any preblending with FreeType currently, so the color is not used.
-        false
+    pub fn prepare_font(font: &mut FontInstance) {
+        match font.render_mode {
+            FontRenderMode::Mono | FontRenderMode::Bitmap => {
+                // In mono/bitmap modes the color of the font is irrelevant.
+                font.color = ColorU::new(255, 255, 255, 255);
+                // Subpixel positioning is disabled in mono and bitmap modes.
+                font.subpx_dir = SubpixelDirection::None;
+            }
+            FontRenderMode::Alpha | FontRenderMode::Subpixel => {
+                // We don't do any preblending with FreeType currently, so the color is not used.
+                font.color = ColorU::new(255, 255, 255, 255);
+            }
+        }
     }
 
     fn rasterize_glyph_outline(
         &mut self,
         slot: FT_GlyphSlot,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> bool {
@@ -484,27 +486,33 @@ impl FontContext {
         let pixel_mode = unsafe { mem::transmute(bitmap.pixel_mode as u32) };
         info!(
             "Rasterizing {:?} as {:?} with dimensions {:?}",
             key,
             font.render_mode,
             dimensions
         );
 
-        let (actual_width, actual_height) = match pixel_mode {
+        let (format, actual_width, actual_height) = match pixel_mode {
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD => {
                 assert!(bitmap.width % 3 == 0);
-                ((bitmap.width / 3) as i32, bitmap.rows as i32)
+                (GlyphFormat::Subpixel, (bitmap.width / 3) as i32, bitmap.rows as i32)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD_V => {
                 assert!(bitmap.rows % 3 == 0);
-                (bitmap.width as i32, (bitmap.rows / 3) as i32)
+                (GlyphFormat::Subpixel, bitmap.width as i32, (bitmap.rows / 3) as i32)
+            }
+            FT_Pixel_Mode::FT_PIXEL_MODE_MONO => {
+                (GlyphFormat::Mono, bitmap.width as i32, bitmap.rows as i32)
             }
-            FT_Pixel_Mode::FT_PIXEL_MODE_MONO | FT_Pixel_Mode::FT_PIXEL_MODE_GRAY | FT_Pixel_Mode::FT_PIXEL_MODE_BGRA => {
-                (bitmap.width as i32, bitmap.rows as i32)
+            FT_Pixel_Mode::FT_PIXEL_MODE_GRAY => {
+                (GlyphFormat::Alpha, bitmap.width as i32, bitmap.rows as i32)
+            }
+            FT_Pixel_Mode::FT_PIXEL_MODE_BGRA => {
+                (GlyphFormat::ColorBitmap, bitmap.width as i32, bitmap.rows as i32)
             }
             _ => panic!("Unsupported {:?}", pixel_mode),
         };
         let (left, top) = unsafe { ((*slot).bitmap_left, (*slot).bitmap_top) };
         let mut final_buffer = vec![0; (actual_width * actual_height * 4) as usize];
 
         // Extract the final glyph from FT format into RGBA8 format, which is
         // what WR expects.
@@ -600,16 +608,17 @@ impl FontContext {
         }
 
         Some(RasterizedGlyph {
             left: ((dimensions.left + left) as f32 * scale).round(),
             top: ((dimensions.top + top - actual_height) as f32 * scale).round(),
             width: actual_width as u32,
             height: actual_height as u32,
             scale,
+            format,
             bytes: final_buffer,
         })
     }
 }
 
 impl Drop for FontContext {
     fn drop(&mut self) {
         unsafe {
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -1,16 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{FontInstance, FontInstancePlatformOptions, FontKey, FontRenderMode};
-use api::{GlyphDimensions, GlyphKey};
+use api::{ColorU, GlyphDimensions, GlyphKey, SubpixelDirection};
 use dwrote;
 use gamma_lut::{Color as ColorLut, GammaLut};
+use glyph_rasterizer::{GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::sync::Arc;
 
 lazy_static! {
     static ref DEFAULT_FONT_DESCRIPTOR: dwrote::FontDescriptor = dwrote::FontDescriptor {
         family_name: "Arial".to_owned(),
         weight: dwrote::FontWeight::Regular,
         stretch: dwrote::FontStretch::Normal,
@@ -23,25 +24,16 @@ pub struct FontContext {
     gamma_lut: GammaLut,
     gdi_gamma_lut: GammaLut,
 }
 
 // DirectWrite is safe to use on multiple threads and non-shareable resources are
 // all hidden inside their font context.
 unsafe impl Send for FontContext {}
 
-pub struct RasterizedGlyph {
-    pub top: f32,
-    pub left: f32,
-    pub width: u32,
-    pub height: u32,
-    pub scale: f32,
-    pub bytes: Vec<u8>,
-}
-
 fn dwrite_texture_type(render_mode: FontRenderMode) -> dwrote::DWRITE_TEXTURE_TYPE {
     match render_mode {
         FontRenderMode::Mono | FontRenderMode::Bitmap => dwrote::DWRITE_TEXTURE_ALIASED_1x1,
         FontRenderMode::Alpha | FontRenderMode::Subpixel => dwrote::DWRITE_TEXTURE_CLEARTYPE_3x1,
     }
 }
 
 fn dwrite_measure_mode(
@@ -305,18 +297,30 @@ impl FontContext {
         }
     }
 
     pub fn is_bitmap_font(&mut self, _font: &FontInstance) -> bool {
         // TODO(gw): Support bitmap fonts in DWrite.
         false
     }
 
-    pub fn has_gamma_correct_subpixel_aa() -> bool {
-        true
+    pub fn prepare_font(font: &mut FontInstance) {
+        match font.render_mode {
+            FontRenderMode::Mono | FontRenderMode::Bitmap => {
+                // In mono/bitmap modes the color of the font is irrelevant.
+                font.color = ColorU::new(255, 255, 255, 255);
+                // Subpixel positioning is disabled in mono and bitmap modes.
+                font.subpx_dir = SubpixelDirection::None;
+            }
+            FontRenderMode::Alpha => {
+                // In alpha mode the color of the font is irrelevant.
+                font.color = ColorU::new(255, 255, 255, 255);
+            }
+            FontRenderMode::Subpixel => {}
+        }
     }
 
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
         let analysis = self.create_glyph_analysis(font, key);
@@ -358,12 +362,13 @@ impl FontContext {
         let rgba_pixels = self.convert_to_rgba(&mut pixels, font.render_mode);
 
         Some(RasterizedGlyph {
             left: bounds.left as f32,
             top: -bounds.top as f32,
             width: width as u32,
             height: height as u32,
             scale: 1.0,
+            format: GlyphFormat::from(font.render_mode),
             bytes: rgba_pixels,
         })
     }
 }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -155,17 +155,17 @@ pub struct PrimitiveMetadata {
 #[derive(Debug)]
 #[repr(C)]
 pub struct RectanglePrimitive {
     pub color: ColorF,
 }
 
 impl ToGpuBlocks for RectanglePrimitive {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        request.push(self.color);
+        request.push(self.color.premultiplied());
     }
 }
 
 #[derive(Debug)]
 pub struct BrushPrimitive {
     pub clip_mode: ClipMode,
     pub radius: BorderRadius,
 }
@@ -192,28 +192,29 @@ impl ToGpuBlocks for BrushPrimitive {
         ]);
     }
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct LinePrimitive {
     pub color: ColorF,
+    pub wavy_line_thickness: f32,
     pub style: LineStyle,
     pub orientation: LineOrientation,
 }
 
 impl ToGpuBlocks for LinePrimitive {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
         request.push(self.color);
         request.push([
+            self.wavy_line_thickness,
             pack_as_float(self.style as u32),
             pack_as_float(self.orientation as u32),
             0.0,
-            0.0,
         ]);
     }
 }
 
 #[derive(Debug)]
 pub struct ImagePrimitiveCpu {
     pub image_key: ImageKey,
     pub image_rendering: ImageRendering,
@@ -581,17 +582,17 @@ impl TextRunPrimitiveCpu {
                 self.glyph_gpu_blocks.push(gpu_block);
             }
         }
 
         resource_cache.request_glyphs(font, &self.glyph_keys, gpu_cache);
     }
 
     fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
-        request.push(ColorF::from(self.font.color));
+        request.push(ColorF::from(self.font.color).premultiplied());
         request.push([
             self.offset.x,
             self.offset.y,
             self.font.subpx_dir.limit_by(self.font.render_mode) as u32 as f32,
             0.0,
         ]);
         request.extend_from_slice(&self.glyph_gpu_blocks);
 
@@ -1054,17 +1055,16 @@ impl PrimitiveStore {
     ) {
         let metadata = &mut self.cpu_metadata[prim_index.0];
         match metadata.prim_kind {
             PrimitiveKind::Rectangle | PrimitiveKind::Border | PrimitiveKind::Line => {}
             PrimitiveKind::Picture => {
                 self.cpu_pictures[metadata.cpu_prim_index.0]
                     .prepare_for_render(
                         prim_index,
-                        metadata,
                         prim_context,
                         render_tasks
                     );
             }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 text.prepare_for_render(
                     resource_cache,
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -1,23 +1,23 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ApiMsg, BlobImageRenderer, BuiltDisplayList, DebugCommand, DeviceIntPoint};
 #[cfg(feature = "debugger")]
 use api::{BuiltDisplayListIter, SpecificDisplayItem};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, DocumentMsg};
-use api::{IdNamespace, LayerPoint, PipelineId, RenderNotifier};
+use api::{HitTestResult, IdNamespace, LayerPoint, PipelineId, RenderNotifier};
 use api::channel::{MsgReceiver, PayloadReceiver, PayloadReceiverHelperMethods};
 use api::channel::{PayloadSender, PayloadSenderHelperMethods};
 #[cfg(feature = "debugger")]
 use debug_server;
-use frame::Frame;
-use frame_builder::FrameBuilderConfig;
+use frame::FrameContext;
+use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use gpu_cache::GpuCache;
 use internal_types::{DebugOutput, FastHashMap, FastHashSet, RendererFrame, ResultMsg};
 use profiler::{BackendProfileCounters, ResourceProfileCounters};
 use rayon::ThreadPool;
 use record::ApiRecordingReceiver;
 use resource_cache::ResourceCache;
 use scene::Scene;
 #[cfg(feature = "debugger")]
@@ -27,17 +27,18 @@ use std::sync::Arc;
 use std::sync::mpsc::Sender;
 use std::u32;
 use texture_cache::TextureCache;
 use thread_profiler::register_thread_with_profiler;
 use time::precise_time_ns;
 
 struct Document {
     scene: Scene,
-    frame: Frame,
+    frame_ctx: FrameContext,
+    frame_builder: Option<FrameBuilder>,
     window_size: DeviceUintSize,
     inner_rect: DeviceUintRect,
     pan: DeviceIntPoint,
     device_pixel_ratio: f32,
     page_zoom_factor: f32,
     pinch_zoom_factor: f32,
     // A set of pipelines that the caller has requested be
     // made available as output textures.
@@ -59,17 +60,18 @@ impl Document {
     ) -> Self {
         let render_on_scroll = if enable_render_on_scroll {
             Some(false)
         } else {
             None
         };
         Document {
             scene: Scene::new(),
-            frame: Frame::new(config),
+            frame_ctx: FrameContext::new(config),
+            frame_builder: None,
             window_size: initial_size,
             inner_rect: DeviceUintRect::new(DeviceUintPoint::zero(), initial_size),
             pan: DeviceIntPoint::zero(),
             page_zoom_factor: 1.0,
             pinch_zoom_factor: 1.0,
             device_pixel_ratio: default_device_pixel_ratio,
             render_on_scroll,
             output_pipelines: FastHashSet::default(),
@@ -79,17 +81,18 @@ impl Document {
     fn accumulated_scale_factor(&self) -> f32 {
         self.device_pixel_ratio *
         self.page_zoom_factor *
         self.pinch_zoom_factor
     }
 
     fn build_scene(&mut self, resource_cache: &mut ResourceCache) {
         let accumulated_scale_factor = self.accumulated_scale_factor();
-        self.frame.create(
+        self.frame_builder = self.frame_ctx.create(
+            self.frame_builder.take(),
             &self.scene,
             resource_cache,
             self.window_size,
             self.inner_rect,
             accumulated_scale_factor,
         );
     }
 
@@ -99,26 +102,34 @@ impl Document {
         gpu_cache: &mut GpuCache,
         resource_profile: &mut ResourceProfileCounters,
     ) -> RendererFrame {
         let accumulated_scale_factor = self.accumulated_scale_factor();
         let pan = LayerPoint::new(
             self.pan.x as f32 / accumulated_scale_factor,
             self.pan.y as f32 / accumulated_scale_factor,
         );
-        self.frame.build_renderer_frame(
-            resource_cache,
-            gpu_cache,
-            &self.scene.pipelines,
-            accumulated_scale_factor,
-            pan,
-            &self.output_pipelines,
-            &mut resource_profile.texture_cache,
-            &mut resource_profile.gpu_cache,
-        )
+        match self.frame_builder {
+            Some(ref mut builder) => {
+                self.frame_ctx.build_renderer_frame(
+                    builder,
+                    resource_cache,
+                    gpu_cache,
+                    &self.scene.pipelines,
+                    accumulated_scale_factor,
+                    pan,
+                    &self.output_pipelines,
+                    &mut resource_profile.texture_cache,
+                    &mut resource_profile.gpu_cache,
+                )
+            }
+            None => {
+                self.frame_ctx.get_renderer_frame()
+            }
+        }
     }
 }
 
 enum DocumentOp {
     Nop,
     Built,
     ScrolledNop,
     Scrolled(RendererFrame),
@@ -256,17 +267,17 @@ impl RenderBackend {
                 if let Some(ref mut r) = self.recorder {
                     r.write_payload(frame_counter, &data.to_data());
                 }
 
                 let built_display_list =
                     BuiltDisplayList::from_data(data.display_list_data, list_descriptor);
 
                 if !preserve_frame_state {
-                    doc.frame.discard_frame_state_for_pipeline(pipeline_id);
+                    doc.frame_ctx.discard_frame_state_for_pipeline(pipeline_id);
                 }
 
                 let display_list_len = built_display_list.data().len();
                 let (builder_start_time, builder_finish_time, send_start_time) =
                     built_display_list.times();
                 let display_list_received_time = precise_time_ns();
 
                 {
@@ -304,17 +315,17 @@ impl RenderBackend {
             }
             DocumentMsg::UpdatePipelineResources { resources, pipeline_id, epoch } => {
                 profile_scope!("UpdateResources");
 
                 self.resource_cache
                     .update_resources(resources, &mut profile_counters.resources);
 
                 doc.scene.update_epoch(pipeline_id, epoch);
-                doc.frame.update_epoch(pipeline_id, epoch);
+                doc.frame_ctx.update_epoch(pipeline_id, epoch);
 
                 DocumentOp::Nop
             }
             DocumentMsg::SetRootPipeline(pipeline_id) => {
                 profile_scope!("SetRootPipeline");
 
                 doc.scene.set_root_pipeline_id(pipeline_id);
                 if doc.scene.pipelines.get(&pipeline_id).is_some() {
@@ -330,68 +341,74 @@ impl RenderBackend {
 
                 doc.scene.remove_pipeline(pipeline_id);
                 DocumentOp::Nop
             }
             DocumentMsg::Scroll(delta, cursor, move_phase) => {
                 profile_scope!("Scroll");
                 let _timer = profile_counters.total_time.timer();
 
-                if doc.frame.scroll(delta, cursor, move_phase) && doc.render_on_scroll == Some(true)
+                if doc.frame_ctx.scroll(delta, cursor, move_phase) && doc.render_on_scroll == Some(true)
                 {
                     let frame = doc.render(
                         &mut self.resource_cache,
                         &mut self.gpu_cache,
                         &mut profile_counters.resources,
                     );
                     DocumentOp::Scrolled(frame)
                 } else {
                     DocumentOp::ScrolledNop
                 }
             }
             DocumentMsg::HitTest(pipeline_id, point, flags, tx) => {
                 profile_scope!("HitTest");
-                let result = doc.frame.hit_test(pipeline_id, point, flags);
+                let result = match doc.frame_builder {
+                    Some(ref builder) => {
+                        let cst = doc.frame_ctx.get_clip_scroll_tree();
+                        builder.hit_test(cst, pipeline_id, point, flags)
+                    },
+                    None => HitTestResult::default(),
+                };
                 tx.send(result).unwrap();
                 DocumentOp::Nop
             }
             DocumentMsg::ScrollNodeWithId(origin, id, clamp) => {
                 profile_scope!("ScrollNodeWithScrollId");
                 let _timer = profile_counters.total_time.timer();
 
-                if doc.frame.scroll_node(origin, id, clamp) && doc.render_on_scroll == Some(true) {
+                if doc.frame_ctx.scroll_node(origin, id, clamp) && doc.render_on_scroll == Some(true) {
                     let frame = doc.render(
                         &mut self.resource_cache,
                         &mut self.gpu_cache,
                         &mut profile_counters.resources,
                     );
                     DocumentOp::Scrolled(frame)
                 } else {
                     DocumentOp::ScrolledNop
                 }
             }
             DocumentMsg::TickScrollingBounce => {
                 profile_scope!("TickScrollingBounce");
                 let _timer = profile_counters.total_time.timer();
 
-                doc.frame.tick_scrolling_bounce_animations();
+                doc.frame_ctx.tick_scrolling_bounce_animations();
                 if doc.render_on_scroll == Some(true) {
                     let frame = doc.render(
                         &mut self.resource_cache,
                         &mut self.gpu_cache,
                         &mut profile_counters.resources,
                     );
                     DocumentOp::Scrolled(frame)
                 } else {
                     DocumentOp::ScrolledNop
                 }
             }
             DocumentMsg::GetScrollNodeState(tx) => {
                 profile_scope!("GetScrollNodeState");
-                tx.send(doc.frame.get_scroll_node_state()).unwrap();
+                tx.send(doc.frame_ctx.get_scroll_node_state()).unwrap();
                 DocumentOp::Nop
             }
             DocumentMsg::GenerateFrame(property_bindings) => {
                 profile_scope!("GenerateFrame");
                 let _timer = profile_counters.total_time.timer();
 
                 // Ideally, when there are property bindings present,
                 // we won't need to rebuild the entire frame here.
@@ -659,22 +676,24 @@ impl RenderBackend {
 
     #[cfg(feature = "debugger")]
     fn get_clip_scroll_tree_for_debugger(&self) -> String {
         let mut debug_root = debug_server::ClipScrollTreeList::new();
 
         for (_, doc) in &self.documents {
             let debug_node = debug_server::TreeNode::new("document clip_scroll tree");
             let mut builder = debug_server::TreeNodeBuilder::new(debug_node);
+
             // TODO(gw): Restructure the storage of clip-scroll tree, clip store
             //           etc so this isn't so untidy.
-            let clip_store = &doc.frame.frame_builder.as_ref().unwrap().clip_store;
-            doc.frame
-                .clip_scroll_tree
-                .print_with(clip_store, &mut builder);
+            if let Some(ref frame_builder) = doc.frame_builder { // no builder yet: nothing is printed, the bare node is still added below
+                doc.frame_ctx
+                    .get_clip_scroll_tree()
+                    .print_with(&frame_builder.clip_store, &mut builder); // the clip store is read from the frame builder
+            }
 
             debug_root.add(builder.build());
         }
 
         serde_json::to_string(&debug_root).unwrap()
     }
 }
 
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use api::{ClipId, DeviceIntLength, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
-use api::{FilterOp, MixBlendMode};
+use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+use api::{ColorF, FilterOp, LayerPoint, MixBlendMode};
 use api::{LayerRect, PipelineId};
 use clip::{ClipSource, ClipSourcesWeakHandle, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
 use gpu_cache::GpuCacheHandle;
 use internal_types::HardwareCompositeOp;
 use prim_store::PrimitiveIndex;
 use std::{cmp, usize, f32, i32};
 use std::rc::Rc;
@@ -253,23 +253,26 @@ pub struct CacheMaskTask {
     pub geometry_kind: MaskGeometryKind,
     pub coordinate_system_id: CoordinateSystemId,
 }
 
 #[derive(Debug)]
 pub struct PictureTask {
     pub prim_index: PrimitiveIndex,
     pub target_kind: RenderTargetKind,
+    pub content_origin: LayerPoint, // x and y are written into this task's RenderTaskData
+    pub color: ColorF, // r, g, b, a are written into this task's RenderTaskData
 }
 
 #[derive(Debug)]
 pub struct BlurTask {
-    pub blur_radius: DeviceIntLength,
+    pub blur_std_deviation: f32, // takes the place of the integer blur_radius field
     pub target_kind: RenderTargetKind,
     pub regions: Vec<LayerRect>,
+    pub color: ColorF, // same value is given to both the vertical and horizontal pass by new_blur
 }
 
 #[derive(Debug)]
 pub struct RenderTaskData {
     pub data: [f32; FLOATS_PER_RENDER_TASK_INFO],
 }
 
 #[derive(Debug)]
@@ -328,29 +331,33 @@ impl RenderTask {
         let location = RenderTaskLocation::Dynamic(None, rect.size);
         Self::new_alpha_batch(rect.origin, location, frame_output_pipeline_id)
     }
 
     pub fn new_picture(
         size: DeviceIntSize,
         prim_index: PrimitiveIndex,
         target_kind: RenderTargetKind,
+        content_origin: LayerPoint,
+        color: ColorF,
     ) -> RenderTask {
         let clear_mode = match target_kind {
             RenderTargetKind::Color => ClearMode::Transparent,
             RenderTargetKind::Alpha => ClearMode::One,
         };
 
         RenderTask {
             cache_key: None,
             children: Vec::new(),
             location: RenderTaskLocation::Dynamic(None, size),
             kind: RenderTaskKind::Picture(PictureTask {
                 prim_index,
                 target_kind,
+                content_origin,
+                color,
             }),
             clear_mode,
         }
     }
 
     pub fn new_readback(screen_rect: DeviceIntRect) -> RenderTask {
         RenderTask {
             cache_key: None,
@@ -455,47 +462,50 @@ impl RenderTask {
     //    VerticalBlurTask: Apply the separable vertical blur to the primitive.
     //           ^
     //           |
     //    HorizontalBlurTask: Apply the separable horizontal blur to the vertical blur.
     //           |
     //           +---- This is stored as the input task to the primitive shader.
     //
     pub fn new_blur(
-        blur_radius: DeviceIntLength,
+        blur_std_deviation: f32,
         src_task_id: RenderTaskId,
         render_tasks: &mut RenderTaskTree,
         target_kind: RenderTargetKind,
         regions: &[LayerRect],
         clear_mode: ClearMode,
+        color: ColorF,
     ) -> RenderTask {
         let blur_target_size = render_tasks.get(src_task_id).get_dynamic_size();
 
         let blur_task_v = RenderTask {
             cache_key: None,
             children: vec![src_task_id],
             location: RenderTaskLocation::Dynamic(None, blur_target_size),
             kind: RenderTaskKind::VerticalBlur(BlurTask {
-                blur_radius,
+                blur_std_deviation,
                 target_kind,
                 regions: regions.to_vec(),
+                color,
             }),
             clear_mode,
         };
 
         let blur_task_v_id = render_tasks.add(blur_task_v);
 
         let blur_task_h = RenderTask {
             cache_key: None,
             children: vec![blur_task_v_id],
             location: RenderTaskLocation::Dynamic(None, blur_target_size),
             kind: RenderTaskKind::HorizontalBlur(BlurTask {
-                blur_radius,
+                blur_std_deviation,
                 target_kind,
                 regions: regions.to_vec(),
+                color,
             }),
             clear_mode,
         };
 
         blur_task_h
     }
 
     pub fn as_alpha_batch_mut<'a>(&'a mut self) -> &'a mut AlphaRenderTask {
@@ -549,32 +559,32 @@ impl RenderTask {
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                         0.0,
                     ],
                 }
             }
-            RenderTaskKind::Picture(..) => {
+            RenderTaskKind::Picture(ref task) => {
                 let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
-                        0.0,
-                        0.0,
-                        0.0,
+                        task.content_origin.x,
+                        task.content_origin.y,
                         0.0,
-                        0.0,
-                        0.0,
-                        0.0,
+                        task.color.r,
+                        task.color.g,
+                        task.color.b,
+                        task.color.a,
                     ],
                 }
             }
             RenderTaskKind::CacheMask(ref task) => {
                 let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
@@ -597,23 +607,23 @@ impl RenderTask {
                 let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
                         target_rect.origin.y as f32,
                         target_rect.size.width as f32,
                         target_rect.size.height as f32,
                         target_index.0 as f32,
-                        task_info.blur_radius.0 as f32,
-                        0.0,
+                        task_info.blur_std_deviation,
                         0.0,
                         0.0,
-                        0.0,
-                        0.0,
-                        0.0,
+                        task_info.color.r,
+                        task_info.color.g,
+                        task_info.color.b,
+                        task_info.color.a,
                     ],
                 }
             }
             RenderTaskKind::Readback(..) => {
                 let (target_rect, target_index) = self.get_target_rect();
                 RenderTaskData {
                     data: [
                         target_rect.origin.x as f32,
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -28,16 +28,17 @@ use device::{DepthFunction, Device, Fram
              VertexDescriptor, PBO};
 use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute,
              VertexAttributeKind};
 use device::{FileWatcherHandler, GpuTimer, ShaderError, TextureFilter, TextureTarget,
              VertexUsageHint, VAO};
 use euclid::{rect, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
+use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_types::PrimitiveInstance;
 use internal_types::{BatchTextures, SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
 use internal_types::{CacheTextureId, FastHashMap, RendererFrame, ResultMsg, TextureUpdateOp};
 use internal_types::{DebugOutput, RenderTargetMode, TextureUpdateList, TextureUpdateSource};
 use profiler::{BackendProfileCounters, Profiler};
 use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use rayon::Configuration as ThreadPoolConfig;
@@ -56,26 +57,30 @@ use std::mem;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget, RenderTargetKind};
-use tiling::{BatchKey, BatchKind, Frame, RenderTarget, TransformBatchKind};
+use tiling::{BatchKey, BatchKind, BrushBatchKind, Frame, RenderTarget, TransformBatchKind};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
 const GPU_TAG_BRUSH_MASK: GpuProfileTag = GpuProfileTag {
     label: "B_Mask",
     color: debug_colors::BLACK,
 };
+const GPU_TAG_BRUSH_IMAGE: GpuProfileTag = GpuProfileTag {
+    label: "B_Image",
+    color: debug_colors::SILVER,
+};
 const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag {
     label: "C_Clip",
     color: debug_colors::PURPLE,
 };
 const GPU_TAG_CACHE_TEXT_RUN: GpuProfileTag = GpuProfileTag {
     label: "C_TextRun",
     color: debug_colors::MISTYROSE,
 };
@@ -142,20 +147,16 @@ const GPU_TAG_PRIM_RADIAL_GRADIENT: GpuP
 const GPU_TAG_PRIM_BORDER_CORNER: GpuProfileTag = GpuProfileTag {
     label: "BorderCorner",
     color: debug_colors::DARKSLATEGREY,
 };
 const GPU_TAG_PRIM_BORDER_EDGE: GpuProfileTag = GpuProfileTag {
     label: "BorderEdge",
     color: debug_colors::LAVENDER,
 };
-const GPU_TAG_PRIM_CACHE_IMAGE: GpuProfileTag = GpuProfileTag {
-    label: "CacheImage",
-    color: debug_colors::SILVER,
-};
 const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag {
     label: "Blur",
     color: debug_colors::VIOLET,
 };
 
 const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag {
     label: "Alpha Targets",
     color: debug_colors::BLACK,
@@ -172,30 +173,34 @@ const GPU_SAMPLER_TAG_TRANSPARENT: GpuPr
 #[cfg(feature = "debugger")]
 impl BatchKind {
     fn debug_name(&self) -> &'static str {
         match *self {
             BatchKind::Composite { .. } => "Composite",
             BatchKind::HardwareComposite => "HardwareComposite",
             BatchKind::SplitComposite => "SplitComposite",
             BatchKind::Blend => "Blend",
+            BatchKind::Brush(kind) => {
+                match kind {
+                    BrushBatchKind::Image(..) => "Brush (Image)",
+                }
+            }
             BatchKind::Transformable(_, kind) => match kind {
                 TransformBatchKind::Rectangle(..) => "Rectangle",
-                TransformBatchKind::TextRun => "TextRun",
+                TransformBatchKind::TextRun(..) => "TextRun",
                 TransformBatchKind::Image(image_buffer_kind, ..) => match image_buffer_kind {
                     ImageBufferKind::Texture2D => "Image (2D)",
                     ImageBufferKind::TextureRect => "Image (Rect)",
                     ImageBufferKind::TextureExternal => "Image (External)",
                     ImageBufferKind::Texture2DArray => "Image (Array)",
                 },
                 TransformBatchKind::YuvImage(..) => "YuvImage",
                 TransformBatchKind::AlignedGradient => "AlignedGradient",
                 TransformBatchKind::AngleGradient => "AngleGradient",
                 TransformBatchKind::RadialGradient => "RadialGradient",
-                TransformBatchKind::CacheImage(..) => "CacheImage",
                 TransformBatchKind::BorderCorner => "BorderCorner",
                 TransformBatchKind::BorderEdge => "BorderEdge",
                 TransformBatchKind::Line => "Line",
             },
         }
     }
 }
 
@@ -213,24 +218,37 @@ bitflags! {
 // behaviour per draw-call.
 type ShaderMode = i32;
 
 #[repr(C)]
 enum TextShaderMode {
     Alpha = 0,
     SubpixelPass0 = 1,
     SubpixelPass1 = 2,
+    ColorBitmap = 3, // mode selected for GlyphFormat::ColorBitmap glyphs
 }
 
 impl Into<ShaderMode> for TextShaderMode {
     fn into(self) -> i32 {
         self as i32
     }
 }
 
+impl From<GlyphFormat> for TextShaderMode { // maps a glyph format to the text shader mode; panics for Subpixel
+    fn from(format: GlyphFormat) -> TextShaderMode {
+        match format {
+            GlyphFormat::Mono | GlyphFormat::Alpha => TextShaderMode::Alpha, // both formats share the Alpha mode
+            GlyphFormat::Subpixel => { // no single mode fits: the enum has SubpixelPass0 and SubpixelPass1
+                panic!("Subpixel glyph format must be handled separately.");
+            }
+            GlyphFormat::ColorBitmap => TextShaderMode::ColorBitmap,
+        }
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 enum TextureSampler {
     Color0,
     Color1,
     Color2,
     CacheA8,
     CacheRGBA8,
     ResourceCache,
@@ -830,16 +848,17 @@ impl VertexDataTexture {
     fn deinit(self, device: &mut Device) {
         device.delete_pbo(self.pbo);
         device.delete_texture(self.texture);
     }
 }
 
 const TRANSFORM_FEATURE: &str = "TRANSFORM";
 const CLIP_FEATURE: &str = "CLIP";
+const ALPHA_FEATURE: &str = "ALPHA_PASS";
 
 enum ShaderKind {
     Primitive,
     Cache(VertexArrayKind),
     ClipCache,
     Brush,
 }
 
@@ -924,16 +943,87 @@ impl LazilyCompiledShader {
     }
 }
 
 struct PrimitiveShader {
     simple: LazilyCompiledShader,
     transform: LazilyCompiledShader,
 }
 
+// A brush shader supports two modes:
+// opaque:
+//   Used for completely opaque primitives,
+//   or inside segments of partially
+//   opaque primitives. Assumes no need
+//   for clip masks, AA etc.
+// alpha:
+//   Used for brush primitives in the alpha
+//   pass. Assumes that AA should be applied
+//   along the primitive edge, and also that
+//   clip mask is present.
+struct BrushShader {
+    opaque: LazilyCompiledShader, // created with exactly the feature list passed to new()
+    alpha: LazilyCompiledShader, // created with that feature list plus ALPHA_FEATURE
+}
+
+impl BrushShader {
+    fn new( // creates the opaque and alpha variants of shader `name`; the first failure is returned via try!
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache: bool,
+    ) -> Result<BrushShader, ShaderError> {
+        let opaque = try!{
+            LazilyCompiledShader::new(ShaderKind::Brush,
+                                      name,
+                                      features,
+                                      device,
+                                      precache)
+        };
+
+        let mut alpha_features = features.to_vec(); // owned copy, so the caller's slice is not modified
+        alpha_features.push(ALPHA_FEATURE); // the alpha variant differs only by this extra feature
+
+        let alpha = try!{
+            LazilyCompiledShader::new(ShaderKind::Brush,
+                                      name,
+                                      &alpha_features,
+                                      device,
+                                      precache)
+        };
+
+        Ok(BrushShader { opaque, alpha })
+    }
+
+    fn bind<M>( // binds one of the two variants, chosen solely by blend_mode
+        &mut self,
+        device: &mut Device,
+        blend_mode: BlendMode,
+        projection: &Transform3D<f32>,
+        mode: M,
+        renderer_errors: &mut Vec<RendererError>,
+    ) where M: Into<ShaderMode> {
+        match blend_mode {
+            BlendMode::None => { // no blending: use the opaque variant
+                self.opaque.bind(device, projection, mode, renderer_errors)
+            }
+            BlendMode::Alpha |
+            BlendMode::PremultipliedAlpha |
+            BlendMode::Subpixel => { // every blending mode: use the alpha variant
+                self.alpha.bind(device, projection, mode, renderer_errors)
+            }
+        }
+    }
+
+    fn deinit(self, device: &mut Device) { // consumes self and deinitializes both variants
+        self.opaque.deinit(device);
+        self.alpha.deinit(device);
+    }
+}
+
 struct FileWatcher {
     notifier: Box<RenderNotifier>,
     result_tx: Sender<ResultMsg>,
 }
 
 impl FileWatcherHandler for FileWatcher {
     fn file_changed(&self, path: PathBuf) {
         self.result_tx.send(ResultMsg::RefreshShader(path)).ok();
@@ -1091,17 +1181,21 @@ pub struct Renderer {
 
     // These are "cache shaders". These shaders are used to
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
     cs_text_run: LazilyCompiledShader,
     cs_line: LazilyCompiledShader,
     cs_blur_a8: LazilyCompiledShader,
     cs_blur_rgba8: LazilyCompiledShader,
+
+    // Brush shaders
     brush_mask: LazilyCompiledShader,
+    brush_image_rgba8: BrushShader,
+    brush_image_a8: BrushShader,
 
     /// These are "cache clip shaders". These shaders are used to
     /// draw clip instances into the cached clip mask. The results
     /// of these shaders are also used by the primitive shaders.
     cs_clip_rectangle: LazilyCompiledShader,
     cs_clip_image: LazilyCompiledShader,
     cs_clip_border: LazilyCompiledShader,
 
@@ -1117,18 +1211,16 @@ pub struct Renderer {
     ps_text_run: PrimitiveShader,
     ps_image: Vec<Option<PrimitiveShader>>,
     ps_yuv_image: Vec<Option<PrimitiveShader>>,
     ps_border_corner: PrimitiveShader,
     ps_border_edge: PrimitiveShader,
     ps_gradient: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
     ps_radial_gradient: PrimitiveShader,
-    ps_cache_image_rgba8: PrimitiveShader,
-    ps_cache_image_a8: PrimitiveShader,
     ps_line: PrimitiveShader,
 
     ps_blend: LazilyCompiledShader,
     ps_hw_composite: LazilyCompiledShader,
     ps_split_composite: LazilyCompiledShader,
     ps_composite: LazilyCompiledShader,
 
     max_texture_size: u32,
@@ -1291,28 +1383,42 @@ impl Renderer {
         let brush_mask = try!{
             LazilyCompiledShader::new(ShaderKind::Brush,
                                       "brush_mask",
                                       &[],
                                       &mut device,
                                       options.precache_shaders)
         };
 
+        let brush_image_a8 = try!{
+            BrushShader::new("brush_image",
+                             &mut device,
+                             &["ALPHA_TARGET"],
+                             options.precache_shaders)
+        };
+
+        let brush_image_rgba8 = try!{
+            BrushShader::new("brush_image",
+                             &mut device,
+                             &["COLOR_TARGET"],
+                             options.precache_shaders)
+        };
+
         let cs_blur_a8 = try!{
             LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Blur),
                                      "cs_blur",
-                                      &["ALPHA"],
+                                      &["ALPHA_TARGET"],
                                       &mut device,
                                       options.precache_shaders)
         };
 
         let cs_blur_rgba8 = try!{
             LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Blur),
                                      "cs_blur",
-                                      &["COLOR"],
+                                      &["COLOR_TARGET"],
                                       &mut device,
                                       options.precache_shaders)
         };
 
         let cs_clip_rectangle = try!{
             LazilyCompiledShader::new(ShaderKind::ClipCache,
                                       "cs_clip_rectangle",
                                       &[],
@@ -1476,30 +1582,16 @@ impl Renderer {
                                  if options.enable_dithering {
                                     &dithering_feature
                                  } else {
                                     &[]
                                  },
                                  options.precache_shaders)
         };
 
-        let ps_cache_image_a8 = try!{
-            PrimitiveShader::new("ps_cache_image",
-                                 &mut device,
-                                 &["ALPHA"],
-                                 options.precache_shaders)
-        };
-
-        let ps_cache_image_rgba8 = try!{
-            PrimitiveShader::new("ps_cache_image",
-                                 &mut device,
-                                 &["COLOR"],
-                                 options.precache_shaders)
-        };
-
         let ps_blend = try!{
             LazilyCompiledShader::new(ShaderKind::Primitive,
                                      "ps_blend",
                                      &[],
                                      &mut device,
                                      options.precache_shaders)
         };
 
@@ -1707,31 +1799,31 @@ impl Renderer {
             pending_texture_updates: Vec::new(),
             pending_gpu_cache_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
             cs_text_run,
             cs_line,
             cs_blur_a8,
             cs_blur_rgba8,
             brush_mask,
+            brush_image_rgba8,
+            brush_image_a8,
             cs_clip_rectangle,
             cs_clip_border,
             cs_clip_image,
             ps_rectangle,
             ps_rectangle_clip,
             ps_text_run,
             ps_image,
             ps_yuv_image,
             ps_border_corner,
             ps_border_edge,
             ps_gradient,
             ps_angle_gradient,
             ps_radial_gradient,
-            ps_cache_image_rgba8,
-            ps_cache_image_a8,
             ps_blend,
             ps_hw_composite,
             ps_split_composite,
             ps_composite,
             ps_line,
             debug: debug_renderer,
             debug_flags,
             enable_batcher: options.enable_batcher,
@@ -2339,16 +2431,34 @@ impl Renderer {
                 );
                 GPU_TAG_PRIM_SPLIT_COMPOSITE
             }
             BatchKind::Blend => {
                 self.ps_blend
                     .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 GPU_TAG_PRIM_BLEND
             }
+            BatchKind::Brush(brush_kind) => {
+                match brush_kind {
+                    BrushBatchKind::Image(target_kind) => {
+                        let shader = match target_kind {
+                            RenderTargetKind::Alpha => &mut self.brush_image_a8,
+                            RenderTargetKind::Color => &mut self.brush_image_rgba8,
+                        };
+                        shader.bind(
+                            &mut self.device,
+                            key.blend_mode,
+                            projection,
+                            0,
+                            &mut self.renderer_errors,
+                        );
+                        GPU_TAG_BRUSH_IMAGE
+                    }
+                }
+            }
             BatchKind::Transformable(transform_kind, batch_kind) => match batch_kind {
                 TransformBatchKind::Rectangle(needs_clipping) => {
                     debug_assert!(
                         !needs_clipping || match key.blend_mode {
                             BlendMode::Alpha |
                             BlendMode::PremultipliedAlpha |
                             BlendMode::Subpixel => true,
                             BlendMode::None => false,
@@ -2379,17 +2489,17 @@ impl Renderer {
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_LINE
                 }
-                TransformBatchKind::TextRun => {
+                TransformBatchKind::TextRun(..) => {
                     unreachable!("bug: text batches are special cased");
                 }
                 TransformBatchKind::Image(image_buffer_kind) => {
                     self.ps_image[image_buffer_kind as usize]
                         .as_mut()
                         .expect("Unsupported image shader kind")
                         .bind(
                             &mut self.device,
@@ -2460,39 +2570,16 @@ impl Renderer {
                         &mut self.device,
                         transform_kind,
                         projection,
                         0,
                         &mut self.renderer_errors,
                     );
                     GPU_TAG_PRIM_RADIAL_GRADIENT
                 }
-                TransformBatchKind::CacheImage(target_kind) => {
-                    match target_kind {
-                        RenderTargetKind::Alpha => {
-                            self.ps_cache_image_a8.bind(
-                                &mut self.device,
-                                transform_kind,
-                                projection,
-                                0,
-                                &mut self.renderer_errors,
-                            );
-                        }
-                        RenderTargetKind::Color => {
-                            self.ps_cache_image_rgba8.bind(
-                                &mut self.device,
-                                transform_kind,
-                                projection,
-                                0,
-                                &mut self.renderer_errors,
-                            );
-                        }
-                    }
-                    GPU_TAG_PRIM_CACHE_IMAGE
-                }
             },
         };
 
         // Handle special case readback for composites.
         match key.kind {
             BatchKind::Composite {
                 task_id,
                 source_id,
@@ -2718,17 +2805,17 @@ impl Renderer {
                         BlendMode::Subpixel => ColorF::new(0.5, 0.0, 0.4, 1.0),
                     }.into();
                     for item_rect in &batch.item_rects {
                         self.debug.add_rect(item_rect, color);
                     }
                 }
 
                 match batch.key.kind {
-                    BatchKind::Transformable(transform_kind, TransformBatchKind::TextRun) => {
+                    BatchKind::Transformable(transform_kind, TransformBatchKind::TextRun(glyph_format)) => {
                         // Text run batches are handled by this special case branch.
                         // In the case of subpixel text, we draw it as a two pass
                         // effect, to ensure we can apply clip masks correctly.
                         // In the future, there are several optimizations available:
                         // 1) Use dual source blending where available (almost all recent hardware).
                         // 2) Use frame buffer fetch where available (most modern hardware).
                         // 3) Consider the old constant color blend method where no clip is applied.
                         let _gm = self.gpu_profile.add_marker(GPU_TAG_PRIM_TEXT_RUN);
@@ -2738,17 +2825,17 @@ impl Renderer {
                         match batch.key.blend_mode {
                             BlendMode::PremultipliedAlpha => {
                                 self.device.set_blend_mode_premultiplied_alpha();
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
                                     transform_kind,
                                     projection,
-                                    TextShaderMode::Alpha,
+                                    TextShaderMode::from(glyph_format),
                                     &mut self.renderer_errors,
                                 );
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures
                                 );
@@ -3348,44 +3435,46 @@ impl Renderer {
     fn draw_render_target_debug(&mut self, framebuffer_size: DeviceUintSize) {
         if !self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
             return;
         }
 
         let mut spacing = 16;
         let mut size = 512;
         let fb_width = framebuffer_size.width as i32;
-        let num_textures = self.color_render_targets
+        let num_layers: i32 = self.color_render_targets
             .iter()
             .chain(self.alpha_render_targets.iter())
-            .count() as i32;
-
-        if num_textures * (size + spacing) > fb_width {
-            let factor = fb_width as f32 / (num_textures * (size + spacing)) as f32;
+            .map(|texture| texture.get_render_target_layer_count() as i32)
+            .sum();
+
+        if num_layers * (size + spacing) > fb_width {
+            let factor = fb_width as f32 / (num_layers * (size + spacing)) as f32;
             size = (size as f32 * factor) as i32;
             spacing = (spacing as f32 * factor) as i32;
         }
 
-        for (i, texture) in self.color_render_targets
+        let mut target_index = 0;
+        for texture in self.color_render_targets
             .iter()
             .chain(self.alpha_render_targets.iter())
-            .enumerate()
         {
             let dimensions = texture.get_dimensions();
             let src_rect = DeviceIntRect::new(DeviceIntPoint::zero(), dimensions.to_i32());
 
             let layer_count = texture.get_render_target_layer_count();
             for layer_index in 0 .. layer_count {
                 self.device
                     .bind_read_target(Some((texture, layer_index as i32)));
-                let x = fb_width - (spacing + size) * (i as i32 + 1);
+                let x = fb_width - (spacing + size) * (target_index + 1);
                 let y = spacing;
 
                 let dest_rect = rect(x, y, size, size);
                 self.device.blit_render_target(src_rect, dest_rect);
+                target_index += 1;
             }
         }
     }
 
     fn draw_texture_cache_debug(&mut self, framebuffer_size: DeviceUintSize) {
         if !self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
             return;
         }
@@ -3490,16 +3579,18 @@ impl Renderer {
         self.device.delete_vao(self.clip_vao);
         self.device.delete_vao(self.blur_vao);
         self.debug.deinit(&mut self.device);
         self.cs_text_run.deinit(&mut self.device);
         self.cs_line.deinit(&mut self.device);
         self.cs_blur_a8.deinit(&mut self.device);
         self.cs_blur_rgba8.deinit(&mut self.device);
         self.brush_mask.deinit(&mut self.device);
+        self.brush_image_rgba8.deinit(&mut self.device);
+        self.brush_image_a8.deinit(&mut self.device);
         self.cs_clip_rectangle.deinit(&mut self.device);
         self.cs_clip_image.deinit(&mut self.device);
         self.cs_clip_border.deinit(&mut self.device);
         self.ps_rectangle.deinit(&mut self.device);
         self.ps_rectangle_clip.deinit(&mut self.device);
         self.ps_text_run.deinit(&mut self.device);
         for shader in self.ps_image {
             if let Some(shader) = shader {
@@ -3514,18 +3605,16 @@ impl Renderer {
         for (_, target) in self.output_targets {
             self.device.delete_fbo(target.fbo_id);
         }
         self.ps_border_corner.deinit(&mut self.device);
         self.ps_border_edge.deinit(&mut self.device);
         self.ps_gradient.deinit(&mut self.device);
         self.ps_angle_gradient.deinit(&mut self.device);
         self.ps_radial_gradient.deinit(&mut self.device);
-        self.ps_cache_image_rgba8.deinit(&mut self.device);
-        self.ps_cache_image_a8.deinit(&mut self.device);
         self.ps_line.deinit(&mut self.device);
         self.ps_blend.deinit(&mut self.device);
         self.ps_hw_composite.deinit(&mut self.device);
         self.ps_split_composite.deinit(&mut self.device);
         self.ps_composite.deinit(&mut self.device);
         self.device.end_frame();
     }
 }
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -11,27 +11,27 @@ use api::{ExternalImageData, ExternalIma
 use api::{FontInstanceOptions, FontInstancePlatformOptions, FontVariation};
 use api::{GlyphDimensions, GlyphKey, IdNamespace};
 use api::{ImageData, ImageDescriptor, ImageKey, ImageRendering};
 use api::{TileOffset, TileSize};
 use app_units::Au;
 use device::TextureFilter;
 use frame::FrameId;
 use glyph_cache::GlyphCache;
-use glyph_rasterizer::{GlyphRasterizer, GlyphRequest};
+use glyph_rasterizer::{GlyphFormat, GlyphRasterizer, GlyphRequest};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
 use profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
 use rayon::ThreadPool;
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::cmp;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::mem;
-use std::sync::Arc;
+use std::sync::{Arc, RwLock};
 use texture_cache::{TextureCache, TextureCacheHandle};
 
 const DEFAULT_TILE_SIZE: TileSize = 512;
 
 pub struct GlyphFetchResult {
     pub index_in_text_run: i32,
     pub uv_rect_address: GpuCacheAddress,
 }
@@ -68,17 +68,17 @@ enum State {
 struct ImageResource {
     data: ImageData,
     descriptor: ImageDescriptor,
     epoch: Epoch,
     tiling: Option<TileSize>,
     dirty_rect: Option<DeviceUintRect>,
 }
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub struct ImageTiling {
     pub image_size: DeviceUintSize,
     pub tile_size: TileSize,
 }
 
 pub type TiledImageMap = FastHashMap<ImageKey, ImageTiling>;
 
 struct ImageTemplates {
@@ -184,36 +184,39 @@ impl Into<BlobImageRequest> for ImageReq
     fn into(self) -> BlobImageRequest {
         BlobImageRequest {
             key: self.key,
             tile: self.tile,
         }
     }
 }
 
+type ImageCache = ResourceClassCache<ImageRequest, CachedImageInfo>;
+pub type FontInstanceMap = Arc<RwLock<FastHashMap<FontInstanceKey, FontInstance>>>;
+
 struct Resources {
     font_templates: FastHashMap<FontKey, FontTemplate>,
-    font_instances: FastHashMap<FontInstanceKey, FontInstance>,
+    font_instances: FontInstanceMap,
     image_templates: ImageTemplates,
 }
 
 impl BlobImageResources for Resources {
     fn get_font_data(&self, key: FontKey) -> &FontTemplate {
         self.font_templates.get(&key).unwrap()
     }
     fn get_image(&self, key: ImageKey) -> Option<(&ImageData, &ImageDescriptor)> {
         self.image_templates
             .get(key)
             .map(|resource| (&resource.data, &resource.descriptor))
     }
 }
 
 pub struct ResourceCache {
     cached_glyphs: GlyphCache,
-    cached_images: ResourceClassCache<ImageRequest, CachedImageInfo>,
+    cached_images: ImageCache,
 
     resources: Resources,
     state: State,
     current_frame_id: FrameId,
 
     texture_cache: TextureCache,
 
     // TODO(gw): We should expire (parts of) this cache semi-regularly!
@@ -234,35 +237,34 @@ impl ResourceCache {
         workers: Arc<ThreadPool>,
         blob_image_renderer: Option<Box<BlobImageRenderer>>,
     ) -> ResourceCache {
         ResourceCache {
             cached_glyphs: GlyphCache::new(),
             cached_images: ResourceClassCache::new(),
             resources: Resources {
                 font_templates: FastHashMap::default(),
-                font_instances: FastHashMap::default(),
+                font_instances: Arc::new(RwLock::new(FastHashMap::default())),
                 image_templates: ImageTemplates::new(),
             },
             cached_glyph_dimensions: FastHashMap::default(),
             texture_cache,
             state: State::Idle,
             current_frame_id: FrameId(0),
             pending_image_requests: FastHashSet::default(),
             glyph_rasterizer: GlyphRasterizer::new(workers),
             blob_image_renderer,
         }
     }
 
     pub fn max_texture_size(&self) -> u32 {
         self.texture_cache.max_texture_size()
     }
 
-    fn should_tile(&self, descriptor: &ImageDescriptor, data: &ImageData) -> bool {
-        let limit = self.max_texture_size();
+    fn should_tile(limit: u32, descriptor: &ImageDescriptor, data: &ImageData) -> bool {
         let size_check = descriptor.width > limit || descriptor.height > limit;
         match *data {
             ImageData::Raw(_) | ImageData::Blob(_) => size_check,
             ImageData::External(info) => {
                 // External handles already represent existing textures so it does
                 // not make sense to tile them into smaller ones.
                 info.image_type == ExternalImageType::ExternalBuffer && size_check
             }
@@ -360,38 +362,44 @@ impl ResourceCache {
             subpx_dir,
             platform_options,
             variations,
             synthetic_italics,
         );
         if self.glyph_rasterizer.is_bitmap_font(&instance) {
             instance.render_mode = instance.render_mode.limit_by(FontRenderMode::Bitmap);
         }
-        self.resources.font_instances.insert(instance_key, instance);
+        self.resources.font_instances
+            .write()
+            .unwrap()
+            .insert(instance_key, instance);
     }
 
     pub fn delete_font_instance(&mut self, instance_key: FontInstanceKey) {
-        self.resources.font_instances.remove(&instance_key);
+        self.resources.font_instances
+            .write()
+            .unwrap()
+            .remove(&instance_key);
         if let Some(ref mut r) = self.blob_image_renderer {
             r.delete_font_instance(instance_key);
         }
     }
 
-    pub fn get_font_instance(&self, instance_key: FontInstanceKey) -> Option<&FontInstance> {
-        self.resources.font_instances.get(&instance_key)
+    pub fn get_font_instances(&self) -> FontInstanceMap {
+        self.resources.font_instances.clone()
     }
 
     pub fn add_image_template(
         &mut self,
         image_key: ImageKey,
         descriptor: ImageDescriptor,
         mut data: ImageData,
         mut tiling: Option<TileSize>,
     ) {
-        if tiling.is_none() && self.should_tile(&descriptor, &data) {
+        if tiling.is_none() && Self::should_tile(self.max_texture_size(), &descriptor, &data) {
             // We aren't going to be able to upload a texture this big, so tile it, even
             // if tiling was not requested.
             tiling = Some(DEFAULT_TILE_SIZE);
         }
 
         if let ImageData::Blob(ref mut blob) = data {
             self.blob_image_renderer.as_mut().unwrap().add(
                 image_key,
@@ -413,50 +421,48 @@ impl ResourceCache {
 
     pub fn update_image_template(
         &mut self,
         image_key: ImageKey,
         descriptor: ImageDescriptor,
         mut data: ImageData,
         dirty_rect: Option<DeviceUintRect>,
     ) {
-        let resource = if let Some(image) = self.resources.image_templates.get(image_key) {
-            let next_epoch = Epoch(image.epoch.0 + 1);
-
-            let mut tiling = image.tiling;
-            if tiling.is_none() && self.should_tile(&descriptor, &data) {
-                tiling = Some(DEFAULT_TILE_SIZE);
-            }
-
-            if let ImageData::Blob(ref mut blob) = data {
-                self.blob_image_renderer
-                    .as_mut()
-                    .unwrap()
-                    .update(image_key, mem::replace(blob, BlobImageData::new()), dirty_rect);
-            }
-
-            ImageResource {
-                descriptor,
-                data,
-                epoch: next_epoch,
-                tiling,
-                dirty_rect: match (dirty_rect, image.dirty_rect) {
-                    (Some(rect), Some(prev_rect)) => Some(rect.union(&prev_rect)),
-                    (Some(rect), None) => Some(rect),
-                    _ => None,
-                },
-            }
-        } else {
-            panic!(
-                "Attempt to update non-existant image (key {:?}).",
+        let max_texture_size = self.max_texture_size();
+        let image = match self.resources.image_templates.get_mut(image_key) {
+            Some(res) => res,
+            None => panic!(
+                "Attempt to update non-existent image (key {:?}).",
                 image_key
-            );
+            ),
         };
 
-        self.resources.image_templates.insert(image_key, resource);
+        let mut tiling = image.tiling;
+        if tiling.is_none() && Self::should_tile(max_texture_size, &descriptor, &data) {
+            tiling = Some(DEFAULT_TILE_SIZE);
+        }
+
+        if let ImageData::Blob(ref mut blob) = data {
+            self.blob_image_renderer
+                .as_mut()
+                .unwrap()
+                .update(image_key, mem::replace(blob, BlobImageData::new()), dirty_rect);
+        }
+
+        *image = ImageResource {
+            descriptor,
+            data,
+            epoch: Epoch(image.epoch.0 + 1),
+            tiling,
+            dirty_rect: match (dirty_rect, image.dirty_rect) {
+                (Some(rect), Some(prev_rect)) => Some(rect.union(&prev_rect)),
+                (Some(rect), None) => Some(rect),
+                (None, _) => None,
+            },
+        };
     }
 
     pub fn delete_image_template(&mut self, image_key: ImageKey) {
         let value = self.resources.image_templates.remove(image_key);
 
         self.cached_images
             .clear_keys(|request| request.key == image_key);
 
@@ -479,108 +485,109 @@ impl ResourceCache {
     ) {
         debug_assert_eq!(self.state, State::AddResources);
         let request = ImageRequest {
             key,
             rendering,
             tile,
         };
 
-        match self.resources.image_templates.get(key) {
-            Some(template) => {
-                // Images that don't use the texture cache can early out.
-                if !template.data.uses_texture_cache() {
-                    return;
-                }
-
-                let side_size =
-                    template.tiling.map_or(cmp::max(template.descriptor.width, template.descriptor.height),
-                                           |tile_size| tile_size as u32);
-                if side_size > self.texture_cache.max_texture_size() {
-                    // The image or tiling size is too big for hardware texture size.
-                    warn!("Dropping image, image:(w:{},h:{}, tile:{}) is too big for hardware!",
-                          template.descriptor.width, template.descriptor.height, template.tiling.unwrap_or(0));
-                    self.cached_images.insert(request, Err(ResourceClassCacheError::OverLimitSize));
-                    return;
-                }
-
-                // If this image exists in the texture cache, *and* the epoch
-                // in the cache matches that of the template, then it is
-                // valid to use as-is.
-                let (entry, needs_update) = match self.cached_images.entry(request) {
-                    Occupied(entry) => {
-                        let needs_update = entry.get().as_ref().unwrap().epoch != template.epoch;
-                        (entry.into_mut(), needs_update)
-                    }
-                    Vacant(entry) => (
-                        entry.insert(Ok(
-                            CachedImageInfo {
-                                epoch: template.epoch,
-                                texture_cache_handle: TextureCacheHandle::new(),
-                            }
-                        )),
-                        true,
-                    ),
-                };
-
-                let needs_upload = self.texture_cache
-                    .request(&mut entry.as_mut().unwrap().texture_cache_handle, gpu_cache);
-
-                if !needs_upload && !needs_update {
-                    return;
-                }
-
-                // We can start a worker thread rasterizing right now, if:
-                //  - The image is a blob.
-                //  - The blob hasn't already been requested this frame.
-                if self.pending_image_requests.insert(request) {
-                    if template.data.is_blob() {
-                        if let Some(ref mut renderer) = self.blob_image_renderer {
-                            let (offset, w, h) = match template.tiling {
-                                Some(tile_size) => {
-                                    let tile_offset = request.tile.unwrap();
-                                    let (w, h) = compute_tile_size(
-                                        &template.descriptor,
-                                        tile_size,
-                                        tile_offset,
-                                    );
-                                    let offset = DevicePoint::new(
-                                        tile_offset.x as f32 * tile_size as f32,
-                                        tile_offset.y as f32 * tile_size as f32,
-                                    );
-
-                                    (offset, w, h)
-                                }
-                                None => (
-                                    DevicePoint::zero(),
-                                    template.descriptor.width,
-                                    template.descriptor.height,
-                                ),
-                            };
-
-                            renderer.request(
-                                &self.resources,
-                                request.into(),
-                                &BlobImageDescriptor {
-                                    width: w,
-                                    height: h,
-                                    offset,
-                                    format: template.descriptor.format,
-                                },
-                                template.dirty_rect,
-                            );
-                        }
-                    }
-                }
-            }
+        let template = match self.resources.image_templates.get(key) {
+            Some(template) => template,
             None => {
                 warn!(
                     "ERROR: Trying to render deleted / non-existent key {:?}",
                     key
                 );
+                return
+            }
+        };
+
+        // Images that don't use the texture cache can early out.
+        if !template.data.uses_texture_cache() {
+            return;
+        }
+
+        let side_size =
+            template.tiling.map_or(cmp::max(template.descriptor.width, template.descriptor.height),
+                                   |tile_size| tile_size as u32);
+        if side_size > self.texture_cache.max_texture_size() {
+            // The image or tiling size is too big for hardware texture size.
+            warn!("Dropping image, image:(w:{},h:{}, tile:{}) is too big for hardware!",
+                  template.descriptor.width, template.descriptor.height, template.tiling.unwrap_or(0));
+            self.cached_images.insert(request, Err(ResourceClassCacheError::OverLimitSize));
+            return;
+        }
+
+        // If this image exists in the texture cache, *and* the epoch
+        // in the cache matches that of the template, then it is
+        // valid to use as-is.
+        let (entry, needs_update) = match self.cached_images.entry(request) {
+            Occupied(entry) => {
+                let needs_update = entry.get().as_ref().unwrap().epoch != template.epoch;
+                (entry.into_mut(), needs_update)
+            }
+            Vacant(entry) => (
+                entry.insert(Ok(
+                    CachedImageInfo {
+                        epoch: template.epoch,
+                        texture_cache_handle: TextureCacheHandle::new(),
+                    }
+                )),
+                true,
+            ),
+        };
+
+        let needs_upload = self.texture_cache
+            .request(&mut entry.as_mut().unwrap().texture_cache_handle, gpu_cache);
+
+        if !needs_upload && !needs_update {
+            return;
+        }
+
+        // We can start a worker thread rasterizing right now, if:
+        //  - The image is a blob.
+        //  - The blob hasn't already been requested this frame.
+        if self.pending_image_requests.insert(request) {
+            if template.data.is_blob() {
+                if let Some(ref mut renderer) = self.blob_image_renderer {
+                    let (offset, w, h) = match template.tiling {
+                        Some(tile_size) => {
+                            let tile_offset = request.tile.unwrap();
+                            let (w, h) = compute_tile_size(
+                                &template.descriptor,
+                                tile_size,
+                                tile_offset,
+                            );
+                            let offset = DevicePoint::new(
+                                tile_offset.x as f32 * tile_size as f32,
+                                tile_offset.y as f32 * tile_size as f32,
+                            );
+
+                            (offset, w, h)
+                        }
+                        None => (
+                            DevicePoint::zero(),
+                            template.descriptor.width,
+                            template.descriptor.height,
+                        ),
+                    };
+
+                    renderer.request(
+                        &self.resources,
+                        request.into(),
+                        &BlobImageDescriptor {
+                            width: w,
+                            height: h,
+                            offset,
+                            format: template.descriptor.format,
+                        },
+                        template.dirty_rect,
+                    );
+                }
             }
         }
     }
 
     pub fn request_glyphs(
         &mut self,
         mut font: FontInstance,
         glyph_keys: &[GlyphKey],
@@ -605,45 +612,48 @@ impl ResourceCache {
     pub fn fetch_glyphs<F>(
         &self,
         mut font: FontInstance,
         glyph_keys: &[GlyphKey],
         fetch_buffer: &mut Vec<GlyphFetchResult>,
         gpu_cache: &GpuCache,
         mut f: F,
     ) where
-        F: FnMut(SourceTexture, &[GlyphFetchResult]),
+        F: FnMut(SourceTexture, GlyphFormat, &[GlyphFetchResult]),
     {
         debug_assert_eq!(self.state, State::QueryResources);
 
         self.glyph_rasterizer.prepare_font(&mut font);
         let glyph_key_cache = self.cached_glyphs.get_glyph_key_cache_for_font(&font);
 
         let mut current_texture_id = SourceTexture::Invalid;
+        let mut current_glyph_format = GlyphFormat::Subpixel;
         debug_assert!(fetch_buffer.is_empty());
 
         for (loop_index, key) in glyph_keys.iter().enumerate() {
             if let Ok(Some(ref glyph)) = *glyph_key_cache.get(key) {
                 let cache_item = self.texture_cache.get(&glyph.texture_cache_handle);
-                if current_texture_id != cache_item.texture_id {
+                if current_texture_id != cache_item.texture_id ||
+                   current_glyph_format != glyph.format {
                     if !fetch_buffer.is_empty() {
-                        f(current_texture_id, fetch_buffer);
+                        f(current_texture_id, current_glyph_format, fetch_buffer);
                         fetch_buffer.clear();
                     }
                     current_texture_id = cache_item.texture_id;
+                    current_glyph_format = glyph.format;
                 }
                 fetch_buffer.push(GlyphFetchResult {
                     index_in_text_run: loop_index as i32,
                     uv_rect_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
                 });
             }
         }
 
         if !fetch_buffer.is_empty() {
-            f(current_texture_id, fetch_buffer);
+            f(current_texture_id, current_glyph_format, fetch_buffer);
             fetch_buffer.clear();
         }
     }
 
     pub fn get_glyph_dimensions(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
@@ -874,37 +884,24 @@ impl ResourceCache {
         // The advantage of clearing the cache completely is that it gets rid of any
         // remaining fragmentation that could have persisted if we kept around the most
         // recently used resources.
         self.cached_images.clear();
         self.cached_glyphs.clear();
     }
 
     pub fn clear_namespace(&mut self, namespace: IdNamespace) {
-        //TODO: use `retain` when we are on Rust-1.18
-        let image_keys: Vec<_> = self.resources
+        self.resources
             .image_templates
             .images
-            .keys()
-            .filter(|&key| key.0 == namespace)
-            .cloned()
-            .collect();
-        for key in &image_keys {
-            self.resources.image_templates.images.remove(key);
-        }
+            .retain(|key, _| key.0 != namespace);
 
-        let font_keys: Vec<_> = self.resources
+        self.resources
             .font_templates
-            .keys()
-            .filter(|&key| key.0 == namespace)
-            .cloned()
-            .collect();
-        for key in &font_keys {
-            self.resources.font_templates.remove(key);
-        }
+            .retain(|key, _| key.0 != namespace);
 
         self.cached_images
             .clear_keys(|request| request.key.0 == namespace);
         self.cached_glyphs
             .clear_fonts(|font| font.font_key.0 == namespace);
     }
 }
 
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -6,19 +6,21 @@ use api::{ClipAndScrollInfo, ClipId, Col
 use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize};
 use api::{ExternalImageType, FilterOp, FontRenderMode, ImageRendering, LayerRect};
 use api::{LayerToWorldTransform, MixBlendMode, PipelineId, PropertyBinding, TransformStyle};
 use api::{LayerVector2D, TileOffset, WorldToLayerTransform, YuvColorSpace, YuvFormat};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
 use device::Texture;
+use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_types::{BlurDirection, BlurInstance, BrushInstance, ClipMaskInstance};
 use gpu_types::{CompositePrimitiveInstance, PrimitiveInstance, SimplePrimitiveInstance};
+use gpu_types::{BRUSH_FLAG_USES_PICTURE};
 use internal_types::{FastHashMap, SourceTexture};
 use internal_types::BatchTextures;
 use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use prim_store::{DeferredResolve, TextRunMode};
 use profiler::FrameProfileCounters;
 use render_task::{AlphaRenderItem, ClipWorkItem, MaskGeometryKind, MaskSegment};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
@@ -57,26 +59,30 @@ impl AlphaBatchHelpers for PrimitiveStor
                 let text_run_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                 match text_run_cpu.font.render_mode {
                     FontRenderMode::Subpixel => BlendMode::Subpixel,
                     FontRenderMode::Alpha |
                     FontRenderMode::Mono |
                     FontRenderMode::Bitmap => BlendMode::PremultipliedAlpha,
                 }
             }
+            PrimitiveKind::Rectangle |
+            PrimitiveKind::Border |
             PrimitiveKind::Image |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
             PrimitiveKind::RadialGradient |
             PrimitiveKind::Picture => if needs_blending {
                 BlendMode::PremultipliedAlpha
             } else {
                 BlendMode::None
             },
-            _ => if needs_blending {
+            PrimitiveKind::YuvImage |
+            PrimitiveKind::Line |
+            PrimitiveKind::Brush => if needs_blending {
                 BlendMode::Alpha
             } else {
                 BlendMode::None
             },
         }
     }
 }
 
@@ -132,17 +138,17 @@ impl AlphaBatchList {
 
         match key.kind {
             BatchKind::Composite { .. } => {
                 // Composites always get added to their own batch.
                 // This is because the result of a composite can affect
                 // the input to the next composite. Perhaps we can
                 // optimize this in the future.
             }
-            BatchKind::Transformable(_, TransformBatchKind::TextRun) => {
+            BatchKind::Transformable(_, TransformBatchKind::TextRun(_)) => {
                 'outer_text: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
                     // Subpixel text is drawn in two passes. Because of this, we need
                     // to check for overlaps with every batch (which is a bit different
                     // than the normal batching below).
                     for item_rect in &batch.item_rects {
                         if item_rect.intersects(item_bounding_rect) {
                             break 'outer_text;
                         }
@@ -539,30 +545,30 @@ impl AlphaRenderItem {
 
                         let font = text_cpu.get_font(TextRunMode::Normal, ctx.device_pixel_ratio);
 
                         ctx.resource_cache.fetch_glyphs(
                             font,
                             &text_cpu.glyph_keys,
                             glyph_fetch_buffer,
                             gpu_cache,
-                            |texture_id, glyphs| {
+                            |texture_id, glyph_format, glyphs| {
                                 debug_assert_ne!(texture_id, SourceTexture::Invalid);
 
                                 let textures = BatchTextures {
                                     colors: [
                                         texture_id,
                                         SourceTexture::Invalid,
                                         SourceTexture::Invalid,
                                     ],
                                 };
 
                                 let kind = BatchKind::Transformable(
                                     transform_kind,
-                                    TransformBatchKind::TextRun,
+                                    TransformBatchKind::TextRun(glyph_format),
                                 );
 
                                 let key = BatchKey::new(kind, blend_mode, textures);
                                 let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
 
                                 for glyph in glyphs {
                                     batch.push(base_instance.build(
                                         glyph.index_in_text_run,
@@ -574,23 +580,32 @@ impl AlphaRenderItem {
                         );
                     }
                     PrimitiveKind::Picture => {
                         let picture =
                             &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
                         let cache_task_id = picture.render_task_id.expect("no render task!");
                         let cache_task_address = render_tasks.get_task_address(cache_task_id);
                         let textures = BatchTextures::render_target_cache();
-                        let kind = BatchKind::Transformable(
-                            transform_kind,
-                            TransformBatchKind::CacheImage(picture.target_kind()),
+                        let kind = BatchKind::Brush(
+                            BrushBatchKind::Image(picture.target_kind()),
                         );
                         let key = BatchKey::new(kind, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
-                        batch.push(base_instance.build(0, cache_task_address.0 as i32, 0));
+                        let instance = BrushInstance {
+                            picture_address: task_address,
+                            prim_address: prim_cache_address,
+                            layer_address: packed_layer_index.into(),
+                            clip_task_address,
+                            z,
+                            flags: 0,
+                            user_data0: cache_task_address.0 as i32,
+                            user_data1: 0,
+                        };
+                        batch.push(PrimitiveInstance::from(instance));
                     }
                     PrimitiveKind::AlignedGradient => {
                         let gradient_cpu =
                             &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
                         let kind = BatchKind::Transformable(
                             transform_kind,
                             TransformBatchKind::AlignedGradient,
                         );
@@ -1153,18 +1168,16 @@ impl RenderTarget for ColorRenderTarget 
                     task_id,
                     task.children[0],
                     BlurDirection::Horizontal,
                     render_tasks,
                 );
             }
             RenderTaskKind::Picture(ref task_info) => {
                 let prim_metadata = ctx.prim_store.get_metadata(task_info.prim_index);
-                let prim_address = prim_metadata.gpu_location.as_int(gpu_cache);
-
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Picture => {
                         let prim = &ctx.prim_store.cpu_pictures[prim_metadata.cpu_prim_index.0];
 
                         let task_index = render_tasks.get_task_address(task_id);
 
                         for run in &prim.prim_runs {
                             for i in 0 .. run.count {
@@ -1192,34 +1205,34 @@ impl RenderTarget for ColorRenderTarget 
 
                                         let font = text.get_font(TextRunMode::Shadow, ctx.device_pixel_ratio);
 
                                         ctx.resource_cache.fetch_glyphs(
                                             font,
                                             &text.glyph_keys,
                                             &mut self.glyph_fetch_buffer,
                                             gpu_cache,
-                                            |texture_id, glyphs| {
+                                            |texture_id, _glyph_format, glyphs| {
                                                 let batch = text_run_cache_prims
                                                     .entry(texture_id)
                                                     .or_insert(Vec::new());
 
                                                 for glyph in glyphs {
                                                     batch.push(instance.build(
                                                         glyph.index_in_text_run,
                                                         glyph.uv_rect_address.as_int(),
-                                                        prim_address,
+                                                        0
                                                     ));
                                                 }
                                             },
                                         );
                                     }
                                     PrimitiveKind::Line => {
                                         self.line_cache_prims
-                                            .push(instance.build(prim_address, 0, 0));
+                                            .push(instance.build(0, 0, 0));
                                     }
                                     _ => {
                                         unreachable!("Unexpected sub primitive type");
                                     }
                                 }
                             }
                         }
                     }
@@ -1329,17 +1342,31 @@ impl RenderTarget for AlphaRenderTarget 
                                 let sub_prim_index = PrimitiveIndex(run.prim_index.0 + i);
 
                                 let sub_metadata = ctx.prim_store.get_metadata(sub_prim_index);
                                 let sub_prim_address =
                                     gpu_cache.get_address(&sub_metadata.gpu_location);
 
                                 match sub_metadata.prim_kind {
                                     PrimitiveKind::Brush => {
-                                        let instance = BrushInstance::new(task_index, sub_prim_address);
+                                        let instance = BrushInstance {
+                                            picture_address: task_index,
+                                            prim_address: sub_prim_address,
+                                            // TODO(gw): In the future, when brush
+                                            //           primitives on picture backed
+                                            //           tasks support clip masks and
+                                            //           transform primitives, these
+                                            //           will need to be filled out!
+                                            layer_address: PackedLayerIndex(0).into(),
+                                            clip_task_address: RenderTaskAddress(0),
+                                            z: 0,
+                                            flags: BRUSH_FLAG_USES_PICTURE,
+                                            user_data0: 0,
+                                            user_data1: 0,
+                                        };
                                         self.rect_cache_prims.push(PrimitiveInstance::from(instance));
                                     }
                                     _ => {
                                         unreachable!("Unexpected sub primitive type");
                                     }
                                 }
                             }
                         }
@@ -1528,39 +1555,44 @@ impl RenderPass {
         self.alpha_targets
             .build(ctx, gpu_cache, render_tasks, deferred_resolves);
     }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum TransformBatchKind {
     Rectangle(bool),
-    TextRun,
+    TextRun(GlyphFormat),
     Image(ImageBufferKind),
     YuvImage(ImageBufferKind, YuvFormat, YuvColorSpace),
     AlignedGradient,
     AngleGradient,
     RadialGradient,
-    CacheImage(RenderTargetKind),
     BorderCorner,
     BorderEdge,
     Line,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum BrushBatchKind {
+    Image(RenderTargetKind)
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum BatchKind {
     Composite {
         task_id: RenderTaskId,
         source_id: RenderTaskId,
         backdrop_id: RenderTaskId,
     },
     HardwareComposite,
     SplitComposite,
     Blend,
     Transformable(TransformedRectKind, TransformBatchKind),
+    Brush(BrushBatchKind),
 }
 
 #[derive(Copy, Clone, Debug)]
 pub struct BatchKey {
     pub kind: BatchKind,
     pub blend_mode: BlendMode,
     pub textures: BatchTextures,
 }
--- a/gfx/webrender/tests/angle_shader_validation.rs
+++ b/gfx/webrender/tests/angle_shader_validation.rs
@@ -62,20 +62,16 @@ const SHADERS: &[Shader] = &[
         name: "ps_angle_gradient",
         features: PRIM_FEATURES,
     },
     Shader {
         name: "ps_radial_gradient",
         features: PRIM_FEATURES,
     },
     Shader {
-        name: "ps_cache_image",
-        features: &["COLOR", "ALPHA"],
-    },
-    Shader {
         name: "ps_blend",
         features: PRIM_FEATURES,
     },
     Shader {
         name: "ps_composite",
         features: PRIM_FEATURES,
     },
     Shader {
@@ -102,16 +98,20 @@ const SHADERS: &[Shader] = &[
         name: "ps_rectangle",
         features: &["", "TRANSFORM", "CLIP_FEATURE", "TRANSFORM,CLIP_FEATURE"],
     },
     // Brush shaders
     Shader {
         name: "brush_mask",
         features: &[],
     },
+    Shader {
+        name: "brush_image",
+        features: &["COLOR_TARGET", "ALPHA_TARGET"],
+    },
 ];
 
 const VERSION_STRING: &str = "#version 300 es\n";
 
 #[test]
 fn validate_shaders() {
     angle::hl::initialize().unwrap();
 
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -1,11 +1,11 @@
 [package]
 name = "webrender_api"
-version = "0.53.0"
+version = "0.53.1"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 
 [features]
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -143,21 +143,18 @@ pub struct ScrollFrameDisplayItem {
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct RectangleDisplayItem {
     pub color: ColorF,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct LineDisplayItem {
-    pub baseline: f32, // LayerPixel
-    pub start: f32,
-    pub end: f32,
     pub orientation: LineOrientation, // toggles whether above values are interpreted as x/y values
-    pub width: f32,
+    pub wavy_line_thickness: f32,
     pub color: ColorF,
     pub style: LineStyle,
 }
 
 #[repr(u8)]
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub enum LineOrientation {
     Vertical,
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -486,33 +486,54 @@ impl<'a, 'b> Serialize for DisplayItemRe
 //        }
 //    }
 // }
 //
 
 struct UnsafeVecWriter(*mut u8);
 
 impl Write for UnsafeVecWriter {
+    #[inline(always)]
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
         unsafe {
             ptr::copy_nonoverlapping(buf.as_ptr(), self.0, buf.len());
             self.0 = self.0.offset(buf.len() as isize);
         }
         Ok(buf.len())
     }
+
+    #[inline(always)]
+    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
+        unsafe {
+            ptr::copy_nonoverlapping(buf.as_ptr(), self.0, buf.len());
+            self.0 = self.0.offset(buf.len() as isize);
+        }
+        Ok(())
+    }
+
+    #[inline(always)]
     fn flush(&mut self) -> io::Result<()> { Ok(()) }
 }
 
 struct SizeCounter(usize);
 
 impl<'a> Write for SizeCounter {
+    #[inline(always)]
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
         self.0 += buf.len();
         Ok(buf.len())
     }
+
+    #[inline(always)]
+    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
+        self.0 += buf.len();
+        Ok(())
+    }
+
+    #[inline(always)]
     fn flush(&mut self) -> io::Result<()> { Ok(()) }
 }
 
 fn serialize_fast<T: Serialize>(vec: &mut Vec<u8>, e: &T) {
     // manually counting the size is faster than vec.reserve(bincode::serialized_size(&e) as usize) for some reason
     let mut size = SizeCounter(0);
     bincode::serialize_into(&mut size,e , bincode::Infinite).unwrap();
     vec.reserve(size.0);
@@ -768,31 +789,25 @@ impl DisplayListBuilder {
     pub fn push_rect(&mut self, info: &LayoutPrimitiveInfo, color: ColorF) {
         let item = SpecificDisplayItem::Rectangle(RectangleDisplayItem { color });
         self.push_item(item, info);
     }
 
     pub fn push_line(
         &mut self,
         info: &LayoutPrimitiveInfo,
-        baseline: f32,
-        start: f32,
-        end: f32,
+        wavy_line_thickness: f32,
         orientation: LineOrientation,
-        width: f32,
-        color: ColorF,
+        color: &ColorF,
         style: LineStyle,
     ) {
         let item = SpecificDisplayItem::Line(LineDisplayItem {
-            baseline,
-            start,
-            end,
+            wavy_line_thickness,
             orientation,
-            width,
-            color,
+            color: *color,
             style,
         });
 
         self.push_item(item, info);
     }
 
     pub fn push_image(
         &mut self,
--- a/gfx/webrender_api/src/font.rs
+++ b/gfx/webrender_api/src/font.rs
@@ -238,24 +238,24 @@ impl Default for FontInstancePlatformOpt
         }
     }
 }
 
 #[cfg(target_os = "macos")]
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Hash, Eq, PartialEq, PartialOrd, Ord, Serialize)]
 pub struct FontInstancePlatformOptions {
-    pub unused: u32,
+    pub font_smoothing: bool,
 }
 
 #[cfg(target_os = "macos")]
 impl Default for FontInstancePlatformOptions {
     fn default() -> FontInstancePlatformOptions {
         FontInstancePlatformOptions {
-            unused: 0,
+            font_smoothing: true,
         }
     }
 }
 
 pub const FONT_FORCE_AUTOHINT: u16  = 0b1;
 pub const FONT_NO_AUTOHINT: u16     = 0b10;
 pub const FONT_EMBEDDED_BITMAP: u16 = 0b100;
 pub const FONT_EMBOLDEN: u16        = 0b1000;
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -1,19 +1,19 @@
 [package]
 name = "webrender_bindings"
 version = "0.1.0"
 authors = ["The Mozilla Project Developers"]
 license = "MPL-2.0"
 
 [dependencies]
-webrender_api = {path = "../webrender_api", version = "0.53.0"}
+webrender_api = {path = "../webrender_api", version = "0.53.1"}
 bincode = "0.8"
 rayon = "0.8"
 thread_profiler = "0.1.1"
 euclid = "0.15"
 app_units = "0.5.6"
 gleam = "0.4"
 
 [dependencies.webrender]
 path = "../webrender"
-version = "0.53.0"
+version = "0.53.1"
 default-features = false