Bug 1380645 - Update webrender to cset b83c200c657f6b6fb17d09f329ba77803420b46a. r=jrmuizel
author: Kartikaya Gupta <kgupta@mozilla.com>
Fri, 21 Jul 2017 09:05:31 -0400
changeset: 370257 f2359ba2d006b936c12de6fe5d2fd09b42ece688
parent: 370256 f1823b0526565139b7d01822d1fc2c6200557b31
child: 370258 6ce98d80d88266a9831cbab430afbb5a8f101ae8
push id: 92834
push user: kwierso@gmail.com
push date: Sat, 22 Jul 2017 01:21:01 +0000
treeherder: mozilla-inbound@66f0d5a2c077 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jrmuizel
bugs: 1380645
milestone: 56.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1380645 - Update webrender to cset b83c200c657f6b6fb17d09f329ba77803420b46a. r=jrmuizel MozReview-Commit-ID: B2CgO2o0RDf
gfx/doc/README.webrender
gfx/webrender/res/cs_blur.vs.glsl
gfx/webrender/res/cs_text_run.vs.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_blend.fs.glsl
gfx/webrender/res/ps_cache_image.fs.glsl
gfx/webrender/res/ps_cache_image.glsl
gfx/webrender/res/ps_cache_image.vs.glsl
gfx/webrender/res/ps_text_run.vs.glsl
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/mask_cache.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/font.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -74,9 +74,9 @@ there is another crate in m-c called moz
 the same folder to store its rust dependencies. If one of the libraries that is
 required by both mozjs_sys and webrender is updated without updating the other
 project's Cargo.lock file, that results in build bustage.
 This means that any time you do this sort of manual update of packages, you need
 to make sure that mozjs_sys also has its Cargo.lock file updated if needed, hence
 the need to run the cargo update command in js/src as well. Hopefully this will
 be resolved soon.
 
-Latest Commit: 479ae6475a18527206a2534c2b8a5bfb8b06bd6e
+Latest Commit: b83c200c657f6b6fb17d09f329ba77803420b46a
--- a/gfx/webrender/res/cs_blur.vs.glsl
+++ b/gfx/webrender/res/cs_blur.vs.glsl
@@ -49,17 +49,18 @@ void main(void) {
         case DIR_HORIZONTAL:
             vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
             break;
         case DIR_VERTICAL:
             vOffsetScale = vec2(0.0, 1.0 / texture_size.y);
             break;
     }
 
-    vUvRect = vec4(src_task.data0.xy, src_task.data0.xy + src_task.data0.zw);
+    vUvRect = vec4(src_task.data0.xy + vec2(0.5),
+                   src_task.data0.xy + src_task.data0.zw - vec2(0.5));
     vUvRect /= texture_size.xyxy;
 
     vec2 uv0 = src_task.data0.xy / texture_size;
     vec2 uv1 = (src_task.data0.xy + src_task.data0.zw) / texture_size;
     vUv.xy = mix(uv0, uv1, aPosition.xy);
 
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
--- a/gfx/webrender/res/cs_text_run.vs.glsl
+++ b/gfx/webrender/res/cs_text_run.vs.glsl
@@ -7,33 +7,45 @@
 // drawn un-transformed. These are used for effects such
 // as text-shadow.
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
-    int resource_address = prim.user_data2;
+    int resource_address = prim.user_data1;
+    int text_shadow_address = prim.user_data2;
+
+    // Fetch the parent text-shadow for this primitive. This allows the code
+    // below to normalize the glyph offsets relative to the original text
+    // shadow rect, which is the union of all elements that make up this
+    // text shadow. This allows the text shadow to be rendered at an
+    // arbitrary location in a render target (provided by the render
+    // task render_target_origin field).
+    PrimitiveGeometry shadow_geom = fetch_primitive_geometry(text_shadow_address);
+    TextShadow shadow = fetch_text_shadow(text_shadow_address + VECS_PER_PRIM_HEADER);
+
     Glyph glyph = fetch_glyph(prim.specific_prim_address, glyph_index);
     GlyphResource res = fetch_glyph_resource(resource_address);
 
     // Glyphs size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
     vec2 size = res.uv_rect.zw - res.uv_rect.xy;
     vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
-    vec2 origin = prim.task.render_target_origin + uDevicePixelRatio * (local_pos - prim.local_rect.p0);
+    vec2 origin = prim.task.render_target_origin +
+                  uDevicePixelRatio * (local_pos + shadow.offset - shadow_geom.local_rect.p0);
     vec4 local_rect = vec4(origin, size);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
     vec2 pos = mix(local_rect.xy,
                    local_rect.xy + local_rect.zw,
                    aPosition.xy);
 
     vUv = mix(st0, st1, aPosition.xy);
-    vColor = text.color;
+    vColor = shadow.color;
 
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -128,17 +128,17 @@ vec4[2] fetch_from_resource_cache_2(int 
     );
 }
 
 #ifdef WR_VERTEX_SHADER
 
 #define VECS_PER_LAYER              9
 #define VECS_PER_RENDER_TASK        3
 #define VECS_PER_PRIM_HEADER        2
-#define VECS_PER_TEXT_RUN           1
+#define VECS_PER_TEXT_RUN           2
 #define VECS_PER_GRADIENT           3
 #define VECS_PER_GRADIENT_STOP      2
 
 uniform HIGHP_SAMPLER_FLOAT sampler2D sLayers;
 uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;
 
 // Instanced attributes
 in ivec4 aData0;
@@ -489,28 +489,39 @@ struct Primitive {
     RectWithSize local_clip_rect;
     int specific_prim_address;
     int user_data0;
     int user_data1;
     int user_data2;
     float z;
 };
 
+struct PrimitiveGeometry {
+    RectWithSize local_rect;
+    RectWithSize local_clip_rect;
+};
+
+PrimitiveGeometry fetch_primitive_geometry(int address) {
+    vec4 geom[2] = fetch_from_resource_cache_2(address);
+    return PrimitiveGeometry(RectWithSize(geom[0].xy, geom[0].zw),
+                             RectWithSize(geom[1].xy, geom[1].zw));
+}
+
 Primitive load_primitive() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     Primitive prim;
 
     prim.layer = fetch_layer(pi.layer_index);
     prim.clip_area = fetch_clip_area(pi.clip_task_index);
     prim.task = fetch_alpha_batch_task(pi.render_task_index);
 
-    vec4 geom[2] = fetch_from_resource_cache_2(pi.prim_address);
-    prim.local_rect = RectWithSize(geom[0].xy, geom[0].zw);
-    prim.local_clip_rect = RectWithSize(geom[1].xy, geom[1].zw);
+    PrimitiveGeometry geom = fetch_primitive_geometry(pi.prim_address);
+    prim.local_rect = geom.local_rect;
+    prim.local_clip_rect = geom.local_clip_rect;
 
     prim.specific_prim_address = pi.specific_prim_address;
     prim.user_data0 = pi.user_data0;
     prim.user_data1 = pi.user_data1;
     prim.user_data2 = pi.user_data2;
     prim.z = float(pi.z);
 
     return prim;
@@ -756,23 +767,35 @@ struct Rectangle {
     vec4 color;
 };
 
 Rectangle fetch_rectangle(int address) {
     vec4 data = fetch_from_resource_cache_1(address);
     return Rectangle(data);
 }
 
+struct TextShadow {
+    vec4 color;
+    vec2 offset;
+    float blur_radius;
+};
+
+TextShadow fetch_text_shadow(int address) {
+    vec4 data[2] = fetch_from_resource_cache_2(address);
+    return TextShadow(data[0], data[1].xy, data[1].z);
+}
+
 struct TextRun {
     vec4 color;
+    vec2 offset;
 };
 
 TextRun fetch_text_run(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
-    return TextRun(data);
+    vec4 data[2] = fetch_from_resource_cache_2(address);
+    return TextRun(data[0], data[1].xy);
 }
 
 struct Image {
     vec4 stretch_size_and_tile_spacing;  // Size of the actual image and amount of space between
                                          //     tiled instances of this image.
     vec4 sub_rect;                          // If negative, ignored.
 };
 
--- a/gfx/webrender/res/ps_blend.fs.glsl
+++ b/gfx/webrender/res/ps_blend.fs.glsl
@@ -90,17 +90,17 @@ vec4 Sepia(vec4 Cs, float amount) {
                 vec4(0.0, 0.0, 0.0, 1.0)) * Cs;
 }
 
 vec4 Brightness(vec4 Cs, float amount) {
     return vec4(Cs.rgb * amount, Cs.a);
 }
 
 vec4 Opacity(vec4 Cs, float amount) {
-    return vec4(Cs.rgb, Cs.a * amount);
+    return Cs * amount;
 }
 
 void main(void) {
     vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
     vec4 Cs = textureLod(sCacheRGBA8, vec3(uv, vUv.z), 0.0);
 
     if (Cs.a == 0.0) {
         discard;
--- a/gfx/webrender/res/ps_cache_image.fs.glsl
+++ b/gfx/webrender/res/ps_cache_image.fs.glsl
@@ -1,7 +1,9 @@
+#line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
-    oFragColor = texture(sCacheRGBA8, vUv);
+    vec2 uv = clamp(vUv.xy, vUvBounds.xy, vUvBounds.zw);
+    oFragColor = texture(sCacheRGBA8, vec3(uv, vUv.z));
 }
--- a/gfx/webrender/res/ps_cache_image.glsl
+++ b/gfx/webrender/res/ps_cache_image.glsl
@@ -1,5 +1,6 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 varying vec3 vUv;
+flat varying vec4 vUvBounds;
--- a/gfx/webrender/res/ps_cache_image.vs.glsl
+++ b/gfx/webrender/res/ps_cache_image.vs.glsl
@@ -15,15 +15,18 @@ void main(void) {
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect);
 
     RenderTaskData child_task = fetch_render_task(prim.user_data1);
     vUv.z = child_task.data1.x;
 
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
-    vec2 uv0 = child_task.data0.xy / texture_size;
-    vec2 uv1 = (child_task.data0.xy + child_task.data0.zw) / texture_size;
+    vec2 uv0 = child_task.data0.xy;
+    vec2 uv1 = (child_task.data0.xy + child_task.data0.zw);
 
     vec2 f = (vi.local_pos - prim.local_rect.p0) / prim.local_rect.size;
 
-    vUv.xy = mix(uv0, uv1, f);
+    vUv.xy = mix(uv0 / texture_size,
+                 uv1 / texture_size,
+                 f);
+    vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy;
 }
--- a/gfx/webrender/res/ps_text_run.vs.glsl
+++ b/gfx/webrender/res/ps_text_run.vs.glsl
@@ -1,23 +1,43 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#define RENDER_MODE_MONO        0
+#define RENDER_MODE_ALPHA       1
+#define RENDER_MODE_SUBPIXEL    2
+
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
+    int render_mode = prim.user_data1;
     int resource_address = prim.user_data2;
+
     Glyph glyph = fetch_glyph(prim.specific_prim_address, glyph_index);
     GlyphResource res = fetch_glyph_resource(resource_address);
 
-    vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
+    switch (render_mode) {
+        case RENDER_MODE_ALPHA:
+            break;
+        case RENDER_MODE_MONO:
+            break;
+        case RENDER_MODE_SUBPIXEL:
+            // In subpixel mode, the subpixel offset has already been
+            // accounted for while rasterizing the glyph.
+            glyph.offset = trunc(glyph.offset);
+            break;
+    }
+
+    vec2 local_pos = glyph.offset +
+                     text.offset +
+                     vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
 
     RectWithSize local_rect = RectWithSize(local_pos,
                                            (res.uv_rect.zw - res.uv_rect.xy) / uDevicePixelRatio);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -349,16 +349,17 @@ impl FBOId {
 }
 
 struct Texture {
     gl: Rc<gl::Gl>,
     id: gl::GLuint,
     format: ImageFormat,
     width: u32,
     height: u32,
+
     filter: TextureFilter,
     mode: RenderTargetMode,
     fbo_ids: Vec<FBOId>,
     depth_rb: Option<RBOId>,
 }
 
 impl Drop for Texture {
     fn drop(&mut self) {
@@ -1165,19 +1166,21 @@ impl Device {
                         width: u32,
                         height: u32,
                         format: ImageFormat,
                         filter: TextureFilter,
                         mode: RenderTargetMode,
                         pixels: Option<&[u8]>) {
         debug_assert!(self.inside_frame);
 
+        let resized;
         {
             let texture = self.textures.get_mut(&texture_id).expect("Didn't find texture!");
             texture.format = format;
+            resized = texture.width != width || texture.height != height;
             texture.width = width;
             texture.height = height;
             texture.filter = filter;
             texture.mode = mode;
         }
 
         let (internal_format, gl_format) = gl_texture_formats_for_image_format(self.gl(), format);
         let type_ = gl_type_for_texture_format(format);
@@ -1188,22 +1191,22 @@ impl Device {
                 self.set_texture_parameters(texture_id.target, filter);
                 self.upload_texture_image(texture_id.target,
                                           width,
                                           height,
                                           internal_format as u32,
                                           gl_format,
                                           type_,
                                           None);
-                self.create_fbo_for_texture_if_necessary(texture_id, None);
+                self.update_texture_storage(texture_id, None, resized);
             }
             RenderTargetMode::LayerRenderTarget(layer_count) => {
                 self.bind_texture(DEFAULT_TEXTURE, texture_id);
                 self.set_texture_parameters(texture_id.target, filter);
-                self.create_fbo_for_texture_if_necessary(texture_id, Some(layer_count));
+                self.update_texture_storage(texture_id, Some(layer_count), resized);
             }
             RenderTargetMode::None => {
                 self.bind_texture(DEFAULT_TEXTURE, texture_id);
                 self.set_texture_parameters(texture_id.target, filter);
                 let expanded_data: Vec<u8>;
                 let actual_pixels = if pixels.is_some() &&
                                        format == ImageFormat::A8 &&
                                        cfg!(any(target_arch="arm", target_arch="aarch64")) {
@@ -1222,31 +1225,32 @@ impl Device {
             }
         }
     }
 
     pub fn get_render_target_layer_count(&self, texture_id: TextureId) -> usize {
         self.textures[&texture_id].fbo_ids.len()
     }
 
-    pub fn create_fbo_for_texture_if_necessary(&mut self,
-                                               texture_id: TextureId,
-                                               layer_count: Option<i32>) {
+    /// Updates the texture storage for the texture, creating
+    /// FBOs as required.
+    pub fn update_texture_storage(&mut self,
+                                  texture_id: TextureId,
+                                  layer_count: Option<i32>,
+                                  resized: bool) {
         let texture = self.textures.get_mut(&texture_id).unwrap();
 
         match layer_count {
             Some(layer_count) => {
                 assert!(layer_count > 0);
                 assert_eq!(texture_id.target, gl::TEXTURE_2D_ARRAY);
 
-                // If we have enough layers allocated already, just use them.
-                // TODO(gw): Probably worth removing some after a while if
-                //           there is a surplus?
                 let current_layer_count = texture.fbo_ids.len() as i32;
-                if current_layer_count >= layer_count {
+                // If the texture is already the required size skip.
+                if current_layer_count == layer_count && !resized {
                     return;
                 }
 
                 let (internal_format, gl_format) = gl_texture_formats_for_image_format(&*self.gl, texture.format);
                 let type_ = gl_type_for_texture_format(texture.format);
 
                 self.gl.tex_image_3d(texture_id.target,
                                      0,
@@ -1255,32 +1259,40 @@ impl Device {
                                      texture.height as gl::GLint,
                                      layer_count,
                                      0,
                                      gl_format,
                                      type_,
                                      None);
 
                 let needed_layer_count = layer_count - current_layer_count;
-                let new_fbos = self.gl.gen_framebuffers(needed_layer_count);
-                texture.fbo_ids.extend(new_fbos.into_iter().map(|id| FBOId(id)));
+                if needed_layer_count > 0 {
+                    // Create more framebuffers to fill the gap
+                    let new_fbos = self.gl.gen_framebuffers(needed_layer_count);
+                    texture.fbo_ids.extend(new_fbos.into_iter().map(|id| FBOId(id)));
+                } else if needed_layer_count < 0 {
+                    // Remove extra framebuffers
+                    for old in texture.fbo_ids.drain(layer_count as usize ..) {
+                        self.gl.delete_framebuffers(&[old.0]);
+                    }
+                }
 
                 let depth_rb = if let Some(rbo) = texture.depth_rb {
                     rbo.0
                 } else {
                     let renderbuffer_ids = self.gl.gen_renderbuffers(1);
                     let depth_rb = renderbuffer_ids[0];
-                    self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
-                    self.gl.renderbuffer_storage(gl::RENDERBUFFER,
-                                                 gl::DEPTH_COMPONENT24,
-                                                 texture.width as gl::GLsizei,
-                                                 texture.height as gl::GLsizei);
                     texture.depth_rb = Some(RBOId(depth_rb));
                     depth_rb
                 };
+                self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
+                self.gl.renderbuffer_storage(gl::RENDERBUFFER,
+                                             gl::DEPTH_COMPONENT24,
+                                             texture.width as gl::GLsizei,
+                                             texture.height as gl::GLsizei);
 
                 for (fbo_index, fbo_id) in texture.fbo_ids.iter().enumerate() {
                     self.gl.bind_framebuffer(gl::FRAMEBUFFER, fbo_id.0);
                     self.gl.framebuffer_texture_layer(gl::FRAMEBUFFER,
                                                       gl::COLOR_ATTACHMENT0,
                                                       texture_id.name,
                                                       0,
                                                       fbo_index as gl::GLint);
@@ -1350,33 +1362,33 @@ impl Device {
                           filter: TextureFilter,
                           mode: RenderTargetMode) {
         debug_assert!(self.inside_frame);
 
         let old_size = self.get_texture_dimensions(texture_id);
 
         let temp_texture_id = self.create_texture_ids(1, TextureTarget::Default)[0];
         self.init_texture(temp_texture_id, old_size.width, old_size.height, format, filter, mode, None);
-        self.create_fbo_for_texture_if_necessary(temp_texture_id, None);
+        self.update_texture_storage(temp_texture_id, None, true);
 
         self.bind_read_target(Some((texture_id, 0)));
         self.bind_texture(DEFAULT_TEXTURE, temp_texture_id);
 
         self.gl.copy_tex_sub_image_2d(temp_texture_id.target,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        old_size.width as i32,
                                        old_size.height as i32);
 
         self.deinit_texture(texture_id);
         self.init_texture(texture_id, new_width, new_height, format, filter, mode, None);
-        self.create_fbo_for_texture_if_necessary(texture_id, None);
+        self.update_texture_storage(texture_id, None, true);
         self.bind_read_target(Some((temp_texture_id, 0)));
         self.bind_texture(DEFAULT_TEXTURE, texture_id);
 
         self.gl.copy_tex_sub_image_2d(texture_id.target,
                                        0,
                                        0,
                                        0,
                                        0,
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -685,32 +685,34 @@ impl Frame {
                                     clip_and_scroll.scroll_node_id,
                                     &item.rect(),
                                     &item.local_clip(),
                                     context,
                                     reference_frame_relative_offset);
             }
             SpecificDisplayItem::Clip(ref info) => {
                 let complex_clips = context.get_complex_clips(pipeline_id, item.complex_clip().0);
-                let mut clip_region = ClipRegion::for_clip_node(*item.local_clip().clip_rect(),
-                                                                complex_clips,
-                                                                info.image_mask);
+                let mut clip_region =
+                    ClipRegion::create_for_clip_node(*item.local_clip().clip_rect(),
+                                                     complex_clips,
+                                                     info.image_mask);
                 clip_region.origin += reference_frame_relative_offset;
 
                 self.flatten_clip(context,
                                   pipeline_id,
                                   &clip_and_scroll.scroll_node_id,
                                   &info.id,
                                   clip_region);
             }
             SpecificDisplayItem::ScrollFrame(ref info) => {
                 let complex_clips = context.get_complex_clips(pipeline_id, item.complex_clip().0);
-                let mut clip_region = ClipRegion::for_clip_node(*item.local_clip().clip_rect(),
-                                                                complex_clips,
-                                                                info.image_mask);
+                let mut clip_region =
+                    ClipRegion::create_for_clip_node(*item.local_clip().clip_rect(),
+                                                     complex_clips,
+                                                     info.image_mask);
                 clip_region.origin += reference_frame_relative_offset;
 
                 // Just use clip rectangle as the frame rect for this scroll frame.
                 // This is only interesting when calculating scroll extents for the
                 // ClipScrollNode::scroll(..) API
                 let frame_rect = item.local_clip()
                                      .clip_rect()
                                      .translate(&reference_frame_relative_offset);
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -9,19 +9,19 @@ use api::{ImageKey, ImageRendering, Item
 use api::{LayerToScrollTransform, LayerVector2D, LocalClip, PipelineId, RepeatMode, TextShadow};
 use api::{TileOffset, TransformStyle, WebGLContextId, WorldPixel, YuvColorSpace, YuvData};
 use app_units::Au;
 use frame::FrameId;
 use gpu_cache::GpuCache;
 use internal_types::HardwareCompositeOp;
 use mask_cache::{ClipMode, ClipRegion, ClipSource, MaskCacheInfo};
 use plane_split::{BspSplitter, Polygon, Splitter};
-use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu};
+use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, PrimitiveKind};
 use prim_store::{ImagePrimitiveKind, PrimitiveContainer, PrimitiveIndex};
-use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
+use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu, TextRunMode};
 use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu, TextShadowPrimitiveCpu};
 use prim_store::{BoxShadowPrimitiveCpu, TexelRect, YuvImagePrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_task::{AlphaRenderItem, ClipWorkItem, MaskCacheKey, RenderTask, RenderTaskIndex};
 use render_task::{RenderTaskId, RenderTaskLocation};
 use resource_cache::ResourceCache;
 use clip_scroll_node::{ClipInfo, ClipScrollNode, NodeType};
 use clip_scroll_tree::ClipScrollTree;
@@ -101,58 +101,29 @@ fn make_polygon(stacking_context: &Stack
 #[derive(Clone, Copy)]
 pub struct FrameBuilderConfig {
     pub enable_scrollbars: bool,
     pub default_font_render_mode: FontRenderMode,
     pub debug: bool,
     pub cache_expiry_frames: u32,
 }
 
-struct PendingTextShadow {
-    shadow: TextShadow,
-    text_primitives: Vec<TextRunPrimitiveCpu>,
-    clip_and_scroll: ClipAndScrollInfo,
-    local_rect: LayerRect,
-    local_clip: LocalClip,
-}
-
-impl PendingTextShadow {
-    fn new(shadow: TextShadow,
-           clip_and_scroll: ClipAndScrollInfo,
-           local_clip: &LocalClip) -> PendingTextShadow {
-        PendingTextShadow {
-            shadow: shadow,
-            text_primitives: Vec::new(),
-            clip_and_scroll: clip_and_scroll,
-            local_clip: local_clip.clone(),
-            local_rect: LayerRect::zero(),
-        }
-    }
-
-    fn push(&mut self,
-            local_rect: LayerRect,
-            primitive: &TextRunPrimitiveCpu) {
-        self.text_primitives.push(primitive.clone());
-        let shadow_rect = local_rect.inflate(self.shadow.blur_radius,
-                                             self.shadow.blur_radius);
-        self.local_rect = self.local_rect.union(&shadow_rect);
-    }
-}
-
 pub struct FrameBuilder {
     screen_size: DeviceUintSize,
     background_color: Option<ColorF>,
     prim_store: PrimitiveStore,
     cmds: Vec<PrimitiveRunCmd>,
     config: FrameBuilderConfig,
 
     stacking_context_store: Vec<StackingContext>,
     clip_scroll_group_store: Vec<ClipScrollGroup>,
     packed_layers: Vec<PackedLayer>,
-    pending_text_shadows: Vec<PendingTextShadow>,
+
+    // A stack of the current text-shadow primitives.
+    shadow_prim_stack: Vec<PrimitiveIndex>,
 
     scrollbar_prims: Vec<ScrollbarPrimitive>,
 
     /// A stack of scroll nodes used during display list processing to properly
     /// parent new scroll nodes.
     reference_frame_stack: Vec<ClipId>,
 
     /// A stack of stacking contexts used for creating ClipScrollGroups as
@@ -171,34 +142,34 @@ impl FrameBuilder {
                config: FrameBuilderConfig) -> FrameBuilder {
         match previous {
             Some(prev) => {
                 FrameBuilder {
                     stacking_context_store: recycle_vec(prev.stacking_context_store),
                     clip_scroll_group_store: recycle_vec(prev.clip_scroll_group_store),
                     cmds: recycle_vec(prev.cmds),
                     packed_layers: recycle_vec(prev.packed_layers),
-                    pending_text_shadows: recycle_vec(prev.pending_text_shadows),
+                    shadow_prim_stack: recycle_vec(prev.shadow_prim_stack),
                     scrollbar_prims: recycle_vec(prev.scrollbar_prims),
                     reference_frame_stack: recycle_vec(prev.reference_frame_stack),
                     stacking_context_stack: recycle_vec(prev.stacking_context_stack),
                     prim_store: prev.prim_store.recycle(),
                     screen_size,
                     background_color,
                     config,
                     has_root_stacking_context: false,
                 }
             }
             None => {
                 FrameBuilder {
                     stacking_context_store: Vec::new(),
                     clip_scroll_group_store: Vec::new(),
                     cmds: Vec::new(),
                     packed_layers: Vec::new(),
-                    pending_text_shadows: Vec::new(),
+                    shadow_prim_stack: Vec::new(),
                     scrollbar_prims: Vec::new(),
                     reference_frame_stack: Vec::new(),
                     stacking_context_stack: Vec::new(),
                     prim_store: PrimitiveStore::new(),
                     screen_size,
                     background_color,
                     config,
                     has_root_stacking_context: false,
@@ -214,57 +185,84 @@ impl FrameBuilder {
             return;
         }
 
         let group_index = self.create_clip_scroll_group(stacking_context_index, info);
         let stacking_context = &mut self.stacking_context_store[stacking_context_index.0];
         stacking_context.clip_scroll_groups.push(group_index);
     }
 
-    pub fn add_primitive(&mut self,
-                         clip_and_scroll: ClipAndScrollInfo,
-                         rect: &LayerRect,
-                         local_clip: &LocalClip,
-                         extra_clips: &[ClipSource],
-                         container: PrimitiveContainer)
-                         -> PrimitiveIndex {
+    /// Create a primitive and add it to the prim store. This method doesn't
+    /// add the primitive to the draw list, so can be used for creating
+    /// sub-primitives.
+    fn create_primitive(&mut self,
+                        clip_and_scroll: ClipAndScrollInfo,
+                        rect: &LayerRect,
+                        local_clip: &LocalClip,
+                        extra_clips: &[ClipSource],
+                        container: PrimitiveContainer) -> PrimitiveIndex {
         let stacking_context_index = *self.stacking_context_stack.last().unwrap();
 
         self.create_clip_scroll_group_if_necessary(stacking_context_index, clip_and_scroll);
 
         let mut clip_sources = extra_clips.to_vec();
         if let &LocalClip::RoundedRect(_, _) = local_clip {
-            clip_sources.push(ClipSource::Region(ClipRegion::for_local_clip(local_clip)))
+            clip_sources.push(ClipSource::Region(ClipRegion::create_for_local_clip(local_clip)))
         }
 
         let clip_info = if !clip_sources.is_empty() {
             Some(MaskCacheInfo::new(&clip_sources))
         } else {
             None
         };
 
         let prim_index = self.prim_store.add_primitive(rect,
                                                        &local_clip.clip_rect(),
                                                        clip_sources,
                                                        clip_info,
                                                        container);
 
+        prim_index
+    }
+
+    /// Add an already created primitive to the draw lists.
+    pub fn add_primitive_to_draw_list(&mut self,
+                                      prim_index: PrimitiveIndex,
+                                      clip_and_scroll: ClipAndScrollInfo) {
         match self.cmds.last_mut().unwrap() {
-            &mut PrimitiveRunCmd::PrimitiveRun(_run_prim_index, ref mut count, run_clip_and_scroll)
-                if run_clip_and_scroll == clip_and_scroll => {
-                    debug_assert!(_run_prim_index.0 + *count == prim_index.0);
+            &mut PrimitiveRunCmd::PrimitiveRun(run_prim_index, ref mut count, run_clip_and_scroll) => {
+                if run_clip_and_scroll == clip_and_scroll &&
+                   run_prim_index.0 + *count == prim_index.0 {
                     *count += 1;
-                    return prim_index;
+                    return;
+                }
             }
-            &mut PrimitiveRunCmd::PrimitiveRun(..) |
             &mut PrimitiveRunCmd::PushStackingContext(..) |
             &mut PrimitiveRunCmd::PopStackingContext => {}
         }
 
         self.cmds.push(PrimitiveRunCmd::PrimitiveRun(prim_index, 1, clip_and_scroll));
+    }
+
+    /// Convenience interface that creates a primitive entry and adds it
+    /// to the draw list.
+    pub fn add_primitive(&mut self,
+                         clip_and_scroll: ClipAndScrollInfo,
+                         rect: &LayerRect,
+                         local_clip: &LocalClip,
+                         extra_clips: &[ClipSource],
+                         container: PrimitiveContainer) -> PrimitiveIndex {
+        let prim_index = self.create_primitive(clip_and_scroll,
+                                               rect,
+                                               local_clip,
+                                               extra_clips,
+                                               container);
+
+        self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
+
         prim_index
     }
 
     pub fn create_clip_scroll_group(&mut self,
                                     stacking_context_index: StackingContextIndex,
                                     info: ClipAndScrollInfo)
                                     -> ClipScrollGroupIndex {
         let packed_layer_index = PackedLayerIndex(self.packed_layers.len());
@@ -316,17 +314,17 @@ impl FrameBuilder {
         self.has_root_stacking_context = true;
         self.cmds.push(PrimitiveRunCmd::PushStackingContext(stacking_context_index));
         self.stacking_context_stack.push(stacking_context_index);
     }
 
     pub fn pop_stacking_context(&mut self) {
         self.cmds.push(PrimitiveRunCmd::PopStackingContext);
         self.stacking_context_stack.pop();
-        assert!(self.pending_text_shadows.is_empty(),
+        assert!(self.shadow_prim_stack.is_empty(),
             "Found unpopped text shadows when popping stacking context!");
     }
 
     pub fn push_reference_frame(&mut self,
                                 parent_id: Option<ClipId>,
                                 pipeline_id: PipelineId,
                                 rect: &LayerRect,
                                 transform: &LayerToScrollTransform,
@@ -427,71 +425,101 @@ impl FrameBuilder {
     pub fn pop_reference_frame(&mut self) {
         self.reference_frame_stack.pop();
     }
 
     pub fn push_text_shadow(&mut self,
                             shadow: TextShadow,
                             clip_and_scroll: ClipAndScrollInfo,
                             local_clip: &LocalClip) {
-        let text_shadow = PendingTextShadow::new(shadow,
-                                                 clip_and_scroll,
-                                                 local_clip);
-        self.pending_text_shadows.push(text_shadow);
+        let prim = TextShadowPrimitiveCpu {
+            shadow,
+            primitives: Vec::new(),
+        };
+
+        // Create an empty text-shadow primitive. Insert it into
+        // the draw lists immediately so that it will be drawn
+        // before any visual text elements that are added as
+        // part of this text-shadow context.
+        let prim_index = self.add_primitive(clip_and_scroll,
+                                            &LayerRect::zero(),
+                                            local_clip,
+                                            &[],
+                                            PrimitiveContainer::TextShadow(prim));
+
+        self.shadow_prim_stack.push(prim_index);
     }
 
     pub fn pop_text_shadow(&mut self) {
-        let mut text_shadow = self.pending_text_shadows
-                                  .pop()
-                                  .expect("Too many PopTextShadows?");
-        if !text_shadow.text_primitives.is_empty() {
-            let prim_cpu = TextShadowPrimitiveCpu {
-                text_primitives: text_shadow.text_primitives,
-                shadow: text_shadow.shadow,
-            };
+        let prim_index = self.shadow_prim_stack
+                             .pop()
+                             .expect("invalid shadow push/pop count");
 
-            text_shadow.local_rect = text_shadow.local_rect
-                                                .translate(&text_shadow.shadow.offset);
+        // By now, the local rect of the text shadow has been calculated. It
+        // is calculated as the items in the shadow are added. It's now
+        // safe to offset the local rect by the offset of the shadow, which
+        // is then used when blitting the shadow to the final location.
+        let metadata = &mut self.prim_store.cpu_metadata[prim_index.0];
+        let prim = &self.prim_store.cpu_text_shadows[metadata.cpu_prim_index.0];
 
-            self.add_primitive(text_shadow.clip_and_scroll,
-                               &text_shadow.local_rect,
-                               &text_shadow.local_clip,
-                               &[],
-                               PrimitiveContainer::TextShadow(prim_cpu));
-        }
+        metadata.local_rect = metadata.local_rect.translate(&prim.shadow.offset);
     }
 
     pub fn add_solid_rectangle(&mut self,
                                clip_and_scroll: ClipAndScrollInfo,
                                rect: &LayerRect,
                                local_clip: &LocalClip,
                                color: &ColorF,
                                flags: PrimitiveFlags) {
-        if color.a == 0.0 {
-            return;
+        // TODO(gw): This is here as a temporary measure to allow
+        //           solid rectangles to be drawn into an
+        //           (unblurred) text-shadow. Supporting this allows
+        //           a WR update in Servo, since the tests rely
+        //           on this functionality. Once the complete
+        //           text decoration support is added (via the
+        //           Line display item) this can be removed, so that
+        //           rectangles don't participate in text shadows.
+        let mut trivial_shadows = Vec::new();
+        for shadow_prim_index in &self.shadow_prim_stack {
+            let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
+            let shadow_prim = &self.prim_store.cpu_text_shadows[shadow_metadata.cpu_prim_index.0];
+            if shadow_prim.shadow.blur_radius == 0.0 {
+                trivial_shadows.push(shadow_prim.shadow);
+            }
+        }
+        for shadow in trivial_shadows {
+            self.add_primitive(clip_and_scroll,
+                               &rect.translate(&shadow.offset),
+                               local_clip,
+                               &[],
+                               PrimitiveContainer::Rectangle(RectanglePrimitive {
+                                   color: shadow.color,
+                               }));
         }
 
-        let prim = RectanglePrimitive {
-            color: *color,
-        };
+        if color.a > 0.0 {
+            let prim = RectanglePrimitive {
+                color: *color,
+            };
 
-        let prim_index = self.add_primitive(clip_and_scroll,
-                                            rect,
-                                            local_clip,
-                                            &[],
-                                            PrimitiveContainer::Rectangle(prim));
+            let prim_index = self.add_primitive(clip_and_scroll,
+                                                rect,
+                                                local_clip,
+                                                &[],
+                                                PrimitiveContainer::Rectangle(prim));
 
-        match flags {
-            PrimitiveFlags::None => {}
-            PrimitiveFlags::Scrollbar(clip_id, border_radius) => {
-                self.scrollbar_prims.push(ScrollbarPrimitive {
-                    prim_index,
-                    clip_id,
-                    border_radius,
-                });
+            match flags {
+                PrimitiveFlags::None => {}
+                PrimitiveFlags::Scrollbar(clip_id, border_radius) => {
+                    self.scrollbar_prims.push(ScrollbarPrimitive {
+                        prim_index,
+                        clip_id,
+                        border_radius,
+                    });
+                }
             }
         }
     }
 
     pub fn add_border(&mut self,
                       clip_and_scroll: ClipAndScrollInfo,
                       rect: LayerRect,
                       local_clip: &LocalClip,
@@ -798,77 +826,125 @@ impl FrameBuilder {
                     rect: LayerRect,
                     local_clip: &LocalClip,
                     font_key: FontKey,
                     size: Au,
                     color: &ColorF,
                     glyph_range: ItemRange<GlyphInstance>,
                     glyph_count: usize,
                     glyph_options: Option<GlyphOptions>) {
-        let is_text_shadow = !self.pending_text_shadows.is_empty();
-
-        if color.a == 0.0 && !is_text_shadow {
-            return
-        }
-
+        // Trivial early out checks
         if size.0 <= 0 {
             return
         }
 
         // TODO(gw): Use a proper algorithm to select
         // whether this item should be rendered with
         // subpixel AA!
-        let mut render_mode = self.config.default_font_render_mode;
+        let mut normal_render_mode = self.config.default_font_render_mode;
 
         // There are some conditions under which we can't use
         // subpixel text rendering, even if enabled.
-        if render_mode == FontRenderMode::Subpixel {
-            // text-blur shadow needs to force alpha AA.
-            if is_text_shadow {
-                render_mode = FontRenderMode::Alpha;
-            }
-
+        if normal_render_mode == FontRenderMode::Subpixel {
             if color.a != 1.0 {
-                render_mode = FontRenderMode::Alpha;
+                normal_render_mode = FontRenderMode::Alpha;
             }
 
             // text on a stacking context that has filters
             // (e.g. opacity) can't use sub-pixel.
             // TODO(gw): It's possible we can relax this in
             //           the future, if we modify the way
             //           we handle subpixel blending.
             if let Some(sc_index) = self.stacking_context_stack.last() {
                 let stacking_context = &self.stacking_context_store[sc_index.0];
                 if stacking_context.composite_ops.count() > 0 {
-                    render_mode = FontRenderMode::Alpha;
+                    normal_render_mode = FontRenderMode::Alpha;
                 }
             }
         }
 
-        let prim_cpu = TextRunPrimitiveCpu {
+        // Shadows never use subpixel AA, but need to respect the alpha/mono flag
+        // for reftests.
+        let shadow_render_mode = match self.config.default_font_render_mode {
+            FontRenderMode::Subpixel | FontRenderMode::Alpha => FontRenderMode::Alpha,
+            FontRenderMode::Mono => FontRenderMode::Mono,
+        };
+
+        let prim = TextRunPrimitiveCpu {
             font_key,
             logical_font_size: size,
             glyph_range,
             glyph_count,
             glyph_instances: Vec::new(),
+            glyph_options,
+            normal_render_mode,
+            shadow_render_mode,
+            offset: LayerVector2D::zero(),
             color: *color,
-            render_mode,
-            glyph_options,
         };
 
-        if is_text_shadow {
-            for shadow in &mut self.pending_text_shadows {
-                shadow.push(rect, &prim_cpu);
+        // Text shadows that have a blur radius of 0 need to be rendered as normal
+        // text elements to get pixel perfect results for reftests. It's also a big
+        // performance win to avoid blurs and render target allocations where
+        // possible. For any text shadows that have zero blur, create a normal text
+        // primitive with the shadow's color and offset. These need to be added
+        // *before* the visual text primitive in order to get the correct paint
+        // order. Store them in a Vec first to work around borrowck issues.
+        // TODO(gw): Refactor to avoid having to store them in a Vec first.
+        let mut fast_text_shadow_prims = Vec::new();
+        for shadow_prim_index in &self.shadow_prim_stack {
+            let shadow_metadata = &self.prim_store.cpu_metadata[shadow_prim_index.0];
+            let shadow_prim = &self.prim_store.cpu_text_shadows[shadow_metadata.cpu_prim_index.0];
+            if shadow_prim.shadow.blur_radius == 0.0 {
+                let mut text_prim = prim.clone();
+                text_prim.color = shadow_prim.shadow.color;
+                text_prim.offset = shadow_prim.shadow.offset;
+                fast_text_shadow_prims.push(text_prim);
             }
-        } else {
+        }
+        for text_prim in fast_text_shadow_prims {
             self.add_primitive(clip_and_scroll,
-                               &rect,
+                               &rect.translate(&text_prim.offset),
                                local_clip,
                                &[],
-                               PrimitiveContainer::TextRun(prim_cpu));
+                               PrimitiveContainer::TextRun(text_prim));
+        }
+
+        // Create (and add to primitive store) the primitive that will be
+        // used for both the visual element and also the shadow(s).
+        let prim_index = self.create_primitive(clip_and_scroll,
+                                               &rect,
+                                               local_clip,
+                                               &[],
+                                               PrimitiveContainer::TextRun(prim));
+
+        // Only add a visual element if it can contribute to the scene.
+        if color.a > 0.0 {
+            self.add_primitive_to_draw_list(prim_index, clip_and_scroll);
+        }
+
+        // Now add this primitive index to all the currently active text shadow
+        // primitives. Although we're adding the indices *after* the visual
+        // primitive here, they will still draw before the visual text, since
+        // the text-shadow primitive itself has been added to the draw cmd
+        // list *before* the visual element, during push_text_shadow. We need
+        // the primitive index of the visual element here before we can add
+        // the indices as sub-primitives to the shadow primitives.
+        for shadow_prim_index in &self.shadow_prim_stack {
+            let shadow_metadata = &mut self.prim_store.cpu_metadata[shadow_prim_index.0];
+            debug_assert_eq!(shadow_metadata.prim_kind, PrimitiveKind::TextShadow);
+            let shadow_prim = &mut self.prim_store.cpu_text_shadows[shadow_metadata.cpu_prim_index.0];
+
+            // Only run real blurs here (fast path zero blurs are handled above).
+            if shadow_prim.shadow.blur_radius > 0.0 {
+                let shadow_rect = rect.inflate(shadow_prim.shadow.blur_radius,
+                                               shadow_prim.shadow.blur_radius);
+                shadow_metadata.local_rect = shadow_metadata.local_rect.union(&shadow_rect);
+                shadow_prim.primitives.push(prim_index);
+            }
         }
     }
 
     pub fn fill_box_shadow_rect(&mut self,
                                 clip_and_scroll: ClipAndScrollInfo,
                                 box_bounds: &LayerRect,
                                 bs_rect: LayerRect,
                                 local_clip: &LocalClip,
@@ -1869,17 +1945,18 @@ impl<'a> LayerRectCalculationAndCullingP
 
             debug!("\t\t{:?} bound is {:?}", prim_index, prim_screen_rect);
 
             let prim_metadata = prim_store.prepare_prim_for_render(prim_index,
                                                                    self.resource_cache,
                                                                    self.gpu_cache,
                                                                    &packed_layer.transform,
                                                                    self.device_pixel_ratio,
-                                                                   display_list);
+                                                                   display_list,
+                                                                   TextRunMode::Normal);
 
             stacking_context.screen_bounds = stacking_context.screen_bounds.union(&prim_screen_rect);
             stacking_context.isolated_items_bounds = stacking_context.isolated_items_bounds.union(&prim_local_rect);
 
             // Try to create a mask if we may need to.
             if !self.current_clip_stack.is_empty() || prim_metadata.clip_cache_info.is_some() {
                 // If the primitive doesn't have a specific clip, key the task ID off the
                 // stacking context. This means that two primitives which are only clipped
--- a/gfx/webrender/src/mask_cache.rs
+++ b/gfx/webrender/src/mask_cache.rs
@@ -16,20 +16,20 @@ const MAX_CLIP: f32 = 1000000.0;
 pub struct ClipRegion {
     pub origin: LayerPoint,
     pub main: LayerRect,
     pub image_mask: Option<ImageMask>,
     pub complex_clips: Vec<ComplexClipRegion>,
 }
 
 impl ClipRegion {
-    pub fn for_clip_node(rect: LayerRect,
-                         mut complex_clips: Vec<ComplexClipRegion>,
-                         mut image_mask: Option<ImageMask>)
-                         -> ClipRegion {
+    pub fn create_for_clip_node(rect: LayerRect,
+                                mut complex_clips: Vec<ComplexClipRegion>,
+                                mut image_mask: Option<ImageMask>)
+                                -> ClipRegion {
         // All the coordinates we receive are relative to the stacking context, but we want
         // to convert them to something relative to the origin of the clip.
         let negative_origin = -rect.origin.to_vector();
         if let Some(ref mut image_mask) = image_mask {
             image_mask.rect = image_mask.rect.translate(&negative_origin);
         }
 
         for complex_clip in complex_clips.iter_mut() {
@@ -44,20 +44,20 @@ impl ClipRegion {
         }
     }
 
     pub fn create_for_clip_node_with_local_clip(local_clip: &LocalClip) -> ClipRegion {
         let complex_clips = match local_clip {
             &LocalClip::Rect(_) => Vec::new(),
             &LocalClip::RoundedRect(_, ref region) => vec![region.clone()],
         };
-        ClipRegion::for_clip_node(*local_clip.clip_rect(), complex_clips, None)
+        ClipRegion::create_for_clip_node(*local_clip.clip_rect(), complex_clips, None)
     }
 
-    pub fn for_local_clip(local_clip: &LocalClip) -> ClipRegion {
+    pub fn create_for_local_clip(local_clip: &LocalClip) -> ClipRegion {
         let complex_clips = match local_clip {
             &LocalClip::Rect(_) => Vec::new(),
             &LocalClip::RoundedRect(_, ref region) => vec![region.clone()],
         };
 
         ClipRegion {
             origin: LayerPoint::zero(),
             main: *local_clip.clip_rect(),
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayList, ColorF, ComplexClipRegion, DeviceIntRect, DeviceIntSize, DevicePoint};
 use api::{ExtendMode, FontKey, FontRenderMode, GlyphInstance, GlyphOptions, GradientStop};
 use api::{ImageKey, ImageRendering, ItemRange, LayerPoint, LayerRect, LayerSize, TextShadow};
 use api::{LayerToWorldTransform, TileOffset, WebGLContextId, YuvColorSpace, YuvFormat};
-use api::device_length;
+use api::{device_length, LayerVector2D};
 use app_units::Au;
 use border::BorderCornerInstance;
 use euclid::{Size2D};
 use gpu_cache::{GpuCacheAddress, GpuBlockData, GpuCache, GpuCacheHandle, GpuDataRequest, ToGpuBlocks};
 use mask_cache::{ClipMode, ClipRegion, ClipSource, MaskCacheInfo};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use render_task::{RenderTask, RenderTaskLocation};
 use resource_cache::{ImageProperties, ResourceCache};
@@ -476,89 +476,98 @@ impl RadialGradientPrimitiveCpu {
 
         let gradient_builder = GradientGpuBlockBuilder::new(self.stops_range,
                                                             display_list);
         gradient_builder.build(false, &mut request);
     }
 }
 
 #[derive(Debug, Clone)]
+pub struct TextDecoration {
+    pub local_rect: LayerRect,
+    pub prim: RectanglePrimitive,
+}
+
+#[derive(Debug, Clone)]
 pub struct TextShadowPrimitiveCpu {
-    pub text_primitives: Vec<TextRunPrimitiveCpu>,
     pub shadow: TextShadow,
+    pub primitives: Vec<PrimitiveIndex>,
 }
 
 #[derive(Debug, Clone)]
 pub struct TextRunPrimitiveCpu {
     pub font_key: FontKey,
+    pub offset: LayerVector2D,
     pub logical_font_size: Au,
     pub glyph_range: ItemRange<GlyphInstance>,
     pub glyph_count: usize,
     // TODO(gw): Maybe make this an Arc for sharing with resource cache
     pub glyph_instances: Vec<GlyphInstance>,
+    pub glyph_options: Option<GlyphOptions>,
+    pub normal_render_mode: FontRenderMode,
+    pub shadow_render_mode: FontRenderMode,
     pub color: ColorF,
-    pub render_mode: FontRenderMode,
-    pub glyph_options: Option<GlyphOptions>,
+}
+
+#[derive(Debug, Copy, Clone)]
+pub enum TextRunMode {
+    Normal,
+    Shadow,
 }
 
 impl TextRunPrimitiveCpu {
     fn prepare_for_render(&mut self,
                           resource_cache: &mut ResourceCache,
                           device_pixel_ratio: f32,
-                          display_list: &BuiltDisplayList) {
+                          display_list: &BuiltDisplayList,
+                          run_mode: TextRunMode) {
         // Cache the glyph positions, if not in the cache already.
         // TODO(gw): In the future, remove `glyph_instances`
         //           completely, and just reference the glyphs
         //           directly from the display list.
         if self.glyph_instances.is_empty() {
             let src_glyphs = display_list.get(self.glyph_range);
             for src in src_glyphs {
                 self.glyph_instances.push(GlyphInstance {
                     index: src.index,
                     point: src.point,
                 });
             }
         }
 
         let font_size_dp = self.logical_font_size.scale_by(device_pixel_ratio);
+        let render_mode = match run_mode {
+            TextRunMode::Normal => self.normal_render_mode,
+            TextRunMode::Shadow => self.shadow_render_mode,
+        };
 
         resource_cache.request_glyphs(self.font_key,
                                       font_size_dp,
                                       self.color,
                                       &self.glyph_instances,
-                                      self.render_mode,
+                                      render_mode,
                                       self.glyph_options);
     }
 
-    fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
+    fn write_gpu_blocks(&self,
+                        request: &mut GpuDataRequest) {
+        request.push(self.color);
+        request.push([self.offset.x, self.offset.y, 0.0, 0.0]);
+
         // Two glyphs are packed per GPU block.
         for glyph_chunk in self.glyph_instances.chunks(2) {
             // In the case of an odd number of glyphs, the
             // last glyph will get duplicated in the final
             // GPU block.
             let first_glyph = glyph_chunk.first().unwrap();
             let second_glyph = glyph_chunk.last().unwrap();
-            let data = match self.render_mode {
-                FontRenderMode::Mono |
-                FontRenderMode::Alpha => [
-                    first_glyph.point.x,
-                    first_glyph.point.y,
-                    second_glyph.point.x,
-                    second_glyph.point.y,
-                ],
-                // The sub-pixel offset has already been taken into account
-                // by the glyph rasterizer, thus the truncating here.
-                FontRenderMode::Subpixel => [
-                    first_glyph.point.x.trunc(),
-                    first_glyph.point.y.trunc(),
-                    second_glyph.point.x.trunc(),
-                    second_glyph.point.y.trunc(),
-                ],
-            };
-            request.push(data);
+            request.push([first_glyph.point.x,
+                          first_glyph.point.y,
+                          second_glyph.point.x,
+                          second_glyph.point.y]);
         }
     }
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 struct GlyphPrimitive {
     offset: LayerPoint,
@@ -1023,18 +1032,40 @@ impl PrimitiveStore {
 
     /// Prepares a primitive for rendering and returns a mutable reference to primitive metadata.
     pub fn prepare_prim_for_render(&mut self,
                                    prim_index: PrimitiveIndex,
                                    resource_cache: &mut ResourceCache,
                                    gpu_cache: &mut GpuCache,
                                    layer_transform: &LayerToWorldTransform,
                                    device_pixel_ratio: f32,
-                                   display_list: &BuiltDisplayList)
+                                   display_list: &BuiltDisplayList,
+                                   text_run_mode: TextRunMode)
                                    -> &mut PrimitiveMetadata {
+        let (prim_kind, cpu_prim_index) = {
+            let metadata = &self.cpu_metadata[prim_index.0];
+            (metadata.prim_kind, metadata.cpu_prim_index)
+        };
+
+        // Recurse into any sub primitives and prepare them for rendering first.
+        // TODO(gw): This code is a bit hacky to work around the borrow checker.
+        //           Specifically, the clone() below on the primitive list for
+        //           text shadow primitives. Consider restructuring this code to
+        //           avoid borrow checker issues.
+        if prim_kind == PrimitiveKind::TextShadow {
+            for sub_prim_index in self.cpu_text_shadows[cpu_prim_index.0].primitives.clone() {
+                self.prepare_prim_for_render(sub_prim_index,
+                                             resource_cache,
+                                             gpu_cache,
+                                             layer_transform,
+                                             device_pixel_ratio,
+                                             display_list,
+                                             TextRunMode::Shadow);
+            }
+        }
 
         let metadata = &mut self.cpu_metadata[prim_index.0];
 
         if let Some(ref mut clip_info) = metadata.clip_cache_info {
             clip_info.update(&metadata.clips, layer_transform, gpu_cache, device_pixel_ratio);
 
             //TODO-LCCR: we could tighten up the `local_clip_rect` here
             // but that would require invalidating the whole GPU block
@@ -1052,30 +1083,25 @@ impl PrimitiveStore {
             PrimitiveKind::BoxShadow => {
                 // TODO(gw): Account for zoom factor!
                 // Here, we calculate the size of the patch required in order
                 // to create the box shadow corner. First, scale it by the
                 // device pixel ratio since the cache shader expects vertices
                 // in device space. The shader adds a 1-pixel border around
                 // the patch, in order to prevent bilinear filter artifacts as
                 // the patch is clamped / mirrored across the box shadow rect.
-                let box_shadow_cpu = &self.cpu_box_shadows[metadata.cpu_prim_index.0];
+                let box_shadow_cpu = &self.cpu_box_shadows[cpu_prim_index.0];
                 let edge_size = box_shadow_cpu.edge_size.ceil() * device_pixel_ratio;
                 let edge_size = edge_size as i32 + 2;   // Account for bilinear filtering
                 let cache_size = DeviceIntSize::new(edge_size, edge_size);
                 let location = RenderTaskLocation::Dynamic(None, cache_size);
                 metadata.render_task.as_mut().unwrap().location = location;
             }
             PrimitiveKind::TextShadow => {
-                let shadow = &mut self.cpu_text_shadows[metadata.cpu_prim_index.0];
-                for text in &mut shadow.text_primitives {
-                    text.prepare_for_render(resource_cache,
-                                            device_pixel_ratio,
-                                            display_list);
-                }
+                let shadow = &mut self.cpu_text_shadows[cpu_prim_index.0];
 
                 // This is a text-shadow element. Create a render task that will
                 // render the text run to a target, and then apply a gaussian
                 // blur to that text run in order to build the actual primitive
                 // which will be blitted to the framebuffer.
                 let cache_width = (metadata.local_rect.size.width * device_pixel_ratio).ceil() as i32;
                 let cache_height = (metadata.local_rect.size.height * device_pixel_ratio).ceil() as i32;
                 let cache_size = DeviceIntSize::new(cache_width, cache_height);
@@ -1083,23 +1109,24 @@ impl PrimitiveStore {
                 let blur_radius = device_length(shadow.shadow.blur_radius,
                                                 device_pixel_ratio);
                 metadata.render_task = Some(RenderTask::new_blur(cache_key,
                                                                  cache_size,
                                                                  blur_radius,
                                                                  prim_index));
             }
             PrimitiveKind::TextRun => {
-                let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
+                let text = &mut self.cpu_text_runs[cpu_prim_index.0];
                 text.prepare_for_render(resource_cache,
                                         device_pixel_ratio,
-                                        display_list);
+                                        display_list,
+                                        text_run_mode);
             }
             PrimitiveKind::Image => {
-                let image_cpu = &mut self.cpu_images[metadata.cpu_prim_index.0];
+                let image_cpu = &mut self.cpu_images[cpu_prim_index.0];
 
                 match image_cpu.kind {
                     ImagePrimitiveKind::Image(image_key, image_rendering, tile_offset, tile_spacing) => {
                         resource_cache.request_image(image_key, image_rendering, tile_offset);
 
                         // TODO(gw): This doesn't actually need to be calculated each frame.
                         // It's cheap enough that it's not worth introducing a cache for images
                         // right now, but if we introduce a cache for images for some other
@@ -1108,17 +1135,17 @@ impl PrimitiveStore {
                         metadata.opacity.is_opaque = image_properties.descriptor.is_opaque &&
                                                      tile_spacing.width == 0.0 &&
                                                      tile_spacing.height == 0.0;
                     }
                     ImagePrimitiveKind::WebGL(..) => {}
                 }
             }
             PrimitiveKind::YuvImage => {
-                let image_cpu = &mut self.cpu_yuv_images[metadata.cpu_prim_index.0];
+                let image_cpu = &mut self.cpu_yuv_images[cpu_prim_index.0];
 
                 let channel_num = image_cpu.format.get_plane_num();
                 debug_assert!(channel_num <= 3);
                 for channel in 0..channel_num {
                     resource_cache.request_image(image_cpu.yuv_key[channel], image_cpu.image_rendering, None);
                 }
             }
             PrimitiveKind::AlignedGradient |
@@ -1128,61 +1155,61 @@ impl PrimitiveStore {
 
         // Mark this GPU resource as required for this frame.
         if let Some(mut request) = gpu_cache.request(&mut metadata.gpu_location) {
             request.push(metadata.local_rect);
             request.push(metadata.local_clip_rect);
 
             match metadata.prim_kind {
                 PrimitiveKind::Rectangle => {
-                    let rect = &self.cpu_rectangles[metadata.cpu_prim_index.0];
+                    let rect = &self.cpu_rectangles[cpu_prim_index.0];
                     rect.write_gpu_blocks(request);
                 }
                 PrimitiveKind::Border => {
-                    let border = &self.cpu_borders[metadata.cpu_prim_index.0];
+                    let border = &self.cpu_borders[cpu_prim_index.0];
                     border.write_gpu_blocks(request);
                 }
                 PrimitiveKind::BoxShadow => {
-                    let box_shadow = &self.cpu_box_shadows[metadata.cpu_prim_index.0];
+                    let box_shadow = &self.cpu_box_shadows[cpu_prim_index.0];
                     box_shadow.write_gpu_blocks(request);
                 }
                 PrimitiveKind::Image => {
-                    let image = &self.cpu_images[metadata.cpu_prim_index.0];
+                    let image = &self.cpu_images[cpu_prim_index.0];
                     image.write_gpu_blocks(request);
                 }
                 PrimitiveKind::YuvImage => {
-                    let yuv_image = &self.cpu_yuv_images[metadata.cpu_prim_index.0];
+                    let yuv_image = &self.cpu_yuv_images[cpu_prim_index.0];
                     yuv_image.write_gpu_blocks(request);
                 }
                 PrimitiveKind::AlignedGradient => {
-                    let gradient = &self.cpu_gradients[metadata.cpu_prim_index.0];
+                    let gradient = &self.cpu_gradients[cpu_prim_index.0];
                     metadata.opacity = gradient.build_gpu_blocks_for_aligned(display_list,
                                                                              request);
                 }
                 PrimitiveKind::AngleGradient => {
-                    let gradient = &self.cpu_gradients[metadata.cpu_prim_index.0];
+                    let gradient = &self.cpu_gradients[cpu_prim_index.0];
                     gradient.build_gpu_blocks_for_angle_radial(display_list,
                                                                request);
                 }
                 PrimitiveKind::RadialGradient => {
-                    let gradient = &self.cpu_radial_gradients[metadata.cpu_prim_index.0];
+                    let gradient = &self.cpu_radial_gradients[cpu_prim_index.0];
                     gradient.build_gpu_blocks_for_angle_radial(display_list,
                                                                request);
                 }
                 PrimitiveKind::TextRun => {
-                    let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
-                    request.push(text.color);
+                    let text = &self.cpu_text_runs[cpu_prim_index.0];
                     text.write_gpu_blocks(&mut request);
                 }
                 PrimitiveKind::TextShadow => {
-                    let prim = &self.cpu_text_shadows[metadata.cpu_prim_index.0];
+                    let prim = &self.cpu_text_shadows[cpu_prim_index.0];
                     request.push(prim.shadow.color);
-                    for text in &prim.text_primitives {
-                        text.write_gpu_blocks(&mut request);
-                    }
+                    request.push([prim.shadow.offset.x,
+                                  prim.shadow.offset.y,
+                                  prim.shadow.blur_radius,
+                                  0.0]);
                 }
             }
         }
 
         metadata
     }
 }
 
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -23,16 +23,17 @@ use api::{DevicePoint, DeviceIntSize, De
 use api::{GlyphOptions, GlyphInstance, SubpixelPoint, TileOffset, TileSize};
 use api::{BlobImageRenderer, BlobImageDescriptor, BlobImageError, BlobImageRequest, BlobImageData};
 use api::BlobImageResources;
 use api::{ExternalImageData, ExternalImageType, LayoutPoint};
 use rayon::ThreadPool;
 use glyph_rasterizer::{GlyphRasterizer, GlyphCache, GlyphRequest};
 
 const DEFAULT_TILE_SIZE: TileSize = 512;
+const BLACK: ColorF = ColorF { r: 0.0, b: 0.0, g: 0.0, a: 1.0 };
 
 // These coordinates are always in texels.
 // They are converted to normalized ST
 // values in the vertex shader. The reason
 // for this is that the texture may change
 // dimensions (e.g. the pages in a texture
 // atlas can grow). When this happens, by
 // storing the coordinates as texel values
@@ -307,19 +308,16 @@ impl ResourceCache {
     }
 
     pub fn update_image_template(&mut self,
                                  image_key: ImageKey,
                                  descriptor: ImageDescriptor,
                                  mut data: ImageData,
                                  dirty_rect: Option<DeviceUintRect>) {
         let resource = if let Some(image) = self.resources.image_templates.get(image_key) {
-            assert_eq!(image.descriptor.width, descriptor.width);
-            assert_eq!(image.descriptor.height, descriptor.height);
-            assert_eq!(image.descriptor.format, descriptor.format);
 
             let next_epoch = Epoch(image.epoch.0 + 1);
 
             let mut tiling = image.tiling;
             if tiling.is_none() && self.should_tile(&descriptor, &data) {
                 tiling = Some(DEFAULT_TILE_SIZE);
             }
 
@@ -445,22 +443,29 @@ impl ResourceCache {
         } else {
             self.pending_image_requests.push(request);
         }
     }
 
     pub fn request_glyphs(&mut self,
                           key: FontKey,
                           size: Au,
-                          color: ColorF,
+                          mut color: ColorF,
                           glyph_instances: &[GlyphInstance],
                           render_mode: FontRenderMode,
                           glyph_options: Option<GlyphOptions>) {
         debug_assert_eq!(self.state, State::AddResources);
 
+        // In alpha/mono mode, the color of the font is irrelevant.
+        // Forcing it to black in those cases saves rasterizing glyphs
+        // of different colors when not needed.
+        if render_mode != FontRenderMode::Subpixel {
+            color = BLACK;
+        }
+
         self.glyph_rasterizer.request_glyphs(
             &mut self.cached_glyphs,
             self.current_frame_id,
             key,
             size,
             color,
             glyph_instances,
             render_mode,
@@ -471,21 +476,27 @@ impl ResourceCache {
 
     pub fn pending_updates(&mut self) -> TextureUpdateList {
         self.texture_cache.pending_updates()
     }
 
     pub fn get_glyphs<F>(&self,
                          font_key: FontKey,
                          size: Au,
-                         color: ColorF,
+                         mut color: ColorF,
                          glyph_instances: &[GlyphInstance],
                          render_mode: FontRenderMode,
                          glyph_options: Option<GlyphOptions>,
                          mut f: F) -> SourceTexture where F: FnMut(usize, &GpuCacheHandle) {
+        // Color when retrieving glyphs must match that of the request,
+        // otherwise the hash keys won't match.
+        if render_mode != FontRenderMode::Subpixel {
+            color = BLACK;
+        }
+
         debug_assert_eq!(self.state, State::QueryResources);
         let mut glyph_request = GlyphRequest::new(
             font_key,
             size,
             color,
             0,
             LayoutPoint::zero(),
             render_mode,
@@ -654,16 +665,21 @@ impl ResourceCache {
                             request: &ImageRequest,
                             image_data: Option<ImageData>,
                             texture_cache_profile: &mut TextureCacheProfileCounters) {
         let image_template = self.resources.image_templates.get_mut(request.key).unwrap();
         let image_data = image_data.unwrap_or_else(||{
             image_template.data.clone()
         });
 
+        let filter = match request.rendering {
+            ImageRendering::Pixelated => TextureFilter::Nearest,
+            ImageRendering::Auto | ImageRendering::CrispEdges => TextureFilter::Linear,
+        };
+
         let descriptor = if let Some(tile) = request.tile {
             let tile_size = image_template.tiling.unwrap();
             let image_descriptor = &image_template.descriptor;
 
             let (actual_width, actual_height) = compute_tile_size(image_descriptor, tile_size, tile);
 
             // The tiled image could be stored on the CPU as one large image or be
             // already broken up into tiles. This affects the way we compute the stride
@@ -694,16 +710,17 @@ impl ResourceCache {
 
         let image_id = match self.cached_images.entry(*request, self.current_frame_id) {
             Occupied(entry) => {
                 let image_id = entry.get().texture_cache_id;
 
                 if entry.get().epoch != image_template.epoch {
                     self.texture_cache.update(image_id,
                                               descriptor,
+                                              filter,
                                               image_data,
                                               image_template.dirty_rect);
 
                     // Update the cached epoch
                     *entry.into_mut() = CachedImageInfo {
                         texture_cache_id: image_id,
                         epoch: image_template.epoch,
                     };
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -447,16 +447,18 @@ pub struct TextureCacheItem {
     pub uv_rect: UvRect,
 
     // The size of the allocated rectangle.
     pub allocated_rect: DeviceUintRect,
 
     // Handle to the location of the UV rect for this item in GPU cache.
     pub uv_rect_handle: GpuCacheHandle,
 
+    pub format: ImageFormat,
+
     // Some arbitrary data associated with this item.
     // In the case of glyphs, it is the top / left offset
     // from the rasterized glyph.
     pub user_data: [f32; 2],
 }
 
 // Structure squat the width/height fields to maintain the free list information :)
 impl FreeListItem for TextureCacheItem {
@@ -488,28 +490,30 @@ impl FreeListItem for TextureCacheItem {
             }
         }
     }
 }
 
 impl TextureCacheItem {
     fn new(texture_id: CacheTextureId,
            rect: DeviceUintRect,
+           format: ImageFormat,
            user_data: [f32; 2])
            -> TextureCacheItem {
         TextureCacheItem {
             texture_id,
             uv_rect: UvRect {
                 uv0: DevicePoint::new(rect.origin.x as f32,
                                       rect.origin.y as f32),
                 uv1: DevicePoint::new((rect.origin.x + rect.size.width) as f32,
                                       (rect.origin.y + rect.size.height) as f32),
             },
             allocated_rect: rect,
             uv_rect_handle: GpuCacheHandle::new(),
+            format,
             user_data,
         }
     }
 }
 
 struct TextureCacheArena {
     pages_a8: Vec<TexturePage>,
     pages_rgb8: Vec<TexturePage>,
@@ -610,39 +614,68 @@ impl TextureCache {
     pub fn max_texture_size(&self) -> u32 {
         self.max_texture_size
     }
 
     pub fn pending_updates(&mut self) -> TextureUpdateList {
         mem::replace(&mut self.pending_updates, TextureUpdateList::new())
     }
 
-    pub fn allocate(&mut self,
-                    requested_width: u32,
-                    requested_height: u32,
-                    format: ImageFormat,
-                    filter: TextureFilter,
-                    user_data: [f32; 2],
-                    profile: &mut TextureCacheProfileCounters)
-                    -> AllocationResult {
+    pub fn allocate(
+        &mut self,
+        requested_width: u32,
+        requested_height: u32,
+        format: ImageFormat,
+        filter: TextureFilter,
+        user_data: [f32; 2],
+        profile: &mut TextureCacheProfileCounters
+    ) -> AllocationResult {
+        self.allocate_impl(
+            requested_width,
+            requested_height,
+            format,
+            filter,
+            user_data,
+            profile,
+            None,
+        )
+    }
+
+    // If item_id is None, create a new id, otherwise reuse it.
+    fn allocate_impl(
+        &mut self,
+        requested_width: u32,
+        requested_height: u32,
+        format: ImageFormat,
+        filter: TextureFilter,
+        user_data: [f32; 2],
+        profile: &mut TextureCacheProfileCounters,
+        item_id: Option<TextureCacheItemId>
+    ) -> AllocationResult {
         let requested_size = DeviceUintSize::new(requested_width, requested_height);
 
         // TODO(gw): For now, anything that requests nearest filtering
         //           just fails to allocate in a texture page, and gets a standalone
         //           texture. This isn't ideal, as it causes lots of batch breaks,
         //           but is probably rare enough that it can be fixed up later (it's also
         //           fairly trivial to implement, just tedious).
         if filter == TextureFilter::Nearest {
             // Fall back to standalone texture allocation.
             let texture_id = self.cache_id_list.allocate();
             let cache_item = TextureCacheItem::new(
                 texture_id,
                 DeviceUintRect::new(DeviceUintPoint::zero(), requested_size),
-                user_data);
-            let image_id = self.items.insert(cache_item);
+                format,
+                user_data
+            );
+
+            let image_id = match item_id {
+                Some(id) => id,
+                None => self.items.insert(cache_item.clone()),
+            };
 
             return AllocationResult {
                 item: self.items.get(image_id).clone(),
                 kind: AllocationKind::Standalone,
                 image_id,
             }
         }
 
@@ -726,38 +759,66 @@ impl TextureCache {
                 let page = TexturePage::new(texture_id, texture_size);
                 page_list.push(page);
                 page_list.last_mut().unwrap()
             },
         };
 
         let location = page.allocate(&requested_size)
                            .expect("All the checks have passed till now, there is no way back.");
-        let cache_item = TextureCacheItem::new(page.texture_id,
-                                               DeviceUintRect::new(location, requested_size),
-                                               user_data);
-        let image_id = self.items.insert(cache_item.clone());
+        let cache_item = TextureCacheItem::new(
+            page.texture_id,
+            DeviceUintRect::new(location, requested_size),
+            format,
+            user_data
+        );
+
+        let image_id = match item_id {
+            Some(id) => id,
+            None => self.items.insert(cache_item.clone()),
+        };
 
         AllocationResult {
             item: cache_item,
             kind: AllocationKind::TexturePage,
             image_id,
         }
     }
 
-    pub fn update(&mut self,
-                  image_id: TextureCacheItemId,
-                  descriptor: ImageDescriptor,
-                  data: ImageData,
-                  dirty_rect: Option<DeviceUintRect>) {
-        let existing_item = self.items.get(image_id);
+    pub fn update(
+        &mut self,
+        image_id: TextureCacheItemId,
+        descriptor: ImageDescriptor,
+        filter: TextureFilter,
+        data: ImageData,
+        mut dirty_rect: Option<DeviceUintRect>,
+    ) {
+        let mut existing_item = self.items.get(image_id).clone();
+
+        if existing_item.allocated_rect.size.width != descriptor.width ||
+           existing_item.allocated_rect.size.height != descriptor.height ||
+           existing_item.format != descriptor.format {
+
+            self.free_item_rect(existing_item.clone());
 
-        // TODO(gw): Handle updates to size/format!
-        debug_assert_eq!(existing_item.allocated_rect.size.width, descriptor.width);
-        debug_assert_eq!(existing_item.allocated_rect.size.height, descriptor.height);
+            self.allocate_impl(
+                descriptor.width,
+                descriptor.height,
+                descriptor.format,
+                filter,
+                existing_item.user_data,
+                &mut TextureCacheProfileCounters::new(),
+                Some(image_id),
+            );
+
+            // Fetch the item again because the rect most likely changed during reallocation.
+            existing_item = self.items.get(image_id).clone();
+            // If we reallocated, we need to upload the whole item again.
+            dirty_rect = None;
+        }
 
         let op = match data {
             ImageData::External(..) => {
                 panic!("Doesn't support Update() for external image.");
             }
             ImageData::Blob(..) => {
                 panic!("The vector image should have been rasterized into a raw image.");
             }
@@ -928,16 +989,20 @@ impl TextureCache {
     }
 
     pub fn get_mut(&mut self, id: TextureCacheItemId) -> &mut TextureCacheItem {
         self.items.get_mut(id)
     }
 
     pub fn free(&mut self, id: TextureCacheItemId) {
         let item = self.items.free(id);
+        self.free_item_rect(item);
+    }
+
+    fn free_item_rect(&mut self, item: TextureCacheItem) {
         match self.arena.texture_page_for_id(item.texture_id) {
             Some(texture_page) => texture_page.free(&item.allocated_rect),
             None => {
                 // This is a standalone texture allocation. Just push it back onto the free
                 // list.
                 self.pending_updates.push(TextureUpdate {
                     id: item.texture_id,
                     op: TextureUpdateOp::Free,
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -45,17 +45,17 @@ trait AlphaBatchHelpers {
                       metadata: &PrimitiveMetadata) -> BlendMode;
 }
 
 impl AlphaBatchHelpers for PrimitiveStore {
     fn get_blend_mode(&self, needs_blending: bool, metadata: &PrimitiveMetadata) -> BlendMode {
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let text_run_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
-                match text_run_cpu.render_mode {
+                match text_run_cpu.normal_render_mode {
                     FontRenderMode::Subpixel => BlendMode::Subpixel(text_run_cpu.color),
                     FontRenderMode::Alpha | FontRenderMode::Mono => BlendMode::Alpha,
                 }
             }
             PrimitiveKind::Image |
             PrimitiveKind::AlignedGradient |
             PrimitiveKind::AngleGradient |
             PrimitiveKind::RadialGradient => {
@@ -274,17 +274,17 @@ impl AlphaRenderItem {
                     child_pass_index: RenderPassIndex,
                     task_index: RenderTaskIndex,
                     deferred_resolves: &mut Vec<DeferredResolve>) {
         match *self {
             AlphaRenderItem::Blend(stacking_context_index, src_id, filter, z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let key = AlphaBatchKey::new(AlphaBatchKind::Blend,
                                              AlphaBatchKeyFlags::empty(),
-                                             BlendMode::Alpha,
+                                             BlendMode::PremultipliedAlpha,
                                              BatchTextures::no_texture());
                 let src_task_index = render_tasks.get_static_task_index(&src_id);
 
                 let (filter_mode, amount) = match filter {
                     LowLevelFilterOp::Blur(..) => (0, 0.0),
                     LowLevelFilterOp::Contrast(amount) => (1, amount.to_f32_px()),
                     LowLevelFilterOp::Grayscale(amount) => (2, amount.to_f32_px()),
                     LowLevelFilterOp::HueRotate(angle) => (3, (angle as f32) / ANGLE_FLOAT_TO_FIXED),
@@ -482,20 +482,22 @@ impl AlphaRenderItem {
 
                         // TODO(gw): avoid / recycle this allocation in the future.
                         let mut instances = Vec::new();
 
                         let texture_id = ctx.resource_cache.get_glyphs(text_cpu.font_key,
                                                                        font_size_dp,
                                                                        text_cpu.color,
                                                                        &text_cpu.glyph_instances,
-                                                                       text_cpu.render_mode,
+                                                                       text_cpu.normal_render_mode,
                                                                        text_cpu.glyph_options, |index, handle| {
                             let uv_address = handle.as_int(gpu_cache);
-                            instances.push(base_instance.build(index as i32, 0, uv_address));
+                            instances.push(base_instance.build(index as i32,
+                                                               text_cpu.normal_render_mode as i32,
+                                                               uv_address));
                         });
 
                         if texture_id != SourceTexture::Invalid {
                             let textures = BatchTextures {
                                 colors: [texture_id, SourceTexture::Invalid, SourceTexture::Invalid],
                             };
 
                             let key = AlphaBatchKey::new(AlphaBatchKind::TextRun, flags, blend_mode, textures);
@@ -1028,52 +1030,66 @@ impl RenderTarget for ColorRenderTarget 
                                                                     0);     // z is disabled for rendering cache primitives
                         self.box_shadow_cache_prims.push(instance.build(0, 0, 0));
                     }
                     PrimitiveKind::TextShadow => {
                         let prim = &ctx.prim_store.cpu_text_shadows[prim_metadata.cpu_prim_index.0];
 
                         // todo(gw): avoid / recycle this allocation...
                         let mut instances = Vec::new();
-                        let mut base_index = 0;
 
                         let task_index = render_tasks.get_task_index(&task.id, pass_index);
 
-                        let instance = SimplePrimitiveInstance::new(prim_address,
-                                                                    task_index,
-                                                                    RenderTaskIndex(0),
-                                                                    PackedLayerIndex(0),
-                                                                    0);     // z is disabled for rendering cache primitives
+                        for sub_prim_index in &prim.primitives {
+                            let sub_metadata = ctx.prim_store.get_metadata(*sub_prim_index);
+                            match sub_metadata.prim_kind {
+                                PrimitiveKind::TextRun => {
+                                    // Add instances that reference the text run GPU location. Also supply
+                                    // the parent text-shadow prim address as a user data field, allowing
+                                    // the shader to fetch the text-shadow parameters.
+                                    let sub_prim_address = sub_metadata.gpu_location.as_int(gpu_cache);
+                                    let text = &ctx.prim_store.cpu_text_runs[sub_metadata.cpu_prim_index.0];
 
-                        for text in &prim.text_primitives {
-                            let font_size_dp = text.logical_font_size.scale_by(ctx.device_pixel_ratio);
+                                    let instance = SimplePrimitiveInstance::new(sub_prim_address,
+                                                                                task_index,
+                                                                                RenderTaskIndex(0),
+                                                                                PackedLayerIndex(0),
+                                                                                0);     // z is disabled for rendering cache primitives
+
+                                    let font_size_dp = text.logical_font_size.scale_by(ctx.device_pixel_ratio);
 
-                            let texture_id = ctx.resource_cache.get_glyphs(text.font_key,
-                                                                           font_size_dp,
-                                                                           text.color,
-                                                                           &text.glyph_instances,
-                                                                           text.render_mode,
-                                                                           text.glyph_options, |index, handle| {
-                                let uv_address = handle.as_int(gpu_cache);
-                                instances.push(instance.build(base_index + index as i32, 0, uv_address));
-                            });
+                                    let texture_id = ctx.resource_cache.get_glyphs(text.font_key,
+                                                                                   font_size_dp,
+                                                                                   text.color,
+                                                                                   &text.glyph_instances,
+                                                                                   text.shadow_render_mode,
+                                                                                   text.glyph_options, |index, handle| {
+                                        let uv_address = handle.as_int(gpu_cache);
+                                        instances.push(instance.build(index as i32,
+                                                                      uv_address,
+                                                                      prim_address));
+                                    });
 
-                            if texture_id != SourceTexture::Invalid {
-                                let textures = BatchTextures {
-                                    colors: [texture_id, SourceTexture::Invalid, SourceTexture::Invalid],
-                                };
+                                    if texture_id != SourceTexture::Invalid {
+                                        let textures = BatchTextures {
+                                            colors: [texture_id, SourceTexture::Invalid, SourceTexture::Invalid],
+                                        };
+
+                                        self.text_run_cache_prims.extend_from_slice(&instances);
+                                        instances.clear();
 
-                                self.text_run_cache_prims.extend_from_slice(&instances);
-                                base_index += text.glyph_instances.len() as i32;
-                                instances.clear();
-
-                                debug_assert!(textures.colors[0] != SourceTexture::Invalid);
-                                debug_assert!(self.text_run_textures.colors[0] == SourceTexture::Invalid ||
-                                              self.text_run_textures.colors[0] == textures.colors[0]);
-                                self.text_run_textures = textures;
+                                        debug_assert!(textures.colors[0] != SourceTexture::Invalid);
+                                        debug_assert!(self.text_run_textures.colors[0] == SourceTexture::Invalid ||
+                                                      self.text_run_textures.colors[0] == textures.colors[0]);
+                                        self.text_run_textures = textures;
+                                    }
+                                }
+                                _ => {
+                                    unreachable!("Unexpected sub primitive type");
+                                }
                             }
                         }
                     }
                     _ => {
                         // No other primitives make use of primitive caching yet!
                         unreachable!()
                     }
                 }
@@ -1649,17 +1665,17 @@ impl PackedLayer {
                     -> Option<(TransformedRectKind, DeviceIntRect)> {
         self.local_clip_rect = *local_rect;
         let xf_rect = TransformedRect::new(local_rect, &self.transform, device_pixel_ratio);
         xf_rect.bounding_rect.intersection(screen_rect)
                              .map(|rect| (xf_rect.kind, rect))
     }
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Default)]
 pub struct CompositeOps {
     // Requires only a single texture as input (e.g. most filters)
     pub filters: Vec<LowLevelFilterOp>,
 
     // Requires two source textures (e.g. mix-blend-mode)
     pub mix_blend_mode: Option<MixBlendMode>,
 }
 
@@ -1680,25 +1696,16 @@ impl CompositeOps {
             if op == &LowLevelFilterOp::Opacity(Au(0)) {
                 return true;
             }
         }
         false
     }
 }
 
-impl Default for CompositeOps {
-    fn default() -> CompositeOps {
-        CompositeOps {
-            filters: Vec::new(),
-            mix_blend_mode: None,
-        }
-    }
-}
-
 /// A rendering-oriented representation of frame::Frame built by the render backend
 /// and presented to the renderer.
 pub struct Frame {
     pub window_size: DeviceUintSize,
     pub background_color: Option<ColorF>,
     pub device_pixel_ratio: f32,
     pub cache_size: DeviceUintSize,
     pub passes: Vec<RenderPass>,
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -220,16 +220,17 @@ pub struct BoxShadowDisplayItem {
     pub offset: LayoutVector2D,
     pub color: ColorF,
     pub blur_radius: f32,
     pub spread_radius: f32,
     pub border_radius: f32,
     pub clip_mode: BoxShadowClipMode,
 }
 
+#[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct TextShadow {
     pub offset: LayoutVector2D,
     pub color: ColorF,
     pub blur_radius: f32,
 }
 
 #[repr(u32)]
--- a/gfx/webrender_api/src/font.rs
+++ b/gfx/webrender_api/src/font.rs
@@ -66,19 +66,20 @@ impl FontKey {
 
 
 #[derive(Clone)]
 pub enum FontTemplate {
     Raw(Arc<Vec<u8>>, u32),
     Native(NativeFontHandle),
 }
 
+#[repr(C)]
 #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq, Serialize, Deserialize, Ord, PartialOrd)]
 pub enum FontRenderMode {
-    Mono,
+    Mono = 0,
     Alpha,
     Subpixel,
 }
 
 const FIXED16_SHIFT: i32 = 16;
 
 // This matches the behaviour of SkScalarToFixed
 fn f32_truncate_to_fixed16(x: f32) -> i32 {