Bug 1494042. Update webrender to commit 43e8d85789efb95099affe3257a9c254ef3d2f4c
author Jeff Muizelaar <jmuizelaar@mozilla.com>
Wed, 26 Sep 2018 00:36:26 -0400
changeset 438283 3122696fa3fa715332ca9d0a453ffaef0eedc58a
parent 438282 445d1a7b050419f0ea266b0c191001d788f7850d
child 438284 c897c7bab868939033aca2e473c12f46ef3f0de1
push id 34713
push user aiakab@mozilla.com
push date Wed, 26 Sep 2018 12:55:41 +0000
treeherder mozilla-central@32fb34059762 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
bugs 1494042
milestone 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1494042. Update webrender to commit 43e8d85789efb95099affe3257a9c254ef3d2f4c
gfx/webrender/Cargo.toml
gfx/webrender/res/brush_yuv_image.glsl
gfx/webrender/res/cs_scale.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/border.rs
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/device/gl.rs
gfx/webrender/src/display_list_flattener.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/gpu_types.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/profiler.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/segment.rs
gfx/webrender/src/shade.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_api/src/image.rs
gfx/webrender_bindings/Cargo.toml
gfx/webrender_bindings/revision.txt
gfx/wrench/Cargo.toml
gfx/wrench/src/yaml_frame_reader.rs
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -22,17 +22,17 @@ serialize_program = ["serde"]
 app_units = "0.7"
 base64 = { optional = true, version = "0.6" }
 bincode = "1.0"
 bitflags = "1.0"
 byteorder = "1.0"
 cfg-if = "0.1.2"
 euclid = "0.19"
 fxhash = "0.2.1"
-gleam = "0.6"
+gleam = "0.6.2"
 image = { optional = true, version = "0.19" }
 lazy_static = "1"
 log = "0.4"
 num-traits = "0.2"
 plane-split = "0.13.2"
 png = { optional = true, version = "0.12" }
 rayon = "1"
 ron = { optional = true, version = "0.1.7" }
--- a/gfx/webrender/res/brush_yuv_image.glsl
+++ b/gfx/webrender/res/brush_yuv_image.glsl
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#define VECS_PER_SPECIFIC_BRUSH 0
+#define VECS_PER_SPECIFIC_BRUSH 1
 
 #include shared,prim_shared,brush
 
 // TODO(gw): Consider whether we should even have separate shader compilations
 //           for the various YUV modes. To save on the number of shaders we
 //           need to compile, it might be worth just doing this as an
 //           uber-shader instead.
 // TODO(gw): Regardless of the above, we should remove the separate shader
@@ -42,16 +42,18 @@ varying vec2 vLocalPos;
 #endif
 
 #ifdef WR_FEATURE_TEXTURE_RECT
     #define TEX_SIZE(sampler) vec2(1.0)
 #else
     #define TEX_SIZE(sampler) vec2(textureSize(sampler, 0).xy)
 #endif
 
+flat varying float vCoefficient;
+
 #ifdef WR_VERTEX_SHADER
 void write_uv_rect(
     int resource_id,
     vec2 f,
     vec2 texture_size,
     out vec3 uv,
     out vec4 uv_bounds
 ) {
@@ -65,29 +67,41 @@ void write_uv_rect(
     uv_bounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5));
 
     #ifndef WR_FEATURE_TEXTURE_RECT
         uv.xy /= texture_size;
         uv_bounds /= texture_size.xyxy;
     #endif
 }
 
+struct YuvPrimitive {
+    float coefficient;
+};
+
+YuvPrimitive fetch_yuv_primitive(int address) {
+    vec4 data = fetch_from_resource_cache_1(address);
+    return YuvPrimitive(data.x);
+}
+
 void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
     RectWithSize segment_rect,
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
     vec4 unused
 ) {
     vec2 f = (vi.local_pos - local_rect.p0) / local_rect.size;
 
+    YuvPrimitive prim = fetch_yuv_primitive(prim_address);
+    vCoefficient = prim.coefficient;
+
 #ifdef WR_FEATURE_ALPHA_PASS
     vLocalPos = vi.local_pos;
 #endif
 
 #if defined (WR_FEATURE_YUV_PLANAR)
     write_uv_rect(user_data.x, f, TEX_SIZE(sColor0), vUv_Y, vUvBounds_Y);
     write_uv_rect(user_data.y, f, TEX_SIZE(sColor1), vUv_U, vUvBounds_U);
     write_uv_rect(user_data.z, f, TEX_SIZE(sColor2), vUv_V, vUvBounds_V);
@@ -159,17 +173,17 @@ Fragment brush_fs() {
     // https://www.khronos.org/registry/OpenGL/extensions/APPLE/APPLE_rgb_422.txt
     vec2 uv_y = clamp(vUv_YUV.xy, vUvBounds_YUV.xy, vUvBounds_YUV.zw);
     yuv_value = TEX_SAMPLE(sColor0, vec3(uv_y, vUv_YUV.z)).gbr;
 #else
     yuv_value = vec3(0.0);
 #endif
 
     // See the YuvColorMatrix definition for an explanation of where the constants come from.
-    vec3 rgb = YuvColorMatrix * (yuv_value - vec3(0.06275, 0.50196, 0.50196));
+    vec3 rgb = YuvColorMatrix * (yuv_value * vCoefficient - vec3(0.06275, 0.50196, 0.50196));
     vec4 color = vec4(rgb, 1.0);
 
 #ifdef WR_FEATURE_ALPHA_PASS
     color *= init_transform_fs(vLocalPos);
 #endif
 
     return Fragment(color);
 }
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/cs_scale.glsl
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,prim_shared
+
+varying vec3 vUv;
+flat varying vec4 vUvRect;
+
+#ifdef WR_VERTEX_SHADER
+
+in int aScaleRenderTaskAddress;
+in int aScaleSourceTaskAddress;
+
+struct ScaleTask {
+    RenderTaskCommonData common_data;
+};
+
+ScaleTask fetch_scale_task(int address) {
+    RenderTaskData task_data = fetch_render_task_data(address);
+
+    ScaleTask task = ScaleTask(task_data.common_data);
+
+    return task;
+}
+
+void main(void) {
+    ScaleTask scale_task = fetch_scale_task(aScaleRenderTaskAddress);
+    RenderTaskCommonData src_task = fetch_render_task_common_data(aScaleSourceTaskAddress);
+
+    RectWithSize src_rect = src_task.task_rect;
+    RectWithSize target_rect = scale_task.common_data.task_rect;
+
+#if defined WR_FEATURE_COLOR_TARGET
+    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
+#else
+    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
+#endif
+
+    vUv.z = src_task.texture_layer_index;
+
+    vUvRect = vec4(src_rect.p0 + vec2(0.5),
+                   src_rect.p0 + src_rect.size - vec2(0.5)) / texture_size.xyxy;
+
+    vec2 pos = target_rect.p0 + target_rect.size * aPosition.xy;
+    vUv.xy = (src_rect.p0 + src_rect.size * aPosition.xy) / texture_size;
+
+    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+}
+
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#if defined WR_FEATURE_COLOR_TARGET
+#define SAMPLE_TYPE vec4
+#define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
+#else
+#define SAMPLE_TYPE float
+#define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
+#endif
+
+void main(void) {
+    vec2 st = clamp(vUv.xy, vUvRect.xy, vUvRect.zw);
+    oFragColor = vec4(SAMPLE_TEXTURE(vec3(st, vUv.z)));
+}
+
+#endif
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -22,17 +22,16 @@ uniform sampler2DArray sCacheRGBA8;
 uniform sampler2DArray sSharedCacheA8;
 
 vec2 clamp_rect(vec2 pt, RectWithSize rect) {
     return clamp(pt, rect.p0, rect.p0 + rect.size);
 }
 
 // TODO: convert back to RectWithEndPoint if driver issues are resolved, if ever.
 flat varying vec4 vClipMaskUvBounds;
-flat varying vec4 vClipMaskUvSampleBounds;
 // XY and W are homogeneous coordinates, Z is the layer index
 varying vec4 vClipMaskUv;
 
 
 #ifdef WR_VERTEX_SHADER
 
 #define COLOR_MODE_FROM_PASS          0
 #define COLOR_MODE_ALPHA              1
@@ -225,52 +224,42 @@ VertexInfo write_transform_vertex(RectWi
 
 void write_clip(vec4 world_pos, vec2 snap_offset, ClipArea area) {
     vec2 uv = world_pos.xy * uDevicePixelRatio +
         world_pos.w * (snap_offset + area.common_data.task_rect.p0 - area.screen_origin);
     vClipMaskUvBounds = vec4(
         area.common_data.task_rect.p0,
         area.common_data.task_rect.p0 + area.common_data.task_rect.size
     );
-    vClipMaskUvSampleBounds.xy = vClipMaskUvBounds.xy + vec2(0.5);
-    vClipMaskUvSampleBounds.zw = vClipMaskUvBounds.zw - vec2(0.5);
     vClipMaskUv = vec4(uv, area.common_data.texture_layer_index, world_pos.w);
-
-    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
-    vClipMaskUv.xy /= texture_size;
-    vClipMaskUvBounds /= texture_size.xyxy;
-    vClipMaskUvSampleBounds /= texture_size.xyxy;
 }
 #endif //WR_VERTEX_SHADER
 
 #ifdef WR_FRAGMENT_SHADER
 
 float do_clip() {
     // check for the dummy bounds, which are given to the opaque objects
     if (vClipMaskUvBounds.xy == vClipMaskUvBounds.zw) {
         return 1.0;
     }
     // anything outside of the mask is considered transparent
     //Note: we assume gl_FragCoord.w == interpolated(1 / vClipMaskUv.w)
     vec2 mask_uv = vClipMaskUv.xy * gl_FragCoord.w;
-    bvec4 inside = lessThanEqual(
-        vec4(vClipMaskUvBounds.xy, mask_uv),
-        vec4(mask_uv, vClipMaskUvBounds.zw));
+    bvec2 left = lessThanEqual(vClipMaskUvBounds.xy, mask_uv); // inclusive
+    bvec2 right = greaterThan(vClipMaskUvBounds.zw, mask_uv); // non-inclusive
     // bail out if the pixel is outside the valid bounds
-    if (!all(inside)) {
+    if (!all(bvec4(left, right))) {
         return 0.0;
     }
-
     // finally, the slow path - fetch the mask value from an image
-
-    // TODO(gw): texelFetch here fails on some nVidia hardware in
-    //           some cases. For now, just use texture()
-    //           unconditionally.
-    mask_uv = clamp(mask_uv, vClipMaskUvSampleBounds.xy, vClipMaskUvSampleBounds.zw);
-    return texture(sCacheA8, vec3(mask_uv, vClipMaskUv.z)).r;
+    // Note the Z getting rounded to the nearest integer because the variable
+    // is still interpolated and becomes a subject of precision-caused
+    // fluctuations, see https://bugzilla.mozilla.org/show_bug.cgi?id=1491911
+    ivec3 tc = ivec3(mask_uv, vClipMaskUv.z + 0.5);
+    return texelFetch(sCacheA8, tc, 0).r;
 }
 
 #ifdef WR_FEATURE_DITHERING
 vec4 dither(vec4 color) {
     const int matrix_mask = 7;
 
     ivec2 pos = ivec2(gl_FragCoord.xy) & ivec2(matrix_mask);
     float noise_normalized = (texelFetch(sDither, pos, 0).r * 255.0 + 0.5) / 64.0;
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -1,30 +1,30 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, ClipMode, DeviceIntRect, DeviceIntSize};
 use api::{DeviceUintRect, DeviceUintPoint, ExternalImageType, FilterOp, ImageRendering};
-use api::{YuvColorSpace, YuvFormat, WorldPixel, WorldRect};
+use api::{YuvColorSpace, YuvFormat, WorldPixel, WorldRect, ColorDepth};
 use clip::{ClipDataStore, ClipNodeFlags, ClipNodeRange, ClipItem, ClipStore};
 use clip_scroll_tree::{ClipScrollTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex};
 use euclid::vec3;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheHandle, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders};
 use gpu_types::{ClipMaskInstance, SplitCompositeInstance};
-use gpu_types::{PrimitiveInstance, RasterizationSpace, GlyphInstance};
+use gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
 use gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 use picture::{PictureCompositeMode, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Clipper, Polygon, Splitter};
 use prim_store::{BrushKind, BrushPrimitive, BrushSegmentTaskId, DeferredResolve};
-use prim_store::{EdgeAaSegmentMask, ImageSource, PrimitiveIndex};
-use prim_store::{PrimitiveMetadata, PrimitiveRun, VisibleGradientTile};
+use prim_store::{EdgeAaSegmentMask, ImageSource};
+use prim_store::{PrimitiveMetadata, VisibleGradientTile, PrimitiveInstance};
 use prim_store::{BorderSource, Primitive, PrimitiveDetails};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind, ShaderColorMode};
 use renderer::BLOCKS_PER_UV_RECT;
 use resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache, ImageProperties};
 use scene::FilterOpHelpers;
 use std::{f32, i32};
 use tiling::{RenderTargetContext};
@@ -41,17 +41,17 @@ pub enum BrushBatchKind {
     Solid,
     Image(ImageBufferKind),
     Blend,
     MixBlend {
         task_id: RenderTaskId,
         source_id: RenderTaskId,
         backdrop_id: RenderTaskId,
     },
-    YuvImage(ImageBufferKind, YuvFormat, YuvColorSpace),
+    YuvImage(ImageBufferKind, YuvFormat, ColorDepth, YuvColorSpace),
     RadialGradient,
     LinearGradient,
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BatchKind {
@@ -136,17 +136,17 @@ impl AlphaBatchList {
             item_rects: Vec::new(),
         }
     }
 
     pub fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         bounding_rect: &WorldRect,
-    ) -> &mut Vec<PrimitiveInstance> {
+    ) -> &mut Vec<PrimitiveInstanceData> {
         let mut selected_batch_index = None;
 
         match key.blend_mode {
             BlendMode::SubpixelWithBgColor => {
                 'outer_multipass: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(10) {
                     // Some subpixel batches are drawn in two passes. Because of this, we need
                     // to check for overlaps with every batch (which is a bit different
                     // than the normal batching below).
@@ -208,17 +208,17 @@ impl OpaqueBatchList {
             pixel_area_threshold_for_new_batch,
         }
     }
 
     pub fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         bounding_rect: &WorldRect,
-    ) -> &mut Vec<PrimitiveInstance> {
+    ) -> &mut Vec<PrimitiveInstanceData> {
         let mut selected_batch_index = None;
         let item_area = bounding_rect.size.area();
 
         // If the area of this primitive is larger than the given threshold,
         // then it is large enough to warrant breaking a batch for. In this
         // case we just see if it can be added to the existing batch or
         // create a new one.
         if item_area > self.pixel_area_threshold_for_new_batch {
@@ -277,17 +277,17 @@ impl BatchList {
             opaque_batch_list: OpaqueBatchList::new(batch_area_threshold),
         }
     }
 
     pub fn get_suitable_batch(
         &mut self,
         key: BatchKey,
         bounding_rect: &WorldRect,
-    ) -> &mut Vec<PrimitiveInstance> {
+    ) -> &mut Vec<PrimitiveInstanceData> {
         match key.blend_mode {
             BlendMode::None => {
                 self.opaque_batch_list
                     .get_suitable_batch(key, bounding_rect)
             }
             BlendMode::Alpha |
             BlendMode::PremultipliedAlpha |
             BlendMode::PremultipliedDestOut |
@@ -322,17 +322,17 @@ impl BatchList {
         self.opaque_batch_list.finalize()
     }
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PrimitiveBatch {
     pub key: BatchKey,
-    pub instances: Vec<PrimitiveInstance>,
+    pub instances: Vec<PrimitiveInstanceData>,
 }
 
 impl PrimitiveBatch {
     fn new(key: BatchKey) -> PrimitiveBatch {
         PrimitiveBatch {
             key,
             instances: Vec::new(),
         }
@@ -445,36 +445,38 @@ impl AlphaBatchBuilder {
     ) {
         let task_address = render_tasks.get_task_address(task_id);
 
         // Even though most of the time a splitter isn't used or needed,
         // they are cheap to construct so we will always pass one down.
         let mut splitter = BspSplitter::new();
 
         // Add each run in this picture to the batch.
-        for run in &pic.runs {
-            self.add_run_to_batch(
-                run,
+        for (plane_split_anchor, prim_instance) in pic.prim_instances.iter().enumerate() {
+            self.add_prim_to_batch(
+                prim_instance,
                 ctx,
                 gpu_cache,
                 render_tasks,
                 task_id,
                 task_address,
                 deferred_resolves,
                 &mut splitter,
                 prim_headers,
                 transforms,
                 root_spatial_node_index,
+                plane_split_anchor,
             );
         }
 
         // Flush the accumulated plane splits onto the task tree.
         // Z axis is directed at the screen, `sort` is ascending, and we need back-to-front order.
         for poly in splitter.sort(vec3(0.0, 0.0, 1.0)) {
-            let prim_index = PrimitiveIndex(poly.anchor);
+            let prim_instance = &pic.prim_instances[poly.anchor];
+            let prim_index = prim_instance.prim_index;
             let pic_metadata = &ctx.prim_store.primitives[prim_index.0].metadata;
             if cfg!(debug_assertions) && ctx.prim_store.chase_id == Some(prim_index) {
                 println!("\t\tsplit polygon {:?}", poly.points);
             }
             let transform = transforms.get_world_transform(pic_metadata.spatial_node_index).inverse().unwrap();
             let transform_id = transforms.get_id(
                 pic_metadata.spatial_node_index,
                 ROOT_SPATIAL_NODE_INDEX,
@@ -482,17 +484,17 @@ impl AlphaBatchBuilder {
             );
 
             let clip_task_address = pic_metadata
                 .clip_task_id
                 .map_or(OPAQUE_TASK_ADDRESS, |id| render_tasks.get_task_address(id));
 
             let prim_header = PrimitiveHeader {
                 local_rect: pic_metadata.local_rect,
-                local_clip_rect: pic_metadata.combined_local_clip_rect,
+                local_clip_rect: prim_instance.combined_local_clip_rect,
                 task_address,
                 specific_prim_address: GpuCacheAddress::invalid(),
                 clip_task_address,
                 transform_id,
             };
 
             let pic = ctx.prim_store.get_pic(prim_index);
 
@@ -542,91 +544,56 @@ impl AlphaBatchBuilder {
             let gpu_address = gpu_cache.get_address(&gpu_handle);
 
             let instance = SplitCompositeInstance::new(
                 prim_header_index,
                 gpu_address,
                 prim_headers.z_generator.next(),
             );
 
-            batch.push(PrimitiveInstance::from(instance));
+            batch.push(PrimitiveInstanceData::from(instance));
         }
     }
 
-    // Helper to add an entire primitive run to a batch list.
-    // TODO(gw): Restructure this so the param list isn't quite
-    //           so daunting!
-    fn add_run_to_batch(
+    // Adds a primitive to a batch.
+    // It can recursively call itself in some situations, for
+    // example if it encounters a picture where the items
+    // in that picture are being drawn into the same target.
+    fn add_prim_to_batch(
         &mut self,
-        run: &PrimitiveRun,
+        prim_instance: &PrimitiveInstance,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         task_id: RenderTaskId,
         task_address: RenderTaskAddress,
         deferred_resolves: &mut Vec<DeferredResolve>,
         splitter: &mut BspSplitter<f64, WorldPixel>,
         prim_headers: &mut PrimitiveHeaders,
         transforms: &mut TransformPalette,
         root_spatial_node_index: SpatialNodeIndex,
+        plane_split_anchor: usize,
     ) {
-        for i in 0 .. run.count {
-            let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
-            let metadata = &ctx.prim_store.primitives[prim_index.0].metadata;
-
-            if metadata.clipped_world_rect.is_some() {
-                let transform_id = transforms
-                    .get_id(
-                        metadata.spatial_node_index,
-                        root_spatial_node_index,
-                        ctx.clip_scroll_tree,
-                    );
+        let prim = &ctx.prim_store.primitives[prim_instance.prim_index.0];
+        let prim_metadata = &prim.metadata;
 
-                self.add_prim_to_batch(
-                    transform_id,
-                    prim_index,
-                    ctx,
-                    gpu_cache,
-                    render_tasks,
-                    task_id,
-                    task_address,
-                    deferred_resolves,
-                    splitter,
-                    prim_headers,
-                    transforms,
-                    root_spatial_node_index,
-                );
-            }
+        if prim_metadata.clipped_world_rect.is_none() {
+            return;
         }
-    }
 
-    // Adds a primitive to a batch.
-    // It can recursively call itself in some situations, for
-    // example if it encounters a picture where the items
-    // in that picture are being drawn into the same target.
-    fn add_prim_to_batch(
-        &mut self,
-        transform_id: TransformPaletteId,
-        prim_index: PrimitiveIndex,
-        ctx: &RenderTargetContext,
-        gpu_cache: &mut GpuCache,
-        render_tasks: &RenderTaskTree,
-        task_id: RenderTaskId,
-        task_address: RenderTaskAddress,
-        deferred_resolves: &mut Vec<DeferredResolve>,
-        splitter: &mut BspSplitter<f64, WorldPixel>,
-        prim_headers: &mut PrimitiveHeaders,
-        transforms: &mut TransformPalette,
-        root_spatial_node_index: SpatialNodeIndex,
-    ) {
-        let prim = &ctx.prim_store.primitives[prim_index.0];
-        let prim_metadata = &prim.metadata;
         #[cfg(debug_assertions)] //TODO: why is this needed?
         debug_assert_eq!(prim_metadata.prepared_frame_id, render_tasks.frame_id());
 
+        let transform_id = transforms
+            .get_id(
+                prim_metadata.spatial_node_index,
+                root_spatial_node_index,
+                ctx.clip_scroll_tree,
+            );
+
         // TODO(gw): Calculating this for every primitive is a bit
         //           wasteful. We should probably cache this in
         //           the scroll node...
         let transform_kind = transform_id.transform_kind();
         let bounding_rect = prim_metadata.clipped_world_rect
                                          .as_ref()
                                          .expect("bug");
 
@@ -662,24 +629,24 @@ impl AlphaBatchBuilder {
             transform_kind == TransformedRectKind::Complex {
             specified_blend_mode
         } else {
             BlendMode::None
         };
 
         let prim_header = PrimitiveHeader {
             local_rect: prim_metadata.local_rect,
-            local_clip_rect: prim_metadata.combined_local_clip_rect,
+            local_clip_rect: prim_instance.combined_local_clip_rect,
             task_address,
             specific_prim_address: prim_cache_address,
             clip_task_address,
             transform_id,
         };
 
-        if cfg!(debug_assertions) && ctx.prim_store.chase_id == Some(prim_index) {
+        if cfg!(debug_assertions) && ctx.prim_store.chase_id == Some(prim_instance.prim_index) {
             println!("\ttask target {:?}", self.target_rect);
             println!("\t{:?}", prim_header);
         }
 
         match prim.details {
             PrimitiveDetails::Brush(ref brush) => {
                 match brush.kind {
                     BrushKind::Picture(ref picture) => {
@@ -693,35 +660,38 @@ impl AlphaBatchBuilder {
 
                             // Apply the local clip rect here, before splitting. This is
                             // because the local clip rect can't be applied in the vertex
                             // shader for split composites, since we are drawing polygons
                             // rather that rectangles. The interpolation still works correctly
                             // since we determine the UVs by doing a bilerp with a factor
                             // from the original local rect.
                             let local_rect = prim_metadata.local_rect
-                                                          .intersection(&prim_metadata.combined_local_clip_rect);
+                                                          .intersection(&prim_instance.combined_local_clip_rect);
 
                             if let Some(local_rect) = local_rect {
                                 match transform.transform_kind() {
                                     TransformedRectKind::AxisAligned => {
                                         let inv_transform = transforms.get_world_inv_transform(prim_metadata.spatial_node_index);
                                         let polygon = Polygon::from_transformed_rect_with_inverse(
                                             local_rect.cast(),
                                             &transform.cast(),
                                             &inv_transform.cast(),
-                                            prim_index.0,
+                                            plane_split_anchor,
                                         ).unwrap();
                                         splitter.add(polygon);
                                     }
                                     TransformedRectKind::Complex => {
                                         let mut clipper = Clipper::new();
                                         let matrix = transform.cast();
                                         let results = clipper.clip_transformed(
-                                            Polygon::from_rect(local_rect.cast(), prim_index.0),
+                                            Polygon::from_rect(
+                                                local_rect.cast(),
+                                                plane_split_anchor,
+                                            ),
                                             &matrix,
                                             Some(bounding_rect.to_f64()),
                                         );
                                         if let Ok(results) = results {
                                             for poly in results {
                                                 splitter.add(poly);
                                             }
                                         }
@@ -766,17 +736,17 @@ impl AlphaBatchBuilder {
 
                                                 let instance = BrushInstance {
                                                     prim_header_index,
                                                     segment_index: 0,
                                                     edge_flags: EdgeAaSegmentMask::empty(),
                                                     brush_flags: BrushFlags::empty(),
                                                     clip_task_address,
                                                 };
-                                                batch.push(PrimitiveInstance::from(instance));
+                                                batch.push(PrimitiveInstanceData::from(instance));
                                             }
                                             FilterOp::DropShadow(offset, ..) => {
                                                 // Draw an instance of the shadow first, following by the content.
 
                                                 // Both the shadow and the content get drawn as a brush image.
                                                 let kind = BatchKind::Brush(
                                                     BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
                                                 );
@@ -850,21 +820,21 @@ impl AlphaBatchBuilder {
                                                     clip_task_address,
                                                     segment_index: 0,
                                                     edge_flags: EdgeAaSegmentMask::empty(),
                                                     brush_flags: BrushFlags::empty(),
                                                 };
 
                                                 self.batch_list
                                                     .get_suitable_batch(shadow_key, bounding_rect)
-                                                    .push(PrimitiveInstance::from(shadow_instance));
+                                                    .push(PrimitiveInstanceData::from(shadow_instance));
 
                                                 self.batch_list
                                                     .get_suitable_batch(content_key, bounding_rect)
-                                                    .push(PrimitiveInstance::from(content_instance));
+                                                    .push(PrimitiveInstanceData::from(content_instance));
                                             }
                                             _ => {
                                                 let filter_mode = match filter {
                                                     FilterOp::Identity => 1, // matches `Contrast(1)`
                                                     FilterOp::Blur(..) => 0,
                                                     FilterOp::Contrast(..) => 1,
                                                     FilterOp::Grayscale(..) => 2,
                                                     FilterOp::HueRotate(..) => 3,
@@ -924,17 +894,17 @@ impl AlphaBatchBuilder {
                                                     prim_header_index,
                                                     clip_task_address,
                                                     segment_index: 0,
                                                     edge_flags: EdgeAaSegmentMask::empty(),
                                                     brush_flags: BrushFlags::empty(),
                                                 };
 
                                                 let batch = self.batch_list.get_suitable_batch(key, bounding_rect);
-                                                batch.push(PrimitiveInstance::from(instance));
+                                                batch.push(PrimitiveInstanceData::from(instance));
                                             }
                                         }
                                     }
                                     PictureCompositeMode::MixBlend(mode) => {
                                         let cache_task_id = surface.resolve_render_task_id();
                                         let backdrop_id = picture.secondary_render_task_id.expect("no backdrop!?");
 
                                         let key = BatchKey::new(
@@ -960,17 +930,17 @@ impl AlphaBatchBuilder {
                                         let instance = BrushInstance {
                                             prim_header_index,
                                             clip_task_address,
                                             segment_index: 0,
                                             edge_flags: EdgeAaSegmentMask::empty(),
                                             brush_flags: BrushFlags::empty(),
                                         };
 
-                                        batch.push(PrimitiveInstance::from(instance));
+                                        batch.push(PrimitiveInstanceData::from(instance));
                                     }
                                     PictureCompositeMode::Blit => {
                                         let cache_task_id = surface.resolve_render_task_id();
                                         let kind = BatchKind::Brush(
                                             BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
                                         );
                                         let key = BatchKey::new(
                                             kind,
@@ -994,17 +964,17 @@ impl AlphaBatchBuilder {
 
                                         let instance = BrushInstance {
                                             prim_header_index,
                                             clip_task_address,
                                             segment_index: 0,
                                             edge_flags: EdgeAaSegmentMask::empty(),
                                             brush_flags: BrushFlags::empty(),
                                         };
-                                        batch.push(PrimitiveInstance::from(instance));
+                                        batch.push(PrimitiveInstanceData::from(instance));
                                     }
                                 }
                             }
                             None => {
                                 // If this picture is being drawn into an existing target (i.e. with
                                 // no composition operation), recurse and add to the current batch list.
                                 self.add_pic_to_batch(
                                     picture,
@@ -1077,20 +1047,20 @@ impl AlphaBatchBuilder {
                             prim_headers,
                         );
                     }
                     _ => {
                         if let Some((batch_kind, textures, user_data)) = brush.get_batch_params(
                                 ctx.resource_cache,
                                 gpu_cache,
                                 deferred_resolves,
-                                ctx.prim_store.chase_id == Some(prim_index),
+                                ctx.prim_store.chase_id == Some(prim_instance.prim_index),
                         ) {
                             let prim_header_index = prim_headers.push(&prim_header, user_data);
-                            if cfg!(debug_assertions) && ctx.prim_store.chase_id == Some(prim_index) {
+                            if cfg!(debug_assertions) && ctx.prim_store.chase_id == Some(prim_instance.prim_index) {
                                 println!("\t{:?} {:?}, task relative bounds {:?}",
                                     batch_kind, prim_header_index, bounding_rect);
                             }
 
                             self.add_brush_to_batch(
                                 brush,
                                 prim_metadata,
                                 batch_kind,
@@ -1219,17 +1189,17 @@ impl AlphaBatchBuilder {
         };
 
         let batch_key = BatchKey {
             blend_mode,
             kind: BatchKind::Brush(batch_kind),
             textures,
         };
         let batch = self.batch_list.get_suitable_batch(batch_key, bounding_rect);
-        batch.push(PrimitiveInstance::from(base_instance));
+        batch.push(PrimitiveInstanceData::from(base_instance));
     }
 
     fn add_brush_to_batch(
         &mut self,
         brush: &BrushPrimitive,
         prim_metadata: &PrimitiveMetadata,
         batch_kind: BrushBatchKind,
         alpha_blend_mode: BlendMode,
@@ -1281,17 +1251,17 @@ impl AlphaBatchBuilder {
 
                     let clip_task_address = match segment.clip_task_id {
                         BrushSegmentTaskId::RenderTaskId(id) =>
                             render_tasks.get_task_address(id),
                         BrushSegmentTaskId::Opaque => OPAQUE_TASK_ADDRESS,
                         BrushSegmentTaskId::Empty => continue,
                     };
 
-                    let instance = PrimitiveInstance::from(BrushInstance {
+                    let instance = PrimitiveInstanceData::from(BrushInstance {
                         segment_index: i as i32,
                         edge_flags: segment.edge_flags,
                         clip_task_address,
                         brush_flags: base_instance.brush_flags | segment.brush_flags,
                         ..base_instance
                     });
 
                     if needs_blending {
@@ -1303,17 +1273,17 @@ impl AlphaBatchBuilder {
             }
             None => {
                 let batch_key = BatchKey {
                     blend_mode: non_segmented_blend_mode,
                     kind: BatchKind::Brush(batch_kind),
                     textures,
                 };
                 let batch = self.batch_list.get_suitable_batch(batch_key, bounding_rect);
-                batch.push(PrimitiveInstance::from(base_instance));
+                batch.push(PrimitiveInstanceData::from(base_instance));
             }
         }
 
         self.batch_list.remove_unused_batches();
     }
 }
 
 fn add_gradient_tiles(
@@ -1343,17 +1313,17 @@ fn add_gradient_tiles(
         let prim_header = PrimitiveHeader {
             specific_prim_address: gpu_cache.get_address(&tile.handle),
             local_rect: tile.local_rect,
             local_clip_rect: tile.local_clip_rect,
             ..*base_prim_header
         };
         let prim_header_index = prim_headers.push(&prim_header, user_data);
 
-        batch.push(PrimitiveInstance::from(
+        batch.push(PrimitiveInstanceData::from(
             BrushInstance {
                 prim_header_index,
                 clip_task_address,
                 segment_index: 0,
                 edge_flags: EdgeAaSegmentMask::all(),
                 brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
             }
         ));
@@ -1512,17 +1482,17 @@ impl BrushPrimitive {
                     BatchTextures::no_texture(),
                     [
                         stops_handle.as_int(gpu_cache),
                         0,
                         0,
                     ],
                 ))
             }
-            BrushKind::YuvImage { format, yuv_key, image_rendering, color_space } => {
+            BrushKind::YuvImage { format, yuv_key, image_rendering, color_depth, color_space } => {
                 let mut textures = BatchTextures::no_texture();
                 let mut uv_rect_addresses = [0; 3];
 
                 //yuv channel
                 let channel_count = format.get_plane_num();
                 debug_assert!(channel_count <= 3);
                 for channel in 0 .. channel_count {
                     let image_key = yuv_key[channel];
@@ -1553,16 +1523,17 @@ impl BrushPrimitive {
                     textures.colors[1 .. format.get_plane_num()]
                         .iter()
                         .all(|&tid| buffer_kind == get_buffer_kind(tid))
                 );
 
                 let kind = BrushBatchKind::YuvImage(
                     buffer_kind,
                     format,
+                    color_depth,
                     color_space,
                 );
 
                 Some((
                     kind,
                     textures,
                     [
                         uv_rect_addresses[0],
--- a/gfx/webrender/src/border.rs
+++ b/gfx/webrender/src/border.rs
@@ -6,17 +6,17 @@ use api::{BorderRadius, BorderSide, Bord
 use api::{LayoutSideOffsets, LayoutSizeAu, LayoutPrimitiveInfo, LayoutToDeviceScale};
 use api::{DeviceVector2D, DevicePoint, DeviceIntSize, LayoutRect, LayoutSize, NormalBorder};
 use api::{AuHelpers};
 use app_units::Au;
 use ellipse::Ellipse;
 use euclid::SideOffsets2D;
 use display_list_flattener::DisplayListFlattener;
 use gpu_types::{BorderInstance, BorderSegment, BrushFlags};
-use prim_store::{BrushKind, BrushPrimitive, BrushSegment};
+use prim_store::{BrushKind, BrushPrimitive, BrushSegment, BrushSegmentVec};
 use prim_store::{EdgeAaSegmentMask, PrimitiveContainer, ScrollNodeAndClipChain};
 use util::{lerp, RectHelpers};
 
 // Using 2048 as the maximum radius in device space before which we
 // start stretching is up for debate.
 // the value must be chosen so that the corners will not use an
 // unreasonable amount of memory but should allow crisp corners in the
 // common cases.
@@ -680,17 +680,17 @@ fn get_edge_info(
 }
 
 impl BorderRenderTaskInfo {
     pub fn new(
         rect: &LayoutRect,
         border: &NormalBorder,
         widths: &LayoutSideOffsets,
         scale: LayoutToDeviceScale,
-        brush_segments: &mut Vec<BrushSegment>,
+        brush_segments: &mut BrushSegmentVec,
     ) -> Option<Self> {
         let mut border_segments = Vec::new();
 
         let dp_width_top = (widths.top * scale.0).ceil();
         let dp_width_bottom = (widths.bottom * scale.0).ceil();
         let dp_width_left = (widths.left * scale.0).ceil();
         let dp_width_right = (widths.right * scale.0).ceil();
 
@@ -1062,17 +1062,17 @@ impl BorderRenderTaskInfo {
     }
 }
 
 fn add_brush_segment(
     image_rect: LayoutRect,
     task_rect: DeviceRect,
     brush_flags: BrushFlags,
     edge_flags: EdgeAaSegmentMask,
-    brush_segments: &mut Vec<BrushSegment>,
+    brush_segments: &mut BrushSegmentVec,
 ) {
     if image_rect.size.width <= 0. || image_rect.size.width <= 0. {
         return;
     }
 
     brush_segments.push(
         BrushSegment::new(
             image_rect,
@@ -1214,17 +1214,17 @@ fn add_corner_segment(
     task_rect: DeviceRect,
     side0: &BorderSide,
     side1: &BorderSide,
     widths: DeviceSize,
     radius: DeviceSize,
     segment: BorderSegment,
     edge_flags: EdgeAaSegmentMask,
     border_segments: &mut Vec<BorderSegmentInfo>,
-    brush_segments: &mut Vec<BrushSegment>,
+    brush_segments: &mut BrushSegmentVec,
 ) {
     if side0.color.a <= 0.0 && side1.color.a <= 0.0 {
         return;
     }
 
     if widths.width <= 0.0 && widths.height <= 0.0 {
         return;
     }
@@ -1252,17 +1252,17 @@ fn add_corner_segment(
 fn add_edge_segment(
     image_rect: LayoutRect,
     task_rect: DeviceRect,
     side: &BorderSide,
     segment: BorderSegment,
     edge_flags: EdgeAaSegmentMask,
     border_segments: &mut Vec<BorderSegmentInfo>,
     brush_flags: BrushFlags,
-    brush_segments: &mut Vec<BrushSegment>,
+    brush_segments: &mut BrushSegmentVec,
 ) {
     if side.color.a <= 0.0 {
         return;
     }
 
     if side.style.is_hidden() {
         return;
     }
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, BoxShadowClipMode, ClipMode, ColorF, DeviceIntSize, LayoutPrimitiveInfo};
-use api::{LayoutRect, LayoutSize, LayoutVector2D};
+use api::{LayoutRect, LayoutSize, LayoutVector2D, MAX_BLUR_RADIUS};
 use clip::ClipItemKey;
 use display_list_flattener::DisplayListFlattener;
 use gpu_cache::GpuCacheHandle;
 use gpu_types::BoxShadowStretchMode;
 use prim_store::{BrushKind, BrushPrimitive, PrimitiveContainer};
 use prim_store::ScrollNodeAndClipChain;
 use render_task::RenderTaskCacheEntryHandle;
 use util::RectHelpers;
@@ -40,20 +40,16 @@ pub struct BoxShadowClipSource {
     // Local space rect for the shadow to be drawn or
     // stretched in the shadow primitive.
     pub prim_shadow_rect: LayoutRect,
 }
 
 // The blur shader samples BLUR_SAMPLE_SCALE * blur_radius surrounding texels.
 pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
 
-// Maximum blur radius.
-// Taken from https://searchfox.org/mozilla-central/rev/c633ffa4c4611f202ca11270dcddb7b29edddff8/layout/painting/nsCSSRendering.cpp#4412
-pub const MAX_BLUR_RADIUS : f32 = 300.;
-
 // A cache key that uniquely identifies a minimally sized
 // and blurred box-shadow rect that can be stored in the
 // texture cache and applied to clip-masks.
 #[derive(Debug, Clone, Eq, Hash, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BoxShadowCacheKey {
     pub blur_radius_dp: i32,
--- a/gfx/webrender/src/device/gl.rs
+++ b/gfx/webrender/src/device/gl.rs
@@ -482,16 +482,33 @@ impl Texture {
     pub fn get_rt_info(&self) -> Option<&RenderTargetInfo> {
         self.render_target.as_ref()
     }
 
     pub fn used_in_frame(&self, frame_id: FrameId) -> bool {
         self.last_frame_used == frame_id
     }
 
+    /// Returns true if this texture was used within `threshold` frames of
+    /// the current frame.
+    pub fn used_recently(&self, current_frame_id: FrameId, threshold: usize) -> bool {
+        self.last_frame_used + threshold >= current_frame_id
+    }
+
+    /// Returns the number of bytes (generally in GPU memory) that this texture
+    /// consumes.
+    pub fn size_in_bytes(&self) -> usize {
+        assert!(self.layer_count > 0 || self.width + self.height == 0);
+        let bpp = self.format.bytes_per_pixel() as usize;
+        let w = self.width as usize;
+        let h = self.height as usize;
+        let count = self.layer_count as usize;
+        bpp * w * h * count
+    }
+
     #[cfg(feature = "replay")]
     pub fn into_external(mut self) -> ExternalTexture {
         let ext = ExternalTexture {
             id: self.id,
             target: self.target,
         };
         self.id = 0; // don't complain, moved out
         ext
@@ -2217,16 +2234,21 @@ impl Device {
 
     fn gl_describe_format(&self, format: ImageFormat) -> FormatDesc {
         match format {
             ImageFormat::R8 => FormatDesc {
                 internal: gl::RED as _,
                 external: gl::RED,
                 pixel_type: gl::UNSIGNED_BYTE,
             },
+            ImageFormat::R16 => FormatDesc {
+                internal: gl::R16 as _,
+                external: gl::RED,
+                pixel_type: gl::UNSIGNED_SHORT,
+            },
             ImageFormat::BGRA8 => {
                 let external = self.bgra_format;
                 FormatDesc {
                     internal: match self.gl.get_type() {
                         gl::GlType::Gl => gl::RGBA as _,
                         gl::GlType::Gles => external as _,
                     },
                     external,
@@ -2373,16 +2395,17 @@ impl<'a, T> TextureUploader<'a, T> {
         upload_size
     }
 }
 
 impl<'a> UploadTarget<'a> {
     fn update_impl(&mut self, chunk: UploadChunk) {
         let (gl_format, bpp, data_type) = match self.texture.format {
             ImageFormat::R8 => (gl::RED, 1, gl::UNSIGNED_BYTE),
+            ImageFormat::R16 => (gl::RED, 2, gl::UNSIGNED_SHORT),
             ImageFormat::BGRA8 => (self.bgra_format, 4, gl::UNSIGNED_BYTE),
             ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
             ImageFormat::RGBAF32 => (gl::RGBA, 16, gl::FLOAT),
             ImageFormat::RGBAI32 => (gl::RGBA_INTEGER, 16, gl::INT),
         };
 
         let row_length = match chunk.stride {
             Some(value) => value / bpp,
--- a/gfx/webrender/src/display_list_flattener.rs
+++ b/gfx/webrender/src/display_list_flattener.rs
@@ -2,17 +2,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, BorderDetails, BorderDisplayItem, BuiltDisplayListIter, ClipAndScrollInfo};
 use api::{ClipId, ColorF, ComplexClipRegion, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DevicePixelScale, DeviceUintRect, DisplayItemRef, ExtendMode, ExternalScrollId};
 use api::{FilterOp, FontInstanceKey, GlyphInstance, GlyphOptions, RasterSpace, GradientStop};
-use api::{IframeDisplayItem, ImageKey, ImageRendering, ItemRange, LayoutPoint};
+use api::{IframeDisplayItem, ImageKey, ImageRendering, ItemRange, LayoutPoint, ColorDepth};
 use api::{LayoutPrimitiveInfo, LayoutRect, LayoutSize, LayoutTransform, LayoutVector2D};
 use api::{LineOrientation, LineStyle, LocalClip, NinePatchBorderSource, PipelineId};
 use api::{PropertyBinding, ReferenceFrame, RepeatMode, ScrollFrameDisplayItem, ScrollSensitivity};
 use api::{Shadow, SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, TexelRect};
 use api::{ClipMode, TransformStyle, YuvColorSpace, YuvData};
 use clip::{ClipDataInterner, ClipChainId, ClipRegion, ClipItemKey, ClipStore};
 use clip_scroll_tree::{ClipScrollTree, SpatialNodeIndex};
 use euclid::vec2;
@@ -21,17 +21,17 @@ use glyph_rasterizer::FontInstance;
 use gpu_cache::GpuCacheHandle;
 use gpu_types::BrushFlags;
 use hit_test::{HitTestingItem, HitTestingRun};
 use image::simplify_repeated_primitive;
 use internal_types::{FastHashMap, FastHashSet};
 use picture::{PictureCompositeMode, PictureIdGenerator, PicturePrimitive};
 use prim_store::{BrushKind, BrushPrimitive, BrushSegmentDescriptor};
 use prim_store::{EdgeAaSegmentMask, ImageSource, PrimitiveOpacity};
-use prim_store::{BorderSource, BrushSegment, PrimitiveContainer, PrimitiveIndex, PrimitiveStore};
+use prim_store::{BorderSource, BrushSegment, BrushSegmentVec, PrimitiveContainer, PrimitiveIndex, PrimitiveStore};
 use prim_store::{OpacityBinding, ScrollNodeAndClipChain, TextRunPrimitive};
 use render_backend::{DocumentView};
 use resource_cache::{FontInstanceMap, ImageRequest};
 use scene::{Scene, ScenePipeline, StackingContextHelpers};
 use spatial_node::{SpatialNodeType, StickyFrameInfo};
 use std::{f32, mem};
 use tiling::{CompositeOps, ScrollbarPrimitive};
 use util::{MaxRect, RectHelpers};
@@ -157,16 +157,19 @@ pub struct DisplayListFlattener<'a> {
     pub clip_store: ClipStore,
 
     /// The configuration to use for the FrameBuilder. We consult this in
     /// order to determine the default font.
     pub config: FrameBuilderConfig,
 
     /// Reference to the clip interner for this document.
     clip_interner: &'a mut ClipDataInterner,
+
+    /// The estimated count of primitives we expect to encounter during flattening.
+    prim_count_estimate: usize,
 }
 
 impl<'a> DisplayListFlattener<'a> {
     pub fn create_frame_builder(
         scene: &Scene,
         clip_scroll_tree: &mut ClipScrollTree,
         font_instances: FontInstanceMap,
         view: &DocumentView,
@@ -196,16 +199,17 @@ impl<'a> DisplayListFlattener<'a> {
             scrollbar_prims: Vec::new(),
             shadow_stack: Vec::new(),
             sc_stack: Vec::new(),
             pipeline_clip_chain_stack: vec![ClipChainId::NONE],
             prim_store: PrimitiveStore::new(),
             clip_store: ClipStore::new(),
             picture_id_generator,
             clip_interner,
+            prim_count_estimate: 0,
         };
 
         flattener.push_root(
             root_pipeline_id,
             &root_pipeline.viewport_size,
             &root_pipeline.content_size,
         );
         flattener.setup_viewport_offset(view.inner_rect, view.accumulated_scale_factor());
@@ -281,16 +285,19 @@ impl<'a> DisplayListFlattener<'a> {
                         bg_color,
                         None,
                         Vec::new(),
                     );
                 }
             }
         }
 
+        self.prim_count_estimate += pipeline.display_list.prim_count_estimate();
+        self.prim_store.primitives.reserve(self.prim_count_estimate);
+
         self.flatten_items(&mut pipeline.display_list.iter(), pipeline_id, LayoutVector2D::zero());
 
         if self.config.enable_scrollbars {
             let scrollbar_rect = LayoutRect::new(LayoutPoint::zero(), LayoutSize::new(10.0, 70.0));
             let container_rect = LayoutRect::new(LayoutPoint::zero(), *frame_size);
             self.add_scroll_bar(
                 reference_frame_info.spatial_node_index,
                 &LayoutPrimitiveInfo::new(scrollbar_rect),
@@ -545,16 +552,17 @@ impl<'a> DisplayListFlattener<'a> {
                     info.color,
                 );
             }
             SpecificDisplayItem::YuvImage(ref info) => {
                 self.add_yuv_image(
                     clip_and_scroll,
                     &prim_info,
                     info.yuv_data,
+                    info.color_depth,
                     info.color_space,
                     info.image_rendering,
                 );
             }
             SpecificDisplayItem::Text(ref text_info) => {
                 self.add_text(
                     clip_and_scroll,
                     reference_frame_relative_offset,
@@ -1056,19 +1064,21 @@ impl<'a> DisplayListFlattener<'a> {
         );
 
         // Create a chain of pictures based on presence of filters,
         // mix-blend-mode and/or 3d rendering context containers.
         let mut current_prim_index = leaf_prim_index;
 
         // For each filter, create a new image with that composite mode.
         for filter in &composite_ops.filters {
+            let filter = filter.sanitize();
+
             let mut filter_picture = PicturePrimitive::new_image(
                 self.picture_id_generator.next(),
-                Some(PictureCompositeMode::Filter(*filter)),
+                Some(PictureCompositeMode::Filter(filter)),
                 false,
                 pipeline_id,
                 None,
                 true,
                 requested_raster_space,
             );
 
             filter_picture.add_primitive(current_prim_index);
@@ -1636,17 +1646,17 @@ impl<'a> DisplayListFlattener<'a> {
 
                 let br_outer = LayoutPoint::new(
                     rect.origin.x + rect.size.width,
                     rect.origin.y + rect.size.height,
                 );
                 let br_inner = br_outer - vec2(border_item.widths.right, border_item.widths.bottom);
 
                 fn add_segment(
-                    segments: &mut Vec<BrushSegment>,
+                    segments: &mut BrushSegmentVec,
                     rect: LayoutRect,
                     uv_rect: TexelRect,
                     repeat_horizontal: RepeatMode,
                     repeat_vertical: RepeatMode
                 ) {
                     if uv_rect.uv1.x > uv_rect.uv0.x &&
                        uv_rect.uv1.y > uv_rect.uv0.y {
 
@@ -1675,17 +1685,17 @@ impl<'a> DisplayListFlattener<'a> {
                             brush_flags,
                         );
 
                         segments.push(segment);
                     }
                 }
 
                 // Build the list of image segments
-                let mut segments = vec![];
+                let mut segments = BrushSegmentVec::new();
 
                 // Top left
                 add_segment(
                     &mut segments,
                     LayoutRect::from_floats(tl_outer.x, tl_outer.y, tl_inner.x, tl_inner.y),
                     TexelRect::new(px0, py0, px1, py1),
                     RepeatMode::Stretch,
                     RepeatMode::Stretch
@@ -2038,30 +2048,32 @@ impl<'a> DisplayListFlattener<'a> {
         );
     }
 
     pub fn add_yuv_image(
         &mut self,
         clip_and_scroll: ScrollNodeAndClipChain,
         info: &LayoutPrimitiveInfo,
         yuv_data: YuvData,
+        color_depth: ColorDepth,
         color_space: YuvColorSpace,
         image_rendering: ImageRendering,
     ) {
         let format = yuv_data.get_format();
         let yuv_key = match yuv_data {
             YuvData::NV12(plane_0, plane_1) => [plane_0, plane_1, ImageKey::DUMMY],
             YuvData::PlanarYCbCr(plane_0, plane_1, plane_2) => [plane_0, plane_1, plane_2],
             YuvData::InterleavedYCbCr(plane_0) => [plane_0, ImageKey::DUMMY, ImageKey::DUMMY],
         };
 
         let prim = BrushPrimitive::new(
             BrushKind::YuvImage {
                 yuv_key,
                 format,
+                color_depth,
                 color_space,
                 image_rendering,
             },
             None,
         );
 
         self.add_primitive(
             clip_and_scroll,
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -8,22 +8,23 @@ use api::{LayoutPoint, LayoutRect, Layou
 use clip::{ClipDataStore, ClipStore};
 use clip_scroll_tree::{ClipScrollTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex};
 use display_list_flattener::{DisplayListFlattener};
 use gpu_cache::GpuCache;
 use gpu_types::{PrimitiveHeaders, TransformPalette, UvRectKind};
 use hit_test::{HitTester, HitTestingRun};
 use internal_types::{FastHashMap};
 use picture::{PictureCompositeMode, PictureSurface, RasterConfig};
-use prim_store::{PrimitiveIndex, PrimitiveRun, PrimitiveStore, SpaceMapper};
+use prim_store::{PrimitiveIndex, PrimitiveStore, SpaceMapper};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTask, RenderTaskId, RenderTaskLocation, RenderTaskTree};
 use resource_cache::{ResourceCache};
 use scene::{ScenePipeline, SceneProperties};
+use segment::SegmentBuilder;
 use spatial_node::SpatialNode;
 use std::f32;
 use std::sync::Arc;
 use tiling::{Frame, RenderPass, RenderPassKind, RenderTargetContext};
 use tiling::{ScrollbarPrimitive, SpecialRenderPasses};
 use util;
 
 
@@ -80,21 +81,21 @@ pub struct FrameBuildingState<'a> {
     pub render_tasks: &'a mut RenderTaskTree,
     pub profile_counters: &'a mut FrameProfileCounters,
     pub clip_store: &'a mut ClipStore,
     pub resource_cache: &'a mut ResourceCache,
     pub gpu_cache: &'a mut GpuCache,
     pub special_render_passes: &'a mut SpecialRenderPasses,
     pub transforms: &'a mut TransformPalette,
     pub clip_data_store: &'a mut ClipDataStore,
+    pub segment_builder: SegmentBuilder,
 }
 
 pub struct PictureContext {
     pub pipeline_id: PipelineId,
-    pub prim_runs: Vec<PrimitiveRun>,
     pub apply_local_clip_rect: bool,
     pub inflation_factor: f32,
     pub allow_subpixel_aa: bool,
     pub is_passthrough: bool,
     pub establishes_raster_root: bool,
     pub raster_space: RasterSpace,
 }
 
@@ -219,51 +220,54 @@ impl FrameBuilder {
             render_tasks,
             profile_counters,
             clip_store: &mut self.clip_store,
             resource_cache,
             gpu_cache,
             special_render_passes,
             transforms: transform_palette,
             clip_data_store,
+            segment_builder: SegmentBuilder::new(),
         };
 
         let prim_context = PrimitiveContext::new(
             &clip_scroll_tree.spatial_nodes[root_spatial_node_index.0],
             root_spatial_node_index,
         );
 
-        let (pic_context, mut pic_state) = self
+        let (pic_context, mut pic_state, mut instances) = self
             .prim_store
             .get_pic_mut(root_prim_index)
             .take_context(
                 &prim_context,
                 root_spatial_node_index,
                 root_spatial_node_index,
                 true,
                 &mut frame_state,
                 &frame_context,
                 false,
             )
             .unwrap();
 
         let mut pic_rect = PictureRect::zero();
 
-        self.prim_store.prepare_prim_runs(
+        self.prim_store.prepare_primitives(
+            &mut instances,
             &pic_context,
             &mut pic_state,
             &frame_context,
             &mut frame_state,
             &mut pic_rect,
         );
 
         let pic = self
             .prim_store
             .get_pic_mut(root_prim_index);
         pic.restore_context(
+            instances,
             pic_context,
             pic_state,
             Some(pic_rect),
             &mut frame_state,
         );
 
         let pic_state = pic.take_state();
 
--- a/gfx/webrender/src/gpu_types.rs
+++ b/gfx/webrender/src/gpu_types.rs
@@ -75,16 +75,25 @@ pub enum BlurDirection {
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BlurInstance {
     pub task_address: RenderTaskAddress,
     pub src_task_address: RenderTaskAddress,
     pub blur_direction: BlurDirection,
 }
 
+#[derive(Debug)]
+#[repr(C)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ScalingInstance {
+    pub task_address: RenderTaskAddress,
+    pub src_task_address: RenderTaskAddress,
+}
+
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BorderSegment {
     TopLeft,
     TopRight,
     BottomRight,
@@ -135,17 +144,17 @@ pub struct ClipMaskBorderCornerDotDash {
     pub clip_mask_instance: ClipMaskInstance,
     pub dot_dash_data: [f32; 8],
 }
 
 // 16 bytes per instance should be enough for anyone!
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct PrimitiveInstance {
+pub struct PrimitiveInstanceData {
     data: [i32; 4],
 }
 
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PrimitiveHeaderIndex(pub i32);
 
@@ -246,18 +255,18 @@ impl GlyphInstance {
         GlyphInstance {
             prim_header_index,
         }
     }
 
     // TODO(gw): Some of these fields can be moved to the primitive
     //           header since they are constant, and some can be
     //           compressed to a smaller size.
-    pub fn build(&self, data0: i32, data1: i32, data2: i32) -> PrimitiveInstance {
-        PrimitiveInstance {
+    pub fn build(&self, data0: i32, data1: i32, data2: i32) -> PrimitiveInstanceData {
+        PrimitiveInstanceData {
             data: [
                 self.prim_header_index.0 as i32,
                 data0,
                 data1,
                 data2,
             ],
         }
     }
@@ -278,19 +287,19 @@ impl SplitCompositeInstance {
         SplitCompositeInstance {
             prim_header_index,
             polygons_address,
             z,
         }
     }
 }
 
-impl From<SplitCompositeInstance> for PrimitiveInstance {
+impl From<SplitCompositeInstance> for PrimitiveInstanceData {
     fn from(instance: SplitCompositeInstance) -> Self {
-        PrimitiveInstance {
+        PrimitiveInstanceData {
             data: [
                 instance.prim_header_index.0,
                 instance.polygons_address.as_int(),
                 instance.z.0,
                 0,
             ],
         }
     }
@@ -319,19 +328,19 @@ bitflags! {
 pub struct BrushInstance {
     pub prim_header_index: PrimitiveHeaderIndex,
     pub clip_task_address: RenderTaskAddress,
     pub segment_index: i32,
     pub edge_flags: EdgeAaSegmentMask,
     pub brush_flags: BrushFlags,
 }
 
-impl From<BrushInstance> for PrimitiveInstance {
+impl From<BrushInstance> for PrimitiveInstanceData {
     fn from(instance: BrushInstance) -> Self {
-        PrimitiveInstance {
+        PrimitiveInstanceData {
             data: [
                 instance.prim_header_index.0,
                 instance.clip_task_address.0 as i32,
                 instance.segment_index |
                 ((instance.edge_flags.bits() as i32) << 16) |
                 ((instance.brush_flags.bits() as i32) << 24),
                 0,
             ]
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -146,17 +146,17 @@ pub enum DebugOutput {
 
 pub enum ResultMsg {
     DebugCommand(DebugCommand),
     DebugOutput(DebugOutput),
     RefreshShader(PathBuf),
     UpdateGpuCache(GpuCacheUpdateList),
     UpdateResources {
         updates: TextureUpdateList,
-        cancel_rendering: bool,
+        memory_pressure: bool,
     },
     PublishPipelineInfo(PipelineInfo),
     PublishDocument(
         DocumentId,
         RenderedDocument,
         TextureUpdateList,
         BackendProfileCounters,
     ),
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -9,17 +9,17 @@ use api::{PicturePixel, RasterPixel, Wor
 use box_shadow::{BLUR_SAMPLE_SCALE};
 use clip::ClipNodeCollector;
 use clip_scroll_tree::{ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex};
 use euclid::TypedScale;
 use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState};
 use frame_builder::{PictureContext, PrimitiveContext};
 use gpu_cache::{GpuCacheHandle};
 use gpu_types::UvRectKind;
-use prim_store::{PrimitiveIndex, PrimitiveRun, SpaceMapper};
+use prim_store::{PrimitiveIndex, PrimitiveInstance, SpaceMapper};
 use prim_store::{PrimitiveMetadata, get_raster_rects};
 use render_task::{ClearMode, RenderTask, RenderTaskCacheEntryHandle};
 use render_task::{RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskId, RenderTaskLocation};
 use scene::{FilterOpHelpers, SceneProperties};
 use std::mem;
 use tiling::RenderTargetKind;
 use util::{TransformedRectKind, MatrixHelpers, MaxRect};
 
@@ -152,17 +152,17 @@ pub struct PictureCacheKey {
     // happen, for example, during zooming or changes
     // in device-pixel-ratio.
     unclipped_size: DeviceIntSize,
 }
 
 #[derive(Debug)]
 pub struct PicturePrimitive {
     // List of primitive runs that make up this picture.
-    pub runs: Vec<PrimitiveRun>,
+    pub prim_instances: Vec<PrimitiveInstance>,
     pub state: Option<PictureState>,
 
     // The pipeline that the primitives on this picture belong to.
     pub pipeline_id: PipelineId,
 
     // If true, apply the local clip rect to primitive drawn
     // in this picture.
     pub apply_local_clip_rect: bool,
@@ -219,17 +219,17 @@ impl PicturePrimitive {
         requested_composite_mode: Option<PictureCompositeMode>,
         is_in_3d_context: bool,
         pipeline_id: PipelineId,
         frame_output_pipeline_id: Option<PipelineId>,
         apply_local_clip_rect: bool,
         requested_raster_space: RasterSpace,
     ) -> Self {
         PicturePrimitive {
-            runs: Vec::new(),
+            prim_instances: Vec::new(),
             state: None,
             secondary_render_task_id: None,
             requested_composite_mode,
             raster_config: None,
             is_in_3d_context,
             frame_output_pipeline_id,
             extra_gpu_data_handle: GpuCacheHandle::new(),
             apply_local_clip_rect,
@@ -243,17 +243,17 @@ impl PicturePrimitive {
         &mut self,
         prim_context: &PrimitiveContext,
         surface_spatial_node_index: SpatialNodeIndex,
         raster_spatial_node_index: SpatialNodeIndex,
         parent_allows_subpixel_aa: bool,
         frame_state: &mut FrameBuildingState,
         frame_context: &FrameBuildingContext,
         is_chased: bool,
-    ) -> Option<(PictureContext, PictureState)> {
+    ) -> Option<(PictureContext, PictureState, Vec<PrimitiveInstance>)> {
         if !self.resolve_scene_properties(frame_context.scene_properties) {
             if cfg!(debug_assertions) && is_chased {
                 println!("\tculled for carrying an invisible composite filter");
             }
 
             return None;
         }
 
@@ -352,53 +352,48 @@ impl PicturePrimitive {
             }
             _ => {
                 0.0
             }
         };
 
         let context = PictureContext {
             pipeline_id: self.pipeline_id,
-            prim_runs: mem::replace(&mut self.runs, Vec::new()),
             apply_local_clip_rect: self.apply_local_clip_rect,
             inflation_factor,
             allow_subpixel_aa,
             is_passthrough: self.raster_config.is_none(),
             establishes_raster_root,
             raster_space,
         };
 
-        Some((context, state))
+        let instances = mem::replace(&mut self.prim_instances, Vec::new());
+
+        Some((context, state, instances))
     }
 
     pub fn add_primitive(
         &mut self,
         prim_index: PrimitiveIndex,
     ) {
-        if let Some(ref mut run) = self.runs.last_mut() {
-            if run.base_prim_index.0 + run.count == prim_index.0 {
-                run.count += 1;
-                return;
-            }
-        }
-
-        self.runs.push(PrimitiveRun {
-            base_prim_index: prim_index,
-            count: 1,
+        self.prim_instances.push(PrimitiveInstance {
+            prim_index,
+            combined_local_clip_rect: LayoutRect::zero(),
         });
     }
 
     pub fn restore_context(
         &mut self,
+        prim_instances: Vec<PrimitiveInstance>,
         context: PictureContext,
         state: PictureState,
         local_rect: Option<PictureRect>,
         frame_state: &mut FrameBuildingState,
     ) -> (LayoutRect, Option<ClipNodeCollector>) {
-        self.runs = context.prim_runs;
+        self.prim_instances = prim_instances;
         self.state = Some(state);
 
         let local_rect = match local_rect {
             Some(local_rect) => {
                 let local_content_rect = LayoutRect::from_untyped(&local_rect.to_untyped());
 
                 match self.raster_config {
                     Some(RasterConfig { composite_mode: PictureCompositeMode::Filter(FilterOp::Blur(blur_radius)), .. }) => {
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -3,17 +3,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AlphaType, BorderRadius, BuiltDisplayList, ClipMode, ColorF, PictureRect};
 use api::{DeviceIntRect, DeviceIntSize, DevicePixelScale, ExtendMode, DeviceRect, PictureToRasterTransform};
 use api::{FilterOp, GlyphInstance, GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag, TileOffset};
 use api::{RasterSpace, LayoutPoint, LayoutRect, LayoutSideOffsets, LayoutSize, LayoutToWorldTransform};
 use api::{LayoutVector2D, PremultipliedColorF, PropertyBinding, Shadow, YuvColorSpace, YuvFormat};
 use api::{DeviceIntSideOffsets, WorldPixel, BoxShadowClipMode, LayoutToWorldScale, NormalBorder, WorldRect};
-use api::{PicturePixel, RasterPixel};
+use api::{PicturePixel, RasterPixel, ColorDepth};
 use app_units::Au;
 use border::{BorderCacheKey, BorderRenderTaskInfo};
 use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId, SpatialNodeIndex};
 use clip::{ClipNodeFlags, ClipChainId, ClipChainInstance, ClipItem, ClipNodeCollector};
 use euclid::{TypedTransform3D, TypedRect};
 use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureContext, PictureState};
 use frame_builder::PrimitiveContext;
 use glyph_rasterizer::{FontInstance, FontTransform, GlyphKey, FONT_SIZE_LIMIT};
@@ -24,19 +24,19 @@ use image::{for_each_tile, for_each_repe
 use picture::{PictureCompositeMode, PicturePrimitive};
 #[cfg(debug_assertions)]
 use render_backend::FrameId;
 use render_task::{BlitSource, RenderTask, RenderTaskCacheKey};
 use render_task::{RenderTaskCacheKeyKind, RenderTaskId, RenderTaskCacheEntryHandle};
 use renderer::{MAX_VERTEX_TEXTURE_WIDTH};
 use resource_cache::{ImageProperties, ImageRequest, ResourceCache};
 use scene::SceneProperties;
-use segment::SegmentBuilder;
 use std::{cmp, fmt, mem, usize};
 use util::{ScaleOffset, MatrixHelpers, pack_as_float, project_rect, raster_rect_to_device_pixels};
+use smallvec::SmallVec;
 
 
 const MIN_BRUSH_SPLIT_AREA: f32 = 256.0 * 256.0;
 pub const VECS_PER_SEGMENT: usize = 2;
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct ScrollNodeAndClipChain {
     pub spatial_node_index: SpatialNodeIndex,
@@ -50,33 +50,16 @@ impl ScrollNodeAndClipChain {
     ) -> Self {
         ScrollNodeAndClipChain {
             spatial_node_index,
             clip_chain_id,
         }
     }
 }
 
-#[derive(Debug)]
-pub struct PrimitiveRun {
-    pub base_prim_index: PrimitiveIndex,
-    pub count: usize,
-}
-
-impl PrimitiveRun {
-    pub fn is_chasing(&self, index: Option<PrimitiveIndex>) -> bool {
-        match index {
-            Some(id) if cfg!(debug_assertions) => {
-                self.base_prim_index <= id && id.0 < self.base_prim_index.0 + self.count
-            }
-            _ => false,
-        }
-    }
-}
-
 #[derive(Debug, Copy, Clone)]
 pub struct PrimitiveOpacity {
     pub is_opaque: bool,
 }
 
 impl PrimitiveOpacity {
     pub fn opaque() -> PrimitiveOpacity {
         PrimitiveOpacity { is_opaque: true }
@@ -243,17 +226,17 @@ impl GpuCacheHandle {
     pub fn as_int(&self, gpu_cache: &GpuCache) -> i32 {
         gpu_cache.get_address(self).as_int()
     }
 }
 
 impl GpuCacheAddress {
     pub fn as_int(&self) -> i32 {
         // TODO(gw): Temporarily encode GPU Cache addresses as a single int.
-        //           In the future, we can change the PrimitiveInstance struct
+        //           In the future, we can change the PrimitiveInstanceData struct
         //           to use 2x u16 for the vertex attribute instead of an i32.
         self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32
     }
 }
 
 // TODO(gw): Pack the fields here better!
 #[derive(Debug)]
 pub struct PrimitiveMetadata {
@@ -264,20 +247,16 @@ pub struct PrimitiveMetadata {
     pub clip_task_id: Option<RenderTaskId>,
 
     // TODO(gw): In the future, we should just pull these
     //           directly from the DL item, instead of
     //           storing them here.
     pub local_rect: LayoutRect,
     pub local_clip_rect: LayoutRect,
 
-    // The current combined local clip for this primitive, from
-    // the primitive local clip above and the current clip chain.
-    pub combined_local_clip_rect: LayoutRect,
-
     pub is_backface_visible: bool,
     pub clipped_world_rect: Option<WorldRect>,
 
     /// A tag used to identify this primitive outside of WebRender. This is
     /// used for returning useful data during hit testing.
     pub tag: Option<ItemTag>,
 
     /// The last frame ID (of the `RenderTaskTree`) this primitive
@@ -372,16 +351,17 @@ pub enum BrushKind {
         source: ImageSource,
         sub_rect: Option<DeviceIntRect>,
         opacity_binding: OpacityBinding,
         visible_tiles: Vec<VisibleImageTile>,
     },
     YuvImage {
         yuv_key: [ImageKey; 3],
         format: YuvFormat,
+        color_depth: ColorDepth,
         color_space: YuvColorSpace,
         image_rendering: ImageRendering,
     },
     RadialGradient {
         stops_handle: GpuCacheHandle,
         stops_range: ItemRange<GradientStop>,
         extend_mode: ExtendMode,
         center: LayoutPoint,
@@ -536,19 +516,21 @@ impl BrushSegment {
             may_need_clip_mask,
             edge_flags,
             extra_data,
             brush_flags,
         }
     }
 }
 
+pub type BrushSegmentVec = SmallVec<[BrushSegment; 8]>;
+
 #[derive(Debug)]
 pub struct BrushSegmentDescriptor {
-    pub segments: Vec<BrushSegment>,
+    pub segments: BrushSegmentVec,
 }
 
 #[derive(Debug)]
 pub struct BrushPrimitive {
     pub kind: BrushKind,
     pub segment_desc: Option<BrushSegmentDescriptor>,
 }
 
@@ -596,17 +578,24 @@ impl BrushPrimitive {
                 request.push(PremultipliedColorF::WHITE);
                 request.push([
                     local_rect.size.width,
                     local_rect.size.height,
                     0.0,
                     0.0,
                 ]);
             }
-            BrushKind::YuvImage { .. } => {}
+            BrushKind::YuvImage { color_depth, .. } => {
+                request.push([
+                    color_depth.rescaling_factor(),
+                    0.0,
+                    0.0,
+                    0.0
+                ]);
+            }
             BrushKind::Picture { .. } => {
                 request.push(PremultipliedColorF::WHITE);
                 request.push(PremultipliedColorF::WHITE);
                 request.push([
                     local_rect.size.width,
                     local_rect.size.height,
                     0.0,
                     0.0,
@@ -1367,16 +1356,25 @@ impl Primitive {
             PrimitiveDetails::Brush(BrushPrimitive { kind: BrushKind::Picture(ref mut pic), .. }) => pic,
             _ => {
                 panic!("bug: not a picture!");
             }
         }
     }
 }
 
+#[derive(Debug)]
+pub struct PrimitiveInstance {
+    pub prim_index: PrimitiveIndex,
+
+    // The current combined local clip for this primitive, from the primitive's
+    // own local clip rect (PrimitiveMetadata) and the current clip chain.
+    pub combined_local_clip_rect: LayoutRect,
+}
+
 pub struct PrimitiveStore {
     pub primitives: Vec<Primitive>,
 
     /// A primitive index to chase through debugging.
     pub chase_id: Option<PrimitiveIndex>,
 }
 
 impl PrimitiveStore {
@@ -1409,17 +1407,16 @@ impl PrimitiveStore {
 
         let base_metadata = PrimitiveMetadata {
             clip_chain_id,
             gpu_location: GpuCacheHandle::new(),
             clip_task_id: None,
             spatial_node_index,
             local_rect: *local_rect,
             local_clip_rect: *local_clip_rect,
-            combined_local_clip_rect: *local_clip_rect,
             is_backface_visible,
             clipped_world_rect: None,
             tag,
             opacity: PrimitiveOpacity::translucent(),
             #[cfg(debug_assertions)]
             prepared_frame_id: FrameId(0),
         };
 
@@ -1476,52 +1473,48 @@ impl PrimitiveStore {
 
         PrimitiveIndex(prim_index)
     }
 
     // Internal method that retrieves the primitive index of a primitive
     // that can be the target for collapsing parent opacity filters into.
     fn get_opacity_collapse_prim(
         &self,
-        prim_index: PrimitiveIndex,
+        pic_prim_index: PrimitiveIndex,
     ) -> Option<PrimitiveIndex> {
-        let pic = self.get_pic(prim_index);
+        let pic = self.get_pic(pic_prim_index);
 
         // We can only collapse opacity if there is a single primitive, otherwise
         // the opacity needs to be applied to the primitives as a group.
-        if pic.runs.len() != 1 {
+        if pic.prim_instances.len() != 1 {
             return None;
         }
 
-        let run = &pic.runs[0];
-        if run.count != 1 {
-            return None;
-        }
-
-        let prim = &self.primitives[run.base_prim_index.0];
+        let prim_instance = &pic.prim_instances[0];
+        let prim = &self.primitives[prim_instance.prim_index.0];
 
         // For now, we only support opacity collapse on solid rects and images.
         // This covers the most common types of opacity filters that can be
         // handled by this optimization. In the future, we can easily extend
         // this to other primitives, such as text runs and gradients.
         match prim.details {
             PrimitiveDetails::Brush(ref brush) => {
                 match brush.kind {
                     BrushKind::Picture(ref pic) => {
                         // If we encounter a picture that is a pass-through
                         // (i.e. no composite mode), then we can recurse into
                         // that to try and find a primitive to collapse to.
                         if pic.requested_composite_mode.is_none() {
-                            return self.get_opacity_collapse_prim(run.base_prim_index);
+                            return self.get_opacity_collapse_prim(prim_instance.prim_index);
                         }
                     }
                     // If we find a single rect or image, we can use that
                     // as the primitive to collapse the opacity into.
                     BrushKind::Solid { .. } | BrushKind::Image { .. } => {
-                        return Some(run.base_prim_index)
+                        return Some(prim_instance.prim_index)
                     }
                     BrushKind::Border { .. } |
                     BrushKind::YuvImage { .. } |
                     BrushKind::LinearGradient { .. } |
                     BrushKind::RadialGradient { .. } |
                     BrushKind::Clear => {}
                 }
             }
@@ -1592,33 +1585,33 @@ impl PrimitiveStore {
     }
 
     pub fn prim_count(&self) -> usize {
         self.primitives.len()
     }
 
     pub fn prepare_prim_for_render(
         &mut self,
-        prim_index: PrimitiveIndex,
+        prim_instance: &mut PrimitiveInstance,
         prim_context: &PrimitiveContext,
         pic_context: &PictureContext,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
         display_list: &BuiltDisplayList,
         is_chased: bool,
         current_pic_rect: &mut PictureRect,
     ) -> bool {
         // If we have dependencies, we need to prepare them first, in order
         // to know the actual rect of this primitive.
         // For example, scrolling may affect the location of an item in
         // local space, which may force us to render this item on a larger
         // picture target, if being composited.
         let pic_info = {
-            match self.primitives[prim_index.0].details {
+            match self.primitives[prim_instance.prim_index.0].details {
                 PrimitiveDetails::Brush(BrushPrimitive { kind: BrushKind::Picture(ref mut pic), .. }) => {
                     match pic.take_context(
                         prim_context,
                         pic_state.surface_spatial_node_index,
                         pic_state.raster_spatial_node_index,
                         pic_context.allow_subpixel_aa,
                         frame_state,
                         frame_context,
@@ -1631,24 +1624,25 @@ impl PrimitiveStore {
                 PrimitiveDetails::Brush(_) |
                 PrimitiveDetails::TextRun(..) => {
                     None
                 }
             }
         };
 
         let (is_passthrough, clip_node_collector) = match pic_info {
-            Some((pic_context_for_children, mut pic_state_for_children)) => {
+            Some((pic_context_for_children, mut pic_state_for_children, mut prim_instances)) => {
                 // Mark whether this picture has a complex coordinate system.
                 let is_passthrough = pic_context_for_children.is_passthrough;
                 pic_state_for_children.has_non_root_coord_system |=
                     prim_context.spatial_node.coordinate_system_id != CoordinateSystemId::root();
 
                 let mut pic_rect = PictureRect::zero();
-                self.prepare_prim_runs(
+                self.prepare_primitives(
+                    &mut prim_instances,
                     &pic_context_for_children,
                     &mut pic_state_for_children,
                     frame_context,
                     frame_state,
                     &mut pic_rect,
                 );
 
                 let pic_rect = if is_passthrough {
@@ -1658,20 +1652,21 @@ impl PrimitiveStore {
                     Some(pic_rect)
                 };
 
                 if !pic_state_for_children.is_cacheable {
                   pic_state.is_cacheable = false;
                 }
 
                 // Restore the dependencies (borrow check dance)
-                let prim = &mut self.primitives[prim_index.0];
+                let prim = &mut self.primitives[prim_instance.prim_index.0];
                 let (new_local_rect, clip_node_collector) = prim
                     .as_pic_mut()
                     .restore_context(
+                        prim_instances,
                         pic_context_for_children,
                         pic_state_for_children,
                         pic_rect,
                         frame_state,
                     );
 
                 if new_local_rect != prim.metadata.local_rect {
                     prim.metadata.local_rect = new_local_rect;
@@ -1681,17 +1676,17 @@ impl PrimitiveStore {
 
                 (is_passthrough, clip_node_collector)
             }
             None => {
                 (false, None)
             }
         };
 
-        let prim = &mut self.primitives[prim_index.0];
+        let prim = &mut self.primitives[prim_instance.prim_index.0];
 
         if !prim.is_cacheable(frame_state.resource_cache) {
             pic_state.is_cacheable = false;
         }
 
         if is_passthrough {
             prim.metadata.clipped_world_rect = Some(pic_state.map_pic_to_world.bounds);
         } else {
@@ -1753,17 +1748,17 @@ impl PrimitiveStore {
                 println!("\teffective clip chain from {:?} {}",
                     clip_chain.clips_range,
                     if pic_context.apply_local_clip_rect { "(applied)" } else { "" },
                 );
             }
 
             pic_state.has_non_root_coord_system |= clip_chain.has_non_root_coord_system;
 
-            prim.metadata.combined_local_clip_rect = if pic_context.apply_local_clip_rect {
+            prim_instance.combined_local_clip_rect = if pic_context.apply_local_clip_rect {
                 clip_chain.local_clip_rect
             } else {
                 prim.metadata.local_clip_rect
             };
 
             let pic_rect = match pic_state.map_local_to_pic
                                           .map(&prim.metadata.local_rect) {
                 Some(pic_rect) => pic_rect,
@@ -1810,17 +1805,17 @@ impl PrimitiveStore {
             if cfg!(debug_assertions) && is_chased {
                 println!("\tconsidered visible and ready with local rect {:?}", local_rect);
             }
 
             *current_pic_rect = current_pic_rect.union(&pic_rect);
         }
 
         prim.prepare_prim_for_render_inner(
-            prim_index,
+            prim_instance,
             prim_context,
             pic_context,
             pic_state,
             frame_context,
             frame_state,
             display_list,
             is_chased,
         );
@@ -1831,99 +1826,98 @@ impl PrimitiveStore {
     // TODO(gw): Make this simpler / more efficient by tidying
     //           up the logic that early outs from prepare_prim_for_render.
     pub fn reset_prim_visibility(&mut self) {
         for prim in &mut self.primitives {
             prim.metadata.clipped_world_rect = None;
         }
     }
 
-    pub fn prepare_prim_runs(
+    pub fn prepare_primitives(
         &mut self,
+        prim_instances: &mut Vec<PrimitiveInstance>,
         pic_context: &PictureContext,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
         current_pic_rect: &mut PictureRect,
     ) {
         let display_list = &frame_context
             .pipelines
             .get(&pic_context.pipeline_id)
             .expect("No display list?")
             .display_list;
 
-        for run in &pic_context.prim_runs {
-            if run.is_chasing(self.chase_id) {
-                println!("\tpreparing a run of length {} in pipeline {:?}",
-                    run.count, pic_context.pipeline_id);
+        for prim_instance in prim_instances {
+            let prim_index = prim_instance.prim_index;
+            let is_chased = Some(prim_index) == self.chase_id;
+
+            if is_chased {
+                println!("\tpreparing prim {:?} in pipeline {:?}",
+                    prim_instance.prim_index, pic_context.pipeline_id);
             }
 
-            for i in 0 .. run.count {
-                let prim_index = PrimitiveIndex(run.base_prim_index.0 + i);
-                let is_chased = Some(prim_index) == self.chase_id;
+            // TODO(gw): These workarounds for borrowck are unfortunate. We
+            //           should see if we can re-structure these to avoid so
+            //           many special borrow blocks.
+            let (spatial_node_index, is_backface_visible) = {
+                let prim = &self.primitives[prim_instance.prim_index.0];
+                (prim.metadata.spatial_node_index, prim.metadata.is_backface_visible)
+            };
 
-                // TODO(gw): These workarounds for borrowck are unfortunate. We
-                //           should see if we can re-structure these to avoid so
-                //           many special borrow blocks.
-                let (spatial_node_index, is_backface_visible) = {
-                    let prim = &self.primitives[prim_index.0];
-                    (prim.metadata.spatial_node_index, prim.metadata.is_backface_visible)
-                };
-
-                let spatial_node = &frame_context
-                    .clip_scroll_tree
-                    .spatial_nodes[spatial_node_index.0];
+            let spatial_node = &frame_context
+                .clip_scroll_tree
+                .spatial_nodes[spatial_node_index.0];
 
-                // TODO(gw): Although constructing these is cheap, they are often
-                //           the same for many consecutive primitives, so it may
-                //           be worth caching the most recent context.
-                let prim_context = PrimitiveContext::new(
-                    spatial_node,
-                    spatial_node_index,
-                );
+            // TODO(gw): Although constructing these is cheap, they are often
+            //           the same for many consecutive primitives, so it may
+            //           be worth caching the most recent context.
+            let prim_context = PrimitiveContext::new(
+                spatial_node,
+                spatial_node_index,
+            );
 
-                // Do some basic checks first, that can early out
-                // without even knowing the local rect.
-                if !is_backface_visible && spatial_node.world_content_transform.is_backface_visible() {
-                    if cfg!(debug_assertions) && is_chased {
-                        println!("\tculled for not having visible back faces");
-                    }
-                    continue;
+            // Do some basic checks first, that can early out
+            // without even knowing the local rect.
+            if !is_backface_visible && spatial_node.world_content_transform.is_backface_visible() {
+                if cfg!(debug_assertions) && is_chased {
+                    println!("\tculled for not having visible back faces");
                 }
+                continue;
+            }
 
-                if !spatial_node.invertible {
-                    if cfg!(debug_assertions) && is_chased {
-                        println!("\tculled for the scroll node transform being invertible");
-                    }
-                    continue;
+            if !spatial_node.invertible {
+                if cfg!(debug_assertions) && is_chased {
+                    println!("\tculled for the scroll node transform being invertible");
                 }
+                continue;
+            }
 
-                // Mark whether this picture contains any complex coordinate
-                // systems, due to either the scroll node or the clip-chain.
-                pic_state.has_non_root_coord_system |=
-                    spatial_node.coordinate_system_id != CoordinateSystemId::root();
+            // Mark whether this picture contains any complex coordinate
+            // systems, due to either the scroll node or the clip-chain.
+            pic_state.has_non_root_coord_system |=
+                spatial_node.coordinate_system_id != CoordinateSystemId::root();
 
-                pic_state.map_local_to_pic.set_target_spatial_node(
-                    spatial_node_index,
-                    frame_context.clip_scroll_tree,
-                );
+            pic_state.map_local_to_pic.set_target_spatial_node(
+                spatial_node_index,
+                frame_context.clip_scroll_tree,
+            );
 
-                if self.prepare_prim_for_render(
-                    prim_index,
-                    &prim_context,
-                    pic_context,
-                    pic_state,
-                    frame_context,
-                    frame_state,
-                    display_list,
-                    is_chased,
-                    current_pic_rect,
-                ) {
-                    frame_state.profile_counters.visible_primitives.inc();
-                }
+            if self.prepare_prim_for_render(
+                prim_instance,
+                &prim_context,
+                pic_context,
+                pic_state,
+                frame_context,
+                frame_state,
+                display_list,
+                is_chased,
+                current_pic_rect,
+            ) {
+                frame_state.profile_counters.visible_primitives.inc();
             }
         }
     }
 }
 
 fn build_gradient_stops_request(
     stops_handle: &mut GpuCacheHandle,
     stops_range: ItemRange<GradientStop>,
@@ -1940,29 +1934,30 @@ fn build_gradient_stops_request(
             reverse_stops,
             &mut request,
         );
     }
 }
 
 fn decompose_repeated_primitive(
     visible_tiles: &mut Vec<VisibleGradientTile>,
+    instance: &PrimitiveInstance,
     metadata: &mut PrimitiveMetadata,
     stretch_size: &LayoutSize,
     tile_spacing: &LayoutSize,
     prim_context: &PrimitiveContext,
     frame_state: &mut FrameBuildingState,
     callback: &mut FnMut(&LayoutRect, GpuDataRequest),
 ) {
     visible_tiles.clear();
 
     // Tighten the clip rect because decomposing the repeated image can
     // produce primitives that are partially covering the original image
     // rect and we want to clip these extra parts out.
-    let tight_clip_rect = metadata
+    let tight_clip_rect = instance
         .combined_local_clip_rect
         .intersection(&metadata.local_rect).unwrap();
 
     let clipped_world_rect = &metadata
         .clipped_world_rect
         .unwrap();
 
     let visible_rect = compute_conservative_visible_rect(
@@ -2074,17 +2069,18 @@ fn write_brush_segment_description(
     // skip allocating a clip mask in these cases.
     let is_large = metadata.local_rect.size.area() > MIN_BRUSH_SPLIT_AREA;
 
     // TODO(gw): We should probably detect and store this on each
     //           ClipSources instance, to avoid having to iterate
     //           the clip sources here.
     let mut rect_clips_only = true;
 
-    let mut segment_builder = SegmentBuilder::new(
+    let segment_builder = &mut frame_state.segment_builder;
+    segment_builder.initialize(
         metadata.local_rect,
         None,
         metadata.local_clip_rect
     );
 
     // Segment the primitive on all the local-space clip sources that we can.
     let mut local_clip_count = 0;
     for i in 0 .. clip_chain.clips_range.count {
@@ -2183,17 +2179,17 @@ fn write_brush_segment_description(
 
         match brush.segment_desc {
             Some(..) => panic!("bug: should not already have descriptor"),
             None => {
                 // TODO(gw): We can probably make the allocation
                 //           patterns of this and the segment
                 //           builder significantly better, by
                 //           retaining it across primitives.
-                let mut segments = Vec::new();
+                let mut segments = BrushSegmentVec::new();
 
                 segment_builder.build(|segment| {
                     segments.push(
                         BrushSegment::new(
                             segment.rect,
                             segment.has_mask,
                             segment.edge_flags,
                             [0.0; 4],
@@ -2324,17 +2320,17 @@ impl Primitive {
             PrimitiveDetails::TextRun(..) => {
                 true
             }
         }
     }
 
     fn prepare_prim_for_render_inner(
         &mut self,
-        prim_index: PrimitiveIndex,
+        prim_instance: &PrimitiveInstance,
         prim_context: &PrimitiveContext,
         pic_context: &PictureContext,
         pic_state: &mut PictureState,
         frame_context: &FrameBuildingContext,
         frame_state: &mut FrameBuildingState,
         display_list: &BuiltDisplayList,
         is_chased: bool,
     ) {
@@ -2493,17 +2489,17 @@ impl Primitive {
                             }
 
                             if let Some(tile_size) = image_properties.tiling {
                                 let device_image_size = image_properties.descriptor.size;
 
                                 // Tighten the clip rect because decomposing the repeated image can
                                 // produce primitives that are partially covering the original image
                                 // rect and we want to clip these extra parts out.
-                                let tight_clip_rect = metadata
+                                let tight_clip_rect = prim_instance
                                     .combined_local_clip_rect
                                     .intersection(&metadata.local_rect).unwrap();
 
                                 let visible_rect = compute_conservative_visible_rect(
                                     prim_context,
                                     &metadata.clipped_world_rect.unwrap(),
                                     &tight_clip_rect
                                 );
@@ -2634,16 +2630,17 @@ impl Primitive {
                             display_list,
                         );
 
                         if tile_spacing != LayoutSize::zero() {
                             is_tiled = true;
 
                             decompose_repeated_primitive(
                                 visible_tiles,
+                                prim_instance,
                                 metadata,
                                 &stretch_size,
                                 &tile_spacing,
                                 prim_context,
                                 frame_state,
                                 &mut |rect, mut request| {
                                     request.push([
                                         center.x,
@@ -2683,16 +2680,17 @@ impl Primitive {
                             display_list,
                         );
 
                         if tile_spacing != LayoutSize::zero() {
                             is_tiled = true;
 
                             decompose_repeated_primitive(
                                 visible_tiles,
+                                prim_instance,
                                 metadata,
                                 &stretch_size,
                                 &tile_spacing,
                                 prim_context,
                                 frame_state,
                                 &mut |rect, mut request| {
                                     request.push([
                                         start_point.x,
@@ -2708,17 +2706,17 @@ impl Primitive {
                                     ]);
                                     request.write_segment(*rect, [0.0; 4]);
                                 }
                             );
                         }
                     }
                     BrushKind::Picture(ref mut pic) => {
                         if !pic.prepare_for_render(
-                            prim_index,
+                            prim_instance.prim_index,
                             metadata,
                             pic_state,
                             frame_context,
                             frame_state,
                         ) {
                             metadata.clipped_world_rect = None;
                         }
                     }
@@ -2875,17 +2873,17 @@ impl Primitive {
             let scale_au = Au::from_f32_px(scale.0);
 
             // NOTE(emilio): This `needs_update` relies on the local rect for a
             // given primitive being immutable. If that changes, this code
             // should probably handle changes to it as well, retaining the old
             // size in cache_key.
             let needs_update = scale_au != cache_key.scale;
 
-            let mut new_segments = Vec::new();
+            let mut new_segments = BrushSegmentVec::new();
 
             let local_rect = &self.metadata.local_rect;
             if needs_update {
                 cache_key.scale = scale_au;
 
                 *task_info = BorderRenderTaskInfo::new(
                     local_rect,
                     border,
--- a/gfx/webrender/src/profiler.rs
+++ b/gfx/webrender/src/profiler.rs
@@ -334,24 +334,26 @@ impl FrameProfileCounters {
         self.targets_changed.reset();
         self.targets_created.reset();
     }
 }
 
 #[derive(Clone)]
 pub struct TextureCacheProfileCounters {
     pub pages_a8_linear: ResourceProfileCounter,
+    pub pages_a16_linear: ResourceProfileCounter,
     pub pages_rgba8_linear: ResourceProfileCounter,
     pub pages_rgba8_nearest: ResourceProfileCounter,
 }
 
 impl TextureCacheProfileCounters {
     pub fn new() -> Self {
         TextureCacheProfileCounters {
             pages_a8_linear: ResourceProfileCounter::new("Texture A8 cached pages"),
+            pages_a16_linear: ResourceProfileCounter::new("Texture A16 cached pages"),
             pages_rgba8_linear: ResourceProfileCounter::new("Texture RGBA8 cached pages (L)"),
             pages_rgba8_nearest: ResourceProfileCounter::new("Texture RGBA8 cached pages (N)"),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct GpuCacheProfileCounters {
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -742,17 +742,17 @@ impl RenderBackend {
                 // The advantage of clearing the cache completely is that it gets rid of any
                 // remaining fragmentation that could have persisted if we kept around the most
                 // recently used resources.
                 self.resource_cache.clear(ClearCache::all());
 
                 let pending_update = self.resource_cache.pending_updates();
                 let msg = ResultMsg::UpdateResources {
                     updates: pending_update,
-                    cancel_rendering: true,
+                    memory_pressure: true,
                 };
                 self.result_tx.send(msg).unwrap();
                 self.notifier.wake_up();
             }
             ApiMsg::ReportMemory(tx) => {
                 tx.send(self.report_memory()).unwrap();
             }
             ApiMsg::DebugCommand(option) => {
@@ -1335,17 +1335,17 @@ impl RenderBackend {
         if config.bits.contains(CaptureBits::FRAME) {
             // After we rendered the frames, there are pending updates to both
             // GPU cache and resources. Instead of serializing them, we are going to make sure
             // they are applied on the `Renderer` side.
             let msg_update_gpu_cache = ResultMsg::UpdateGpuCache(self.gpu_cache.extract_updates());
             self.result_tx.send(msg_update_gpu_cache).unwrap();
             let msg_update_resources = ResultMsg::UpdateResources {
                 updates: self.resource_cache.pending_updates(),
-                cancel_rendering: false,
+                memory_pressure: false,
             };
             self.result_tx.send(msg_update_resources).unwrap();
             // Save the texture/glyph/image caches.
             info!("\tresource cache");
             let caches = self.resource_cache.save_caches(&config.root);
             config.serialize(&caches, "resource_cache");
             info!("\tgpu cache");
             config.serialize(&self.gpu_cache, "gpu_cache");
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -225,16 +225,25 @@ impl BlurTask {
     #[cfg(feature = "debugger")]
     fn print_with<T: PrintTreePrinter>(&self, pt: &mut T) {
         pt.add_item(format!("std deviation: {}", self.blur_std_deviation));
         pt.add_item(format!("target: {:?}", self.target_kind));
     }
 }
 
 #[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ScalingTask { // payload carried by `RenderTaskKind::Scaling`
+    pub target_kind: RenderTargetKind, // same value `new_scaling` matches on to choose the `ClearMode`
+    pub uv_rect_handle: GpuCacheHandle, // `new_scaling` initialises this with `GpuCacheHandle::new()`
+    uv_rect_kind: UvRectKind, // taken from the source task in `new_scaling`; returned by `RenderTask::uv_rect_kind`
+}
+
+#[derive(Debug)]
 #[cfg(feature = "pathfinder")]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct GlyphTask {
     /// After job building, this becomes `None`.
     pub mesh: Option<Mesh>,
     pub origin: DeviceIntPoint,
     pub subpixel_offset: TypedPoint2D<f32, DevicePixel>,
@@ -290,17 +299,17 @@ pub enum RenderTaskKind {
     Picture(PictureTask),
     CacheMask(CacheMaskTask),
     ClipRegion(ClipRegionTask),
     VerticalBlur(BlurTask),
     HorizontalBlur(BlurTask),
     #[allow(dead_code)]
     Glyph(GlyphTask),
     Readback(DeviceIntRect),
-    Scaling(RenderTargetKind),
+    Scaling(ScalingTask),
     Blit(BlitTask),
     Border(BorderTask),
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum ClearMode {
@@ -564,18 +573,19 @@ impl RenderTask {
             if adjusted_blur_target_size.width < MIN_DOWNSCALING_RT_SIZE ||
                adjusted_blur_target_size.height < MIN_DOWNSCALING_RT_SIZE {
                 break;
             }
             adjusted_blur_std_deviation *= 0.5;
             scale_factor *= 2.0;
             adjusted_blur_target_size = (blur_target_size.to_f32() / scale_factor).to_i32();
             let downscaling_task = RenderTask::new_scaling(
+                downscaling_src_task_id,
+                render_tasks,
                 target_kind,
-                downscaling_src_task_id,
                 adjusted_blur_target_size,
             );
             downscaling_src_task_id = render_tasks.add(downscaling_task);
         }
 
         let blur_task_v = RenderTask::with_dynamic_location(
             adjusted_blur_target_size,
             vec![downscaling_src_task_id],
@@ -613,24 +623,31 @@ impl RenderTask {
             RenderTaskKind::Border(BorderTask {
                 instances,
             }),
             ClearMode::Transparent,
         )
     }
 
     pub fn new_scaling(
+        src_task_id: RenderTaskId, // becomes the single child of the new task
+        render_tasks: &mut RenderTaskTree, // only indexed here, to look up the source task
         target_kind: RenderTargetKind,
-        src_task_id: RenderTaskId,
         target_size: DeviceIntSize,
     ) -> Self {
+        let uv_rect_kind = render_tasks[src_task_id].uv_rect_kind(); // reuse the source's UV rect kind; `uv_rect_kind()` hits `unreachable!` for CacheMask/Readback sources
+
         RenderTask::with_dynamic_location(
             target_size,
             vec![src_task_id],
-            RenderTaskKind::Scaling(target_kind),
+            RenderTaskKind::Scaling(ScalingTask {
+                target_kind,
+                uv_rect_handle: GpuCacheHandle::new(),
+                uv_rect_kind,
+            }),
             match target_kind {
                 RenderTargetKind::Color => ClearMode::Transparent,
                 RenderTargetKind::Alpha => ClearMode::One,
             },
         )
     }
 
     #[cfg(feature = "pathfinder")]
@@ -654,30 +671,33 @@ impl RenderTask {
             clear_mode: ClearMode::Transparent,
             saved_index: None,
         }
     }
 
     fn uv_rect_kind(&self) -> UvRectKind {
         match self.kind {
             RenderTaskKind::CacheMask(..) |
-            RenderTaskKind::Readback(..) |
-            RenderTaskKind::Scaling(..) => {
+            RenderTaskKind::Readback(..) => {
                 unreachable!("bug: unexpected render task");
             }
 
             RenderTaskKind::Picture(ref task) => {
                 task.uv_rect_kind
             }
 
             RenderTaskKind::VerticalBlur(ref task) |
             RenderTaskKind::HorizontalBlur(ref task) => {
                 task.uv_rect_kind
             }
 
+            RenderTaskKind::Scaling(ref task) => { // scaling tasks now carry their own kind instead of panicking
+                task.uv_rect_kind // value `new_scaling` copied from the task being scaled
+            }
+
             RenderTaskKind::ClipRegion(..) |
             RenderTaskKind::Glyph(_) |
             RenderTaskKind::Border(..) |
             RenderTaskKind::Blit(..) => {
                 UvRectKind::Rect
             }
         }
     }
@@ -831,18 +851,18 @@ impl RenderTask {
             RenderTaskKind::HorizontalBlur(ref task_info) => {
                 task_info.target_kind
             }
 
             RenderTaskKind::Glyph(..) => {
                 RenderTargetKind::Color
             }
 
-            RenderTaskKind::Scaling(target_kind) => {
-                target_kind
+            RenderTaskKind::Scaling(ref task_info) => {
+                task_info.target_kind
             }
 
             RenderTaskKind::Border(..) |
             RenderTaskKind::Picture(..) => {
                 RenderTargetKind::Color
             }
 
             RenderTaskKind::Blit(..) => {
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -33,29 +33,31 @@ use euclid::Transform3D;
 use frame_builder::{ChasePrimitive, FrameBuilderConfig};
 use gleam::gl;
 use glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 #[cfg(feature = "debug_renderer")]
 use gpu_cache::GpuDebugChunk;
 #[cfg(feature = "pathfinder")]
 use gpu_glyph_renderer::GpuGlyphRenderer;
+use gpu_types::ScalingInstance;
 use internal_types::{SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
 use internal_types::{CacheTextureId, DebugOutput, FastHashMap, RenderedDocument, ResultMsg};
 use internal_types::{TextureUpdateList, TextureUpdateOp, TextureUpdateSource};
 use internal_types::{RenderTargetInfo, SavedTargetIndex};
 use prim_store::DeferredResolve;
 use profiler::{BackendProfileCounters, FrameProfileCounters,
                GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use device::query::GpuProfiler;
 use rayon::{ThreadPool, ThreadPoolBuilder};
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
 use scene_builder::{SceneBuilder, LowPrioritySceneBuilder};
 use shade::Shaders;
+use smallvec::SmallVec;
 use render_task::{RenderTask, RenderTaskKind, RenderTaskTree};
 use resource_cache::ResourceCache;
 use util::drain_filter;
 
 use std;
 use std::cmp;
 use std::collections::VecDeque;
 use std::collections::hash_map::Entry;
@@ -66,17 +68,17 @@ use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use tiling::{AlphaRenderTarget, ColorRenderTarget};
 use tiling::{BlitJob, BlitJobSource, RenderPass, RenderPassKind, RenderTargetList};
-use tiling::{Frame, RenderTarget, RenderTargetKind, ScalingInfo, TextureCacheRenderTarget};
+use tiling::{Frame, RenderTarget, RenderTargetKind, TextureCacheRenderTarget};
 #[cfg(not(feature = "pathfinder"))]
 use tiling::GlyphJob;
 use time::precise_time_ns;
 
 cfg_if! {
     if #[cfg(feature = "debugger")] {
         use serde_json;
         use debug_server::{self, DebugServer};
@@ -428,16 +430,38 @@ pub(crate) mod desc {
             VertexAttribute {
                 name: "aClipParams2",
                 count: 4,
                 kind: VertexAttributeKind::F32,
             },
         ],
     };
 
+    pub const SCALE: VertexDescriptor = VertexDescriptor { // descriptor used to create `scale_vao`
+        vertex_attributes: &[
+            VertexAttribute {
+                name: "aPosition", // same per-vertex attribute that `desc::CLIP` declares
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+        instance_attributes: &[ // NOTE(review): presumably mirrors the field order of `gpu_types::ScalingInstance` -- confirm
+            VertexAttribute {
+                name: "aScaleRenderTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aScaleSourceTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
     pub const CLIP: VertexDescriptor = VertexDescriptor {
         vertex_attributes: &[
             VertexAttribute {
                 name: "aPosition",
                 count: 2,
                 kind: VertexAttributeKind::F32,
             },
         ],
@@ -574,16 +598,17 @@ pub(crate) mod desc {
 #[derive(Debug, Copy, Clone)]
 pub(crate) enum VertexArrayKind {
     Primitive,
     Blur,
     Clip,
     VectorStencil,
     VectorCover,
     Border,
+    Scale, // resolved to `RendererVAOs::scale_vao` by `get_vao`; used by `handle_scaling`
 }
 
 #[derive(Clone, Debug, PartialEq)]
 pub enum GraphicsApi {
     OpenGL,
 }
 
 #[derive(Clone, Debug)]
@@ -757,23 +782,48 @@ impl SourceTextureResolver {
     }
 
     fn begin_frame(&mut self) {
         assert!(self.cache_rgba8_texture.is_none());
         assert!(self.cache_a8_texture.is_none());
         assert!(self.saved_textures.is_empty());
     }
 
-    fn end_frame(&mut self) {
+    fn end_frame(&mut self, device: &mut Device, frame_id: FrameId) {
         // return the cached targets to the pool
         self.end_pass(None, None);
         // return the global alpha texture
         self.render_target_pool.extend(self.shared_alpha_texture.take());
         // return the saved targets as well
         self.render_target_pool.extend(self.saved_textures.drain(..));
+
+        // GC the render target pool.
+        //
+        // We use a simple scheme whereby we drop any texture that hasn't been used
+        // in the last 30 frames. This should generally prevent any sustained build-
+        // up of unused textures, unless we don't generate frames for a long period.
+        // This can happen when the window is minimized, and we probably want to
+        // flush all the WebRender caches in that case [1].
+        //
+        // [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1494099
+        self.retain_targets(device, |texture| texture.used_recently(frame_id, 30));
+    }
+
+    /// Drops all targets from the render target pool that do not satisfy the predicate; rejected textures are handed to `device.delete_texture`, kept ones return to the pool in their original order.
+    pub fn retain_targets<F: Fn(&Texture) -> bool>(&mut self, device: &mut Device, f: F) {
+        // We can't just use retain() because `Texture` requires manual cleanup, so the pool is drained and the survivors are staged in `tmp`.
+        let mut tmp = SmallVec::<[Texture; 8]>::new();
+        for target in self.render_target_pool.drain(..) {
+            if f(&target) {
+                tmp.push(target);
+            } else {
+                device.delete_texture(target); // per the `UpdateResources` handler's comment, the device asserts if textures are deleted outside a frame
+            }
+        }
+        self.render_target_pool.extend(tmp); // put the survivors back into the (now empty) pool
+    }
 
     fn end_pass(
         &mut self,
         a8_texture: Option<ActiveTexture>,
         rgba8_texture: Option<ActiveTexture>,
     ) {
         // If we have cache textures from previous pass, return them to the pool.
@@ -867,16 +917,31 @@ impl SourceTextureResolver {
             SourceTexture::TextureCache(index) => {
                 Some(&self.cache_texture_map[index.0])
             }
             SourceTexture::RenderTaskCache(saved_index) => {
                 Some(&self.saved_textures[saved_index.0])
             }
         }
     }
+
+    fn report_memory(&self) -> MemoryReport { // texture byte totals owned by this resolver
+        let mut report = MemoryReport::default();
+
+        // We're reporting GPU memory rather than heap-allocations, so we don't
+        // use size_of_op.
+        for t in self.cache_texture_map.iter() {
+            report.texture_cache_textures += t.size_in_bytes();
+        }
+        for t in self.render_target_pool.iter() { // only textures sitting in the pool; `saved_textures` is a separate list and is not walked here
+            report.render_target_textures += t.size_in_bytes();
+        }
+
+        report // folded into the renderer-wide report with `+=` in `Renderer::report_memory`
+    }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BlendMode {
     None,
     Alpha,
@@ -1315,21 +1380,24 @@ impl LazyInitializedDebugRenderer {
 
     pub fn deinit(self, device: &mut Device) {
         if let Some(debug_renderer) = self.debug_renderer {
             debug_renderer.deinit(device);
         }
     }
 }
 
+// NB: If you add more VAOs here, be sure to deinitialize them in
+// `Renderer::deinit()` below.
 pub struct RendererVAOs {
     prim_vao: VAO,
     blur_vao: VAO,
     clip_vao: VAO,
     border_vao: VAO,
+    scale_vao: VAO, // created from `desc::SCALE`; returned by `get_vao` for `VertexArrayKind::Scale`
 }
 
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
 pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     debug_server: DebugServer,
     pub device: Device,
@@ -1621,18 +1689,18 @@ impl Renderer {
         device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);
 
         let gpu_glyph_renderer = try!(GpuGlyphRenderer::new(&mut device,
                                                             &prim_vao,
                                                             options.precache_shaders));
 
         let blur_vao = device.create_vao_with_new_instances(&desc::BLUR, &prim_vao);
         let clip_vao = device.create_vao_with_new_instances(&desc::CLIP, &prim_vao);
-        let border_vao =
-            device.create_vao_with_new_instances(&desc::BORDER, &prim_vao);
+        let border_vao = device.create_vao_with_new_instances(&desc::BORDER, &prim_vao);
+        let scale_vao = device.create_vao_with_new_instances(&desc::SCALE, &prim_vao);
         let texture_cache_upload_pbo = device.create_pbo();
 
         let texture_resolver = SourceTextureResolver::new(&mut device);
 
         let prim_header_f_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
         let prim_header_i_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAI32);
         let transforms_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
         let render_task_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
@@ -1813,16 +1881,17 @@ impl Renderer {
             last_time: 0,
             gpu_profile,
             gpu_glyph_renderer,
             vaos: RendererVAOs {
                 prim_vao,
                 blur_vao,
                 clip_vao,
                 border_vao,
+                scale_vao,
             },
             transforms_texture,
             prim_header_i_texture,
             prim_header_f_texture,
             render_task_texture,
             pipeline_info: PipelineInfo::default(),
             dither_matrix_texture,
             external_image_handler: None,
@@ -1937,27 +2006,37 @@ impl Renderer {
                     #[cfg(feature = "debug_renderer")]
                     {
                         self.gpu_cache_debug_chunks = mem::replace(&mut list.debug_chunks, Vec::new());
                     }
                     self.pending_gpu_cache_updates.push(list);
                 }
                 ResultMsg::UpdateResources {
                     updates,
-                    cancel_rendering,
+                    memory_pressure,
                 } => {
                     self.pending_texture_updates.push(updates);
                     self.device.begin_frame();
                     self.update_texture_cache();
+
+                    // Flush the render target pool on memory pressure.
+                    //
+                    // This needs to be separate from the block below because
+                    // the device module asserts if we delete textures while
+                    // not in a frame.
+                    if memory_pressure {
+                        self.texture_resolver.retain_targets(&mut self.device, |_| false);
+                    }
+
                     self.device.end_frame();
                     // If we receive a `PublishDocument` message followed by this one
                     // within the same update we need to cancel the frame because we
                     // might have deleted the resources in use in the frame due to a
                     // memory pressure event.
-                    if cancel_rendering {
+                    if memory_pressure {
                         self.active_documents.clear();
                     }
                 }
                 ResultMsg::AppendNotificationRequests(mut notifications) => {
                     self.notifications.append(&mut notifications);
                 }
                 ResultMsg::RefreshShader(path) => {
                     self.pending_shader_updates.push(path);
@@ -2842,36 +2921,45 @@ impl Renderer {
                 source_rect,
                 blit.target_rect,
             );
         }
     }
 
     fn handle_scaling(
         &mut self,
-        render_tasks: &RenderTaskTree,
-        scalings: &Vec<ScalingInfo>,
+        scalings: &[ScalingInstance], // instances submitted in one `draw_instanced_batch` call
         source: SourceTexture,
+        projection: &Transform3D<f32>, // forwarded to the shader `bind` call
+        stats: &mut RendererStats, // forwarded to `draw_instanced_batch`
     ) {
-        let cache_texture = self.texture_resolver
-            .resolve(&source)
-            .unwrap();
-        for scaling in scalings {
-            let source = &render_tasks[scaling.src_task_id];
-            let dest = &render_tasks[scaling.dest_task_id];
-
-            let (source_rect, source_layer) = source.get_target_rect();
-            let (dest_rect, _) = dest.get_target_rect();
-
-            let cache_draw_target = (cache_texture, source_layer.0 as i32);
-            self.device
-                .bind_read_target(Some(cache_draw_target));
-
-            self.device.blit_render_target(source_rect, dest_rect);
+        if scalings.is_empty() { // nothing to draw: return before binding any shader
+            return
         }
+
+        match source { // choose the scale shader that matches the source cache format
+            SourceTexture::CacheRGBA8 => {
+                self.shaders.cs_scale_rgba8.bind(&mut self.device,
+                                                 &projection,
+                                                 &mut self.renderer_errors);
+            }
+            SourceTexture::CacheA8 => {
+                self.shaders.cs_scale_a8.bind(&mut self.device,
+                                              &projection,
+                                              &mut self.renderer_errors);
+            }
+            _ => unreachable!(), // the call sites visible in this patch pass only CacheRGBA8 or CacheA8
+        }
+
+        self.draw_instanced_batch( // draws every entry of `scalings` through the `Scale` VAO
+            &scalings,
+            VertexArrayKind::Scale,
+            &BatchTextures::no_texture(),
+            stats,
+        );
     }
 
     fn draw_color_target(
         &mut self,
         render_target: Option<(&Texture, i32)>,
         target: &ColorRenderTarget,
         framebuffer_target_rect: DeviceUintRect,
         target_size: DeviceUintSize,
@@ -2970,17 +3058,17 @@ impl Renderer {
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                     stats,
                 );
             }
         }
 
-        self.handle_scaling(render_tasks, &target.scalings, SourceTexture::CacheRGBA8);
+        self.handle_scaling(&target.scalings, SourceTexture::CacheRGBA8, projection, stats);
 
         //TODO: record the pixel count for cached primitives
 
         if target.needs_depth() {
             let _gl = self.gpu_profile.start_marker("opaque batches");
             let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
             self.set_blend(false, framebuffer_kind);
             //Note: depth equality is needed for split planes
@@ -3262,17 +3350,17 @@ impl Renderer {
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                     stats,
                 );
             }
         }
 
-        self.handle_scaling(render_tasks, &target.scalings, SourceTexture::CacheA8);
+        self.handle_scaling(&target.scalings, SourceTexture::CacheA8, projection, stats);
 
         // Draw the clip items into the tiled alpha mask.
         {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP);
 
             // switch to multiplicative blending
             self.set_blend(true, FramebufferKind::Other);
             self.set_blend_mode_multiply(FramebufferKind::Other);
@@ -3827,17 +3915,17 @@ impl Renderer {
             }
 
             self.texture_resolver.end_pass(
                 cur_alpha,
                 cur_color,
             );
         }
 
-        self.texture_resolver.end_frame();
+        self.texture_resolver.end_frame(&mut self.device, frame_id);
 
         #[cfg(feature = "debug_renderer")]
         {
             if let Some(framebuffer_size) = framebuffer_size {
                 self.draw_render_target_debug(framebuffer_size);
                 self.draw_texture_cache_debug(framebuffer_size);
                 self.draw_gpu_cache_debug(framebuffer_size);
             }
@@ -4106,16 +4194,17 @@ impl Renderer {
         self.prim_header_i_texture.deinit(&mut self.device);
         self.render_task_texture.deinit(&mut self.device);
         self.device.delete_pbo(self.texture_cache_upload_pbo);
         self.texture_resolver.deinit(&mut self.device);
         self.device.delete_vao(self.vaos.prim_vao);
         self.device.delete_vao(self.vaos.clip_vao);
         self.device.delete_vao(self.vaos.blur_vao);
         self.device.delete_vao(self.vaos.border_vao);
+        self.device.delete_vao(self.vaos.scale_vao);
 
         #[cfg(feature = "debug_renderer")]
         {
             self.debug.deinit(&mut self.device);
         }
 
         for (_, target) in self.output_targets {
             self.device.delete_fbo(target.fbo_id);
@@ -4133,25 +4222,40 @@ impl Renderer {
     fn size_of<T>(&self, ptr: *const T) -> usize {
         let op = self.size_of_op.as_ref().unwrap();
         unsafe { op(ptr as *const c_void) }
     }
 
     /// Collects a memory report.
     pub fn report_memory(&self) -> MemoryReport {
         let mut report = MemoryReport::default();
+
+        // GPU cache CPU memory.
         if let CacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
             report.gpu_cache_cpu_mirror += self.size_of(cpu_blocks.as_ptr());
         }
 
+        // GPU cache GPU memory.
+        report.gpu_cache_textures += self.gpu_cache_texture.texture.size_in_bytes();
+
+        // Render task CPU memory.
         for (_id, doc) in &self.active_documents {
             report.render_tasks += self.size_of(doc.frame.render_tasks.tasks.as_ptr());
             report.render_tasks += self.size_of(doc.frame.render_tasks.task_data.as_ptr());
         }
 
+        // Vertex data GPU memory.
+        report.vertex_data_textures += self.prim_header_f_texture.texture.size_in_bytes();
+        report.vertex_data_textures += self.prim_header_i_texture.texture.size_in_bytes();
+        report.vertex_data_textures += self.transforms_texture.texture.size_in_bytes();
+        report.vertex_data_textures += self.render_task_texture.texture.size_in_bytes();
+
+        // Texture cache and render target GPU memory.
+        report += self.texture_resolver.report_memory();
+
         report
     }
 
     // Sets the blend mode. Blend is unconditionally set if the "show overdraw" debugging mode is
     // enabled.
     fn set_blend(&self, mut blend: bool, framebuffer_kind: FramebufferKind) {
         if framebuffer_kind == FramebufferKind::Main &&
                 self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW) {
@@ -4776,30 +4880,32 @@ fn get_vao<'a>(vertex_array_kind: Vertex
                -> &'a VAO {
     match vertex_array_kind {
         VertexArrayKind::Primitive => &vaos.prim_vao,
         VertexArrayKind::Clip => &vaos.clip_vao,
         VertexArrayKind::Blur => &vaos.blur_vao,
         VertexArrayKind::VectorStencil => &gpu_glyph_renderer.vector_stencil_vao,
         VertexArrayKind::VectorCover => &gpu_glyph_renderer.vector_cover_vao,
         VertexArrayKind::Border => &vaos.border_vao,
+        VertexArrayKind::Scale => &vaos.scale_vao,
     }
 }
 
 #[cfg(not(feature = "pathfinder"))]
 fn get_vao<'a>(vertex_array_kind: VertexArrayKind,
                vaos: &'a RendererVAOs,
                _: &'a GpuGlyphRenderer)
                -> &'a VAO {
     match vertex_array_kind {
         VertexArrayKind::Primitive => &vaos.prim_vao,
         VertexArrayKind::Clip => &vaos.clip_vao,
         VertexArrayKind::Blur => &vaos.blur_vao,
         VertexArrayKind::VectorStencil | VertexArrayKind::VectorCover => unreachable!(),
         VertexArrayKind::Border => &vaos.border_vao,
+        VertexArrayKind::Scale => &vaos.scale_vao,
     }
 }
 
 #[derive(Clone, Copy, PartialEq)]
 enum FramebufferKind {
     Main,
     Other,
 }
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AddFont, BlobImageResources, AsyncBlobImageRasterizer, ResourceUpdate};
-use api::{BlobImageDescriptor, BlobImageHandler, BlobImageRequest};
+use api::{BlobImageDescriptor, BlobImageHandler, BlobImageRequest, RasterizedBlobImage};
 use api::{ClearCache, ColorF, DevicePoint, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{FontInstanceKey, FontKey, FontTemplate, GlyphIndex};
 use api::{ExternalImageData, ExternalImageType, BlobImageResult, BlobImageParams};
 use api::{FontInstanceData, FontInstanceOptions, FontInstancePlatformOptions, FontVariation};
 use api::{GlyphDimensions, IdNamespace};
 use api::{ImageData, ImageDescriptor, ImageKey, ImageRendering};
 use api::{MemoryReport, VoidPtrToSizeFn};
 use api::{TileOffset, TileSize, TileRange, NormalizedRect, BlobImageData};
@@ -28,16 +28,17 @@ use glyph_rasterizer::{FontInstance, Gly
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::UvRectKind;
 use image::{compute_tile_range, for_each_tile_in_range};
 use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
 use profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTaskCache, RenderTaskCacheKey, RenderTaskId};
 use render_task::{RenderTaskCacheEntry, RenderTaskCacheEntryHandle, RenderTaskTree};
+use smallvec::SmallVec;
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::collections::hash_map::IterMut;
 use std::{cmp, mem};
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::os::raw::c_void;
 #[cfg(any(feature = "capture", feature = "replay"))]
 use std::path::PathBuf;
@@ -95,18 +96,19 @@ pub struct ImageProperties {
 #[derive(Debug, Copy, Clone, PartialEq)]
 enum State {
     Idle,
     AddResources,
     QueryResources,
 }
 
 /// Post scene building state.
-struct RasterizedBlobImage {
-    data: FastHashMap<Option<TileOffset>, BlobImageResult>,
+enum RasterizedBlob {
+    Tiled(FastHashMap<TileOffset, RasterizedBlobImage>),
+    NonTiled(Vec<RasterizedBlobImage>),
 }
 
 /// Pre scene building state.
 /// We use this to generate the async blob rendering requests.
 struct BlobImageTemplate {
     descriptor: ImageDescriptor,
     tiling: Option<TileSize>,
     dirty_rect: Option<DeviceUintRect>,
@@ -400,17 +402,17 @@ pub struct ResourceCache {
     glyph_rasterizer: GlyphRasterizer,
 
     // The set of images that aren't present or valid in the texture cache,
     // and need to be rasterized and/or uploaded this frame. This includes
     // both blobs and regular images.
     pending_image_requests: FastHashSet<ImageRequest>,
 
     blob_image_handler: Option<Box<BlobImageHandler>>,
-    rasterized_blob_images: FastHashMap<ImageKey, RasterizedBlobImage>,
+    rasterized_blob_images: FastHashMap<ImageKey, RasterizedBlob>,
     blob_image_templates: FastHashMap<ImageKey, BlobImageTemplate>,
 
     // If while building a frame we encounter blobs that we didn't already
     // rasterize, add them to this list and rasterize them synchronously.
     missing_blob_images: Vec<BlobImageParams>,
     // The rasterizer associated with the current scene.
     blob_image_rasterizer: Option<Box<AsyncBlobImageRasterizer>>,
 }
@@ -599,20 +601,50 @@ impl ResourceCache {
     }
 
     pub fn set_blob_rasterizer(&mut self, rasterizer: Box<AsyncBlobImageRasterizer>) {
         self.blob_image_rasterizer = Some(rasterizer);
     }
 
     pub fn add_rasterized_blob_images(&mut self, images: Vec<(BlobImageRequest, BlobImageResult)>) {
         for (request, result) in images {
+            let data = match result {
+                Ok(data) => data,
+                Err(..) => {
+                    warn!("Failed to rasterize a blob image");
+                    continue;
+                }
+            };
+
+            // First make sure we have an entry for this key (using a placeholder
+            // if need be).
             let image = self.rasterized_blob_images.entry(request.key).or_insert_with(
-                || { RasterizedBlobImage { data: FastHashMap::default() } }
+                || { RasterizedBlob::Tiled(FastHashMap::default()) }
             );
-            image.data.insert(request.tile, result);
+
+            if let Some(tile) = request.tile {
+                if let RasterizedBlob::NonTiled(..) = *image {
+                    *image = RasterizedBlob::Tiled(FastHashMap::default());
+                }
+
+                if let RasterizedBlob::Tiled(ref mut tiles) = *image {
+                    tiles.insert(tile, data);
+                }
+            } else {
+                if let RasterizedBlob::NonTiled(ref mut queue) = *image {
+                    // If our new rasterized rect overwrites items in the queue, discard them.
+                    queue.retain(|img| {
+                        !data.rasterized_rect.contains_rect(&img.rasterized_rect)
+                    });
+
+                    queue.push(data);
+                } else {
+                    *image = RasterizedBlob::NonTiled(vec![data]);
+                }
+            }
         }
     }
 
     pub fn add_font_template(&mut self, font_key: FontKey, template: FontTemplate) {
         // Push the new font to the font renderer, and also store
         // it locally for glyph metric requests.
         self.glyph_rasterizer.add_font(font_key, template.clone());
         self.resources.font_templates.insert(font_key, template);
@@ -940,19 +972,20 @@ impl ResourceCache {
         }
 
         if !self.pending_image_requests.insert(request) {
             return
         }
 
         if template.data.is_blob() {
             let request: BlobImageRequest = request.into();
-            let missing = match self.rasterized_blob_images.get(&request.key) {
-                Some(img) => !img.data.contains_key(&request.tile),
-                None => true,
+            let missing = match (self.rasterized_blob_images.get(&request.key), request.tile) {
+                (Some(RasterizedBlob::Tiled(tiles)), Some(tile)) => !tiles.contains_key(&tile),
+                (Some(RasterizedBlob::NonTiled(ref queue)), None) => queue.is_empty(),
+                _ => true,
             };
 
             // For some reason the blob image is missing. We'll fall back to
             // rasterizing it on the render backend thread.
             if missing {
                 let descriptor = match template.tiling {
                     Some(tile_size) => {
                         let tile = request.tile.unwrap();
@@ -1077,23 +1110,42 @@ impl ResourceCache {
                                 tile: Some(tile),
                             },
                             descriptor,
                             dirty_rect: None,
                         }
                     );
                 });
             } else {
-                let needs_upload = match self.cached_images.try_get(&key) {
+                let mut needs_upload = match self.cached_images.try_get(&key) {
                     Some(&ImageResult::UntiledAuto(ref entry)) => {
                         self.texture_cache.needs_upload(&entry.texture_cache_handle)
                     }
                     _ => true,
                 };
 
+                // If the queue of rasterized updates is growing it probably means that
+                // the texture is not getting uploaded because the display item is off-screen.
+                // In that case we are better off either:
+                // - Not kicking off rasterization for that image (avoids wasted CPU work
+                //   but will jank next time the item is visible because of lazy rasterization).
+                // - Clobbering the update queue by pushing an update with a larger dirty rect
+                //   to prevent it from accumulating.
+                //
+                // We do the latter here but it's not ideal; we might want to revisit this and
+                // do the former instead.
+                match self.rasterized_blob_images.get(&key) {
+                    Some(RasterizedBlob::NonTiled(ref queue)) => {
+                        if queue.len() > 2 {
+                            needs_upload = true;
+                        }
+                    }
+                    _ => {},
+                };
+
                 let dirty_rect = if needs_upload {
                     // The texture cache entry has been evicted, treat it as all dirty.
                     None
                 } else {
                     template.dirty_rect
                 };
 
                 blob_request_params.push(
@@ -1129,38 +1181,29 @@ impl ResourceCache {
                 //println!("Missing image template (key={:?})!", key);
                 return;
             }
         };
         let tile_size = match template.tiling {
             Some(size) => size,
             None => { return; }
         };
-        let image = match self.rasterized_blob_images.get_mut(&key) {
-            Some(image) => image,
-            None => {
-                //println!("Missing rasterized blob (key={:?})!", key);
-                return;
-            }
+
+        let tiles = match self.rasterized_blob_images.get_mut(&key) {
+            Some(RasterizedBlob::Tiled(tiles)) => tiles,
+            _ => { return; }
         };
+
         let tile_range = compute_tile_range(
             &area,
             &template.descriptor.size,
             tile_size,
         );
-        image.data.retain(|tile, _| {
-            match *tile {
-                Some(offset) => tile_range.contains(&offset),
-                // This would be a bug. If we get here the blob should be tiled.
-                None => {
-                    error!("Blob image template and image data tiling don't match.");
-                    false
-                }
-            }
-        });
+
+        tiles.retain(|tile, _| { tile_range.contains(tile) });
     }
 
     pub fn request_glyphs(
         &mut self,
         mut font: FontInstance,
         glyph_keys: &[GlyphKey],
         gpu_cache: &mut GpuCache,
         render_task_tree: &mut RenderTaskTree,
@@ -1415,139 +1458,147 @@ impl ResourceCache {
         self.missing_blob_images.clear();
     }
 
     fn update_texture_cache(&mut self, gpu_cache: &mut GpuCache) {
         for request in self.pending_image_requests.drain() {
             let image_template = self.resources.image_templates.get_mut(request.key).unwrap();
             debug_assert!(image_template.data.uses_texture_cache());
 
-            let mut blob_rasterized_rect = None;
-            let image_data = match image_template.data {
+            let mut updates: SmallVec<[(ImageData, Option<DeviceUintRect>); 1]> = SmallVec::new();
+
+            match image_template.data {
                 ImageData::Raw(..) | ImageData::External(..) => {
                     // Safe to clone here since the Raw image data is an
                     // Arc, and the external image data is small.
-                    image_template.data.clone()
+                    updates.push((image_template.data.clone(), None));
                 }
                 ImageData::Blob(..) => {
-                    let blob_image = self.rasterized_blob_images.get(&request.key).unwrap();
-                    match blob_image.data.get(&request.tile) {
-                        Some(result) => {
-                            let result = result
-                                .as_ref()
-                                .expect("Failed to render a blob image");
 
-                            // TODO: we may want to not panic and show a placeholder instead.
-
-                            blob_rasterized_rect = Some(result.rasterized_rect);
-
-                            ImageData::Raw(Arc::clone(&result.data))
+                    let blob_image = self.rasterized_blob_images.get_mut(&request.key).unwrap();
+                    match (blob_image, request.tile) {
+                        (RasterizedBlob::Tiled(ref tiles), Some(tile)) => {
+                            let img = &tiles[&tile];
+                            updates.push((
+                                ImageData::Raw(Arc::clone(&img.data)),
+                                Some(img.rasterized_rect)
+                            ));
                         }
-                        None => {
+                        (RasterizedBlob::NonTiled(ref mut queue), None) => {
+                            for img in queue.drain(..) {
+                                updates.push((
+                                    ImageData::Raw(img.data),
+                                    Some(img.rasterized_rect)
+                                ));
+                            }
+                        }
+                        _ =>  {
                             debug_assert!(false, "invalid blob image request during frame building");
                             continue;
                         }
                     }
                 }
             };
 
-            let entry = match *self.cached_images.get_mut(&request.key) {
-                ImageResult::UntiledAuto(ref mut entry) => entry,
-                ImageResult::Multi(ref mut entries) => entries.get_mut(&request.into()),
-                ImageResult::Err(_) => panic!("Update requested for invalid entry")
-            };
-
-            match (blob_rasterized_rect, entry.dirty_rect) {
-                (Some(rasterized), Some(dirty)) => {
-                    debug_assert!(request.tile.is_some() || rasterized.contains_rect(&dirty));
-                }
-                _ => {}
-            }
-
-            let mut descriptor = image_template.descriptor.clone();
-            let local_dirty_rect;
-
-            if let Some(tile) = request.tile {
-                let tile_size = image_template.tiling.unwrap();
-                let clipped_tile_size = compute_tile_size(&descriptor, tile_size, tile);
-
-                local_dirty_rect = if let Some(rect) = entry.dirty_rect.take() {
-                    // We should either have a dirty rect, or we are re-uploading where the dirty
-                    // rect is ignored anyway.
-                    let intersection = intersect_for_tile(rect, clipped_tile_size, tile_size, tile);
-                    debug_assert!(intersection.is_some() ||
-                                  self.texture_cache.needs_upload(&entry.texture_cache_handle));
-                    intersection
-                } else {
-                    None
+            for (image_data, blob_rasterized_rect) in updates {
+                let entry = match *self.cached_images.get_mut(&request.key) {
+                    ImageResult::UntiledAuto(ref mut entry) => entry,
+                    ImageResult::Multi(ref mut entries) => entries.get_mut(&request.into()),
+                    ImageResult::Err(_) => panic!("Update requested for invalid entry")
                 };
 
-                // The tiled image could be stored on the CPU as one large image or be
-                // already broken up into tiles. This affects the way we compute the stride
-                // and offset.
-                let tiled_on_cpu = image_template.data.is_blob();
-                if !tiled_on_cpu {
-                    let bpp = descriptor.format.bytes_per_pixel();
-                    let stride = descriptor.compute_stride();
-                    descriptor.stride = Some(stride);
-                    descriptor.offset +=
-                        tile.y as u32 * tile_size as u32 * stride +
-                        tile.x as u32 * tile_size as u32 * bpp;
+                let mut descriptor = image_template.descriptor.clone();
+                let mut local_dirty_rect;
+
+                if let Some(tile) = request.tile {
+                    let tile_size = image_template.tiling.unwrap();
+                    let clipped_tile_size = compute_tile_size(&descriptor, tile_size, tile);
+
+                    local_dirty_rect = if let Some(rect) = entry.dirty_rect.take() {
+                        // We should either have a dirty rect, or we are re-uploading where the dirty
+                        // rect is ignored anyway.
+                        let intersection = intersect_for_tile(rect, clipped_tile_size, tile_size, tile);
+                        debug_assert!(intersection.is_some() ||
+                                      self.texture_cache.needs_upload(&entry.texture_cache_handle));
+                        intersection
+                    } else {
+                        None
+                    };
+
+                    // The tiled image could be stored on the CPU as one large image or be
+                    // already broken up into tiles. This affects the way we compute the stride
+                    // and offset.
+                    let tiled_on_cpu = image_template.data.is_blob();
+                    if !tiled_on_cpu {
+                        let bpp = descriptor.format.bytes_per_pixel();
+                        let stride = descriptor.compute_stride();
+                        descriptor.stride = Some(stride);
+                        descriptor.offset +=
+                            tile.y as u32 * tile_size as u32 * stride +
+                            tile.x as u32 * tile_size as u32 * bpp;
+                    }
+
+                    descriptor.size = clipped_tile_size;
+                } else {
+                    local_dirty_rect = entry.dirty_rect.take();
                 }
 
-                descriptor.size = clipped_tile_size;
-            } else {
-                local_dirty_rect = entry.dirty_rect.take();
-            }
-
-            let filter = match request.rendering {
-                ImageRendering::Pixelated => {
-                    TextureFilter::Nearest
+                // If we are uploading the dirty region of a blob image we might have several
+                // rects to upload so we use each of these rasterized rects rather than the
+                // overall dirty rect of the image.
+                if blob_rasterized_rect.is_some() {
+                    local_dirty_rect = blob_rasterized_rect;
                 }
-                ImageRendering::Auto | ImageRendering::CrispEdges => {
-                    // If the texture uses linear filtering, enable mipmaps and
-                    // trilinear filtering, for better image quality. We only
-                    // support this for now on textures that are not placed
-                    // into the shared cache. This accounts for any image
-                    // that is > 512 in either dimension, so it should cover
-                    // the most important use cases. We may want to support
-                    // mip-maps on shared cache items in the future.
-                    if descriptor.allow_mipmaps &&
-                       descriptor.size.width > 512 &&
-                       descriptor.size.height > 512 &&
-                       !self.texture_cache.is_allowed_in_shared_cache(
-                        TextureFilter::Linear,
-                        &descriptor,
-                    ) {
-                        TextureFilter::Trilinear
-                    } else {
-                        TextureFilter::Linear
+
+                let filter = match request.rendering {
+                    ImageRendering::Pixelated => {
+                        TextureFilter::Nearest
                     }
-                }
-            };
-
-            let eviction = if image_template.data.is_blob() {
-                Eviction::Manual
-            } else {
-                Eviction::Auto
-            };
+                    ImageRendering::Auto | ImageRendering::CrispEdges => {
+                        // If the texture uses linear filtering, enable mipmaps and
+                        // trilinear filtering, for better image quality. We only
+                        // support this for now on textures that are not placed
+                        // into the shared cache. This accounts for any image
+                        // that is > 512 in either dimension, so it should cover
+                        // the most important use cases. We may want to support
+                        // mip-maps on shared cache items in the future.
+                        if descriptor.allow_mipmaps &&
+                           descriptor.size.width > 512 &&
+                           descriptor.size.height > 512 &&
+                           !self.texture_cache.is_allowed_in_shared_cache(
+                            TextureFilter::Linear,
+                            &descriptor,
+                        ) {
+                            TextureFilter::Trilinear
+                        } else {
+                            TextureFilter::Linear
+                        }
+                    }
+                };
 
-            //Note: at this point, the dirty rectangle is local to the descriptor space
-            self.texture_cache.update(
-                &mut entry.texture_cache_handle,
-                descriptor,
-                filter,
-                Some(image_data),
-                [0.0; 3],
-                local_dirty_rect,
-                gpu_cache,
-                None,
-                UvRectKind::Rect,
-                eviction,
-            );
+                let eviction = if image_template.data.is_blob() {
+                    Eviction::Manual
+                } else {
+                    Eviction::Auto
+                };
+
+                //Note: at this point, the dirty rectangle is local to the descriptor space
+                self.texture_cache.update(
+                    &mut entry.texture_cache_handle,
+                    descriptor,
+                    filter,
+                    Some(image_data),
+                    [0.0; 3],
+                    local_dirty_rect,
+                    gpu_cache,
+                    None,
+                    UvRectKind::Rect,
+                    eviction,
+                );
+            }
         }
     }
 
     pub fn end_frame(&mut self) {
         debug_assert_eq!(self.state, State::QueryResources);
         self.state = State::Idle;
     }
 
--- a/gfx/webrender/src/segment.rs
+++ b/gfx/webrender/src/segment.rs
@@ -2,16 +2,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, ClipMode, LayoutPoint, LayoutPointAu, LayoutRect, LayoutSize};
 use app_units::Au;
 use prim_store::EdgeAaSegmentMask;
 use std::{cmp, usize};
 use util::{extract_inner_rect_safe, RectHelpers};
+use smallvec::SmallVec;
 
 bitflags! {
     pub struct ItemFlags: u8 {
         const X_ACTIVE = 0x1;
         const Y_ACTIVE = 0x2;
         const HAS_MASK = 0x4;
     }
 }
@@ -171,36 +172,51 @@ impl Item {
 #[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd)]
 struct ItemIndex(usize);
 
 // The main public interface to the segment module.
 pub struct SegmentBuilder {
     items: Vec<Item>,
     inner_rect: Option<LayoutRect>,
     bounding_rect: Option<LayoutRect>,
+    
+    #[cfg(debug_assertions)]
+    initialized: bool,
 }
 
 impl SegmentBuilder {
     // Create a new segment builder, supplying the primitive
     // local rect and associated local clip rect.
-    pub fn new(
+    pub fn new() -> SegmentBuilder {
+        SegmentBuilder {
+            items: Vec::with_capacity(4),
+            bounding_rect: None,
+            inner_rect: None,
+            #[cfg(debug_assertions)]
+            initialized: false,
+        }
+    }
+
+    pub fn initialize(
+        &mut self,
         local_rect: LayoutRect,
         inner_rect: Option<LayoutRect>,
         local_clip_rect: LayoutRect,
-    ) -> SegmentBuilder {
-        let mut builder = SegmentBuilder {
-            items: Vec::new(),
-            bounding_rect: Some(local_rect),
-            inner_rect,
-        };
+    ) {
+        self.items.clear();
+        self.inner_rect = inner_rect;
+        self.bounding_rect = Some(local_rect);
 
-        builder.push_clip_rect(local_rect, None, ClipMode::Clip);
-        builder.push_clip_rect(local_clip_rect, None, ClipMode::Clip);
-
-        builder
+        self.push_clip_rect(local_rect, None, ClipMode::Clip);
+        self.push_clip_rect(local_clip_rect, None, ClipMode::Clip);
+                    
+        #[cfg(debug_assertions)]
+        {
+            self.initialized = true;
+        }
     }
 
     // Push a region defined by an inner and outer rect where there
     // is a mask required. This ensures that segments which intersect
     // with these areas will get a clip mask task allocated. This
     // is currently used to mark where a box-shadow region can affect
     // the pixels of a clip-mask. It might be useful for other types
     // such as dashed and dotted borders in the future.
@@ -384,33 +400,33 @@ impl SegmentBuilder {
                     mode,
                     false,
                 ))
             }
         }
     }
 
     // Consume this segment builder and produce a list of segments.
-    pub fn build<F>(self, mut f: F) where F: FnMut(&Segment) {
+    pub fn build<F>(&mut self, mut f: F) where F: FnMut(&Segment) {
+        #[cfg(debug_assertions)]
+        debug_assert!(self.initialized);
         let bounding_rect = match self.bounding_rect {
             Some(bounding_rect) => bounding_rect,
             None => return,
         };
 
-        let mut items = self.items;
-
         // First, filter out any items that don't intersect
         // with the visible bounding rect.
-        items.retain(|item| item.rect.intersects(&bounding_rect));
+        self.items.retain(|item| item.rect.intersects(&bounding_rect));
 
         // Create events for each item
-        let mut x_events = Vec::new();
-        let mut y_events = Vec::new();
+        let mut x_events : SmallVec<[Event; 4]> = SmallVec::new();
+        let mut y_events : SmallVec<[Event; 4]> = SmallVec::new();
 
-        for (item_index, item) in items.iter().enumerate() {
+        for (item_index, item) in self.items.iter().enumerate() {
             let p0 = item.rect.origin;
             let p1 = item.rect.bottom_right();
 
             x_events.push(Event::begin(p0.x, item_index));
             x_events.push(Event::end(p1.x, item_index));
             y_events.push(Event::begin(p0.y, item_index));
             y_events.push(Event::end(p1.y, item_index));
         }
@@ -451,17 +467,17 @@ impl SegmentBuilder {
         // Each coordinate is clamped to the bounds of the minimal
         // bounding rect. This ensures that we don't generate segments
         // outside that bounding rect, but does allow correctly handling
         // clips where the clip region starts outside the minimal
         // rect but still intersects with it.
 
         let mut prev_y = clamp(p0.y, y_events[0].value, p1.y);
         let mut region_y = 0;
-        let mut segments = Vec::new();
+        let mut segments : SmallVec<[_; 4]> = SmallVec::new();
         let mut x_count = 0;
         let mut y_count = 0;
 
         for ey in &y_events {
             let cur_y = clamp(p0.y, ey.value, p1.y);
 
             if cur_y != prev_y {
                 let mut prev_x = clamp(p0.x, x_events[0].value, p1.x);
@@ -473,39 +489,39 @@ impl SegmentBuilder {
                     if cur_x != prev_x {
                         segments.push(emit_segment_if_needed(
                             prev_x,
                             prev_y,
                             cur_x,
                             cur_y,
                             region_x,
                             region_y,
-                            &items,
+                            &self.items,
                         ));
 
                         prev_x = cur_x;
                         if y_count == 0 {
                             x_count += 1;
                         }
                     }
 
                     ex.update(
                         ItemFlags::X_ACTIVE,
-                        &mut items,
+                        &mut self.items,
                         &mut region_x,
                     );
                 }
 
                 prev_y = cur_y;
                 y_count += 1;
             }
 
             ey.update(
                 ItemFlags::Y_ACTIVE,
-                &mut items,
+                &mut self.items,
                 &mut region_y,
             );
         }
 
         // Run user supplied closure for each valid segment.
         debug_assert_eq!(segments.len(), x_count * y_count);
         for y in 0 .. y_count {
             for x in 0 .. x_count {
@@ -525,16 +541,21 @@ impl SegmentBuilder {
                 }
 
                 if let Some(ref mut segment) = segments[y * x_count + x] {
                     segment.edge_flags = edge_flags;
                     f(segment);
                 }
             }
         }
+      
+        #[cfg(debug_assertions)]
+        {
+            self.initialized = false;
+        }
     }
 }
 
 fn clamp(low: Au, value: Au, high: Au) -> Au {
     value.max(low).min(high)
 }
 
 fn emit_segment_if_needed(
@@ -645,17 +666,18 @@ mod test {
 
     fn seg_test(
         local_rect: LayoutRect,
         inner_rect: Option<LayoutRect>,
         local_clip_rect: LayoutRect,
         clips: &[(LayoutRect, Option<BorderRadius>, ClipMode)],
         expected_segments: &mut [Segment]
     ) {
-        let mut sb = SegmentBuilder::new(
+        let mut sb = SegmentBuilder::new();
+        sb.initialize(
             local_rect,
             inner_rect,
             local_clip_rect,
         );
         let mut segments = Vec::new();
         for &(rect, radius, mode) in clips {
             sb.push_clip_rect(rect, radius, mode);
         }
--- a/gfx/webrender/src/shade.rs
+++ b/gfx/webrender/src/shade.rs
@@ -364,16 +364,17 @@ fn create_prim_shader(
 
     let vertex_descriptor = match vertex_format {
         VertexArrayKind::Primitive => desc::PRIM_INSTANCES,
         VertexArrayKind::Blur => desc::BLUR,
         VertexArrayKind::Clip => desc::CLIP,
         VertexArrayKind::VectorStencil => desc::VECTOR_STENCIL,
         VertexArrayKind::VectorCover => desc::VECTOR_COVER,
         VertexArrayKind::Border => desc::BORDER,
+        VertexArrayKind::Scale => desc::SCALE,
     };
 
     let program = device.create_program(name, &prefix, &vertex_descriptor);
 
     if let Ok(ref program) = program {
         device.bind_shader_samplers(
             program,
             &[
@@ -420,25 +421,28 @@ fn create_clip_shader(name: &'static str
                 ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
         );
     }
 
     program
 }
 
-
+// NB: If you add a new shader here, make sure to deinitialize it
+// in `Shaders::deinit()` below.
 pub struct Shaders {
     // These are "cache shaders". These shaders are used to
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
     pub cs_blur_a8: LazilyCompiledShader,
     pub cs_blur_rgba8: LazilyCompiledShader,
     pub cs_border_segment: LazilyCompiledShader,
     pub cs_border_solid: LazilyCompiledShader,
+    pub cs_scale_a8: LazilyCompiledShader,
+    pub cs_scale_rgba8: LazilyCompiledShader,
 
     // Brush shaders
     brush_solid: BrushShader,
     brush_image: Vec<Option<BrushShader>>,
     brush_blend: BrushShader,
     brush_mix_blend: BrushShader,
     brush_yuv_image: Vec<Option<BrushShader>>,
     brush_radial_gradient: BrushShader,
@@ -571,16 +575,32 @@ impl Shaders {
         let cs_clip_image = LazilyCompiledShader::new(
             ShaderKind::ClipCache,
             "cs_clip_image",
             &[],
             device,
             options.precache_shaders,
         )?;
 
+        let cs_scale_a8 = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Scale),
+            "cs_scale",
+            &["ALPHA_TARGET"],
+            device,
+            options.precache_shaders,
+        )?;
+
+        let cs_scale_rgba8 = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Scale),
+            "cs_scale",
+            &["COLOR_TARGET"],
+            device,
+            options.precache_shaders,
+        )?;
+
         let ps_text_run = TextShader::new("ps_text_run",
             device,
             &[],
             options.precache_shaders,
         )?;
 
         let ps_text_run_dual_source = TextShader::new("ps_text_run",
             device,
@@ -684,16 +704,18 @@ impl Shaders {
             device.delete_custom_vao(vao);
         }
 
         Ok(Shaders {
             cs_blur_a8,
             cs_blur_rgba8,
             cs_border_segment,
             cs_border_solid,
+            cs_scale_a8,
+            cs_scale_rgba8,
             brush_solid,
             brush_image,
             brush_blend,
             brush_mix_blend,
             brush_yuv_image,
             brush_radial_gradient,
             brush_linear_gradient,
             cs_clip_rectangle,
@@ -737,17 +759,17 @@ impl Shaders {
                         &mut self.brush_mix_blend
                     }
                     BrushBatchKind::RadialGradient => {
                         &mut self.brush_radial_gradient
                     }
                     BrushBatchKind::LinearGradient => {
                         &mut self.brush_linear_gradient
                     }
-                    BrushBatchKind::YuvImage(image_buffer_kind, format, color_space) => {
+                    BrushBatchKind::YuvImage(image_buffer_kind, format, _color_depth, color_space) => {
                         let shader_index =
                             Self::get_yuv_shader_index(image_buffer_kind, format, color_space);
                         self.brush_yuv_image[shader_index]
                             .as_mut()
                             .expect("Unsupported YUV shader kind")
                     }
                 };
                 brush_shader.get(key.blend_mode, debug_flags)
@@ -758,16 +780,18 @@ impl Shaders {
                     _ => &mut self.ps_text_run,
                 };
                 text_shader.get(glyph_format, debug_flags)
             }
         }
     }
 
     pub fn deinit(self, device: &mut Device) {
+        self.cs_scale_a8.deinit(device);
+        self.cs_scale_rgba8.deinit(device);
         self.cs_blur_a8.deinit(device);
         self.cs_blur_rgba8.deinit(device);
         self.brush_solid.deinit(device);
         self.brush_blend.deinit(device);
         self.brush_mix_blend.deinit(device);
         self.brush_radial_gradient.deinit(device);
         self.brush_linear_gradient.deinit(device);
         self.cs_clip_rectangle.deinit(device);
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -233,16 +233,17 @@ impl EvictionNotice {
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct TextureCache {
     // A lazily allocated, fixed size, texture array for
     // each format the texture cache supports.
     array_rgba8_nearest: TextureArray,
     array_a8_linear: TextureArray,
+    array_a16_linear: TextureArray,
     array_rgba8_linear: TextureArray,
 
     // Maximum texture size supported by hardware.
     max_texture_size: u32,
 
     // A list of texture IDs that represent native
     // texture handles. This indirection allows the texture
     // cache to create / destroy / reuse texture handles
@@ -276,16 +277,21 @@ impl TextureCache {
     pub fn new(max_texture_size: u32) -> Self {
         TextureCache {
             max_texture_size,
             array_a8_linear: TextureArray::new(
                 ImageFormat::R8,
                 TextureFilter::Linear,
                 TEXTURE_ARRAY_LAYERS_LINEAR,
             ),
+            array_a16_linear: TextureArray::new(
+                ImageFormat::R16,
+                TextureFilter::Linear,
+                TEXTURE_ARRAY_LAYERS_LINEAR,
+            ),
             array_rgba8_linear: TextureArray::new(
                 ImageFormat::BGRA8,
                 TextureFilter::Linear,
                 TEXTURE_ARRAY_LAYERS_LINEAR,
             ),
             array_rgba8_nearest: TextureArray::new(
                 ImageFormat::BGRA8,
                 TextureFilter::Nearest,
@@ -328,16 +334,24 @@ impl TextureCache {
         if let Some(texture_id) = self.array_a8_linear.clear() {
             self.pending_updates.push(TextureUpdate {
                 id: texture_id,
                 op: TextureUpdateOp::Free,
             });
             self.cache_textures.free(texture_id, self.array_a8_linear.format);
         }
 
+        if let Some(texture_id) = self.array_a16_linear.clear() {
+            self.pending_updates.push(TextureUpdate {
+                id: texture_id,
+                op: TextureUpdateOp::Free,
+            });
+            self.cache_textures.free(texture_id, self.array_a16_linear.format);
+        }
+
         if let Some(texture_id) = self.array_rgba8_linear.clear() {
             self.pending_updates.push(TextureUpdate {
                 id: texture_id,
                 op: TextureUpdateOp::Free,
             });
             self.cache_textures.free(texture_id, self.array_rgba8_linear.format);
         }
 
@@ -354,16 +368,18 @@ impl TextureCache {
         self.frame_id = frame_id;
     }
 
     pub fn end_frame(&mut self, texture_cache_profile: &mut TextureCacheProfileCounters) {
         self.expire_old_standalone_entries();
 
         self.array_a8_linear
             .update_profile(&mut texture_cache_profile.pages_a8_linear);
+        self.array_a16_linear
+            .update_profile(&mut texture_cache_profile.pages_a16_linear);
         self.array_rgba8_linear
             .update_profile(&mut texture_cache_profile.pages_rgba8_linear);
         self.array_rgba8_nearest
             .update_profile(&mut texture_cache_profile.pages_rgba8_nearest);
     }
 
     // Request an item in the texture cache. All images that will
     // be used on a frame *must* have request() called on their
@@ -506,23 +522,26 @@ impl TextureCache {
     // Get a specific region by index from a shared texture array.
     fn get_region_mut(&mut self,
         format: ImageFormat,
         filter: TextureFilter,
         region_index: u16
     ) -> &mut TextureRegion {
         let texture_array = match (format, filter) {
             (ImageFormat::R8, TextureFilter::Linear) => &mut self.array_a8_linear,
+            (ImageFormat::R16, TextureFilter::Linear) => &mut self.array_a16_linear,
             (ImageFormat::BGRA8, TextureFilter::Linear) => &mut self.array_rgba8_linear,
             (ImageFormat::BGRA8, TextureFilter::Nearest) => &mut self.array_rgba8_nearest,
             (ImageFormat::RGBAF32, _) |
             (ImageFormat::RG8, _) |
             (ImageFormat::RGBAI32, _) |
             (ImageFormat::R8, TextureFilter::Nearest) |
             (ImageFormat::R8, TextureFilter::Trilinear) |
+            (ImageFormat::R16, TextureFilter::Nearest) |
+            (ImageFormat::R16, TextureFilter::Trilinear) |
             (ImageFormat::BGRA8, TextureFilter::Trilinear) => unreachable!(),
         };
 
         &mut texture_array.regions[region_index as usize]
     }
 
     // Check if a given texture handle has a valid allocation
     // in the texture cache.
@@ -655,17 +674,17 @@ impl TextureCache {
     fn expire_old_shared_entries(&mut self, required_alloc: &ImageDescriptor) {
         let mut eviction_candidates = Vec::new();
         let mut retained_entries = Vec::new();
 
         // Build a list of eviction candidates (which are
         // anything not used this frame).
         for handle in self.shared_entry_handles.drain(..) {
             let entry = self.entries.get(&handle);
-            if entry.last_access == self.frame_id {
+            if entry.eviction == Eviction::Manual || entry.last_access == self.frame_id {
                 retained_entries.push(handle);
             } else {
                 eviction_candidates.push(handle);
             }
         }
 
         // Sort by access time so we remove the oldest ones first.
         eviction_candidates.sort_by_key(|handle| {
@@ -743,22 +762,25 @@ impl TextureCache {
         descriptor: &ImageDescriptor,
         filter: TextureFilter,
         user_data: [f32; 3],
         uv_rect_kind: UvRectKind,
     ) -> Option<CacheEntry> {
         // Work out which cache it goes in, based on format.
         let texture_array = match (descriptor.format, filter) {
             (ImageFormat::R8, TextureFilter::Linear) => &mut self.array_a8_linear,
+            (ImageFormat::R16, TextureFilter::Linear) => &mut self.array_a16_linear,
             (ImageFormat::BGRA8, TextureFilter::Linear) => &mut self.array_rgba8_linear,
             (ImageFormat::BGRA8, TextureFilter::Nearest) => &mut self.array_rgba8_nearest,
             (ImageFormat::RGBAF32, _) |
             (ImageFormat::RGBAI32, _) |
             (ImageFormat::R8, TextureFilter::Nearest) |
             (ImageFormat::R8, TextureFilter::Trilinear) |
+            (ImageFormat::R16, TextureFilter::Nearest) |
+            (ImageFormat::R16, TextureFilter::Trilinear) |
             (ImageFormat::BGRA8, TextureFilter::Trilinear) |
             (ImageFormat::RG8, _) => unreachable!(),
         };
 
         // Lazy initialize this texture array if required.
         if texture_array.texture_id.is_none() {
             let texture_id = self.cache_textures.allocate(descriptor.format);
 
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -7,24 +7,25 @@ use api::{DeviceUintPoint, DeviceUintRec
 use api::{LayoutRect, MixBlendMode, PipelineId};
 use batch::{AlphaBatchBuilder, AlphaBatchContainer, ClipBatcher, resolve_image};
 use clip::{ClipDataStore, ClipStore};
 use clip_scroll_tree::{ClipScrollTree, SpatialNodeIndex};
 use device::{FrameId, Texture};
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
 use gpu_cache::{GpuCache};
-use gpu_types::{BorderInstance, BlurDirection, BlurInstance, PrimitiveHeaders, TransformData, TransformPalette};
+use gpu_types::{BorderInstance, BlurDirection, BlurInstance, PrimitiveHeaders, ScalingInstance};
+use gpu_types::{TransformData, TransformPalette};
 use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use prim_store::{PrimitiveIndex, PrimitiveStore, DeferredResolve};
 use profiler::FrameProfileCounters;
 use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind};
-use render_task::{BlurTask, ClearMode, GlyphTask, RenderTaskLocation, RenderTaskTree};
+use render_task::{BlurTask, ClearMode, GlyphTask, RenderTaskLocation, RenderTaskTree, ScalingTask};
 use resource_cache::ResourceCache;
 use std::{cmp, usize, f32, i32, mem};
 use texture_allocator::GuillotineAllocator;
 #[cfg(feature = "pathfinder")]
 use webrender_api::{DevicePixel, FontRenderMode};
 
 const MIN_TARGET_SIZE: u32 = 2048;
 const STYLE_SOLID: i32 = ((BorderStyle::Solid as i32) << 8) | ((BorderStyle::Solid as i32) << 16);
@@ -252,23 +253,16 @@ impl<T: RenderTarget> RenderTargetList<T
 /// pipeline exists at.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct FrameOutput {
     pub task_id: RenderTaskId,
     pub pipeline_id: PipelineId,
 }
 
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct ScalingInfo {
-    pub src_task_id: RenderTaskId,
-    pub dest_task_id: RenderTaskId,
-}
-
 // Defines where the source data for a blit job can be found.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BlitJobSource {
     Texture(SourceTexture, i32, DeviceIntRect),
     RenderTask(RenderTaskId),
 }
 
@@ -301,17 +295,17 @@ pub struct GlyphJob;
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ColorRenderTarget {
     pub alpha_batch_containers: Vec<AlphaBatchContainer>,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub readbacks: Vec<DeviceIntRect>,
-    pub scalings: Vec<ScalingInfo>,
+    pub scalings: Vec<ScalingInstance>,
     pub blits: Vec<BlitJob>,
     // List of frame buffer outputs for this render target.
     pub outputs: Vec<FrameOutput>,
     allocator: Option<TextureAllocator>,
     alpha_tasks: Vec<RenderTaskId>,
     screen_size: DeviceIntSize,
 }
 
@@ -442,19 +436,19 @@ impl RenderTarget for ColorRenderTarget 
             RenderTaskKind::Glyph(..) => {
                 // FIXME(pcwalton): Support color glyphs.
                 panic!("Glyphs should not be added to color target!");
             }
             RenderTaskKind::Readback(device_rect) => {
                 self.readbacks.push(device_rect);
             }
             RenderTaskKind::Scaling(..) => {
-                self.scalings.push(ScalingInfo {
-                    src_task_id: task.children[0],
-                    dest_task_id: task_id,
+                self.scalings.push(ScalingInstance {
+                    task_address: render_tasks.get_task_address(task_id),
+                    src_task_address: render_tasks.get_task_address(task.children[0]),
                 });
             }
             RenderTaskKind::Blit(ref task_info) => {
                 match task_info.source {
                     BlitSource::Image { key } => {
                         // Get the cache item for the source texture.
                         let cache_item = resolve_image(
                             key.request,
@@ -514,17 +508,17 @@ impl RenderTarget for ColorRenderTarget 
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct AlphaRenderTarget {
     pub clip_batcher: ClipBatcher,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
-    pub scalings: Vec<ScalingInfo>,
+    pub scalings: Vec<ScalingInstance>,
     pub zero_clears: Vec<RenderTaskId>,
     allocator: TextureAllocator,
 }
 
 impl RenderTarget for AlphaRenderTarget {
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint> {
         self.allocator.allocate(&size)
     }
@@ -605,21 +599,22 @@ impl RenderTarget for AlphaRenderTarget 
             }
             RenderTaskKind::ClipRegion(ref task) => {
                 let task_address = render_tasks.get_task_address(task_id);
                 self.clip_batcher.add_clip_region(
                     task_address,
                     task.clip_data_address,
                 );
             }
-            RenderTaskKind::Scaling(..) => {
-                self.scalings.push(ScalingInfo {
-                    src_task_id: task.children[0],
-                    dest_task_id: task_id,
-                });
+            RenderTaskKind::Scaling(ref info) => {
+                info.add_instances(
+                    &mut self.scalings,
+                    render_tasks.get_task_address(task_id),
+                    render_tasks.get_task_address(task.children[0]),
+                );
             }
         }
     }
 
     fn used_rect(&self) -> DeviceIntRect {
         self.allocator.used_rect
     }
 
@@ -1037,16 +1032,32 @@ impl BlurTask {
             src_task_address,
             blur_direction,
         };
 
         instances.push(instance);
     }
 }
 
+impl ScalingTask {
+    fn add_instances(
+        &self,
+        instances: &mut Vec<ScalingInstance>,
+        task_address: RenderTaskAddress,
+        src_task_address: RenderTaskAddress,
+    ) {
+        let instance = ScalingInstance {
+            task_address,
+            src_task_address,
+        };
+
+        instances.push(instance);
+    }
+}
+
 pub struct SpecialRenderPasses {
     pub alpha_glyph_pass: RenderPass,
     pub color_glyph_pass: RenderPass,
 }
 
 impl SpecialRenderPasses {
     pub fn new(screen_size: &DeviceIntSize) -> SpecialRenderPasses {
         SpecialRenderPasses {
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -746,38 +746,52 @@ impl PipelineId {
         PipelineId(0, 0)
     }
 }
 
 /// Collection of heap sizes, in bytes.
 #[repr(C)]
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct MemoryReport {
+    //
+    // CPU memory.
+    //
     pub primitive_stores: usize,
     pub clip_stores: usize,
     pub gpu_cache_metadata: usize,
     pub gpu_cache_cpu_mirror: usize,
     pub render_tasks: usize,
     pub hit_testers: usize,
     pub fonts: usize,
     pub images: usize,
     pub rasterized_blobs: usize,
+    //
+    // GPU memory.
+    //
+    pub gpu_cache_textures: usize,
+    pub vertex_data_textures: usize,
+    pub render_target_textures: usize,
+    pub texture_cache_textures: usize,
 }
 
 impl ::std::ops::AddAssign for MemoryReport {
     fn add_assign(&mut self, other: MemoryReport) {
         self.primitive_stores += other.primitive_stores;
         self.clip_stores += other.clip_stores;
         self.gpu_cache_metadata += other.gpu_cache_metadata;
         self.gpu_cache_cpu_mirror += other.gpu_cache_cpu_mirror;
         self.render_tasks += other.render_tasks;
         self.hit_testers += other.hit_testers;
         self.fonts += other.fonts;
         self.images += other.images;
         self.rasterized_blobs += other.rasterized_blobs;
+        self.gpu_cache_textures += other.gpu_cache_textures;
+        self.vertex_data_textures += other.vertex_data_textures;
+        self.render_target_textures += other.render_target_textures;
+        self.texture_cache_textures += other.texture_cache_textures;
     }
 }
 
 /// A C function that takes a pointer to a heap allocation and returns its size.
 ///
 /// This is borrowed from the malloc_size_of crate, upon which we want to avoid
 /// a dependency from WebRender.
 pub type VoidPtrToSizeFn = unsafe extern "C" fn(ptr: *const c_void) -> usize;
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -4,17 +4,21 @@
 
 #[cfg(any(feature = "serialize", feature = "deserialize"))]
 use GlyphInstance;
 use euclid::{SideOffsets2D, TypedRect};
 use std::ops::Not;
 use {ColorF, FontInstanceKey, GlyphOptions, ImageKey, LayoutPixel, LayoutPoint};
 use {LayoutRect, LayoutSize, LayoutTransform, LayoutVector2D, PipelineId, PropertyBinding};
 use LayoutSideOffsets;
+use image::ColorDepth;
 
+// Maximum blur radius.
+// Taken from nsCSSRendering.cpp in Gecko.
+pub const MAX_BLUR_RADIUS: f32 = 300.;
 
 // NOTE: some of these structs have an "IMPLICIT" comment.
 // This indicates that the BuiltDisplayList will have serialized
 // a list of values nearby that this item consumes. The traversal
 // iterator should handle finding these.
 
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub struct ClipAndScrollInfo {
@@ -605,16 +609,34 @@ pub enum FilterOp {
     Invert(f32),
     Opacity(PropertyBinding<f32>, f32),
     Saturate(f32),
     Sepia(f32),
     DropShadow(LayoutVector2D, f32, ColorF),
     ColorMatrix([f32; 20]),
 }
 
+impl FilterOp {
+    /// Ensure that the parameters for a filter operation
+    /// are sensible.
+    pub fn sanitize(self) -> FilterOp {
+        match self {
+            FilterOp::Blur(radius) => {
+                let radius = radius.min(MAX_BLUR_RADIUS);
+                FilterOp::Blur(radius)
+            }
+            FilterOp::DropShadow(offset, radius, color) => {
+                let radius = radius.min(MAX_BLUR_RADIUS);
+                FilterOp::DropShadow(offset, radius, color)
+            }
+            filter => filter,
+        }
+    }
+}
+
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct IframeDisplayItem {
     pub clip_id: ClipId,
     pub pipeline_id: PipelineId,
     pub ignore_missing_pipeline: bool,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
@@ -639,16 +661,17 @@ pub enum ImageRendering {
 pub enum AlphaType {
     Alpha = 0,
     PremultipliedAlpha = 1,
 }
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct YuvImageDisplayItem {
     pub yuv_data: YuvData,
+    pub color_depth: ColorDepth,
     pub color_space: YuvColorSpace,
     pub image_rendering: ImageRendering,
 }
 
 #[repr(u32)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum YuvColorSpace {
     Rec601 = 0,
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -19,17 +19,17 @@ use {ColorF, ComplexClipRegion, DisplayI
 use {FontInstanceKey, GlyphInstance, GlyphOptions, RasterSpace, Gradient, GradientBuilder};
 use {GradientDisplayItem, GradientStop, IframeDisplayItem, ImageDisplayItem, ImageKey, ImageMask};
 use {ImageRendering, LayoutPoint, LayoutPrimitiveInfo, LayoutRect, LayoutSideOffsets, LayoutSize};
 use {LayoutTransform, LayoutVector2D, LineDisplayItem, LineOrientation, LineStyle, MixBlendMode};
 use {PipelineId, PropertyBinding, PushReferenceFrameDisplayListItem};
 use {PushStackingContextDisplayItem, RadialGradient, RadialGradientDisplayItem};
 use {RectangleDisplayItem, ReferenceFrame, ScrollFrameDisplayItem, ScrollSensitivity, Shadow};
 use {SpecificDisplayItem, StackingContext, StickyFrameDisplayItem, StickyOffsetBounds};
-use {TextDisplayItem, TransformStyle, YuvColorSpace, YuvData, YuvImageDisplayItem};
+use {TextDisplayItem, TransformStyle, YuvColorSpace, YuvData, YuvImageDisplayItem, ColorDepth};
 
 // We don't want to push a long text-run. If a text-run is too long, split it into several parts.
 // This needs to be set to (renderer::MAX_VERTEX_TEXTURE_WIDTH - VECS_PER_TEXT_RUN) * 2
 pub const MAX_TEXT_RUN_LENGTH: usize = 2040;
 
 // We start at 2, because the root reference is always 0 and the root scroll node is always 1.
 // TODO(mrobinson): It would be a good idea to eliminate the root scroll frame which is only
 // used by Servo.
@@ -83,16 +83,18 @@ pub struct BuiltDisplayListDescriptor {
     /// The second IPC time stamp: after serialization
     builder_finish_time: u64,
     /// The third IPC time stamp: just before sending
     send_start_time: u64,
     /// The amount of clipping nodes created while building this display list.
     total_clip_nodes: usize,
     /// The amount of spatial nodes created while building this display list.
     total_spatial_nodes: usize,
+    /// An estimate of the number of primitives that will be created by this display list.
+    prim_count_estimate: usize,
 }
 
 pub struct BuiltDisplayListIter<'a> {
     list: &'a BuiltDisplayList,
     data: &'a [u8],
     cur_item: DisplayItem,
     cur_stops: ItemRange<GradientStop>,
     cur_glyphs: ItemRange<GlyphInstance>,
@@ -140,16 +142,20 @@ impl BuiltDisplayList {
     pub fn item_slice(&self) -> &[u8] {
         &self.data[..]
     }
 
     pub fn descriptor(&self) -> &BuiltDisplayListDescriptor {
         &self.descriptor
     }
 
+    pub fn prim_count_estimate(&self) -> usize {
+        self.descriptor.prim_count_estimate
+    }
+
     pub fn times(&self) -> (u64, u64, u64) {
         (
             self.descriptor.builder_start_time,
             self.descriptor.builder_finish_time,
             self.descriptor.send_start_time,
         )
     }
 
@@ -598,16 +604,17 @@ impl<'de> Deserialize<'de> for BuiltDisp
         Ok(BuiltDisplayList {
             data,
             descriptor: BuiltDisplayListDescriptor {
                 builder_start_time: 0,
                 builder_finish_time: 1,
                 send_start_time: 0,
                 total_clip_nodes,
                 total_spatial_nodes,
+                prim_count_estimate: 0,
             },
         })
     }
 }
 
 // This is a replacement for bincode::serialize_into(&vec)
 // The default implementation Write for Vec will basically
 // call extend_from_slice(). Serde ends up calling that for every
@@ -834,16 +841,17 @@ pub struct SaveState {
 
 #[derive(Clone)]
 pub struct DisplayListBuilder {
     pub data: Vec<u8>,
     pub pipeline_id: PipelineId,
     clip_stack: Vec<ClipAndScrollInfo>,
     next_clip_index: usize,
     next_spatial_index: usize,
+    prim_count_estimate: usize,
     next_clip_chain_id: u64,
     builder_start_time: u64,
 
     /// The size of the content of this display list. This is used to allow scrolling
     /// outside the bounds of the display list items themselves.
     content_size: LayoutSize,
     save_state: Option<SaveState>,
 }
@@ -863,16 +871,17 @@ impl DisplayListBuilder {
         DisplayListBuilder {
             data: Vec::with_capacity(capacity),
             pipeline_id,
             clip_stack: vec![
                 ClipAndScrollInfo::simple(ClipId::root_scroll_node(pipeline_id)),
             ],
             next_clip_index: FIRST_CLIP_NODE_INDEX,
             next_spatial_index: FIRST_SPATIAL_NODE_INDEX,
+            prim_count_estimate: 0,
             next_clip_chain_id: 0,
             builder_start_time: start_time,
             content_size,
             save_state: None,
         }
     }
 
     /// Return the content size for this display list
@@ -948,32 +957,34 @@ impl DisplayListBuilder {
     }
 
     /// Add an item to the display list.
     ///
     /// NOTE: It is usually preferable to use the specialized methods to push
     /// display items. Pushing unexpected or invalid items here may
     /// result in WebRender panicking or behaving in unexpected ways.
     pub fn push_item(&mut self, item: SpecificDisplayItem, info: &LayoutPrimitiveInfo) {
+        self.prim_count_estimate += 1;
         serialize_fast(
             &mut self.data,
             &DisplayItem {
                 item,
                 clip_and_scroll: *self.clip_stack.last().unwrap(),
                 info: *info,
             },
         )
     }
 
     fn push_item_with_clip_scroll_info(
         &mut self,
         item: SpecificDisplayItem,
         info: &LayoutPrimitiveInfo,
         scrollinfo: ClipAndScrollInfo
     ) {
+        self.prim_count_estimate += 1;
         serialize_fast(
             &mut self.data,
             &DisplayItem {
                 item,
                 clip_and_scroll: scrollinfo,
                 info: *info,
             },
         )
@@ -1085,21 +1096,23 @@ impl DisplayListBuilder {
         self.push_item(item, info);
     }
 
     /// Push a yuv image. All planar data in yuv image should use the same buffer type.
     pub fn push_yuv_image(
         &mut self,
         info: &LayoutPrimitiveInfo,
         yuv_data: YuvData,
+        color_depth: ColorDepth,
         color_space: YuvColorSpace,
         image_rendering: ImageRendering,
     ) {
         let item = SpecificDisplayItem::YuvImage(YuvImageDisplayItem {
             yuv_data,
+            color_depth,
             color_space,
             image_rendering,
         });
         self.push_item(item, info);
     }
 
     pub fn push_text(
         &mut self,
@@ -1496,14 +1509,15 @@ impl DisplayListBuilder {
             self.content_size,
             BuiltDisplayList {
                 descriptor: BuiltDisplayListDescriptor {
                     builder_start_time: self.builder_start_time,
                     builder_finish_time: end_time,
                     send_start_time: 0,
                     total_clip_nodes: self.next_clip_index,
                     total_spatial_nodes: self.next_spatial_index,
+                    prim_count_estimate: self.prim_count_estimate,
                 },
                 data: self.data,
             },
         )
     }
 }
--- a/gfx/webrender_api/src/image.rs
+++ b/gfx/webrender_api/src/image.rs
@@ -85,40 +85,75 @@ pub struct ExternalImageData {
 /// Specifies the format of a series of pixels, in driver terms.
 #[repr(u32)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ImageFormat {
     /// One-channel, byte storage. The "red" doesn't map to the color
     /// red per se, and is just the way that OpenGL has historically referred
     /// to single-channel buffers.
     R8 = 1,
+    /// One channel, 16-bit (short) storage.
+    R16 = 2,
     /// Four channels, byte storage.
     BGRA8 = 3,
     /// Four channels, float storage.
     RGBAF32 = 4,
     /// Two-channels, byte storage. Similar to `R8`, this just means
     /// "two channels" rather than "red and green".
     RG8 = 5,
     /// Four channels, signed integer storage.
     RGBAI32 = 6,
 }
 
 impl ImageFormat {
     /// Returns the number of bytes per pixel for the given format.
     pub fn bytes_per_pixel(self) -> u32 {
         match self {
             ImageFormat::R8 => 1,
+            ImageFormat::R16 => 2,
             ImageFormat::BGRA8 => 4,
             ImageFormat::RGBAF32 => 16,
             ImageFormat::RG8 => 2,
             ImageFormat::RGBAI32 => 16,
         }
     }
 }
 
+/// Specifies the color depth of an image. Currently only used for YUV images.
+#[repr(u8)]
+#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
+pub enum ColorDepth {
+    /// 8-bit image (most common).
+    Color8,
+    /// 10-bit image.
+    Color10,
+    /// 12-bit image.
+    Color12,
+}
+
+impl ColorDepth {
+    /// Return the numerical bit depth value for the type.
+    pub fn bit_depth(self) -> u32 {
+        match self {
+            ColorDepth::Color8 => 8,
+            ColorDepth::Color10 => 10,
+            ColorDepth::Color12 => 12,
+        }
+    }
+    /// 10- and 12-bit samples are stored in 16-bit integers, so they must be
+    /// rescaled by 2^(16 - bit depth) to span the full 16-bit range.
+    pub fn rescaling_factor(self) -> f32 {
+        match self {
+            ColorDepth::Color8 => 1.0,
+            ColorDepth::Color10 => 64.0,
+            ColorDepth::Color12 => 16.0,
+        }
+    }
+}
+
 /// Metadata (but not storage) describing an image In WebRender.
 #[derive(Copy, Clone, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ImageDescriptor {
     /// Format of the image data.
     pub format: ImageFormat,
     /// Width and length of the image data, in pixels.
     pub size: DeviceUintSize,
     /// The number of bytes from the start of one row to the next. If non-None,
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -4,17 +4,17 @@ version = "0.1.0"
 authors = ["The Mozilla Project Developers"]
 license = "MPL-2.0"
 
 [dependencies]
 rayon = "1"
 thread_profiler = "0.1.1"
 euclid = { version = "0.19", features = ["serde"] }
 app_units = "0.7"
-gleam = "0.6"
+gleam = "0.6.2"
 log = "0.4"
 nsstring = { path = "../../servo/support/gecko/nsstring" }
 bincode = "1.0"
 uuid = { version = "0.5", features = ["v4"] }
 fxhash = "0.2.1"
 
 [dependencies.webrender]
 path = "../webrender"
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-2549b791abb3a24c115d03616ddd82e14727b5a1
+43e8d85789efb95099affe3257a9c254ef3d2f4c
--- a/gfx/wrench/Cargo.toml
+++ b/gfx/wrench/Cargo.toml
@@ -6,17 +6,17 @@ build = "build.rs"
 license = "MPL-2.0"
 
 [dependencies]
 base64 = "0.6"
 bincode = "1.0"
 byteorder = "1.0"
 env_logger = { version = "0.5", optional = true }
 euclid = "0.19"
-gleam = "0.6"
+gleam = "0.6.2"
 glutin = "0.17"
 app_units = "0.7"
 image = "0.19"
 clap = { version = "2", features = ["yaml"] }
 lazy_static = "1"
 log = "0.4"
 yaml-rust = { git = "https://github.com/vvuk/yaml-rust", features = ["preserve_order"] }
 serde_json = "1.0"
--- a/gfx/wrench/src/yaml_frame_reader.rs
+++ b/gfx/wrench/src/yaml_frame_reader.rs
@@ -168,16 +168,17 @@ fn is_image_opaque(format: ImageFormat, 
                     is_opaque = false;
                     break;
                 }
             }
             is_opaque
         }
         ImageFormat::RG8 => true,
         ImageFormat::R8 => false,
+        ImageFormat::R16 => false,
         ImageFormat::RGBAF32 |
         ImageFormat::RGBAI32 => unreachable!(),
     }
 }
 
 pub struct YamlFrameReader {
     frame_built: bool,
     yaml_path: PathBuf,
@@ -1012,17 +1013,18 @@ impl YamlFrameReader {
 
     fn handle_yuv_image(
         &mut self,
         dl: &mut DisplayListBuilder,
         wrench: &mut Wrench,
         item: &Yaml,
         info: &mut LayoutPrimitiveInfo,
     ) {
-        // TODO(gw): Support other YUV color spaces.
+        // TODO(gw): Support other YUV color depths and color spaces.
+        let color_depth = ColorDepth::Color8;
         let color_space = YuvColorSpace::Rec709;
 
         let yuv_data = match item["format"].as_str().expect("no format supplied") {
             "planar" => {
                 let y_path = rsrc_path(&item["src-y"], &self.aux_dir);
                 let (y_key, _) = self.add_or_get_image(&y_path, None, wrench);
 
                 let u_path = rsrc_path(&item["src-u"], &self.aux_dir);
@@ -1057,16 +1059,17 @@ impl YamlFrameReader {
         info.rect = LayoutRect::new(
             LayoutPoint::new(bounds[0], bounds[1]),
             LayoutSize::new(bounds[2], bounds[3]),
         );
 
         dl.push_yuv_image(
             &info,
             yuv_data,
+            color_depth,
             color_space,
             ImageRendering::Auto,
         );
     }
 
     fn handle_image(
         &mut self,
         dl: &mut DisplayListBuilder,