Bug 1572646 - Detect and optimize picture cache tiles that are solid colors. r=kvark
author: Glenn Watson <github@intuitionlibrary.com>
Mon, 12 Aug 2019 21:59:58 +0000
changeset 487544 96075609026a3816cb7a6dcf9cf3795afe7f82d6
parent 487543 0be0f56e7af6cc55824f382857943ae9c91686d1
child 487545 7b77bedf4ca3985787482064df89323576c6d616
push id: 36425
push user: btara@mozilla.com
push date: Tue, 13 Aug 2019 09:54:32 +0000
treeherdermozilla-central@e29ba984dad2 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: kvark
bugs: 1572646
milestone: 70.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1572646 - Detect and optimize picture cache tiles that are solid colors. r=kvark

With this patch, tiles that are covered only by the opaque backdrop primitive are detected and noted as solid colors.

Solid color tiles save memory and performance, because:
- No texture slice is allocated as a render target for them.
- No need to rasterize this tile.
- Drawing the tile is done with the faster rectangle shader.

This already saves performance and GPU memory on quite a few real world sites (esp. when running at 4k). However, the main benefit of this will be once we enable picture caching on multiple content slices and the UI layer. When this occurs, it's important to avoid allocating tile buffers for all the solid rectangle tiles that the UI layer typically contains.

Differential Revision: https://phabricator.services.mozilla.com/D41342
gfx/wr/webrender/src/batch.rs
gfx/wr/webrender/src/picture.rs
gfx/wr/wrench/reftests/boxshadow/reftest.list
gfx/wr/wrench/reftests/split/reftest.list
--- a/gfx/wr/webrender/src/batch.rs
+++ b/gfx/wr/webrender/src/batch.rs
@@ -9,17 +9,17 @@ use crate::clip::{ClipDataStore, ClipNod
 use crate::clip_scroll_tree::{ClipScrollTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex, CoordinateSystemId};
 use crate::glyph_rasterizer::GlyphFormat;
 use crate::gpu_cache::{GpuBlockData, GpuCache, GpuCacheHandle, GpuCacheAddress};
 use crate::gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders, ZBufferId, ZBufferIdGenerator};
 use crate::gpu_types::{ClipMaskInstance, SplitCompositeInstance, SnapOffsets};
 use crate::gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
 use crate::gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
 use crate::internal_types::{FastHashMap, SavedTargetIndex, Swizzle, TextureSource, Filter};
-use crate::picture::{Picture3DContext, PictureCompositeMode, PicturePrimitive};
+use crate::picture::{Picture3DContext, PictureCompositeMode, PicturePrimitive, TileSurface};
 use crate::prim_store::{DeferredResolve, EdgeAaSegmentMask, PrimitiveInstanceKind, PrimitiveVisibilityIndex, PrimitiveVisibilityMask};
 use crate::prim_store::{VisibleGradientTile, PrimitiveInstance, PrimitiveOpacity, SegmentInstanceIndex};
 use crate::prim_store::{BrushSegment, ClipMaskKind, ClipTaskIndex, VECS_PER_SEGMENT};
 use crate::prim_store::{recompute_snap_offsets};
 use crate::prim_store::image::ImageSource;
 use crate::render_backend::DataStores;
 use crate::render_task::{RenderTaskAddress, RenderTaskId, RenderTaskGraph};
 use crate::renderer::{BlendMode, ImageBufferKind, ShaderColorMode};
@@ -1186,47 +1186,88 @@ impl BatchBuilder {
                                 };
                                 let local_tile_clip_rect = LayoutRect::from_untyped(&tile_clip_rect.to_untyped());
 
                                 for key in &tile_cache.tiles_to_draw {
                                     let tile = &tile_cache.tiles[key];
 
                                     debug_assert!(tile.is_valid);
                                     let local_tile_rect = LayoutRect::from_untyped(&tile.rect.to_untyped());
+
+                                    // Draw the tile as either a texture image or solid rect.
+                                    let surface = tile.surface.as_ref().expect("no tile surface set!");
+                                    let (opacity, blend_mode, batch_params, prim_cache_address) = match surface {
+                                        TileSurface::Color { color } => {
+                                            let batch_params = BrushBatchParameters::shared(
+                                                BrushBatchKind::Solid,
+                                                BatchTextures::no_texture(),
+                                                [get_shader_opacity(1.0), 0, 0, 0],
+                                                0,
+                                            );
+
+                                            // TODO(gw): Maybe we could retain this GPU cache handle inside
+                                            //           the tile to avoid pushing per-frame GPU cache blocks.
+                                            let gpu_blocks = [
+                                                color.premultiplied().into(),
+                                            ];
+
+                                            let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
+                                            let prim_cache_address = gpu_cache.get_address(&gpu_handle);
+
+                                            (
+                                                PrimitiveOpacity::opaque(),
+                                                BlendMode::None,
+                                                batch_params,
+                                                prim_cache_address,
+                                            )
+                                        }
+                                        TileSurface::Texture { ref handle, .. } => {
+                                            let cache_item = ctx.resource_cache.texture_cache.get(handle);
+                                            let uv_rect_address = gpu_cache
+                                                .get_address(&cache_item.uv_rect_handle)
+                                                .as_int();
+
+                                            let batch_params = BrushBatchParameters::shared(
+                                                BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
+                                                BatchTextures::color(cache_item.texture_id),
+                                                [
+                                                    ShaderColorMode::Image as i32 | ((AlphaType::PremultipliedAlpha as i32) << 16),
+                                                    RasterizationSpace::Local as i32,
+                                                    get_shader_opacity(1.0),
+                                                    0,
+                                                ],
+                                                uv_rect_address,
+                                            );
+
+                                            if tile.is_opaque || tile_cache.is_opaque() {
+                                                (
+                                                    PrimitiveOpacity::opaque(),
+                                                    BlendMode::None,
+                                                    batch_params,
+                                                    prim_cache_address,
+                                                )
+                                            } else {
+                                                (
+                                                    PrimitiveOpacity::translucent(),
+                                                    BlendMode::PremultipliedAlpha,
+                                                    batch_params,
+                                                    prim_cache_address,
+                                                )
+                                            }
+                                        }
+                                    };
+
                                     let prim_header = PrimitiveHeader {
                                         local_rect: local_tile_rect,
                                         local_clip_rect: local_tile_clip_rect,
                                         snap_offsets: SnapOffsets::empty(),
                                         specific_prim_address: prim_cache_address,
                                         transform_id,
                                     };
 
-                                    let (opacity, blend_mode) = if tile.is_opaque || tile_cache.is_opaque() {
-                                        (PrimitiveOpacity::opaque(), BlendMode::None)
-                                    } else {
-                                        (PrimitiveOpacity::translucent(), BlendMode::PremultipliedAlpha)
-                                    };
-
-                                    let cache_item = ctx.resource_cache.texture_cache.get(&tile.handle);
-                                    let uv_rect_address = gpu_cache
-                                        .get_address(&cache_item.uv_rect_handle)
-                                        .as_int();
-                                    let textures = BatchTextures::color(cache_item.texture_id);
-                                    let batch_params = BrushBatchParameters::shared(
-                                        BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
-                                        textures,
-                                        [
-                                            ShaderColorMode::Image as i32 | ((AlphaType::PremultipliedAlpha as i32) << 16),
-                                            RasterizationSpace::Local as i32,
-                                            get_shader_opacity(1.0),
-                                            0,
-                                        ],
-                                        uv_rect_address,
-                                    );
-
                                     let prim_header_index = prim_headers.push(
                                         &prim_header,
                                         z_id,
                                         batch_params.prim_user_data,
                                     );
 
                                     self.add_segmented_prim_to_batch(
                                         None,
--- a/gfx/wr/webrender/src/picture.rs
+++ b/gfx/wr/webrender/src/picture.rs
@@ -248,18 +248,18 @@ struct TilePostUpdateContext<'a> {
     global_device_pixel_scale: DevicePixelScale,
 
     /// The visible part of the screen in world coords.
     global_screen_world_rect: WorldRect,
 
     /// Current state of transforms
     clip_scroll_tree: &'a ClipScrollTree,
 
-    /// The calculated opaque rect of the picture cache.
-    opaque_rect: PictureRect,
+    /// The calculated backdrop information for this cache instance.
+    backdrop: BackdropInfo,
 
     /// The spatial node of the picture cache.
     cache_spatial_node_index: SpatialNodeIndex,
 }
 
 // Mutable state passed to picture cache tiles during post_update
 struct TilePostUpdateState<'a> {
     /// Scratch buffer for drawing debug information.
@@ -330,66 +330,86 @@ impl PrimitiveDependencyInfo {
         }
     }
 }
 
 /// A stable ID for a given tile, to help debugging.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct TileId(usize);
 
+/// The backing surface for this tile.
+#[derive(Debug)]
+pub enum TileSurface {
+    Texture {
+        /// Handle to the texture cache entry which gets drawn to.
+        handle: TextureCacheHandle,
+        /// Bitfield specifying the dirty region(s) that are relevant to this tile.
+        visibility_mask: PrimitiveVisibilityMask,
+    },
+    Color {
+        color: ColorF,
+    },
+}
+
+impl TileSurface {
+    fn kind(&self) -> &'static str {
+        match *self {
+            TileSurface::Color { .. } => "Color",
+            TileSurface::Texture { .. } => "Texture",
+        }
+    }
+}
+
 /// Information about a cached tile.
 #[derive(Debug)]
 pub struct Tile {
     /// The current world rect of this tile.
     pub world_rect: WorldRect,
     /// The current local rect of this tile.
     pub rect: PictureRect,
     /// The local rect of the tile clipped to the overal picture local rect.
     clipped_rect: PictureRect,
     /// Uniquely describes the content of this tile, in a way that can be
     /// (reasonably) efficiently hashed and compared.
     pub descriptor: TileDescriptor,
-    /// Handle to the cached texture for this tile.
-    pub handle: TextureCacheHandle,
+    /// Handle to the backing surface for this tile.
+    pub surface: Option<TileSurface>,
     /// If true, this tile is marked valid, and the existing texture
     /// cache handle can be used. Tiles are invalidated during the
     /// build_dirty_regions method.
     pub is_valid: bool,
     /// If true, the content on this tile is the same as last frame.
     is_same_content: bool,
     /// The tile id is stable between display lists and / or frames,
     /// if the tile is retained. Useful for debugging tile evictions.
     pub id: TileId,
     /// The set of transforms that affect primitives on this tile we
     /// care about. Stored as a set here, and then collected, sorted
     /// and converted to transform key values during post_update.
     transforms: FastHashSet<SpatialNodeIndex>,
-    /// Bitfield specifying the dirty region(s) that are relevant to this tile.
-    visibility_mask: PrimitiveVisibilityMask,
     /// If true, the tile was determined to be opaque, which means blending
     /// can be disabled when drawing it.
     pub is_opaque: bool,
 }
 
 impl Tile {
     /// Construct a new, invalid tile.
     fn new(
         id: TileId,
     ) -> Self {
         Tile {
             rect: PictureRect::zero(),
             clipped_rect: PictureRect::zero(),
             world_rect: WorldRect::zero(),
-            handle: TextureCacheHandle::invalid(),
+            surface: None,
             descriptor: TileDescriptor::new(),
             is_same_content: false,
             is_valid: false,
             transforms: FastHashSet::default(),
             id,
-            visibility_mask: PrimitiveVisibilityMask::empty(),
             is_opaque: false,
         }
     }
 
     /// Invalidate a tile based on change in content. This
     /// must be called even if the tile is not currently
     /// visible on screen. We might be able to improve this
     /// later by changing how ComparableVec is used.
@@ -516,17 +536,17 @@ impl Tile {
     /// Called during tile cache instance post_update. Allows invalidation and dirty
     /// rect calculation after primitive dependencies have been updated.
     fn post_update(
         &mut self,
         ctx: &TilePostUpdateContext,
         state: &mut TilePostUpdateState,
     ) -> bool {
         // Check if this tile can be considered opaque.
-        self.is_opaque = ctx.opaque_rect.contains_rect(&self.clipped_rect);
+        self.is_opaque = ctx.backdrop.rect.contains_rect(&self.clipped_rect);
 
         // Update tile transforms
         let mut transform_spatial_nodes: Vec<SpatialNodeIndex> = self.transforms.drain().collect();
         transform_spatial_nodes.sort();
         for spatial_node_index in transform_spatial_nodes {
             // Note: this is the only place where we don't know beforehand if the tile-affecting
             // spatial node is below or above the current picture.
             let transform = if ctx.cache_spatial_node_index >= spatial_node_index {
@@ -551,45 +571,77 @@ impl Tile {
         //       is_image_dirty check may be incorrect.
         for image_key in self.descriptor.image_keys.items() {
             if state.resource_cache.is_image_dirty(*image_key) {
                 self.is_same_content = false;
                 break;
             }
         }
 
-        // Invalidate if the backing texture was evicted.
-        if state.resource_cache.texture_cache.is_allocated(&self.handle) {
-            // Request the backing texture so it won't get evicted this frame.
-            // We specifically want to mark the tile texture as used, even
-            // if it's detected not visible below and skipped. This is because
-            // we maintain the set of tiles we care about based on visibility
-            // during pre_update. If a tile still exists after that, we are
-            // assuming that it's either visible or we want to retain it for
-            // a while in case it gets scrolled back onto screen soon.
-            // TODO(gw): Consider switching to manual eviction policy?
-            state.resource_cache.texture_cache.request(&self.handle, state.gpu_cache);
-        } else {
-            // When a tile is invalidated, reset the opacity information
-            // so that it is recalculated during prim dependency updates.
-            self.is_valid = false;
-        }
-
         // Invalidate the tile based on the content changing.
         self.update_content_validity();
 
         // If there are no primitives there is no need to draw or cache it.
         if self.descriptor.prims.is_empty() {
             return false;
         }
 
         if !self.world_rect.intersects(&ctx.global_screen_world_rect) {
             return false;
         }
 
+        // See if this tile is a simple color, in which case we can just draw
+        // it as a rect, and avoid allocating a texture surface and drawing it.
+        let is_solid_color = self.descriptor.prims.len() == 1 && self.is_opaque;
+
+        // Set up the backing surface for this tile.
+        let mut surface = if is_solid_color {
+            // If we determine the tile can be represented by a color, set the
+            // surface unconditionally (this will drop any previously used
+            // texture cache backing surface).
+            TileSurface::Color {
+                color: ctx.backdrop.color,
+            }
+        } else {
+            // If this tile will be backed by a surface, we want to retain
+            // the texture handle from the previous frame, if possible. If
+            // the tile was previously a color, or not set, then just set
+            // up a new texture cache handle.
+            match self.surface.take() {
+                Some(old_surface @ TileSurface::Texture { .. }) => {
+                    old_surface
+                }
+                Some(TileSurface::Color { .. }) | None => {
+                    TileSurface::Texture {
+                        handle: TextureCacheHandle::invalid(),
+                        visibility_mask: PrimitiveVisibilityMask::empty(),
+                    }
+                }
+            }
+        };
+
+        if let TileSurface::Texture { ref handle, .. } = surface {
+            // Invalidate if the backing texture was evicted.
+            if state.resource_cache.texture_cache.is_allocated(handle) {
+                // Request the backing texture so it won't get evicted this frame.
+                // We specifically want to mark the tile texture as used, even
+                // if it's detected not visible below and skipped. This is because
+                // we maintain the set of tiles we care about based on visibility
+                // during pre_update. If a tile still exists after that, we are
+                // assuming that it's either visible or we want to retain it for
+                // a while in case it gets scrolled back onto screen soon.
+                // TODO(gw): Consider switching to manual eviction policy?
+                state.resource_cache.texture_cache.request(handle, state.gpu_cache);
+            } else {
+                // When a tile is invalidated, reset the opacity information
+                // so that it is recalculated during prim dependency updates.
+                self.is_valid = false;
+            }
+        }
+
         // Decide how to handle this tile when drawing this frame.
         if self.is_valid {
             if ctx.debug_flags.contains(DebugFlags::PICTURE_CACHING_DBG) {
                 let tile_device_rect = self.world_rect * ctx.global_device_pixel_scale;
                 let label_offset = DeviceVector2D::new(20.0, 30.0);
                 let color = if self.is_opaque {
                     debug_colors::GREEN
                 } else {
@@ -598,66 +650,75 @@ impl Tile {
                 state.scratch.push_debug_rect(
                     tile_device_rect,
                     color.scale_alpha(0.3),
                 );
                 if tile_device_rect.size.height >= label_offset.y {
                     state.scratch.push_debug_string(
                         tile_device_rect.origin + label_offset,
                         debug_colors::RED,
-                        format!("{:?}: is_opaque={}", self.id, self.is_opaque),
+                        format!("{:?}: is_opaque={} surface={}",
+                            self.id,
+                            self.is_opaque,
+                            surface.kind(),
+                        ),
                     );
                 }
             }
         } else {
             if ctx.debug_flags.contains(DebugFlags::PICTURE_CACHING_DBG) {
                 state.scratch.push_debug_rect(
                     self.world_rect * ctx.global_device_pixel_scale,
                     debug_colors::RED,
                 );
             }
 
             // Ensure that this texture is allocated.
-            if !state.resource_cache.texture_cache.is_allocated(&self.handle) {
-                let tile_size = DeviceIntSize::new(
-                    TILE_SIZE_WIDTH,
-                    TILE_SIZE_HEIGHT,
-                );
-                state.resource_cache.texture_cache.update_picture_cache(
-                    tile_size,
-                    &mut self.handle,
-                    state.gpu_cache,
-                );
-            }
-
-            self.visibility_mask = PrimitiveVisibilityMask::empty();
-            let dirty_region_index = state.dirty_region.dirty_rects.len();
-
-            // If we run out of dirty regions, then force the last dirty region to
-            // be a union of any remaining regions. This is an inefficiency, in that
-            // we'll add items to batches later on that are redundant / outside this
-            // tile, but it's really rare except in pathological cases (even on a
-            // 4k screen, the typical dirty region count is < 16).
-            if dirty_region_index < PrimitiveVisibilityMask::MAX_DIRTY_REGIONS {
-                self.visibility_mask.set_visible(dirty_region_index);
-
-                state.dirty_region.push(
-                    self.world_rect,
-                    self.visibility_mask,
-                );
-            } else {
-                self.visibility_mask.set_visible(PrimitiveVisibilityMask::MAX_DIRTY_REGIONS - 1);
-
-                state.dirty_region.include_rect(
-                    PrimitiveVisibilityMask::MAX_DIRTY_REGIONS - 1,
-                    self.world_rect,
-                );
+            if let TileSurface::Texture { ref mut handle, ref mut visibility_mask } = surface {
+                if !state.resource_cache.texture_cache.is_allocated(handle) {
+                    let tile_size = DeviceIntSize::new(
+                        TILE_SIZE_WIDTH,
+                        TILE_SIZE_HEIGHT,
+                    );
+                    state.resource_cache.texture_cache.update_picture_cache(
+                        tile_size,
+                        handle,
+                        state.gpu_cache,
+                    );
+                }
+
+                *visibility_mask = PrimitiveVisibilityMask::empty();
+                let dirty_region_index = state.dirty_region.dirty_rects.len();
+
+                // If we run out of dirty regions, then force the last dirty region to
+                // be a union of any remaining regions. This is an inefficiency, in that
+                // we'll add items to batches later on that are redundant / outside this
+                // tile, but it's really rare except in pathological cases (even on a
+                // 4k screen, the typical dirty region count is < 16).
+                if dirty_region_index < PrimitiveVisibilityMask::MAX_DIRTY_REGIONS {
+                    visibility_mask.set_visible(dirty_region_index);
+
+                    state.dirty_region.push(
+                        self.world_rect,
+                        *visibility_mask,
+                    );
+                } else {
+                    visibility_mask.set_visible(PrimitiveVisibilityMask::MAX_DIRTY_REGIONS - 1);
+
+                    state.dirty_region.include_rect(
+                        PrimitiveVisibilityMask::MAX_DIRTY_REGIONS - 1,
+                        self.world_rect,
+                    );
+                }
             }
         }
 
+        // Store the current surface backing info for use during batching.
+        self.surface = Some(surface);
+
         true
     }
 }
 
 /// Defines a key that uniquely identifies a primitive instance.
 #[derive(Debug, Clone)]
 pub struct PrimitiveDescriptor {
     /// Uniquely identifies the content of the primitive template.
@@ -922,16 +983,36 @@ impl ::std::fmt::Display for RecordedDir
 }
 
 impl ::std::fmt::Debug for RecordedDirtyRegion {
     fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
         ::std::fmt::Display::fmt(self, f)
     }
 }
 
+/// Stores information about the calculated opaque backdrop of this slice.
+#[derive(Debug, Copy, Clone)]
+struct BackdropInfo {
+    /// The picture space rectangle that is known to be opaque. This is used
+    /// to determine where subpixel AA can be used, and where alpha blending
+    /// can be disabled.
+    rect: PictureRect,
+    /// Color of the backdrop.
+    color: ColorF,
+}
+
+impl BackdropInfo {
+    fn empty() -> Self {
+        BackdropInfo {
+            rect: PictureRect::zero(),
+            color: ColorF::BLACK,
+        }
+    }
+}
+
 /// Represents a cache of tiles that make up a picture primitives.
 pub struct TileCacheInstance {
     /// Index of the tile cache / slice for this frame builder. It's determined
     /// by the setup_picture_caching method during flattening, which splits the
     /// picture tree into multiple slices. It's used as a simple input to the tile
     /// keys. It does mean we invalidate tiles if a new layer gets inserted / removed
     /// between display lists - this seems very unlikely to occur on most pages, but
     /// can be revisited if we ever notice that.
@@ -965,20 +1046,18 @@ pub struct TileCacheInstance {
     /// Any clips outside this viewport can be ignored (and must be removed so that
     /// we can draw outside the bounds of the viewport).
     pub world_viewport_rect: WorldRect,
     /// The surface index that this tile cache will be drawn into.
     surface_index: SurfaceIndex,
     /// The background color from the renderer. If this is set opaque, we know it's
     /// fine to clear the tiles to this and allow subpixel text on the first slice.
     pub background_color: Option<ColorF>,
-    /// The picture space rectangle that is known to be opaque. This is used
-    /// to determine where subpixel AA can be used, and where alpha blending
-    /// can be disabled.
-    pub opaque_rect: PictureRect,
+    /// Information about the calculated backdrop content of this cache.
+    backdrop: BackdropInfo,
     /// The allowed subpixel mode for this surface, which depends on the detected
     /// opacity of the background.
     pub subpixel_mode: SubpixelMode,
     /// The current fractional offset of the cache transform root. If this changes,
     /// all tiles need to be invalidated and redrawn, since snapping differences are
     /// likely to occur.
     fract_offset: PictureVector2D,
 }
@@ -1004,17 +1083,17 @@ impl TileCacheInstance {
             tile_bounds_p0: TileOffset::zero(),
             tile_bounds_p1: TileOffset::zero(),
             local_rect: PictureRect::zero(),
             local_clip_rect: PictureRect::zero(),
             tiles_to_draw: Vec::new(),
             world_viewport_rect: WorldRect::zero(),
             surface_index: SurfaceIndex(0),
             background_color,
-            opaque_rect: PictureRect::zero(),
+            backdrop: BackdropInfo::empty(),
             subpixel_mode: SubpixelMode::Allow,
             fract_offset: PictureVector2D::zero(),
         }
     }
 
     /// Returns true if this tile cache is considered opaque.
     pub fn is_opaque(&self) -> bool {
         // If known opaque due to background clear color and being the first slice.
@@ -1059,17 +1138,17 @@ impl TileCacheInstance {
         frame_state: &mut FrameVisibilityState,
     ) -> WorldRect {
         let tile_width = TILE_SIZE_WIDTH;
         let tile_height = TILE_SIZE_HEIGHT;
         self.surface_index = surface_index;
 
         // Reset the opaque rect + subpixel mode, as they are calculated
         // during the prim dependency checks.
-        self.opaque_rect = PictureRect::zero();
+        self.backdrop = BackdropInfo::empty();
         self.subpixel_mode = SubpixelMode::Allow;
 
         self.map_local_to_surface = SpaceMapper::new(
             self.spatial_node_index,
             PictureRect::from_untyped(&pic_rect.to_untyped()),
         );
 
         let pic_to_world_mapper = SpaceMapper::new_with_target(
@@ -1390,34 +1469,39 @@ impl TileCacheInstance {
                     // Specifically, we currently require:
                     //  - No opacity binding (to avoid resolving the opacity here).
                     //  - Color.a >= 1.0 (the primitive is opaque).
                     //  - Same coord system as picture cache (ensures rects are axis-aligned).
                     //  - No clip masks exist.
 
                     let on_picture_surface = surface_index == self.surface_index;
 
-                    let prim_is_opaque = match data_stores.prim[data_handle].kind {
-                        PrimitiveTemplateKind::Rectangle { ref color, .. } => color.a >= 1.0,
+                    let color = match data_stores.prim[data_handle].kind {
+                        PrimitiveTemplateKind::Rectangle { color, .. } => color,
                         _ => unreachable!(),
                     };
 
+                    let prim_is_opaque = color.a >= 1.0;
+
                     let same_coord_system = {
                         let prim_spatial_node = &clip_scroll_tree
                             .spatial_nodes[prim_instance.spatial_node_index.0 as usize];
                         let surface_spatial_node = &clip_scroll_tree
                             .spatial_nodes[self.spatial_node_index.0 as usize];
 
                         prim_spatial_node.coordinate_system_id == surface_spatial_node.coordinate_system_id
                     };
 
                     if let Some(ref clip_chain) = prim_clip_chain {
                         if prim_is_opaque && same_coord_system && !clip_chain.needs_mask && on_picture_surface {
-                            if clip_chain.pic_clip_rect.contains_rect(&self.opaque_rect) {
-                                self.opaque_rect = clip_chain.pic_clip_rect;
+                            if clip_chain.pic_clip_rect.contains_rect(&self.backdrop.rect) {
+                                self.backdrop = BackdropInfo {
+                                    rect: clip_chain.pic_clip_rect,
+                                    color,
+                                };
                             }
                         }
                     };
                 } else {
                     let opacity_binding = &opacity_binding_store[opacity_binding_index];
                     for binding in &opacity_binding.bindings {
                         prim_info.opacity_bindings.push(OpacityBinding::from(*binding));
                     }
@@ -1466,17 +1550,17 @@ impl TileCacheInstance {
                     // This is conservative - it may still end up that a subpx requested
                     // text run doesn't get subpx for other reasons (e.g. glyph size).
                     let subpx_requested = match run_data.font.render_mode {
                         FontRenderMode::Subpixel => true,
                         FontRenderMode::Alpha | FontRenderMode::Mono => false,
                     };
 
                     if on_picture_surface && subpx_requested {
-                        if !self.opaque_rect.contains_rect(&prim_info.prim_clip_rect) {
+                        if !self.backdrop.rect.contains_rect(&prim_info.prim_clip_rect) {
                             self.subpixel_mode = SubpixelMode::Deny;
                         }
                     }
                 }
             }
             PrimitiveInstanceKind::LineDecoration { .. } |
             PrimitiveInstanceKind::Clear { .. } |
             PrimitiveInstanceKind::NormalBorder { .. } |
@@ -1516,17 +1600,17 @@ impl TileCacheInstance {
     ) {
         self.tiles_to_draw.clear();
         self.dirty_region.clear();
 
         let ctx = TilePostUpdateContext {
             debug_flags: frame_context.debug_flags,
             global_device_pixel_scale: frame_context.global_device_pixel_scale,
             global_screen_world_rect: frame_context.global_screen_world_rect,
-            opaque_rect: self.opaque_rect,
+            backdrop: self.backdrop,
             cache_spatial_node_index: self.spatial_node_index,
             clip_scroll_tree: frame_context.clip_scroll_tree,
         };
 
         let mut state = TilePostUpdateState {
             resource_cache,
             gpu_cache,
             scratch,
@@ -2708,55 +2792,58 @@ impl PicturePrimitive {
                             if tile.is_valid {
                                 // Register active image keys of valid tile.
                                 for image_key in tile.descriptor.image_keys.items() {
                                     frame_state.resource_cache.set_image_active(*image_key);
                                 }
                                 continue;
                             }
 
-                            let content_origin_f = tile.world_rect.origin * device_pixel_scale;
-                            let content_origin = content_origin_f.round();
-                            debug_assert!((content_origin_f.x - content_origin.x).abs() < 0.01);
-                            debug_assert!((content_origin_f.y - content_origin.y).abs() < 0.01);
-
-                            let cache_item = frame_state.resource_cache.texture_cache.get(&tile.handle);
-
-                            let task = RenderTask::new_picture(
-                                RenderTaskLocation::PictureCache {
-                                    texture: cache_item.texture_id,
-                                    layer: cache_item.texture_layer,
-                                    size: tile_size.to_i32(),
-                                },
-                                tile_size,
-                                pic_index,
-                                content_origin.to_i32(),
-                                UvRectKind::Rect,
-                                surface_spatial_node_index,
-                                device_pixel_scale,
-                                tile.visibility_mask,
-                            );
-
-                            let render_task_id = frame_state.render_tasks.add(task);
-
-                            frame_state.render_tasks.add_dependency(
-                                frame_state.surfaces[parent_surface_index.0].render_tasks.unwrap().port,
-                                render_task_id,
-                            );
-
-                            if first {
-                                // TODO(gw): Maybe we can restructure this code to avoid the
-                                //           first hack here. Or at least explain it with a follow up
-                                //           bug.
-                                frame_state.surfaces[raster_config.surface_index.0].render_tasks = Some(SurfaceRenderTasks {
-                                    root: render_task_id,
-                                    port: render_task_id,
-                                });
-
-                                first = false;
+                            let surface = tile.surface.as_ref().expect("no tile surface set!");
+                            if let TileSurface::Texture { ref handle, visibility_mask } = surface {
+                                let content_origin_f = tile.world_rect.origin * device_pixel_scale;
+                                let content_origin = content_origin_f.round();
+                                debug_assert!((content_origin_f.x - content_origin.x).abs() < 0.01);
+                                debug_assert!((content_origin_f.y - content_origin.y).abs() < 0.01);
+
+                                let cache_item = frame_state.resource_cache.texture_cache.get(handle);
+
+                                let task = RenderTask::new_picture(
+                                    RenderTaskLocation::PictureCache {
+                                        texture: cache_item.texture_id,
+                                        layer: cache_item.texture_layer,
+                                        size: tile_size.to_i32(),
+                                    },
+                                    tile_size,
+                                    pic_index,
+                                    content_origin.to_i32(),
+                                    UvRectKind::Rect,
+                                    surface_spatial_node_index,
+                                    device_pixel_scale,
+                                    *visibility_mask,
+                                );
+
+                                let render_task_id = frame_state.render_tasks.add(task);
+
+                                frame_state.render_tasks.add_dependency(
+                                    frame_state.surfaces[parent_surface_index.0].render_tasks.unwrap().port,
+                                    render_task_id,
+                                );
+
+                                if first {
+                                    // TODO(gw): Maybe we can restructure this code to avoid the
+                                    //           first hack here. Or at least explain it with a follow up
+                                    //           bug.
+                                    frame_state.surfaces[raster_config.surface_index.0].render_tasks = Some(SurfaceRenderTasks {
+                                        root: render_task_id,
+                                        port: render_task_id,
+                                    });
+
+                                    first = false;
+                                }
                             }
 
                             tile.is_valid = true;
                         }
 
                         None
                     }
                     PictureCompositeMode::MixBlend(..) |
--- a/gfx/wr/wrench/reftests/boxshadow/reftest.list
+++ b/gfx/wr/wrench/reftests/boxshadow/reftest.list
@@ -8,17 +8,17 @@ platform(linux,mac) == boxshadow-spread-
 fuzzy(1,396) == inset-large-offset.yaml inset-large-offset-ref.png
 platform(linux,mac) == inset-border-radius.yaml inset-border-radius.png
 platform(linux,mac) == inset-offset.yaml inset-offset.png
 platform(linux,mac) == inset-neg-offset.yaml inset-neg-offset.png
 == box-shadow-empty.yaml blank.yaml
 platform(linux,mac) == box-shadow-suite-no-blur.yaml box-shadow-suite-no-blur.png
 platform(linux,mac) == box-shadow-suite-blur.yaml box-shadow-suite-blur.png
 == box-shadow-large-blur-radius.yaml box-shadow-large-blur-radius-ref.yaml
-fuzzy(1,6388) == rounding.yaml rounding-ref.yaml
+skip_on(android,emulator) fuzzy(1,6388) == rounding.yaml rounding-ref.yaml
 platform(linux,mac) == box-shadow-border-radii.yaml box-shadow-border-radii.png
 skip_on(android) == box-shadow-spread.yaml box-shadow-spread.png  # Too wide for Android
 == box-shadow-spread-radii.yaml box-shadow-spread-radii-ref.yaml
 == invalid.yaml invalid-ref.yaml
 == inset-empty.yaml blank.yaml
 platform(linux,mac) == inset-subpx.yaml inset-subpx.png
 platform(linux,mac) fuzzy(1,4) == inset-downscale.yaml inset-downscale.png
 platform(linux,mac) fuzzy(1,50) == box-shadow-cache.yaml box-shadow-cache.png
--- a/gfx/wr/wrench/reftests/split/reftest.list
+++ b/gfx/wr/wrench/reftests/split/reftest.list
@@ -1,16 +1,16 @@
 skip_on(android,emulator) == simple.yaml simple-ref.yaml
 == order-1.yaml order-1-ref.yaml
 == order-2.yaml order-2-ref.yaml
-== nested.yaml nested-ref.yaml
+skip_on(android,emulator) == nested.yaml nested-ref.yaml
 # fuzziness is needed due to perspective for the edge
 fuzzy(35,200) == nested-coord-systems.yaml nested-coord-systems-ref.yaml
 == nested-preserve3d-crash.yaml nested-preserve3d-crash.yaml
-== perspective-clipping.yaml perspective-clipping-ref.yaml
+skip_on(android,emulator) == perspective-clipping.yaml perspective-clipping-ref.yaml
 == intermediate-1.yaml intermediate-1-ref.yaml
 == intermediate-2.yaml intermediate-1-ref.yaml
 == split-intersect1.yaml split-intersect1-ref.yaml
 == ordering.yaml ordering-ref.yaml
 skip_on(android,emulator) fuzzy(1,20) == near-plane.yaml near-plane.png  # Fails on Android emulator
 # Note: on windows the image is rendered at a slightly different spot.
 # similarly, a lot of tests in "transform" are non-windows. TODO: investigate
 platform(linux,mac) fuzzy(1,20) == same-plane.yaml same-plane.png