Bug 1579235 - Part 9 - Optimize compositor surface overlays. r=Bert
author: Glenn Watson <gw@intuitionlibrary.com>
Mon, 02 Mar 2020 21:50:25 +0000
changeset 516499 f6dc6b38288cfcb9f5f70932830d4a3d2b3759b2
parent 516498 77e15113810771d1e1b2829de2312e0f2a79bb72
child 516500 5024580528108cbff5b06dfefb7e7a44b9d26df1
push id: 37174
push user: btara@mozilla.com
push date: Tue, 03 Mar 2020 03:53:06 +0000
treeherder: mozilla-central@f080b12f030a [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: Bert
bugs: 1579235
milestone: 75.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1579235 - Part 9 - Optimize compositor surface overlays. r=Bert

This patch improves the performance of compositor surfaces in two ways:

(1) Ignore primitives behind the first compositor surface when determining whether a tile needs to be moved to the overlay (alpha) pass. This means WR only moves a tile to the alpha pass when it has primitives that overlap with the compositor surface bounding rect and that are ordered after that compositor surface. In practice, this means most tiles are able to remain in the fast (opaque) path. Typically, a small number of tiles that contain overlay video controls are moved to the alpha pass.

(2) Register the opaque compositor surfaces as potential occluders. This allows tiles that are completely covered by a compositor surface to be removed from the compositor visual tree, which helps both the simple and native compositor modes.

Between them, these optimizations typically mean that when watching video in full-screen, nothing is composited except the video surface itself, and some small region(s) where video overlay controls are currently active.

Differential Revision: https://phabricator.services.mozilla.com/D64909
gfx/wr/webrender/src/batch.rs
gfx/wr/webrender/src/composite.rs
gfx/wr/webrender/src/gpu_types.rs
gfx/wr/webrender/src/picture.rs
gfx/wr/webrender/src/prim_store/mod.rs
--- a/gfx/wr/webrender/src/batch.rs
+++ b/gfx/wr/webrender/src/batch.rs
@@ -1222,22 +1222,20 @@ impl BatchBuilder {
                                 // TODO(gw): As a follow up to the valid_rect work, see why we use
                                 //           prim_info.combined_local_clip_rect here instead of the
                                 //           local_clip_rect built in the TileCacheInstance. Perhaps
                                 //           these can be unified or are different for a good reason?
                                 let world_clip_rect = map_local_to_world
                                     .map(&prim_info.combined_local_clip_rect)
                                     .expect("bug: unable to map clip rect");
                                 let device_clip_rect = (world_clip_rect * ctx.global_device_pixel_scale).round();
-                                let z_id = composite_state.z_generator.next();
 
                                 composite_state.push_surface(
                                     tile_cache,
                                     device_clip_rect,
-                                    z_id,
                                     ctx.global_device_pixel_scale,
                                     ctx.resource_cache,
                                     gpu_cache,
                                     deferred_resolves,
                                 );
                             }
                             PictureCompositeMode::Filter(ref filter) => {
                                 assert!(filter.is_visible());
--- a/gfx/wr/webrender/src/composite.rs
+++ b/gfx/wr/webrender/src/composite.rs
@@ -84,23 +84,25 @@ pub struct CompositeTile {
     pub tile_id: Option<NativeTileId>,
 }
 
 /// Describes information about drawing a primitive as a compositor surface.
 /// For now, we support only YUV images as compositor surfaces, but in future
 /// this will also support RGBA images.
 pub struct ExternalSurfaceDescriptor {
     pub local_rect: PictureRect,
+    pub world_rect: WorldRect,
     pub device_rect: DeviceRect,
     pub clip_rect: DeviceRect,
     pub image_keys: [ImageKey; 3],
     pub image_rendering: ImageRendering,
     pub yuv_color_space: YuvColorSpace,
     pub yuv_format: YuvFormat,
     pub yuv_rescale: f32,
+    pub z_id: ZBufferId,
 }
 
 /// Information about a plane in a YUV surface.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct YuvPlaneDescriptor {
     pub texture: TextureSource,
     pub texture_layer: i32,
@@ -210,17 +212,17 @@ impl Default for CompositorKind {
         }
     }
 }
 
 /// Information about an opaque surface used to occlude tiles.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct Occluder {
-    slice: usize,
+    z_id: ZBufferId,
     device_rect: DeviceIntRect,
 }
 
 /// Describes the properties that identify a tile composition uniquely.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[derive(PartialEq, Clone)]
 pub struct CompositeSurfaceDescriptor {
@@ -314,32 +316,32 @@ impl CompositeState {
             external_surfaces: Vec::new(),
         }
     }
 
     /// Register an occluder during picture cache updates that can be
     /// used during frame building to occlude tiles.
     pub fn register_occluder(
         &mut self,
-        slice: usize,
+        z_id: ZBufferId,
         rect: WorldRect,
     ) {
         let device_rect = (rect * self.global_device_pixel_scale).round().to_i32();
 
         self.occluders.push(Occluder {
             device_rect,
-            slice,
+            z_id,
         });
     }
 
-    /// Returns true if a tile with the specified rectangle and slice
+    /// Returns true if a tile with the specified rectangle and z_id
     /// is occluded by an opaque surface in front of it.
     pub fn is_tile_occluded(
         &self,
-        slice: usize,
+        z_id: ZBufferId,
         device_rect: DeviceRect,
     ) -> bool {
         // It's often the case that a tile is only occluded by considering multiple
         // picture caches in front of it (for example, the background tiles are
         // often occluded by a combination of the content slice + the scrollbar slices).
 
         // The basic algorithm is:
         //    For every occluder:
@@ -349,29 +351,28 @@ impl CompositeState {
         //    If the cumulative area of those occluders is the same as the area of the query tile,
         //       Then the entire tile must be occluded and can be skipped during rasterization and compositing.
 
         // Get the reference area we will compare against.
         let device_rect = device_rect.round().to_i32();
         let ref_area = device_rect.size.width * device_rect.size.height;
 
         // Calculate the non-overlapping area of the valid occluders.
-        let cover_area = area_of_occluders(&self.occluders, slice, &device_rect);
+        let cover_area = area_of_occluders(&self.occluders, z_id, &device_rect);
         debug_assert!(cover_area <= ref_area);
 
         // Check if the tile area is completely covered
         ref_area == cover_area
     }
 
     /// Add a picture cache to be composited
     pub fn push_surface(
         &mut self,
         tile_cache: &TileCacheInstance,
         device_clip_rect: DeviceRect,
-        z_id: ZBufferId,
         global_device_pixel_scale: DevicePixelScale,
         resource_cache: &ResourceCache,
         gpu_cache: &mut GpuCache,
         deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
         let mut visible_opaque_tile_count = 0;
         let mut visible_alpha_tile_count = 0;
 
@@ -412,17 +413,17 @@ impl CompositeState {
             }
 
             let tile = CompositeTile {
                 surface,
                 rect: device_rect,
                 valid_rect: tile.device_valid_rect.translate(-device_rect.origin.to_vector()),
                 dirty_rect: tile.device_dirty_rect.translate(-device_rect.origin.to_vector()),
                 clip_rect: device_clip_rect,
-                z_id,
+                z_id: tile.z_id,
                 tile_id,
             };
 
             self.push_tile(tile, is_opaque);
         }
 
         // For each compositor surface that was promoted, build the
         // information required for the compositor to draw it
@@ -474,24 +475,23 @@ impl CompositeState {
                 continue;
             }
 
             let clip_rect = external_surface
                 .clip_rect
                 .intersection(&device_clip_rect)
                 .unwrap_or_else(DeviceRect::zero);
 
-            // z_id for compositor surfaces can be the same as the surface it
-            // exists on, because we use LessEqual depth function. We could
-            // in future consider disabling z-read completely for drawing
-            // surface overlay tiles, since it doesn't do anything useful.
+            // Use the unique z_id that was allocated for each compositor surface, to
+            // ensure correct ordering when drawing with the simple (Draw) compositor.
+
             self.external_surfaces.push(ResolvedExternalSurface {
                 device_rect: external_surface.device_rect,
                 clip_rect,
-                z_id,
+                z_id: external_surface.z_id,
                 yuv_color_space: external_surface.yuv_color_space,
                 yuv_format: external_surface.yuv_format,
                 yuv_rescale: external_surface.yuv_rescale,
                 image_buffer_kind: get_buffer_kind(yuv_planes[0].texture),
                 yuv_planes,
             });
         }
 
@@ -684,17 +684,17 @@ pub trait Compositor {
     /// Enable/disable native compositor usage
     fn enable_native_compositor(&mut self, enable: bool);
 }
 
 /// Return the total area covered by a set of occluders, accounting for
 /// overlapping areas between those rectangles.
 fn area_of_occluders(
     occluders: &[Occluder],
-    slice: usize,
+    z_id: ZBufferId,
     clip_rect: &DeviceIntRect,
 ) -> i32 {
     // This implementation is based on the article https://leetcode.com/articles/rectangle-area-ii/.
     // This is not a particularly efficient implementation (it skips building segment trees), however
     // we typically use this where the length of the rectangles array is < 10, so simplicity is more important.
 
     let mut area = 0;
 
@@ -725,17 +725,17 @@ fn area_of_occluders(
             }
         }
     }
 
     // Step through each rectangle and build the y-axis event list
     let mut events = Vec::with_capacity(occluders.len() * 2);
     for occluder in occluders {
         // Only consider occluders in front of this rect
-        if occluder.slice > slice {
+        if occluder.z_id.0 > z_id.0 {
             // Clip the source rect to the rectangle we care about, since we only
             // want to record area for the tile we are comparing to.
             if let Some(rect) = occluder.device_rect.intersection(clip_rect) {
                 let x0 = rect.origin.x;
                 let x1 = x0 + rect.size.width;
                 events.push(Event::new(rect.origin.y, EventKind::Begin, x0, x1));
                 events.push(Event::new(rect.origin.y + rect.size.height, EventKind::End, x0, x1));
             }
--- a/gfx/wr/webrender/src/gpu_types.rs
+++ b/gfx/wr/webrender/src/gpu_types.rs
@@ -18,17 +18,17 @@ use crate::util::pack_as_float;
 // Contains type that must exactly match the same structures declared in GLSL.
 
 pub const VECS_PER_TRANSFORM: usize = 8;
 
 #[derive(Copy, Clone, Debug, PartialEq)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct ZBufferId(i32);
+pub struct ZBufferId(pub i32);
 
 // We get 24 bits of Z value - use up 22 bits of it to give us
 // 4 bits to account for GPU issues. This seems to manifest on
 // some GPUs under certain perspectives due to z interpolation
 // precision problems.
 const MAX_DOCUMENT_LAYERS : i8 = 1 << 3;
 const MAX_ITEMS_PER_DOCUMENT_LAYER : i32 = 1 << 19;
 const MAX_DOCUMENT_LAYER_VALUE : i8 = MAX_DOCUMENT_LAYERS / 2 - 1;
--- a/gfx/wr/webrender/src/picture.rs
+++ b/gfx/wr/webrender/src/picture.rs
@@ -109,17 +109,17 @@ use crate::debug_colors;
 use euclid::{vec3, Point2D, Scale, Size2D, Vector2D, Rect, Transform3D};
 use euclid::approxeq::ApproxEq;
 use crate::filterdata::SFilterData;
 use crate::frame_builder::{FrameVisibilityContext, FrameVisibilityState};
 use crate::intern::ItemUid;
 use crate::internal_types::{FastHashMap, FastHashSet, PlaneSplitter, Filter, PlaneSplitAnchor, TextureSource};
 use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState, PictureContext};
 use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
-use crate::gpu_types::UvRectKind;
+use crate::gpu_types::{UvRectKind, ZBufferId};
 use plane_split::{Clipper, Polygon, Splitter};
 use crate::prim_store::{SpaceMapper, PrimitiveVisibilityMask, PointKey, PrimitiveTemplateKind};
 use crate::prim_store::{SpaceSnapper, PictureIndex, PrimitiveInstance, PrimitiveInstanceKind};
 use crate::prim_store::{get_raster_rects, PrimitiveScratchBuffer, RectangleKey};
 use crate::prim_store::{OpacityBindingStorage, ImageInstanceStorage, OpacityBindingIndex};
 use crate::prim_store::{ColorBindingStorage, ColorBindingIndex};
 use crate::print_tree::{PrintTree, PrintTreePrinter};
 use crate::render_backend::DataStores;
@@ -490,16 +490,26 @@ struct TilePostUpdateContext<'a> {
     /// Current size in device pixels of tiles for this cache
     current_tile_size: DeviceIntSize,
 
     /// The local rect of the overall picture cache
     local_rect: PictureRect,
 
     /// A list of the external surfaces that are present on this slice
     external_surfaces: &'a [ExternalSurfaceDescriptor],
+
+    /// Pre-allocated z-id to assign to opaque tiles during post_update. We
+    /// use a different z-id for opaque/alpha tiles, so that compositor
+    /// surfaces (such as videos) can have a z-id between these values,
+    /// which allows compositor surfaces to occlude opaque tiles, but not
+    /// alpha tiles.
+    z_id_opaque: ZBufferId,
+
+    /// Pre-allocated z-id to assign to alpha tiles during post_update
+    z_id_alpha: ZBufferId,
 }
 
 // Mutable state passed to picture cache tiles during post_update
 struct TilePostUpdateState<'a> {
     /// Allow access to the texture cache for requesting tiles
     resource_cache: &'a mut ResourceCache,
 
     /// Current configuration and setup for compositing all the picture cache tiles in renderer.
@@ -851,16 +861,24 @@ pub struct Tile {
     /// Root node of the quadtree dirty rect tracker.
     root: TileNode,
     /// The last rendered background color on this tile.
     background_color: Option<ColorF>,
     /// The first reason the tile was invalidated this frame.
     invalidation_reason: Option<InvalidationReason>,
     /// If true, this tile has one or more compositor surfaces affecting it.
     pub has_compositor_surface: bool,
+    /// The local space valid rect for any primitives found prior to the first compositor
+    /// surface that affects this tile.
+    bg_local_valid_rect: PictureRect,
+    /// The local space valid rect for any primitives found after the first compositor
+    /// surface that affects this tile.
+    fg_local_valid_rect: PictureRect,
+    /// z-buffer id for this tile, which is one of z_id_opaque or z_id_alpha, depending on tile opacity
+    pub z_id: ZBufferId,
 }
 
 impl Tile {
     /// Construct a new, invalid tile.
     fn new(tile_offset: TileOffset) -> Self {
         let id = TileId(NEXT_TILE_ID.fetch_add(1, Ordering::Relaxed));
 
         Tile {
@@ -877,16 +895,19 @@ impl Tile {
             is_visible: false,
             fract_offset: PictureVector2D::zero(),
             id,
             is_opaque: false,
             root: TileNode::new_leaf(Vec::new()),
             background_color: None,
             invalidation_reason: None,
             has_compositor_surface: false,
+            bg_local_valid_rect: PictureRect::zero(),
+            fg_local_valid_rect: PictureRect::zero(),
+            z_id: ZBufferId::invalid(),
         }
     }
 
     /// Print debug information about this tile to a tree printer.
     fn print(&self, pt: &mut dyn PrintTreePrinter) {
         pt.new_level(format!("Tile {:?}", self.id));
         pt.add_item(format!("local_tile_rect: {}", self.local_tile_rect));
         pt.add_item(format!("fract_offset: {:?}", self.fract_offset));
@@ -994,16 +1015,18 @@ impl Tile {
         // the snapping will be consistent.
         self.local_tile_rect = PictureRect::new(
             PicturePoint::new(
                 self.tile_offset.x as f32 * ctx.tile_size.width + ctx.fract_offset.x,
                 self.tile_offset.y as f32 * ctx.tile_size.height + ctx.fract_offset.y,
             ),
             ctx.tile_size,
         );
+        self.bg_local_valid_rect = PictureRect::zero();
+        self.fg_local_valid_rect = PictureRect::zero();
         self.invalidation_reason  = None;
         self.has_compositor_surface = false;
 
         self.world_tile_rect = ctx.pic_to_world_mapper
             .map(&self.local_tile_rect)
             .expect("bug: map local tile rect");
 
         // Check if this tile is currently on screen.
@@ -1058,18 +1081,27 @@ impl Tile {
         // If this primitive is a compositor surface, any tile it affects must be
         // drawn as an overlay tile.
         if info.is_compositor_surface {
             self.has_compositor_surface = true;
         } else {
             // Incorporate the bounding rect of the primitive in the local valid rect
             // for this tile. This is used to minimize the size of the scissor rect
             // during rasterization and the draw rect during composition of partial tiles.
-            self.current_descriptor.local_valid_rect =
-                self.current_descriptor.local_valid_rect.union(&info.prim_clip_rect);
+
+            // Once we have encountered 1+ compositor surfaces affecting this tile, include
+            // this bounding rect in the foreground. Otherwise, include in the background rect.
+            // This allows us to determine if we found any primitives that are on top of the
+            // compositor surface(s) for this tile. If so, we need to draw the tile with alpha
+            // blending as an overlay.
+            if self.has_compositor_surface {
+                self.fg_local_valid_rect = self.fg_local_valid_rect.union(&info.prim_clip_rect);
+            } else {
+                self.bg_local_valid_rect = self.bg_local_valid_rect.union(&info.prim_clip_rect);
+            }
         }
 
         // Include any image keys this tile depends on.
         self.current_descriptor.images.extend_from_slice(&info.images);
 
         // Include any opacity bindings this primitive depends on.
         self.current_descriptor.opacity_bindings.extend_from_slice(&info.opacity_bindings);
 
@@ -1154,16 +1186,21 @@ impl Tile {
     ) -> bool {
         // If tile is not visible, just early out from here - we don't update dependencies
         // so don't want to invalidate, merge, split etc. The tile won't need to be drawn
         // (and thus updated / invalidated) until it is on screen again.
         if !self.is_visible {
             return false;
         }
 
+        // Calculate the overall valid rect for this tile, including both the foreground
+        // and background local valid rects.
+        self.current_descriptor.local_valid_rect =
+            self.bg_local_valid_rect.union(&self.fg_local_valid_rect);
+
         // TODO(gw): In theory, the local tile rect should always have an
         //           intersection with the overall picture rect. In practice,
         //           due to some accuracy issues with how fract_offset (and
         //           fp accuracy) are used in the calling method, this isn't
         //           always true. In this case, it's safe to set the local
         //           valid rect to zero, which means it will be clipped out
         //           and not affect the scene. In future, we should fix the
         //           accuracy issue above, so that this assumption holds, but
@@ -1211,29 +1248,39 @@ impl Tile {
         // after all early out checks have been performed. Otherwise, we might miss updating
         // the native surface next time this tile becomes visible.
         let clipped_rect = self.current_descriptor.local_valid_rect
             .intersection(&ctx.local_clip_rect)
             .unwrap_or_else(PictureRect::zero);
         let mut is_opaque = ctx.backdrop.rect.contains_rect(&clipped_rect);
 
         if self.has_compositor_surface {
-            // TODO(gw): This will almost always select over blend, due to the
-            //           background rectangle. In future, we can optimize this
-            //           case to only check items that come _after_ the compositor
-            //           surface z_id? A better option might be to tweak the z_id
-            //           values so that the alpha pixels get z-rejected?
+            // If we found primitive(s) that are ordered _after_ the first compositor
+            // surface, _and_ intersect with any compositor surface, then we will need
+            // to draw this tile with alpha blending, as an overlay to the compositor surface.
+            let fg_world_valid_rect = ctx.pic_to_world_mapper
+                .map(&self.fg_local_valid_rect)
+                .expect("bug: map fg local valid rect");
+            let fg_device_valid_rect = fg_world_valid_rect * ctx.global_device_pixel_scale;
+
             for surface in ctx.external_surfaces {
-                if surface.device_rect.intersects(&self.device_valid_rect) {
+                if surface.device_rect.intersects(&fg_device_valid_rect) {
                     is_opaque = false;
                     break;
                 }
             }
         }
 
+        // Set the correct z_id for this tile based on opacity
+        if is_opaque {
+            self.z_id = ctx.z_id_opaque;
+        } else {
+            self.z_id = ctx.z_id_alpha;
+        }
+
         if is_opaque != self.is_opaque {
             // If opacity changed, the native compositor surface and all tiles get invalidated.
             // (this does nothing if not using native compositor mode).
             // TODO(gw): This property probably changes very rarely, so it is OK to invalidate
             //           everything in this case. If it turns out that this isn't true, we could
             //           consider other options, such as per-tile opacity (natively supported
             //           on CoreAnimation, and supported if backed by non-virtual surfaces in
             //           DirectComposition).
@@ -2183,16 +2230,18 @@ pub struct TileCacheInstance {
     /// offset of the surface when building the visual tree.
     pub device_position: DevicePoint,
     /// The currently considered tile size override. Used to check if we should
     /// re-evaluate tile size, even if the frame timer hasn't expired.
     tile_size_override: Option<DeviceIntSize>,
     /// List of external surfaces that have been promoted from primitives
     /// in this tile cache.
     pub external_surfaces: Vec<ExternalSurfaceDescriptor>,
+    /// z-buffer ID assigned to opaque tiles in this slice
+    pub z_id_opaque: ZBufferId,
 }
 
 impl TileCacheInstance {
     pub fn new(
         slice: usize,
         slice_flags: SliceFlags,
         spatial_node_index: SpatialNodeIndex,
         background_color: Option<ColorF>,
@@ -2236,16 +2285,17 @@ impl TileCacheInstance {
             current_tile_size: DeviceIntSize::zero(),
             frames_until_size_eval: 0,
             fract_offset: PictureVector2D::zero(),
             compare_cache: FastHashMap::default(),
             native_surface: None,
             device_position: DevicePoint::zero(),
             tile_size_override: None,
             external_surfaces: Vec::new(),
+            z_id_opaque: ZBufferId::invalid(),
         }
     }
 
     /// Returns true if this tile cache is considered opaque.
     pub fn is_opaque(&self) -> bool {
         // If known opaque due to background clear color and being the first slice.
         // The background_color will only be Some(..) if this is the first slice.
         match self.background_color {
@@ -2287,16 +2337,21 @@ impl TileCacheInstance {
         frame_context: &FrameVisibilityContext,
         frame_state: &mut FrameVisibilityState,
     ) -> WorldRect {
         self.external_surfaces.clear();
         self.surface_index = surface_index;
         self.local_rect = pic_rect;
         self.local_clip_rect = PictureRect::max_rect();
 
+        // Opaque surfaces get the first z_id. Compositor surfaces then get
+        // allocated a z_id each. After all compositor surfaces are added,
+        // then we allocate a z_id for alpha tiles.
+        self.z_id_opaque = frame_state.composite_state.z_generator.next();
+
         // Reset the opaque rect + subpixel mode, as they are calculated
         // during the prim dependency checks.
         self.backdrop = BackdropInfo::empty();
         self.subpixel_mode = SubpixelMode::Allow;
 
         self.map_local_to_surface = SpaceMapper::new(
             self.spatial_node_index,
             PictureRect::from_untyped(&pic_rect.to_untyped()),
@@ -2666,17 +2721,17 @@ impl TileCacheInstance {
         data_stores: &DataStores,
         clip_store: &ClipStore,
         pictures: &[PicturePrimitive],
         resource_cache: &ResourceCache,
         opacity_binding_store: &OpacityBindingStorage,
         color_bindings: &ColorBindingStorage,
         image_instances: &ImageInstanceStorage,
         surface_stack: &[SurfaceIndex],
-        composite_state: &CompositeState,
+        composite_state: &mut CompositeState,
     ) -> bool {
         // This primitive exists on the last element on the current surface stack.
         let prim_surface_index = *surface_stack.last().unwrap();
 
         // If the primitive is completely clipped out by the clip chain, there
         // is no need to add it to any primitive dependencies.
         let prim_clip_chain = match prim_clip_chain {
             Some(prim_clip_chain) => prim_clip_chain,
@@ -2940,25 +2995,28 @@ impl TileCacheInstance {
                     let is_visible = world_clip_rect.intersects(&frame_context.global_screen_world_rect);
                     if is_visible {
                         // TODO(gw): Is there any case where if the primitive ends up on a fractional
                         //           boundary we want to _skip_ promoting to a compositor surface and
                         //           draw it as part of the content?
                         let device_rect = (world_rect * frame_context.global_device_pixel_scale).round();
                         let clip_rect = (world_clip_rect * frame_context.global_device_pixel_scale).round();
 
+                        // Each compositor surface allocates a unique z-id
                         self.external_surfaces.push(ExternalSurfaceDescriptor {
                             local_rect: prim_info.prim_clip_rect,
+                            world_rect,
                             image_keys: prim_data.kind.yuv_key,
                             image_rendering: prim_data.kind.image_rendering,
                             device_rect,
                             clip_rect,
                             yuv_color_space: prim_data.kind.color_space,
                             yuv_format: prim_data.kind.format,
                             yuv_rescale: prim_data.kind.color_depth.rescaling_factor(),
+                            z_id: composite_state.z_generator.next(),
                         });
                     }
                 } else {
                     prim_info.images.extend(
                         prim_data.kind.yuv_key.iter().map(|key| {
                             ImageDependency {
                                 key: *key,
                                 generation: resource_cache.get_image_generation(*key),
@@ -3155,23 +3213,36 @@ impl TileCacheInstance {
                     frame_context.global_screen_world_rect,
                     frame_context.spatial_tree,
                 );
 
                 let world_backdrop_rect = map_pic_to_world
                     .map(&backdrop_rect)
                     .expect("bug: unable to map backdrop to world space");
 
+                // Since we register the entire backdrop rect, use the opaque z-id for the
+                // picture cache slice.
                 frame_state.composite_state.register_occluder(
-                    self.slice,
+                    self.z_id_opaque,
                     world_backdrop_rect,
                 );
             }
         }
 
+        // Register any external compositor surfaces as potential occluders. This
+        // is especially useful when viewing video in full-screen mode, as it is
+        // able to occlude every background tile (avoiding allocation, rasterization
+        // and compositing).
+        for external_surface in &self.external_surfaces {
+            frame_state.composite_state.register_occluder(
+                external_surface.z_id,
+                external_surface.world_rect,
+            );
+        }
+
         // Detect if the picture cache was scrolled or scaled. In this case,
         // the device space dirty rects aren't applicable (until we properly
         // integrate with OS compositors that can handle scrolling slices).
         let root_transform = frame_context
             .spatial_tree
             .get_relative_transform(
                 self.spatial_node_index,
                 ROOT_SPATIAL_NODE_INDEX,
@@ -3219,27 +3290,32 @@ impl TileCacheInstance {
 
         let pic_to_world_mapper = SpaceMapper::new_with_target(
             ROOT_SPATIAL_NODE_INDEX,
             self.spatial_node_index,
             frame_context.global_screen_world_rect,
             frame_context.spatial_tree,
         );
 
+        // All compositor surfaces have allocated a z_id, so reserve a z_id for alpha tiles.
+        let z_id_alpha = frame_state.composite_state.z_generator.next();
+
         let ctx = TilePostUpdateContext {
             pic_to_world_mapper,
             global_device_pixel_scale: frame_context.global_device_pixel_scale,
             local_clip_rect: self.local_clip_rect,
             backdrop: self.backdrop,
             spatial_nodes: &self.spatial_nodes,
             opacity_bindings: &self.opacity_bindings,
             color_bindings: &self.color_bindings,
             current_tile_size: self.current_tile_size,
             local_rect: self.local_rect,
             external_surfaces: &self.external_surfaces,
+            z_id_opaque: self.z_id_opaque,
+            z_id_alpha,
         };
 
         let mut state = TilePostUpdateState {
             resource_cache: frame_state.resource_cache,
             composite_state: frame_state.composite_state,
             compare_cache: &mut self.compare_cache,
         };
 
@@ -4533,17 +4609,17 @@ impl PicturePrimitive {
                                     continue;
                                 }
                             };
 
                             // If that draw rect is occluded by some set of tiles in front of it,
                             // then mark it as not visible and skip drawing. When it's not occluded
                             // it will fail this test, and get rasterized by the render task setup
                             // code below.
-                            if frame_state.composite_state.is_tile_occluded(tile_cache.slice, device_draw_rect) {
+                            if frame_state.composite_state.is_tile_occluded(tile.z_id, device_draw_rect) {
                                 // If this tile has an allocated native surface, free it, since it's completely
                                 // occluded. We will need to re-allocate this surface if it becomes visible,
                                 // but that's likely to be rare (e.g. when there is no content display list
                                 // for a frame or two during a tab switch).
                                 let surface = tile.surface.as_mut().expect("no tile surface set!");
 
                                 if let TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { id, .. }, .. } = surface {
                                     if let Some(id) = id.take() {
--- a/gfx/wr/webrender/src/prim_store/mod.rs
+++ b/gfx/wr/webrender/src/prim_store/mod.rs
@@ -2169,17 +2169,17 @@ impl PrimitiveStore {
                             frame_state.data_stores,
                             frame_state.clip_store,
                             &self.pictures,
                             frame_state.resource_cache,
                             &self.opacity_bindings,
                             &self.color_bindings,
                             &self.images,
                             &frame_state.surface_stack,
-                            &frame_state.composite_state,
+                            &mut frame_state.composite_state,
                         ) {
                             prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
                             // Ensure the primitive clip is popped - perhaps we can use
                             // some kind of scope to do this automatically in future.
                             frame_state.clip_chain_stack.pop_clip();
                             continue;
                         }
                     }