Bug 1527498 - Separate picture texture cache r=gw
author: Dzmitry Malyshau <dmalyshau@mozilla.com>
Thu, 14 Feb 2019 14:20:15 +0000
changeset 459159 f2b5a9d987e8ea8a38bebd32f68d5cb04247580f
parent 459158 b30211be4a04f8fbaace77828a0d9d1e80df2df4
child 459160 085bd7c16f996a385a15984170e8f75af51f58f6
push id: 35556
push user: dvarga@mozilla.com
push date: Fri, 15 Feb 2019 01:38:24 +0000
treeherder: mozilla-central@b29c87add05f [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: gw
bugs: 1527498
milestone: 67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1527498 - Separate picture texture cache r=gw Manage the texture space for picture tiles separately inside the texture cache. Differential Revision: https://phabricator.services.mozilla.com/D19708
gfx/wr/webrender/src/gpu_cache.rs
gfx/wr/webrender/src/picture.rs
gfx/wr/webrender/src/profiler.rs
gfx/wr/webrender/src/renderer.rs
gfx/wr/webrender/src/resource_cache.rs
gfx/wr/webrender/src/texture_cache.rs
--- a/gfx/wr/webrender/src/gpu_cache.rs
+++ b/gfx/wr/webrender/src/gpu_cache.rs
@@ -19,17 +19,19 @@
 //! data is not in the cache, the user provided closure
 //! will be invoked to build the data.
 //!
 //! After ```end_frame``` has occurred, callers can
 //! use the ```get_address``` API to get the allocated
 //! address in the GPU cache of a given resource slot
 //! for this frame.
 
-use api::{DebugFlags, DocumentId, PremultipliedColorF, IdNamespace, TexelRect};
+use api::{DebugFlags, DocumentId, PremultipliedColorF, TexelRect};
+#[cfg(test)]
+use api::IdNamespace;
 use euclid::{HomogeneousVector, TypedRect};
 use internal_types::{FastHashMap};
 use profiler::GpuCacheProfileCounters;
 use render_backend::{FrameStamp, FrameId};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use std::{mem, u16, u32};
 use std::num::NonZeroU32;
 use std::ops::Add;
@@ -703,17 +705,17 @@ impl GpuCache {
             debug_flags,
             pending_clear: false,
         }
     }
 
     /// Creates a GpuCache and sets it up with a valid `FrameStamp`, which
     /// is useful for avoiding panics when instantiating the `GpuCache`
     /// directly from unit test code.
-    #[allow(dead_code)]
+    #[cfg(test)]
     pub fn new_for_testing() -> Self {
         let mut cache = Self::new();
         let mut now = FrameStamp::first(DocumentId(IdNamespace(1), 1));
         now.advance();
         cache.begin_frame(now);
         cache
     }
 
--- a/gfx/wr/webrender/src/picture.rs
+++ b/gfx/wr/webrender/src/picture.rs
@@ -1,23 +1,22 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{FilterOp, MixBlendMode, PipelineId, PremultipliedColorF, PictureRect, PicturePoint, WorldPoint};
 use api::{DeviceIntRect, DeviceIntSize, DevicePoint, DeviceRect};
 use api::{LayoutRect, PictureToRasterTransform, LayoutPixel, PropertyBinding, PropertyBindingId};
-use api::{DevicePixelScale, RasterRect, RasterSpace, ColorF, ImageKey, DirtyRect, WorldSize, ClipMode, LayoutSize};
-use api::{PicturePixel, RasterPixel, WorldPixel, WorldRect, ImageFormat, ImageDescriptor, WorldVector2D, LayoutPoint};
+use api::{DevicePixelScale, RasterRect, RasterSpace, ColorF, ImageKey, WorldSize, ClipMode, LayoutSize};
+use api::{PicturePixel, RasterPixel, WorldPixel, WorldRect, WorldVector2D, LayoutPoint};
 use api::{DebugFlags, DeviceHomogeneousVector, DeviceVector2D};
 use box_shadow::{BLUR_SAMPLE_SCALE};
 use clip::{ClipChainId, ClipChainNode, ClipItem, ClipStore, ClipDataStore, ClipChainStack};
 use clip_scroll_tree::{ROOT_SPATIAL_NODE_INDEX, ClipScrollTree, SpatialNodeIndex, CoordinateSystemId, VisibleFace};
 use debug_colors;
-use device::TextureFilter;
 use euclid::{size2, vec3, TypedPoint2D, TypedScale, TypedSize2D};
 use euclid::approxeq::ApproxEq;
 use frame_builder::{FrameVisibilityContext, FrameVisibilityState};
 use intern::ItemUid;
 use internal_types::{FastHashMap, FastHashSet, PlaneSplitter};
 use frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState, PictureContext};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::{TransformPalette, UvRectKind};
@@ -30,17 +29,17 @@ use render_backend::DataStores;
 use render_task::{ClearMode, RenderTask, RenderTaskCacheEntryHandle, TileBlit};
 use render_task::{RenderTaskId, RenderTaskLocation};
 use resource_cache::ResourceCache;
 use scene::{FilterOpHelpers, SceneProperties};
 use scene_builder::Interners;
 use smallvec::SmallVec;
 use std::{mem, u16};
 use std::sync::atomic::{AtomicUsize, Ordering};
-use texture_cache::{Eviction, TextureCacheHandle};
+use texture_cache::TextureCacheHandle;
 use tiling::RenderTargetKind;
 use util::{ComparableVec, TransformedRectKind, MatrixHelpers, MaxRect};
 
 /*
  A picture represents a dynamically rendered image. It consists of:
 
  * A number of primitives that are drawn onto the picture.
  * A composite operation describing how to composite this
@@ -96,19 +95,20 @@ pub struct TileIndex(pub usize);
 
 /// The size in device pixels of a cached tile. The currently chosen
 /// size is arbitrary. We should do some profiling to find the best
 /// size for real world pages.
 ///
 /// Note that we use a separate, smaller size during wrench testing, so that
 /// we get tighter dirty rects and can do more meaningful invalidation
 /// tests.
-pub const TILE_SIZE_WIDTH: i32 = 1024;
-pub const TILE_SIZE_HEIGHT: i32 = 256;
-pub const TILE_SIZE_TESTING: i32 = 64;
+const TILE_SIZE_WIDTH: i32 = 1024;
+const TILE_SIZE_HEIGHT: i32 = 256;
+const TILE_SIZE_TESTING: i32 = 64;
+
 pub const FRAMES_BEFORE_PICTURE_CACHING: usize = 2;
 const MAX_DIRTY_RECTS: usize = 3;
 
 /// The maximum size per axis of a surface,
 ///  in WorldPixel coordinates.
 const MAX_SURFACE_SIZE: f32 = 4096.0;
 
 
@@ -778,17 +778,17 @@ impl TileCache {
             //           there may be some pre-cached tiles still existing.
             //           They will expire from the texture cache as normal,
             //           but we should check this path a bit more carefully
             //           to see if any other memory should be freed.
             return;
         }
 
         let DeviceIntSize { width: tile_width, height: tile_height, _unit: _ } =
-            self.tile_dimensions(frame_context.config.testing);
+            Self::tile_dimensions(frame_context.config.testing);
 
         // Work out the scroll offset to apply to the world reference point.
         let scroll_offset_point = frame_context.clip_scroll_tree
             .get_relative_transform(
                 self.spatial_node_index,
                 ROOT_SPATIAL_NODE_INDEX,
             )
             .expect("bug: unable to get scroll transform")
@@ -1386,25 +1386,16 @@ impl TileCache {
         self.dirty_region.clear();
         self.pending_blits.clear();
 
         // If the tile cache is disabled, just return a no-op local clip rect.
         if !self.is_enabled {
             return LayoutRect::max_rect();
         }
 
-        let dim = self.tile_dimensions(frame_context.config.testing);
-        let descriptor = ImageDescriptor::new(
-            dim.width,
-            dim.height,
-            ImageFormat::BGRA8,
-            true,
-            false,
-        );
-
         // Skip all tiles if completely off-screen.
         if !self.world_bounding_rect.intersects(&frame_context.screen_world_rect) {
             return LayoutRect::zero();
         }
 
         let map_surface_to_world: SpaceMapper<LayoutPixel, WorldPixel> = SpaceMapper::new_with_target(
             ROOT_SPATIAL_NODE_INDEX,
             self.spatial_node_index,
@@ -1542,27 +1533,19 @@ impl TileCache {
                 }
 
                 // Only cache tiles that have had the same content for at least two
                 // frames. This skips caching on pages / benchmarks that are changing
                 // every frame, which is wasteful.
                 if tile.same_frames >= FRAMES_BEFORE_PICTURE_CACHING {
                     // Ensure that this texture is allocated.
                     if !resource_cache.texture_cache.is_allocated(&tile.handle) {
-                        resource_cache.texture_cache.update(
+                        resource_cache.texture_cache.update_picture_cache(
                             &mut tile.handle,
-                            descriptor,
-                            TextureFilter::Linear,
-                            None,
-                            [0.0; 3],
-                            DirtyRect::All,
                             gpu_cache,
-                            None,
-                            UvRectKind::Rect,
-                            Eviction::Eager,
                         );
                     }
 
                     let cache_item = resource_cache
                         .get_texture_cache_item(&tile.handle);
 
                     let src_origin = (visible_rect.origin * frame_context.device_pixel_scale).round().to_i32();
                     let valid_rect = visible_rect.translate(&-tile.world_rect.origin.to_vector());
@@ -1613,17 +1596,17 @@ impl TileCache {
         // on this by supporting batching per dirty region.
         if self.dirty_region.dirty_rects.len() > MAX_DIRTY_RECTS {
             self.dirty_region.collapse();
         }
 
         local_clip_rect
     }
 
-    fn tile_dimensions(&self, testing: bool) -> DeviceIntSize {
+    pub fn tile_dimensions(testing: bool) -> DeviceIntSize {
         if testing {
             size2(TILE_SIZE_TESTING, TILE_SIZE_TESTING)
         } else {
             size2(TILE_SIZE_WIDTH, TILE_SIZE_HEIGHT)
         }
     }
 }
 
--- a/gfx/wr/webrender/src/profiler.rs
+++ b/gfx/wr/webrender/src/profiler.rs
@@ -386,25 +386,27 @@ impl FrameProfileCounters {
 }
 
 #[derive(Clone)]
 pub struct TextureCacheProfileCounters {
     pub pages_a8_linear: ResourceProfileCounter,
     pub pages_a16_linear: ResourceProfileCounter,
     pub pages_rgba8_linear: ResourceProfileCounter,
     pub pages_rgba8_nearest: ResourceProfileCounter,
+    pub pages_picture: ResourceProfileCounter,
 }
 
 impl TextureCacheProfileCounters {
     pub fn new() -> Self {
         TextureCacheProfileCounters {
             pages_a8_linear: ResourceProfileCounter::new("Texture A8 cached pages"),
             pages_a16_linear: ResourceProfileCounter::new("Texture A16 cached pages"),
             pages_rgba8_linear: ResourceProfileCounter::new("Texture RGBA8 cached pages (L)"),
             pages_rgba8_nearest: ResourceProfileCounter::new("Texture RGBA8 cached pages (N)"),
+            pages_picture: ResourceProfileCounter::new("Picture cached pages"),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct GpuCacheProfileCounters {
     pub allocated_rows: IntProfileCounter,
     pub allocated_blocks: IntProfileCounter,
--- a/gfx/wr/webrender/src/renderer.rs
+++ b/gfx/wr/webrender/src/renderer.rs
@@ -53,17 +53,17 @@ use gpu_cache::{GpuCacheDebugChunk, GpuC
 #[cfg(feature = "pathfinder")]
 use gpu_glyph_renderer::GpuGlyphRenderer;
 use gpu_types::ScalingInstance;
 use internal_types::{TextureSource, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
 use internal_types::{CacheTextureId, DebugOutput, FastHashMap, LayerIndex, RenderedDocument, ResultMsg};
 use internal_types::{TextureCacheAllocationKind, TextureCacheUpdate, TextureUpdateList, TextureUpdateSource};
 use internal_types::{RenderTargetInfo, SavedTargetIndex};
 use malloc_size_of::MallocSizeOfOps;
-use picture::RecordedDirtyRegion;
+use picture::{RecordedDirtyRegion, TileCache};
 use prim_store::DeferredResolve;
 use profiler::{BackendProfileCounters, FrameProfileCounters, TimeProfileCounter,
                GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use profiler::{Profiler, ChangeIndicator};
 use device::query::GpuProfiler;
 use rayon::{ThreadPool, ThreadPoolBuilder};
 use record::ApiRecordingReceiver;
 use render_backend::{FrameId, RenderBackend};
@@ -1926,16 +1926,17 @@ impl Renderer {
             register_thread_with_profiler(rb_thread_name.clone());
             if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                 thread_listener.thread_started(&rb_thread_name);
             }
 
             let texture_cache = TextureCache::new(
                 max_texture_size,
                 max_texture_layers,
+                TileCache::tile_dimensions(config.testing),
             );
 
             let resource_cache = ResourceCache::new(
                 texture_cache,
                 glyph_rasterizer,
                 blob_image_handler,
             );
 
@@ -2845,17 +2846,16 @@ impl Renderer {
                                     .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE);
 
                                 // Textures in the cache generally don't need to be cleared,
                                 // but we do so if the debug display is active to make it
                                 // easier to identify unallocated regions.
                                 if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
                                     self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR);
                                 }
-
                             }
 
                             let old = self.texture_resolver.texture_cache_map.insert(allocation.id, texture);
                             assert_eq!(old.is_some(), is_realloc, "Renderer and RenderBackend disagree");
                             if let Some(old) = old {
                                 self.device.blit_renderable_texture(
                                     self.texture_resolver.texture_cache_map.get_mut(&allocation.id).unwrap(),
                                     &old
--- a/gfx/wr/webrender/src/resource_cache.rs
+++ b/gfx/wr/webrender/src/resource_cache.rs
@@ -1787,17 +1787,17 @@ impl ResourceCache {
         }
         if what.contains(ClearCache::GLYPH_DIMENSIONS) {
             self.cached_glyph_dimensions.clear();
         }
         if what.contains(ClearCache::RENDER_TASKS) {
             self.cached_render_tasks.clear();
         }
         if what.contains(ClearCache::TEXTURE_CACHE) {
-            self.texture_cache.clear();
+            self.texture_cache.clear_all();
         }
         if what.contains(ClearCache::RASTERIZED_BLOBS) {
             self.rasterized_blob_images.clear();
         }
     }
 
     pub fn clear_namespace(&mut self, namespace: IdNamespace) {
         self.clear_images(|k| k.0 == namespace);
@@ -2194,16 +2194,17 @@ impl ResourceCache {
                 self.current_frame_id = FrameId::INVALID;
                 self.cached_glyphs.clear();
                 self.cached_glyph_dimensions.clear();
                 self.cached_images.clear();
                 self.cached_render_tasks.clear();
                 self.texture_cache = TextureCache::new(
                     self.texture_cache.max_texture_size(),
                     self.texture_cache.max_texture_layers(),
+                    self.texture_cache.picture_tile_size(),
                 );
             }
         }
 
         self.glyph_rasterizer.reset();
         let res = &mut self.resources;
         res.font_templates.clear();
         *res.font_instances.write().unwrap() = resources.font_instances;
--- a/gfx/wr/webrender/src/texture_cache.rs
+++ b/gfx/wr/webrender/src/texture_cache.rs
@@ -1,15 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{DebugFlags, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DirtyRect, ImageDirtyRect, DocumentId, ExternalImageType, ImageFormat};
-use api::{IdNamespace, ImageDescriptor};
+use api::{ImageDescriptor};
+#[cfg(test)]
+use api::IdNamespace;
 use device::{TextureFilter, total_gpu_bytes_allocated};
 use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle};
 use gpu_types::{ImageSource, UvRectKind};
 use internal_types::{CacheTextureId, FastHashMap, LayerIndex, TextureUpdateList, TextureUpdateSource};
 use internal_types::{TextureSource, TextureCacheAllocInfo, TextureCacheUpdate};
 use profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
 use render_backend::{FrameId, FrameStamp};
@@ -18,49 +20,71 @@ use std::cell::Cell;
 use std::cmp;
 use std::mem;
 use std::time::{Duration, SystemTime};
 use std::rc::Rc;
 
 /// The size of each region/layer in shared cache texture arrays.
 const TEXTURE_REGION_DIMENSIONS: i32 = 512;
 
+/// The number of slices for picture caching to allocate at start.
+const BASE_PICTURE_TEXTURE_SLICES: usize = 16;
+/// The number of slices to add when we grow out of the current range.
+const ADD_PICTURE_TEXTURE_SLICES: usize = 8;
+/// The chosen image format for picture tiles.
+const PICTURE_TILE_FORMAT: ImageFormat = ImageFormat::BGRA8;
+
 /// The number of pixels in a region. Derived from the above.
 const TEXTURE_REGION_PIXELS: usize =
     (TEXTURE_REGION_DIMENSIONS as usize) * (TEXTURE_REGION_DIMENSIONS as usize);
 
 /// Items in the texture cache can either be standalone textures,
 /// or a sub-rect inside the shared cache.
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 enum EntryDetails {
     Standalone,
+    Picture {
+        layer_index: usize,
+    },
     Cache {
         /// Origin within the texture layer where this item exists.
         origin: DeviceIntPoint,
         /// The layer index of the texture array.
         layer_index: usize,
     },
 }
 
 impl EntryDetails {
+    fn describe(&self) -> (LayerIndex, DeviceIntPoint) {
+        match *self {
+            EntryDetails::Standalone => (0, DeviceIntPoint::zero()),
+            EntryDetails::Picture { layer_index } => (layer_index, DeviceIntPoint::zero()),
+            EntryDetails::Cache { origin, layer_index } => (layer_index, origin),
+        }
+    }
+}
+
+impl EntryDetails {
     /// Returns the kind associated with the details.
     fn kind(&self) -> EntryKind {
         match *self {
             EntryDetails::Standalone => EntryKind::Standalone,
+            EntryDetails::Picture { .. } => EntryKind::Picture,
             EntryDetails::Cache { .. } => EntryKind::Shared,
         }
     }
 }
 
 /// Tag identifying standalone-versus-shared, without the details.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 enum EntryKind {
     Standalone,
+    Picture,
     Shared,
 }
 
 #[derive(Debug)]
 pub enum CacheEntryMarker {}
 
 // Stores information related to a single entry in the texture
 // cache. This is stored for each item whether it's in the shared
@@ -115,24 +139,21 @@ impl CacheEntry {
     }
 
     // Update the GPU cache for this texture cache entry.
     // This ensures that the UV rect, and texture layer index
     // are up to date in the GPU cache for vertex shaders
     // to fetch from.
     fn update_gpu_cache(&mut self, gpu_cache: &mut GpuCache) {
         if let Some(mut request) = gpu_cache.request(&mut self.uv_rect_handle) {
-            let (origin, layer_index) = match self.details {
-                EntryDetails::Standalone => (DeviceIntPoint::zero(), 0.0),
-                EntryDetails::Cache { origin, layer_index } => (origin, layer_index as f32),
-            };
+            let (layer_index, origin) = self.details.describe();
             let image_source = ImageSource {
                 p0: origin.to_f32(),
                 p1: (origin + self.size).to_f32(),
-                texture_layer: layer_index,
+                texture_layer: layer_index as f32,
                 user_data: self.user_data,
                 uv_rect_kind: self.uv_rect_kind,
             };
             image_source.write_gpu_blocks(&mut request);
         }
     }
 
     fn evict(&self) {
@@ -279,25 +300,28 @@ impl SharedTextures {
 /// handle for each entry, but unlimited weak handles. Consumers receive the weak
 /// handles, and `TextureCache` owns the strong handles internally.
 #[derive(Default, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 struct EntryHandles {
     /// Handles for each standalone texture cache entry.
     standalone: Vec<FreeListHandle<CacheEntryMarker>>,
+    /// Handles for each picture cache entry.
+    picture: Vec<FreeListHandle<CacheEntryMarker>>,
     /// Handles for each shared texture cache entry.
     shared: Vec<FreeListHandle<CacheEntryMarker>>,
 }
 
 impl EntryHandles {
     /// Mutably borrows the requested handle list.
     fn select(&mut self, kind: EntryKind) -> &mut Vec<FreeListHandle<CacheEntryMarker>> {
         match kind {
             EntryKind::Standalone => &mut self.standalone,
+            EntryKind::Picture => &mut self.picture,
             EntryKind::Shared => &mut self.shared,
         }
     }
 }
 
 /// Container struct for the various parameters used in cache allocation.
 struct CacheAllocParams {
     descriptor: ImageDescriptor,
@@ -314,16 +338,17 @@ struct CacheAllocParams {
 /// of the entry, but may consider overall memory usage by WebRender, by making
 /// eviction increasingly aggressive as overall memory usage increases.
 ///
 /// Note that we don't just wrap a `FrameStamp` here, because `FrameStamp`
 /// requires that if the id fields are the same, the time fields will be as
 /// well. The pair of values in our eviction threshold generally do not match
 /// the stamp of any actual frame, and the comparison semantics are also
 /// different - so it's best to use a distinct type.
+#[derive(Clone, Copy)]
 struct EvictionThreshold {
     id: FrameId,
     time: SystemTime,
 }
 
 impl EvictionThreshold {
     /// Returns true if the entry with the given access record should be evicted
     /// under this threshold.
@@ -444,16 +469,19 @@ impl PerDocumentData {
 /// understand how it works. Enabling gfx.webrender.debug.texture-cache shows a
 /// live view of its contents in Firefox.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct TextureCache {
     /// Set of texture arrays in different formats used for the shared cache.
     shared_textures: SharedTextures,
 
+    /// A single texture array for picture caching.
+    picture_texture: WholeTextureArray,
+
     /// Maximum texture size supported by hardware.
     max_texture_size: i32,
 
     /// Maximum number of texture layers supported by hardware.
     max_texture_layers: usize,
 
     /// The current set of debug flags.
     debug_flags: DebugFlags,
@@ -484,17 +512,21 @@ pub struct TextureCache {
 
     /// The current document's data. This is moved out of per_doc_data in
     /// begin_frame and moved back in end_frame to solve borrow checker issues.
     /// We should try removing this when we require a rustc with NLL.
     doc_data: PerDocumentData,
 }
 
 impl TextureCache {
-    pub fn new(max_texture_size: i32, mut max_texture_layers: usize) -> Self {
+    pub fn new(
+        max_texture_size: i32,
+        mut max_texture_layers: usize,
+        picture_tile_size: DeviceIntSize,
+    ) -> Self {
         if cfg!(target_os = "macos") {
             // On MBP integrated Intel GPUs, texture arrays appear to be
             // implemented as a single texture of stacked layers, and that
             // texture appears to be subject to the texture size limit. As such,
             // allocating more than 32 512x512 regions results in a dimension
             // longer than 16k (the max texture size), causing incorrect behavior.
             //
             // So we clamp the number of layers on mac. This results in maximum
@@ -511,106 +543,111 @@ impl TextureCache {
             //     driver family, and those drivers are also likely to share
             //     the same max texture size of 16k. If we do encounter a driver
             //     with the same bug but a lower max texture size, we might need
             //     to rethink our strategy anyway, since a limit below 32MB might
             //     start to introduce performance issues.
             max_texture_layers = max_texture_layers.min(32);
         }
 
+        let picture_texture = WholeTextureArray {
+            size: picture_tile_size,
+            filter: TextureFilter::Linear,
+            format: PICTURE_TILE_FORMAT,
+            texture_id: CacheTextureId(1),
+            slices: vec![WholeTextureSlice { uv_rect_handle: None }; BASE_PICTURE_TEXTURE_SLICES],
+        };
+        let mut pending_updates = TextureUpdateList::new();
+        pending_updates.push_alloc(picture_texture.texture_id, picture_texture.to_info());
+
         TextureCache {
             shared_textures: SharedTextures::new(),
+            picture_texture,
             reached_reclaim_threshold: None,
             entries: FreeList::new(),
             max_texture_size,
             max_texture_layers,
             debug_flags: DebugFlags::empty(),
-            next_id: CacheTextureId(1),
-            pending_updates: TextureUpdateList::new(),
+            next_id: CacheTextureId(2),
+            pending_updates,
             now: FrameStamp::INVALID,
             per_doc_data: FastHashMap::default(),
             doc_data: PerDocumentData::new(),
         }
     }
 
     /// Creates a TextureCache and sets it up with a valid `FrameStamp`, which
     /// is useful for avoiding panics when instantiating the `TextureCache`
     /// directly from unit test code.
-    #[allow(dead_code)]
+    #[cfg(test)]
     pub fn new_for_testing(max_texture_size: i32, max_texture_layers: usize) -> Self {
-        let mut cache = Self::new(max_texture_size, max_texture_layers);
+        let tile_size = DeviceIntSize::new(64, 64);
+        let mut cache = Self::new(max_texture_size, max_texture_layers, tile_size);
         let mut now = FrameStamp::first(DocumentId(IdNamespace(1), 1));
         now.advance();
         cache.begin_frame(now);
         cache
     }
 
     pub fn set_debug_flags(&mut self, flags: DebugFlags) {
         self.debug_flags = flags;
     }
 
-    /// Clear all standalone textures in the cache.
-    pub fn clear_standalone(&mut self) {
-        debug_assert!(!self.now.is_valid());
+    /// Clear all entries of the specified kind.
+    fn clear_kind(&mut self, kind: EntryKind) {
         // This pref just helps us avoid crashes when we begin using multiple documents.
         // What we need to do for clear to work correctly with multiple documents is
         // to ensure that we generate frames for all documents whenever we do this.
         if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG_DISABLE_SHRINK) {
             return;
         }
 
         let mut per_doc_data = mem::replace(&mut self.per_doc_data, FastHashMap::default());
         for (&_, doc_data) in per_doc_data.iter_mut() {
-            let standalone_entry_handles = mem::replace(
-                &mut doc_data.handles.standalone,
+            let entry_handles = mem::replace(
+                doc_data.handles.select(kind),
                 Vec::new(),
             );
 
-            for handle in standalone_entry_handles {
+            for handle in entry_handles {
                 let entry = self.entries.free(handle);
                 entry.evict();
                 self.free(entry);
             }
         }
         self.per_doc_data = per_doc_data;
     }
 
-    /// Clear all shared textures in the cache.
-    pub fn clear_shared(&mut self) {
-        // This pref just helps us avoid crashes when we begin using multiple documents.
-        // What we need to do for clear to work correctly with multiple documents is
-        // to ensure that we generate frames for all documents whenever we do this.
+    fn clear_standalone(&mut self) {
+        debug_assert!(!self.now.is_valid());
+        self.clear_kind(EntryKind::Standalone);
+    }
+
+    fn clear_picture(&mut self) {
+        self.clear_kind(EntryKind::Picture);
+        if let Some(texture_id) = self.picture_texture.reset(BASE_PICTURE_TEXTURE_SLICES) {
+            self.pending_updates.push_realloc(texture_id, self.picture_texture.to_info());
+        }
+    }
+
+    fn clear_shared(&mut self) {
         if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG_DISABLE_SHRINK) {
             return;
         }
-
         self.unset_doc_data();
-        let mut per_doc_data = mem::replace(&mut self.per_doc_data, FastHashMap::default());
-        for (&_, doc_data) in per_doc_data.iter_mut() {
-            let shared_entry_handles = mem::replace(
-                &mut doc_data.handles.shared,
-                Vec::new(),
-            );
-
-            for handle in shared_entry_handles {
-                let entry = self.entries.free(handle);
-                entry.evict();
-                self.free(entry);
-            }
-        }
-
+        self.clear_kind(EntryKind::Shared);
         self.shared_textures.clear(&mut self.pending_updates);
-        self.per_doc_data = per_doc_data;
         self.set_doc_data();
     }
 
     /// Clear all entries in the texture cache. This is a fairly drastic
     /// step that should only be called very rarely.
-    pub fn clear(&mut self) {
+    pub fn clear_all(&mut self) {
         self.clear_standalone();
+        self.clear_picture();
         self.clear_shared();
     }
 
     fn set_doc_data(&mut self) {
         let document_id = self.now.document_id();
         self.doc_data = self.per_doc_data
                             .remove(&document_id)
                             .unwrap_or_else(|| PerDocumentData::new());
@@ -684,26 +721,30 @@ impl TextureCache {
         // Most of the time, standalone cache entries correspond to images whose
         // width or height is greater than the region size in the shared cache, i.e.
         // 512 pixels. Cached render tasks also frequently get standalone entries,
         // but those use the Eviction::Eager policy (for now). So the tradeoff there
         // is largely around reducing texture upload jank while keeping memory usage
         // at an acceptable level.
         let threshold = self.default_eviction();
         self.expire_old_entries(EntryKind::Standalone, threshold);
+        self.expire_old_entries(EntryKind::Picture, threshold);
 
         self.shared_textures.array_a8_linear
             .update_profile(&mut texture_cache_profile.pages_a8_linear);
         self.shared_textures.array_a16_linear
             .update_profile(&mut texture_cache_profile.pages_a16_linear);
         self.shared_textures.array_rgba8_linear
             .update_profile(&mut texture_cache_profile.pages_rgba8_linear);
         self.shared_textures.array_rgba8_nearest
             .update_profile(&mut texture_cache_profile.pages_rgba8_nearest);
 
+        self.picture_texture
+            .update_profile(&mut texture_cache_profile.pages_picture);
+
         self.unset_doc_data();
         self.now = FrameStamp::INVALID;
     }
 
     // Request an item in the texture cache. All images that will
     // be used on a frame *must* have request() called on their
     // handle, to update the last used timestamp and ensure
     // that resources are not flushed from the cache too early.
@@ -730,21 +771,26 @@ impl TextureCache {
     pub fn needs_upload(&self, handle: &TextureCacheHandle) -> bool {
         self.entries.get_opt(handle).is_none()
     }
 
     pub fn max_texture_size(&self) -> i32 {
         self.max_texture_size
     }
 
-    #[allow(dead_code)]
+    #[cfg(feature = "replay")] // compiled only for capture-replay builds, replacing the blanket allow(dead_code)
     pub fn max_texture_layers(&self) -> usize {
         self.max_texture_layers
     }
 
+    #[cfg(feature = "replay")] // likewise only needed by capture replay
+    pub fn picture_tile_size(&self) -> DeviceIntSize { // size of one picture-cache tile (every slice shares it)
+        self.picture_texture.size
+    }
+
     pub fn pending_updates(&mut self) -> TextureUpdateList {
         mem::replace(&mut self.pending_updates, TextureUpdateList::new())
     }
 
     // Update the data stored by a given texture cache handle.
     pub fn update(
         &mut self,
         handle: &mut TextureCacheHandle,
@@ -789,32 +835,29 @@ impl TextureCache {
 
         // Install the new eviction notice for this update, if applicable.
         entry.eviction_notice = eviction_notice.cloned();
         entry.uv_rect_kind = uv_rect_kind;
 
         // Invalidate the contents of the resource rect in the GPU cache.
         // This ensures that the update_gpu_cache below will add
         // the new information to the GPU cache.
+        //TODO: only invalidate if the parameters change?
         gpu_cache.invalidate(&entry.uv_rect_handle);
 
         // Upload the resource rect and texture array layer.
         entry.update_gpu_cache(gpu_cache);
 
         entry.eviction = eviction;
 
         // Create an update command, which the render thread processes
         // to upload the new image data into the correct location
         // in GPU memory.
         if let Some(data) = data {
-            let (layer_index, origin) = match entry.details {
-                EntryDetails::Standalone => (0, DeviceIntPoint::zero()),
-                EntryDetails::Cache { layer_index, origin } => (layer_index, origin),
-            };
-
+            let (layer_index, origin) = entry.details.describe(); // replaces the per-kind match; yields (layer_index, origin) for any entry kind
             let op = TextureCacheUpdate::new_update(
                 data,
                 &descriptor,
                 origin,
                 entry.size,
                 entry.texture_id,
                 layer_index as i32,
                 &dirty_rect,
@@ -852,26 +895,17 @@ impl TextureCache {
     pub fn get_cache_location(
         &self,
         handle: &TextureCacheHandle,
     ) -> (CacheTextureId, LayerIndex, DeviceIntRect, GpuCacheHandle) {
         let entry = self.entries
             .get_opt(handle)
             .expect("BUG: was dropped from cache or not updated!");
         debug_assert_eq!(entry.last_access, self.now);
-        let (layer_index, origin) = match entry.details {
-            EntryDetails::Standalone { .. } => {
-                (0, DeviceIntPoint::zero())
-            }
-            EntryDetails::Cache {
-                layer_index,
-                origin,
-                ..
-            } => (layer_index, origin),
-        };
+        let (layer_index, origin) = entry.details.describe(); // single helper now covers standalone, picture, and shared entries
         (entry.texture_id,
          layer_index as usize,
          DeviceIntRect::new(origin, entry.size),
          entry.uv_rect_handle)
     }
 
     pub fn mark_unused(&mut self, handle: &TextureCacheHandle) {
         if let Some(entry) = self.entries.get_opt_mut(handle) {
@@ -959,30 +993,46 @@ impl TextureCache {
 
     // Free a cache entry from the standalone list or shared cache.
     fn free(&mut self, entry: CacheEntry) {
         match entry.details {
             EntryDetails::Standalone => {
                 // This is a standalone texture allocation. Free it directly.
                 self.pending_updates.push_free(entry.texture_id);
             }
+            EntryDetails::Picture { layer_index } => {
+                self.picture_texture.slices[layer_index].uv_rect_handle = None; // None marks the whole slice as free for re-use
+                if self.debug_flags.contains( // NOTE: contains() requires BOTH flags below to be set (intersects() would mean either)
+                    DebugFlags::TEXTURE_CACHE_DBG |
+                    DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED)
+                {
+                    self.pending_updates.push_debug_clear(
+                        entry.texture_id,
+                        DeviceIntPoint::zero(),
+                        self.picture_texture.size.width,
+                        self.picture_texture.size.height,
+                        layer_index,
+                    );
+                }
+            }
             EntryDetails::Cache { origin, layer_index } => {
                 // Free the block in the given region.
                 let texture_array = self.shared_textures.select(entry.format, entry.filter);
                 let region = &mut texture_array.regions[layer_index];
 
                 if self.debug_flags.contains(
                     DebugFlags::TEXTURE_CACHE_DBG |
-                    DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED) {
+                    DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED) // both flags required here too (see picture arm above)
+                {
+                    self.pending_updates.push_debug_clear(
                         entry.texture_id,
                         origin,
                         region.slab_size.width,
                         region.slab_size.height,
-                        layer_index
+                        layer_index,
                     );
                 }
                 region.free(origin, &mut texture_array.empty_regions);
             }
         }
     }
 
     // Attempt to allocate a block from the shared cache.
@@ -1152,67 +1202,105 @@ impl TextureCache {
         if added_layer {
             self.allocate_from_shared_cache(params)
                 .expect("Allocation should succeed after adding a fresh layer")
         } else {
             self.allocate_standalone_entry(params)
         }
     }
 
-    /// Allocates a cache entry for the given parameters, and updates the
-    /// provided handle to point to the new entry.
-    fn allocate(&mut self, params: &CacheAllocParams, handle: &mut TextureCacheHandle) {
-        debug_assert!(self.now.is_valid());
-        let new_cache_entry = self.allocate_cache_entry(params);
-        let new_kind = new_cache_entry.details.kind();
-
+    fn upsert_entry( // insert-or-replace helper shared by allocate() and update_picture_cache()
+        &mut self,
+        cache_entry: CacheEntry,
+        handle: &mut TextureCacheHandle,
+    ) {
+        let new_kind = cache_entry.details.kind();
         // If the handle points to a valid cache entry, we want to replace the
         // cache entry with our newly updated location. We also need to ensure
         // that the storage (region or standalone) associated with the previous
         // entry here gets freed.
         //
         // If the handle is invalid, we need to insert the data, and append the
         // result to the corresponding vector.
         //
         // This is managed with a database style upsert operation.
-        match self.entries.upsert(handle, new_cache_entry) {
+        match self.entries.upsert(handle, cache_entry) {
             UpsertResult::Updated(old_entry) => {
                 if new_kind != old_entry.details.kind() {
                     // Handle the rare case than an update moves an entry from
                     // shared to standalone or vice versa. This involves a linear
                     // search, but should be rare enough not to matter.
                     let (from, to) = match new_kind {
                         EntryKind::Standalone =>
                             (&mut self.doc_data.handles.shared, &mut self.doc_data.handles.standalone),
+                        EntryKind::Picture => unreachable!(), // picture entries are only upserted for empty handles (see update_picture_cache), so they never hit the kind-change path
                         EntryKind::Shared =>
                             (&mut self.doc_data.handles.standalone, &mut self.doc_data.handles.shared),
                     };
                     let idx = from.iter().position(|h| h.weak() == *handle).unwrap();
                     to.push(from.remove(idx));
                 }
                 self.free(old_entry);
             }
             UpsertResult::Inserted(new_handle) => {
                 *handle = new_handle.weak();
                 self.doc_data.handles.select(new_kind).push(new_handle);
             }
         }
     }
+
+    /// Allocates a cache entry for the given parameters, and updates the
+    /// provided handle to point to the new entry.
+    fn allocate(&mut self, params: &CacheAllocParams, handle: &mut TextureCacheHandle) {
+        debug_assert!(self.now.is_valid());
+        let new_cache_entry = self.allocate_cache_entry(params);
+        self.upsert_entry(new_cache_entry, handle)
+    }
+
+    // Update the data stored by a given texture cache handle for picture caching specifically.
+    pub fn update_picture_cache(
+        &mut self,
+        handle: &mut TextureCacheHandle,
+        gpu_cache: &mut GpuCache,
+    ) {
+        debug_assert!(self.now.is_valid());
+
+        if self.entries.get_opt(handle).is_none() {
+            let layer_index = match self.picture_texture.find_free() {
+                Some(index) => index,
+                None => {
+                    let index = self.picture_texture.grow(ADD_PICTURE_TEXTURE_SLICES); // no free slice: grow by a fixed batch of layers
+                    let info = self.picture_texture.to_info();
+                    self.pending_updates.push_realloc(self.picture_texture.texture_id, info); // tell the renderer to reallocate the backing texture
+                    index
+                },
+            };
+
+            let cache_entry = self.picture_texture.occupy(layer_index, self.now); // marks the slice occupied and builds its CacheEntry
+            self.upsert_entry(cache_entry, handle)
+        }
+
+        // Upload the resource rect and texture array layer.
+        self.entries
+            .get_opt_mut(handle)
+            .expect("BUG: handle must be valid now")
+            .update_gpu_cache(gpu_cache);
+    }
 }
 
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 #[derive(Copy, Clone, PartialEq)]
 struct SlabSize {
     width: i32,
     height: i32,
 }
 
 impl SlabSize {
-    fn new(size: DeviceIntSize) -> SlabSize {
+    fn new(size: DeviceIntSize) -> Self {
         let x_size = quantize_dimension(size.width);
         let y_size = quantize_dimension(size.height);
 
         assert!(x_size > 0 && x_size <= TEXTURE_REGION_DIMENSIONS);
         assert!(y_size > 0 && y_size <= TEXTURE_REGION_DIMENSIONS);
 
         let (width, height) = match (x_size, y_size) {
             // Special cased rectangular slab pages.
@@ -1460,16 +1548,111 @@ impl TextureArray {
                 eviction_notice: None,
                 uv_rect_kind: params.uv_rect_kind,
                 eviction: Eviction::Auto,
             }
         })
     }
 }
 
+
+/// A tracking structure for each slice in `WholeTextureArray`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone, Copy, Debug)]
+struct WholeTextureSlice {
+    uv_rect_handle: Option<GpuCacheHandle>, // None means the slice is vacant
+}
+
+/// A texture array that allocates whole slices and doesn't do any region tracking.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct WholeTextureArray {
+    size: DeviceIntSize,
+    filter: TextureFilter,
+    format: ImageFormat,
+    texture_id: CacheTextureId,
+    slices: Vec<WholeTextureSlice>,
+}
+
+impl WholeTextureArray {
+    fn to_info(&self) -> TextureCacheAllocInfo { // allocation parameters for (re)creating the backing texture
+        TextureCacheAllocInfo {
+            width: self.size.width,
+            height: self.size.height,
+            format: self.format,
+            filter: self.filter,
+            layer_count: self.slices.len() as i32,
+            is_shared_cache: true, //TODO: reconsider
+        }
+    }
+
+    /// Returns the number of GPU bytes consumed by this texture array.
+    fn size_in_bytes(&self) -> usize {
+        let bpp = self.format.bytes_per_pixel() as usize;
+        self.slices.len() * (self.size.width * self.size.height) as usize * bpp // slices * pixels-per-slice * bytes-per-pixel
+    }
+
+    fn update_profile(&self, counter: &mut ResourceProfileCounter) {
+        counter.set(self.slices.len(), self.size_in_bytes());
+    }
+
+    /// Find a free slice.
+    fn find_free(&self) -> Option<LayerIndex> {
+        self.slices.iter().position(|slice| slice.uv_rect_handle.is_none())
+    }
+
+    /// Grow the array by the specified number of slices, returning the index of the first new slice.
+    fn grow(&mut self, count: usize) -> LayerIndex {
+        let index = self.slices.len();
+        for _ in 0 .. count {
+            self.slices.push(WholeTextureSlice {
+                uv_rect_handle: None,
+            });
+        }
+        index
+    }
+
+    /// Occupy a specified slice by a cache entry.
+    fn occupy(&mut self, layer_index: usize, now: FrameStamp) -> CacheEntry {
+        let uv_rect_handle = GpuCacheHandle::new();
+        assert!(self.slices[layer_index].uv_rect_handle.is_none()); // the slice must be free; occupying a taken slice is a bug
+        self.slices[layer_index].uv_rect_handle = Some(uv_rect_handle);
+
+        CacheEntry {
+            size: self.size,
+            user_data: [0.0; 3],
+            last_access: now,
+            details: EntryDetails::Picture {
+                layer_index,
+            },
+            uv_rect_handle,
+            format: self.format,
+            filter: self.filter,
+            texture_id: self.texture_id,
+            eviction_notice: None,
+            uv_rect_kind: UvRectKind::Rect,
+            eviction: Eviction::Eager,
+        }
+    }
+
+    /// Reset the texture array to the specified number of slices, if it's larger.
+    fn reset(
+        &mut self, num_slices: usize
+    ) -> Option<CacheTextureId> {
+        if self.slices.len() <= num_slices {
+            None
+        } else {
+            self.slices.truncate(num_slices);
+            Some(self.texture_id) // report which texture was shrunk so the caller can act on it
+        }
+    }
+}
+
+
 impl TextureCacheUpdate {
     // Constructs a TextureCacheUpdate operation to be passed to the
     // rendering thread in order to do an upload to the right
     // location in the texture cache.
     fn new_update(
         data: CachedImageData,
         descriptor: &ImageDescriptor,
         origin: DeviceIntPoint,