Bug 1551131 - Refactor WR batching code in preparation for different batches per dirty region / render task. r=nical
author Glenn Watson <github@intuitionlibrary.com>
Tue, 14 May 2019 07:17:24 +0000
changeset 532564 5048590d3b69aa6c1d9fd13e388218dc344d214e
parent 532563 030c742c25d5e3aab01c2e020345a7cd5506b207
child 532565 cf68c385145b52cc075ffc854613603b637c9110
push id 11270
push user rgurzau@mozilla.com
push date Wed, 15 May 2019 15:07:19 +0000
treeherder mozilla-beta@571bc76da583 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers nical
bugs 1551131
milestone 68.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1551131 - Refactor WR batching code in preparation for different batches per dirty region / render task. r=nical

* Store render task address per-instance rather than per-primitive, to allow adding a single primitive to multiple batches / render tasks.
* Store render task id inside alpha batch builder, since multiple batch builders will be passed in future.
* Add primitive visibility mask, storing a bit mask of which dirty regions a visible primitive intersects.
* Store RenderTaskAddress as a u16 in CPU and shader types.
* Add picture caching debug flag to wrench.

Differential Revision: https://phabricator.services.mozilla.com/D30854
gfx/wr/webrender/res/brush.glsl
gfx/wr/webrender/res/prim_shared.glsl
gfx/wr/webrender/res/ps_split_composite.glsl
gfx/wr/webrender/res/ps_text_run.glsl
gfx/wr/webrender/src/batch.rs
gfx/wr/webrender/src/frame_builder.rs
gfx/wr/webrender/src/gpu_cache.rs
gfx/wr/webrender/src/gpu_types.rs
gfx/wr/webrender/src/prim_store/mod.rs
gfx/wr/webrender/src/render_task.rs
gfx/wr/webrender/src/renderer.rs
gfx/wr/webrender/src/tiling.rs
gfx/wr/wrench/src/main.rs
--- a/gfx/wr/webrender/res/brush.glsl
+++ b/gfx/wr/webrender/res/brush.glsl
@@ -25,17 +25,18 @@ void brush_vs(
 #define BRUSH_FLAG_SEGMENT_REPEAT_Y             8
 #define BRUSH_FLAG_TEXEL_RECT                  16
 
 #define INVALID_SEGMENT_INDEX                   0xffff
 
 void main(void) {
     // Load the brush instance from vertex attributes.
     int prim_header_address = aData.x;
-    int clip_address = aData.y;
+    int render_task_index = aData.y >> 16;
+    int clip_address = aData.y & 0xffff;
     int segment_index = aData.z & 0xffff;
     int edge_flags = (aData.z >> 16) & 0xff;
     int brush_flags = (aData.z >> 24) & 0xff;
     int segment_user_data = aData.w;
     PrimitiveHeader ph = fetch_prim_header(prim_header_address);
 
     // Fetch the segment of this brush primitive we are drawing.
     vec4 segment_data;
@@ -52,17 +53,17 @@ void main(void) {
         segment_rect = RectWithSize(segment_info[0].xy, segment_info[0].zw);
         segment_rect.p0 += ph.local_rect.p0;
         segment_data = segment_info[1];
     }
 
     VertexInfo vi;
 
     // Fetch the dynamic picture that we are drawing on.
-    PictureTask pic_task = fetch_picture_task(ph.render_task_index);
+    PictureTask pic_task = fetch_picture_task(render_task_index);
     ClipArea clip_area = fetch_clip_area(clip_address);
 
     Transform transform = fetch_transform(ph.transform_id);
 
     // Write the normal vertex information out.
     if (transform.is_axis_aligned) {
         vi = write_vertex(
             segment_rect,
--- a/gfx/wr/webrender/res/prim_shared.glsl
+++ b/gfx/wr/webrender/res/prim_shared.glsl
@@ -51,17 +51,16 @@ in ivec4 aData;
 #define VECS_PER_PRIM_HEADER_I 2U
 
 struct PrimitiveHeader {
     RectWithSize local_rect;
     RectWithSize local_clip_rect;
     vec4 snap_offsets;
     float z;
     int specific_prim_address;
-    int render_task_index;
     int transform_id;
     ivec4 user_data;
 };
 
 PrimitiveHeader fetch_prim_header(int index) {
     PrimitiveHeader ph;
 
     ivec2 uv_f = get_fetch_uv(index, VECS_PER_PRIM_HEADER_F);
@@ -70,19 +69,18 @@ PrimitiveHeader fetch_prim_header(int in
     ph.snap_offsets = TEXEL_FETCH(sPrimitiveHeadersF, uv_f, 0, ivec2(2, 0));
     ph.local_rect = RectWithSize(local_rect.xy, local_rect.zw);
     ph.local_clip_rect = RectWithSize(local_clip_rect.xy, local_clip_rect.zw);
 
     ivec2 uv_i = get_fetch_uv(index, VECS_PER_PRIM_HEADER_I);
     ivec4 data0 = TEXEL_FETCH(sPrimitiveHeadersI, uv_i, 0, ivec2(0, 0));
     ivec4 data1 = TEXEL_FETCH(sPrimitiveHeadersI, uv_i, 0, ivec2(1, 0));
     ph.z = float(data0.x);
-    ph.render_task_index = data0.y;
-    ph.specific_prim_address = data0.z;
-    ph.transform_id = data0.w;
+    ph.specific_prim_address = data0.y;
+    ph.transform_id = data0.z;
     ph.user_data = data1;
 
     return ph;
 }
 
 struct VertexInfo {
     vec2 local_pos;
     vec2 snap_offset;
--- a/gfx/wr/webrender/res/ps_split_composite.glsl
+++ b/gfx/wr/webrender/res/ps_split_composite.glsl
@@ -37,33 +37,35 @@ vec2 bilerp(vec2 a, vec2 b, vec2 c, vec2
     vec2 y = mix(c, d, t);
     return mix(x, y, s);
 }
 
 struct SplitCompositeInstance {
     int prim_header_index;
     int polygons_address;
     float z;
+    int render_task_index;
 };
 
 SplitCompositeInstance fetch_composite_instance() {
     SplitCompositeInstance ci;
 
     ci.prim_header_index = aData.x;
     ci.polygons_address = aData.y;
     ci.z = float(aData.z);
+    ci.render_task_index = aData.w;
 
     return ci;
 }
 
 void main(void) {
     SplitCompositeInstance ci = fetch_composite_instance();
     SplitGeometry geometry = fetch_split_geometry(ci.polygons_address);
     PrimitiveHeader ph = fetch_prim_header(ci.prim_header_index);
-    PictureTask dest_task = fetch_picture_task(ph.render_task_index);
+    PictureTask dest_task = fetch_picture_task(ci.render_task_index);
     Transform transform = fetch_transform(ph.transform_id);
     ImageResource res = fetch_image_resource(ph.user_data.x);
     ClipArea clip_area = fetch_clip_area(ph.user_data.w);
 
     vec2 dest_origin = dest_task.common_data.task_rect.p0 -
                        dest_task.content_origin;
 
     vec2 local_pos = bilerp(geometry.local[0], geometry.local[1],
--- a/gfx/wr/webrender/res/ps_text_run.glsl
+++ b/gfx/wr/webrender/res/ps_text_run.glsl
@@ -157,25 +157,26 @@ VertexInfo write_text_vertex(RectWithSiz
     );
 
     return vi;
 }
 
 void main(void) {
     int prim_header_address = aData.x;
     int glyph_index = aData.y & 0xffff;
-    int raster_space = aData.y >> 16;
+    int render_task_index = aData.y >> 16;
     int resource_address = aData.z;
-    int subpx_dir = aData.w >> 16;
-    int color_mode = aData.w & 0xffff;
+    int raster_space = aData.w >> 16;
+    int subpx_dir = (aData.w >> 8) & 0xff;
+    int color_mode = aData.w & 0xff;
 
     PrimitiveHeader ph = fetch_prim_header(prim_header_address);
     Transform transform = fetch_transform(ph.transform_id);
     ClipArea clip_area = fetch_clip_area(ph.user_data.w);
-    PictureTask task = fetch_picture_task(ph.render_task_index);
+    PictureTask task = fetch_picture_task(render_task_index);
 
     TextRun text = fetch_text_run(ph.specific_prim_address);
     vec2 text_offset = vec2(ph.user_data.xy) / 256.0;
 
     if (color_mode == COLOR_MODE_FROM_PASS) {
         color_mode = uMode;
     }
 
--- a/gfx/wr/webrender/src/batch.rs
+++ b/gfx/wr/webrender/src/batch.rs
@@ -461,36 +461,39 @@ struct SegmentInstanceData {
     user_data: i32,
 }
 
 /// Encapsulates the logic of building batches for items that are blended.
 pub struct AlphaBatchBuilder {
     pub batch_lists: Vec<BatchList>,
     screen_size: DeviceIntSize,
     break_advanced_blend_batches: bool,
+    render_task_id: RenderTaskId,
 }
 
 impl AlphaBatchBuilder {
     pub fn new(
         screen_size: DeviceIntSize,
         break_advanced_blend_batches: bool,
+        render_task_id: RenderTaskId,
     ) -> Self {
         let batch_lists = vec![
             BatchList::new(
                 screen_size,
                 Vec::new(),
                 Vec::new(),
                 break_advanced_blend_batches,
             ),
         ];
 
         AlphaBatchBuilder {
             batch_lists,
             screen_size,
             break_advanced_blend_batches,
+            render_task_id,
         }
     }
 
     fn push_new_batch_list(
         &mut self,
         regions: Vec<DeviceIntRect>,
         tile_blits: Vec<TileBlit>,
     ) {
@@ -558,38 +561,33 @@ impl BatchBuilder {
         }
     }
 
     /// Add a picture to a given batch builder.
     pub fn add_pic_to_batch(
         &mut self,
         pic: &PicturePrimitive,
         batcher: &mut AlphaBatchBuilder,
-        task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
         prim_headers: &mut PrimitiveHeaders,
         transforms: &mut TransformPalette,
         root_spatial_node_index: SpatialNodeIndex,
         z_generator: &mut ZBufferIdGenerator,
     ) {
-        let task_address = render_tasks.get_task_address(task_id);
-
         // Add each run in this picture to the batch.
         for prim_instance in &pic.prim_list.prim_instances {
             self.add_prim_to_batch(
                 prim_instance,
                 batcher,
                 ctx,
                 gpu_cache,
                 render_tasks,
-                task_id,
-                task_address,
                 deferred_resolves,
                 prim_headers,
                 transforms,
                 root_spatial_node_index,
                 z_generator,
             );
         }
     }
@@ -600,18 +598,16 @@ impl BatchBuilder {
     // in that picture are being drawn into the same target.
     fn add_prim_to_batch(
         &mut self,
         prim_instance: &PrimitiveInstance,
         batcher: &mut AlphaBatchBuilder,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
-        task_id: RenderTaskId,
-        task_address: RenderTaskAddress,
         deferred_resolves: &mut Vec<DeferredResolve>,
         prim_headers: &mut PrimitiveHeaders,
         transforms: &mut TransformPalette,
         root_spatial_node_index: SpatialNodeIndex,
         z_generator: &mut ZBufferIdGenerator,
     ) {
         if prim_instance.visibility_info == PrimitiveVisibilityIndex::INVALID {
             return;
@@ -640,16 +636,17 @@ impl BatchBuilder {
 
         let prim_common_data = &ctx.data_stores.as_common_data(&prim_instance);
         let prim_rect = LayoutRect::new(
             prim_instance.prim_origin,
             prim_common_data.prim_size,
         );
 
         let snap_offsets = prim_info.snap_offsets;
+        let render_task_address = render_tasks.get_task_address(batcher.render_task_id);
 
         if is_chased {
             println!("\tbatch {:?} with bound {:?}", prim_rect, bounding_rect);
         }
 
         match prim_instance.kind {
             PrimitiveInstanceKind::Clear { data_handle } => {
                 let prim_data = &ctx.data_stores.prim[data_handle];
@@ -658,17 +655,16 @@ impl BatchBuilder {
                 // TODO(gw): We can abstract some of the common code below into
                 //           helper methods, as we port more primitives to make
                 //           use of interning.
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets,
-                    task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let prim_header_index = prim_headers.push(
                     &prim_header,
                     z_id,
                     [get_shader_opacity(1.0), 0, 0, 0],
@@ -684,16 +680,17 @@ impl BatchBuilder {
                     prim_info.clip_task_index,
                     render_tasks,
                 ).unwrap_or(OPAQUE_TASK_ADDRESS);
 
                 let instance = PrimitiveInstanceData::from(BrushInstance {
                     segment_index: INVALID_SEGMENT_INDEX,
                     edge_flags: EdgeAaSegmentMask::all(),
                     clip_task_address,
+                    render_task_address,
                     brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
                     prim_header_index,
                     user_data: 0,
                 });
 
                 batcher.current_batch_list().push_single_instance(
                     batch_key,
                     bounding_rect,
@@ -733,17 +730,16 @@ impl BatchBuilder {
                 } else {
                     BlendMode::None
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets,
-                    task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let batch_params = BrushBatchParameters::instanced(
                     BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
                     [
                         ShaderColorMode::Image as i32 | ((AlphaType::PremultipliedAlpha as i32) << 16),
@@ -768,16 +764,17 @@ impl BatchBuilder {
                     &batch_params,
                     specified_blend_mode,
                     non_segmented_blend_mode,
                     prim_header_index,
                     bounding_rect,
                     transform_kind,
                     render_tasks,
                     z_id,
+                    render_task_address,
                     prim_info.clip_task_index,
                     ctx,
                 );
             }
             PrimitiveInstanceKind::TextRun { data_handle, run_index, .. } => {
                 let run = &ctx.prim_store.text_runs[run_index];
                 let subpx_dir = run.used_font.get_subpx_dir();
 
@@ -786,17 +783,16 @@ impl BatchBuilder {
                 let prim_data = &ctx.data_stores.text_run[data_handle];
                 let alpha_batch_list = &mut batcher.batch_lists.last_mut().unwrap().alpha_batch_list;
                 let prim_cache_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets,
-                    task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let clip_task_address = ctx.get_prim_clip_task_address(
                     prim_info.clip_task_index,
                     render_tasks,
                 ).unwrap_or(OPAQUE_TASK_ADDRESS);
@@ -891,20 +887,20 @@ impl BatchBuilder {
                         );
 
                         let rasterization_space = match run.raster_space {
                             RasterSpace::Screen => RasterizationSpace::Screen,
                             RasterSpace::Local(..) => RasterizationSpace::Local,
                         };
                         for glyph in glyphs {
                             batch.push(base_instance.build(
-                                glyph.index_in_text_run |
-                                (rasterization_space as i32) << 16,
+                                glyph.index_in_text_run | ((render_task_address.0 as i32) << 16),
                                 glyph.uv_rect_address.as_int(),
-                                (subpx_dir as u32 as i32) << 16 |
+                                (rasterization_space as i32) << 16 |
+                                (subpx_dir as u32 as i32) << 8 |
                                 (color_mode as u32 as i32),
                             ));
                         }
                     },
                 );
             }
             PrimitiveInstanceKind::LineDecoration { data_handle, ref cache_handle, .. } => {
                 // The GPU cache data is stored in the template and reused across
@@ -954,17 +950,16 @@ impl BatchBuilder {
                 } else {
                     BlendMode::None
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets,
-                    task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let prim_header_index = prim_headers.push(
                     &prim_header,
                     z_id,
                     prim_user_data,
@@ -980,16 +975,17 @@ impl BatchBuilder {
                     prim_info.clip_task_index,
                     render_tasks,
                 ).unwrap_or(OPAQUE_TASK_ADDRESS);
 
                 let instance = PrimitiveInstanceData::from(BrushInstance {
                     segment_index: INVALID_SEGMENT_INDEX,
                     edge_flags: EdgeAaSegmentMask::all(),
                     clip_task_address,
+                    render_task_address,
                     brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
                     prim_header_index,
                     user_data: segment_user_data,
                 });
 
                 batcher.current_batch_list().push_single_instance(
                     batch_key,
                     bounding_rect,
@@ -1001,17 +997,16 @@ impl BatchBuilder {
                 let picture = &ctx.prim_store.pictures[pic_index.0];
                 let non_segmented_blend_mode = BlendMode::PremultipliedAlpha;
                 let prim_cache_address = gpu_cache.get_address(&ctx.globals.default_image_handle);
 
                 let prim_header = PrimitiveHeader {
                     local_rect: picture.snapped_local_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets,
-                    task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 match picture.context_3d {
                     // Convert all children of the 3D hierarchy root into batches.
                     Picture3DContext::In { root_data: Some(ref list), .. } => {
                         for child in list {
@@ -1040,18 +1035,17 @@ impl BatchBuilder {
                                 prim_info.clip_task_index,
                                 render_tasks,
                             ).unwrap_or(OPAQUE_TASK_ADDRESS);
 
                             let prim_header = PrimitiveHeader {
                                 local_rect: pic.snapped_local_rect,
                                 local_clip_rect: prim_info.combined_local_clip_rect,
                                 snap_offsets,
-                                task_address,
-                                specific_prim_address: GpuCacheAddress::invalid(),
+                                specific_prim_address: GpuCacheAddress::INVALID,
                                 transform_id: transforms
                                     .get_id(
                                         child.spatial_node_index,
                                         root_spatial_node_index,
                                         ctx.clip_scroll_tree,
                                     ),
                             };
 
@@ -1078,16 +1072,17 @@ impl BatchBuilder {
                                 BatchKind::SplitComposite,
                                 BlendMode::PremultipliedAlpha,
                                 BatchTextures::no_texture(),
                             );
 
                             let instance = SplitCompositeInstance::new(
                                 prim_header_index,
                                 child.gpu_address,
+                                render_task_address,
                                 z_id,
                             );
 
                             batcher.current_batch_list().push_single_instance(
                                 key,
                                 &prim_info.clip_chain.pic_clip_rect,
                                 z_id,
                                 PrimitiveInstanceData::from(instance),
@@ -1127,17 +1122,16 @@ impl BatchBuilder {
 
                                 // If the tile cache is disabled, just recurse into the
                                 // picture like a normal pass-through picture, adding
                                 // any child primitives into the parent surface batches.
                                 if !tile_cache.is_enabled {
                                     self.add_pic_to_batch(
                                         picture,
                                         batcher,
-                                        task_id,
                                         ctx,
                                         gpu_cache,
                                         render_tasks,
                                         deferred_resolves,
                                         prim_headers,
                                         transforms,
                                         root_spatial_node_index,
                                         z_generator,
@@ -1175,17 +1169,16 @@ impl BatchBuilder {
                                             prim_rect,
                                             snap_offsets,
                                         );
 
                                         let prim_header = PrimitiveHeader {
                                             local_rect: tile_rect,
                                             local_clip_rect,
                                             snap_offsets,
-                                            task_address,
                                             specific_prim_address: prim_cache_address,
                                             transform_id,
                                         };
 
                                         let prim_header_index = prim_headers.push(&prim_header, z_id, [
                                             ShaderColorMode::Image as i32 | ((AlphaType::PremultipliedAlpha as i32) << 16),
                                             RasterizationSpace::Local as i32,
                                             get_shader_opacity(1.0),
@@ -1204,16 +1197,17 @@ impl BatchBuilder {
 
                                         let uv_rect_address = gpu_cache
                                             .get_address(&cache_item.uv_rect_handle)
                                             .as_int();
 
                                         let instance = BrushInstance {
                                             prim_header_index,
                                             clip_task_address,
+                                            render_task_address,
                                             segment_index: INVALID_SEGMENT_INDEX,
                                             edge_flags: EdgeAaSegmentMask::empty(),
                                             brush_flags,
                                             user_data: uv_rect_address,
                                         };
 
                                         // Instead of retrieving the batch once and adding each tile instance,
                                         // use this API to get an appropriate batch for each tile, since
@@ -1228,17 +1222,18 @@ impl BatchBuilder {
                                         batch.push(PrimitiveInstanceData::from(instance));
                                     }
 
                                     // If there is a dirty rect for the tile cache, recurse into the
                                     // main picture primitive list, and draw them first.
                                     if !tile_cache.dirty_region.is_empty() {
                                         let mut tile_blits = Vec::new();
 
-                                        let (target_rect, _) = render_tasks[task_id].get_target_rect();
+                                        let (target_rect, _) = render_tasks[batcher.render_task_id]
+                                            .get_target_rect();
 
                                         for blit in &tile_cache.pending_blits {
                                             tile_blits.push(TileBlit {
                                                 dest_offset: blit.dest_offset,
                                                 size: blit.size,
                                                 target: blit.target.clone(),
                                                 src_offset: DeviceIntPoint::new(
                                                     blit.src_offset.x + target_rect.origin.x,
@@ -1261,17 +1256,16 @@ impl BatchBuilder {
                                         batcher.push_new_batch_list(
                                             batch_regions,
                                             tile_blits,
                                         );
 
                                         self.add_pic_to_batch(
                                             picture,
                                             batcher,
-                                            task_id,
                                             ctx,
                                             gpu_cache,
                                             render_tasks,
                                             deferred_resolves,
                                             prim_headers,
                                             transforms,
                                             root_spatial_node_index,
                                             z_generator,
@@ -1307,16 +1301,17 @@ impl BatchBuilder {
                                             0,
                                         ]);
 
                                         let instance = BrushInstance {
                                             prim_header_index,
                                             segment_index: INVALID_SEGMENT_INDEX,
                                             edge_flags: EdgeAaSegmentMask::empty(),
                                             brush_flags,
+                                            render_task_address,
                                             clip_task_address,
                                             user_data: uv_rect_address.as_int(),
                                         };
 
                                         batcher.current_batch_list().push_single_instance(
                                             key,
                                             bounding_rect,
                                             z_id,
@@ -1378,16 +1373,17 @@ impl BatchBuilder {
                                                 RasterizationSpace::Screen as i32,
                                                 get_shader_opacity(1.0),
                                                 0,
                                             ]);
 
                                             let shadow_instance = BrushInstance {
                                                 prim_header_index: shadow_prim_header_index,
                                                 clip_task_address,
+                                                render_task_address,
                                                 segment_index: INVALID_SEGMENT_INDEX,
                                                 edge_flags: EdgeAaSegmentMask::empty(),
                                                 brush_flags,
                                                 user_data: shadow_uv_rect_address,
                                             };
 
                                             batcher.current_batch_list().push_single_instance(
                                                 shadow_key,
@@ -1403,16 +1399,17 @@ impl BatchBuilder {
                                             RasterizationSpace::Screen as i32,
                                             get_shader_opacity(1.0),
                                             0,
                                         ]);
 
                                         let content_instance = BrushInstance {
                                             prim_header_index: content_prim_header_index,
                                             clip_task_address,
+                                            render_task_address,
                                             segment_index: INVALID_SEGMENT_INDEX,
                                             edge_flags: EdgeAaSegmentMask::empty(),
                                             brush_flags,
                                             user_data: content_uv_rect_address,
                                         };
 
                                         batcher.current_batch_list().push_single_instance(
                                             content_key,
@@ -1485,25 +1482,27 @@ impl BatchBuilder {
                                             RasterizationSpace::Screen as i32,
                                             get_shader_opacity(1.0),
                                             0,
                                         ]);
 
                                         let shadow_instance = BrushInstance {
                                             prim_header_index: shadow_prim_header_index,
                                             clip_task_address,
+                                            render_task_address,
                                             segment_index: INVALID_SEGMENT_INDEX,
                                             edge_flags: EdgeAaSegmentMask::empty(),
                                             brush_flags,
                                             user_data: shadow_uv_rect_address,
                                         };
 
                                         let content_instance = BrushInstance {
                                             prim_header_index: content_prim_header_index,
                                             clip_task_address,
+                                            render_task_address,
                                             segment_index: INVALID_SEGMENT_INDEX,
                                             edge_flags: EdgeAaSegmentMask::empty(),
                                             brush_flags,
                                             user_data: content_uv_rect_address,
                                         };
 
                                         batcher.current_batch_list().push_single_instance(
                                             shadow_key,
@@ -1582,16 +1581,17 @@ impl BatchBuilder {
                                             filter_mode,
                                             user_data,
                                             0,
                                         ]);
 
                                         let instance = BrushInstance {
                                             prim_header_index,
                                             clip_task_address,
+                                            render_task_address,
                                             segment_index: INVALID_SEGMENT_INDEX,
                                             edge_flags: EdgeAaSegmentMask::empty(),
                                             brush_flags,
                                             user_data: 0,
                                         };
 
                                         batcher.current_batch_list().push_single_instance(
                                             key,
@@ -1631,16 +1631,17 @@ impl BatchBuilder {
                                     filter_mode,
                                     user_data,
                                     0,
                                 ]);
 
                                 let instance = BrushInstance {
                                     prim_header_index,
                                     clip_task_address,
+                                    render_task_address,
                                     segment_index: INVALID_SEGMENT_INDEX,
                                     edge_flags: EdgeAaSegmentMask::empty(),
                                     brush_flags,
                                     user_data: 0,
                                 };
 
                                 batcher.current_batch_list().push_single_instance(
                                     key,
@@ -1666,16 +1667,17 @@ impl BatchBuilder {
                                     RasterizationSpace::Local as i32,
                                     get_shader_opacity(1.0),
                                     0,
                                 ]);
 
                                 let instance = BrushInstance {
                                     prim_header_index,
                                     clip_task_address,
+                                    render_task_address,
                                     segment_index: INVALID_SEGMENT_INDEX,
                                     edge_flags: EdgeAaSegmentMask::empty(),
                                     brush_flags,
                                     user_data: uv_rect_address.as_int(),
                                 };
 
                                 batcher.current_batch_list().push_single_instance(
                                     key,
@@ -1686,17 +1688,17 @@ impl BatchBuilder {
                             }
                             PictureCompositeMode::MixBlend(mode) => {
                                 let cache_task_id = surface.expect("bug: surface must be allocated by now");
                                 let backdrop_id = picture.secondary_render_task_id.expect("no backdrop!?");
 
                                 let key = BatchKey::new(
                                     BatchKind::Brush(
                                         BrushBatchKind::MixBlend {
-                                            task_id,
+                                            task_id: batcher.render_task_id,
                                             source_id: cache_task_id,
                                             backdrop_id,
                                         },
                                     ),
                                     BlendMode::PremultipliedAlpha,
                                     BatchTextures::no_texture(),
                                 );
                                 let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
@@ -1706,16 +1708,17 @@ impl BatchBuilder {
                                     backdrop_task_address.0 as i32,
                                     source_task_address.0 as i32,
                                     0,
                                 ]);
 
                                 let instance = BrushInstance {
                                     prim_header_index,
                                     clip_task_address,
+                                    render_task_address,
                                     segment_index: INVALID_SEGMENT_INDEX,
                                     edge_flags: EdgeAaSegmentMask::empty(),
                                     brush_flags,
                                     user_data: 0,
                                 };
 
                                 batcher.current_batch_list().push_single_instance(
                                     key,
@@ -1752,17 +1755,16 @@ impl BatchBuilder {
                                 } else {
                                     (prim_cache_address, None)
                                 };
 
                                 let prim_header = PrimitiveHeader {
                                     local_rect: picture.snapped_local_rect,
                                     local_clip_rect: prim_info.combined_local_clip_rect,
                                     snap_offsets,
-                                    task_address,
                                     specific_prim_address: prim_cache_address,
                                     transform_id,
                                 };
 
                                 let prim_header_index = prim_headers.push(
                                     &prim_header,
                                     z_id,
                                     batch_params.prim_user_data,
@@ -1784,29 +1786,29 @@ impl BatchBuilder {
                                     &batch_params,
                                     specified_blend_mode,
                                     non_segmented_blend_mode,
                                     prim_header_index,
                                     bounding_rect,
                                     transform_kind,
                                     render_tasks,
                                     z_id,
+                                    render_task_address,
                                     prim_info.clip_task_index,
                                     ctx,
                                 );
                             }
                         }
                     }
                     None => {
                         // If this picture is being drawn into an existing target (i.e. with
                         // no composition operation), recurse and add to the current batch list.
                         self.add_pic_to_batch(
                             picture,
                             batcher,
-                            task_id,
                             ctx,
                             gpu_cache,
                             render_tasks,
                             deferred_resolves,
                             prim_headers,
                             transforms,
                             root_spatial_node_index,
                             z_generator,
@@ -1840,17 +1842,16 @@ impl BatchBuilder {
                 } else {
                     BlendMode::None
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets: snap_offsets,
-                    task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let batch_params = BrushBatchParameters::shared(
                     BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
                     textures,
                     [
@@ -1875,16 +1876,17 @@ impl BatchBuilder {
                     &batch_params,
                     specified_blend_mode,
                     non_segmented_blend_mode,
                     prim_header_index,
                     bounding_rect,
                     transform_kind,
                     render_tasks,
                     z_id,
+                    render_task_address,
                     prim_info.clip_task_index,
                     ctx,
                 );
             }
             PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, opacity_binding_index, .. } => {
                 let prim_data = &ctx.data_stores.prim[data_handle];
                 let specified_blend_mode = BlendMode::PremultipliedAlpha;
                 let opacity_binding = ctx.prim_store.get_opacity_binding(opacity_binding_index);
@@ -1915,17 +1917,16 @@ impl BatchBuilder {
                     let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
                     (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets: snap_offsets,
-                    task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let prim_header_index = prim_headers.push(
                     &prim_header,
                     z_id,
                     batch_params.prim_user_data,
@@ -1938,16 +1939,17 @@ impl BatchBuilder {
                     &batch_params,
                     specified_blend_mode,
                     non_segmented_blend_mode,
                     prim_header_index,
                     bounding_rect,
                     transform_kind,
                     render_tasks,
                     z_id,
+                    render_task_address,
                     prim_info.clip_task_index,
                     ctx,
                 );
             }
             PrimitiveInstanceKind::YuvImage { data_handle, segment_instance_index, .. } => {
                 let yuv_image_data = &ctx.data_stores.yuv_image[data_handle].kind;
                 let mut textures = BatchTextures::no_texture();
                 let mut uv_rect_addresses = [0; 3];
@@ -2024,17 +2026,16 @@ impl BatchBuilder {
                     let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
                     (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
                 };
 
                 let prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets: snap_offsets,
-                    task_address,
                     specific_prim_address: prim_cache_address,
                     transform_id,
                 };
 
                 let prim_header_index = prim_headers.push(
                     &prim_header,
                     z_id,
                     batch_params.prim_user_data,
@@ -2047,16 +2048,17 @@ impl BatchBuilder {
                     &batch_params,
                     specified_blend_mode,
                     non_segmented_blend_mode,
                     prim_header_index,
                     bounding_rect,
                     transform_kind,
                     render_tasks,
                     z_id,
+                    render_task_address,
                     prim_info.clip_task_index,
                     ctx,
                 );
             }
             PrimitiveInstanceKind::Image { data_handle, image_instance_index, .. } => {
                 let image_data = &ctx.data_stores.image[data_handle].kind;
                 let common_data = &ctx.data_stores.image[data_handle].common;
                 let image_instance = &ctx.prim_store.images[image_instance_index];
@@ -2130,17 +2132,16 @@ impl BatchBuilder {
                         let segments = Some(&ctx.scratch.segments[segment_instance.segments_range]);
                         (gpu_cache.get_address(&segment_instance.gpu_cache_handle), segments)
                     };
 
                     let prim_header = PrimitiveHeader {
                         local_rect: prim_rect,
                         local_clip_rect: prim_info.combined_local_clip_rect,
                         snap_offsets: snap_offsets,
-                        task_address,
                         specific_prim_address: prim_cache_address,
                         transform_id,
                     };
 
                     let prim_header_index = prim_headers.push(
                         &prim_header,
                         z_id,
                         batch_params.prim_user_data,
@@ -2153,16 +2154,17 @@ impl BatchBuilder {
                         &batch_params,
                         specified_blend_mode,
                         non_segmented_blend_mode,
                         prim_header_index,
                         bounding_rect,
                         transform_kind,
                         render_tasks,
                         z_id,
+                        render_task_address,
                         prim_info.clip_task_index,
                         ctx,
                     );
                 } else {
                     const VECS_PER_SPECIFIC_BRUSH: usize = 3;
                     let max_tiles_per_header = (MAX_VERTEX_TEXTURE_WIDTH - VECS_PER_SPECIFIC_BRUSH) / VECS_PER_SEGMENT;
 
                     let clip_task_address = ctx.get_prim_clip_task_address(
@@ -2184,32 +2186,32 @@ impl BatchBuilder {
                             gpu_blocks.push(GpuBlockData::EMPTY);
                         }
 
                         let gpu_handle = gpu_cache.push_per_frame_blocks(&gpu_blocks);
                         let prim_header = PrimitiveHeader {
                             local_rect: prim_rect,
                             local_clip_rect: image_instance.tight_local_clip_rect,
                             snap_offsets,
-                            task_address,
                             specific_prim_address: gpu_cache.get_address(&gpu_handle),
                             transform_id,
                         };
                         let prim_header_index = prim_headers.push(&prim_header, z_id, prim_user_data);
 
                         for (i, tile) in chunk.iter().enumerate() {
                             if let Some((batch_kind, textures, uv_rect_address)) = get_image_tile_params(
                                 ctx.resource_cache,
                                 gpu_cache,
                                 deferred_resolves,
                                 request.with_tile(tile.tile_offset),
                             ) {
                                 let base_instance = BrushInstance {
                                     prim_header_index,
                                     clip_task_address,
+                                    render_task_address,
                                     segment_index: i as i32,
                                     edge_flags: tile.edge_flags,
                                     brush_flags: BrushFlags::SEGMENT_RELATIVE | BrushFlags::PERSPECTIVE_INTERPOLATION,
                                     user_data: uv_rect_address.as_int(),
                                 };
                                 let batch_key = BatchKey {
                                     blend_mode: specified_blend_mode,
                                     kind: BatchKind::Brush(batch_kind),
@@ -2230,18 +2232,17 @@ impl BatchBuilder {
                 let gradient = &ctx.prim_store.linear_gradients[gradient_index];
                 let prim_data = &ctx.data_stores.linear_grad[data_handle];
                 let specified_blend_mode = BlendMode::PremultipliedAlpha;
 
                 let mut prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets,
-                    task_address,
-                    specific_prim_address: GpuCacheAddress::invalid(),
+                    specific_prim_address: GpuCacheAddress::INVALID,
                     transform_id,
                 };
 
                 let non_segmented_blend_mode = if !prim_data.opacity.is_opaque ||
                     prim_info.clip_task_index != ClipTaskIndex::INVALID ||
                     transform_kind == TransformedRectKind::Complex
                 {
                     specified_blend_mode
@@ -2286,16 +2287,17 @@ impl BatchBuilder {
                         prim_info.clip_task_index,
                         render_tasks,
                     ).unwrap_or(OPAQUE_TASK_ADDRESS);
 
                     let instance = PrimitiveInstanceData::from(BrushInstance {
                         segment_index: INVALID_SEGMENT_INDEX,
                         edge_flags: EdgeAaSegmentMask::all(),
                         clip_task_address,
+                        render_task_address,
                         brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
                         prim_header_index,
                         user_data: segment_user_data,
                     });
 
                     batcher.current_batch_list().push_single_instance(
                         batch_key,
                         bounding_rect,
@@ -2336,16 +2338,17 @@ impl BatchBuilder {
                         &batch_params,
                         specified_blend_mode,
                         non_segmented_blend_mode,
                         prim_header_index,
                         bounding_rect,
                         transform_kind,
                         render_tasks,
                         z_id,
+                        render_task_address,
                         prim_info.clip_task_index,
                         ctx,
                     );
                 } else {
                     let visible_tiles = &ctx.scratch.gradient_tiles[gradient.visible_tiles_range];
 
                     let clip_task_address = ctx.get_prim_clip_task_address(
                         prim_info.clip_task_index,
@@ -2353,16 +2356,17 @@ impl BatchBuilder {
                     ).unwrap_or(OPAQUE_TASK_ADDRESS);
 
                     add_gradient_tiles(
                         visible_tiles,
                         &prim_data.stops_handle,
                         BrushBatchKind::LinearGradient,
                         specified_blend_mode,
                         bounding_rect,
+                        render_task_address,
                         clip_task_address,
                         gpu_cache,
                         batcher.current_batch_list(),
                         &prim_header,
                         prim_headers,
                         z_id,
                     );
                 }
@@ -2370,18 +2374,17 @@ impl BatchBuilder {
             PrimitiveInstanceKind::RadialGradient { data_handle, ref visible_tiles_range, .. } => {
                 let prim_data = &ctx.data_stores.radial_grad[data_handle];
                 let specified_blend_mode = BlendMode::PremultipliedAlpha;
 
                 let mut prim_header = PrimitiveHeader {
                     local_rect: prim_rect,
                     local_clip_rect: prim_info.combined_local_clip_rect,
                     snap_offsets,
-                    task_address,
-                    specific_prim_address: GpuCacheAddress::invalid(),
+                    specific_prim_address: GpuCacheAddress::INVALID,
                     transform_id,
                 };
 
                 if visible_tiles_range.is_empty() {
                     let non_segmented_blend_mode = if !prim_data.opacity.is_opaque ||
                         prim_info.clip_task_index != ClipTaskIndex::INVALID ||
                         transform_kind == TransformedRectKind::Complex
                     {
@@ -2423,16 +2426,17 @@ impl BatchBuilder {
                         &batch_params,
                         specified_blend_mode,
                         non_segmented_blend_mode,
                         prim_header_index,
                         bounding_rect,
                         transform_kind,
                         render_tasks,
                         z_id,
+                        render_task_address,
                         prim_info.clip_task_index,
                         ctx,
                     );
                 } else {
                     let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
 
                     let clip_task_address = ctx.get_prim_clip_task_address(
                         prim_info.clip_task_index,
@@ -2440,16 +2444,17 @@ impl BatchBuilder {
                     ).unwrap_or(OPAQUE_TASK_ADDRESS);
 
                     add_gradient_tiles(
                         visible_tiles,
                         &prim_data.stops_handle,
                         BrushBatchKind::RadialGradient,
                         specified_blend_mode,
                         bounding_rect,
+                        render_task_address,
                         clip_task_address,
                         gpu_cache,
                         batcher.current_batch_list(),
                         &prim_header,
                         prim_headers,
                         z_id,
                     );
                 }
@@ -2467,16 +2472,17 @@ impl BatchBuilder {
         batch_kind: BrushBatchKind,
         prim_header_index: PrimitiveHeaderIndex,
         alpha_blend_mode: BlendMode,
         bounding_rect: &PictureRect,
         transform_kind: TransformedRectKind,
         render_tasks: &RenderTaskTree,
         z_id: ZBufferId,
         prim_opacity: PrimitiveOpacity,
+        render_task_address: RenderTaskAddress,
         clip_task_index: ClipTaskIndex,
         ctx: &RenderTargetContext,
     ) {
         debug_assert!(clip_task_index != ClipTaskIndex::INVALID);
 
         // Get GPU address of clip task for this segment, or None if
         // the entire segment is clipped out.
         let clip_task_address = match ctx.get_clip_task_address(
@@ -2493,16 +2499,17 @@ impl BatchBuilder {
         let needs_blending = !prim_opacity.is_opaque ||
                              clip_task_address != OPAQUE_TASK_ADDRESS ||
                              (!is_inner && transform_kind == TransformedRectKind::Complex);
 
         let instance = PrimitiveInstanceData::from(BrushInstance {
             segment_index,
             edge_flags: segment.edge_flags,
             clip_task_address,
+            render_task_address,
             brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION | segment.brush_flags,
             prim_header_index,
             user_data: segment_data.user_data,
         });
 
         let batch_key = BatchKey {
             blend_mode: if needs_blending { alpha_blend_mode } else { BlendMode::None },
             kind: BatchKind::Brush(batch_kind),
@@ -2526,16 +2533,17 @@ impl BatchBuilder {
         params: &BrushBatchParameters,
         alpha_blend_mode: BlendMode,
         non_segmented_blend_mode: BlendMode,
         prim_header_index: PrimitiveHeaderIndex,
         bounding_rect: &PictureRect,
         transform_kind: TransformedRectKind,
         render_tasks: &RenderTaskTree,
         z_id: ZBufferId,
+        render_task_address: RenderTaskAddress,
         clip_task_index: ClipTaskIndex,
         ctx: &RenderTargetContext,
     ) {
         match (brush_segments, &params.segment_data) {
             (Some(ref brush_segments), SegmentDataKind::Instanced(ref segment_data)) => {
                 // In this case, we have both a list of segments, and a list of
                 // per-segment instance data. Zip them together to build batches.
                 debug_assert_eq!(brush_segments.len(), segment_data.len());
@@ -2552,16 +2560,17 @@ impl BatchBuilder {
                         params.batch_kind,
                         prim_header_index,
                         alpha_blend_mode,
                         bounding_rect,
                         transform_kind,
                         render_tasks,
                         z_id,
                         prim_opacity,
+                        render_task_address,
                         clip_task_index,
                         ctx,
                     );
                 }
             }
             (Some(ref brush_segments), SegmentDataKind::Shared(ref segment_data)) => {
                 // A list of segments, but the per-segment data is common
                 // between all segments.
@@ -2577,16 +2586,17 @@ impl BatchBuilder {
                         params.batch_kind,
                         prim_header_index,
                         alpha_blend_mode,
                         bounding_rect,
                         transform_kind,
                         render_tasks,
                         z_id,
                         prim_opacity,
+                        render_task_address,
                         clip_task_index,
                         ctx,
                     );
                 }
             }
             (None, SegmentDataKind::Shared(ref segment_data)) => {
                 // No segments, and thus no per-segment instance data.
                 // Note: the blend mode already takes opacity into account
@@ -2598,16 +2608,17 @@ impl BatchBuilder {
                 let clip_task_address = ctx.get_prim_clip_task_address(
                     clip_task_index,
                     render_tasks,
                 ).unwrap_or(OPAQUE_TASK_ADDRESS);
                 let instance = PrimitiveInstanceData::from(BrushInstance {
                     segment_index: INVALID_SEGMENT_INDEX,
                     edge_flags: EdgeAaSegmentMask::all(),
                     clip_task_address,
+                    render_task_address,
                     brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
                     prim_header_index,
                     user_data: segment_data.user_data,
                 });
                 batcher.current_batch_list().push_single_instance(
                     batch_key,
                     bounding_rect,
                     z_id,
@@ -2624,16 +2635,17 @@ impl BatchBuilder {
 }
 
 fn add_gradient_tiles(
     visible_tiles: &[VisibleGradientTile],
     stops_handle: &GpuCacheHandle,
     kind: BrushBatchKind,
     blend_mode: BlendMode,
     bounding_rect: &PictureRect,
+    render_task_address: RenderTaskAddress,
     clip_task_address: RenderTaskAddress,
     gpu_cache: &GpuCache,
     batch_list: &mut BatchList,
     base_prim_header: &PrimitiveHeader,
     prim_headers: &mut PrimitiveHeaders,
     z_id: ZBufferId,
 ) {
     let batch = batch_list.set_params_and_get_batch(
@@ -2664,16 +2676,17 @@ fn add_gradient_tiles(
             ..*base_prim_header
         };
         let prim_header_index = prim_headers.push(&prim_header, z_id, user_data);
 
         batch.push(PrimitiveInstanceData::from(
             BrushInstance {
                 prim_header_index,
                 clip_task_address,
+                render_task_address,
                 segment_index: INVALID_SEGMENT_INDEX,
                 edge_flags: EdgeAaSegmentMask::all(),
                 brush_flags: BrushFlags::PERSPECTIVE_INTERPOLATION,
                 user_data: 0,
             }
         ));
     }
 }
@@ -2914,17 +2927,17 @@ impl ClipBatcher {
         task_origin: DevicePoint,
         screen_origin: DevicePoint,
         device_pixel_scale: f32,
     ) {
         let instance = ClipMaskInstance {
             clip_transform_id: TransformPaletteId::IDENTITY,
             prim_transform_id: TransformPaletteId::IDENTITY,
             clip_data_address,
-            resource_address: GpuCacheAddress::invalid(),
+            resource_address: GpuCacheAddress::INVALID,
             local_pos,
             tile_rect: LayoutRect::zero(),
             sub_rect,
             snap_offsets: SnapOffsets::empty(),
             task_origin,
             screen_origin,
             device_pixel_scale,
         };
@@ -3072,18 +3085,18 @@ impl ClipBatcher {
                 root_spatial_node_index,
                 ROOT_SPATIAL_NODE_INDEX,
                 clip_scroll_tree,
             );
 
             let instance = ClipMaskInstance {
                 clip_transform_id,
                 prim_transform_id,
-                clip_data_address: GpuCacheAddress::invalid(),
-                resource_address: GpuCacheAddress::invalid(),
+                clip_data_address: GpuCacheAddress::INVALID,
+                resource_address: GpuCacheAddress::INVALID,
                 local_pos: clip_instance.local_pos,
                 tile_rect: LayoutRect::zero(),
                 sub_rect: DeviceRect::new(
                     DevicePoint::zero(),
                     actual_rect.size.to_f32(),
                 ),
                 snap_offsets,
                 task_origin,
--- a/gfx/wr/webrender/src/frame_builder.rs
+++ b/gfx/wr/webrender/src/frame_builder.rs
@@ -608,18 +608,18 @@ impl FrameBuilder {
                     &mut deferred_resolves,
                     &self.clip_store,
                     &mut transform_palette,
                     &mut prim_headers,
                     &mut z_generator,
                 );
 
                 match pass.kind {
-                    RenderPassKind::MainFramebuffer(ref color) => {
-                        has_texture_cache_tasks |= color.must_be_drawn();
+                    RenderPassKind::MainFramebuffer { ref main_target, .. } => {
+                        has_texture_cache_tasks |= main_target.must_be_drawn();
                     }
                     RenderPassKind::OffScreen { ref texture_cache, ref color, .. } => {
                         has_texture_cache_tasks |= !texture_cache.is_empty();
                         has_texture_cache_tasks |= color.must_be_drawn();
                     }
                 }
             }
         }
--- a/gfx/wr/webrender/src/gpu_cache.rs
+++ b/gfx/wr/webrender/src/gpu_cache.rs
@@ -172,22 +172,20 @@ pub struct GpuCacheAddress {
 impl GpuCacheAddress {
     fn new(u: usize, v: usize) -> Self {
         GpuCacheAddress {
             u: u as u16,
             v: v as u16,
         }
     }
 
-    pub fn invalid() -> Self {
-        GpuCacheAddress {
-            u: u16::MAX,
-            v: u16::MAX,
-        }
-    }
+    pub const INVALID: GpuCacheAddress = GpuCacheAddress {
+        u: u16::MAX,
+        v: u16::MAX,
+    };
 }
 
 impl Add<usize> for GpuCacheAddress {
     type Output = GpuCacheAddress;
 
     fn add(self, other: usize) -> GpuCacheAddress {
         GpuCacheAddress {
             u: self.u + other as u16,
--- a/gfx/wr/webrender/src/gpu_types.rs
+++ b/gfx/wr/webrender/src/gpu_types.rs
@@ -235,34 +235,33 @@ impl PrimitiveHeaders {
         self.headers_float.push(PrimitiveHeaderF {
             local_rect: prim_header.local_rect,
             local_clip_rect: prim_header.local_clip_rect,
             snap_offsets: prim_header.snap_offsets,
         });
 
         self.headers_int.push(PrimitiveHeaderI {
             z,
-            task_address: prim_header.task_address,
+            unused: 0,
             specific_prim_address: prim_header.specific_prim_address.as_int(),
             transform_id: prim_header.transform_id,
             user_data,
         });
 
         PrimitiveHeaderIndex(id as i32)
     }
 }
 
 // This is a convenience type used to make it easier to pass
 // the common parts around during batching.
 #[derive(Debug)]
 pub struct PrimitiveHeader {
     pub local_rect: LayoutRect,
     pub local_clip_rect: LayoutRect,
     pub snap_offsets: SnapOffsets,
-    pub task_address: RenderTaskAddress,
     pub specific_prim_address: GpuCacheAddress,
     pub transform_id: TransformPaletteId,
 }
 
 // f32 parts of a primitive header
 #[derive(Debug)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
@@ -276,19 +275,19 @@ pub struct PrimitiveHeaderF {
 // i32 parts of a primitive header
 // TODO(gw): Compress parts of these down to u16
 #[derive(Debug)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct PrimitiveHeaderI {
     pub z: ZBufferId,
-    pub task_address: RenderTaskAddress,
     pub specific_prim_address: i32,
     pub transform_id: TransformPaletteId,
+    pub unused: i32,                    // To ensure required 16 byte alignment of vertex textures
     pub user_data: [i32; 4],
 }
 
 pub struct GlyphInstance {
     pub prim_header_index: PrimitiveHeaderIndex,
 }
 
 impl GlyphInstance {
@@ -314,40 +313,43 @@ impl GlyphInstance {
         }
     }
 }
 
 pub struct SplitCompositeInstance {
     pub prim_header_index: PrimitiveHeaderIndex,
     pub polygons_address: GpuCacheAddress,
     pub z: ZBufferId,
+    pub render_task_address: RenderTaskAddress,
 }
 
 impl SplitCompositeInstance {
     pub fn new(
         prim_header_index: PrimitiveHeaderIndex,
         polygons_address: GpuCacheAddress,
+        render_task_address: RenderTaskAddress,
         z: ZBufferId,
     ) -> Self {
         SplitCompositeInstance {
             prim_header_index,
             polygons_address,
             z,
+            render_task_address,
         }
     }
 }
 
 impl From<SplitCompositeInstance> for PrimitiveInstanceData {
     fn from(instance: SplitCompositeInstance) -> Self {
         PrimitiveInstanceData {
             data: [
                 instance.prim_header_index.0,
                 instance.polygons_address.as_int(),
                 instance.z.0,
-                0,
+                instance.render_task_address.0 as i32,
             ],
         }
     }
 }
 
 bitflags! {
     /// Flags that define how the common brush shader
     /// code should process this instance.
@@ -370,28 +372,30 @@ bitflags! {
 }
 
 // TODO(gw): Some of these fields can be moved to the primitive
 //           header since they are constant, and some can be
 //           compressed to a smaller size.
 #[repr(C)]
 pub struct BrushInstance {
     pub prim_header_index: PrimitiveHeaderIndex,
+    pub render_task_address: RenderTaskAddress,
     pub clip_task_address: RenderTaskAddress,
     pub segment_index: i32,
     pub edge_flags: EdgeAaSegmentMask,
     pub brush_flags: BrushFlags,
     pub user_data: i32,
 }
 
 impl From<BrushInstance> for PrimitiveInstanceData {
     fn from(instance: BrushInstance) -> Self {
         PrimitiveInstanceData {
             data: [
                 instance.prim_header_index.0,
+                ((instance.render_task_address.0 as i32) << 16) |
                 instance.clip_task_address.0 as i32,
                 instance.segment_index |
                 ((instance.edge_flags.bits() as i32) << 16) |
                 ((instance.brush_flags.bits() as i32) << 24),
                 instance.user_data,
             ]
         }
     }
--- a/gfx/wr/webrender/src/prim_store/mod.rs
+++ b/gfx/wr/webrender/src/prim_store/mod.rs
@@ -1328,16 +1328,42 @@ pub enum PrimitiveInstanceKind {
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 pub struct PrimitiveVisibilityIndex(pub u32);
 
 impl PrimitiveVisibilityIndex {
     pub const INVALID: PrimitiveVisibilityIndex = PrimitiveVisibilityIndex(u32::MAX);
 }
 
+/// A bit mask describing which dirty regions a primitive is visible in, with
+/// bit N set when region index N is visible. A value of 0 means not visible in
+/// any region, while 0xffff would be considered visible in all regions.
+pub struct PrimitiveVisibilityMask {
+    bits: u16,
+}
+
+impl PrimitiveVisibilityMask {
+    /// Construct a default mask, where no regions are considered visible.
+    pub fn empty() -> Self {
+        PrimitiveVisibilityMask {
+            bits: 0,
+        }
+    }
+
+    /// Mark a region as visible. `region_index` must be < 16 (mask is a u16).
+    pub fn set_visible(&mut self, region_index: usize) {
+        self.bits |= 1 << region_index;
+    }
+
+    /// Returns true if no region has been marked visible.
+    pub fn is_empty(&self) -> bool {
+        self.bits == 0
+    }
+}
+
 /// Information stored for a visible primitive about the visible
 /// rect and associated clip information.
 pub struct PrimitiveVisibility {
     /// The clip chain instance that was built for this primitive.
     pub clip_chain: ClipChainInstance,
 
     /// The current world rect, clipped to screen / dirty rect boundaries.
     // TODO(gw): This is only used by a small number of primitives.
@@ -1347,16 +1373,19 @@ pub struct PrimitiveVisibility {
 
     /// An index into the clip task instances array in the primitive
     /// store. If this is ClipTaskIndex::INVALID, then the primitive
     /// has no clip mask. Otherwise, it may store the offset of the
     /// global clip mask task for this primitive, or the first of
     /// a list of clip task ids (one per segment).
     pub clip_task_index: ClipTaskIndex,
 
+    /// A mask defining which of the dirty regions this primitive is visible in.
+    pub visibility_mask: PrimitiveVisibilityMask,
+
     /// The current combined local clip for this primitive, from
     /// the primitive local clip above and the current clip chain.
     pub combined_local_clip_rect: LayoutRect,
 
     /// The snap offsets in device space for this primitive. They are
     /// generated based on the visible rect, which is the local rect
     /// clipped by the combined local clip for most primitives, or
     /// just the local rect for pictures.
@@ -1897,16 +1926,17 @@ impl PrimitiveStore {
                 frame_state.scratch.prim_info.push(
                     PrimitiveVisibility {
                         clipped_world_rect: WorldRect::max_rect(),
                         clip_chain: ClipChainInstance::empty(),
                         clip_task_index: ClipTaskIndex::INVALID,
                         combined_local_clip_rect: LayoutRect::zero(),
                         snap_offsets: SnapOffsets::empty(),
                         shadow_snap_offsets: SnapOffsets::empty(),
+                        visibility_mask: PrimitiveVisibilityMask::empty(),
                     }
                 );
 
                 prim_instance.visibility_info = vis_index;
             } else {
                 if prim_local_rect.size.width <= 0.0 || prim_local_rect.size.height <= 0.0 {
                     if prim_instance.is_chased() {
                         println!("\tculled for zero local rectangle");
@@ -2105,16 +2135,17 @@ impl PrimitiveStore {
                 frame_state.scratch.prim_info.push(
                     PrimitiveVisibility {
                         clipped_world_rect,
                         clip_chain,
                         clip_task_index: ClipTaskIndex::INVALID,
                         combined_local_clip_rect,
                         snap_offsets,
                         shadow_snap_offsets,
+                        visibility_mask: PrimitiveVisibilityMask::empty(),
                     }
                 );
 
                 prim_instance.visibility_info = vis_index;
 
                 self.request_resources_for_prim(
                     prim_instance,
                     surface,
@@ -2653,24 +2684,23 @@ impl PrimitiveStore {
                         // render task size calculations. In future, we may consider creating multiple
                         // render task trees, one per dirty region.
                         visibility_info.clipped_world_rect = rect;
 
                         // If there is more than one dirty region, it's possible that this primitive
                         // is inside the overal dirty rect, but doesn't intersect any of the individual
                         // dirty rects. If that's the case, then we can skip drawing this primitive too.
                         if dirty_region.dirty_rects.len() > 1 {
-                            let in_dirty_rects = dirty_region
-                                .dirty_rects
-                                .iter()
-                                .any(|dirty_rect| {
-                                    visibility_info.clipped_world_rect.intersects(&dirty_rect.world_rect)
-                                });
-
-                            if !in_dirty_rects {
+                            for (region_index, region) in dirty_region.dirty_rects.iter().enumerate() {
+                                if visibility_info.clipped_world_rect.intersects(&region.world_rect) {
+                                    visibility_info.visibility_mask.set_visible(region_index);
+                                }
+                            }
+
+                            if visibility_info.visibility_mask.is_empty() {
                                 prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
                                 continue;
                             }
                         }
                     }
                     None => {
                         // Outside the overall dirty rect, so can be skipped.
                         prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
--- a/gfx/wr/webrender/src/render_task.rs
+++ b/gfx/wr/webrender/src/render_task.rs
@@ -68,17 +68,17 @@ impl RenderTaskId {
         frame_id: FrameId::INVALID,
     };
 }
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 #[repr(C)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct RenderTaskAddress(pub u32);
+pub struct RenderTaskAddress(pub u16);
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTaskTree {
     pub tasks: Vec<RenderTask>,
     pub task_data: Vec<RenderTaskData>,
     /// Tasks that don't have dependencies, and that may be shared between
@@ -386,17 +386,17 @@ impl RenderTaskTree {
                 task_redirects[task_index] = Some(blit_id);
             }
         }
     }
 
     pub fn get_task_address(&self, id: RenderTaskId) -> RenderTaskAddress {
         #[cfg(debug_assertions)]
         debug_assert_eq!(self.frame_id, id.frame_id);
-        RenderTaskAddress(id.index)
+        RenderTaskAddress(id.index as u16)
     }
 
     pub fn write_task_data(&mut self) {
         for task in &self.tasks {
             self.task_data.push(task.write_task_data());
         }
     }
 
--- a/gfx/wr/webrender/src/renderer.rs
+++ b/gfx/wr/webrender/src/renderer.rs
@@ -384,22 +384,22 @@ pub(crate) mod desc {
                 count: 2,
                 kind: VertexAttributeKind::F32,
             },
         ],
         instance_attributes: &[
             VertexAttribute {
                 name: "aBlurRenderTaskAddress",
                 count: 1,
-                kind: VertexAttributeKind::I32,
+                kind: VertexAttributeKind::U16,
             },
             VertexAttribute {
                 name: "aBlurSourceTaskAddress",
                 count: 1,
-                kind: VertexAttributeKind::I32,
+                kind: VertexAttributeKind::U16,
             },
             VertexAttribute {
                 name: "aBlurDirection",
                 count: 1,
                 kind: VertexAttributeKind::I32,
             },
         ],
     };
@@ -561,22 +561,22 @@ pub(crate) mod desc {
                 count: 2,
                 kind: VertexAttributeKind::F32,
             },
         ],
         instance_attributes: &[
             VertexAttribute {
                 name: "aScaleRenderTaskAddress",
                 count: 1,
-                kind: VertexAttributeKind::I32,
+                kind: VertexAttributeKind::U16,
             },
             VertexAttribute {
                 name: "aScaleSourceTaskAddress",
                 count: 1,
-                kind: VertexAttributeKind::I32,
+                kind: VertexAttributeKind::U16,
             },
         ],
     };
 
     pub const CLIP: VertexDescriptor = VertexDescriptor {
         vertex_attributes: &[
             VertexAttribute {
                 name: "aPosition",
@@ -2834,18 +2834,18 @@ impl Renderer {
     #[cfg(feature = "debugger")]
     fn get_passes_for_debugger(&self) -> String {
         let mut debug_passes = debug_server::PassList::new();
 
         for &(_, ref render_doc) in &self.active_documents {
             for pass in &render_doc.frame.passes {
                 let mut debug_targets = Vec::new();
                 match pass.kind {
-                    RenderPassKind::MainFramebuffer(ref target) => {
-                        debug_targets.push(Self::debug_color_target(target));
+                    RenderPassKind::MainFramebuffer { ref main_target, .. } => {
+                        debug_targets.push(Self::debug_color_target(main_target));
                     }
                     RenderPassKind::OffScreen { ref alpha, ref color, ref texture_cache } => {
                         debug_targets.extend(alpha.targets.iter().map(Self::debug_alpha_target));
                         debug_targets.extend(color.targets.iter().map(Self::debug_color_target));
                         debug_targets.extend(texture_cache.iter().map(|(_, target)| Self::debug_texture_cache_target(target)))
                     }
                 }
 
@@ -4733,17 +4733,17 @@ impl Renderer {
             );
             self.texture_resolver.bind(
                 &TextureSource::PrevPassColor,
                 TextureSampler::PrevPassColor,
                 &mut self.device,
             );
 
             match pass.kind {
-                RenderPassKind::MainFramebuffer(ref target) => {
+                RenderPassKind::MainFramebuffer { ref main_target, .. } => {
                     if let Some(device_size) = device_size {
                         stats.color_target_count += 1;
 
                         let offset = frame.content_origin.to_f32();
                         let size = frame.device_rect.size.to_f32();
                         let projection = Transform3D::ortho(
                             offset.x,
                             offset.x + size.width,
@@ -4766,17 +4766,17 @@ impl Renderer {
                             self.device.enable_depth_write();
                             self.device.clear_target(self.clear_color.map(|color| color.to_array()),
                                                      Some(1.0),
                                                      None);
                         }
 
                         self.draw_color_target(
                             draw_target,
-                            target,
+                            main_target,
                             frame.content_origin,
                             None,
                             None,
                             &frame.render_tasks,
                             &projection,
                             frame_id,
                             stats,
                         );
--- a/gfx/wr/webrender/src/tiling.rs
+++ b/gfx/wr/webrender/src/tiling.rs
@@ -434,22 +434,22 @@ impl RenderTarget for ColorRenderTarget 
                         None
                     } else {
                         Some(target_rect)
                     };
 
                     let mut alpha_batch_builder = AlphaBatchBuilder::new(
                         self.screen_size,
                         ctx.break_advanced_blend_batches,
+                        *task_id,
                     );
 
                     self.batch_builder.add_pic_to_batch(
                         pic,
                         &mut alpha_batch_builder,
-                        *task_id,
                         ctx,
                         gpu_cache,
                         render_tasks,
                         deferred_resolves,
                         prim_headers,
                         transforms,
                         pic_task.root_spatial_node_index,
                         z_generator,
@@ -904,17 +904,19 @@ impl TextureCacheRenderTarget {
 }
 
 /// Contains the set of `RenderTarget`s specific to the kind of pass.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderPassKind {
     /// The final pass to the main frame buffer, where we have a single color
     /// target for display to the user.
-    MainFramebuffer(ColorRenderTarget),
+    MainFramebuffer {
+        main_target: ColorRenderTarget,
+    },
     /// An intermediate pass, where we may have multiple targets.
     OffScreen {
         alpha: RenderTargetList<AlphaRenderTarget>,
         color: RenderTargetList<ColorRenderTarget>,
         texture_cache: FastHashMap<(CacheTextureId, usize), TextureCacheRenderTarget>,
     },
 }
 
@@ -936,19 +938,21 @@ pub struct RenderPass {
 
 impl RenderPass {
     /// Creates a pass for the main framebuffer. There is only one of these, and
     /// it is always the last pass.
     pub fn new_main_framebuffer(
         screen_size: DeviceIntSize,
         gpu_supports_fast_clears: bool,
     ) -> Self {
-        let target = ColorRenderTarget::new(screen_size, gpu_supports_fast_clears);
+        let main_target = ColorRenderTarget::new(screen_size, gpu_supports_fast_clears);
         RenderPass {
-            kind: RenderPassKind::MainFramebuffer(target),
+            kind: RenderPassKind::MainFramebuffer {
+                main_target,
+            },
             tasks: vec![],
         }
     }
 
     /// Creates an intermediate off-screen pass.
     pub fn new_off_screen(
         screen_size: DeviceIntSize,
         gpu_supports_fast_clears: bool,
@@ -1011,30 +1015,30 @@ impl RenderPass {
         clip_store: &ClipStore,
         transforms: &mut TransformPalette,
         prim_headers: &mut PrimitiveHeaders,
         z_generator: &mut ZBufferIdGenerator,
     ) {
         profile_scope!("RenderPass::build");
 
         match self.kind {
-            RenderPassKind::MainFramebuffer(ref mut target) => {
+            RenderPassKind::MainFramebuffer { ref mut main_target, .. } => {
                 for &task_id in &self.tasks {
                     assert_eq!(render_tasks[task_id].target_kind(), RenderTargetKind::Color);
-                    target.add_task(
+                    main_target.add_task(
                         task_id,
                         ctx,
                         gpu_cache,
                         render_tasks,
                         clip_store,
                         transforms,
                         deferred_resolves,
                     );
                 }
-                target.build(
+                main_target.build(
                     ctx,
                     gpu_cache,
                     render_tasks,
                     deferred_resolves,
                     prim_headers,
                     transforms,
                     z_generator,
                 );
--- a/gfx/wr/wrench/src/main.rs
+++ b/gfx/wr/wrench/src/main.rs
@@ -653,16 +653,21 @@ fn render<'a>(
                             wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags));
                             do_render = true;
                         }
                         VirtualKeyCode::S => {
                             debug_flags.toggle(DebugFlags::COMPACT_PROFILER);
                             wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags));
                             do_render = true;
                         }
+                        VirtualKeyCode::D => {
+                            debug_flags.toggle(DebugFlags::PICTURE_CACHING_DBG);
+                            wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags));
+                            do_render = true;
+                        }
                         VirtualKeyCode::Q => {
                             debug_flags.toggle(DebugFlags::GPU_TIME_QUERIES | DebugFlags::GPU_SAMPLE_QUERIES);
                             wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags));
                             do_render = true;
                         }
                         VirtualKeyCode::V => {
                             debug_flags.toggle(DebugFlags::SHOW_OVERDRAW);
                             wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags));