Bug 1495228. Update webrender to commit 9536249e3ed920a920346f6cc0a79473cad16099
author Jeff Muizelaar <jmuizelaar@mozilla.com>
Wed, 03 Oct 2018 11:38:56 -0400
changeset 439397 a351e7a11b2cadfd03f942e5f1e5f506011da08c
parent 439396 831c1cae9dde0bd0f2eff1023fbe80afef8d8e8d
child 439398 1eb163b766fd1c6e1e8a90e53e74d0601c28b655
push id 108577
push user jmuizelaar@mozilla.com
push date Wed, 03 Oct 2018 17:54:26 +0000
treeherder mozilla-inbound@4526e08d4477 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
bugs 1495228
milestone 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1495228. Update webrender to commit 9536249e3ed920a920346f6cc0a79473cad16099
gfx/webrender/res/brush.glsl
gfx/webrender/res/brush_blend.glsl
gfx/webrender/res/brush_image.glsl
gfx/webrender/res/brush_linear_gradient.glsl
gfx/webrender/res/brush_mix_blend.glsl
gfx/webrender/res/brush_radial_gradient.glsl
gfx/webrender/res/brush_solid.glsl
gfx/webrender/res/brush_yuv_image.glsl
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_blur.glsl
gfx/webrender/res/cs_clip_box_shadow.glsl
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_line.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/cs_scale.glsl
gfx/webrender/res/gpu_cache.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_split_composite.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/resource_cache.glsl
gfx/webrender/src/batch.rs
gfx/webrender/src/device/gl.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/scene_builder.rs
gfx/webrender/src/shade.rs
gfx/webrender/src/spatial_node.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/image.rs
gfx/webrender_api/src/lib.rs
gfx/webrender_bindings/revision.txt
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -32,17 +32,17 @@ void main(void) {
     int brush_flags = (aData.z >> 24) & 0xff;
     PrimitiveHeader ph = fetch_prim_header(prim_header_address);
 
     // Fetch the segment of this brush primitive we are drawing.
     int segment_address = ph.specific_prim_address +
                           VECS_PER_SPECIFIC_BRUSH +
                           segment_index * VECS_PER_SEGMENT;
 
-    vec4[2] segment_data = fetch_from_resource_cache_2(segment_address);
+    vec4[2] segment_data = fetch_from_gpu_cache_2(segment_address);
     RectWithSize local_segment_rect = RectWithSize(segment_data[0].xy, segment_data[0].zw);
 
     VertexInfo vi;
 
     // Fetch the dynamic picture that we are drawing on.
     PictureTask pic_task = fetch_picture_task(ph.render_task_index);
     ClipArea clip_area = fetch_clip_area(clip_address);
 
--- a/gfx/webrender/res/brush_blend.glsl
+++ b/gfx/webrender/res/brush_blend.glsl
@@ -96,18 +96,18 @@ void brush_vs(
                 vec3(0.769 - 0.769 * invAmount, 0.686 + 0.314 * invAmount, 0.534 - 0.534 * invAmount),
                 vec3(0.189 - 0.189 * invAmount, 0.168 - 0.168 * invAmount, 0.131 + 0.869 * invAmount)
             );
             vColorOffset = vec3(0.0);
             break;
         }
         case 10: {
             // Color Matrix
-            vec4 mat_data[3] = fetch_from_resource_cache_3(user_data.z);
-            vec4 offset_data = fetch_from_resource_cache_1(user_data.z + 4);
+            vec4 mat_data[3] = fetch_from_gpu_cache_3(user_data.z);
+            vec4 offset_data = fetch_from_gpu_cache_1(user_data.z + 4);
             vColorMat = mat3(mat_data[0].xyz, mat_data[1].xyz, mat_data[2].xyz);
             vColorOffset = offset_data.rgb;
             break;
         }
         default: break;
     }
 }
 #endif
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -28,17 +28,17 @@ flat varying vec2 vTileRepeat;
 
 struct ImageBrushData {
     vec4 color;
     vec4 background_color;
     vec2 stretch_size;
 };
 
 ImageBrushData fetch_image_data(int address) {
-    vec4[3] raw_data = fetch_from_resource_cache_3(address);
+    vec4[3] raw_data = fetch_from_gpu_cache_3(address);
     ImageBrushData data = ImageBrushData(
         raw_data[0],
         raw_data[1],
         raw_data[2].xy
     );
     return data;
 }
 
--- a/gfx/webrender/res/brush_linear_gradient.glsl
+++ b/gfx/webrender/res/brush_linear_gradient.glsl
@@ -27,17 +27,17 @@ flat varying vec2 vTileRepeat;
 
 struct Gradient {
     vec4 start_end_point;
     int extend_mode;
     vec2 stretch_size;
 };
 
 Gradient fetch_gradient(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return Gradient(
         data[0],
         int(data[1].x),
         data[1].yz
     );
 }
 
 void brush_vs(
--- a/gfx/webrender/res/brush_mix_blend.glsl
+++ b/gfx/webrender/res/brush_mix_blend.glsl
@@ -19,17 +19,17 @@ void brush_vs(
     RectWithSize segment_rect,
     ivec3 user_data,
     mat4 transform,
     PictureTask pic_task,
     int brush_flags,
     vec4 unused
 ) {
     vec2 snapped_device_pos = snap_device_pos(vi);
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0));
     vOp = user_data.x;
 
     PictureTask src_task = fetch_picture_task(user_data.z);
     vec2 src_uv = snapped_device_pos +
                   src_task.common_data.task_rect.p0 -
                   src_task.content_origin;
     vSrcUv = vec3(src_uv / texture_size, src_task.common_data.texture_layer_index);
 
@@ -195,18 +195,18 @@ const int MixBlendMode_SoftLight   = 9;
 const int MixBlendMode_Difference  = 10;
 const int MixBlendMode_Exclusion   = 11;
 const int MixBlendMode_Hue         = 12;
 const int MixBlendMode_Saturation  = 13;
 const int MixBlendMode_Color       = 14;
 const int MixBlendMode_Luminosity  = 15;
 
 Fragment brush_fs() {
-    vec4 Cb = textureLod(sCacheRGBA8, vBackdropUv, 0.0);
-    vec4 Cs = textureLod(sCacheRGBA8, vSrcUv, 0.0);
+    vec4 Cb = textureLod(sPrevPassColor, vBackdropUv, 0.0);
+    vec4 Cs = textureLod(sPrevPassColor, vSrcUv, 0.0);
 
     if (Cb.a == 0.0) {
         return Fragment(Cs);
     }
     if (Cs.a == 0.0) {
         return Fragment(vec4(0.0));
     }
 
--- a/gfx/webrender/res/brush_radial_gradient.glsl
+++ b/gfx/webrender/res/brush_radial_gradient.glsl
@@ -26,17 +26,17 @@ flat varying vec2 vTileRepeat;
 struct RadialGradient {
     vec4 center_start_end_radius;
     float ratio_xy;
     int extend_mode;
     vec2 stretch_size;
 };
 
 RadialGradient fetch_radial_gradient(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return RadialGradient(
         data[0],
         data[1].x,
         int(data[1].y),
         data[1].zw
     );
 }
 
--- a/gfx/webrender/res/brush_solid.glsl
+++ b/gfx/webrender/res/brush_solid.glsl
@@ -14,17 +14,17 @@ varying vec2 vLocalPos;
 
 #ifdef WR_VERTEX_SHADER
 
 struct SolidBrush {
     vec4 color;
 };
 
 SolidBrush fetch_solid_primitive(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
+    vec4 data = fetch_from_gpu_cache_1(address);
     return SolidBrush(data);
 }
 
 void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
     RectWithSize segment_rect,
--- a/gfx/webrender/res/brush_yuv_image.glsl
+++ b/gfx/webrender/res/brush_yuv_image.glsl
@@ -72,17 +72,17 @@ void write_uv_rect(
     #endif
 }
 
 struct YuvPrimitive {
     float coefficient;
 };
 
 YuvPrimitive fetch_yuv_primitive(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
+    vec4 data = fetch_from_gpu_cache_1(address);
     return YuvPrimitive(data.x);
 }
 
 void brush_vs(
     VertexInfo vi,
     int prim_address,
     RectWithSize local_rect,
     RectWithSize segment_rect,
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -1,13 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include rect,render_task,resource_cache,snap,transform
+#include rect,render_task,gpu_cache,snap,transform
 
 #ifdef WR_VERTEX_SHADER
 
 #define SEGMENT_ALL         0
 #define SEGMENT_CORNER_TL   1
 #define SEGMENT_CORNER_TR   2
 #define SEGMENT_CORNER_BL   3
 #define SEGMENT_CORNER_BR   4
--- a/gfx/webrender/res/cs_blur.glsl
+++ b/gfx/webrender/res/cs_blur.glsl
@@ -41,19 +41,19 @@ BlurTask fetch_blur_task(int address) {
 void main(void) {
     BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
     RenderTaskCommonData src_task = fetch_render_task_common_data(aBlurSourceTaskAddress);
 
     RectWithSize src_rect = src_task.task_rect;
     RectWithSize target_rect = blur_task.common_data.task_rect;
 
 #if defined WR_FEATURE_COLOR_TARGET
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0).xy);
 #else
-    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
 #endif
     vUv.z = src_task.texture_layer_index;
     vSigma = blur_task.blur_radius;
 
     // Ensure that the support is an even number of pixels to simplify the
     // fragment shader logic.
     //
     // TODO(pcwalton): Actually make use of this fact and use the texture
@@ -84,20 +84,20 @@ void main(void) {
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 #if defined WR_FEATURE_COLOR_TARGET
 #define SAMPLE_TYPE vec4
-#define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassColor, uv)
 #else
 #define SAMPLE_TYPE float
-#define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassAlpha, uv).r
 #endif
 
 // TODO(gw): Write a fast path blur that handles smaller blur radii
 //           with a offset / weight uniform table and a constant
 //           loop iteration count!
 
 // TODO(gw): Make use of the bilinear sampling trick to reduce
 //           the number of texture fetches needed for a gaussian blur.
--- a/gfx/webrender/res/cs_clip_box_shadow.glsl
+++ b/gfx/webrender/res/cs_clip_box_shadow.glsl
@@ -21,17 +21,17 @@ struct BoxShadowData {
     vec2 src_rect_size;
     float clip_mode;
     int stretch_mode_x;
     int stretch_mode_y;
     RectWithSize dest_rect;
 };
 
 BoxShadowData fetch_data(ivec2 address) {
-    vec4 data[3] = fetch_from_resource_cache_3_direct(address);
+    vec4 data[3] = fetch_from_gpu_cache_3_direct(address);
     RectWithSize dest_rect = RectWithSize(data[2].xy, data[2].zw);
     BoxShadowData bs_data = BoxShadowData(
         data[0].xy,
         data[0].z,
         int(data[1].x),
         int(data[1].y),
         dest_rect
     );
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -12,17 +12,17 @@ flat varying vec4 vClipMaskUvInnerRect;
 flat varying float vLayer;
 
 #ifdef WR_VERTEX_SHADER
 struct ImageMaskData {
     RectWithSize local_rect;
 };
 
 ImageMaskData fetch_mask_data(ivec2 address) {
-    vec4 data = fetch_from_resource_cache_1_direct(address);
+    vec4 data = fetch_from_gpu_cache_1_direct(address);
     RectWithSize local_rect = RectWithSize(data.xy, data.zw);
     ImageMaskData mask_data = ImageMaskData(local_rect);
     return mask_data;
 }
 
 void main(void) {
     ClipMaskInstance cmi = fetch_clip_item();
     ClipArea area = fetch_clip_area(cmi.render_task_address);
--- a/gfx/webrender/res/cs_clip_line.glsl
+++ b/gfx/webrender/res/cs_clip_line.glsl
@@ -23,17 +23,17 @@ flat varying vec2 vLocalOrigin;
 struct LineDecorationData {
     RectWithSize local_rect;
     float wavyLineThickness;
     float style;
     float orientation;
 };
 
 LineDecorationData fetch_data(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     RectWithSize local_rect = RectWithSize(data[0].xy, data[0].zw);
     LineDecorationData line_data = LineDecorationData(
         local_rect,
         data[1].x,
         data[1].y,
         data[1].z
     );
     return line_data;
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -13,31 +13,31 @@ flat varying vec4 vClipCenter_Radius_BR;
 
 #ifdef WR_VERTEX_SHADER
 struct ClipRect {
     RectWithSize rect;
     vec4 mode;
 };
 
 ClipRect fetch_clip_rect(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     ClipRect rect = ClipRect(RectWithSize(data[0].xy, data[0].zw), data[1]);
     return rect;
 }
 
 struct ClipCorner {
     RectWithSize rect;
     vec4 outer_inner_radius;
 };
 
 // index is of type float instead of int because using an int led to shader
 // miscompilations with a macOS 10.12 Intel driver.
 ClipCorner fetch_clip_corner(ivec2 address, float index) {
     address += ivec2(2 + 2 * int(index), 0);
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     ClipCorner corner = ClipCorner(RectWithSize(data[0].xy, data[0].zw), data[1]);
     return corner;
 }
 
 struct ClipData {
     ClipRect rect;
     ClipCorner top_left;
     ClipCorner top_right;
--- a/gfx/webrender/res/cs_scale.glsl
+++ b/gfx/webrender/res/cs_scale.glsl
@@ -27,19 +27,19 @@ ScaleTask fetch_scale_task(int address) 
 void main(void) {
     ScaleTask scale_task = fetch_scale_task(aScaleRenderTaskAddress);
     RenderTaskCommonData src_task = fetch_render_task_common_data(aScaleSourceTaskAddress);
 
     RectWithSize src_rect = src_task.task_rect;
     RectWithSize target_rect = scale_task.common_data.task_rect;
 
 #if defined WR_FEATURE_COLOR_TARGET
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0).xy);
 #else
-    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
 #endif
 
     vUv.z = src_task.texture_layer_index;
 
     vUvRect = vec4(src_rect.p0 + vec2(0.5),
                    src_rect.p0 + src_rect.size - vec2(0.5)) / texture_size.xyxy;
 
     vec2 pos = target_rect.p0 + target_rect.size * aPosition.xy;
@@ -49,20 +49,20 @@ void main(void) {
 }
 
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 
 #if defined WR_FEATURE_COLOR_TARGET
 #define SAMPLE_TYPE vec4
-#define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassColor, uv)
 #else
 #define SAMPLE_TYPE float
-#define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassAlpha, uv).r
 #endif
 
 void main(void) {
     vec2 st = clamp(vUv.xy, vUvRect.xy, vUvRect.zw);
     oFragColor = vec4(SAMPLE_TEXTURE(vec3(st, vUv.z)));
 }
 
 #endif
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/res/gpu_cache.glsl
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuCache;
+
+#define VECS_PER_IMAGE_RESOURCE     2
+
+// TODO(gw): This is here temporarily while we have
+//           both GPU store and cache. When the GPU
+//           store code is removed, we can change the
+//           PrimitiveInstance instance structure to
+//           use 2x unsigned shorts as vertex attributes
+//           instead of an int, and encode the UV directly
+//           in the vertices.
+ivec2 get_gpu_cache_uv(int address) {
+    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
+                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH);
+}
+
+vec4[2] fetch_from_gpu_cache_2_direct(ivec2 address) {
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0))
+    );
+}
+
+vec4[2] fetch_from_gpu_cache_2(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0))
+    );
+}
+
+#ifdef WR_VERTEX_SHADER
+
+vec4[8] fetch_from_gpu_cache_8(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[8](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(4, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(5, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(6, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(7, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3_direct(ivec2 address) {
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0))
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4_direct(ivec2 address) {
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(3, 0))
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0))
+    );
+}
+
+vec4 fetch_from_gpu_cache_1_direct(ivec2 address) {
+    return texelFetch(sGpuCache, address, 0);
+}
+
+vec4 fetch_from_gpu_cache_1(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return texelFetch(sGpuCache, uv, 0);
+}
+
+//TODO: image resource is too specific for this module
+
+struct ImageResource {
+    RectWithEndpoint uv_rect;
+    float layer;
+    vec3 user_data;
+};
+
+ImageResource fetch_image_resource(int address) {
+    //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
+}
+
+ImageResource fetch_image_resource_direct(ivec2 address) {
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
+}
+
+// Fetch optional extra data for a texture cache resource. This can contain
+// a polygon defining a UV rect within the texture cache resource.
+struct ImageResourceExtra {
+    vec2 st_tl;
+    vec2 st_tr;
+    vec2 st_bl;
+    vec2 st_br;
+};
+
+ImageResourceExtra fetch_image_resource_extra(int address) {
+    vec4 data[2] = fetch_from_gpu_cache_2(address + VECS_PER_IMAGE_RESOURCE);
+    return ImageResourceExtra(
+        data[0].xy,
+        data[0].zw,
+        data[1].xy,
+        data[1].zw
+    );
+}
+
+#endif //WR_VERTEX_SHADER
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -1,30 +1,27 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include rect,render_task,resource_cache,snap,transform
+#include rect,render_task,gpu_cache,snap,transform
 
 #define EXTEND_MODE_CLAMP  0
 #define EXTEND_MODE_REPEAT 1
 
 #define SUBPX_DIR_NONE        0
 #define SUBPX_DIR_HORIZONTAL  1
 #define SUBPX_DIR_VERTICAL    2
 #define SUBPX_DIR_MIXED       3
 
 #define RASTER_LOCAL            0
 #define RASTER_SCREEN           1
 
-uniform sampler2DArray sCacheA8;
-uniform sampler2DArray sCacheRGBA8;
-
-// An A8 target for standalone tasks that is available to all passes.
-uniform sampler2DArray sSharedCacheA8;
+uniform sampler2DArray sPrevPassAlpha;
+uniform sampler2DArray sPrevPassColor;
 
 vec2 clamp_rect(vec2 pt, RectWithSize rect) {
     return clamp(pt, rect.p0, rect.p0 + rect.size);
 }
 
 // TODO: convert back to RectWithEndPoint if driver issues are resolved, if ever.
 flat varying vec4 vClipMaskUvBounds;
 // XY and W are homogeneous coordinates, Z is the layer index
@@ -116,24 +113,22 @@ VertexInfo write_vertex(RectWithSize ins
         transform.m,
         snap_rect
     );
 
     // Transform the current vertex to world space.
     vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
-    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
+    vec2 device_pos = world_pos.xy * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
-    vec2 final_pos = device_pos + snap_offset -
-                     task.content_origin +
-                     task.common_data.task_rect.p0;
+    vec2 final_offset = snap_offset - task.content_origin + task.common_data.task_rect.p0;
 
-    gl_Position = uTransform * vec4(final_pos, z, 1.0);
+    gl_Position = uTransform * vec4(device_pos + final_offset * world_pos.w, z * world_pos.w, world_pos.w);
 
     VertexInfo vi = VertexInfo(
         clamped_local_pos,
         snap_offset,
         world_pos
     );
 
     return vi;
@@ -249,17 +244,17 @@ float do_clip() {
     if (!all(bvec4(left, right))) {
         return 0.0;
     }
     // finally, the slow path - fetch the mask value from an image
     // Note the Z getting rounded to the nearest integer because the variable
     // is still interpolated and becomes a subject of precision-caused
     // fluctuations, see https://bugzilla.mozilla.org/show_bug.cgi?id=1491911
     ivec3 tc = ivec3(mask_uv, vClipMaskUv.z + 0.5);
-    return texelFetch(sCacheA8, tc, 0).r;
+    return texelFetch(sPrevPassAlpha, tc, 0).r;
 }
 
 #ifdef WR_FEATURE_DITHERING
 vec4 dither(vec4 color) {
     const int matrix_mask = 7;
 
     ivec2 pos = ivec2(gl_FragCoord.xy) & ivec2(matrix_mask);
     float noise_normalized = (texelFetch(sDither, pos, 0).r * 255.0 + 0.5) / 64.0;
@@ -292,15 +287,15 @@ vec4 sample_gradient(int address, float 
     //     floor(x) is the gradient color entry index
     //     fract(x) is the linear filtering factor between start and end
     int lut_offset = 2 * int(floor(x));     // There is a [start, end] color per entry.
 
     // Ensure we don't fetch outside the valid range of the LUT.
     lut_offset = clamp(lut_offset, 0, 2 * (GRADIENT_ENTRIES + 1));
 
     // Fetch the start and end color.
-    vec4 texels[2] = fetch_from_resource_cache_2(address + lut_offset);
+    vec4 texels[2] = fetch_from_gpu_cache_2(address + lut_offset);
 
     // Finally interpolate and apply dithering
     return dither(mix(texels[0], texels[1], fract(x)));
 }
 
 #endif //WR_FRAGMENT_SHADER
--- a/gfx/webrender/res/ps_split_composite.glsl
+++ b/gfx/webrender/res/ps_split_composite.glsl
@@ -9,21 +9,21 @@ flat varying vec4 vUvSampleBounds;
 
 #ifdef WR_VERTEX_SHADER
 struct SplitGeometry {
     vec2 local[4];
     RectWithSize local_rect;
 };
 
 SplitGeometry fetch_split_geometry(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
+    ivec2 uv = get_gpu_cache_uv(address);
 
-    vec4 data0 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0));
-    vec4 data1 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0));
-    vec4 data2 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0));
+    vec4 data0 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0));
+    vec4 data1 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0));
+    vec4 data2 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0));
 
     SplitGeometry geo;
     geo.local = vec2[4](
         data0.xy,
         data0.zw,
         data1.xy,
         data1.zw
     );
@@ -81,17 +81,17 @@ void main(void) {
     write_clip(
         world_pos,
         vec2(0.0),
         clip_area
     );
 
     gl_Position = uTransform * final_pos;
 
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0));
     vec2 uv0 = res.uv_rect.p0;
     vec2 uv1 = res.uv_rect.p1;
 
     vec2 min_uv = min(uv0, uv1);
     vec2 max_uv = max(uv0, uv1);
 
     vUvSampleBounds = vec4(
         min_uv + vec2(0.5),
@@ -110,11 +110,11 @@ void main(void) {
     vUv = vec3(uv / texture_size, res.layer);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float alpha = do_clip();
     vec2 uv = clamp(vUv.xy, vUvSampleBounds.xy, vUvSampleBounds.zw);
-    oFragColor = alpha * textureLod(sCacheRGBA8, vec3(uv, vUv.z), 0.0);
+    oFragColor = alpha * textureLod(sPrevPassColor, vec3(uv, vUv.z), 0.0);
 }
 #endif
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -23,17 +23,17 @@ struct Glyph {
 };
 
 Glyph fetch_glyph(int specific_prim_address,
                   int glyph_index) {
     // Two glyphs are packed in each texel in the GPU cache.
     int glyph_address = specific_prim_address +
                         VECS_PER_TEXT_RUN +
                         int(uint(glyph_index) / GLYPHS_PER_GPU_BLOCK);
-    vec4 data = fetch_from_resource_cache_1(glyph_address);
+    vec4 data = fetch_from_gpu_cache_1(glyph_address);
     // Select XY or ZW based on glyph index.
     // We use "!= 0" instead of "== 1" here in order to work around a driver
     // bug with equality comparisons on integers.
     vec2 glyph = mix(data.xy, data.zw,
                      bvec2(uint(glyph_index) % GLYPHS_PER_GPU_BLOCK != 0U));
 
     return Glyph(glyph);
 }
@@ -41,28 +41,28 @@ Glyph fetch_glyph(int specific_prim_addr
 struct GlyphResource {
     vec4 uv_rect;
     float layer;
     vec2 offset;
     float scale;
 };
 
 GlyphResource fetch_glyph_resource(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return GlyphResource(data[0], data[1].x, data[1].yz, data[1].w);
 }
 
 struct TextRun {
     vec4 color;
     vec4 bg_color;
     vec2 offset;
 };
 
 TextRun fetch_text_run(int address) {
-    vec4 data[3] = fetch_from_resource_cache_3(address);
+    vec4 data[3] = fetch_from_gpu_cache_3(address);
     return TextRun(data[0], data[1], data[2].xy);
 }
 
 VertexInfo write_text_vertex(RectWithSize local_clip_rect,
                              float z,
                              Transform transform,
                              PictureTask task,
                              vec2 text_offset,
deleted file mode 100644
--- a/gfx/webrender/res/resource_cache.glsl
+++ /dev/null
@@ -1,137 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-uniform HIGHP_SAMPLER_FLOAT sampler2D sResourceCache;
-
-#define VECS_PER_IMAGE_RESOURCE     2
-
-// TODO(gw): This is here temporarily while we have
-//           both GPU store and cache. When the GPU
-//           store code is removed, we can change the
-//           PrimitiveInstance instance structure to
-//           use 2x unsigned shorts as vertex attributes
-//           instead of an int, and encode the UV directly
-//           in the vertices.
-ivec2 get_resource_cache_uv(int address) {
-    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
-                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH);
-}
-
-vec4[2] fetch_from_resource_cache_2_direct(ivec2 address) {
-    return vec4[2](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0))
-    );
-}
-
-vec4[2] fetch_from_resource_cache_2(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[2](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0))
-    );
-}
-
-#ifdef WR_VERTEX_SHADER
-
-vec4[8] fetch_from_resource_cache_8(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[8](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(3, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(4, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(5, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(6, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(7, 0))
-    );
-}
-
-vec4[3] fetch_from_resource_cache_3(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[3](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0))
-    );
-}
-
-vec4[3] fetch_from_resource_cache_3_direct(ivec2 address) {
-    return vec4[3](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(2, 0))
-    );
-}
-
-vec4[4] fetch_from_resource_cache_4_direct(ivec2 address) {
-    return vec4[4](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(3, 0))
-    );
-}
-
-vec4[4] fetch_from_resource_cache_4(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[4](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(3, 0))
-    );
-}
-
-vec4 fetch_from_resource_cache_1_direct(ivec2 address) {
-    return texelFetch(sResourceCache, address, 0);
-}
-
-vec4 fetch_from_resource_cache_1(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return texelFetch(sResourceCache, uv, 0);
-}
-
-//TODO: image resource is too specific for this module
-
-struct ImageResource {
-    RectWithEndpoint uv_rect;
-    float layer;
-    vec3 user_data;
-};
-
-ImageResource fetch_image_resource(int address) {
-    //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
-    return ImageResource(uv_rect, data[1].x, data[1].yzw);
-}
-
-ImageResource fetch_image_resource_direct(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
-    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
-    return ImageResource(uv_rect, data[1].x, data[1].yzw);
-}
-
-// Fetch optional extra data for a texture cache resource. This can contain
-// a polygon defining a UV rect within the texture cache resource.
-struct ImageResourceExtra {
-    vec2 st_tl;
-    vec2 st_tr;
-    vec2 st_bl;
-    vec2 st_br;
-};
-
-ImageResourceExtra fetch_image_resource_extra(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address + VECS_PER_IMAGE_RESOURCE);
-    return ImageResourceExtra(
-        data[0].xy,
-        data[0].zw,
-        data[1].xy,
-        data[1].zw
-    );
-}
-
-#endif //WR_VERTEX_SHADER
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -9,17 +9,17 @@ use clip::{ClipDataStore, ClipNodeFlags,
 use clip_scroll_tree::{ClipScrollTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex};
 use euclid::vec3;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuCache, GpuCacheHandle, GpuCacheAddress};
 use gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders};
 use gpu_types::{ClipMaskInstance, SplitCompositeInstance};
 use gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
 use gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{FastHashMap, SavedTargetIndex, TextureSource};
 use picture::{PictureCompositeMode, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Clipper, Polygon, Splitter};
 use prim_store::{BrushKind, BrushPrimitive, BrushSegmentTaskId, DeferredResolve};
 use prim_store::{EdgeAaSegmentMask, ImageSource};
 use prim_store::{PrimitiveMetadata, VisibleGradientTile, PrimitiveInstance};
 use prim_store::{BorderSource, Primitive, PrimitiveDetails};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskTree};
 use renderer::{BlendMode, ImageBufferKind, ShaderColorMode};
@@ -61,39 +61,39 @@ pub enum BatchKind {
 }
 
 /// Optional textures that can be used as a source in the shaders.
 /// Textures that are not used by the batch are equal to TextureId::invalid().
 #[derive(Copy, Clone, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BatchTextures {
-    pub colors: [SourceTexture; 3],
+    pub colors: [TextureSource; 3],
 }
 
 impl BatchTextures {
     pub fn no_texture() -> Self {
         BatchTextures {
-            colors: [SourceTexture::Invalid; 3],
+            colors: [TextureSource::Invalid; 3],
         }
     }
 
     pub fn render_target_cache() -> Self {
         BatchTextures {
             colors: [
-                SourceTexture::CacheRGBA8,
-                SourceTexture::CacheA8,
-                SourceTexture::Invalid,
+                TextureSource::PrevPassColor,
+                TextureSource::PrevPassAlpha,
+                TextureSource::Invalid,
             ],
         }
     }
 
-    pub fn color(texture: SourceTexture) -> Self {
+    pub fn color(texture: TextureSource) -> Self {
         BatchTextures {
-            colors: [texture, texture, SourceTexture::Invalid],
+            colors: [texture, texture, TextureSource::Invalid],
         }
     }
 }
 
 #[derive(Copy, Clone, Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BatchKey {
@@ -115,18 +115,18 @@ impl BatchKey {
         self.kind == other.kind && self.blend_mode == other.blend_mode &&
             textures_compatible(self.textures.colors[0], other.textures.colors[0]) &&
             textures_compatible(self.textures.colors[1], other.textures.colors[1]) &&
             textures_compatible(self.textures.colors[2], other.textures.colors[2])
     }
 }
 
 #[inline]
-fn textures_compatible(t1: SourceTexture, t2: SourceTexture) -> bool {
-    t1 == SourceTexture::Invalid || t2 == SourceTexture::Invalid || t1 == t2
+fn textures_compatible(t1: TextureSource, t2: TextureSource) -> bool {
+    t1 == TextureSource::Invalid || t2 == TextureSource::Invalid || t1 == t2
 }
 
 pub struct AlphaBatchList {
     pub batches: Vec<PrimitiveBatch>,
     pub item_rects: Vec<Vec<WorldRect>>,
 }
 
 impl AlphaBatchList {
@@ -756,19 +756,19 @@ impl AlphaBatchBuilder {
                                                 let secondary_id = picture.secondary_render_task_id.expect("no secondary!?");
                                                 let saved_index = render_tasks[secondary_id].saved_index.expect("no saved index!?");
                                                 debug_assert_ne!(saved_index, SavedTargetIndex::PENDING);
 
                                                 // Build BatchTextures for shadow/content
                                                 let shadow_textures = BatchTextures::render_target_cache();
                                                 let content_textures = BatchTextures {
                                                     colors: [
-                                                        SourceTexture::RenderTaskCache(saved_index),
-                                                        SourceTexture::Invalid,
-                                                        SourceTexture::Invalid,
+                                                        TextureSource::RenderTaskCache(saved_index),
+                                                        TextureSource::Invalid,
+                                                        TextureSource::Invalid,
                                                     ],
                                                 };
 
                                                 // Build batch keys for shadow/content
                                                 let shadow_key = BatchKey::new(kind, non_segmented_blend_mode, shadow_textures);
                                                 let content_key = BatchKey::new(kind, non_segmented_blend_mode, content_textures);
 
                                                 // Retrieve the UV rect addresses for shadow/content.
@@ -1084,30 +1084,30 @@ impl AlphaBatchBuilder {
                 let batch_list = &mut self.batch_list;
 
                 ctx.resource_cache.fetch_glyphs(
                     text_cpu.used_font.clone(),
                     &text_cpu.glyph_keys,
                     glyph_fetch_buffer,
                     gpu_cache,
                     |texture_id, mut glyph_format, glyphs| {
-                        debug_assert_ne!(texture_id, SourceTexture::Invalid);
+                        debug_assert_ne!(texture_id, TextureSource::Invalid);
 
                         // Ignore color and only sample alpha when shadowing.
                         if text_cpu.shadow {
                             glyph_format = glyph_format.ignore_color();
                         }
 
                         let subpx_dir = subpx_dir.limit_by(glyph_format);
 
                         let textures = BatchTextures {
                             colors: [
                                 texture_id,
-                                SourceTexture::Invalid,
-                                SourceTexture::Invalid,
+                                TextureSource::Invalid,
+                                TextureSource::Invalid,
                             ],
                         };
 
                         let kind = BatchKind::TextRun(glyph_format);
 
                         let (blend_mode, color_mode) = match glyph_format {
                             GlyphFormat::Subpixel |
                             GlyphFormat::TransformedSubpixel => {
@@ -1339,17 +1339,17 @@ fn get_image_tile_params(
 
     let cache_item = resolve_image(
         request,
         resource_cache,
         gpu_cache,
         deferred_resolves,
     );
 
-    if cache_item.texture_id == SourceTexture::Invalid {
+    if cache_item.texture_id == TextureSource::Invalid {
         None
     } else {
         let textures = BatchTextures::color(cache_item.texture_id);
         Some((
             BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
             textures,
             [
                 cache_item.uv_rect_handle.as_int(gpu_cache),
@@ -1388,17 +1388,17 @@ impl BrushPrimitive {
                             .get_cached_render_task(rt_handle);
                         resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
                     }
                 };
                 if cfg!(debug_assertions) && is_chased {
                     println!("\tsource {:?}", cache_item);
                 }
 
-                if cache_item.texture_id == SourceTexture::Invalid {
+                if cache_item.texture_id == TextureSource::Invalid {
                     None
                 } else {
                     let textures = BatchTextures::color(cache_item.texture_id);
 
                     Some((
                         BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
                         textures,
                         [
@@ -1426,17 +1426,17 @@ impl BrushPrimitive {
                             None => return None,
                         };
                         let rt_cache_entry = resource_cache
                             .get_cached_render_task(rt_handle);
                         resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
                     }
                 };
 
-                if cache_item.texture_id == SourceTexture::Invalid {
+                if cache_item.texture_id == TextureSource::Invalid {
                     None
                 } else {
                     let textures = BatchTextures::color(cache_item.texture_id);
 
                     Some((
                         BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
                         textures,
                         [
@@ -1503,17 +1503,17 @@ impl BrushPrimitive {
                             rendering: image_rendering,
                             tile: None,
                         },
                         resource_cache,
                         gpu_cache,
                         deferred_resolves,
                     );
 
-                    if cache_item.texture_id == SourceTexture::Invalid {
+                    if cache_item.texture_id == TextureSource::Invalid {
                         warn!("Warnings: skip a PrimitiveKind::YuvImage");
                         return None;
                     }
 
                     textures.colors[channel] = cache_item.texture_id;
                     uv_rect_addresses[channel] = cache_item.uv_rect_handle.as_int(gpu_cache);
                 }
 
@@ -1659,17 +1659,17 @@ pub fn resolve_image(
             // by the render thread.
             match image_properties.external_image {
                 Some(external_image) => {
                     // This is an external texture - we will add it to
                     // the deferred resolves list to be patched by
                     // the render thread...
                     let cache_handle = gpu_cache.push_deferred_per_frame_blocks(BLOCKS_PER_UV_RECT);
                     let cache_item = CacheItem {
-                        texture_id: SourceTexture::External(external_image),
+                        texture_id: TextureSource::External(external_image),
                         uv_rect_handle: cache_handle,
                         uv_rect: DeviceUintRect::new(
                             DeviceUintPoint::zero(),
                             image_properties.descriptor.size,
                         ),
                         texture_layer: 0,
                     };
 
@@ -1701,18 +1701,18 @@ pub fn resolve_image(
 /// Batcher managing draw calls into the clip mask (in the RT cache).
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ClipBatcher {
     /// Rectangle draws fill up the rectangles with rounded corners.
     pub rectangles: Vec<ClipMaskInstance>,
     /// Image draws apply the image masking.
-    pub images: FastHashMap<SourceTexture, Vec<ClipMaskInstance>>,
-    pub box_shadows: FastHashMap<SourceTexture, Vec<ClipMaskInstance>>,
+    pub images: FastHashMap<TextureSource, Vec<ClipMaskInstance>>,
+    pub box_shadows: FastHashMap<TextureSource, Vec<ClipMaskInstance>>,
     pub line_decorations: Vec<ClipMaskInstance>,
 }
 
 impl ClipBatcher {
     pub fn new() -> Self {
         ClipBatcher {
             rectangles: Vec::new(),
             images: FastHashMap::default(),
@@ -1810,17 +1810,17 @@ impl ClipBatcher {
                     let rt_handle = info
                         .cache_handle
                         .as_ref()
                         .expect("bug: render task handle not allocated");
                     let rt_cache_entry = resource_cache
                         .get_cached_render_task(rt_handle);
                     let cache_item = resource_cache
                         .get_texture_cache_item(&rt_cache_entry.handle);
-                    debug_assert_ne!(cache_item.texture_id, SourceTexture::Invalid);
+                    debug_assert_ne!(cache_item.texture_id, TextureSource::Invalid);
 
                     self.box_shadows
                         .entry(cache_item.texture_id)
                         .or_insert(Vec::new())
                         .push(ClipMaskInstance {
                             clip_data_address: gpu_address,
                             resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
                             ..instance
@@ -1841,19 +1841,19 @@ impl ClipBatcher {
                         ..instance
                     });
                 }
             }
         }
     }
 }
 
-fn get_buffer_kind(texture: SourceTexture) -> ImageBufferKind {
+fn get_buffer_kind(texture: TextureSource) -> ImageBufferKind {
     match texture {
-        SourceTexture::External(ext_image) => {
+        TextureSource::External(ext_image) => {
             match ext_image.image_type {
                 ExternalImageType::TextureHandle(target) => {
                     target.into()
                 }
                 ExternalImageType::Buffer => {
                     // The ExternalImageType::Buffer should be handled by resource_cache.
                     // It should go through the non-external case.
                     panic!("Unexpected non-texture handle type");
--- a/gfx/webrender/src/device/gl.rs
+++ b/gfx/webrender/src/device/gl.rs
@@ -429,16 +429,21 @@ impl ExternalTexture {
     }
 
     #[cfg(feature = "replay")]
     pub fn internal_id(&self) -> gl::GLuint {
         self.id
     }
 }
 
+/// WebRender interface to an OpenGL texture.
+///
+/// Because freeing a texture requires various device handles that are not
+/// reachable from this struct, manual destruction via `Device` is required.
+/// Our `Drop` implementation asserts that this has happened.
 pub struct Texture {
     id: gl::GLuint,
     target: gl::GLuint,
     layer_count: i32,
     format: ImageFormat,
     width: u32,
     height: u32,
     filter: TextureFilter,
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -20,54 +20,60 @@ use std::sync::Arc;
 use capture::{CaptureConfig, ExternalCaptureImage};
 #[cfg(feature = "replay")]
 use capture::PlainExternalImage;
 use tiling;
 
 pub type FastHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
 pub type FastHashSet<K> = HashSet<K, BuildHasherDefault<FxHasher>>;
 
-// An ID for a texture that is owned by the
-// texture cache module. This can include atlases
-// or standalone textures allocated via the
-// texture cache (e.g. if an image is too large
-// to be added to an atlas). The texture cache
-// manages the allocation and freeing of these
-// IDs, and the rendering thread maintains a
-// map from cache texture ID to native texture.
-
+/// An ID for a texture that is owned by the `texture_cache` module.
+///
+/// This can include atlases or standalone textures allocated via the texture
+/// cache (e.g. if an image is too large to be added to an atlas). The texture
+/// cache manages the allocation and freeing of these IDs, and the rendering
+/// thread maintains a map from cache texture ID to native texture.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct CacheTextureId(pub usize);
 
+/// Identifies a render pass target that is persisted until the end of the frame.
+///
+/// By default, only the targets of the immediately-preceding pass are bound as
+/// inputs to the next pass. However, tasks can opt into having their target
+/// preserved in a list until the end of the frame, and this type specifies the
+/// index in that list.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct SavedTargetIndex(pub usize);
 
 impl SavedTargetIndex {
     pub const PENDING: Self = SavedTargetIndex(!0);
 }
 
-// Represents the source for a texture.
-// These are passed from throughout the
-// pipeline until they reach the rendering
-// thread, where they are resolved to a
-// native texture ID.
-
+/// Identifies the source of an input texture to a shader.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum SourceTexture {
+pub enum TextureSource {
+    /// Equivalent to `None`, allowing us to avoid using `Option`s everywhere.
     Invalid,
+    /// An entry in the texture cache.
     TextureCache(CacheTextureId),
+    /// An external image texture, managed by the embedding.
     External(ExternalImageData),
-    CacheA8,
-    CacheRGBA8,
+    /// The alpha target of the immediately-preceding pass.
+    PrevPassAlpha,
+    /// The color target of the immediately-preceding pass.
+    PrevPassColor,
+    /// A render target from an earlier pass. Unlike the immediately-preceding
+    /// passes, these are not made available automatically, but are instead
+    /// opt-in by the `RenderTask` (see `mark_for_saving()`).
     RenderTaskCache(SavedTargetIndex),
 }
 
 pub const ORTHO_NEAR_PLANE: f32 = -100000.0;
 pub const ORTHO_FAR_PLANE: f32 = 100000.0;
 
 #[derive(Copy, Clone, Debug, PartialEq)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -274,20 +274,29 @@ impl PicturePrimitive {
             raster_spatial_node_index,
             surface_spatial_node_index,
         ).expect("todo");
 
         // Establish a new rasterization root if we have
         // a surface, and we have perspective or local raster
         // space request.
         let raster_space = self.requested_raster_space;
-        let local_scale = raster_space.local_scale();
 
-        let wants_raster_root = xf.has_perspective_component() ||
-                                local_scale.is_some();
+        // TODO(gw): A temporary hack here to revert behavior to
+        //           always raster in screen-space. This is not
+        //           a problem yet, since we're not taking advantage
+        //           of this for caching yet. This is a workaround
+        //           for some existing issues with handling scale
+        //           when rasterizing in local space mode. Once
+        //           the fixes for those are in-place, we can
+        //           remove this hack!
+        //let local_scale = raster_space.local_scale();
+        // let wants_raster_root = xf.has_perspective_component() ||
+        //                         local_scale.is_some();
+        let wants_raster_root = xf.has_perspective_component();
 
         let establishes_raster_root = has_surface && wants_raster_root;
 
         let raster_spatial_node_index = if establishes_raster_root {
             surface_spatial_node_index
         } else {
             raster_spatial_node_index
         };
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -1,12 +1,18 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+//! The high-level module responsible for managing the pipeline and preparing
+//! commands to be issued by the `Renderer`.
+//!
+//! See the comment at the top of the `renderer` module for a description of
+//! how these two pieces interact.
+
 use api::{ApiMsg, BuiltDisplayList, ClearCache, DebugCommand};
 #[cfg(feature = "debugger")]
 use api::{BuiltDisplayListIter, SpecificDisplayItem};
 use api::{DeviceIntPoint, DevicePixelScale, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{DocumentId, DocumentLayer, ExternalScrollId, FrameMsg, HitTestFlags, HitTestResult};
 use api::{IdNamespace, LayoutPoint, PipelineId, RenderNotifier, SceneMsg, ScrollClamping};
 use api::{MemoryReport, VoidPtrToSizeFn};
 use api::{ScrollLocation, ScrollNodeState, TransactionMsg, ResourceUpdate, ImageKey};
@@ -656,16 +662,20 @@ impl RenderBackend {
                         );
                     },
                     SceneBuilderResult::FlushComplete(tx) => {
                         tx.send(()).ok();
                     }
                     SceneBuilderResult::ExternalEvent(evt) => {
                         self.notifier.external_event(evt);
                     }
+                    SceneBuilderResult::ClearNamespace(id) => {
+                        self.resource_cache.clear_namespace(id);
+                        self.documents.retain(|doc_id, _doc| doc_id.0 != id);
+                    }
                     SceneBuilderResult::Stopped => {
                         panic!("We haven't sent a Stop yet, how did we get a Stopped back?");
                     }
                 }
             }
 
             keep_going = match self.api_rx.recv() {
                 Ok(msg) => {
@@ -765,19 +775,18 @@ impl RenderBackend {
                 self.documents.remove(&document_id);
                 self.low_priority_scene_tx.send(
                     SceneBuilderRequest::DeleteDocument(document_id)
                 ).unwrap();
             }
             ApiMsg::ExternalEvent(evt) => {
                 self.low_priority_scene_tx.send(SceneBuilderRequest::ExternalEvent(evt)).unwrap();
             }
-            ApiMsg::ClearNamespace(namespace_id) => {
-                self.resource_cache.clear_namespace(namespace_id);
-                self.documents.retain(|did, _doc| did.0 != namespace_id);
+            ApiMsg::ClearNamespace(id) => {
+                self.low_priority_scene_tx.send(SceneBuilderRequest::ClearNamespace(id)).unwrap();
             }
             ApiMsg::MemoryPressure => {
                 // This is drastic. It will basically flush everything out of the cache,
                 // and the next frame will have to rebuild all of its resources.
                 // We may want to look into something less extreme, but on the other hand this
                 // should only be used in situations where are running low enough on memory
                 // that we risk crashing if we don't do something about it.
                 // The advantage of clearing the cache completely is that it gets rid of any
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -11,17 +11,17 @@ use clip::{ClipDataStore, ClipItem, Clip
 use clip_scroll_tree::SpatialNodeIndex;
 use device::TextureFilter;
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
 use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use glyph_rasterizer::GpuGlyphCacheKey;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::{BorderInstance, ImageSource, RasterizationSpace, UvRectKind};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{CacheTextureId, FastHashMap, SavedTargetIndex};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use picture::PictureCacheKey;
 use prim_store::{PrimitiveIndex, ImageCacheKey};
 #[cfg(feature = "debugger")]
 use print_tree::{PrintTreePrinter};
 use render_backend::FrameId;
 use resource_cache::{CacheItem, ResourceCache};
@@ -111,26 +111,16 @@ impl RenderTaskTree {
                 debug_assert!(pass_index == passes.len() - 1);
             }
             RenderTaskLocation::Dynamic(..) |
             RenderTaskLocation::TextureCache(..) => {
                 debug_assert!(pass_index < passes.len() - 1);
             }
         }
 
-        // If this task can be shared between multiple
-        // passes, render it in the first pass so that
-        // it is available to all subsequent passes.
-        let pass_index = if task.is_shared() {
-            debug_assert!(task.children.is_empty());
-            0
-        } else {
-            pass_index
-        };
-
         let pass = &mut passes[pass_index];
         pass.add_render_task(id, task.get_dynamic_size(), task.target_kind());
     }
 
     pub fn prepare_for_render(&mut self) {
         for task in &mut self.tasks {
             task.prepare_for_render();
         }
@@ -169,23 +159,37 @@ impl ops::Index<RenderTaskId> for Render
 
 impl ops::IndexMut<RenderTaskId> for RenderTaskTree {
     fn index_mut(&mut self, id: RenderTaskId) -> &mut RenderTask {
         debug_assert_eq!(self.frame_id, id.1);
         &mut self.tasks[id.0 as usize]
     }
 }
 
+/// Identifies the output buffer location for a given `RenderTask`.
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTaskLocation {
+    /// The `RenderTask` should be drawn to a fixed region in a specific render
+    /// target. This is used for the root `RenderTask`, where the main
+    /// framebuffer is used as the render target.
     Fixed(DeviceIntRect),
+    /// The `RenderTask` should be drawn to a target provided by the atlas
+    /// allocator. This is the most common case.
+    ///
+    /// The second member specifies the width and height of the task
+    /// output, and the first member is initially left as `None`. During the
+    /// build phase, we invoke `RenderTargetList::alloc()` and store the
+    /// resulting location in the first member. That location identifies the
+    /// render target and the offset of the allocated region within that target.
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
-    TextureCache(SourceTexture, i32, DeviceIntRect),
+    /// The output of the `RenderTask` will be persisted beyond this frame, and
+    /// thus should be drawn into the `TextureCache`.
+    TextureCache(CacheTextureId, i32, DeviceIntRect),
 }
 
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct CacheMaskTask {
     actual_rect: DeviceIntRect,
     pub root_spatial_node_index: SpatialNodeIndex,
@@ -866,43 +870,16 @@ impl RenderTask {
             }
 
             RenderTaskKind::Blit(..) => {
                 RenderTargetKind::Color
             }
         }
     }
 
-    // Check if this task wants to be made available as an input
-    // to all passes (except the first) in the render task tree.
-    // To qualify for this, the task needs to have no children / dependencies.
-    // Currently, this is only supported for A8 targets, but it can be
-    // trivially extended to also support RGBA8 targets in the future
-    // if we decide that is useful.
-    pub fn is_shared(&self) -> bool {
-        match self.kind {
-            RenderTaskKind::Picture(..) |
-            RenderTaskKind::VerticalBlur(..) |
-            RenderTaskKind::Readback(..) |
-            RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Scaling(..) |
-            RenderTaskKind::ClipRegion(..) |
-            RenderTaskKind::Blit(..) |
-            RenderTaskKind::Border(..) |
-            RenderTaskKind::Glyph(..) => false,
-
-            // TODO(gw): For now, we've disabled the shared clip mask
-            //           optimization. It's of dubious value in the
-            //           future once we start to cache clip tasks anyway.
-            //           I have left shared texture support here though,
-            //           just in case we want it in the future.
-            RenderTaskKind::CacheMask(..) => false,
-        }
-    }
-
     // Optionally, prepare the render task for drawing. This is executed
     // after all resource cache items (textures and glyphs) have been
     // resolved and can be queried. It also allows certain render tasks
     // to defer calculating an exact size until now, if desired.
     pub fn prepare_for_render(&mut self) {
     }
 
     pub fn write_gpu_blocks(
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -1,18 +1,31 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-//! The webrender API.
+//! The high-level module responsible for interfacing with the GPU.
 //!
-//! The `webrender::renderer` module provides the interface to webrender, which
-//! is accessible through [`Renderer`][renderer]
+//! Much of WebRender's design is driven by separating work into different
+//! threads. To avoid the complexities of multi-threaded GPU access, we restrict
+//! all communication with the GPU to one thread, the render thread. But since
+//! issuing GPU commands is often a bottleneck, we move everything else (i.e.
+//! the computation of what commands to issue) to another thread, the
+//! RenderBackend thread. The RenderBackend, in turn, may delegate work to other
+//! thread (like the SceneBuilder threads or Rayon workers), but the
+//! Render-vs-RenderBackend distinction is the most important.
 //!
-//! [renderer]: struct.Renderer.html
+//! The consumer is responsible for initializing the render thread before
+//! calling into WebRender, which means that this module also serves as the
+//! initial entry point into WebRender, and is responsible for spawning the
+//! various other threads discussed above. That said, WebRender initialization
+//! returns both the `Renderer` instance as well as a channel for communicating
+//! directly with the `RenderBackend`. Aside from a few high-level operations
+//! like 'render now', most of the interesting commands from the consumer go over
+//! that channel and operate on the `RenderBackend`.
 
 use api::{BlobImageHandler, ColorF, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, Epoch, ExternalImageId};
 use api::{ExternalImageType, FontRenderMode, FrameMsg, ImageFormat, PipelineId};
 use api::{ImageRendering, Checkpoint, NotificationRequest};
 use api::{MemoryReport, VoidPtrToSizeFn};
 use api::{RenderApiSender, RenderNotifier, TexelRect, TextureTarget};
 use api::{channel};
@@ -34,17 +47,17 @@ use frame_builder::{ChasePrimitive, Fram
 use gleam::gl;
 use glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 #[cfg(feature = "debug_renderer")]
 use gpu_cache::GpuDebugChunk;
 #[cfg(feature = "pathfinder")]
 use gpu_glyph_renderer::GpuGlyphRenderer;
 use gpu_types::ScalingInstance;
-use internal_types::{SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
+use internal_types::{TextureSource, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
 use internal_types::{CacheTextureId, DebugOutput, FastHashMap, RenderedDocument, ResultMsg};
 use internal_types::{TextureUpdateList, TextureUpdateOp, TextureUpdateSource};
 use internal_types::{RenderTargetInfo, SavedTargetIndex};
 use prim_store::DeferredResolve;
 use profiler::{BackendProfileCounters, FrameProfileCounters,
                GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use device::query::GpuProfiler;
 use rayon::{ThreadPool, ThreadPoolBuilder};
@@ -269,31 +282,32 @@ impl From<GlyphFormat> for ShaderColorMo
                 panic!("Subpixel glyph formats must be handled separately.");
             }
             GlyphFormat::Bitmap => ShaderColorMode::Bitmap,
             GlyphFormat::ColorBitmap => ShaderColorMode::ColorBitmap,
         }
     }
 }
 
+/// Enumeration of the texture samplers used across the various WebRender shaders.
+///
+/// Each variant corresponds to a uniform declared in shader source. We only bind
+/// the variants we need for a given shader, so not every variant is bound for every
+/// batch.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub(crate) enum TextureSampler {
     Color0,
     Color1,
     Color2,
-    CacheA8,
-    CacheRGBA8,
-    ResourceCache,
+    PrevPassAlpha,
+    PrevPassColor,
+    GpuCache,
     TransformPalette,
     RenderTasks,
     Dither,
-    // A special sampler that is bound to the A8 output of
-    // the *first* pass. Items rendered in this target are
-    // available as inputs to tasks in any subsequent pass.
-    SharedCacheA8,
     PrimitiveHeadersF,
     PrimitiveHeadersI,
 }
 
 impl TextureSampler {
     pub(crate) fn color(n: usize) -> TextureSampler {
         match n {
             0 => TextureSampler::Color0,
@@ -307,25 +321,24 @@ impl TextureSampler {
 }
 
 impl Into<TextureSlot> for TextureSampler {
     fn into(self) -> TextureSlot {
         match self {
             TextureSampler::Color0 => TextureSlot(0),
             TextureSampler::Color1 => TextureSlot(1),
             TextureSampler::Color2 => TextureSlot(2),
-            TextureSampler::CacheA8 => TextureSlot(3),
-            TextureSampler::CacheRGBA8 => TextureSlot(4),
-            TextureSampler::ResourceCache => TextureSlot(5),
+            TextureSampler::PrevPassAlpha => TextureSlot(3),
+            TextureSampler::PrevPassColor => TextureSlot(4),
+            TextureSampler::GpuCache => TextureSlot(5),
             TextureSampler::TransformPalette => TextureSlot(6),
             TextureSampler::RenderTasks => TextureSlot(7),
             TextureSampler::Dither => TextureSlot(8),
-            TextureSampler::SharedCacheA8 => TextureSlot(9),
-            TextureSampler::PrimitiveHeadersF => TextureSlot(10),
-            TextureSampler::PrimitiveHeadersI => TextureSlot(11),
+            TextureSampler::PrimitiveHeadersF => TextureSlot(9),
+            TextureSampler::PrimitiveHeadersI => TextureSlot(10),
         }
     }
 }
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedVertex {
     pub pos: [f32; 2],
@@ -698,107 +711,120 @@ impl GpuGlyphRenderer {
     fn new(_: &mut Device, _: &VAO, _: bool) -> Result<GpuGlyphRenderer, RendererError> {
         Ok(GpuGlyphRenderer)
     }
 }
 
 #[cfg(not(feature = "pathfinder"))]
 struct StenciledGlyphPage;
 
+/// A Texture that has been initialized by the `device` module and is ready to
+/// be used.
 struct ActiveTexture {
     texture: Texture,
     saved_index: Option<SavedTargetIndex>,
-    is_shared: bool,
 }
 
-struct SourceTextureResolver {
-    /// A vector for fast resolves of texture cache IDs to
-    /// native texture IDs. This maps to a free-list managed
-    /// by the backend thread / texture cache. We free the
-    /// texture memory associated with a TextureId when its
-    /// texture cache ID is freed by the texture cache, but
-    /// reuse the TextureId when the texture caches's free
-    /// list reuses the texture cache ID. This saves having to
-    /// use a hashmap, and allows a flat vector for performance.
-    cache_texture_map: Vec<Texture>,
+/// Helper struct for resolving device Textures for use during rendering passes.
+///
+/// Manages the mapping between the at-a-distance texture handles used by the
+/// `RenderBackend` (which does not directly interface with the GPU) and actual
+/// device texture handles.
+struct TextureResolver {
+    /// A vector for fast resolves of texture cache IDs to native texture IDs.
+    /// This maps to a free-list managed by the backend thread / texture cache.
+    /// We free the texture memory associated with a TextureId when its texture
+    /// cache ID is freed by the texture cache, but reuse the TextureId when the
+    /// texture cache's free list reuses the texture cache ID. This saves
+    /// having to use a hashmap, and allows a flat vector for performance.
+    texture_cache_map: Vec<Texture>,
 
     /// Map of external image IDs to native textures.
     external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,
 
-    /// A special 1x1 dummy cache texture used for shaders that expect to work
-    /// with the cache but are actually running in the first pass
-    /// when no target is yet provided as a cache texture input.
+    /// A special 1x1 dummy texture used for shaders that expect to work with
+    /// the output of the previous pass but are actually running in the first
+    /// pass.
     dummy_cache_texture: Texture,
 
-    /// The current cache textures.
-    cache_rgba8_texture: Option<ActiveTexture>,
-    cache_a8_texture: Option<ActiveTexture>,
-
-    /// An alpha texture shared between all passes.
-    //TODO: just use the standard texture saving logic instead.
-    shared_alpha_texture: Option<Texture>,
-
-    /// Saved cache textures that are to be re-used.
-    saved_textures: Vec<Texture>,
-
-    /// General pool of render targets.
+    /// The outputs of the previous pass, if applicable.
+    prev_pass_color: Option<ActiveTexture>,
+    prev_pass_alpha: Option<ActiveTexture>,
+
+    /// Saved render targets from previous passes. This is used when a pass
+    /// needs access to the result of a pass other than the immediately-preceding
+    /// one. In this case, the `RenderTask` will get a non-`None` `saved_index`,
+    /// which will cause the resulting render target to be persisted in this list
+    /// (at that index) until the end of the frame.
+    saved_targets: Vec<Texture>,
+
+    /// Pool of idle render target textures ready for re-use.
+    ///
+    /// Naively, it would seem like we only ever need two pairs of (color,
+    /// alpha) render targets: one for the output of the previous pass (serving
+    /// as input to the current pass), and one for the output of the current
+    /// pass. However, there are cases where the output of one pass is used as
+    /// the input to multiple future passes. For example, drop-shadows draw the
+    /// picture in pass X, then reference it in pass X+1 to create the blurred
+    /// shadow, and pass the results of both X and X+1 to pass X+2 to draw the
+    /// actual content.
+    ///
+    /// See the comments in `allocate_target_texture` for more insight on why
+    /// reuse is a win.
     render_target_pool: Vec<Texture>,
 }
 
-impl SourceTextureResolver {
-    fn new(device: &mut Device) -> SourceTextureResolver {
+impl TextureResolver {
+    fn new(device: &mut Device) -> TextureResolver {
         let mut dummy_cache_texture = device
             .create_texture(TextureTarget::Array, ImageFormat::BGRA8);
         device.init_texture::<u8>(
             &mut dummy_cache_texture,
             1,
             1,
             TextureFilter::Linear,
             None,
             1,
             None,
         );
 
-        SourceTextureResolver {
-            cache_texture_map: Vec::new(),
+        TextureResolver {
+            texture_cache_map: Vec::new(),
             external_images: FastHashMap::default(),
             dummy_cache_texture,
-            cache_a8_texture: None,
-            cache_rgba8_texture: None,
-            shared_alpha_texture: None,
-            saved_textures: Vec::default(),
+            prev_pass_alpha: None,
+            prev_pass_color: None,
+            saved_targets: Vec::default(),
             render_target_pool: Vec::new(),
         }
     }
 
     fn deinit(self, device: &mut Device) {
         device.delete_texture(self.dummy_cache_texture);
 
-        for texture in self.cache_texture_map {
+        for texture in self.texture_cache_map {
             device.delete_texture(texture);
         }
 
         for texture in self.render_target_pool {
             device.delete_texture(texture);
         }
     }
 
     fn begin_frame(&mut self) {
-        assert!(self.cache_rgba8_texture.is_none());
-        assert!(self.cache_a8_texture.is_none());
-        assert!(self.saved_textures.is_empty());
+        assert!(self.prev_pass_color.is_none());
+        assert!(self.prev_pass_alpha.is_none());
+        assert!(self.saved_targets.is_empty());
     }
 
     fn end_frame(&mut self, device: &mut Device, frame_id: FrameId) {
         // return the cached targets to the pool
         self.end_pass(None, None);
-        // return the global alpha texture
-        self.render_target_pool.extend(self.shared_alpha_texture.take());
         // return the saved targets as well
-        self.render_target_pool.extend(self.saved_textures.drain(..));
+        self.render_target_pool.extend(self.saved_targets.drain(..));
 
         // GC the render target pool.
         //
         // We use a simple scheme whereby we drop any texture that hasn't been used
         // in the last 30 frames. This should generally prevent any sustained build-
         // up of unused textures, unless we don't generate frames for a long period.
         // This can happen when the window is minimized, and we probably want to
         // flush all the WebRender caches in that case [1].
@@ -825,115 +851,110 @@ impl SourceTextureResolver {
         &mut self,
         a8_texture: Option<ActiveTexture>,
         rgba8_texture: Option<ActiveTexture>,
     ) {
         // If we have cache textures from previous pass, return them to the pool.
         // Also assign the pool index of those cache textures to last pass's index because this is
         // the result of last pass.
         // Note: the order here is important, needs to match the logic in `RenderPass::build()`.
-        if let Some(at) = self.cache_rgba8_texture.take() {
-            assert!(!at.is_shared);
+        if let Some(at) = self.prev_pass_color.take() {
             if let Some(index) = at.saved_index {
-                assert_eq!(self.saved_textures.len(), index.0);
-                self.saved_textures.push(at.texture);
+                assert_eq!(self.saved_targets.len(), index.0);
+                self.saved_targets.push(at.texture);
             } else {
                 self.render_target_pool.push(at.texture);
             }
         }
-        if let Some(at) = self.cache_a8_texture.take() {
+        if let Some(at) = self.prev_pass_alpha.take() {
             if let Some(index) = at.saved_index {
-                assert!(!at.is_shared);
-                assert_eq!(self.saved_textures.len(), index.0);
-                self.saved_textures.push(at.texture);
-            } else if at.is_shared {
-                assert!(self.shared_alpha_texture.is_none());
-                self.shared_alpha_texture = Some(at.texture);
+                assert_eq!(self.saved_targets.len(), index.0);
+                self.saved_targets.push(at.texture);
             } else {
                 self.render_target_pool.push(at.texture);
             }
         }
 
         // We have another pass to process, make these textures available
         // as inputs to the next pass.
-        self.cache_rgba8_texture = rgba8_texture;
-        self.cache_a8_texture = a8_texture;
+        self.prev_pass_color = rgba8_texture;
+        self.prev_pass_alpha = a8_texture;
     }
 
     // Bind a source texture to the device.
-    fn bind(&self, texture_id: &SourceTexture, sampler: TextureSampler, device: &mut Device) {
+    fn bind(&self, texture_id: &TextureSource, sampler: TextureSampler, device: &mut Device) {
         match *texture_id {
-            SourceTexture::Invalid => {}
-            SourceTexture::CacheA8 => {
-                let texture = match self.cache_a8_texture {
+            TextureSource::Invalid => {}
+            TextureSource::PrevPassAlpha => {
+                let texture = match self.prev_pass_alpha {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 };
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::CacheRGBA8 => {
-                let texture = match self.cache_rgba8_texture {
+            TextureSource::PrevPassColor => {
+                let texture = match self.prev_pass_color {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 };
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::External(external_image) => {
+            TextureSource::External(external_image) => {
                 let texture = self.external_images
                     .get(&(external_image.id, external_image.channel_index))
                     .expect(&format!("BUG: External image should be resolved by now"));
                 device.bind_external_texture(sampler, texture);
             }
-            SourceTexture::TextureCache(index) => {
-                let texture = &self.cache_texture_map[index.0];
+            TextureSource::TextureCache(index) => {
+                let texture = &self.texture_cache_map[index.0];
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::RenderTaskCache(saved_index) => {
-                let texture = &self.saved_textures[saved_index.0];
+            TextureSource::RenderTaskCache(saved_index) => {
+                let texture = &self.saved_targets[saved_index.0];
                 device.bind_texture(sampler, texture)
             }
         }
     }
 
     // Get the real (OpenGL) texture ID for a given source texture.
     // For a texture cache texture, the IDs are stored in a vector
     // map for fast access.
-    fn resolve(&self, texture_id: &SourceTexture) -> Option<&Texture> {
+    fn resolve(&self, texture_id: &TextureSource) -> Option<&Texture> {
         match *texture_id {
-            SourceTexture::Invalid => None,
-            SourceTexture::CacheA8 => Some(
-                match self.cache_a8_texture {
+            TextureSource::Invalid => None,
+            TextureSource::PrevPassAlpha => Some(
+                match self.prev_pass_alpha {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 }
             ),
-            SourceTexture::CacheRGBA8 => Some(
-                match self.cache_rgba8_texture {
+            TextureSource::PrevPassColor => Some(
+                match self.prev_pass_color {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 }
             ),
-            SourceTexture::External(..) => {
+            TextureSource::External(..) => {
                 panic!("BUG: External textures cannot be resolved, they can only be bound.");
             }
-            SourceTexture::TextureCache(index) => {
-                Some(&self.cache_texture_map[index.0])
+            TextureSource::TextureCache(index) => {
+                Some(&self.texture_cache_map[index.0])
             }
-            SourceTexture::RenderTaskCache(saved_index) => {
-                Some(&self.saved_textures[saved_index.0])
+            TextureSource::RenderTaskCache(saved_index) => {
+                Some(&self.saved_targets[saved_index.0])
             }
         }
     }
 
     fn report_memory(&self) -> MemoryReport {
         let mut report = MemoryReport::default();
 
         // We're reporting GPU memory rather than heap-allocations, so we don't
         // use size_of_op.
-        for t in self.cache_texture_map.iter() {
+        for t in self.texture_cache_map.iter() {
             report.texture_cache_textures += t.size_in_bytes();
         }
         for t in self.render_target_pool.iter() {
             report.render_target_textures += t.size_in_bytes();
         }
 
         report
     }
@@ -958,19 +979,19 @@ struct CacheRow {
 }
 
 impl CacheRow {
     fn new() -> Self {
         CacheRow { is_dirty: false }
     }
 }
 
-/// The bus over which CPU and GPU versions of the cache
+/// The bus over which CPU and GPU versions of the GPU cache
 /// get synchronized.
-enum CacheBus {
+enum GpuCacheBus {
     /// PBO-based updates, currently operate on a row granularity.
     /// Therefore, are subject to fragmentation issues.
     PixelBuffer {
         /// PBO used for transfers.
         buffer: PBO,
         /// Meta-data about the cached rows.
         rows: Vec<CacheRow>,
         /// Mirrored block data on CPU.
@@ -988,65 +1009,65 @@ enum CacheBus {
         /// VBO for gpu block data.
         buf_value: VBO<GpuBlockData>,
         /// Currently stored block count.
         count: usize,
     },
 }
 
 /// The device-specific representation of the cache texture in gpu_cache.rs
-struct CacheTexture {
+struct GpuCacheTexture {
     texture: Texture,
-    bus: CacheBus,
+    bus: GpuCacheBus,
 }
 
-impl CacheTexture {
+impl GpuCacheTexture {
     fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
         let texture = device.create_texture(TextureTarget::Default, ImageFormat::RGBAF32);
 
         let bus = if use_scatter {
             let program = device
                 .create_program("gpu_cache_update", "", &desc::GPU_CACHE_UPDATE)?;
             let buf_position = device.create_vbo();
             let buf_value = device.create_vbo();
             //Note: the vertex attributes have to be supplied in the same order
             // as for program creation, but each assigned to a different stream.
             let vao = device.create_custom_vao(&[
                 buf_position.stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                 buf_value   .stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[1..2]),
             ]);
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 program,
                 vao,
                 buf_position,
                 buf_value,
                 count: 0,
             }
         } else {
             let buffer = device.create_pbo();
-            CacheBus::PixelBuffer {
+            GpuCacheBus::PixelBuffer {
                 buffer,
                 rows: Vec::new(),
                 cpu_blocks: Vec::new(),
             }
         };
 
-        Ok(CacheTexture {
+        Ok(GpuCacheTexture {
             texture,
             bus,
         })
     }
 
     fn deinit(self, device: &mut Device) {
         device.delete_texture(self.texture);
         match self.bus {
-            CacheBus::PixelBuffer { buffer, ..} => {
+            GpuCacheBus::PixelBuffer { buffer, ..} => {
                 device.delete_pbo(buffer);
             }
-            CacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
+            GpuCacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
                 device.delete_program(program);
                 device.delete_custom_vao(vao);
                 device.delete_vbo(buf_position);
                 device.delete_vbo(buf_value);
             }
         }
     }
 
@@ -1060,17 +1081,17 @@ impl CacheTexture {
         total_block_count: usize,
         max_height: u32,
     ) {
         // See if we need to create or resize the texture.
         let old_size = self.texture.get_dimensions();
         let new_size = DeviceUintSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, max_height);
 
         match self.bus {
-            CacheBus::PixelBuffer { ref mut rows, .. } => {
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                 if max_height > old_size.height {
                     // Create a f32 texture that can be used for the vertex shader
                     // to fetch data from.
                     device.init_texture::<u8>(
                         &mut self.texture,
                         new_size.width,
                         new_size.height,
                         TextureFilter::Nearest,
@@ -1082,17 +1103,17 @@ impl CacheTexture {
                     // If we had to resize the texture, just mark all rows
                     // as dirty so they will be uploaded to the texture
                     // during the next flush.
                     for row in rows.iter_mut() {
                         row.is_dirty = true;
                     }
                 }
             }
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 ref mut buf_position,
                 ref mut buf_value,
                 ref mut count,
                 ..
             } => {
                 *count = 0;
                 if total_block_count > buf_value.allocated_count() {
                     device.allocate_vbo(buf_position, total_block_count, VertexUsageHint::Stream);
@@ -1117,17 +1138,17 @@ impl CacheTexture {
                     }
                 }
             }
         }
     }
 
     fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
         match self.bus {
-            CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
+            GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
                 for update in &updates.updates {
                     match *update {
                         GpuCacheUpdate::Copy {
                             block_index,
                             block_count,
                             address,
                         } => {
                             let row = address.v as usize;
@@ -1150,17 +1171,17 @@ impl CacheTexture {
                             let data = &mut cpu_blocks[block_offset .. (block_offset + block_count)];
                             for i in 0 .. block_count {
                                 data[i] = updates.blocks[block_index + i];
                             }
                         }
                     }
                 }
             }
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 ref buf_position,
                 ref buf_value,
                 ref mut count,
                 ..
             } => {
                 //TODO: re-use this heap allocation
                 // Unused positions will be left as 0xFFFF, which translates to
                 // (1.0, 1.0) in the vertex output position and gets culled out
@@ -1188,17 +1209,17 @@ impl CacheTexture {
                 device.fill_vbo(buf_position, &position_data, *count);
                 *count += position_data.len();
             }
         }
     }
 
     fn flush(&mut self, device: &mut Device) -> usize {
         match self.bus {
-            CacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
+            GpuCacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
                 let rows_dirty = rows
                     .iter()
                     .filter(|row| row.is_dirty)
                     .count();
                 if rows_dirty == 0 {
                     return 0
                 }
 
@@ -1223,17 +1244,17 @@ impl CacheTexture {
 
                     uploader.upload(rect, 0, None, cpu_blocks);
 
                     row.is_dirty = false;
                 }
 
                 rows_dirty
             }
-            CacheBus::Scatter { ref program, ref vao, count, .. } => {
+            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                 device.disable_depth();
                 device.set_blend(false);
                 device.bind_program(program);
                 device.bind_custom_vao(vao);
                 device.bind_draw_target(
                     Some((&self.texture, 0)),
                     Some(self.texture.get_dimensions()),
                 );
@@ -1392,16 +1413,19 @@ pub struct RendererVAOs {
     blur_vao: VAO,
     clip_vao: VAO,
     border_vao: VAO,
     scale_vao: VAO,
 }
 
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
+///
+/// We have a separate `Renderer` instance for each instance of WebRender (generally
+/// one per OS window), and all instances share the same thread.
 pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     debug_server: DebugServer,
     pub device: Device,
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     active_documents: Vec<(DocumentId, RenderedDocument)>,
@@ -1431,27 +1455,27 @@ pub struct Renderer {
 
     pub gpu_profile: GpuProfiler<GpuProfileTag>,
     vaos: RendererVAOs,
 
     prim_header_f_texture: VertexDataTexture,
     prim_header_i_texture: VertexDataTexture,
     transforms_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
-    gpu_cache_texture: CacheTexture,
+    gpu_cache_texture: GpuCacheTexture,
     #[cfg(feature = "debug_renderer")]
     gpu_cache_debug_chunks: Vec<GpuDebugChunk>,
 
     gpu_cache_frame_id: FrameId,
     gpu_cache_overflow: bool,
 
     pipeline_info: PipelineInfo,
 
     // Manages and resolves source textures IDs to real texture IDs.
-    texture_resolver: SourceTextureResolver,
+    texture_resolver: TextureResolver,
 
     // A PBO used to do asynchronous texture cache uploads.
     texture_cache_upload_pbo: PBO,
 
     dither_matrix_texture: Option<Texture>,
 
     /// Optional trait object that allows the client
     /// application to provide external buffers for image data.
@@ -1693,24 +1717,24 @@ impl Renderer {
                                                             options.precache_shaders));
 
         let blur_vao = device.create_vao_with_new_instances(&desc::BLUR, &prim_vao);
         let clip_vao = device.create_vao_with_new_instances(&desc::CLIP, &prim_vao);
         let border_vao = device.create_vao_with_new_instances(&desc::BORDER, &prim_vao);
         let scale_vao = device.create_vao_with_new_instances(&desc::SCALE, &prim_vao);
         let texture_cache_upload_pbo = device.create_pbo();
 
-        let texture_resolver = SourceTextureResolver::new(&mut device);
+        let texture_resolver = TextureResolver::new(&mut device);
 
         let prim_header_f_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
         let prim_header_i_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAI32);
         let transforms_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
         let render_task_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
 
-        let gpu_cache_texture = CacheTexture::new(
+        let gpu_cache_texture = GpuCacheTexture::new(
             &mut device,
             options.scatter_gpu_cache_updates,
         )?;
 
         device.end_frame();
 
         let backend_notifier = notifier.clone();
 
@@ -2295,23 +2319,23 @@ impl Renderer {
             DebugCommand::LoadCapture(..) => {
                 panic!("Capture commands are not welcome here! Did you build with 'capture' feature?")
             }
             DebugCommand::ClearCaches(_)
             | DebugCommand::SimulateLongSceneBuild(_)
             | DebugCommand::SimulateLongLowPrioritySceneBuild(_) => {}
             DebugCommand::InvalidateGpuCache => {
                 match self.gpu_cache_texture.bus {
-                    CacheBus::PixelBuffer { ref mut rows, .. } => {
+                    GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                         info!("Invalidating GPU caches");
                         for row in rows {
                             row.is_dirty = true;
                         }
                     }
-                    CacheBus::Scatter { .. } => {
+                    GpuCacheBus::Scatter { .. } => {
                         warn!("Unable to invalidate scattered GPU cache");
                     }
                 }
             }
         }
     }
 
     /// Set a callback for handling external images.
@@ -2646,17 +2670,17 @@ impl Renderer {
         let deferred_update_list = self.update_deferred_resolves(&frame.deferred_resolves);
         self.pending_gpu_cache_updates.extend(deferred_update_list);
 
         self.update_gpu_cache();
 
         // Note: the texture might have changed during the `update`,
         // so we need to bind it here.
         self.device.bind_texture(
-            TextureSampler::ResourceCache,
+            TextureSampler::GpuCache,
             &self.gpu_cache_texture.texture,
         );
     }
 
     fn update_texture_cache(&mut self) {
         let _gm = self.gpu_profile.start_marker("texture cache update");
         let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
 
@@ -2667,23 +2691,23 @@ impl Renderer {
                         width,
                         height,
                         layer_count,
                         format,
                         filter,
                         render_target,
                     } => {
                         let CacheTextureId(cache_texture_index) = update.id;
-                        if self.texture_resolver.cache_texture_map.len() == cache_texture_index {
+                        if self.texture_resolver.texture_cache_map.len() == cache_texture_index {
                             // Create a new native texture, as requested by the texture cache.
                             let texture = self.device.create_texture(TextureTarget::Array, format);
-                            self.texture_resolver.cache_texture_map.push(texture);
+                            self.texture_resolver.texture_cache_map.push(texture);
                         }
                         let texture =
-                            &mut self.texture_resolver.cache_texture_map[cache_texture_index];
+                            &mut self.texture_resolver.texture_cache_map[cache_texture_index];
                         assert_eq!(texture.get_format(), format);
 
                         // Ensure no PBO is bound when creating the texture storage,
                         // or GL will attempt to read data from there.
                         self.device.init_texture::<u8>(
                             texture,
                             width,
                             height,
@@ -2695,17 +2719,17 @@ impl Renderer {
                     }
                     TextureUpdateOp::Update {
                         rect,
                         source,
                         stride,
                         layer_index,
                         offset,
                     } => {
-                        let texture = &self.texture_resolver.cache_texture_map[update.id.0];
+                        let texture = &self.texture_resolver.texture_cache_map[update.id.0];
                         let mut uploader = self.device.upload_texture(
                             texture,
                             &self.texture_cache_upload_pbo,
                             0,
                         );
 
                         let bytes_uploaded = match source {
                             TextureUpdateSource::Bytes { data } => {
@@ -2743,17 +2767,17 @@ impl Renderer {
                                 handler.unlock(id, channel_index);
                                 size
                             }
                         };
 
                         self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10);
                     }
                     TextureUpdateOp::Free => {
-                        let texture = &mut self.texture_resolver.cache_texture_map[update.id.0];
+                        let texture = &mut self.texture_resolver.texture_cache_map[update.id.0];
                         self.device.free_texture_storage(texture);
                     }
                 }
             }
         }
     }
 
     pub(crate) fn draw_instanced_batch<T>(
@@ -2826,17 +2850,17 @@ impl Renderer {
         backdrop: &RenderTask,
         readback: &RenderTask,
     ) {
         if scissor_rect.is_some() {
             self.device.disable_scissor();
         }
 
         let cache_texture = self.texture_resolver
-            .resolve(&SourceTexture::CacheRGBA8)
+            .resolve(&TextureSource::PrevPassColor)
             .unwrap();
 
         // Before submitting the composite batch, do the
         // framebuffer readbacks that are needed for each
         // composite operation in this batch.
         let (readback_rect, readback_layer) = readback.get_target_rect();
         let (backdrop_rect, _) = backdrop.get_target_rect();
         let backdrop_screen_origin = match backdrop.kind {
@@ -2904,17 +2928,17 @@ impl Renderer {
                     self.device.bind_read_target(Some((src_texture, layer)));
                     source_rect
                 }
                 BlitJobSource::RenderTask(task_id) => {
                     // A blit from the child render task into this target.
                     // TODO(gw): Support R8 format here once we start
                     //           creating mips for alpha masks.
                     let src_texture = self.texture_resolver
-                        .resolve(&SourceTexture::CacheRGBA8)
+                        .resolve(&TextureSource::PrevPassColor)
                         .expect("BUG: invalid source texture");
                     let source = &render_tasks[task_id];
                     let (source_rect, layer) = source.get_target_rect();
                     self.device.bind_read_target(Some((src_texture, layer.0 as i32)));
                     source_rect
                 }
             };
             debug_assert_eq!(source_rect.size, blit.target_rect.size);
@@ -2923,31 +2947,31 @@ impl Renderer {
                 blit.target_rect,
             );
         }
     }
 
     fn handle_scaling(
         &mut self,
         scalings: &[ScalingInstance],
-        source: SourceTexture,
+        source: TextureSource,
         projection: &Transform3D<f32>,
         stats: &mut RendererStats,
     ) {
         if scalings.is_empty() {
             return
         }
 
         match source {
-            SourceTexture::CacheRGBA8 => {
+            TextureSource::PrevPassColor => {
                 self.shaders.cs_scale_rgba8.bind(&mut self.device,
                                                  &projection,
                                                  &mut self.renderer_errors);
             }
-            SourceTexture::CacheA8 => {
+            TextureSource::PrevPassAlpha => {
                 self.shaders.cs_scale_a8.bind(&mut self.device,
                                               &projection,
                                               &mut self.renderer_errors);
             }
             _ => unreachable!(),
         }
 
         self.draw_instanced_batch(
@@ -3059,17 +3083,17 @@ impl Renderer {
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                     stats,
                 );
             }
         }
 
-        self.handle_scaling(&target.scalings, SourceTexture::CacheRGBA8, projection, stats);
+        self.handle_scaling(&target.scalings, TextureSource::PrevPassColor, projection, stats);
 
         //TODO: record the pixel count for cached primitives
 
         if target.needs_depth() {
             let _gl = self.gpu_profile.start_marker("opaque batches");
             let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
             self.set_blend(false, framebuffer_kind);
             //Note: depth equality is needed for split planes
@@ -3351,17 +3375,17 @@ impl Renderer {
                     &target.horizontal_blurs,
                     VertexArrayKind::Blur,
                     &BatchTextures::no_texture(),
                     stats,
                 );
             }
         }
 
-        self.handle_scaling(&target.scalings, SourceTexture::CacheA8, projection, stats);
+        self.handle_scaling(&target.scalings, TextureSource::PrevPassAlpha, projection, stats);
 
         // Draw the clip items into the tiled alpha mask.
         {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP);
 
             // switch to multiplicative blending
             self.set_blend(true, FramebufferKind::Other);
             self.set_blend_mode_multiply(FramebufferKind::Other);
@@ -3382,18 +3406,18 @@ impl Renderer {
                 );
             }
             // draw box-shadow clips
             for (mask_texture_id, items) in target.clip_batcher.box_shadows.iter() {
                 let _gm2 = self.gpu_profile.start_marker("box-shadows");
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
-                        SourceTexture::Invalid,
-                        SourceTexture::Invalid,
+                        TextureSource::Invalid,
+                        TextureSource::Invalid,
                     ],
                 };
                 self.shaders.cs_clip_box_shadow
                     .bind(&mut self.device, projection, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     items,
                     VertexArrayKind::Clip,
                     &textures,
@@ -3418,18 +3442,18 @@ impl Renderer {
             }
 
             // draw image masks
             for (mask_texture_id, items) in target.clip_batcher.images.iter() {
                 let _gm2 = self.gpu_profile.start_marker("clip images");
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
-                        SourceTexture::Invalid,
-                        SourceTexture::Invalid,
+                        TextureSource::Invalid,
+                        TextureSource::Invalid,
                     ],
                 };
                 self.shaders.cs_clip_image
                     .bind(&mut self.device, projection, &mut self.renderer_errors);
                 self.draw_instanced_batch(
                     items,
                     VertexArrayKind::Clip,
                     &textures,
@@ -3438,25 +3462,26 @@ impl Renderer {
             }
         }
 
         self.gpu_profile.finish_sampler(alpha_sampler);
     }
 
     fn draw_texture_cache_target(
         &mut self,
-        texture: &SourceTexture,
+        texture: &CacheTextureId,
         layer: i32,
         target: &TextureCacheRenderTarget,
         render_tasks: &RenderTaskTree,
         stats: &mut RendererStats,
     ) {
+        let texture_source = TextureSource::TextureCache(*texture);
         let (target_size, projection) = {
             let texture = self.texture_resolver
-                .resolve(texture)
+                .resolve(&texture_source)
                 .expect("BUG: invalid target texture");
             let target_size = texture.get_dimensions();
             let projection = Transform3D::ortho(
                 0.0,
                 target_size.width as f32,
                 0.0,
                 target_size.height as f32,
                 ORTHO_NEAR_PLANE,
@@ -3470,17 +3495,17 @@ impl Renderer {
 
         self.set_blend(false, FramebufferKind::Other);
 
         // Handle any Pathfinder glyphs.
         let stencil_page = self.stencil_glyphs(&target.glyphs, &projection, &target_size, stats);
 
         {
             let texture = self.texture_resolver
-                .resolve(texture)
+                .resolve(&texture_source)
                 .expect("BUG: invalid target texture");
             self.device
                 .bind_draw_target(Some((texture, layer)), Some(target_size));
         }
 
         self.device.disable_depth();
         self.device.disable_depth_write();
         self.set_blend(false, FramebufferKind::Other);
@@ -3655,16 +3680,28 @@ impl Renderer {
                 .expect("Found external image, but no handler set!");
 
             for (ext_data, _) in self.texture_resolver.external_images.drain() {
                 handler.unlock(ext_data.0, ext_data.1);
             }
         }
     }
 
+    /// Allocates a texture to be used as the output for a rendering pass.
+    ///
+    /// We make an effort to reuse render target textures across passes and
+    /// across frames. Reusing a texture with the same dimensions (width,
+    /// height, and layer-count) and format is obviously ideal. Reusing a
+    /// texture with different dimensions but the same format can be faster
+    /// than allocating a new texture, since it basically boils down to
+    /// a realloc in GPU memory, which can be very cheap if the existing
+    /// region can be resized. However, some drivers/GPUs require textures
+    /// with different formats to be allocated in different arenas, so
+    /// reinitializing with a different format can force a large copy. As
+    /// such, we just allocate a new texture in that case.
     fn allocate_target_texture<T: RenderTarget>(
         &mut self,
         list: &mut RenderTargetList<T>,
         counters: &mut FrameProfileCounters,
         frame_id: FrameId,
     ) -> Option<ActiveTexture> {
         debug_assert_ne!(list.max_size, DeviceUintSize::zero());
         if list.targets.is_empty() {
@@ -3720,17 +3757,16 @@ impl Renderer {
             list.targets.len() as _,
             None,
         );
 
         list.check_ready(&texture);
         Some(ActiveTexture {
             texture,
             saved_index: list.saved_index.clone(),
-            is_shared: list.is_shared,
         })
     }
 
     fn bind_frame_data(&mut self, frame: &mut Frame) {
         let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA);
         self.device.set_device_pixel_ratio(frame.device_pixel_ratio);
 
         self.prim_header_f_texture.update(
@@ -3762,18 +3798,18 @@ impl Renderer {
 
         self.render_task_texture
             .update(&mut self.device, &mut frame.render_tasks.task_data);
         self.device.bind_texture(
             TextureSampler::RenderTasks,
             &self.render_task_texture.texture,
         );
 
-        debug_assert!(self.texture_resolver.cache_a8_texture.is_none());
-        debug_assert!(self.texture_resolver.cache_rgba8_texture.is_none());
+        debug_assert!(self.texture_resolver.prev_pass_alpha.is_none());
+        debug_assert!(self.texture_resolver.prev_pass_color.is_none());
     }
 
     fn draw_tile_frame(
         &mut self,
         frame: &mut Frame,
         framebuffer_size: Option<DeviceUintSize>,
         framebuffer_depth_is_ready: bool,
         frame_id: FrameId,
@@ -3792,23 +3828,23 @@ impl Renderer {
 
         self.bind_frame_data(frame);
         self.texture_resolver.begin_frame();
 
         for (pass_index, pass) in frame.passes.iter_mut().enumerate() {
             self.gpu_profile.place_marker(&format!("pass {}", pass_index));
 
             self.texture_resolver.bind(
-                &SourceTexture::CacheA8,
-                TextureSampler::CacheA8,
+                &TextureSource::PrevPassAlpha,
+                TextureSampler::PrevPassAlpha,
                 &mut self.device,
             );
             self.texture_resolver.bind(
-                &SourceTexture::CacheRGBA8,
-                TextureSampler::CacheRGBA8,
+                &TextureSource::PrevPassColor,
+                TextureSampler::PrevPassColor,
                 &mut self.device,
             );
 
             let (cur_alpha, cur_color) = match pass.kind {
                 RenderPassKind::MainFramebuffer(ref target) => {
                     if let Some(framebuffer_size) = framebuffer_size {
                         stats.color_target_count += 1;
 
@@ -3904,22 +3940,16 @@ impl Renderer {
                             stats,
                         );
                     }
 
                     (alpha_tex, color_tex)
                 }
             };
 
-            //Note: the `end_pass` will make sure this texture is not recycled this frame
-            if let Some(ActiveTexture { ref texture, is_shared: true, .. }) = cur_alpha {
-                self.device
-                    .bind_texture(TextureSampler::SharedCacheA8, texture);
-            }
-
             self.texture_resolver.end_pass(
                 cur_alpha,
                 cur_color,
             );
         }
 
         self.texture_resolver.end_frame(&mut self.device, frame_id);
 
@@ -4034,29 +4064,29 @@ impl Renderer {
         if !self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
             return;
         }
 
         let mut spacing = 16;
         let mut size = 512;
         let fb_width = framebuffer_size.width as i32;
         let num_layers: i32 = self.texture_resolver
-            .cache_texture_map
+            .texture_cache_map
             .iter()
             .map(|texture| texture.get_layer_count())
             .sum();
 
         if num_layers * (size + spacing) > fb_width {
             let factor = fb_width as f32 / (num_layers * (size + spacing)) as f32;
             size = (size as f32 * factor) as i32;
             spacing = (spacing as f32 * factor) as i32;
         }
 
         let mut i = 0;
-        for texture in &self.texture_resolver.cache_texture_map {
+        for texture in &self.texture_resolver.texture_cache_map {
             let y = spacing + if self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
                 528
             } else {
                 0
             };
             let dimensions = texture.get_dimensions();
             let src_rect = DeviceIntRect::new(
                 DeviceIntPoint::zero(),
@@ -4225,17 +4255,17 @@ impl Renderer {
         unsafe { op(ptr as *const c_void) }
     }
 
     /// Collects a memory report.
     pub fn report_memory(&self) -> MemoryReport {
         let mut report = MemoryReport::default();
 
         // GPU cache CPU memory.
-        if let CacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
+        if let GpuCacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
             report.gpu_cache_cpu_mirror += self.size_of(cpu_blocks.as_ptr());
         }
 
         // GPU cache GPU memory.
         report.gpu_cache_textures += self.gpu_cache_texture.texture.size_in_bytes();
 
         // Render task CPU memory.
         for (_id, doc) in &self.active_documents {
@@ -4735,17 +4765,17 @@ impl Renderer {
                     "gpu", &config.root, &mut self.device,
                 ),
                 gpu_cache_frame_id: self.gpu_cache_frame_id,
                 textures: Vec::new(),
                 external_images: deferred_images,
             };
 
             info!("saving cached textures");
-            for texture in &self.texture_resolver.cache_texture_map {
+            for texture in &self.texture_resolver.texture_cache_map {
                 let file_name = format!("cache-{}", plain_self.textures.len() + 1);
                 info!("\t{}", file_name);
                 let plain = Self::save_texture(texture, &file_name, &config.root, &mut self.device);
                 plain_self.textures.push(plain);
             }
 
             config.serialize(&plain_self, "renderer");
         }
@@ -4789,49 +4819,49 @@ impl Renderer {
             let value = (CapturedExternalImageData::Buffer(data), plain_ext.uv);
             image_handler.data.insert((ext.id, ext.channel_index), value);
         }
 
         if let Some(renderer) = CaptureConfig::deserialize::<PlainRenderer, _>(&root, "renderer") {
             info!("loading cached textures");
             self.device.begin_frame();
 
-            for texture in self.texture_resolver.cache_texture_map.drain(..) {
+            for texture in self.texture_resolver.texture_cache_map.drain(..) {
                 self.device.delete_texture(texture);
             }
             for texture in renderer.textures {
                 info!("\t{}", texture.data);
                 let mut t = self.device.create_texture(TextureTarget::Array, texture.format);
                 Self::load_texture(&mut t, &texture, &root, &mut self.device);
-                self.texture_resolver.cache_texture_map.push(t);
+                self.texture_resolver.texture_cache_map.push(t);
             }
 
             info!("loading gpu cache");
             let gpu_cache_data = Self::load_texture(
                 &mut self.gpu_cache_texture.texture,
                 &renderer.gpu_cache,
                 &root,
                 &mut self.device,
             );
             match self.gpu_cache_texture.bus {
-                CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
+                GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
                     let dim = self.gpu_cache_texture.texture.get_dimensions();
                     let blocks = unsafe {
                         slice::from_raw_parts(
                             gpu_cache_data.as_ptr() as *const GpuBlockData,
                             gpu_cache_data.len() / mem::size_of::<GpuBlockData>(),
                         )
                     };
                     // fill up the CPU cache from the contents we just loaded
                     rows.clear();
                     cpu_blocks.clear();
                     rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
                     cpu_blocks.extend_from_slice(blocks);
                 }
-                CacheBus::Scatter { .. } => {}
+                GpuCacheBus::Scatter { .. } => {}
             }
             self.gpu_cache_frame_id = renderer.gpu_cache_frame_id;
 
             info!("loading external texture-backed images");
             let mut native_map = FastHashMap::<String, gl::GLuint>::default();
             for ExternalCaptureImage { short_path, external, descriptor } in renderer.external_images {
                 let target = match external.image_type {
                     ExternalImageType::TextureHandle(target) => target,
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -23,17 +23,17 @@ use device::TextureFilter;
 use euclid::{point2, size2};
 use glyph_cache::GlyphCache;
 #[cfg(not(feature = "pathfinder"))]
 use glyph_cache::GlyphCacheEntry;
 use glyph_rasterizer::{FontInstance, GlyphFormat, GlyphKey, GlyphRasterizer};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::UvRectKind;
 use image::{compute_tile_range, for_each_tile_in_range};
-use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
+use internal_types::{FastHashMap, FastHashSet, TextureSource, TextureUpdateList};
 use profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTaskCache, RenderTaskCacheKey, RenderTaskId};
 use render_task::{RenderTaskCacheEntry, RenderTaskCacheEntryHandle, RenderTaskTree};
 use smallvec::SmallVec;
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::collections::hash_map::IterMut;
 use std::{cmp, mem};
@@ -62,26 +62,26 @@ pub struct GlyphFetchResult {
 // for this is that the texture may change
 // dimensions (e.g. the pages in a texture
 // atlas can grow). When this happens, by
 // storing the coordinates as texel values
 // we don't need to go through and update
 // various CPU-side structures.
 #[derive(Debug, Clone)]
 pub struct CacheItem {
-    pub texture_id: SourceTexture,
+    pub texture_id: TextureSource,
     pub uv_rect_handle: GpuCacheHandle,
     pub uv_rect: DeviceUintRect,
     pub texture_layer: i32,
 }
 
 impl CacheItem {
     pub fn invalid() -> Self {
         CacheItem {
-            texture_id: SourceTexture::Invalid,
+            texture_id: TextureSource::Invalid,
             uv_rect_handle: GpuCacheHandle::new(),
             uv_rect: DeviceUintRect::zero(),
             texture_layer: 0,
         }
     }
 }
 
 #[derive(Debug)]
@@ -1227,23 +1227,23 @@ impl ResourceCache {
     pub fn fetch_glyphs<F>(
         &self,
         mut font: FontInstance,
         glyph_keys: &[GlyphKey],
         fetch_buffer: &mut Vec<GlyphFetchResult>,
         gpu_cache: &mut GpuCache,
         mut f: F,
     ) where
-        F: FnMut(SourceTexture, GlyphFormat, &[GlyphFetchResult]),
+        F: FnMut(TextureSource, GlyphFormat, &[GlyphFetchResult]),
     {
         debug_assert_eq!(self.state, State::QueryResources);
 
         self.glyph_rasterizer.prepare_font(&mut font);
 
-        let mut current_texture_id = SourceTexture::Invalid;
+        let mut current_texture_id = TextureSource::Invalid;
         let mut current_glyph_format = GlyphFormat::Subpixel;
         debug_assert!(fetch_buffer.is_empty());
 
         for (loop_index, key) in glyph_keys.iter().enumerate() {
            let (cache_item, glyph_format) =
                 match self.glyph_rasterizer.get_cache_item_for_glyph(key,
                                                                      &font,
                                                                      &self.cached_glyphs,
@@ -1277,24 +1277,24 @@ impl ResourceCache {
     pub fn fetch_glyphs<F>(
         &self,
         mut font: FontInstance,
         glyph_keys: &[GlyphKey],
         fetch_buffer: &mut Vec<GlyphFetchResult>,
         gpu_cache: &mut GpuCache,
         mut f: F,
     ) where
-        F: FnMut(SourceTexture, GlyphFormat, &[GlyphFetchResult]),
+        F: FnMut(TextureSource, GlyphFormat, &[GlyphFetchResult]),
     {
         debug_assert_eq!(self.state, State::QueryResources);
 
         self.glyph_rasterizer.prepare_font(&mut font);
         let glyph_key_cache = self.cached_glyphs.get_glyph_key_cache_for_font(&font);
 
-        let mut current_texture_id = SourceTexture::Invalid;
+        let mut current_texture_id = TextureSource::Invalid;
         let mut current_glyph_format = GlyphFormat::Subpixel;
         debug_assert!(fetch_buffer.is_empty());
 
         for (loop_index, key) in glyph_keys.iter().enumerate() {
             let (cache_item, glyph_format) = match *glyph_key_cache.get(key) {
                 GlyphCacheEntry::Cached(ref glyph) => {
                     (self.texture_cache.get(&glyph.texture_cache_handle), glyph.format)
                 }
@@ -1621,16 +1621,20 @@ impl ResourceCache {
     pub fn clear_namespace(&mut self, namespace: IdNamespace) {
         self.resources
             .image_templates
             .images
             .retain(|key, _| key.0 != namespace);
         self.cached_images
             .clear_keys(|key| key.0 == namespace);
 
+        self.blob_image_templates.retain(|key, _| key.0 != namespace);
+
+        self.rasterized_blob_images.retain(|key, _| key.0 != namespace);
+
         self.resources.font_instances
             .write()
             .unwrap()
             .retain(|key, _| key.0 != namespace);
         for &key in self.resources.font_templates.keys().filter(|key| key.0 == namespace) {
             self.glyph_rasterizer.delete_font(key);
         }
         self.resources
--- a/gfx/webrender/src/scene_builder.rs
+++ b/gfx/webrender/src/scene_builder.rs
@@ -1,15 +1,15 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{AsyncBlobImageRasterizer, BlobImageRequest, BlobImageParams, BlobImageResult};
 use api::{DocumentId, PipelineId, ApiMsg, FrameMsg, ResourceUpdate, ExternalEvent, Epoch};
-use api::{BuiltDisplayList, ColorF, LayoutSize, NotificationRequest, Checkpoint};
+use api::{BuiltDisplayList, ColorF, LayoutSize, NotificationRequest, Checkpoint, IdNamespace};
 use api::channel::MsgSender;
 #[cfg(feature = "capture")]
 use capture::CaptureConfig;
 use frame_builder::{FrameBuilderConfig, FrameBuilder};
 use clip::{ClipDataInterner, ClipDataUpdateList};
 use clip_scroll_tree::ClipScrollTree;
 use display_list_flattener::DisplayListFlattener;
 use internal_types::{FastHashMap, FastHashSet};
@@ -117,31 +117,33 @@ pub struct BuiltScene {
 
 // Message from render backend to scene builder.
 pub enum SceneBuilderRequest {
     Transaction(Box<Transaction>),
     ExternalEvent(ExternalEvent),
     DeleteDocument(DocumentId),
     WakeUp,
     Flush(MsgSender<()>),
+    ClearNamespace(IdNamespace),
     SetFrameBuilderConfig(FrameBuilderConfig),
     SimulateLongSceneBuild(u32),
     SimulateLongLowPrioritySceneBuild(u32),
     Stop,
     #[cfg(feature = "capture")]
     SaveScene(CaptureConfig),
     #[cfg(feature = "replay")]
     LoadScenes(Vec<LoadScene>),
 }
 
 // Message from scene builder to render backend.
 pub enum SceneBuilderResult {
     Transaction(Box<BuiltTransaction>, Option<Sender<SceneSwapResult>>),
     ExternalEvent(ExternalEvent),
     FlushComplete(MsgSender<()>),
+    ClearNamespace(IdNamespace),
     Stopped,
 }
 
 // Message from render backend to scene builder to indicate the
 // scene swap was completed. We need a separate channel for this
 // so that they don't get mixed with SceneBuilderRequest messages.
 pub enum SceneSwapResult {
     Complete(Sender<()>),
@@ -216,50 +218,61 @@ impl SceneBuilder {
                 picture_id_generator: PictureIdGenerator::new(),
                 simulate_slow_ms: 0,
             },
             in_tx,
             out_rx,
         )
     }
 
+    /// Send a message to the render backend thread.
+    ///
+    /// We first put something in the result queue and then send a wake-up
+    /// message to the api queue that the render backend is blocking on.
+    pub fn send(&self, msg: SceneBuilderResult) {
+        self.tx.send(msg).unwrap();
+        let _ = self.api_tx.send(ApiMsg::WakeUp);
+    }
+
     /// The scene builder thread's event loop.
     pub fn run(&mut self) {
         if let Some(ref hooks) = self.hooks {
             hooks.register();
         }
 
         loop {
             match self.rx.recv() {
                 Ok(SceneBuilderRequest::WakeUp) => {}
                 Ok(SceneBuilderRequest::Flush(tx)) => {
-                    self.tx.send(SceneBuilderResult::FlushComplete(tx)).unwrap();
-                    let _ = self.api_tx.send(ApiMsg::WakeUp);
+                    self.send(SceneBuilderResult::FlushComplete(tx));
                 }
                 Ok(SceneBuilderRequest::Transaction(mut txn)) => {
                     let built_txn = self.process_transaction(&mut txn);
                     self.forward_built_transaction(built_txn);
                 }
                 Ok(SceneBuilderRequest::DeleteDocument(document_id)) => {
                     self.documents.remove(&document_id);
                 }
                 Ok(SceneBuilderRequest::SetFrameBuilderConfig(cfg)) => {
                     self.config = cfg;
                 }
+                Ok(SceneBuilderRequest::ClearNamespace(id)) => {
+                    self.documents.retain(|doc_id, _doc| doc_id.0 != id);
+                    self.send(SceneBuilderResult::ClearNamespace(id));
+                }
                 #[cfg(feature = "replay")]
                 Ok(SceneBuilderRequest::LoadScenes(msg)) => {
                     self.load_scenes(msg);
                 }
                 #[cfg(feature = "capture")]
                 Ok(SceneBuilderRequest::SaveScene(config)) => {
                     self.save_scene(config);
                 }
                 Ok(SceneBuilderRequest::ExternalEvent(evt)) => {
-                    self.tx.send(SceneBuilderResult::ExternalEvent(evt)).unwrap();
-                    self.api_tx.send(ApiMsg::WakeUp).unwrap();
+                    self.send(SceneBuilderResult::ExternalEvent(evt));
                 }
                 Ok(SceneBuilderRequest::Stop) => {
                     self.tx.send(SceneBuilderResult::Stopped).unwrap();
                     // We don't need to send a WakeUp to api_tx because we only
                     // get the Stop when the RenderBackend loop is exiting.
                     break;
                 }
                 Ok(SceneBuilderRequest::SimulateLongSceneBuild(time_ms)) => {
--- a/gfx/webrender/src/shade.rs
+++ b/gfx/webrender/src/shade.rs
@@ -377,51 +377,48 @@ fn create_prim_shader(
     if let Ok(ref program) = program {
         device.bind_shader_samplers(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
                 ("sColor1", TextureSampler::Color1),
                 ("sColor2", TextureSampler::Color2),
                 ("sDither", TextureSampler::Dither),
-                ("sCacheA8", TextureSampler::CacheA8),
-                ("sCacheRGBA8", TextureSampler::CacheRGBA8),
+                ("sPrevPassAlpha", TextureSampler::PrevPassAlpha),
+                ("sPrevPassColor", TextureSampler::PrevPassColor),
                 ("sTransformPalette", TextureSampler::TransformPalette),
                 ("sRenderTasks", TextureSampler::RenderTasks),
-                ("sResourceCache", TextureSampler::ResourceCache),
-                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sGpuCache", TextureSampler::GpuCache),
                 ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
                 ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
         );
     }
 
     program
 }
 
 fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
     let prefix = format!(
-        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}U\n
-        #define WR_FEATURE_TRANSFORM\n",
+        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}U\n",
         MAX_VERTEX_TEXTURE_WIDTH
     );
 
     debug!("ClipShader {}", name);
 
     let program = device.create_program(name, &prefix, &desc::CLIP);
 
     if let Ok(ref program) = program {
         device.bind_shader_samplers(
             program,
             &[
                 ("sColor0", TextureSampler::Color0),
                 ("sTransformPalette", TextureSampler::TransformPalette),
                 ("sRenderTasks", TextureSampler::RenderTasks),
-                ("sResourceCache", TextureSampler::ResourceCache),
-                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sGpuCache", TextureSampler::GpuCache),
                 ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
                 ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
         );
     }
 
     program
 }
--- a/gfx/webrender/src/spatial_node.rs
+++ b/gfx/webrender/src/spatial_node.rs
@@ -251,22 +251,30 @@ impl SpatialNode {
             SpatialNodeType::ReferenceFrame(ref mut info) => {
                 // Resolve the transform against any property bindings.
                 let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
                 info.resolved_transform =
                     LayoutFastTransform::with_vector(info.origin_in_parent_reference_frame)
                     .pre_mul(&source_transform.into())
                     .pre_mul(&info.source_perspective);
 
-                // The transformation for this viewport in world coordinates is the transformation for
-                // our parent reference frame, plus any accumulated scrolling offsets from nodes
-                // between our reference frame and this node. Finally, we also include
-                // whatever local transformation this reference frame provides.
+                // In order to compute a transformation to world coordinates, we need to apply the
+                // following transforms in order:
+                //   state.parent_accumulated_scroll_offset
+                //   info.source_perspective
+                //   info.source_transform
+                //   info.origin_in_parent_reference_frame
+                //   state.parent_reference_frame_transform
+                // The first one incorporates the scrolling effect of any scrollframes/sticky nodes
+                // between this reference frame and the parent reference frame. The middle three
+                // transforms (which are combined into info.resolved_transform) do the conversion
+                // into the parent reference frame's coordinate space, and then the last one
+                // applies the parent reference frame's transform to the world space.
                 let relative_transform = info.resolved_transform
-                    .post_translate(state.parent_accumulated_scroll_offset)
+                    .pre_translate(&state.parent_accumulated_scroll_offset)
                     .to_transform()
                     .with_destination::<LayoutPixel>();
                 self.world_viewport_transform =
                     state.parent_reference_frame_transform.pre_mul(&relative_transform.into());
                 self.world_content_transform = self.world_viewport_transform;
 
                 info.invertible = self.world_viewport_transform.is_invertible();
                 if !info.invertible {
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -5,17 +5,17 @@
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{ExternalImageType, ImageData, ImageFormat};
 use api::ImageDescriptor;
 use device::TextureFilter;
 use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle};
 use gpu_types::{ImageSource, UvRectKind};
 use internal_types::{CacheTextureId, FastHashMap, TextureUpdateList, TextureUpdateSource};
-use internal_types::{RenderTargetInfo, SourceTexture, TextureUpdate, TextureUpdateOp};
+use internal_types::{RenderTargetInfo, TextureSource, TextureUpdate, TextureUpdateOp};
 use profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use resource_cache::CacheItem;
 use std::cell::Cell;
 use std::cmp;
 use std::mem;
 use std::rc::Rc;
 
@@ -570,31 +570,31 @@ impl TextureCache {
                     EntryKind::Cache {
                         layer_index,
                         origin,
                         ..
                     } => (layer_index, origin),
                 };
                 CacheItem {
                     uv_rect_handle: entry.uv_rect_handle,
-                    texture_id: SourceTexture::TextureCache(entry.texture_id),
+                    texture_id: TextureSource::TextureCache(entry.texture_id),
                     uv_rect: DeviceUintRect::new(origin, entry.size),
                     texture_layer: layer_index as i32,
                 }
             }
             None => panic!("BUG: handle not requested earlier in frame"),
         }
     }
 
     // A more detailed version of get(). This allows access to the actual
     // device rect of the cache allocation.
     pub fn get_cache_location(
         &self,
         handle: &TextureCacheHandle,
-    ) -> (SourceTexture, i32, DeviceUintRect) {
+    ) -> (CacheTextureId, i32, DeviceUintRect) {
         let handle = handle
             .entry
             .as_ref()
             .expect("BUG: handle not requested earlier in frame");
 
         let entry = self.entries
             .get_opt(handle)
             .expect("BUG: was dropped from cache or not updated!");
@@ -604,17 +604,17 @@ impl TextureCache {
                 (0, DeviceUintPoint::zero())
             }
             EntryKind::Cache {
                 layer_index,
                 origin,
                 ..
             } => (layer_index, origin),
         };
-        (SourceTexture::TextureCache(entry.texture_id),
+        (entry.texture_id,
          layer_index as i32,
          DeviceUintRect::new(origin, entry.size))
     }
 
     pub fn mark_unused(&mut self, handle: &TextureCacheHandle) {
         if let Some(ref handle) = handle.entry {
             if let Some(entry) = self.entries.get_opt_mut(handle) {
                 // Set a very low last accessed frame to make it very likely that this entry
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -9,17 +9,17 @@ use batch::{AlphaBatchBuilder, AlphaBatc
 use clip::ClipStore;
 use clip_scroll_tree::{ClipScrollTree};
 use device::{FrameId, Texture};
 #[cfg(feature = "pathfinder")]
 use euclid::{TypedPoint2D, TypedVector2D};
 use gpu_cache::{GpuCache};
 use gpu_types::{BorderInstance, BlurDirection, BlurInstance, PrimitiveHeaders, ScalingInstance};
 use gpu_types::{TransformData, TransformPalette};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{CacheTextureId, FastHashMap, SavedTargetIndex, TextureSource};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use prim_store::{PrimitiveStore, DeferredResolve};
 use profiler::FrameProfileCounters;
 use render_backend::FrameResources;
 use render_task::{BlitSource, RenderTaskAddress, RenderTaskId, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, GlyphTask, RenderTaskLocation, RenderTaskTree, ScalingTask};
 use resource_cache::ResourceCache;
@@ -27,16 +27,17 @@ use std::{cmp, usize, f32, i32, mem};
 use texture_allocator::GuillotineAllocator;
 #[cfg(feature = "pathfinder")]
 use webrender_api::{DevicePixel, FontRenderMode};
 
 const MIN_TARGET_SIZE: u32 = 2048;
 const STYLE_SOLID: i32 = ((BorderStyle::Solid as i32) << 8) | ((BorderStyle::Solid as i32) << 16);
 const STYLE_MASK: i32 = 0x00FF_FF00;
 
+/// Identifies a given `RenderTarget` in a `RenderTargetList`.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTargetIndex(pub usize);
 
 pub struct RenderTargetContext<'a, 'rc> {
     pub device_pixel_scale: DevicePixelScale,
     pub prim_store: &'a PrimitiveStore,
@@ -81,84 +82,134 @@ impl TextureAllocator {
             let rect = DeviceIntRect::new(origin, size);
             self.used_rect = rect.union(&self.used_rect);
         }
 
         origin
     }
 }
 
+/// Represents a number of rendering operations on a surface.
+///
+/// In graphics parlance, a "render target" usually means "a surface (texture or
+/// framebuffer) bound to the output of a shader". This trait has a slightly
+/// different meaning, in that it represents the operations on that surface
+/// _before_ it's actually bound and rendered. So a `RenderTarget` is built by
+/// the `RenderBackend` by inserting tasks, and then shipped over to the
+/// `Renderer` where a device surface is resolved and the tasks are transformed
+/// into draw commands on that surface.
+///
+/// We express this as a trait to generalize over color and alpha surfaces.
+/// A given `RenderTask` will draw to one or the other, depending on its type
+/// and sometimes on its parameters. See `RenderTask::target_kind`.
 pub trait RenderTarget {
+    /// Creates a new RenderTarget of the given type.
     fn new(
         size: Option<DeviceUintSize>,
         screen_size: DeviceIntSize,
     ) -> Self;
+
+    /// Allocates a region of the given size in this target, and returns either
+    /// the offset of that region or `None` if it won't fit.
+    ///
+    /// If a non-`None` result is returned, that value is generally stored in
+    /// a task which is then added to this target via `add_task()`.
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint>;
+
+    /// Optional hook to provide additional processing for the target at the
+    /// end of the build phase.
     fn build(
         &mut self,
         _ctx: &mut RenderTargetContext,
         _gpu_cache: &mut GpuCache,
         _render_tasks: &mut RenderTaskTree,
         _deferred_resolves: &mut Vec<DeferredResolve>,
         _prim_headers: &mut PrimitiveHeaders,
         _transforms: &mut TransformPalette,
     ) {
     }
-    // TODO(gw): It's a bit odd that we need the deferred resolves and mutable
-    //           GPU cache here. They are typically used by the build step
-    //           above. They are used for the blit jobs to allow resolve_image
-    //           to be called. It's a bit of extra overhead to store the image
-    //           key here and the resolve them in the build step separately.
-    //           BUT: if/when we add more texture cache target jobs, we might
-    //           want to tidy this up.
+
+    /// Associates a `RenderTask` with this target. That task must be assigned
+    /// to a region returned by invoking `allocate()` on this target.
+    ///
+    /// TODO(gw): It's a bit odd that we need the deferred resolves and mutable
+    /// GPU cache here. They are typically used by the build step above. They
+    /// are used for the blit jobs to allow resolve_image to be called. It's a
+    /// bit of extra overhead to store the image key here and then resolve them
+    /// in the build step separately.  BUT: if/when we add more texture cache
+    /// target jobs, we might want to tidy this up.
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         clip_store: &ClipStore,
         transforms: &mut TransformPalette,
         deferred_resolves: &mut Vec<DeferredResolve>,
     );
     fn used_rect(&self) -> DeviceIntRect;
     fn needs_depth(&self) -> bool;
 }
 
+/// A tag used to identify the output format of a `RenderTarget`.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTargetKind {
-    Color, // RGBA32
+    Color, // RGBA8
     Alpha, // R8
 }
 
+/// A series of `RenderTarget` instances, serving as the high-level container
+/// into which `RenderTasks` are assigned.
+///
+/// During the build phase, we iterate over the tasks in each `RenderPass`. For
+/// each task, we invoke `allocate()` on the `RenderTargetList`, which in turn
+/// attempts to allocate an output region in the last `RenderTarget` in the
+/// list. If allocation fails (or if the list is empty), a new `RenderTarget` is
+/// created and appended to the list. The build phase then assigns the task into
+/// the target associated with the final allocation.
+///
+/// The result is that each `RenderPass` is associated with one or two
+/// `RenderTargetLists`, depending on whether all our tasks have the
+/// same `RenderTargetKind`. The lists are then shipped to the `Renderer`, which
+/// allocates a device texture array, with one slice per render target in the
+/// list.
+///
+/// The upshot of this scheme is that it maximizes batching. In a given pass,
+/// we need to do a separate batch for each individual render target. But with
+/// the texture array, we can expose the entirety of the previous pass to each
+/// task in the current pass in a single batch, which generally allows each
+/// task to be drawn in a single batch regardless of how many results from the
+/// previous pass it depends on.
+///
+/// Note that in some cases (like drop-shadows), we can depend on the output of
+/// a pass earlier than the immediately-preceding pass. See `SavedTargetIndex`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTargetList<T> {
     screen_size: DeviceIntSize,
     pub format: ImageFormat,
     pub max_size: DeviceUintSize,
     pub targets: Vec<T>,
     pub saved_index: Option<SavedTargetIndex>,
-    pub is_shared: bool,
 }
 
 impl<T: RenderTarget> RenderTargetList<T> {
     fn new(
         screen_size: DeviceIntSize,
         format: ImageFormat,
     ) -> Self {
         RenderTargetList {
             screen_size,
             format,
             max_size: DeviceUintSize::new(MIN_TARGET_SIZE, MIN_TARGET_SIZE),
             targets: Vec::new(),
             saved_index: None,
-            is_shared: false,
         }
     }
 
     fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
@@ -251,17 +302,17 @@ pub struct FrameOutput {
     pub task_id: RenderTaskId,
     pub pipeline_id: PipelineId,
 }
 
 // Defines where the source data for a blit job can be found.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BlitJobSource {
-    Texture(SourceTexture, i32, DeviceIntRect),
+    Texture(TextureSource, i32, DeviceIntRect),
     RenderTask(RenderTaskId),
 }
 
 // Information required to do a blit from a source to a target.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BlitJob {
     pub source: BlitJobSource,
@@ -280,17 +331,20 @@ pub struct GlyphJob {
     pub embolden_amount: TypedVector2D<f32, DevicePixel>,
 }
 
 #[cfg(not(feature = "pathfinder"))]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct GlyphJob;
 
-/// A render target represents a number of rendering operations on a surface.
+/// Contains the work (in the form of instance arrays) needed to fill a color
+/// output surface (RGBA8).
+///
+/// See `RenderTarget`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ColorRenderTarget {
     pub alpha_batch_containers: Vec<AlphaBatchContainer>,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub readbacks: Vec<DeviceIntRect>,
@@ -495,16 +549,20 @@ impl RenderTarget for ColorRenderTarget 
 
     fn needs_depth(&self) -> bool {
         self.alpha_batch_containers.iter().any(|ab| {
             !ab.opaque_batches.is_empty()
         })
     }
 }
 
+/// Contains the work (in the form of instance arrays) needed to fill an alpha
+/// output surface (R8).
+///
+/// See `RenderTarget`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct AlphaRenderTarget {
     pub clip_batcher: ClipBatcher,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
     pub horizontal_blurs: Vec<BlurInstance>,
     pub scalings: Vec<ScalingInstance>,
@@ -722,59 +780,71 @@ impl TextureCacheRenderTarget {
             embolden_amount: task_info.embolden_amount,
         })
     }
 
     #[cfg(not(feature = "pathfinder"))]
     fn add_glyph_task(&mut self, _: &mut GlyphTask, _: DeviceIntRect) {}
 }
 
+/// Contains the set of `RenderTarget`s specific to the kind of pass.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderPassKind {
+    /// The final pass to the main frame buffer, where we have a single color
+    /// target for display to the user.
     MainFramebuffer(ColorRenderTarget),
+    /// An intermediate pass, where we may have multiple targets.
     OffScreen {
         alpha: RenderTargetList<AlphaRenderTarget>,
         color: RenderTargetList<ColorRenderTarget>,
-        texture_cache: FastHashMap<(SourceTexture, i32), TextureCacheRenderTarget>,
+        texture_cache: FastHashMap<(CacheTextureId, i32), TextureCacheRenderTarget>,
     },
 }
 
 /// A render pass represents a set of rendering operations that don't depend on one
 /// another.
 ///
 /// A render pass can have several render targets if there wasn't enough space in one
-/// target to do all of the rendering for that pass.
+/// target to do all of the rendering for that pass. See `RenderTargetList`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderPass {
+    /// The kind of pass, as well as the set of targets associated with that
+    /// kind of pass.
     pub kind: RenderPassKind,
+    /// The set of tasks to be performed in this pass, as indices into the
+    /// `RenderTaskTree`.
     tasks: Vec<RenderTaskId>,
 }
 
 impl RenderPass {
+    /// Creates a pass for the main framebuffer. There is only one of these, and
+    /// it is always the last pass.
     pub fn new_main_framebuffer(screen_size: DeviceIntSize) -> Self {
         let target = ColorRenderTarget::new(None, screen_size);
         RenderPass {
             kind: RenderPassKind::MainFramebuffer(target),
             tasks: vec![],
         }
     }
 
+    /// Creates an intermediate off-screen pass.
     pub fn new_off_screen(screen_size: DeviceIntSize) -> Self {
         RenderPass {
             kind: RenderPassKind::OffScreen {
                 color: RenderTargetList::new(screen_size, ImageFormat::BGRA8),
                 alpha: RenderTargetList::new(screen_size, ImageFormat::R8),
                 texture_cache: FastHashMap::default(),
             },
             tasks: vec![],
         }
     }
 
+    /// Adds a task to this pass.
     pub fn add_render_task(
         &mut self,
         task_id: RenderTaskId,
         size: DeviceIntSize,
         target_kind: RenderTargetKind,
     ) {
         if let RenderPassKind::OffScreen { ref mut color, ref mut alpha, .. } = self.kind {
             let max_size = match target_kind {
@@ -783,16 +853,21 @@ impl RenderPass {
             };
             max_size.width = cmp::max(max_size.width, size.width as u32);
             max_size.height = cmp::max(max_size.height, size.height as u32);
         }
 
         self.tasks.push(task_id);
     }
 
+    /// Processes this pass to prepare it for rendering.
+    ///
+    /// Among other things, this allocates output regions for each of our tasks
+    /// (added via `add_render_task`) in a RenderTarget and assigns it into that
+    /// target.
     pub fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
         clip_store: &ClipStore,
         transforms: &mut TransformPalette,
@@ -819,21 +894,16 @@ impl RenderPass {
                     gpu_cache,
                     render_tasks,
                     deferred_resolves,
                     prim_headers,
                     transforms,
                 );
             }
             RenderPassKind::OffScreen { ref mut color, ref mut alpha, ref mut texture_cache } => {
-                let is_shared_alpha = self.tasks.iter().any(|&task_id| {
-                    let task = &render_tasks[task_id];
-                    task.is_shared() &&
-                        task.target_kind() == RenderTargetKind::Alpha
-                });
                 let saved_color = if self.tasks.iter().any(|&task_id| {
                     let t = &render_tasks[task_id];
                     t.target_kind() == RenderTargetKind::Color && t.saved_index.is_some()
                 }) {
                     Some(render_tasks.save_target())
                 } else {
                     None
                 };
@@ -935,17 +1005,16 @@ impl RenderPass {
                     ctx,
                     gpu_cache,
                     render_tasks,
                     deferred_resolves,
                     saved_alpha,
                     prim_headers,
                     transforms,
                 );
-                alpha.is_shared = is_shared_alpha;
             }
         }
     }
 }
 
 #[derive(Debug, Clone, Default)]
 pub struct CompositeOps {
     // Requires only a single texture as input (e.g. most filters)
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -535,16 +535,17 @@ impl<Src, Dst> FastTransform<Src, Dst> {
             FastTransform::Transform { inverse: Some(ref inverse), is_2d: true, .. }  =>
                 inverse.transform_rect(rect),
             FastTransform::Transform { ref transform, is_2d: false, .. } =>
                 transform.inverse_rect_footprint(rect),
             FastTransform::Transform { inverse: None, .. }  => None,
         }
     }
 
+    #[allow(dead_code)]
     pub fn post_translate(&self, new_offset: TypedVector2D<f32, Dst>) -> Self {
         match *self {
             FastTransform::Offset(offset) => {
                 let offset = offset.to_untyped() + new_offset.to_untyped();
                 FastTransform::Offset(TypedVector2D::from_untyped(&offset))
             }
             FastTransform::Transform { ref transform, .. } => {
                 let transform = transform.post_translate(new_offset.to_3d());
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -6,17 +6,16 @@ extern crate serde_bytes;
 
 use app_units::Au;
 use channel::{self, MsgSender, Payload, PayloadSender, PayloadSenderHelperMethods};
 use std::cell::Cell;
 use std::fmt;
 use std::marker::PhantomData;
 use std::os::raw::c_void;
 use std::path::PathBuf;
-use std::sync::Arc;
 use std::u32;
 use {BuiltDisplayList, BuiltDisplayListDescriptor, ColorF, DeviceIntPoint, DeviceUintRect};
 use {DeviceUintSize, ExternalScrollId, FontInstanceKey, FontInstanceOptions};
 use {FontInstancePlatformOptions, FontKey, FontVariation, GlyphDimensions, GlyphIndex, ImageData};
 use {ImageDescriptor, ImageKey, ItemTag, LayoutPoint, LayoutSize, LayoutTransform, LayoutVector2D};
 use {NativeFontHandle, WorldPoint};
 
 pub type TileSize = u16;
@@ -1264,39 +1263,52 @@ pub enum Checkpoint {
     /// notified. This provides the guarantee that if a request is created it will get notified.
     TransactionDropped,
 }
 
 pub trait NotificationHandler : Send + Sync {
     fn notify(&self, when: Checkpoint);
 }
 
-#[derive(Clone)]
 pub struct NotificationRequest {
-    handler: Arc<NotificationHandler>,
+    handler: Option<Box<NotificationHandler>>,
     when: Checkpoint,
-    done: bool,
 }
 
 impl NotificationRequest {
-    pub fn new(when: Checkpoint, handler: Arc<NotificationHandler>) -> Self {
+    pub fn new(when: Checkpoint, handler: Box<NotificationHandler>) -> Self {
         NotificationRequest {
-            handler,
+            handler: Some(handler),
             when,
-            done: false,
         }
     }
 
     pub fn when(&self) -> Checkpoint { self.when }
 
     pub fn notify(mut self) {
-        self.handler.notify(self.when);
-        self.done = true;
+        if let Some(handler) = self.handler.take() {
+            handler.notify(self.when);
+        }
     }
 }
 
 impl Drop for NotificationRequest {
     fn drop(&mut self) {
-        if !self.done {
-            self.handler.notify(Checkpoint::TransactionDropped);
+        if let Some(ref mut handler) = self.handler {
+            handler.notify(Checkpoint::TransactionDropped);
         }
     }
 }
+
+// This Clone impl yields an "empty" request because we don't want the requests
+// to be notified twice so the request is owned by only one of the API messages
+// (the original one) after the clone.
+// This works in practice because the notification requests are used for
+// synchronization so we don't need to include them in the recording mechanism
+// in wrench that clones the messages.
+impl Clone for NotificationRequest {
+    fn clone(&self) -> Self {
+        NotificationRequest {
+            when: self.when,
+            handler: None,
+        }
+    }
+}
--- a/gfx/webrender_api/src/image.rs
+++ b/gfx/webrender_api/src/image.rs
@@ -122,34 +122,38 @@ impl ImageFormat {
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ColorDepth {
     /// 8 bits image (most common)
     Color8,
     /// 10 bits image
     Color10,
     /// 12 bits image
     Color12,
+    /// 16 bits image
+    Color16,
 }
 
 impl ColorDepth {
     /// Return the numerical bit depth value for the type.
     pub fn bit_depth(self) -> u32 {
         match self {
             ColorDepth::Color8 => 8,
             ColorDepth::Color10 => 10,
             ColorDepth::Color12 => 12,
+            ColorDepth::Color16 => 16,
         }
     }
     /// 10 and 12 bits images are encoded using 16 bits integer, we need to
     /// rescale the 10 or 12 bits value to extend to 16 bits.
     pub fn rescaling_factor(self) -> f32 {
         match self {
             ColorDepth::Color8 => 1.0,
             ColorDepth::Color10 => 64.0,
             ColorDepth::Color12 => 16.0,
+            ColorDepth::Color16 => 1.0,
         }
     }
 }
 
 /// Metadata (but not storage) describing an image In WebRender.
 #[derive(Copy, Clone, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ImageDescriptor {
     /// Format of the image data.
--- a/gfx/webrender_api/src/lib.rs
+++ b/gfx/webrender_api/src/lib.rs
@@ -1,12 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+//! The `webrender_api` crate contains an assortment of types and functions used
+//! by WebRender consumers as well as, in many cases, WebRender itself.
+//!
+//! This separation allows Servo to parallelize compilation across `webrender`
+//! and other crates that depend on `webrender_api`. So in practice, we put
+//! things in this crate when Servo needs to use them. Firefox depends on the
+//! `webrender` crate directly, and so this distinction is not really relevant
+//! there.
+
 #![cfg_attr(feature = "nightly", feature(nonzero))]
 #![cfg_attr(feature = "cargo-clippy", allow(float_cmp, too_many_arguments, unreadable_literal))]
 
 extern crate app_units;
 extern crate bincode;
 #[macro_use]
 extern crate bitflags;
 extern crate byteorder;
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1,1 +1,1 @@
-d7a6d081384ce0da9dd359b0cf4b9f758aab1b67
+9536249e3ed920a920346f6cc0a79473cad16099