Bug 1373381 - Update webrender to cset 1d6348023a4a4fdd89dce038640c5da906005acc. r=jrmuizel
author Kartikaya Gupta <kgupta@mozilla.com>
Tue, 20 Jun 2017 09:33:07 -0400
changeset 365081 9d23ec0ef50a71aa635cfa3b9652ae6924bf495f
parent 365080 73edd4e1acef4e708346600b81a337022e450475
child 365082 41de8a2dbb94c994682c467f42fc36d70d1412b1
push id 91680
push user kwierso@gmail.com
push date Wed, 21 Jun 2017 01:32:01 +0000
treeherder mozilla-inbound@f7b9dc31956c [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1373381
milestone 56.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1373381 - Update webrender to cset 1d6348023a4a4fdd89dce038640c5da906005acc. r=jrmuizel MozReview-Commit-ID: 3ET9po9ee6l
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/blob.rs
gfx/webrender/examples/nested_display_list.rs
gfx/webrender/res/clip_shared.glsl
gfx/webrender/res/cs_clip_image.vs.glsl
gfx/webrender/res/cs_text_run.vs.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_angle_gradient.vs.glsl
gfx/webrender/res/ps_border_corner.vs.glsl
gfx/webrender/res/ps_border_edge.vs.glsl
gfx/webrender/res/ps_box_shadow.vs.glsl
gfx/webrender/res/ps_cache_image.vs.glsl
gfx/webrender/res/ps_gradient.vs.glsl
gfx/webrender/res/ps_image.vs.glsl
gfx/webrender/res/ps_radial_gradient.vs.glsl
gfx/webrender/res/ps_rectangle.vs.glsl
gfx/webrender/res/ps_text_run.vs.glsl
gfx/webrender/res/ps_yuv_image.vs.glsl
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/freelist.rs
gfx/webrender/src/glyph_rasterizer.rs
gfx/webrender/src/gpu_cache.rs
gfx/webrender/src/gpu_store.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/mask_cache.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/unix/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/resource_cache.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_traits/Cargo.toml
gfx/webrender_traits/src/channel_ipc.rs
gfx/webrender_traits/src/channel_mpsc.rs
gfx/webrender_traits/src/display_item.rs
gfx/webrender_traits/src/display_list.rs
gfx/webrender_traits/src/font.rs
gfx/webrender_traits/src/image.rs
gfx/webrender_traits/src/lib.rs
gfx/webrender_traits/src/webgl.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -74,9 +74,9 @@ there is another crate in m-c called moz
 the same folder to store its rust dependencies. If one of the libraries that is
 required by both mozjs_sys and webrender is updated without updating the other
 project's Cargo.lock file, that results in build bustage.
 This means that any time you do this sort of manual update of packages, you need
 to make sure that mozjs_sys also has its Cargo.lock file updated if needed, hence
 the need to run the cargo update command in js/src as well. Hopefully this will
 be resolved soon.
 
-Latest Commit: 6752684fcc7402b0a5480e0b9f73152b2f9ed1e5
+Latest Commit: 1d6348023a4a4fdd89dce038640c5da906005acc
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -1,48 +1,48 @@
 [package]
 name = "webrender"
-version = "0.40.0"
+version = "0.43.0"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 
 [features]
 default = ["freetype-lib", "webgl"]
 freetype-lib = ["freetype/servo-freetype-sys"]
 profiler = ["thread_profiler/thread_profiler"]
 webgl = ["offscreen_gl_context", "webrender_traits/webgl"]
 
 [dependencies]
-app_units = "0.4"
-bincode = "1.0.0-alpha6"
+app_units = "0.5"
+bincode = "0.8"
 bit-set = "0.4"
 byteorder = "1.0"
-euclid = "0.14.4"
+euclid = "0.15"
 fnv = "1.0"
 gleam = "0.4.3"
 lazy_static = "0.2"
 log = "0.3"
 num-traits = "0.1.32"
-offscreen_gl_context = {version = "0.9.0", features = ["serde", "osmesa"], optional = true}
+offscreen_gl_context = {version = "0.11", features = ["serde", "osmesa"], optional = true}
 time = "0.1"
 rayon = "0.8"
 webrender_traits = {path = "../webrender_traits"}
 bitflags = "0.7"
 gamma-lut = "0.2"
 thread_profiler = "0.1.1"
-plane-split = "0.5"
+plane-split = "0.6"
 
 [dev-dependencies]
 angle = {git = "https://github.com/servo/angle", branch = "servo"}
 rand = "0.3"                # for the benchmarks
-servo-glutin = "0.10.1"     # for the example apps
+servo-glutin = "0.11"     # for the example apps
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.2", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
-dwrote = "0.3"
+dwrote = "0.4"
 
 [target.'cfg(target_os = "macos")'.dependencies]
-core-graphics = "0.7.0"
-core-text = "4.0"
+core-graphics = "0.8.0"
+core-text = "5.0"
--- a/gfx/webrender/examples/blob.rs
+++ b/gfx/webrender/examples/blob.rs
@@ -72,17 +72,17 @@ fn render_blob(
             let y2 = y + descriptor.offset.y as u32;
 
             // Render a simple checkerboard pattern
             let checker = if (x2 % 20 >= 10) != (y2 % 20 >= 10) { 1 } else { 0 };
             // ..nested in the per-tile cherkerboard pattern
             let tc = if tile_checker { 0 } else { (1 - checker) * 40 };
 
             match descriptor.format {
-                wt::ImageFormat::RGBA8 => {
+                wt::ImageFormat::BGRA8 => {
                     texels.push(color.b * checker + tc);
                     texels.push(color.g * checker + tc);
                     texels.push(color.r * checker + tc);
                     texels.push(color.a * checker + tc);
                 }
                 wt::ImageFormat::A8 => {
                     texels.push(color.a * checker + tc);
                 }
@@ -216,25 +216,25 @@ impl wt::BlobImageRenderer for Checkerbo
 fn body(api: &wt::RenderApi,
         builder: &mut wt::DisplayListBuilder,
         _pipeline_id: &wt::PipelineId,
         layout_size: &wt::LayoutSize)
 {
     let blob_img1 = api.generate_image_key();
     api.add_image(
         blob_img1,
-        wt::ImageDescriptor::new(500, 500, wt::ImageFormat::RGBA8, true),
+        wt::ImageDescriptor::new(500, 500, wt::ImageFormat::BGRA8, true),
         wt::ImageData::new_blob_image(serialize_blob(wt::ColorU::new(50, 50, 150, 255))),
         Some(128),
     );
 
     let blob_img2 = api.generate_image_key();
     api.add_image(
         blob_img2,
-        wt::ImageDescriptor::new(200, 200, wt::ImageFormat::RGBA8, true),
+        wt::ImageDescriptor::new(200, 200, wt::ImageFormat::BGRA8, true),
         wt::ImageData::new_blob_image(serialize_blob(wt::ColorU::new(50, 150, 50, 255))),
         None,
     );
 
     let bounds = wt::LayoutRect::new(wt::LayoutPoint::zero(), *layout_size);
     builder.push_stacking_context(wt::ScrollPolicy::Scrollable,
                                   bounds,
                                   None,
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/examples/nested_display_list.rs
@@ -0,0 +1,132 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate gleam;
+extern crate glutin;
+extern crate webrender;
+extern crate webrender_traits;
+
+#[macro_use]
+extern crate lazy_static;
+
+#[path="common/boilerplate.rs"]
+mod boilerplate;
+
+use boilerplate::HandyDandyRectBuilder;
+use std::sync::Mutex;
+use webrender_traits::*;
+
+fn body(_api: &RenderApi,
+        builder: &mut DisplayListBuilder,
+        pipeline_id: &PipelineId,
+        layout_size: &LayoutSize)
+{
+    let bounds = LayoutRect::new(LayoutPoint::zero(), *layout_size);
+    builder.push_stacking_context(webrender_traits::ScrollPolicy::Scrollable,
+                                  bounds,
+                                  None,
+                                  TransformStyle::Flat,
+                                  None,
+                                  webrender_traits::MixBlendMode::Normal,
+                                  Vec::new());
+
+    let outer_scroll_frame_rect = (100, 100).to(600, 400);
+    let token = builder.push_clip_region(&outer_scroll_frame_rect, vec![], None);
+    builder.push_rect(outer_scroll_frame_rect,
+                      token, ColorF::new(1.0, 1.0, 1.0, 1.0));
+    let token = builder.push_clip_region(&outer_scroll_frame_rect, vec![], None);
+    let nested_clip_id = builder.define_clip((100, 100).to(1000, 1000), token, None);
+    builder.push_clip_id(nested_clip_id);
+
+    let mut builder2 = webrender_traits::DisplayListBuilder::new(*pipeline_id, *layout_size);
+    let mut builder3 = webrender_traits::DisplayListBuilder::new(*pipeline_id, *layout_size);
+
+    let rect = (110, 110).to(210, 210);
+    let token = builder3.push_clip_region(&rect, vec![], None);
+    builder3.push_rect(rect, token, ColorF::new(0.0, 1.0, 0.0, 1.0));
+
+    // A fixed position rectangle should be fixed to the reference frame that starts
+    // in the outer display list.
+    builder3.push_stacking_context(webrender_traits::ScrollPolicy::Fixed,
+                                  (220, 110).to(320, 210),
+                                  None,
+                                  TransformStyle::Flat,
+                                  None,
+                                  webrender_traits::MixBlendMode::Normal,
+                                  Vec::new());
+    let rect = (0, 0).to(100, 100);
+    let token = builder3.push_clip_region(&rect, vec![], None);
+    builder3.push_rect(rect, token, ColorF::new(0.0, 1.0, 0.0, 1.0));
+    builder3.pop_stacking_context();
+
+    // Now we push an inner scroll frame that should have the same id as the outer one,
+    // but the WebRender nested display list replacement code should convert it into
+    // a unique ClipId.
+    let inner_scroll_frame_rect = (330, 110).to(530, 360);
+    let token = builder3.push_clip_region(&inner_scroll_frame_rect, vec![], None);
+    builder3.push_rect(inner_scroll_frame_rect, token, ColorF::new(1.0, 0.0, 1.0, 0.5));
+    let token = builder3.push_clip_region(&inner_scroll_frame_rect, vec![], None);
+    let inner_nested_clip_id = builder3.define_clip((330, 110).to(2000, 2000), token, None);
+    builder3.push_clip_id(inner_nested_clip_id);
+    let rect = (340, 120).to(440, 220);
+    let token = builder3.push_clip_region(&rect, vec![], None);
+    builder3.push_rect(rect, token, ColorF::new(0.0, 1.0, 0.0, 1.0));
+    builder3.pop_clip_id();
+
+    let (_, _, built_list) = builder3.finalize();
+    builder2.push_nested_display_list(&built_list);
+    let (_, _, built_list) = builder2.finalize();
+    builder.push_nested_display_list(&built_list);
+
+    builder.pop_clip_id();
+
+    builder.pop_stacking_context();
+}
+
+lazy_static! {
+    static ref CURSOR_POSITION: Mutex<WorldPoint> = Mutex::new(WorldPoint::zero());
+}
+
+fn event_handler(event: &glutin::Event,
+                 api: &RenderApi)
+{
+    match *event {
+        glutin::Event::KeyboardInput(glutin::ElementState::Pressed, _, Some(key)) => {
+            let offset = match key {
+                 glutin::VirtualKeyCode::Down => (0.0, -10.0),
+                 glutin::VirtualKeyCode::Up => (0.0, 10.0),
+                 glutin::VirtualKeyCode::Right => (-10.0, 0.0),
+                 glutin::VirtualKeyCode::Left => (10.0, 0.0),
+                 _ => return,
+            };
+
+            api.scroll(ScrollLocation::Delta(LayoutVector2D::new(offset.0, offset.1)),
+                       *CURSOR_POSITION.lock().unwrap(),
+                       ScrollEventPhase::Start);
+        }
+        glutin::Event::MouseMoved(x, y) => {
+            *CURSOR_POSITION.lock().unwrap() = WorldPoint::new(x as f32, y as f32);
+        }
+        glutin::Event::MouseWheel(delta, _, event_cursor_position) => {
+            if let Some((x, y)) = event_cursor_position {
+                *CURSOR_POSITION.lock().unwrap() = WorldPoint::new(x as f32, y as f32);
+            }
+
+            const LINE_HEIGHT: f32 = 38.0;
+            let (dx, dy) = match delta {
+                glutin::MouseScrollDelta::LineDelta(dx, dy) => (dx, dy * LINE_HEIGHT),
+                glutin::MouseScrollDelta::PixelDelta(dx, dy) => (dx, dy),
+            };
+
+            api.scroll(ScrollLocation::Delta(LayoutVector2D::new(dx, dy)),
+                       *CURSOR_POSITION.lock().unwrap(),
+                       ScrollEventPhase::Start);
+        }
+        _ => ()
+    }
+}
+
+fn main() {
+    boilerplate::main_wrapper(body, event_handler, None);
+}
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -10,31 +10,34 @@
 #define SEGMENT_CORNER_TR   2
 #define SEGMENT_CORNER_BL   3
 #define SEGMENT_CORNER_BR   4
 
 in int aClipRenderTaskIndex;
 in int aClipLayerIndex;
 in int aClipDataIndex;
 in int aClipSegmentIndex;
+in int aClipResourceAddress;
 
 struct CacheClipInstance {
     int render_task_index;
     int layer_index;
     int data_index;
     int segment_index;
+    int resource_address;
 };
 
 CacheClipInstance fetch_clip_item(int index) {
     CacheClipInstance cci;
 
     cci.render_task_index = aClipRenderTaskIndex;
     cci.layer_index = aClipLayerIndex;
     cci.data_index = aClipDataIndex;
     cci.segment_index = aClipSegmentIndex;
+    cci.resource_address = aClipResourceAddress;
 
     return cci;
 }
 
 struct ClipVertexInfo {
     vec3 local_pos;
     vec2 screen_pos;
     RectWithSize clipped_local_rect;
--- a/gfx/webrender/res/cs_clip_image.vs.glsl
+++ b/gfx/webrender/res/cs_clip_image.vs.glsl
@@ -1,37 +1,36 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 struct ImageMaskData {
-    RectWithSize uv_rect;
     RectWithSize local_rect;
 };
 
 ImageMaskData fetch_mask_data(int index) {
     vec4 data[2] = fetch_data_2(index);
-    return ImageMaskData(RectWithSize(data[0].xy, data[0].zw),
-                         RectWithSize(data[1].xy, data[1].zw));
+    return ImageMaskData(RectWithSize(data[0].xy, data[0].zw));
 }
 
 void main(void) {
     CacheClipInstance cci = fetch_clip_item(gl_InstanceID);
     ClipArea area = fetch_clip_area(cci.render_task_index);
     Layer layer = fetch_layer(cci.layer_index);
     ImageMaskData mask = fetch_mask_data(cci.data_index);
     RectWithSize local_rect = mask.local_rect;
+    ImageResource res = fetch_image_resource(cci.resource_address);
 
     ClipVertexInfo vi = write_clip_tile_vertex(local_rect,
                                                layer,
                                                area,
                                                cci.segment_index);
 
     vPos = vi.local_pos;
 
     vClipMaskUv = vec3((vPos.xy / vPos.z - local_rect.p0) / local_rect.size, 0.0);
     vec2 texture_size = vec2(textureSize(sColor0, 0));
-    vClipMaskUvRect = vec4(mask.uv_rect.p0, mask.uv_rect.size) / texture_size.xyxy;
+    vClipMaskUvRect = vec4(res.uv_rect.xy, res.uv_rect.zw - res.uv_rect.xy) / texture_size.xyxy;
     // applying a half-texel offset to the UV boundaries to prevent linear samples from the outside
-    vec4 inner_rect = vec4(mask.uv_rect.p0, mask.uv_rect.p0 + mask.uv_rect.size);
+    vec4 inner_rect = vec4(res.uv_rect.xy, res.uv_rect.zw);
     vClipMaskUvInnerRect = (inner_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
 }
--- a/gfx/webrender/res/cs_text_run.vs.glsl
+++ b/gfx/webrender/res/cs_text_run.vs.glsl
@@ -7,25 +7,26 @@
 // drawn un-transformed. These are used for effects such
 // as text-shadow.
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
-    int resource_address = prim.user_data1;
+    int resource_address = prim.user_data2;
     Glyph glyph = fetch_glyph(prim.specific_prim_address, glyph_index);
-    ResourceRect res = fetch_resource_rect(resource_address + glyph_index);
+    GlyphResource res = fetch_glyph_resource(resource_address);
 
     // Glyphs size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
     vec2 size = res.uv_rect.zw - res.uv_rect.xy;
-    vec2 origin = prim.task.screen_space_origin + uDevicePixelRatio * (glyph.offset - prim.local_rect.p0);
+    vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
+    vec2 origin = prim.task.screen_space_origin + uDevicePixelRatio * (local_pos - prim.local_rect.p0);
     vec4 local_rect = vec4(origin, size);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
     vec2 pos = mix(local_rect.xy,
                    local_rect.xy + local_rect.zw,
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -5,17 +5,26 @@
 
 #if defined(GL_ES)
     #if GL_ES == 1
         #ifdef GL_FRAGMENT_PRECISION_HIGH
         precision highp sampler2DArray;
         #else
         precision mediump sampler2DArray;
         #endif
+
+        // Sampler default precision is lowp on mobile GPUs.
+        // This causes RGBA32F texture data to be clamped to 16 bit floats on some GPUs (e.g. Mali-T880).
+        // Define highp precision macro to allow lossless FLOAT texture sampling.
+        #define HIGHP_SAMPLER_FLOAT highp
+    #else
+        #define HIGHP_SAMPLER_FLOAT
     #endif
+#else
+    #define HIGHP_SAMPLER_FLOAT
 #endif
 
 #define PST_TOP_LEFT     0
 #define PST_TOP          1
 #define PST_TOP_RIGHT    2
 #define PST_RIGHT        3
 #define PST_BOTTOM_RIGHT 4
 #define PST_BOTTOM       5
@@ -75,27 +84,18 @@ RectWithSize to_rect_with_size(RectWithE
 
     return result;
 }
 
 vec2 clamp_rect(vec2 point, RectWithSize rect) {
     return clamp(point, rect.p0, rect.p0 + rect.size);
 }
 
-vec2 clamp_rect(vec2 point, RectWithEndpoint rect) {
-    return clamp(point, rect.p0, rect.p1);
-}
-
-// Clamp 2 points at once.
-vec4 clamp_rect(vec4 points, RectWithSize rect) {
-    return clamp(points, rect.p0.xyxy, rect.p0.xyxy + rect.size.xyxy);
-}
-
 RectWithSize intersect_rect(RectWithSize a, RectWithSize b) {
-    vec4 p = clamp_rect(vec4(a.p0, a.p0 + a.size), b);
+    vec4 p = clamp(vec4(a.p0, a.p0 + a.size), b.p0.xyxy, b.p0.xyxy + b.size.xyxy);
     return RectWithSize(p.xy, max(vec2(0.0), p.zw - p.xy));
 }
 
 float distance_to_line(vec2 p0, vec2 perp_dir, vec2 p) {
     vec2 dir_to_p0 = p0 - p;
     return dot(normalize(perp_dir), dir_to_p0);
 }
 
@@ -113,17 +113,17 @@ varying vec3 vClipMaskUv;
 //           use 2x unsigned shorts as vertex attributes
 //           instead of an int, and encode the UV directly
 //           in the vertices.
 ivec2 get_resource_cache_uv(int address) {
     return ivec2(address % WR_MAX_VERTEX_TEXTURE_WIDTH,
                  address / WR_MAX_VERTEX_TEXTURE_WIDTH);
 }
 
-uniform sampler2D sResourceCache;
+uniform HIGHP_SAMPLER_FLOAT sampler2D sResourceCache;
 
 vec4[2] fetch_from_resource_cache_2(int address) {
     ivec2 uv = get_resource_cache_uv(address);
     return vec4[2](
         texelFetchOffset(sResourceCache, uv, 0, ivec2(0, 0)),
         texelFetchOffset(sResourceCache, uv, 0, ivec2(1, 0))
     );
 }
@@ -132,38 +132,31 @@ vec4[2] fetch_from_resource_cache_2(int 
 
 #define VECS_PER_LAYER              9
 #define VECS_PER_RENDER_TASK        3
 #define VECS_PER_PRIM_HEADER        2
 #define VECS_PER_TEXT_RUN           1
 #define VECS_PER_GRADIENT           3
 #define VECS_PER_GRADIENT_STOP      2
 
-uniform sampler2D sLayers;
-uniform sampler2D sRenderTasks;
+uniform HIGHP_SAMPLER_FLOAT sampler2D sLayers;
+uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;
 
-uniform sampler2D sData16;
-uniform sampler2D sData32;
-uniform sampler2D sResourceRects;
+uniform HIGHP_SAMPLER_FLOAT sampler2D sData32;
 
 // Instanced attributes
 in ivec4 aData0;
 in ivec4 aData1;
 
 // get_fetch_uv is a macro to work around a macOS Intel driver parsing bug.
 // TODO: convert back to a function once the driver issues are resolved, if ever.
 // https://github.com/servo/webrender/pull/623
 // https://github.com/servo/servo/issues/13953
 #define get_fetch_uv(i, vpi)  ivec2(vpi * (i % (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi)), i / (WR_MAX_VERTEX_TEXTURE_WIDTH/vpi))
 
-vec4 fetch_data_1(int index) {
-    ivec2 uv = get_fetch_uv(index, 1);
-    return texelFetch(sData16, uv, 0);
-}
-
 vec4[2] fetch_data_2(int index) {
     ivec2 uv = get_fetch_uv(index, 2);
     return vec4[2](
         texelFetchOffset(sData32, uv, 0, ivec2(0, 0)),
         texelFetchOffset(sData32, uv, 0, ivec2(1, 0))
     );
 }
 
@@ -450,29 +443,31 @@ struct PrimitiveInstance {
     int prim_address;
     int specific_prim_address;
     int render_task_index;
     int clip_task_index;
     int layer_index;
     int z;
     int user_data0;
     int user_data1;
+    int user_data2;
 };
 
 PrimitiveInstance fetch_prim_instance() {
     PrimitiveInstance pi;
 
     pi.prim_address = aData0.x;
     pi.specific_prim_address = pi.prim_address + VECS_PER_PRIM_HEADER;
     pi.render_task_index = aData0.y;
     pi.clip_task_index = aData0.z;
     pi.layer_index = aData0.w;
     pi.z = aData1.x;
     pi.user_data0 = aData1.y;
     pi.user_data1 = aData1.z;
+    pi.user_data2 = aData1.w;
 
     return pi;
 }
 
 struct CompositeInstance {
     int render_task_index;
     int src_task_index;
     int backdrop_task_index;
@@ -499,16 +494,17 @@ struct Primitive {
     Layer layer;
     ClipArea clip_area;
     AlphaBatchTask task;
     RectWithSize local_rect;
     RectWithSize local_clip_rect;
     int specific_prim_address;
     int user_data0;
     int user_data1;
+    int user_data2;
     float z;
 };
 
 Primitive load_primitive() {
     PrimitiveInstance pi = fetch_prim_instance();
 
     Primitive prim;
 
@@ -518,16 +514,17 @@ Primitive load_primitive() {
 
     vec4 geom[2] = fetch_from_resource_cache_2(pi.prim_address);
     prim.local_rect = RectWithSize(geom[0].xy, geom[0].zw);
     prim.local_clip_rect = RectWithSize(geom[1].xy, geom[1].zw);
 
     prim.specific_prim_address = pi.specific_prim_address;
     prim.user_data0 = pi.user_data0;
     prim.user_data1 = pi.user_data1;
+    prim.user_data2 = pi.user_data2;
     prim.z = float(pi.z);
 
     return prim;
 }
 
 // Return the intersection of the plane (set up by "normal" and "point")
 // with the ray (set up by "ray_origin" and "ray_dir"),
 // writing the resulting scaler into "t".
@@ -566,58 +563,81 @@ vec4 get_layer_pos(vec2 pos, Layer layer
     // get a point on the layer plane
     vec4 ah = layer.transform * vec4(0.0, 0.0, 0.0, 1.0);
     vec3 a = ah.xyz / ah.w;
     // get the normal to the layer plane
     vec3 n = transpose(mat3(layer.inv_transform)) * vec3(0.0, 0.0, 1.0);
     return untransform(pos, n, a, layer.inv_transform);
 }
 
+// Compute a snapping offset in world space (adjusted to pixel ratio),
+// given local position on the layer and a snap rectangle.
+vec2 compute_snap_offset(vec2 local_pos,
+                         RectWithSize local_clip_rect,
+                         Layer layer,
+                         RectWithSize snap_rect) {
+    // Ensure that the snap rect is at *least* one device pixel in size.
+    // TODO(gw): It's not clear to me that this is "correct". Specifically,
+    //           how should it interact with sub-pixel snap rects when there
+    //           is a layer transform with scale present? But it does fix
+    //           the test cases we have in Servo that are failing without it
+    //           and seem better than not having this at all.
+    snap_rect.size = max(snap_rect.size, vec2(1.0 / uDevicePixelRatio));
+
+    // Transform the snap corners to the world space.
+    vec4 world_snap_p0 = layer.transform * vec4(snap_rect.p0, 0.0, 1.0);
+    vec4 world_snap_p1 = layer.transform * vec4(snap_rect.p0 + snap_rect.size, 0.0, 1.0);
+    // Snap bounds in world coordinates, adjusted for pixel ratio. XY = top left, ZW = bottom right
+    vec4 world_snap = uDevicePixelRatio * vec4(world_snap_p0.xy, world_snap_p1.xy) /
+                                          vec4(world_snap_p0.ww, world_snap_p1.ww);
+    /// World offsets applied to the corners of the snap rectangle.
+    vec4 snap_offsets = floor(world_snap + 0.5) - world_snap;
+
+    /// Compute the position of this vertex inside the snap rectangle.
+    vec2 normalized_snap_pos = (local_pos - snap_rect.p0) / snap_rect.size;
+    /// Compute the actual world offset for this vertex needed to make it snap.
+    return mix(snap_offsets.xy, snap_offsets.zw, normalized_snap_pos);
+}
+
 struct VertexInfo {
     vec2 local_pos;
     vec2 screen_pos;
 };
 
 VertexInfo write_vertex(RectWithSize instance_rect,
                         RectWithSize local_clip_rect,
                         float z,
                         Layer layer,
                         AlphaBatchTask task,
-                        vec2 snap_ref) {
+                        RectWithSize snap_rect) {
+
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = instance_rect.p0 + instance_rect.size * aPosition.xy;
 
-    // xy = top left corner of the local rect, zw = position of current vertex.
-    vec4 local_p0_pos = vec4(snap_ref, local_pos);
-
     // Clamp to the two local clip rects.
-    local_p0_pos = clamp_rect(local_p0_pos, local_clip_rect);
-    local_p0_pos = clamp_rect(local_p0_pos, layer.local_clip_rect);
+    vec2 clamped_local_pos = clamp_rect(clamp_rect(local_pos, local_clip_rect),
+                                        layer.local_clip_rect);
 
-    // Transform the top corner and current vertex to world space.
-    vec4 world_p0 = layer.transform * vec4(local_p0_pos.xy, 0.0, 1.0);
-    world_p0.xyz /= world_p0.w;
-    vec4 world_pos = layer.transform * vec4(local_p0_pos.zw, 0.0, 1.0);
-    world_pos.xyz /= world_pos.w;
+    /// Compute the snapping offset.
+    vec2 snap_offset = compute_snap_offset(clamped_local_pos, local_clip_rect, layer, snap_rect);
 
-    // Convert the world positions to device pixel space. xy=top left corner. zw=current vertex.
-    vec4 device_p0_pos = vec4(world_p0.xy, world_pos.xy) * uDevicePixelRatio;
+    // Transform the current vertex to the world space.
+    vec4 world_pos = layer.transform * vec4(clamped_local_pos, 0.0, 1.0);
 
-    // Calculate the distance to snap the vertex by (snap top left corner).
-    vec2 snap_delta = device_p0_pos.xy - floor(device_p0_pos.xy + 0.5);
+    // Convert the world positions to device pixel space.
+    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
-    vec2 final_pos = device_p0_pos.zw -
-                     snap_delta -
+    vec2 final_pos = device_pos + snap_offset -
                      task.screen_space_origin +
                      task.render_target_origin;
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
-    VertexInfo vi = VertexInfo(local_p0_pos.zw, device_p0_pos.zw);
+    VertexInfo vi = VertexInfo(clamped_local_pos, device_pos);
     return vi;
 }
 
 #ifdef WR_FEATURE_TRANSFORM
 
 struct TransformVertexInfo {
     vec3 local_pos;
     vec2 screen_pos;
@@ -642,17 +662,17 @@ vec2 intersect_lines(vec2 p0, vec2 p1, v
     return vec2(nx / d, ny / d);
 }
 
 TransformVertexInfo write_transform_vertex(RectWithSize instance_rect,
                                            RectWithSize local_clip_rect,
                                            float z,
                                            Layer layer,
                                            AlphaBatchTask task,
-                                           vec2 snap_ref) {
+                                           RectWithSize snap_rect) {
     RectWithEndpoint local_rect = to_rect_with_endpoint(instance_rect);
 
     vec2 current_local_pos, prev_local_pos, next_local_pos;
 
     // Select the current vertex and the previous/next vertices,
     // based on the vertex ID that is known based on the instance rect.
     switch (gl_VertexID) {
         case 0:
@@ -701,50 +721,53 @@ TransformVertexInfo write_transform_vert
     vec2 adjusted_next_p1 = next_device_pos + norm_next * amount;
 
     // Intersect those adjusted lines to find the actual vertex position.
     vec2 device_pos = intersect_lines(adjusted_prev_p0,
                                       adjusted_prev_p1,
                                       adjusted_next_p0,
                                       adjusted_next_p1);
 
-    // Calculate the snap amount based on the first vertex as a reference point.
-    vec4 world_p0 = layer.transform * vec4(snap_ref, 0.0, 1.0);
-    vec2 device_p0 = uDevicePixelRatio * world_p0.xy / world_p0.w;
-    vec2 snap_delta = device_p0 - floor(device_p0 + 0.5);
+    vec4 layer_pos = get_layer_pos(device_pos / uDevicePixelRatio, layer);
+
+    /// Compute the snapping offset.
+    vec2 snap_offset = compute_snap_offset(layer_pos.xy / layer_pos.w,
+                                           local_clip_rect, layer, snap_rect);
 
     // Apply offsets for the render task to get correct screen location.
-    vec2 final_pos = device_pos -
-                     snap_delta -
+    vec2 final_pos = device_pos + snap_offset -
                      task.screen_space_origin +
                      task.render_target_origin;
 
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     vLocalBounds = vec4(local_rect.p0, local_rect.p1);
 
-    vec4 layer_pos = get_layer_pos(device_pos / uDevicePixelRatio, layer);
-
     return TransformVertexInfo(layer_pos.xyw, device_pos);
 }
 
 #endif //WR_FEATURE_TRANSFORM
 
-struct ResourceRect {
+struct GlyphResource {
+    vec4 uv_rect;
+    vec2 offset;
+};
+
+GlyphResource fetch_glyph_resource(int address) {
+    vec4 data[2] = fetch_from_resource_cache_2(address);
+    return GlyphResource(data[0], data[1].xy);
+}
+
+struct ImageResource {
     vec4 uv_rect;
 };
 
-ResourceRect fetch_resource_rect(int index) {
-    ResourceRect rect;
-
-    ivec2 uv = get_fetch_uv(index, 1);
-
-    rect.uv_rect = texelFetchOffset(sResourceRects, uv, 0, ivec2(0, 0));
-
-    return rect;
+ImageResource fetch_image_resource(int address) {
+    vec4 data = fetch_from_resource_cache_1(address);
+    return ImageResource(data);
 }
 
 struct Rectangle {
     vec4 color;
 };
 
 Rectangle fetch_rectangle(int address) {
     vec4 data = fetch_from_resource_cache_1(address);
@@ -758,21 +781,22 @@ struct TextRun {
 TextRun fetch_text_run(int address) {
     vec4 data = fetch_from_resource_cache_1(address);
     return TextRun(data);
 }
 
 struct Image {
     vec4 stretch_size_and_tile_spacing;  // Size of the actual image and amount of space between
                                          //     tiled instances of this image.
+    vec4 sub_rect;                          // If negative, ignored.
 };
 
 Image fetch_image(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
-    return Image(data);
+    vec4 data[2] = fetch_from_resource_cache_2(address);
+    return Image(data[0], data[1]);
 }
 
 struct YuvImage {
     vec2 size;
 };
 
 YuvImage fetch_yuv_image(int address) {
     vec4 data = fetch_from_resource_cache_1(address);
--- a/gfx/webrender/res/ps_angle_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_angle_gradient.vs.glsl
@@ -7,17 +7,17 @@ void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.specific_prim_address);
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
 
     vPos = vi.local_pos - prim.local_rect.p0;
 
     vec2 start_point = gradient.start_end_point.xy;
     vec2 end_point = gradient.start_end_point.zw;
     vec2 dir = end_point - start_point;
 
     vStartPoint = start_point;
--- a/gfx/webrender/res/ps_border_corner.vs.glsl
+++ b/gfx/webrender/res/ps_border_corner.vs.glsl
@@ -266,21 +266,21 @@ void main(void) {
     segment_rect.size = p1 - p0;
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
-                                                    prim.local_rect.p0);
+                                                    prim.local_rect);
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
 #endif
 
     vLocalPos = vi.local_pos;
     write_clip(vi.screen_pos, prim.clip_area);
 }
--- a/gfx/webrender/res/ps_border_edge.vs.glsl
+++ b/gfx/webrender/res/ps_border_edge.vs.glsl
@@ -179,21 +179,21 @@ void main(void) {
     write_color(color, style, color_flip);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
-                                                    prim.local_rect.p0);
+                                                    prim.local_rect);
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
 #endif
 
     vLocalPos = vi.local_pos;
     write_clip(vi.screen_pos, prim.clip_area);
 }
--- a/gfx/webrender/res/ps_box_shadow.vs.glsl
+++ b/gfx/webrender/res/ps_box_shadow.vs.glsl
@@ -10,17 +10,17 @@ void main(void) {
     BoxShadow bs = fetch_boxshadow(prim.specific_prim_address);
     RectWithSize segment_rect = fetch_instance_geometry(prim.specific_prim_address + BS_HEADER_VECS + prim.user_data0);
 
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
 
     RenderTaskData child_task = fetch_render_task(prim.user_data1);
     vUv.z = child_task.data1.x;
 
     // Constant offsets to inset from bilinear filtering border.
     vec2 patch_origin = child_task.data0.xy + vec2(1.0);
     vec2 patch_size_device_pixels = child_task.data0.zw - vec2(2.0);
     vec2 patch_size = patch_size_device_pixels / uDevicePixelRatio;
--- a/gfx/webrender/res/ps_cache_image.vs.glsl
+++ b/gfx/webrender/res/ps_cache_image.vs.glsl
@@ -9,17 +9,17 @@
 void main(void) {
     Primitive prim = load_primitive();
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
 
     RenderTaskData child_task = fetch_render_task(prim.user_data1);
     vUv.z = child_task.data1.x;
 
     vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
     vec2 uv0 = child_task.data0.xy / texture_size;
     vec2 uv1 = (child_task.data0.xy + child_task.data0.zw) / texture_size;
 
--- a/gfx/webrender/res/ps_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_gradient.vs.glsl
@@ -61,26 +61,26 @@ void main(void) {
     }
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(segment_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
-                                                    prim.local_rect.p0);
+                                                    prim.local_rect);
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy - prim.local_rect.p0) / prim.local_rect.size;
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
 
     vec2 f = (vi.local_pos - segment_rect.p0) / segment_rect.size;
     vPos = vi.local_pos;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
     vColor = mix(adjusted_color_g0, adjusted_color_g1, dot(f, axis));
--- a/gfx/webrender/res/ps_image.vs.glsl
+++ b/gfx/webrender/res/ps_image.vs.glsl
@@ -1,49 +1,59 @@
 #line 1
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     Image image = fetch_image(prim.specific_prim_address);
-    ResourceRect res = fetch_resource_rect(prim.user_data0);
+    ImageResource res = fetch_image_resource(prim.user_data0);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
-                                                    prim.local_rect.p0);
+                                                    prim.local_rect);
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
     vLocalPos = vi.local_pos - prim.local_rect.p0;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
     // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
     // non-normalized texture coordinates.
 #ifdef WR_FEATURE_TEXTURE_RECT
     vec2 texture_size_normalization_factor = vec2(1, 1);
 #else
     vec2 texture_size_normalization_factor = vec2(textureSize(sColor0, 0));
 #endif
 
+    vec2 uv0, uv1;
+
+    if (image.sub_rect.x < 0.0) {
+        uv0 = res.uv_rect.xy;
+        uv1 = res.uv_rect.zw;
+    } else {
+        uv0 = res.uv_rect.xy + image.sub_rect.xy;
+        uv1 = res.uv_rect.xy + image.sub_rect.zw;
+    }
+
     // vUv will contain how many times this image has wrapped around the image size.
-    vec2 st0 = res.uv_rect.xy / texture_size_normalization_factor;
-    vec2 st1 = res.uv_rect.zw / texture_size_normalization_factor;
+    vec2 st0 = uv0 / texture_size_normalization_factor;
+    vec2 st1 = uv1 / texture_size_normalization_factor;
 
     vTextureSize = st1 - st0;
     vTextureOffset = st0;
     vTileSpacing = image.stretch_size_and_tile_spacing.zw;
     vStretchSize = image.stretch_size_and_tile_spacing.xy;
 
     // We clamp the texture coordinates to the half-pixel offset from the borders
     // in order to avoid sampling outside of the texture area.
--- a/gfx/webrender/res/ps_radial_gradient.vs.glsl
+++ b/gfx/webrender/res/ps_radial_gradient.vs.glsl
@@ -7,17 +7,17 @@ void main(void) {
     Primitive prim = load_primitive();
     RadialGradient gradient = fetch_radial_gradient(prim.specific_prim_address);
 
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
 
     vPos = vi.local_pos - prim.local_rect.p0;
 
     vStartCenter = gradient.start_end_center.xy;
     vEndCenter = gradient.start_end_center.zw;
 
     vStartRadius = gradient.start_end_radius_ratio_xy_extend_mode.x;
     vEndRadius = gradient.start_end_radius_ratio_xy_extend_mode.y;
--- a/gfx/webrender/res/ps_rectangle.vs.glsl
+++ b/gfx/webrender/res/ps_rectangle.vs.glsl
@@ -8,23 +8,23 @@ void main(void) {
     Rectangle rect = fetch_rectangle(prim.specific_prim_address);
     vColor = rect.color;
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
-                                                    prim.local_rect.p0);
+                                                    prim.local_rect);
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
 #endif
 
 #ifdef WR_FEATURE_CLIP
     write_clip(vi.screen_pos, prim.clip_area);
 #endif
 }
--- a/gfx/webrender/res/ps_text_run.vs.glsl
+++ b/gfx/webrender/res/ps_text_run.vs.glsl
@@ -3,39 +3,41 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
-    int resource_address = prim.user_data1;
+    int resource_address = prim.user_data2;
     Glyph glyph = fetch_glyph(prim.specific_prim_address, glyph_index);
-    ResourceRect res = fetch_resource_rect(resource_address + glyph_index);
+    GlyphResource res = fetch_glyph_resource(resource_address);
 
-    RectWithSize local_rect = RectWithSize(glyph.offset,
+    vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
+
+    RectWithSize local_rect = RectWithSize(local_pos,
                                            (res.uv_rect.zw - res.uv_rect.xy) / uDevicePixelRatio);
 
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
-                                                    local_rect.p0);
+                                                    local_rect);
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy / vi.local_pos.z - local_rect.p0) / local_rect.size;
 #else
     VertexInfo vi = write_vertex(local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 local_rect.p0);
+                                 local_rect);
     vec2 f = (vi.local_pos - local_rect.p0) / local_rect.size;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
--- a/gfx/webrender/res/ps_yuv_image.vs.glsl
+++ b/gfx/webrender/res/ps_yuv_image.vs.glsl
@@ -6,35 +6,35 @@
 void main(void) {
     Primitive prim = load_primitive();
 #ifdef WR_FEATURE_TRANSFORM
     TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
                                                     prim.local_clip_rect,
                                                     prim.z,
                                                     prim.layer,
                                                     prim.task,
-                                                    prim.local_rect.p0);
+                                                    prim.local_rect);
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
-                                 prim.local_rect.p0);
+                                 prim.local_rect);
     vLocalPos = vi.local_pos - prim.local_rect.p0;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
-    ResourceRect y_rect = fetch_resource_rect(prim.user_data0);
+    ImageResource y_rect = fetch_image_resource(prim.user_data0);
 #ifndef WR_FEATURE_INTERLEAVED_Y_CB_CR  // only 1 channel
-    ResourceRect u_rect = fetch_resource_rect(prim.user_data0 + 1);
+    ImageResource u_rect = fetch_image_resource(prim.user_data1);
 #ifndef WR_FEATURE_NV12 // 2 channel
-    ResourceRect v_rect = fetch_resource_rect(prim.user_data0 + 2);
+    ImageResource v_rect = fetch_image_resource(prim.user_data2);
 #endif
 #endif
 
     // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
     // non-normalized texture coordinates.
 #ifdef WR_FEATURE_TEXTURE_RECT
     vec2 y_texture_size_normalization_factor = vec2(1, 1);
 #else
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -271,16 +271,17 @@ impl VertexFormat {
                                           0);
 
                 instance.bind(gl);
 
                 for (i, &attrib) in [ClipAttribute::RenderTaskIndex,
                                      ClipAttribute::LayerIndex,
                                      ClipAttribute::DataIndex,
                                      ClipAttribute::SegmentIndex,
+                                     ClipAttribute::ResourceAddress,
                                     ].into_iter().enumerate() {
                     gl.enable_vertex_attrib_array(attrib as gl::GLuint);
                     gl.vertex_attrib_divisor(attrib as gl::GLuint, 1);
                     gl.vertex_attrib_i_pointer(attrib as gl::GLuint,
                                                 1,
                                                 gl::INT,
                                                 instance_stride,
                                                 (i * 4) as gl::GLuint);
@@ -406,16 +407,17 @@ impl Program {
                 self.gl.bind_attrib_location(self.id, BlurAttribute::Direction as gl::GLuint, "aBlurDirection");
             }
             VertexFormat::Clip => {
                 self.gl.bind_attrib_location(self.id, ClipAttribute::Position as gl::GLuint, "aPosition");
                 self.gl.bind_attrib_location(self.id, ClipAttribute::RenderTaskIndex as gl::GLuint, "aClipRenderTaskIndex");
                 self.gl.bind_attrib_location(self.id, ClipAttribute::LayerIndex as gl::GLuint, "aClipLayerIndex");
                 self.gl.bind_attrib_location(self.id, ClipAttribute::DataIndex as gl::GLuint, "aClipDataIndex");
                 self.gl.bind_attrib_location(self.id, ClipAttribute::SegmentIndex as gl::GLuint, "aClipSegmentIndex");
+                self.gl.bind_attrib_location(self.id, ClipAttribute::ResourceAddress as gl::GLuint, "aClipResourceAddress");
             }
         }
 
         self.gl.link_program(self.id);
         if self.gl.get_program_iv(self.id, gl::LINK_STATUS) == (0 as gl::GLint) {
             let error_log = self.gl.get_program_info_log(self.id);
             println!("Failed to link shader program: {:?}\n{}", self.name, error_log);
             self.gl.detach_shader(self.id, vs_id);
@@ -1559,21 +1561,16 @@ impl Device {
             self.gl.uniform_1i(u_data32, TextureSampler::Data32 as i32);
         }
 
         let u_resource_cache = self.gl.get_uniform_location(program.id, "sResourceCache");
         if u_resource_cache != -1 {
             self.gl.uniform_1i(u_resource_cache, TextureSampler::ResourceCache as i32);
         }
 
-        let u_resource_rects = self.gl.get_uniform_location(program.id, "sResourceRects");
-        if u_resource_rects != -1 {
-            self.gl.uniform_1i(u_resource_rects, TextureSampler::ResourceRects as i32);
-        }
-
         Ok(())
     }
 
 /*
     pub fn refresh_shader(&mut self, path: PathBuf) {
         let mut vs_preamble_path = self.resource_path.clone();
         vs_preamble_path.push(VERTEX_SHADER_PREAMBLE);
 
@@ -1648,17 +1645,17 @@ impl Device {
                 if cfg!(any(target_arch="arm", target_arch="aarch64")) {
                     expanded_data.extend(data.iter().flat_map(|byte| repeat(*byte).take(4)));
                     (get_gl_format_bgra(self.gl()), 4, expanded_data.as_slice(), gl::UNSIGNED_BYTE)
                 } else {
                     (GL_FORMAT_A, 1, data, gl::UNSIGNED_BYTE)
                 }
             }
             ImageFormat::RGB8 => (gl::RGB, 3, data, gl::UNSIGNED_BYTE),
-            ImageFormat::RGBA8 => (get_gl_format_bgra(self.gl()), 4, data, gl::UNSIGNED_BYTE),
+            ImageFormat::BGRA8 => (get_gl_format_bgra(self.gl()), 4, data, gl::UNSIGNED_BYTE),
             ImageFormat::RG8 => (gl::RG, 2, data, gl::UNSIGNED_BYTE),
             ImageFormat::RGBAF32 => (gl::RGBA, 16, data, gl::FLOAT),
             ImageFormat::Invalid => unreachable!(),
         };
 
         let row_length = match stride {
             Some(value) => value / bpp,
             None => width,
@@ -2028,17 +2025,17 @@ fn gl_texture_formats_for_image_format(g
         ImageFormat::A8 => {
             if cfg!(any(target_arch="arm", target_arch="aarch64")) {
                 (get_gl_format_bgra(gl) as gl::GLint, get_gl_format_bgra(gl))
             } else {
                 (GL_FORMAT_A as gl::GLint, GL_FORMAT_A)
             }
         },
         ImageFormat::RGB8 => (gl::RGB as gl::GLint, gl::RGB),
-        ImageFormat::RGBA8 => {
+        ImageFormat::BGRA8 => {
             match gl.get_type() {
                 gl::GlType::Gl =>  {
                     (gl::RGBA as gl::GLint, get_gl_format_bgra(gl))
                 }
                 gl::GlType::Gles => {
                     (get_gl_format_bgra(gl) as gl::GLint, get_gl_format_bgra(gl))
                 }
             }
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -27,37 +27,131 @@ use webrender_traits::{MixBlendMode, Pip
 use webrender_traits::{ScrollLayerState, ScrollLocation, ScrollPolicy, SpecificDisplayItem};
 use webrender_traits::{StackingContext, TileOffset, TransformStyle, WorldPoint};
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
 pub struct FrameId(pub u32);
 
 static DEFAULT_SCROLLBAR_COLOR: ColorF = ColorF { r: 0.3, g: 0.3, b: 0.3, a: 0.6 };
 
+/// Nested display lists cause two types of replacements to ClipIds inside the nesting:
+///     1. References to the root scroll frame are replaced by the ClipIds that
+///        contained the nested display list.
+///     2. Other ClipIds (that aren't custom or reference frames) are assumed to be
+///        local to the nested display list and are converted to an id that is unique
+///        outside of the nested display list as well.
+///
+/// This structure keeps track of what ids are the "root" for one particular level of
+/// nesting as well as keeping an index, which can make ClipIds used internally unique
+/// in the full ClipScrollTree.
+struct NestedDisplayListInfo {
+    /// The index of this nested display list, which is used to generate
+    /// new ClipIds for clips that are defined inside it.
+    nest_index: u64,
+
+    /// The ClipId of the scroll frame node which contains this nested
+    /// display list. This is used to replace references to the root with
+    /// the proper ClipId.
+    scroll_node_id: ClipId,
+
+    /// The ClipId of the clip node which contains this nested display list.
+    /// This is used to replace references to the root with the proper ClipId.
+    clip_node_id: ClipId,
+}
+
+impl NestedDisplayListInfo {
+    fn convert_id_to_nested(&self, id: &ClipId) -> ClipId {
+        match *id {
+            ClipId::Clip(id, _, pipeline_id) => ClipId::Clip(id, self.nest_index, pipeline_id),
+            _ => *id,
+        }
+    }
+
+    fn convert_scroll_id_to_nested(&self, id: &ClipId) -> ClipId {
+        if id.is_root_scroll_node() {
+            self.scroll_node_id
+        } else {
+            self.convert_id_to_nested(id)
+        }
+    }
+
+    fn convert_clip_id_to_nested(&self, id: &ClipId) -> ClipId {
+        if id.is_root_scroll_node() {
+            self.clip_node_id
+        } else {
+            self.convert_id_to_nested(id)
+        }
+    }
+}
+
 struct FlattenContext<'a> {
     scene: &'a Scene,
     builder: &'a mut FrameBuilder,
     resource_cache: &'a mut ResourceCache,
     replacements: Vec<(ClipId, ClipId)>,
+    nested_display_list_info: Vec<NestedDisplayListInfo>,
+    current_nested_display_list_index: u64,
 }
 
 impl<'a> FlattenContext<'a> {
     fn new(scene: &'a Scene,
            builder: &'a mut FrameBuilder,
            resource_cache: &'a mut ResourceCache)
            -> FlattenContext<'a> {
         FlattenContext {
             scene: scene,
             builder: builder,
             resource_cache: resource_cache,
             replacements: Vec::new(),
+            nested_display_list_info: Vec::new(),
+            current_nested_display_list_index: 0,
         }
     }
 
-    fn clip_id_with_replacement(&self, id: ClipId) -> ClipId {
+    fn push_nested_display_list_ids(&mut self, info: ClipAndScrollInfo) {
+        self.current_nested_display_list_index += 1;
+        self.nested_display_list_info.push(NestedDisplayListInfo {
+            nest_index: self.current_nested_display_list_index,
+            scroll_node_id: info.scroll_node_id,
+            clip_node_id: info.clip_node_id(),
+        });
+    }
+
+    fn pop_nested_display_list_ids(&mut self) {
+        self.nested_display_list_info.pop();
+    }
+
+    fn convert_new_id_to_neested(&self, id: &ClipId) -> ClipId {
+        if let Some(nested_info) = self.nested_display_list_info.last() {
+            nested_info.convert_id_to_nested(id)
+        } else {
+            *id
+        }
+    }
+
+    fn convert_clip_scroll_info_to_nested(&self, info: &mut ClipAndScrollInfo) {
+        if let Some(nested_info) = self.nested_display_list_info.last() {
+            info.scroll_node_id = nested_info.convert_scroll_id_to_nested(&info.scroll_node_id);
+            info.clip_node_id =
+                info.clip_node_id.map(|ref id| nested_info.convert_clip_id_to_nested(id));
+        }
+
+        // We only want to produce nested ClipIds if we are in a nested display
+        // list situation.
+        debug_assert!(!info.scroll_node_id.is_nested() ||
+                      !self.nested_display_list_info.is_empty());
+        debug_assert!(!info.clip_node_id().is_nested() ||
+                      !self.nested_display_list_info.is_empty());
+    }
+
+    /// Since WebRender still handles fixed position and reference frame content internally
+    /// we need to apply this table of id replacements only to the id that affects the
+    /// position of a node. We can eventually remove this when clients start handling
+    /// reference frames themselves. This method applies these replacements.
+    fn apply_scroll_frame_id_replacement(&self, id: ClipId) -> ClipId {
         match self.replacements.last() {
             Some(&(to_replace, replacement)) if to_replace == id => replacement,
             _ => id,
         }
     }
 }
 
 // TODO: doc
@@ -270,17 +364,18 @@ impl Frame {
         let clip_viewport = LayerRect::new(content_rect.origin, clip.main.size);
         let new_clip_id = self.clip_scroll_tree.generate_new_clip_id(pipeline_id);
         context.builder.add_clip_scroll_node(new_clip_id,
                                              parent_id,
                                              pipeline_id,
                                              &clip_viewport,
                                              clip,
                                              &mut self.clip_scroll_tree);
-        context.builder.add_scroll_frame(item.id,
+        let new_id = context.convert_new_id_to_neested(&item.id);
+        context.builder.add_scroll_frame(new_id,
                                          new_clip_id,
                                          pipeline_id,
                                          &content_rect,
                                          &clip_viewport,
                                          &mut self.clip_scroll_tree);
 
     }
 
@@ -309,18 +404,16 @@ impl Frame {
                 stacking_context.mix_blend_mode_for_compositing())
         };
 
         if composition_operations.will_make_invisible() {
             traversal.skip_current_stacking_context();
             return;
         }
 
-        let mut clip_id = context.clip_id_with_replacement(context_scroll_node_id);
-
         if stacking_context.scroll_policy == ScrollPolicy::Fixed {
             context.replacements.push((context_scroll_node_id,
                                        context.builder.current_reference_frame_id()));
         }
 
         // If we have a transformation, we establish a new reference frame. This means
         // that fixed position stacking contexts are positioned relative to us.
         let is_reference_frame = stacking_context.transform.is_some() ||
@@ -334,16 +427,17 @@ impl Frame {
                 LayerToScrollTransform::create_translation(reference_frame_relative_offset.x,
                                                            reference_frame_relative_offset.y,
                                                            0.0)
                                         .pre_translate(bounds.origin.to_vector().to_3d())
                                         .pre_mul(&transform)
                                         .pre_mul(&perspective);
 
             let reference_frame_bounds = LayerRect::new(LayerPoint::zero(), bounds.size);
+            let mut clip_id = context.apply_scroll_frame_id_replacement(context_scroll_node_id);
             clip_id = context.builder.push_reference_frame(Some(clip_id),
                                                            pipeline_id,
                                                            &reference_frame_bounds,
                                                            &transform,
                                                            &mut self.clip_scroll_tree);
             context.replacements.push((context_scroll_node_id, clip_id));
             reference_frame_relative_offset = LayerVector2D::zero();
         } else {
@@ -430,18 +524,21 @@ impl Frame {
 
     fn flatten_item<'a, 'b>(&mut self,
                             item: DisplayItemRef<'a, 'b>,
                             pipeline_id: PipelineId,
                             context: &mut FlattenContext,
                             reference_frame_relative_offset: LayerVector2D)
                             -> Option<BuiltDisplayListIter<'a>> {
         let mut clip_and_scroll = item.clip_and_scroll();
+        context.convert_clip_scroll_info_to_nested(&mut clip_and_scroll);
+
+        let unreplaced_scroll_id = clip_and_scroll.scroll_node_id;
         clip_and_scroll.scroll_node_id =
-            context.clip_id_with_replacement(clip_and_scroll.scroll_node_id);
+            context.apply_scroll_frame_id_replacement(clip_and_scroll.scroll_node_id);
 
         match *item.item() {
             SpecificDisplayItem::WebGL(ref info) => {
                 context.builder.add_webgl_rectangle(clip_and_scroll,
                                                     item.rect(),
                                                     item.clip_region(),
                                                     info.context_id);
             }
@@ -567,17 +664,17 @@ impl Frame {
                                            item.display_list()
                                                .get(item.gradient_stops()).count());
             }
             SpecificDisplayItem::PushStackingContext(ref info) => {
                 let mut subtraversal = item.sub_iter();
                 self.flatten_stacking_context(&mut subtraversal,
                                               pipeline_id,
                                               context,
-                                              item.clip_and_scroll().scroll_node_id,
+                                              unreplaced_scroll_id,
                                               reference_frame_relative_offset,
                                               &item.rect(),
                                               &info.stacking_context,
                                               item.filters());
                 return Some(subtraversal);
             }
             SpecificDisplayItem::Iframe(ref info) => {
                 self.flatten_iframe(info.pipeline_id,
@@ -591,16 +688,24 @@ impl Frame {
                 let content_rect = &item.rect().translate(&reference_frame_relative_offset);
                 self.flatten_clip(context,
                                   pipeline_id,
                                   clip_and_scroll.scroll_node_id,
                                   &info,
                                   &content_rect,
                                   item.clip_region());
             }
+            SpecificDisplayItem::PushNestedDisplayList => {
+                // Using the clip and scroll already processed for nesting here
+                // means that in the case of multiple nested display lists, we
+                // will enter the outermost ids into the table and avoid having
+                // to do a replacement for every level of nesting.
+                context.push_nested_display_list_ids(clip_and_scroll);
+            }
+            SpecificDisplayItem::PopNestedDisplayList => context.pop_nested_display_list_ids(),
 
             // Do nothing; these are dummy items for the display list parser
             SpecificDisplayItem::SetGradientStops | SpecificDisplayItem::SetClipRegion(_) => { }
 
             SpecificDisplayItem::PopStackingContext =>
                 unreachable!("Should have returned in parent method."),
         }
         None
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
 use frame::FrameId;
 use gpu_cache::GpuCache;
 use gpu_store::GpuStoreAddress;
-use internal_types::{HardwareCompositeOp, SourceTexture};
+use internal_types::HardwareCompositeOp;
 use mask_cache::{ClipMode, ClipSource, MaskCacheInfo, RegionMode};
 use plane_split::{BspSplitter, Polygon, Splitter};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu};
 use prim_store::{ImagePrimitiveKind, PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{RectanglePrimitive, TextRunPrimitiveCpu};
 use prim_store::{BoxShadowPrimitiveCpu, TexelRect, YuvImagePrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
@@ -779,21 +779,19 @@ impl FrameBuilder {
 
         let prim_cpu = TextRunPrimitiveCpu {
             font_key: font_key,
             logical_font_size: size,
             blur_radius: blur_radius,
             glyph_range: glyph_range,
             glyph_count: glyph_count,
             glyph_instances: Vec::new(),
-            color_texture_id: SourceTexture::Invalid,
             color: *color,
             render_mode: render_mode,
             glyph_options: glyph_options,
-            resource_address: GpuStoreAddress(0),
         };
 
         self.add_primitive(clip_and_scroll,
                            &rect,
                            clip_region,
                            &[],
                            PrimitiveContainer::TextRun(prim_cpu));
     }
@@ -1009,20 +1007,18 @@ impl FrameBuilder {
 
     pub fn add_webgl_rectangle(&mut self,
                                clip_and_scroll: ClipAndScrollInfo,
                                rect: LayerRect,
                                clip_region: &ClipRegion,
                                context_id: WebGLContextId) {
         let prim_cpu = ImagePrimitiveCpu {
             kind: ImagePrimitiveKind::WebGL(context_id),
-            color_texture_id: SourceTexture::Invalid,
-            resource_address: GpuStoreAddress(0),
-            sub_rect: None,
-            gpu_block: [rect.size.width, rect.size.height, 0.0, 0.0].into(),
+            gpu_blocks: [ [rect.size.width, rect.size.height, 0.0, 0.0].into(),
+                          TexelRect::invalid().into() ],
         };
 
         self.add_primitive(clip_and_scroll,
                            &rect,
                            clip_region,
                            &[],
                            PrimitiveContainer::Image(prim_cpu));
     }
@@ -1032,28 +1028,29 @@ impl FrameBuilder {
                      rect: LayerRect,
                      clip_region: &ClipRegion,
                      stretch_size: &LayerSize,
                      tile_spacing: &LayerSize,
                      sub_rect: Option<TexelRect>,
                      image_key: ImageKey,
                      image_rendering: ImageRendering,
                      tile: Option<TileOffset>) {
+        let sub_rect_block = sub_rect.unwrap_or(TexelRect::invalid()).into();
+
         let prim_cpu = ImagePrimitiveCpu {
             kind: ImagePrimitiveKind::Image(image_key,
                                             image_rendering,
                                             tile,
                                             *tile_spacing),
-            color_texture_id: SourceTexture::Invalid,
-            resource_address: GpuStoreAddress(0),
-            sub_rect: sub_rect,
-            gpu_block: [ stretch_size.width,
-                         stretch_size.height,
-                         tile_spacing.width,
-                         tile_spacing.height ].into(),
+            gpu_blocks: [ [ stretch_size.width,
+                            stretch_size.height,
+                            tile_spacing.width,
+                            tile_spacing.height ].into(),
+                            sub_rect_block,
+                        ],
         };
 
         self.add_primitive(clip_and_scroll,
                            &rect,
                            clip_region,
                            &[],
                            PrimitiveContainer::Image(prim_cpu));
     }
@@ -1071,18 +1068,16 @@ impl FrameBuilder {
             YuvData::PlanarYCbCr(plane_0, plane_1, plane_2) =>
                 [plane_0, plane_1, plane_2],
             YuvData::InterleavedYCbCr(plane_0) =>
                 [plane_0, ImageKey::new(0, 0), ImageKey::new(0, 0)],
         };
 
         let prim_cpu = YuvImagePrimitiveCpu {
             yuv_key: yuv_key,
-            yuv_texture_id: [SourceTexture::Invalid, SourceTexture::Invalid, SourceTexture::Invalid],
-            yuv_resource_address: GpuStoreAddress(0),
             format: format,
             color_space: color_space,
             image_rendering: image_rendering,
             gpu_block: [rect.size.width, rect.size.height, 0.0, 0.0].into(),
         };
 
         self.add_primitive(clip_and_scroll,
                            &rect,
@@ -1408,72 +1403,62 @@ impl FrameBuilder {
                                                       device_pixel_ratio);
 
         let (main_render_task, static_render_task_count) = self.build_render_task(clip_scroll_tree, gpu_cache);
         let mut render_tasks = RenderTaskCollection::new(static_render_task_count);
 
         let mut required_pass_count = 0;
         main_render_task.max_depth(0, &mut required_pass_count);
 
-        resource_cache.block_until_all_resources_added(texture_cache_profile);
+        resource_cache.block_until_all_resources_added(gpu_cache, texture_cache_profile);
 
-        for node in clip_scroll_tree.nodes.values() {
-            if let NodeType::Clip(ref clip_info) = node.node_type {
-                if let Some(ref mask_info) = clip_info.mask_cache_info {
-                    self.prim_store.resolve_clip_cache(mask_info, resource_cache);
-                }
-            }
-        }
-
-        let deferred_resolves = self.prim_store.resolve_primitives(resource_cache,
-                                                                   device_pixel_ratio);
-
-        let gpu_cache_updates = gpu_cache.end_frame(gpu_cache_profile);
+        let mut deferred_resolves = vec![];
 
         let mut passes = Vec::new();
 
         // Do the allocations now, assigning each tile's tasks to a render
         // pass and target as required.
         for index in 0..required_pass_count {
             passes.push(RenderPass::new(index as isize,
                                         index == required_pass_count-1,
                                         cache_size));
         }
 
         main_render_task.assign_to_passes(passes.len() - 1, &mut passes);
 
         for pass in &mut passes {
             let ctx = RenderTargetContext {
+                device_pixel_ratio: device_pixel_ratio,
                 stacking_context_store: &self.stacking_context_store,
                 clip_scroll_group_store: &self.clip_scroll_group_store,
                 prim_store: &self.prim_store,
                 resource_cache: resource_cache,
-                gpu_cache: gpu_cache,
             };
 
-            pass.build(&ctx, &mut render_tasks);
+            pass.build(&ctx, gpu_cache, &mut render_tasks, &mut deferred_resolves);
 
             profile_counters.passes.inc();
             profile_counters.color_targets.add(pass.color_targets.target_count());
             profile_counters.alpha_targets.add(pass.alpha_targets.target_count());
         }
 
+        let gpu_cache_updates = gpu_cache.end_frame(gpu_cache_profile);
+
         resource_cache.end_frame();
 
         Frame {
             device_pixel_ratio: device_pixel_ratio,
             background_color: self.background_color,
             window_size: self.screen_size,
             profile_counters: profile_counters,
             passes: passes,
             cache_size: cache_size,
             layer_texture_data: self.packed_layers.clone(),
             render_task_data: render_tasks.render_task_data,
             gpu_data32: self.prim_store.gpu_data32.build(),
-            gpu_resource_rects: self.prim_store.gpu_resource_rects.build(),
             deferred_resolves: deferred_resolves,
             gpu_cache_updates: Some(gpu_cache_updates),
         }
     }
 
 }
 
 struct LayerRectCalculationAndCullingPass<'a> {
--- a/gfx/webrender/src/freelist.rs
+++ b/gfx/webrender/src/freelist.rs
@@ -78,16 +78,21 @@ impl<T: FreeListItem> FreeList<T> {
         }
     }
 
     pub fn get(&self, id: FreeListItemId) -> &T {
         debug_assert_eq!(self.free_iter().find(|&fid| fid==id), None);
         &self.items[id.0 as usize]
     }
 
+    pub fn get_mut(&mut self, id: FreeListItemId) -> &mut T { // same lookup as `get`, but hands back `&mut T`
+        debug_assert_eq!(self.free_iter().find(|&fid| fid==id), None); // debug builds only: `id` must not be yielded by `free_iter()`
+        &mut self.items[id.0 as usize] // `id.0` is used directly as the index into `items`
+    }
+
     #[allow(dead_code)]
     pub fn len(&self) -> usize {
         self.alloc_count
     }
 
     pub fn free(&mut self, id: FreeListItemId) -> T {
         self.alloc_count -= 1;
         let FreeListItemId(index) = id;
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -1,22 +1,25 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
 use device::TextureFilter;
+use fnv::FnvHasher;
 use frame::FrameId;
 use platform::font::{FontContext, RasterizedGlyph};
 use profiler::TextureCacheProfileCounters;
 use rayon::ThreadPool;
 use rayon::prelude::*;
 use resource_cache::ResourceClassCache;
+use std::hash::BuildHasherDefault;
 use std::sync::{Arc, Mutex, MutexGuard};
 use std::sync::mpsc::{channel, Receiver, Sender};
+use std::collections::hash_map::Entry;
 use std::collections::HashSet;
 use std::mem;
 use texture_cache::{TextureCacheItemId, TextureCache};
 use webrender_traits::FontTemplate;
 use webrender_traits::{FontKey, FontRenderMode, ImageData, ImageFormat};
 use webrender_traits::{ImageDescriptor, ColorF, LayoutPoint};
 use webrender_traits::{GlyphKey, GlyphOptions, GlyphInstance, GlyphDimensions};
 
@@ -145,16 +148,17 @@ impl GlyphRasterizer {
         glyph_cache: &mut GlyphCache,
         current_frame_id: FrameId,
         font_key: FontKey,
         size: Au,
         color: ColorF,
         glyph_instances: &[GlyphInstance],
         render_mode: FontRenderMode,
         glyph_options: Option<GlyphOptions>,
+        requested_items: &mut HashSet<TextureCacheItemId, BuildHasherDefault<FnvHasher>>,
     ) {
         assert!(self.font_contexts.lock_shared_context().has_font(&font_key));
 
         let mut glyphs = Vec::with_capacity(glyph_instances.len());
 
         // select glyphs that have not been requested yet.
         for glyph in glyph_instances {
             let glyph_request = GlyphRequest::new(
@@ -162,20 +166,28 @@ impl GlyphRasterizer {
                 size,
                 color,
                 glyph.index,
                 glyph.point,
                 render_mode,
                 glyph_options,
             );
 
-            glyph_cache.mark_as_needed(&glyph_request, current_frame_id);
-            if !glyph_cache.contains_key(&glyph_request) && !self.pending_glyphs.contains(&glyph_request) {
-                self.pending_glyphs.insert(glyph_request.clone());
-                glyphs.push(glyph_request);
+            match glyph_cache.entry(glyph_request.clone(), current_frame_id) {
+                Entry::Occupied(entry) => {
+                    if let &Some(texture_cache_item_id) = entry.get() {
+                        requested_items.insert(texture_cache_item_id);
+                    }
+                }
+                Entry::Vacant(..) => {
+                    if !self.pending_glyphs.contains(&glyph_request) {
+                        self.pending_glyphs.insert(glyph_request.clone());
+                        glyphs.push(glyph_request);
+                    }
+                }
             }
         }
 
         if glyphs.is_empty() {
             return;
         }
 
         let font_contexts = Arc::clone(&self.font_contexts);
@@ -213,16 +225,17 @@ impl GlyphRasterizer {
         self.font_contexts.lock_shared_context().get_glyph_dimensions(glyph_key)
     }
 
     pub fn resolve_glyphs(
         &mut self,
         current_frame_id: FrameId,
         glyph_cache: &mut GlyphCache,
         texture_cache: &mut TextureCache,
+        requested_items: &mut HashSet<TextureCacheItemId, BuildHasherDefault<FnvHasher>>,
         texture_cache_profile: &mut TextureCacheProfileCounters,
     ) {
         let mut rasterized_glyphs = Vec::with_capacity(self.pending_glyphs.len());
 
         // Pull rasterized glyphs from the queue.
         while !self.pending_glyphs.is_empty() {
             // TODO: rather than blocking until all pending glyphs are available
             // we could try_recv and steal work from the thread pool to take advantage
@@ -248,24 +261,26 @@ impl GlyphRasterizer {
         for job in rasterized_glyphs {
             let image_id = job.result.and_then(
                 |glyph| if glyph.width > 0 && glyph.height > 0 {
                     let image_id = texture_cache.insert(
                         ImageDescriptor {
                             width: glyph.width,
                             height: glyph.height,
                             stride: None,
-                            format: ImageFormat::RGBA8,
+                            format: ImageFormat::BGRA8,
                             is_opaque: false,
                             offset: 0,
                         },
                         TextureFilter::Linear,
                         ImageData::Raw(Arc::new(glyph.bytes)),
+                        [glyph.left, glyph.top],
                         texture_cache_profile,
                     );
+                    requested_items.insert(image_id);
                     Some(image_id)
                 } else {
                     None
                 }
             );
 
             glyph_cache.insert(job.request, image_id, current_frame_id);
         }
@@ -340,16 +355,17 @@ fn raterize_200_glyphs() {
 
     use rayon::Configuration;
     use std::fs::File;
     use std::io::Read;
 
     let workers = Arc::new(ThreadPool::new(Configuration::new()).unwrap());
     let mut glyph_rasterizer = GlyphRasterizer::new(workers);
     let mut glyph_cache = GlyphCache::new();
+    let mut requested_items = HashSet::default();
 
     let mut font_file = File::open("../wrench/reftests/text/VeraBd.ttf").expect("Couldn't open font file");
     let mut font_data = vec![];
     font_file.read_to_end(&mut font_data).expect("failed to read font file");
 
     let font_key = FontKey::new(0, 0);
     glyph_rasterizer.add_font(font_key, FontTemplate::Raw(Arc::new(font_data), 0));
 
@@ -368,20 +384,22 @@ fn raterize_200_glyphs() {
             &mut glyph_cache,
             frame_id,
             font_key,
             Au::from_px(32),
             ColorF::new(0.0, 0.0, 0.0, 1.0),
             &glyph_instances[(50 * i)..(50 * (i + 1))],
             FontRenderMode::Subpixel,
             None,
+            &mut requested_items,
         );
     }
 
     glyph_rasterizer.delete_font(font_key);
 
     glyph_rasterizer.resolve_glyphs(
         frame_id,
         &mut glyph_cache,
         &mut TextureCache::new(4096),
+        &mut requested_items,
         &mut TextureCacheProfileCounters::new(),
     );
 }
--- a/gfx/webrender/src/gpu_cache.rs
+++ b/gfx/webrender/src/gpu_cache.rs
@@ -20,16 +20,17 @@
 //! will be invoked to build the data.
 //!
 //! After ```end_frame``` has occurred, callers can
 //! use the ```get_address``` API to get the allocated
 //! address in the GPU cache of a given resource slot
 //! for this frame.
 
 use device::FrameId;
+use internal_types::UvRect;
 use profiler::GpuCacheProfileCounters;
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 use std::{mem, u32};
 use webrender_traits::{ColorF, LayerRect};
 
 pub const GPU_CACHE_INITIAL_HEIGHT: u32 = 512;
 const FRAMES_BEFORE_EVICTION: usize = 10;
 const NEW_ROWS_PER_RESIZE: u32 = 512;
@@ -78,16 +79,27 @@ impl Into<GpuBlockData> for LayerRect {
             data: [ self.origin.x,
                     self.origin.y,
                     self.size.width,
                     self.size.height ],
         }
     }
 }
 
+impl Into<GpuBlockData> for UvRect { // packs a `UvRect` into a single `GpuBlockData`
+    fn into(self) -> GpuBlockData {
+        GpuBlockData {
+            data: [ self.uv0.x, // resulting layout: [uv0.x, uv0.y, uv1.x, uv1.y]
+                    self.uv0.y,
+                    self.uv1.x,
+                    self.uv1.y ],
+        }
+    }
+}
+
 // Any data type that can be stored in the GPU cache should
 // implement this trait.
 pub trait ToGpuBlocks {
     // Request an arbitrary number of GPU data blocks.
     fn write_gpu_blocks(&self, GpuDataRequest);
 }
 
 // A handle to a GPU resource.
@@ -280,17 +292,17 @@ impl Texture {
             allocated_block_count: 0,
         }
     }
 
     // Push new data into the cache. The ```pending_block_index``` field represents
     // where the data was pushed into the texture ```pending_blocks``` array.
     // Return the allocated address for this data.
     fn push_data(&mut self,
-                 pending_block_index: usize,
+                 pending_block_index: Option<usize>,
                  block_count: usize,
                  frame_id: FrameId) -> CacheLocation {
         // Find the appropriate free list to use based on the block size.
         let (alloc_size, free_list) = self.free_lists
                                           .get_actual_block_count_and_free_list(block_count);
 
         // See if we need a new row (if free-list has nothing available)
         if free_list.is_none() {
@@ -326,23 +338,25 @@ impl Texture {
         *free_list = block.next;
 
         // Add the block to the occupied linked list.
         block.next = self.occupied_list_head;
         block.last_access_time = frame_id;
         self.occupied_list_head = Some(free_block_index);
         self.allocated_block_count += alloc_size;
 
-        // Add this update to the pending list of blocks that need
-        // to be updated on the GPU.
-        self.updates.push(GpuCacheUpdate::Copy {
-            block_index: pending_block_index,
-            block_count: block_count,
-            address: block.address,
-        });
+        if let Some(pending_block_index) = pending_block_index {
+            // Add this update to the pending list of blocks that need
+            // to be updated on the GPU.
+            self.updates.push(GpuCacheUpdate::Copy {
+                block_index: pending_block_index,
+                block_count: block_count,
+                address: block.address,
+            });
+        }
 
         CacheLocation {
             block_index: free_block_index,
             epoch: block.epoch,
         }
     }
 
     // Run through the list of occupied cache blocks and evict
@@ -425,17 +439,17 @@ impl<'a> GpuDataRequest<'a> {
         self.texture.pending_blocks.extend_from_slice(blocks);
     }
 }
 
 impl<'a> Drop for GpuDataRequest<'a> {
     fn drop(&mut self) {
         // Push the data to the texture pending updates list.
         let block_count = self.texture.pending_blocks.len() - self.start_index;
-        let location = self.texture.push_data(self.start_index,
+        let location = self.texture.push_data(Some(self.start_index),
                                               block_count,
                                               self.frame_id);
         self.handle.location = Some(location);
     }
 }
 
 
 /// The main LRU cache interface.
@@ -496,24 +510,36 @@ impl GpuCache {
     // unconditionally for this frame. The cache handle will
     // assert if the caller tries to retrieve the address
     // of this handle on a subsequent frame. This is typically
     // used for uploading data that changes every frame, and
     // therefore makes no sense to try and cache.
     pub fn push_per_frame_blocks(&mut self, blocks: &[GpuBlockData]) -> GpuCacheHandle {
         let start_index = self.texture.pending_blocks.len();
         self.texture.pending_blocks.extend_from_slice(blocks);
-        let location = self.texture.push_data(start_index,
+        let location = self.texture.push_data(Some(start_index),
                                               blocks.len(),
                                               self.frame_id);
         GpuCacheHandle {
             location: Some(location),
         }
     }
 
+    // Reserve `block_count` blocks in the cache for per-frame data that
+    // will be resolved by the render thread via the external image
+    // callback. No block contents are supplied here; a handle is returned.
+    pub fn push_deferred_per_frame_blocks(&mut self, block_count: usize) -> GpuCacheHandle {
+        let location = self.texture.push_data(None, // `None`: nothing pending to copy, so `push_data` queues no `GpuCacheUpdate::Copy`
+                                              block_count,
+                                              self.frame_id);
+        GpuCacheHandle {
+            location: Some(location), // handle points at the reserved location
+        }
+    }
+
     /// End the frame. Return the list of updates to apply to the
     /// device specific cache texture.
     pub fn end_frame(&mut self,
                      profile_counters: &mut GpuCacheProfileCounters) -> GpuCacheUpdateList {
         profile_counters.allocated_rows.set(self.texture.rows.len());
         profile_counters.allocated_blocks.set(self.texture.allocated_block_count);
 
         GpuCacheUpdateList {
--- a/gfx/webrender/src/gpu_store.rs
+++ b/gfx/webrender/src/gpu_store.rs
@@ -33,17 +33,17 @@ pub trait GpuStoreLayout {
     fn image_format() -> ImageFormat;
 
     fn texture_width<T>() -> usize;
 
     fn texture_filter() -> TextureFilter;
 
     fn texel_size() -> usize {
         match Self::image_format() {
-            ImageFormat::RGBA8 => 4,
+            ImageFormat::BGRA8 => 4,
             ImageFormat::RGBAF32 => 16,
             _ => unreachable!(),
         }
     }
 
     fn texels_per_item<T>() -> usize {
         let item_size = mem::size_of::<T>();
         let texel_size = Self::texel_size();
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -1,27 +1,26 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
 use device::TextureFilter;
-use euclid::{TypedPoint2D, UnknownUnit};
 use fnv::FnvHasher;
 use profiler::BackendProfileCounters;
 use std::collections::{HashMap, HashSet};
 use std::f32;
 use std::hash::BuildHasherDefault;
 use std::{i32, usize};
 use std::path::PathBuf;
 use std::sync::Arc;
 use tiling;
 use renderer::BlendMode;
 use webrender_traits::{ClipId, ColorF, DeviceUintRect, Epoch, ExternalImageData, ExternalImageId};
-use webrender_traits::{ImageData, ImageFormat, NativeFontHandle, PipelineId};
+use webrender_traits::{DevicePoint, ImageData, ImageFormat, PipelineId};
 
 // An ID for a texture that is owned by the
 // texture cache module. This can include atlases
 // or standalone textures allocated via the
 // texture cache (e.g. if an image is too large
 // to be added to an atlas). The texture cache
 // manages the allocation and freeing of these
 // IDs, and the rendering thread maintains a
@@ -59,17 +58,16 @@ pub enum TextureSampler {
     Color1,
     Color2,
     CacheA8,
     CacheRGBA8,
     Data32,
     ResourceCache,
     Layers,
     RenderTasks,
-    ResourceRects,
     Dither,
 }
 
 impl TextureSampler {
     pub fn color(n: usize) -> TextureSampler {
         match n {
             0 => TextureSampler::Color0,
             1 => TextureSampler::Color1,
@@ -124,16 +122,17 @@ pub enum BlurAttribute {
 pub enum ClipAttribute {
     // vertex frequency
     Position,
     // instance frequency
     RenderTaskIndex,
     LayerIndex,
     DataIndex,
     SegmentIndex,
+    ResourceAddress,
 }
 
 // A packed RGBA8 color ordered for vertex data or similar.
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
 pub struct PackedColor {
     pub r: u8,
@@ -298,21 +297,19 @@ pub enum AxisDirection {
     Horizontal,
     Vertical,
 }
 
 #[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)]
 pub struct StackingContextIndex(pub usize);
 
 #[derive(Clone, Copy, Debug)]
-pub struct RectUv<T, U = UnknownUnit> {
-    pub top_left: TypedPoint2D<T, U>,
-    pub top_right: TypedPoint2D<T, U>,
-    pub bottom_left: TypedPoint2D<T, U>,
-    pub bottom_right: TypedPoint2D<T, U>,
+pub struct UvRect {
+    pub uv0: DevicePoint,
+    pub uv1: DevicePoint,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub enum LowLevelFilterOp {
     Blur(Au, AxisDirection),
     Brightness(Au),
     Contrast(Au),
     Grayscale(Au),
--- a/gfx/webrender/src/mask_cache.rs
+++ b/gfx/webrender/src/mask_cache.rs
@@ -1,21 +1,21 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use border::BorderCornerClipSource;
 use gpu_store::GpuStoreAddress;
-use prim_store::{ClipData, GpuBlock32, PrimitiveStore};
+use prim_store::{ClipData, GpuBlock32, ImageMaskData, PrimitiveStore};
 use prim_store::{CLIP_DATA_GPU_SIZE, MASK_DATA_GPU_SIZE};
 use renderer::VertexDataStore;
 use util::{ComplexClipRegionHelpers, MatrixHelpers, TransformedRect};
 use webrender_traits::{BorderRadius, BuiltDisplayList, ClipRegion, ComplexClipRegion, ImageMask};
 use webrender_traits::{DeviceIntRect, LayerToWorldTransform};
-use webrender_traits::{LayerRect, LayerPoint, LayerSize};
+use webrender_traits::{DeviceRect, LayerRect, LayerPoint, LayerSize};
 use std::ops::Not;
 
 const MAX_CLIP: f32 = 1000000.0;
 
 #[repr(C)]
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum ClipMode {
     Clip,           // Pixels inside the region are visible.
@@ -267,16 +267,24 @@ impl MaskCacheInfo {
 
             for &mut (ref mut source, gpu_address) in &mut self.border_corners {
                 has_border_clip = true;
                 let slice = clip_store.get_slice_mut(gpu_address,
                                                      1 + source.max_clip_count);
                 source.populate_gpu_data(slice);
             }
 
+            if let Some((ref mask, gpu_address)) = self.image {
+                let mask_data = clip_store.get_slice_mut(gpu_address, MASK_DATA_GPU_SIZE);
+                mask_data[0] = GpuBlock32::from(ImageMaskData {
+                    padding: DeviceRect::zero(),
+                    local_rect: mask.rect,
+                });
+            }
+
             // Work out the type of mask geometry we have, based on the
             // list of clip sources above.
             if has_clip_out || has_border_clip {
                 // For clip-out, the mask rect is not known.
                 self.bounds = Some(MaskBounds::None);
             } else {
                 // TODO(gw): local inner is only valid if there's a single clip (for now).
                 // This can be improved in the future, with some proper
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -26,24 +26,28 @@ pub struct FontContext {
     gamma_lut: GammaLut,
 }
 
 // core text is safe to use on multiple threads and non-shareable resources are
 // all hidden inside their font context.
 unsafe impl Send for FontContext {}
 
 pub struct RasterizedGlyph {
+    pub top: f32,
+    pub left: f32,
     pub width: u32,
     pub height: u32,
     pub bytes: Vec<u8>,
 }
 
 impl RasterizedGlyph {
     pub fn blank() -> RasterizedGlyph {
         RasterizedGlyph {
+            top: 0.0,
+            left: 0.0,
             width: 0,
             height: 0,
             bytes: vec![],
         }
     }
 }
 
 struct GlyphMetrics {
@@ -54,17 +58,17 @@ struct GlyphMetrics {
     rasterized_height: u32,
 }
 
 // According to the Skia source code, there's no public API to
 // determine if subpixel AA is supported. So jrmuizel ported
 // this function from Skia which is used to check if a glyph
 // can be rendered with subpixel AA.
 fn supports_subpixel_aa() -> bool {
-    let mut cg_context = CGContext::create_bitmap_context(1, 1, 8, 4,
+    let mut cg_context = CGContext::create_bitmap_context(None, 1, 1, 8, 4,
                                                           &CGColorSpace::create_device_rgb(),
                                                           kCGImageAlphaNoneSkipFirst |
                                                           kCGBitmapByteOrder32Little);
     let ct_font = core_text::font::new_from_name("Helvetica", 16.).unwrap();
     cg_context.set_should_smooth_fonts(true);
     cg_context.set_should_antialias(true);
     cg_context.set_allows_font_smoothing(true);
     cg_context.set_rgb_fill_color(1.0, 1.0, 1.0, 1.0);
@@ -280,17 +284,17 @@ impl FontContext {
             return Some(RasterizedGlyph::blank())
         }
 
         let context_flags = match render_mode {
             FontRenderMode::Subpixel => kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipFirst,
             FontRenderMode::Alpha | FontRenderMode::Mono => kCGImageAlphaPremultipliedLast,
         };
 
-        let mut cg_context = CGContext::create_bitmap_context(metrics.rasterized_width as usize,
+        let mut cg_context = CGContext::create_bitmap_context(None, metrics.rasterized_width as usize,
                                                               metrics.rasterized_height as usize,
                                                               8,
                                                               metrics.rasterized_width as usize * 4,
                                                               &CGColorSpace::create_device_rgb(),
                                                               context_flags);
 
 
         // Tested on mac OS Sierra, 10.12
@@ -395,15 +399,17 @@ impl FontContext {
 
         self.gamma_correct_pixels(&mut rasterized_pixels,
                                   metrics.rasterized_width as usize,
                                   metrics.rasterized_height as usize,
                                   render_mode,
                                   key.color);
 
         Some(RasterizedGlyph {
+            left: metrics.rasterized_left as f32,
+            top: metrics.rasterized_ascent as f32,
             width: metrics.rasterized_width,
             height: metrics.rasterized_height,
             bytes: rasterized_pixels,
         })
     }
 }
 
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -28,16 +28,18 @@ pub struct FontContext {
 }
 
 // FreeType resources are safe to move between threads as long as they
 // are not concurrently accessed. In our case, everything is hidden inside
 // a given FontContext so it is safe to move the latter between threads.
 unsafe impl Send for FontContext {}
 
 pub struct RasterizedGlyph {
+    pub top: f32,
+    pub left: f32,
     pub width: u32,
     pub height: u32,
     pub bytes: Vec<u8>,
 }
 
 fn float_to_fixed(before: usize, f: f64) -> i32 {
     ((1i32 << before) as f64 * f) as i32
 }
@@ -253,16 +255,18 @@ impl FontContext {
             }
             for _x in dst_end .. dimensions.width as i32 {
                 final_buffer.extend_from_slice(&[0xff, 0xff, 0xff, 0]);
             }
             assert_eq!(final_buffer.len(), ((y+1) * dimensions.width * 4) as usize);
         }
 
         Some(RasterizedGlyph {
+            left: dimensions.left as f32,
+            top: dimensions.top as f32,
             width: dimensions.width as u32,
             height: dimensions.height as u32,
             bytes: final_buffer,
         })
     }
 }
 
 impl Drop for FontContext {
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -24,16 +24,18 @@ pub struct FontContext {
     gdi_gamma_lut: GammaLut,
 }
 
 // DirectWrite is safe to use on multiple threads and non-shareable resources are
 // all hidden inside their font context.
 unsafe impl Send for FontContext {}
 
 pub struct RasterizedGlyph {
+    pub top: f32,
+    pub left: f32,
     pub width: u32,
     pub height: u32,
     pub bytes: Vec<u8>,
 }
 
 fn dwrite_texture_type(render_mode: FontRenderMode) ->
                        dwrote::DWRITE_TEXTURE_TYPE {
     match render_mode {
@@ -304,14 +306,16 @@ impl FontContext {
                                                       key.color.g,
                                                       key.color.b,
                                                       key.color.a));
         }
 
         let rgba_pixels = self.convert_to_rgba(&mut pixels, render_mode);
 
         Some(RasterizedGlyph {
+            left: bounds.left as f32,
+            top: -bounds.top as f32,
             width: width as u32,
             height: height as u32,
             bytes: rgba_pixels,
         })
     }
 }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,32 +1,31 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
 use border::{BorderCornerClipData, BorderCornerDashClipData, BorderCornerDotClipData};
 use border::BorderCornerInstance;
 use euclid::{Size2D};
-use gpu_cache::{GpuBlockData, GpuCache, GpuCacheHandle, GpuDataRequest, ToGpuBlocks};
+use gpu_cache::{GpuCacheAddress, GpuBlockData, GpuCache, GpuCacheHandle, GpuDataRequest, ToGpuBlocks};
 use gpu_store::GpuStoreAddress;
-use internal_types::SourceTexture;
 use mask_cache::{ClipMode, ClipSource, MaskCacheInfo};
 use renderer::{VertexDataStore, MAX_VERTEX_TEXTURE_WIDTH};
 use render_task::{RenderTask, RenderTaskLocation};
-use resource_cache::{CacheItem, ImageProperties, ResourceCache};
+use resource_cache::{ImageProperties, ResourceCache};
 use std::mem;
 use std::usize;
 use util::{TransformedRect, recycle_vec};
 use webrender_traits::{BuiltDisplayList, ColorF, ImageKey, ImageRendering, YuvColorSpace};
-use webrender_traits::{YuvFormat, ClipRegion, ComplexClipRegion, ItemRange, GlyphKey};
+use webrender_traits::{YuvFormat, ClipRegion, ComplexClipRegion, ItemRange};
 use webrender_traits::{FontKey, FontRenderMode, WebGLContextId};
 use webrender_traits::{device_length, DeviceIntRect, DeviceIntSize};
-use webrender_traits::{DeviceRect, DevicePoint, DeviceSize};
-use webrender_traits::{LayerRect, LayerSize, LayerPoint, LayoutPoint};
+use webrender_traits::{DeviceRect, DevicePoint};
+use webrender_traits::{LayerRect, LayerSize, LayerPoint};
 use webrender_traits::{LayerToWorldTransform, GlyphInstance, GlyphOptions};
 use webrender_traits::{ExtendMode, GradientStop, TileOffset};
 
 pub const CLIP_DATA_GPU_SIZE: usize = 5;
 pub const MASK_DATA_GPU_SIZE: usize = 1;
 
 /// Stores two coordinates in texel space. The coordinates
 /// are stored in texel coordinates because the texture atlas
@@ -34,44 +33,50 @@ pub const MASK_DATA_GPU_SIZE: usize = 1;
 /// the UVs in the vertex shader means nothing needs to be
 /// updated on the CPU when the texture size changes.
 #[derive(Copy, Clone, Debug)]
 pub struct TexelRect {
     pub uv0: DevicePoint,
     pub uv1: DevicePoint,
 }
 
-impl Default for TexelRect {
-    fn default() -> TexelRect {
-        TexelRect {
-            uv0: DevicePoint::zero(),
-            uv1: DevicePoint::zero(),
-        }
-    }
-}
-
 impl TexelRect {
     pub fn new(u0: u32, v0: u32, u1: u32, v1: u32) -> TexelRect {
         TexelRect {
             uv0: DevicePoint::new(u0 as f32, v0 as f32),
             uv1: DevicePoint::new(u1 as f32, v1 as f32),
         }
     }
+
+    pub fn invalid() -> TexelRect {
+        TexelRect {
+            uv0: DevicePoint::new(-1.0, -1.0),
+            uv1: DevicePoint::new(-1.0, -1.0),
+        }
+    }
+}
+
+impl Into<GpuBlockData> for TexelRect {
+    fn into(self) -> GpuBlockData {
+        GpuBlockData {
+            data: [self.uv0.x, self.uv0.y, self.uv1.x, self.uv1.y],
+        }
+    }
 }
 
 /// For external images, it's not possible to know the
 /// UV coords of the image (or the image data itself)
 /// until the render thread receives the frame and issues
 /// callbacks to the client application. For external
 /// images that are visible, a DeferredResolve is created
 /// that is stored in the frame. This allows the render
 /// thread to iterate this list and update any changed
 /// texture data and update the UV rect.
 pub struct DeferredResolve {
-    pub resource_address: GpuStoreAddress,
+    pub address: GpuCacheAddress,
     pub image_properties: ImageProperties,
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct SpecificPrimitiveIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct PrimitiveIndex(pub usize);
@@ -155,41 +160,32 @@ impl ToGpuBlocks for RectanglePrimitive 
 pub enum ImagePrimitiveKind {
     Image(ImageKey, ImageRendering, Option<TileOffset>, LayerSize),
     WebGL(WebGLContextId),
 }
 
 #[derive(Debug)]
 pub struct ImagePrimitiveCpu {
     pub kind: ImagePrimitiveKind,
-    pub color_texture_id: SourceTexture,
-    pub resource_address: GpuStoreAddress,
-    pub sub_rect: Option<TexelRect>,
     // TODO(gw): Build on demand
-    pub gpu_block: GpuBlockData,
+    pub gpu_blocks: [GpuBlockData; 2],
 }
 
 impl ToGpuBlocks for ImagePrimitiveCpu {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
-        request.push(self.gpu_block);
+        request.extend_from_slice(&self.gpu_blocks);
     }
 }
 
 #[derive(Debug)]
 pub struct YuvImagePrimitiveCpu {
     pub yuv_key: [ImageKey; 3],
-    // All textures should be the same type here.
-    pub yuv_texture_id: [SourceTexture; 3],
     pub format: YuvFormat,
     pub color_space: YuvColorSpace,
 
-    // The first address of yuv resource_address. Use "yuv_resource_address + N-th" to get the N-th channel data.
-    // e.g. yuv_resource_address + 0 => y channel resource_address
-    pub yuv_resource_address: GpuStoreAddress,
-
     pub image_rendering: ImageRendering,
 
     // TODO(gw): Generate on demand
     pub gpu_block: GpuBlockData,
 }
 
 impl ToGpuBlocks for YuvImagePrimitiveCpu {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
@@ -461,20 +457,18 @@ impl RadialGradientPrimitiveCpu {
 pub struct TextRunPrimitiveCpu {
     pub font_key: FontKey,
     pub logical_font_size: Au,
     pub blur_radius: f32,
     pub glyph_range: ItemRange<GlyphInstance>,
     pub glyph_count: usize,
     // TODO(gw): Maybe make this an Arc for sharing with resource cache
     pub glyph_instances: Vec<GlyphInstance>,
-    pub color_texture_id: SourceTexture,
     pub color: ColorF,
     pub render_mode: FontRenderMode,
-    pub resource_address: GpuStoreAddress,
     pub glyph_options: Option<GlyphOptions>,
 }
 
 impl ToGpuBlocks for TextRunPrimitiveCpu {
     fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
         request.push(self.color.into());
 
         // Two glyphs are packed per GPU block.
@@ -527,18 +521,18 @@ impl ClipCorner {
             inner_radius_y: inner_radius,
         }
     }
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 pub struct ImageMaskData {
-    uv_rect: DeviceRect,
-    local_rect: LayerRect,
+    pub local_rect: LayerRect,
+    pub padding: DeviceRect,
 }
 
 #[derive(Debug, Clone)]
 pub struct ClipData {
     rect: ClipRect,
     top_left: ClipCorner,
     top_right: ClipCorner,
     bottom_left: ClipCorner,
@@ -648,58 +642,48 @@ pub struct PrimitiveStore {
     pub cpu_gradients: Vec<GradientPrimitiveCpu>,
     pub cpu_radial_gradients: Vec<RadialGradientPrimitiveCpu>,
     pub cpu_metadata: Vec<PrimitiveMetadata>,
     pub cpu_borders: Vec<BorderPrimitiveCpu>,
     pub cpu_box_shadows: Vec<BoxShadowPrimitiveCpu>,
 
     /// Gets uploaded directly to GPU via vertex texture.
     pub gpu_data32: VertexDataStore<GpuBlock32>,
-
-    /// Resolved resource rects.
-    pub gpu_resource_rects: VertexDataStore<TexelRect>,
-
-    /// General
-    prims_to_resolve: Vec<PrimitiveIndex>,
 }
 
 impl PrimitiveStore {
     pub fn new() -> PrimitiveStore {
         PrimitiveStore {
             cpu_metadata: Vec::new(),
             cpu_rectangles: Vec::new(),
             cpu_bounding_rects: Vec::new(),
             cpu_text_runs: Vec::new(),
             cpu_images: Vec::new(),
             cpu_yuv_images: Vec::new(),
             cpu_gradients: Vec::new(),
             cpu_radial_gradients: Vec::new(),
             cpu_borders: Vec::new(),
             cpu_box_shadows: Vec::new(),
-            prims_to_resolve: Vec::new(),
             gpu_data32: VertexDataStore::new(),
-            gpu_resource_rects: VertexDataStore::new(),
         }
     }
 
     pub fn recycle(self) -> Self {
         PrimitiveStore {
             cpu_metadata: recycle_vec(self.cpu_metadata),
             cpu_rectangles: recycle_vec(self.cpu_rectangles),
             cpu_bounding_rects: recycle_vec(self.cpu_bounding_rects),
             cpu_text_runs: recycle_vec(self.cpu_text_runs),
             cpu_images: recycle_vec(self.cpu_images),
             cpu_yuv_images: recycle_vec(self.cpu_yuv_images),
             cpu_gradients: recycle_vec(self.cpu_gradients),
             cpu_radial_gradients: recycle_vec(self.cpu_radial_gradients),
             cpu_borders: recycle_vec(self.cpu_borders),
             cpu_box_shadows: recycle_vec(self.cpu_box_shadows),
-            prims_to_resolve: recycle_vec(self.prims_to_resolve),
             gpu_data32: self.gpu_data32.recycle(),
-            gpu_resource_rects: self.gpu_resource_rects.recycle(),
         }
     }
 
     pub fn populate_clip_data(data: &mut [GpuBlock32], clip: ClipData) {
         data[0] = GpuBlock32::from(clip.rect);
         data[1] = GpuBlock32::from(clip.top_left);
         data[2] = GpuBlock32::from(clip.top_right);
         data[3] = GpuBlock32::from(clip.bottom_left);
@@ -731,57 +715,51 @@ impl PrimitiveStore {
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_rectangles.push(rect);
 
                 metadata
             }
-            PrimitiveContainer::TextRun(mut text_cpu) => {
-                text_cpu.resource_address = self.gpu_resource_rects.alloc(text_cpu.glyph_count);
-
+            PrimitiveContainer::TextRun(text_cpu) => {
                 let metadata = PrimitiveMetadata {
                     is_opaque: false,
                     clips: clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::TextRun,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_text_runs.len()),
                     gpu_location: GpuCacheHandle::new(),
                     render_task: None,
                     clip_task: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_text_runs.push(text_cpu);
                 metadata
             }
-            PrimitiveContainer::Image(mut image_cpu) => {
-                image_cpu.resource_address = self.gpu_resource_rects.alloc(1);
-
+            PrimitiveContainer::Image(image_cpu) => {
                 let metadata = PrimitiveMetadata {
                     is_opaque: false,
                     clips: clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::Image,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_images.len()),
                     gpu_location: GpuCacheHandle::new(),
                     render_task: None,
                     clip_task: None,
                     local_rect: *local_rect,
                     local_clip_rect: *local_clip_rect,
                 };
 
                 self.cpu_images.push(image_cpu);
                 metadata
             }
-            PrimitiveContainer::YuvImage(mut image_cpu) => {
-                image_cpu.yuv_resource_address = self.gpu_resource_rects.alloc(3);
-
+            PrimitiveContainer::YuvImage(image_cpu) => {
                 let metadata = PrimitiveMetadata {
                     is_opaque: true,
                     clips: clips,
                     clip_cache_info: clip_info,
                     prim_kind: PrimitiveKind::YuvImage,
                     cpu_prim_index: SpecificPrimitiveIndex(self.cpu_yuv_images.len()),
                     gpu_location: GpuCacheHandle::new(),
                     render_task: None,
@@ -908,177 +886,16 @@ impl PrimitiveStore {
             }
         };
 
         self.cpu_metadata.push(metadata);
 
         PrimitiveIndex(prim_index)
     }
 
-    fn resolve_clip_cache_internal(gpu_data32: &mut VertexDataStore<GpuBlock32>,
-                                   clip_info: &MaskCacheInfo,
-                                   resource_cache: &ResourceCache) {
-        if let Some((ref mask, gpu_address)) = clip_info.image {
-            let cache_item = resource_cache.get_cached_image(mask.image, ImageRendering::Auto, None);
-            let mask_data = gpu_data32.get_slice_mut(gpu_address, MASK_DATA_GPU_SIZE);
-            mask_data[0] = GpuBlock32::from(ImageMaskData {
-                uv_rect: DeviceRect::new(cache_item.uv0,
-                                         DeviceSize::new(cache_item.uv1.x - cache_item.uv0.x,
-                                                         cache_item.uv1.y - cache_item.uv0.y)),
-                local_rect: mask.rect,
-            });
-        }
-    }
-
-    pub fn resolve_clip_cache(&mut self,
-                              clip_info: &MaskCacheInfo,
-                              resource_cache: &ResourceCache) {
-        Self::resolve_clip_cache_internal(&mut self.gpu_data32, clip_info, resource_cache)
-    }
-
-    fn resolve_image(resource_cache: &ResourceCache,
-                     deferred_resolves: &mut Vec<DeferredResolve>,
-                     image_key: ImageKey,
-                     image_uv_address: GpuStoreAddress,
-                     image_rendering: ImageRendering,
-                     tile_offset: Option<TileOffset>) -> (SourceTexture, Option<CacheItem>) {
-        let image_properties = resource_cache.get_image_properties(image_key);
-
-        // Check if an external image that needs to be resolved
-        // by the render thread.
-        match image_properties.external_image {
-            Some(external_image) => {
-                // This is an external texture - we will add it to
-                // the deferred resolves list to be patched by
-                // the render thread...
-                deferred_resolves.push(DeferredResolve {
-                    image_properties: image_properties,
-                    resource_address: image_uv_address,
-                });
-
-                (SourceTexture::External(external_image), None)
-            }
-            None => {
-                let cache_item = resource_cache.get_cached_image(image_key, image_rendering, tile_offset);
-                (cache_item.texture_id, Some(cache_item))
-            }
-        }
-    }
-
-    pub fn resolve_primitives(&mut self,
-                              resource_cache: &ResourceCache,
-                              device_pixel_ratio: f32) -> Vec<DeferredResolve> {
-        profile_scope!("resolve_primitives");
-        let mut deferred_resolves = Vec::new();
-
-        for prim_index in self.prims_to_resolve.drain(..) {
-            let metadata = &mut self.cpu_metadata[prim_index.0];
-            if let Some(ref clip_info) = metadata.clip_cache_info {
-                Self::resolve_clip_cache_internal(&mut self.gpu_data32, clip_info, resource_cache);
-            }
-
-            match metadata.prim_kind {
-                PrimitiveKind::Rectangle |
-                PrimitiveKind::Border |
-                PrimitiveKind::BoxShadow |
-                PrimitiveKind::AlignedGradient |
-                PrimitiveKind::AngleGradient |
-                PrimitiveKind::RadialGradient=> {}
-                PrimitiveKind::TextRun => {
-                    let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
-
-                    let font_size_dp = text.logical_font_size.scale_by(device_pixel_ratio);
-
-                    let dest_rects = self.gpu_resource_rects.get_slice_mut(text.resource_address,
-                                                                           text.glyph_count);
-
-                    let texture_id = resource_cache.get_glyphs(text.font_key,
-                                                               font_size_dp,
-                                                               text.color,
-                                                               &text.glyph_instances,
-                                                               text.render_mode,
-                                                               text.glyph_options, |index, uv0, uv1| {
-                        let dest_rect = &mut dest_rects[index];
-                        dest_rect.uv0 = uv0;
-                        dest_rect.uv1 = uv1;
-                    });
-
-                    text.color_texture_id = texture_id;
-                }
-                PrimitiveKind::Image => {
-                    let image_cpu = &mut self.cpu_images[metadata.cpu_prim_index.0];
-
-                    let (texture_id, cache_item) = match image_cpu.kind {
-                        ImagePrimitiveKind::Image(image_key, image_rendering, tile_offset, _) => {
-                            // Check if an external image that needs to be resolved
-                            // by the render thread.
-                            PrimitiveStore::resolve_image(resource_cache,
-                                                          &mut deferred_resolves,
-                                                          image_key,
-                                                          image_cpu.resource_address,
-                                                          image_rendering,
-                                                          tile_offset)
-                        }
-                        ImagePrimitiveKind::WebGL(context_id) => {
-                            let cache_item = resource_cache.get_webgl_texture(&context_id);
-                            (cache_item.texture_id, Some(cache_item))
-                        }
-                    };
-
-                    if let Some(cache_item) = cache_item {
-                        let resource_rect = self.gpu_resource_rects.get_mut(image_cpu.resource_address);
-                        match image_cpu.sub_rect {
-                            Some(sub_rect) => {
-                                resource_rect.uv0.x = cache_item.uv0.x + sub_rect.uv0.x;
-                                resource_rect.uv0.y = cache_item.uv0.y + sub_rect.uv0.y;
-                                resource_rect.uv1.x = cache_item.uv0.x + sub_rect.uv1.x;
-                                resource_rect.uv1.y = cache_item.uv0.y + sub_rect.uv1.y;
-                            }
-                            None => {
-                                resource_rect.uv0 = cache_item.uv0;
-                                resource_rect.uv1 = cache_item.uv1;
-                            }
-                        }
-                    }
-                    image_cpu.color_texture_id = texture_id;
-                }
-                PrimitiveKind::YuvImage => {
-                    let image_cpu = &mut self.cpu_yuv_images[metadata.cpu_prim_index.0];
-
-                    //yuv channel
-                    let channel_count = image_cpu.format.get_plane_num();
-                    debug_assert!(channel_count <= 3);
-                    for channel in 0..channel_count {
-                        // Check if an external image that needs to be resolved
-                        // by the render thread.
-                        let resource_address = image_cpu.yuv_resource_address + channel as i32;
-
-                        let (texture_id, cache_item) =
-                            PrimitiveStore::resolve_image(resource_cache,
-                                                          &mut deferred_resolves,
-                                                          image_cpu.yuv_key[channel],
-                                                          resource_address,
-                                                          image_cpu.image_rendering,
-                                                          None);
-                        // texture_id
-                        image_cpu.yuv_texture_id[channel] = texture_id;
-                        // uv coordinates
-                        if let Some(cache_item) = cache_item {
-                            let resource_rect = self.gpu_resource_rects.get_mut(image_cpu.yuv_resource_address + channel as i32);
-                            resource_rect.uv0 = cache_item.uv0;
-                            resource_rect.uv1 = cache_item.uv1;
-                        }
-                    }
-                }
-            }
-        }
-
-        deferred_resolves
-    }
-
     pub fn get_metadata(&self, index: PrimitiveIndex) -> &PrimitiveMetadata {
         &self.cpu_metadata[index.0]
     }
 
     pub fn prim_count(&self) -> usize {
         self.cpu_metadata.len()
     }
 
@@ -1109,28 +926,26 @@ impl PrimitiveStore {
                                    prim_index: PrimitiveIndex,
                                    resource_cache: &mut ResourceCache,
                                    gpu_cache: &mut GpuCache,
                                    layer_transform: &LayerToWorldTransform,
                                    device_pixel_ratio: f32,
                                    display_list: &BuiltDisplayList) {
 
         let metadata = &mut self.cpu_metadata[prim_index.0];
-        let mut prim_needs_resolve = false;
 
         if let Some(ref mut clip_info) = metadata.clip_cache_info {
             clip_info.update(&metadata.clips,
                              layer_transform,
                              &mut self.gpu_data32,
                              device_pixel_ratio,
                              display_list);
             for clip in &metadata.clips {
                 if let ClipSource::Region(ClipRegion{ image_mask: Some(ref mask), .. }, ..) = *clip {
                     resource_cache.request_image(mask.image, ImageRendering::Auto, None);
-                    prim_needs_resolve = true;
                 }
             }
         }
 
         match metadata.prim_kind {
             PrimitiveKind::Rectangle |
             PrimitiveKind::Border  => {}
             PrimitiveKind::BoxShadow => {
@@ -1148,46 +963,26 @@ impl PrimitiveStore {
                 let location = RenderTaskLocation::Dynamic(None, cache_size);
                 metadata.render_task.as_mut().unwrap().location = location;
             }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
 
                 let font_size_dp = text.logical_font_size.scale_by(device_pixel_ratio);
                 let src_glyphs = display_list.get(text.glyph_range);
-                prim_needs_resolve = true;
 
                 // Cache the glyph positions, if not in the cache already.
+                // TODO(gw): In the future, remove `glyph_instances`
+                //           completely, and just reference the glyphs
+                //           directly from the display list.
                 if text.glyph_instances.is_empty() {
-                    let mut glyph_key = GlyphKey::new(text.font_key,
-                                                      font_size_dp,
-                                                      text.color,
-                                                      0,
-                                                      LayoutPoint::new(0.0, 0.0),
-                                                      text.render_mode);
                     for src in src_glyphs {
-                        glyph_key.index = src.index;
-                        glyph_key.subpixel_point.set_offset(src.point, text.render_mode);
-
-                        let dimensions = match resource_cache.get_glyph_dimensions(&glyph_key) {
-                            None => continue,
-                            Some(dimensions) => dimensions,
-                        };
-
-                        // TODO(gw): Check for this and ensure platforms return None in this case!!!
-                        debug_assert!(dimensions.width > 0 && dimensions.height > 0);
-
-                        let x = src.point.x + dimensions.left as f32 / device_pixel_ratio;
-                        let y = src.point.y - dimensions.top as f32 / device_pixel_ratio;
-
-                        let glyph_pos = LayerPoint::new(x, y);
-
                         text.glyph_instances.push(GlyphInstance {
                             index: src.index,
-                            point: glyph_pos,
+                            point: src.point,
                         });
                     }
                 }
 
                 metadata.render_task = if text.blur_radius == 0.0 {
                     None
                 } else {
                     // This is a text-shadow element. Create a render task that will
@@ -1211,17 +1006,16 @@ impl PrimitiveStore {
                                               text.color,
                                               &text.glyph_instances,
                                               text.render_mode,
                                               text.glyph_options);
             }
             PrimitiveKind::Image => {
                 let image_cpu = &mut self.cpu_images[metadata.cpu_prim_index.0];
 
-                prim_needs_resolve = true;
                 match image_cpu.kind {
                     ImagePrimitiveKind::Image(image_key, image_rendering, tile_offset, tile_spacing) => {
                         resource_cache.request_image(image_key, image_rendering, tile_offset);
 
                         // TODO(gw): This doesn't actually need to be calculated each frame.
                         // It's cheap enough that it's not worth introducing a cache for images
                         // right now, but if we introduce a cache for images for some other
                         // reason then we might as well cache this with it.
@@ -1230,17 +1024,16 @@ impl PrimitiveStore {
                                              tile_spacing.width == 0.0 &&
                                              tile_spacing.height == 0.0;
                     }
                     ImagePrimitiveKind::WebGL(..) => {}
                 }
             }
             PrimitiveKind::YuvImage => {
                 let image_cpu = &mut self.cpu_yuv_images[metadata.cpu_prim_index.0];
-                prim_needs_resolve = true;
 
                 let channel_num = image_cpu.format.get_plane_num();
                 debug_assert!(channel_num <= 3);
                 for channel in 0..channel_num {
                     resource_cache.request_image(image_cpu.yuv_key[channel], image_cpu.image_rendering, None);
                 }
 
                 // TODO(nical): Currently assuming no tile_spacing for yuv images.
@@ -1293,20 +1086,16 @@ impl PrimitiveStore {
                                                                request);
                 }
                 PrimitiveKind::TextRun => {
                     let text = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                     text.write_gpu_blocks(request);
                 }
             }
         }
-
-        if prim_needs_resolve {
-            self.prims_to_resolve.push(prim_index);
-        }
     }
 }
 
 
 macro_rules! define_gpu_block {
     ($name:ident: $ty:ty = $($derive:ident),* ) => (
         #[derive(Clone)]
         #[repr(C)]
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -536,17 +536,17 @@ impl RenderBackend {
         //           cleaner way to do this, or use the OnceMutex on crates.io?
         let mut notifier = self.notifier.lock();
         notifier.as_mut().unwrap().as_mut().unwrap().new_scroll_frame_ready(composite_needed);
     }
 
     fn handle_vr_compositor_command(&mut self, ctx_id: WebGLContextId, cmd: VRCompositorCommand) {
         let texture = match cmd {
             VRCompositorCommand::SubmitFrame(..) => {
-                    match self.resource_cache.get_webgl_texture(&ctx_id).texture_id {
+                    match self.resource_cache.get_webgl_texture(&ctx_id).id {
                         SourceTexture::WebGL(texture_id) => {
                             let size = self.resource_cache.get_webgl_texture_size(&ctx_id);
                             Some((texture_id, size))
                         },
                         _=> None
                     }
             },
             _ => None
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -13,17 +13,17 @@ use debug_colors;
 use debug_render::DebugRenderer;
 use device::{DepthFunction, Device, FrameId, ProgramId, TextureId, VertexFormat, GpuMarker, GpuProfiler};
 use device::{GpuSample, TextureFilter, VAOId, VertexUsageHint, FileWatcherHandler, TextureTarget, ShaderError};
 use device::get_gl_format_bgra;
 use euclid::Transform3D;
 use fnv::FnvHasher;
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
-use gpu_cache::{GpuCacheUpdate, GpuCacheUpdateList};
+use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_store::{GpuStore, GpuStoreLayout};
 use internal_types::{CacheTextureId, RendererFrame, ResultMsg, TextureUpdateOp};
 use internal_types::{TextureUpdateList, PackedVertex, RenderTargetMode};
 use internal_types::{ORTHO_NEAR_PLANE, ORTHO_FAR_PLANE, SourceTexture};
 use internal_types::{BatchTextures, TextureSampler};
 use profiler::{Profiler, BackendProfileCounters};
 use profiler::{GpuProfileTag, RendererProfileTimers, RendererProfileCounters};
 use record::ApiRecordingReceiver;
@@ -48,17 +48,17 @@ use rayon::Configuration as ThreadPoolCo
 use tiling::{AlphaBatchKind, BlurCommand, CompositePrimitiveInstance, Frame, PrimitiveBatch, RenderTarget};
 use tiling::{AlphaRenderTarget, CacheClipInstance, PrimitiveInstance, ColorRenderTarget, RenderTargetKind};
 use time::precise_time_ns;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use util::TransformedRectKind;
 use webgl_types::GLContextHandleWrapper;
 use webrender_traits::{ColorF, Epoch, PipelineId, RenderNotifier, RenderDispatcher};
 use webrender_traits::{ExternalImageId, ExternalImageType, ImageData, ImageFormat, RenderApiSender};
-use webrender_traits::{DeviceIntRect, DeviceUintRect, DevicePoint, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
+use webrender_traits::{DeviceIntRect, DeviceUintRect, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
 use webrender_traits::{BlobImageRenderer, channel, FontRenderMode};
 use webrender_traits::VRCompositorHandler;
 use webrender_traits::{YuvColorSpace, YuvFormat};
 use webrender_traits::{YUV_COLOR_SPACES, YUV_FORMATS};
 
 pub const GPU_DATA_TEXTURE_POOL: usize = 5;
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
@@ -202,16 +202,45 @@ impl CacheTexture {
         let ids = device.create_texture_ids(2, TextureTarget::Default);
 
         CacheTexture {
             current_id: ids[0],
             next_id: ids[1],
         }
     }
 
+    fn apply_patch(&mut self,
+                   device: &mut Device,
+                   update: &GpuCacheUpdate,
+                   blocks: &[GpuBlockData]) {
+        match update {
+            &GpuCacheUpdate::Copy { block_index, block_count, address } => {
+                // Apply an incremental update to the cache texture.
+                // TODO(gw): For the initial implementation, we will just
+                //           use update_texture() since it's simple. If / when
+                //           we profile this and find it to be slow on some / all
+                //           devices - we can look into other options, such as
+                //           using glMapBuffer() with the unsynchronized bit,
+                //           and managing the synchronization ourselves with fences.
+                let data: &[u8] = unsafe {
+                    let ptr = blocks.as_ptr()
+                                    .offset(block_index as isize);
+                    slice::from_raw_parts(ptr as *const _, block_count * 16)
+                };
+                device.update_texture(self.current_id,
+                                      address.u as u32,
+                                      address.v as u32,
+                                      block_count as u32,
+                                      1,
+                                      None,
+                                      data);
+            }
+        }
+    }
+
     fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
         // See if we need to create or resize the texture.
         let current_dimensions = device.get_texture_dimensions(self.current_id);
         if updates.height > current_dimensions.height {
             // Create a f32 texture that can be used for the vertex shader
             // to fetch data from.
             device.init_texture(self.next_id,
                                 MAX_VERTEX_TEXTURE_WIDTH as u32,
@@ -238,40 +267,17 @@ impl CacheTexture {
                 // Free the GPU memory for that texture until we need to resize again.
                 device.deinit_texture(self.current_id);
             }
 
             mem::swap(&mut self.current_id, &mut self.next_id);
         }
 
         for update in &updates.updates {
-            match update {
-                &GpuCacheUpdate::Copy { block_index, block_count, address } => {
-                    // Apply an incremental update to the cache texture.
-                    // TODO(gw): For the initial implementation, we will just
-                    //           use update_texture() since it's simple. If / when
-                    //           we profile this and find it to be slow on some / all
-                    //           devices - we can look into other options, such as
-                    //           using glMapBuffer() with the unsynchronized bit,
-                    //           and managing the synchronization ourselves with fences.
-                    let data: &[u8] = unsafe {
-                        let ptr = updates.blocks
-                                         .as_ptr()
-                                         .offset(block_index as isize);
-                        slice::from_raw_parts(ptr as *const _, block_count * 16)
-                    };
-                    device.update_texture(self.current_id,
-                                          address.u as u32,
-                                          address.v as u32,
-                                          block_count as u32,
-                                          1,
-                                          None,
-                                          data);
-                }
-            }
+            self.apply_patch(device, update, &updates.blocks);
         }
     }
 }
 
 struct GpuDataTexture<L> {
     id: TextureId,
     layout: PhantomData<L>,
 }
@@ -501,39 +507,35 @@ fn create_clip_shader(name: &'static str
     let includes = &["prim_shared", "clip_shared"];
     device.create_program_with_prefix(name, includes, Some(prefix), VertexFormat::Clip)
 }
 
 struct GpuDataTextures {
     layer_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
     data32_texture: VertexDataTexture,
-    resource_rects_texture: VertexDataTexture,
 }
 
 impl GpuDataTextures {
     fn new(device: &mut Device) -> GpuDataTextures {
         GpuDataTextures {
             layer_texture: VertexDataTexture::new(device),
             render_task_texture: VertexDataTexture::new(device),
             data32_texture: VertexDataTexture::new(device),
-            resource_rects_texture: VertexDataTexture::new(device),
         }
     }
 
     fn init_frame(&mut self, device: &mut Device, frame: &mut Frame) {
         self.data32_texture.init(device, &mut frame.gpu_data32);
-        self.resource_rects_texture.init(device, &mut frame.gpu_resource_rects);
         self.layer_texture.init(device, &mut frame.layer_texture_data);
         self.render_task_texture.init(device, &mut frame.render_task_data);
 
         device.bind_texture(TextureSampler::Layers, self.layer_texture.id);
         device.bind_texture(TextureSampler::RenderTasks, self.render_task_texture.id);
         device.bind_texture(TextureSampler::Data32, self.data32_texture.id);
-        device.bind_texture(TextureSampler::ResourceRects, self.resource_rects_texture.id);
     }
 }
 
 #[derive(Clone, Debug, PartialEq)]
 pub enum ReadPixelsFormat {
     Rgba8,
     Bgra8,
 }
@@ -953,17 +955,17 @@ impl Renderer {
 
         let texture_cache = TextureCache::new(max_texture_size);
         let backend_profile_counters = BackendProfileCounters::new();
 
         let dummy_cache_texture_id = device.create_texture_ids(1, TextureTarget::Array)[0];
         device.init_texture(dummy_cache_texture_id,
                             1,
                             1,
-                            ImageFormat::RGBA8,
+                            ImageFormat::BGRA8,
                             TextureFilter::Linear,
                             RenderTargetMode::LayerRenderTarget(1),
                             None);
 
         let dither_matrix_texture_id = if options.enable_dithering {
             let dither_matrix: [u8; 64] = [
                 00, 48, 12, 60, 03, 51, 15, 63,
                 32, 16, 44, 28, 35, 19, 47, 31,
@@ -1309,19 +1311,22 @@ impl Renderer {
                         let _gm = GpuMarker::new(self.device.rc_gl(), "begin frame");
                         let frame_id = self.device.begin_frame(frame.device_pixel_ratio);
                         self.gpu_profile.begin_frame(frame_id);
 
                         self.device.disable_scissor();
                         self.device.disable_depth();
                         self.device.set_blend(false);
                         //self.update_shaders();
+
                         self.update_texture_cache();
 
                         self.update_gpu_cache();
+                        self.update_deferred_resolves(frame);
+
                         self.device.bind_texture(TextureSampler::ResourceCache, self.gpu_cache_texture.current_id);
 
                         frame_id
                     };
 
                     self.draw_tile_frame(frame, &framebuffer_size);
 
                     self.gpu_profile.end_frame();
@@ -1973,20 +1978,24 @@ impl Renderer {
                 };
 
                 let texture_id = match image.source {
                     ExternalImageSource::NativeTexture(texture_id) => TextureId::new(texture_id, texture_target),
                     _ => panic!("No native texture found."),
                 };
 
                 self.external_images.insert((ext_image.id, ext_image.channel_index), texture_id);
-                let resource_rect_index = deferred_resolve.resource_address.0 as usize;
-                let resource_rect = &mut frame.gpu_resource_rects[resource_rect_index];
-                resource_rect.uv0 = DevicePoint::new(image.u0, image.v0);
-                resource_rect.uv1 = DevicePoint::new(image.u1, image.v1);
+
+                let update = GpuCacheUpdate::Copy {
+                    block_index: 0,
+                    block_count: 1,
+                    address: deferred_resolve.address,
+                };
+                let blocks = [ [image.u0, image.v0, image.u1, image.v1].into() ];
+                self.gpu_cache_texture.apply_patch(&mut self.device, &update, &blocks);
             }
         }
     }
 
     fn unlock_external_images(&mut self) {
         if !self.external_images.is_empty() {
             let handler = self.external_image_handler
                               .as_mut()
@@ -2028,17 +2037,17 @@ impl Renderer {
 
         // Init textures and render targets to match this scene.
         for pass in &frame.passes {
             if let Some(texture_id) = pass.color_texture_id {
                 let target_count = pass.required_target_count(RenderTargetKind::Color);
                 self.device.init_texture(texture_id,
                                          frame.cache_size.width as u32,
                                          frame.cache_size.height as u32,
-                                         ImageFormat::RGBA8,
+                                         ImageFormat::BGRA8,
                                          TextureFilter::Linear,
                                          RenderTargetMode::LayerRenderTarget(target_count as i32),
                                          None);
             }
             if let Some(texture_id) = pass.alpha_texture_id {
                 let target_count = pass.required_target_count(RenderTargetKind::Alpha);
                 self.device.init_texture(texture_id,
                                          frame.cache_size.width as u32,
@@ -2057,17 +2066,16 @@ impl Renderer {
         self.gpu_data_textures[self.gdt_index].init_frame(&mut self.device, frame);
         self.gdt_index = (self.gdt_index + 1) % GPU_DATA_TEXTURE_POOL;
     }
 
     fn draw_tile_frame(&mut self,
                        frame: &mut Frame,
                        framebuffer_size: &DeviceUintSize) {
         let _gm = GpuMarker::new(self.device.rc_gl(), "tile frame draw");
-        self.update_deferred_resolves(frame);
 
         // Some tests use a restricted viewport smaller than the main screen size.
         // Ensure we clear the framebuffer in these tests.
         // TODO(gw): Find a better solution for this?
         let needs_clear = frame.window_size.width < framebuffer_size.width ||
                           frame.window_size.height < framebuffer_size.height;
 
         self.device.disable_depth_write();
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -1,16 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
 use device::TextureFilter;
 use fnv::FnvHasher;
 use frame::FrameId;
+use gpu_cache::{GpuCache, GpuCacheHandle};
 use internal_types::{SourceTexture, TextureUpdateList};
 use profiler::TextureCacheProfileCounters;
 use std::collections::{HashMap, HashSet};
 use std::collections::hash_map::Entry::{self, Occupied, Vacant};
 use std::fmt::Debug;
 use std::hash::BuildHasherDefault;
 use std::hash::Hash;
 use std::mem;
@@ -34,18 +35,17 @@ const DEFAULT_TILE_SIZE: TileSize = 512;
 // for this is that the texture may change
 // dimensions (e.g. the pages in a texture
 // atlas can grow). When this happens, by
 // storing the coordinates as texel values
 // we don't need to go through and update
 // various CPU-side structures.
 pub struct CacheItem {
     pub texture_id: SourceTexture,
-    pub uv0: DevicePoint,
-    pub uv1: DevicePoint,
+    pub uv_rect_handle: GpuCacheHandle,
 }
 
 pub struct ImageProperties {
     pub descriptor: ImageDescriptor,
     pub external_image: Option<ExternalImageData>,
     pub tiling: Option<TileSize>,
 }
 
@@ -105,36 +105,32 @@ pub struct ResourceClassCache<K,V> {
 impl<K,V> ResourceClassCache<K,V> where K: Clone + Hash + Eq + Debug, V: Resource {
     pub fn new() -> ResourceClassCache<K,V> {
         ResourceClassCache {
             resources: HashMap::default(),
             last_access_times: HashMap::default(),
         }
     }
 
-    pub fn contains_key(&self, key: &K) -> bool {
-        self.resources.contains_key(key)
-    }
-
     fn get(&self, key: &K, frame: FrameId) -> &V {
         // This assert catches cases in which we accidentally request a resource that we forgot to
         // mark as needed this frame.
         debug_assert_eq!(frame, *self.last_access_times
                                      .get(key)
                                      .expect("Didn't find the access time for a cached resource \
                                               with that ID!"));
         self.resources.get(key).expect("Didn't find a cached resource with that ID!")
     }
 
     pub fn insert(&mut self, key: K, value: V, frame: FrameId) {
         self.last_access_times.insert(key.clone(), frame);
         self.resources.insert(key, value);
     }
 
-    fn entry(&mut self, key: K, frame: FrameId) -> Entry<K,V> {
+    pub fn entry(&mut self, key: K, frame: FrameId) -> Entry<K,V> {
         self.last_access_times.insert(key.clone(), frame);
         self.resources.entry(key)
     }
 
     pub fn mark_as_needed(&mut self, key: &K, frame: FrameId) {
         self.last_access_times.insert((*key).clone(), frame);
     }
 
@@ -170,19 +166,19 @@ impl Into<BlobImageRequest> for ImageReq
     fn into(self) -> BlobImageRequest {
         BlobImageRequest {
             key: self.key,
             tile: self.tile,
         }
     }
 }
 
-struct WebGLTexture {
-    id: SourceTexture,
-    size: DeviceIntSize,
+pub struct WebGLTexture {
+    pub id: SourceTexture,
+    pub size: DeviceIntSize,
 }
 
 struct Resources {
     font_templates: HashMap<FontKey, FontTemplate, BuildHasherDefault<FnvHasher>>,
     image_templates: ImageTemplates,
 }
 
 impl BlobImageResources for Resources {
@@ -209,16 +205,19 @@ pub struct ResourceCache {
 
     // TODO(gw): We should expire (parts of) this cache semi-regularly!
     cached_glyph_dimensions: HashMap<GlyphKey, Option<GlyphDimensions>, BuildHasherDefault<FnvHasher>>,
     pending_image_requests: Vec<ImageRequest>,
     glyph_rasterizer: GlyphRasterizer,
 
     blob_image_renderer: Option<Box<BlobImageRenderer>>,
     blob_image_requests: HashSet<ImageRequest>,
+
+    requested_glyphs: HashSet<TextureCacheItemId, BuildHasherDefault<FnvHasher>>,
+    requested_images: HashSet<TextureCacheItemId, BuildHasherDefault<FnvHasher>>,
 }
 
 impl ResourceCache {
     pub fn new(texture_cache: TextureCache,
                workers: Arc<ThreadPool>,
                blob_image_renderer: Option<Box<BlobImageRenderer>>) -> ResourceCache {
         ResourceCache {
             cached_glyphs: ResourceClassCache::new(),
@@ -232,16 +231,19 @@ impl ResourceCache {
             texture_cache: texture_cache,
             state: State::Idle,
             current_frame_id: FrameId(0),
             pending_image_requests: Vec::new(),
             glyph_rasterizer: GlyphRasterizer::new(workers),
 
             blob_image_renderer: blob_image_renderer,
             blob_image_requests: HashSet::new(),
+
+            requested_glyphs: HashSet::default(),
+            requested_images: HashSet::default(),
         }
     }
 
     pub fn max_texture_size(&self) -> u32 {
         self.texture_cache.max_texture_size()
     }
 
     fn should_tile(&self, descriptor: &ImageDescriptor, data: &ImageData) -> bool {
@@ -387,21 +389,32 @@ impl ResourceCache {
         };
 
         let template = self.resources.image_templates.get(key).unwrap();
         if template.data.uses_texture_cache() {
             self.cached_images.mark_as_needed(&request, self.current_frame_id);
         }
         if template.data.is_blob() {
             if let Some(ref mut renderer) = self.blob_image_renderer {
-                let same_epoch = match self.cached_images.resources.get(&request) {
-                    Some(entry) => entry.epoch == template.epoch,
-                    None => false,
+                let (same_epoch, texture_cache_id) = match self.cached_images.resources
+                                                               .get(&request) {
+                    Some(entry) => {
+                        (entry.epoch == template.epoch, Some(entry.texture_cache_id))
+                    }
+                    None => {
+                        (false, None)
+                    }
                 };
 
+                // Ensure that blobs are added to the list of requested items
+                // for the GPU cache, even if the cached blob image is up to date.
+                if let Some(texture_cache_id) = texture_cache_id {
+                    self.requested_images.insert(texture_cache_id);
+                }
+
                 if !same_epoch && self.blob_image_requests.insert(request) {
                     let (offset, w, h) = match template.tiling {
                         Some(tile_size) => {
                             let tile_offset = request.tile.unwrap();
                             let (w, h) = compute_tile_size(&template.descriptor, tile_size, tile_offset);
                             let offset = DevicePoint::new(
                                 tile_offset.x as f32 * tile_size as f32,
                                 tile_offset.y as f32 * tile_size as f32,
@@ -445,31 +458,32 @@ impl ResourceCache {
             &mut self.cached_glyphs,
             self.current_frame_id,
             key,
             size,
             color,
             glyph_instances,
             render_mode,
             glyph_options,
+            &mut self.requested_glyphs,
         );
     }
 
     pub fn pending_updates(&mut self) -> TextureUpdateList {
         self.texture_cache.pending_updates()
     }
 
     pub fn get_glyphs<F>(&self,
                          font_key: FontKey,
                          size: Au,
                          color: ColorF,
                          glyph_instances: &[GlyphInstance],
                          render_mode: FontRenderMode,
                          glyph_options: Option<GlyphOptions>,
-                         mut f: F) -> SourceTexture where F: FnMut(usize, DevicePoint, DevicePoint) {
+                         mut f: F) -> SourceTexture where F: FnMut(usize, &GpuCacheHandle) {
         debug_assert_eq!(self.state, State::QueryResources);
         let mut glyph_key = GlyphRequest::new(
             font_key,
             size,
             color,
             0,
             LayoutPoint::new(0.0, 0.0),
             render_mode,
@@ -478,21 +492,17 @@ impl ResourceCache {
         let mut texture_id = None;
         for (loop_index, glyph_instance) in glyph_instances.iter().enumerate() {
             glyph_key.key.index = glyph_instance.index;
             glyph_key.key.subpixel_point.set_offset(glyph_instance.point, render_mode);
 
             let image_id = self.cached_glyphs.get(&glyph_key, self.current_frame_id);
             let cache_item = image_id.map(|image_id| self.texture_cache.get(image_id));
             if let Some(cache_item) = cache_item {
-                let uv0 = DevicePoint::new(cache_item.pixel_rect.top_left.x as f32,
-                                           cache_item.pixel_rect.top_left.y as f32);
-                let uv1 = DevicePoint::new(cache_item.pixel_rect.bottom_right.x as f32,
-                                           cache_item.pixel_rect.bottom_right.y as f32);
-                f(loop_index, uv0, uv1);
+                f(loop_index, &cache_item.uv_rect_handle);
                 debug_assert!(texture_id == None ||
                               texture_id == Some(cache_item.texture_id));
                 texture_id = Some(cache_item.texture_id);
             }
         }
 
         texture_id.map_or(SourceTexture::Invalid, SourceTexture::TextureCache)
     }
@@ -516,20 +526,17 @@ impl ResourceCache {
             key: image_key,
             rendering: image_rendering,
             tile: tile,
         };
         let image_info = &self.cached_images.get(&key, self.current_frame_id);
         let item = self.texture_cache.get(image_info.texture_cache_id);
         CacheItem {
             texture_id: SourceTexture::TextureCache(item.texture_id),
-            uv0: DevicePoint::new(item.pixel_rect.top_left.x as f32,
-                                  item.pixel_rect.top_left.y as f32),
-            uv1: DevicePoint::new(item.pixel_rect.bottom_right.x as f32,
-                                  item.pixel_rect.bottom_right.y as f32),
+            uv_rect_handle: item.uv_rect_handle,
         }
     }
 
     pub fn get_image_properties(&self, image_key: ImageKey) -> ImageProperties {
         let image_template = &self.resources.image_templates.get(image_key).unwrap();
 
         let external_image = match image_template.data {
             ImageData::External(ext_image) => {
@@ -549,52 +556,50 @@ impl ResourceCache {
 
         ImageProperties {
             descriptor: image_template.descriptor,
             external_image: external_image,
             tiling: image_template.tiling,
         }
     }
 
-    #[inline]
-    pub fn get_webgl_texture(&self, context_id: &WebGLContextId) -> CacheItem {
-        let webgl_texture = &self.webgl_textures[context_id];
-        CacheItem {
-            texture_id: webgl_texture.id,
-            uv0: DevicePoint::new(0.0, webgl_texture.size.height as f32),
-            uv1: DevicePoint::new(webgl_texture.size.width as f32, 0.0),
-        }
+    pub fn get_webgl_texture(&self, context_id: &WebGLContextId) -> &WebGLTexture {
+        &self.webgl_textures[context_id]
     }
 
     pub fn get_webgl_texture_size(&self, context_id: &WebGLContextId) -> DeviceIntSize {
         self.webgl_textures[context_id].size
     }
 
     pub fn expire_old_resources(&mut self, frame_id: FrameId) {
         self.cached_images.expire_old_resources(&mut self.texture_cache, frame_id);
         self.cached_glyphs.expire_old_resources(&mut self.texture_cache, frame_id);
     }
 
     pub fn begin_frame(&mut self, frame_id: FrameId) {
         debug_assert_eq!(self.state, State::Idle);
         self.state = State::AddResources;
         self.current_frame_id = frame_id;
+        debug_assert!(self.requested_glyphs.is_empty());
+        debug_assert!(self.requested_images.is_empty());
     }
 
     pub fn block_until_all_resources_added(&mut self,
+                                           gpu_cache: &mut GpuCache,
                                            texture_cache_profile: &mut TextureCacheProfileCounters) {
         profile_scope!("block_until_all_resources_added");
 
         debug_assert_eq!(self.state, State::AddResources);
         self.state = State::QueryResources;
 
         self.glyph_rasterizer.resolve_glyphs(
             self.current_frame_id,
             &mut self.cached_glyphs,
             &mut self.texture_cache,
+            &mut self.requested_glyphs,
             texture_cache_profile,
         );
 
         let mut image_requests = mem::replace(&mut self.pending_image_requests, Vec::new());
         for request in image_requests.drain(..) {
             self.finalize_image_request(request, None, texture_cache_profile);
         }
 
@@ -621,16 +626,31 @@ impl ResourceCache {
                         panic!("Invalid vector image data");
                     }
                     Err(BlobImageError::Other(msg)) => {
                         panic!("Vector image error {}", msg);
                     }
                 }
             }
         }
+
+        for texture_cache_item_id in self.requested_images.drain() {
+            let item = self.texture_cache.get_mut(texture_cache_item_id);
+            if let Some(mut request) = gpu_cache.request(&mut item.uv_rect_handle) {
+                request.push(item.uv_rect.into());
+            }
+        }
+
+        for texture_cache_item_id in self.requested_glyphs.drain() {
+            let item = self.texture_cache.get_mut(texture_cache_item_id);
+            if let Some(mut request) = gpu_cache.request(&mut item.uv_rect_handle) {
+                request.push(item.uv_rect.into());
+                request.push([item.user_data[0], item.user_data[1], 0.0, 0.0].into());
+            }
+        }
     }
 
     fn update_texture_cache(&mut self,
                             request: &ImageRequest,
                             image_data: Option<ImageData>,
                             texture_cache_profile: &mut TextureCacheProfileCounters) {
         let image_template = self.resources.image_templates.get_mut(request.key).unwrap();
         let image_data = image_data.unwrap_or_else(||{
@@ -665,51 +685,58 @@ impl ResourceCache {
                 offset: offset,
                 format: image_descriptor.format,
                 is_opaque: image_descriptor.is_opaque,
             }
         } else {
             image_template.descriptor.clone()
         };
 
-        match self.cached_images.entry(*request, self.current_frame_id) {
+        let image_id = match self.cached_images.entry(*request, self.current_frame_id) {
             Occupied(entry) => {
                 let image_id = entry.get().texture_cache_id;
 
                 if entry.get().epoch != image_template.epoch {
                     self.texture_cache.update(image_id,
                                               descriptor,
                                               image_data,
                                               image_template.dirty_rect);
 
                     // Update the cached epoch
                     *entry.into_mut() = CachedImageInfo {
                         texture_cache_id: image_id,
                         epoch: image_template.epoch,
                     };
                     image_template.dirty_rect = None;
                 }
+
+                image_id
             }
             Vacant(entry) => {
                 let filter = match request.rendering {
                     ImageRendering::Pixelated => TextureFilter::Nearest,
                     ImageRendering::Auto | ImageRendering::CrispEdges => TextureFilter::Linear,
                 };
 
                 let image_id = self.texture_cache.insert(descriptor,
                                                          filter,
                                                          image_data,
+                                                         [0.0; 2],
                                                          texture_cache_profile);
 
                 entry.insert(CachedImageInfo {
                     texture_cache_id: image_id,
                     epoch: image_template.epoch,
                 });
+
+                image_id
             }
-        }
+        };
+
+        self.requested_images.insert(image_id);
     }
     fn finalize_image_request(&mut self,
                               request: ImageRequest,
                               image_data: Option<ImageData>,
                               texture_cache_profile: &mut TextureCacheProfileCounters) {
         match self.resources.image_templates.get(request.key).unwrap().data {
             ImageData::External(ext_image) => {
                 match ext_image.image_type {
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -1,29 +1,30 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use device::TextureFilter;
 use fnv::FnvHasher;
 use freelist::{FreeList, FreeListItem, FreeListItemId};
-use internal_types::{TextureUpdate, TextureUpdateOp};
-use internal_types::{CacheTextureId, RenderTargetMode, TextureUpdateList, RectUv};
+use gpu_cache::GpuCacheHandle;
+use internal_types::{TextureUpdate, TextureUpdateOp, UvRect};
+use internal_types::{CacheTextureId, RenderTargetMode, TextureUpdateList};
 use profiler::TextureCacheProfileCounters;
 use std::cmp;
 use std::collections::HashMap;
 use std::collections::hash_map::Entry;
 use std::hash::BuildHasherDefault;
 use std::mem;
 use std::slice::Iter;
 use time;
 use util;
-use webrender_traits::{ExternalImageType, ImageData, ImageFormat, DevicePixel, DeviceIntPoint};
+use webrender_traits::{ExternalImageType, ImageData, ImageFormat};
 use webrender_traits::{DeviceUintRect, DeviceUintSize, DeviceUintPoint};
-use webrender_traits::ImageDescriptor;
+use webrender_traits::{DevicePoint, ImageDescriptor};
 
 /// The number of bytes we're allowed to use for a texture.
 const MAX_BYTES_PER_TEXTURE: u32 = 1024 * 1024 * 256;  // 256MB
 
 /// The number of RGBA pixels we're allowed to use for a texture.
 const MAX_RGBA_PIXELS_PER_TEXTURE: u32 = MAX_BYTES_PER_TEXTURE / 4;
 
 /// The desired initial size of each texture, in pixels.
@@ -438,20 +439,28 @@ impl FreeListBin {
 }
 
 #[derive(Debug, Clone)]
 pub struct TextureCacheItem {
     // Identifies the texture and array slice
     pub texture_id: CacheTextureId,
 
     // The texture coordinates for this item
-    pub pixel_rect: RectUv<i32, DevicePixel>,
+    pub uv_rect: UvRect,
 
     // The size of the allocated rectangle.
     pub allocated_rect: DeviceUintRect,
+
+    // Handle to the location of the UV rect for this item in GPU cache.
+    pub uv_rect_handle: GpuCacheHandle,
+
+    // Some arbitrary data associated with this item.
+    // In the case of glyphs, it is the top / left offset
+    // from the rasterized glyph.
+    pub user_data: [f32; 2],
 }
 
 // Structure squat the width/height fields to maintain the free list information :)
 impl FreeListItem for TextureCacheItem {
     fn take(&mut self) -> Self {
         let data = self.clone();
         self.texture_id = CacheTextureId(0);
         data
@@ -478,31 +487,30 @@ impl FreeListItem for TextureCacheItem {
                 self.allocated_rect.size.height = 0;
             }
         }
     }
 }
 
 impl TextureCacheItem {
     fn new(texture_id: CacheTextureId,
-           rect: DeviceUintRect)
+           rect: DeviceUintRect,
+           user_data: [f32; 2])
            -> TextureCacheItem {
         TextureCacheItem {
             texture_id: texture_id,
-            pixel_rect: RectUv {
-                top_left: DeviceIntPoint::new(rect.origin.x as i32,
-                                              rect.origin.y as i32),
-                top_right: DeviceIntPoint::new((rect.origin.x + rect.size.width) as i32,
-                                                rect.origin.y as i32),
-                bottom_left: DeviceIntPoint::new(rect.origin.x as i32,
-                                                (rect.origin.y + rect.size.height) as i32),
-                bottom_right: DeviceIntPoint::new((rect.origin.x + rect.size.width) as i32,
-                                                  (rect.origin.y + rect.size.height) as i32)
+            uv_rect: UvRect {
+                uv0: DevicePoint::new(rect.origin.x as f32,
+                                      rect.origin.y as f32),
+                uv1: DevicePoint::new((rect.origin.x + rect.size.width) as f32,
+                                      (rect.origin.y + rect.size.height) as f32),
             },
             allocated_rect: rect,
+            uv_rect_handle: GpuCacheHandle::new(),
+            user_data: user_data,
         }
     }
 }
 
 struct TextureCacheArena {
     pages_a8: Vec<TexturePage>,
     pages_rgb8: Vec<TexturePage>,
     pages_rgba8: Vec<TexturePage>,
@@ -607,44 +615,46 @@ impl TextureCache {
         mem::replace(&mut self.pending_updates, TextureUpdateList::new())
     }
 
     pub fn allocate(&mut self,
                     requested_width: u32,
                     requested_height: u32,
                     format: ImageFormat,
                     filter: TextureFilter,
+                    user_data: [f32; 2],
                     profile: &mut TextureCacheProfileCounters)
                     -> AllocationResult {
         let requested_size = DeviceUintSize::new(requested_width, requested_height);
 
         // TODO(gw): For now, anything that requests nearest filtering
         //           just fails to allocate in a texture page, and gets a standalone
         //           texture. This isn't ideal, as it causes lots of batch breaks,
         //           but is probably rare enough that it can be fixed up later (it's also
         //           fairly trivial to implement, just tedious).
         if filter == TextureFilter::Nearest {
             // Fall back to standalone texture allocation.
             let texture_id = self.cache_id_list.allocate();
             let cache_item = TextureCacheItem::new(
                 texture_id,
-                DeviceUintRect::new(DeviceUintPoint::zero(), requested_size));
+                DeviceUintRect::new(DeviceUintPoint::zero(), requested_size),
+                user_data);
             let image_id = self.items.insert(cache_item);
 
             return AllocationResult {
                 item: self.items.get(image_id).clone(),
                 kind: AllocationKind::Standalone,
                 image_id: image_id,
             }
         }
 
         let mode = RenderTargetMode::SimpleRenderTarget;
         let (page_list, page_profile) = match format {
             ImageFormat::A8 => (&mut self.arena.pages_a8, &mut profile.pages_a8),
-            ImageFormat::RGBA8 => (&mut self.arena.pages_rgba8, &mut profile.pages_rgba8),
+            ImageFormat::BGRA8 => (&mut self.arena.pages_rgba8, &mut profile.pages_rgba8),
             ImageFormat::RGB8 => (&mut self.arena.pages_rgb8, &mut profile.pages_rgb8),
             ImageFormat::RG8 => (&mut self.arena.pages_rg8, &mut profile.pages_rg8),
             ImageFormat::Invalid | ImageFormat::RGBAF32 => unreachable!(),
         };
 
         // TODO(gw): Handle this sensibly (support failing to render items that can't fit?)
         assert!(requested_size.width <= self.max_texture_size);
         assert!(requested_size.height <= self.max_texture_size);
@@ -717,17 +727,18 @@ impl TextureCache {
                 page_list.push(page);
                 page_list.last_mut().unwrap()
             },
         };
 
         let location = page.allocate(&requested_size)
                            .expect("All the checks have passed till now, there is no way back.");
         let cache_item = TextureCacheItem::new(page.texture_id,
-                                               DeviceUintRect::new(location, requested_size));
+                                               DeviceUintRect::new(location, requested_size),
+                                               user_data);
         let image_id = self.items.insert(cache_item.clone());
 
         AllocationResult {
             item: cache_item,
             kind: AllocationKind::TexturePage,
             image_id: image_id,
         }
     }
@@ -787,16 +798,17 @@ impl TextureCache {
 
         self.pending_updates.push(update_op);
     }
 
     pub fn insert(&mut self,
                   descriptor: ImageDescriptor,
                   filter: TextureFilter,
                   data: ImageData,
+                  user_data: [f32; 2],
                   profile: &mut TextureCacheProfileCounters) -> TextureCacheItemId {
         if let ImageData::Blob(..) = data {
             panic!("must rasterize the vector image before adding to the cache");
         }
 
         let width = descriptor.width;
         let height = descriptor.height;
         let format = descriptor.format;
@@ -808,16 +820,17 @@ impl TextureCache {
                          (height-1) * descriptor.compute_stride();
             assert!(vec.len() >= finish as usize);
         }
 
         let result = self.allocate(width,
                                    height,
                                    format,
                                    filter,
+                                   user_data,
                                    profile);
 
         match result.kind {
             AllocationKind::TexturePage => {
                 match data {
                     ImageData::External(ext_image) => {
                         match ext_image.image_type {
                             ExternalImageType::Texture2DHandle |
@@ -909,16 +922,20 @@ impl TextureCache {
 
         result.image_id
     }
 
     pub fn get(&self, id: TextureCacheItemId) -> &TextureCacheItem {
         self.items.get(id)
     }
 
+    pub fn get_mut(&mut self, id: TextureCacheItemId) -> &mut TextureCacheItem {
+        self.items.get_mut(id)
+    }
+
     pub fn free(&mut self, id: TextureCacheItemId) {
         let item = self.items.free(id);
         match self.arena.texture_page_for_id(item.texture_id) {
             Some(texture_page) => texture_page.free(&item.allocated_rect),
             None => {
                 // This is a standalone texture allocation. Just push it back onto the free
                 // list.
                 self.pending_updates.push(TextureUpdate {
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,84 +1,58 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use app_units::Au;
 use border::{BorderCornerInstance, BorderCornerSide};
 use device::TextureId;
 use fnv::FnvHasher;
-use gpu_cache::{GpuCache, GpuCacheUpdateList};
+use gpu_cache::{GpuCache, GpuCacheHandle, GpuCacheUpdateList};
 use gpu_store::GpuStoreAddress;
 use internal_types::{ANGLE_FLOAT_TO_FIXED, BatchTextures, CacheTextureId, LowLevelFilterOp};
 use internal_types::SourceTexture;
 use mask_cache::MaskCacheInfo;
 use prim_store::{CLIP_DATA_GPU_SIZE, DeferredResolve, GpuBlock32};
-use prim_store::PrimitiveCacheKey;
-use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore, TexelRect};
+use prim_store::{ImagePrimitiveKind, PrimitiveCacheKey};
+use prim_store::{PrimitiveIndex, PrimitiveKind, PrimitiveMetadata, PrimitiveStore};
 use profiler::FrameProfileCounters;
 use render_task::{AlphaRenderItem, MaskGeometryKind, MaskSegment, RenderTask, RenderTaskData};
 use render_task::{RenderTaskId, RenderTaskIndex, RenderTaskKey, RenderTaskKind};
 use render_task::RenderTaskLocation;
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::ResourceCache;
 use std::{f32, i32, mem, usize};
 use std::collections::HashMap;
 use std::hash::BuildHasherDefault;
 use texture_cache::TexturePage;
 use util::{TransformedRect, TransformedRectKind};
-use webrender_traits::{BuiltDisplayList, ClipAndScrollInfo, ClipId, ColorF, DeviceIntPoint};
+use webrender_traits::{BuiltDisplayList, ClipAndScrollInfo, ClipId, ColorF, DeviceIntPoint, ImageKey};
 use webrender_traits::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize};
 use webrender_traits::{ExternalImageType, FontRenderMode, ImageRendering, LayerRect};
 use webrender_traits::{LayerToWorldTransform, MixBlendMode, PipelineId, TransformStyle};
-use webrender_traits::{WorldToLayerTransform, YuvColorSpace, YuvFormat, LayerVector2D};
+use webrender_traits::{TileOffset, WorldToLayerTransform, YuvColorSpace, YuvFormat, LayerVector2D};
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_INDEX: RenderTaskIndex = RenderTaskIndex(i32::MAX as usize);
 
 
 pub type DisplayListMap = HashMap<PipelineId,
                                   BuiltDisplayList,
                                   BuildHasherDefault<FnvHasher>>;
 
 trait AlphaBatchHelpers {
-    fn get_color_textures(&self, metadata: &PrimitiveMetadata) -> [SourceTexture; 3];
     fn get_blend_mode(&self,
                       needs_blending: bool,
                       metadata: &PrimitiveMetadata) -> BlendMode;
-    fn can_draw(&self, metadata: &PrimitiveMetadata) -> bool;
 }
 
 impl AlphaBatchHelpers for PrimitiveStore {
-    fn get_color_textures(&self, metadata: &PrimitiveMetadata) -> [SourceTexture; 3] {
-        let invalid = SourceTexture::Invalid;
-        match metadata.prim_kind {
-            PrimitiveKind::Border |
-            PrimitiveKind::BoxShadow |
-            PrimitiveKind::Rectangle |
-            PrimitiveKind::AlignedGradient |
-            PrimitiveKind::AngleGradient |
-            PrimitiveKind::RadialGradient => [invalid; 3],
-            PrimitiveKind::Image => {
-                let image_cpu = &self.cpu_images[metadata.cpu_prim_index.0];
-                [image_cpu.color_texture_id, invalid, invalid]
-            }
-            PrimitiveKind::YuvImage => {
-                let image_cpu = &self.cpu_yuv_images[metadata.cpu_prim_index.0];
-                image_cpu.yuv_texture_id
-            }
-            PrimitiveKind::TextRun => {
-                let text_run_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
-                [text_run_cpu.color_texture_id, invalid, invalid]
-            }
-        }
-    }
-
     fn get_blend_mode(&self, needs_blending: bool, metadata: &PrimitiveMetadata) -> BlendMode {
         match metadata.prim_kind {
             PrimitiveKind::TextRun => {
                 let text_run_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
                 if text_run_cpu.blur_radius == 0.0 {
                     match text_run_cpu.render_mode {
                         FontRenderMode::Subpixel => BlendMode::Subpixel(text_run_cpu.color),
                         FontRenderMode::Alpha | FontRenderMode::Mono => BlendMode::Alpha,
@@ -102,36 +76,16 @@ impl AlphaBatchHelpers for PrimitiveStor
                 if needs_blending {
                     BlendMode::Alpha
                 } else {
                     BlendMode::None
                 }
             }
         }
     }
-
-    fn can_draw(&self, metadata: &PrimitiveMetadata) -> bool {
-        match metadata.prim_kind {
-            PrimitiveKind::Border |
-            PrimitiveKind::BoxShadow |
-            PrimitiveKind::Rectangle |
-            PrimitiveKind::AlignedGradient |
-            PrimitiveKind::AngleGradient |
-            PrimitiveKind::RadialGradient |
-            PrimitiveKind::Image |
-            PrimitiveKind::YuvImage => true,
-            PrimitiveKind::TextRun => {
-                // If the glyph failed to rasterize, we may have a text run
-                // without a valid texture. In this case, we need to prevent
-                // drawing the primitive this frame.
-                let text_run_cpu = &self.cpu_text_runs[metadata.cpu_prim_index.0];
-                text_run_cpu.color_texture_id != SourceTexture::Invalid
-            }
-        }
-    }
 }
 
 #[derive(Debug)]
 pub struct ScrollbarPrimitive {
     pub clip_id: ClipId,
     pub prim_index: PrimitiveIndex,
     pub border_radius: f32,
 }
@@ -303,19 +257,21 @@ pub struct AlphaBatcher {
     pub batch_list: BatchList,
     tasks: Vec<AlphaBatchTask>,
 }
 
 impl AlphaRenderItem {
     fn add_to_batch(&self,
                     batch_list: &mut BatchList,
                     ctx: &RenderTargetContext,
+                    gpu_cache: &mut GpuCache,
                     render_tasks: &RenderTaskCollection,
                     child_pass_index: RenderPassIndex,
-                    task_index: RenderTaskIndex) {
+                    task_index: RenderTaskIndex,
+                    deferred_resolves: &mut Vec<DeferredResolve>) {
         match *self {
             AlphaRenderItem::Blend(stacking_context_index, src_id, filter, z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let key = AlphaBatchKey::new(AlphaBatchKind::Blend,
                                              AlphaBatchKeyFlags::empty(),
                                              BlendMode::Alpha,
                                              BatchTextures::no_texture());
                 let src_task_index = render_tasks.get_static_task_index(&src_id);
@@ -382,102 +338,117 @@ impl AlphaRenderItem {
                                                                mode as u32 as i32,
                                                                0,
                                                                z);
 
                 batch.add_instance(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::Primitive(clip_scroll_group_index_opt, prim_index, z) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
-                // Bail out if this primitive can't be drawn this frame for some reason.
-                if !ctx.prim_store.can_draw(prim_metadata) {
-                    return;
-                }
                 let (transform_kind, packed_layer_index) = match clip_scroll_group_index_opt {
                     Some(group_index) => {
                         let group = &ctx.clip_scroll_group_store[group_index.0];
                         let bounding_rect = group.screen_bounding_rect.as_ref().unwrap();
                         (bounding_rect.0, group.packed_layer_index)
                     },
                     None => (TransformedRectKind::AxisAligned, PackedLayerIndex(0)),
                 };
                 let needs_clipping = prim_metadata.needs_clipping();
                 let mut flags = AlphaBatchKeyFlags::empty();
                 if needs_clipping {
                     flags |= NEEDS_CLIPPING;
                 }
                 if transform_kind == TransformedRectKind::AxisAligned {
                     flags |= AXIS_ALIGNED;
                 }
-                let textures = BatchTextures {
-                    colors: ctx.prim_store.get_color_textures(prim_metadata),
-                };
                 let item_bounding_rect = ctx.prim_store.cpu_bounding_rects[prim_index.0].as_ref().unwrap();
                 let clip_task_index = match prim_metadata.clip_task {
                     Some(ref clip_task) => {
                         render_tasks.get_task_index(&clip_task.id, child_pass_index)
                     }
                     None => {
                         OPAQUE_TASK_INDEX
                     }
                 };
                 let needs_blending = !prim_metadata.is_opaque ||
                                      needs_clipping ||
                                      transform_kind == TransformedRectKind::Complex;
                 let blend_mode = ctx.prim_store.get_blend_mode(needs_blending, prim_metadata);
 
                 let prim_cache_address = prim_metadata.gpu_location
-                                                      .as_int(&ctx.gpu_cache);
+                                                      .as_int(gpu_cache);
 
                 let base_instance = SimplePrimitiveInstance::new(prim_cache_address,
                                                                  task_index,
                                                                  clip_task_index,
                                                                  packed_layer_index,
                                                                  z);
 
+                let no_textures = BatchTextures::no_texture();
+
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Border => {
                         let border_cpu = &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
                         // TODO(gw): Select correct blend mode for edges and corners!!
-                        let corner_key = AlphaBatchKey::new(AlphaBatchKind::BorderCorner, flags, blend_mode, textures);
-                        let edge_key = AlphaBatchKey::new(AlphaBatchKind::BorderEdge, flags, blend_mode, textures);
+                        let corner_key = AlphaBatchKey::new(AlphaBatchKind::BorderCorner, flags, blend_mode, no_textures);
+                        let edge_key = AlphaBatchKey::new(AlphaBatchKind::BorderEdge, flags, blend_mode, no_textures);
 
                         batch_list.with_suitable_batch(&corner_key, item_bounding_rect, |batch| {
                             for (i, instance_kind) in border_cpu.corner_instances.iter().enumerate() {
                                 let sub_index = i as i32;
                                 match *instance_kind {
                                     BorderCornerInstance::Single => {
                                         batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::Both as i32,));
+                                                                               BorderCornerSide::Both as i32, 0));
                                     }
                                     BorderCornerInstance::Double => {
                                         batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::First as i32));
+                                                                               BorderCornerSide::First as i32, 0));
                                         batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::Second as i32));
+                                                                               BorderCornerSide::Second as i32, 0));
                                     }
                                 }
                             }
                         });
 
                         batch_list.with_suitable_batch(&edge_key, item_bounding_rect, |batch| {
                             for border_segment in 0..4 {
-                                batch.add_instance(base_instance.build(border_segment, 0));
+                                batch.add_instance(base_instance.build(border_segment, 0, 0));
                             }
                         });
                     }
                     PrimitiveKind::Rectangle => {
-                        let key = AlphaBatchKey::new(AlphaBatchKind::Rectangle, flags, blend_mode, textures);
+                        let key = AlphaBatchKey::new(AlphaBatchKind::Rectangle, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, 0));
+                        batch.add_instance(base_instance.build(0, 0, 0));
                     }
                     PrimitiveKind::Image => {
                         let image_cpu = &ctx.prim_store.cpu_images[prim_metadata.cpu_prim_index.0];
 
-                        let batch_kind = match image_cpu.color_texture_id {
+                        let (color_texture_id, uv_address) = match image_cpu.kind {
+                            ImagePrimitiveKind::Image(image_key, image_rendering, tile_offset, _) => {
+                                resolve_image(image_key,
+                                              image_rendering,
+                                              tile_offset,
+                                              ctx.resource_cache,
+                                              gpu_cache,
+                                              deferred_resolves)
+                            }
+                            ImagePrimitiveKind::WebGL(context_id) => {
+                                let webgl_texture = ctx.resource_cache.get_webgl_texture(&context_id);
+                                let uv_rect = [ 0.0,
+                                                webgl_texture.size.height as f32,
+                                                webgl_texture.size.width as f32,
+                                                0.0];
+                                let cache_handle = gpu_cache.push_per_frame_blocks(&[uv_rect.into()]);
+                                (webgl_texture.id, cache_handle)
+                            }
+                        };
+
+                        let batch_kind = match color_texture_id {
                             SourceTexture::External(ext_image) => {
                                 match ext_image.image_type {
                                     ExternalImageType::Texture2DHandle => AlphaBatchKind::Image(ImageBufferKind::Texture2D),
                                     ExternalImageType::TextureRectHandle => AlphaBatchKind::Image(ImageBufferKind::TextureRect),
                                     ExternalImageType::TextureExternalHandle => AlphaBatchKind::Image(ImageBufferKind::TextureExternal),
                                     ExternalImageType::ExternalBuffer => {
                                         // The ExternalImageType::ExternalBuffer should be handled by resource_cache.
                                         // It should go through the non-external case.
@@ -485,73 +456,109 @@ impl AlphaRenderItem {
                                     }
                                 }
                             }
                             _ => {
                                 AlphaBatchKind::Image(ImageBufferKind::Texture2D)
                             }
                         };
 
+                        let textures = BatchTextures {
+                            colors: [color_texture_id, SourceTexture::Invalid, SourceTexture::Invalid],
+                        };
+
                         let key = AlphaBatchKey::new(batch_kind, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(image_cpu.resource_address.0, 0));
+                        batch.add_instance(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
                     }
                     PrimitiveKind::TextRun => {
                         let text_cpu = &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
                         let batch_kind = if text_cpu.blur_radius == 0.0 {
                             AlphaBatchKind::TextRun
                         } else {
                             // Select a generic primitive shader that can blit the
                             // results of the cached text blur to the framebuffer,
                             // applying tile clipping etc.
                             AlphaBatchKind::CacheImage
                         };
-                        let key = AlphaBatchKey::new(batch_kind, flags, blend_mode, textures);
-                        let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
+
+                        let font_size_dp = text_cpu.logical_font_size.scale_by(ctx.device_pixel_ratio);
 
                         let cache_task_index = match prim_metadata.render_task {
                             Some(ref task) => {
                                 let cache_task_id = task.id;
                                 render_tasks.get_task_index(&cache_task_id,
                                                             child_pass_index).0 as i32
                             }
                             None => 0,
                         };
 
-                        let user_data1 = match batch_kind {
-                            AlphaBatchKind::TextRun => text_cpu.resource_address.0,
-                            AlphaBatchKind::CacheImage => cache_task_index,
-                            _ => unreachable!(),
-                        };
+                        // TODO(gw): avoid / recycle this allocation in the future.
+                        let mut instances = Vec::new();
 
-                        for glyph_index in 0..text_cpu.glyph_instances.len() {
-                            batch.add_instance(base_instance.build(glyph_index as i32, user_data1));
+                        let texture_id = ctx.resource_cache.get_glyphs(text_cpu.font_key,
+                                                                       font_size_dp,
+                                                                       text_cpu.color,
+                                                                       &text_cpu.glyph_instances,
+                                                                       text_cpu.render_mode,
+                                                                       text_cpu.glyph_options, |index, handle| {
+                            let uv_address = handle.as_int(gpu_cache);
+                            instances.push(base_instance.build(index as i32, cache_task_index, uv_address));
+                        });
+
+                        if texture_id != SourceTexture::Invalid {
+                            let textures = BatchTextures {
+                                colors: [texture_id, SourceTexture::Invalid, SourceTexture::Invalid],
+                            };
+
+                            let key = AlphaBatchKey::new(batch_kind, flags, blend_mode, textures);
+                            let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
+
+                            batch.add_instances(&instances);
                         }
                     }
                     PrimitiveKind::AlignedGradient => {
                         let gradient_cpu = &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
-                        let key = AlphaBatchKey::new(AlphaBatchKind::AlignedGradient, flags, blend_mode, textures);
+                        let key = AlphaBatchKey::new(AlphaBatchKind::AlignedGradient, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
                         for part_index in 0..(gradient_cpu.stops_count - 1) {
-                            batch.add_instance(base_instance.build(part_index as i32, 0));
+                            batch.add_instance(base_instance.build(part_index as i32, 0, 0));
                         }
                     }
                     PrimitiveKind::AngleGradient => {
-                        let key = AlphaBatchKey::new(AlphaBatchKind::AngleGradient, flags, blend_mode, textures);
+                        let key = AlphaBatchKey::new(AlphaBatchKind::AngleGradient, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, 0));
+                        batch.add_instance(base_instance.build(0, 0, 0));
                     }
                     PrimitiveKind::RadialGradient => {
-                        let key = AlphaBatchKey::new(AlphaBatchKind::RadialGradient, flags, blend_mode, textures);
+                        let key = AlphaBatchKey::new(AlphaBatchKind::RadialGradient, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, 0));
+                        batch.add_instance(base_instance.build(0, 0, 0));
                     }
                     PrimitiveKind::YuvImage => {
+                        let mut textures = BatchTextures::no_texture();
+                        let mut uv_rect_addresses = [0; 3];
                         let image_yuv_cpu = &ctx.prim_store.cpu_yuv_images[prim_metadata.cpu_prim_index.0];
 
+                        //yuv channel
+                        let channel_count = image_yuv_cpu.format.get_plane_num();
+                        debug_assert!(channel_count <= 3);
+                        for channel in 0..channel_count {
+                            let image_key = image_yuv_cpu.yuv_key[channel];
+
+                            let (texture, address) = resolve_image(image_key,
+                                                                   image_yuv_cpu.image_rendering,
+                                                                   None,
+                                                                   ctx.resource_cache,
+                                                                   gpu_cache,
+                                                                   deferred_resolves);
+                            textures.colors[channel] = texture;
+                            uv_rect_addresses[channel] = address.as_int(gpu_cache);
+                        }
+
                         let get_buffer_kind = |texture: SourceTexture| {
                             match texture {
                                 SourceTexture::External(ext_image) => {
                                     match ext_image.image_type {
                                         ExternalImageType::Texture2DHandle => ImageBufferKind::Texture2D,
                                         ExternalImageType::TextureRectHandle => ImageBufferKind::TextureRect,
                                         ExternalImageType::TextureExternalHandle => ImageBufferKind::TextureExternal,
                                         ExternalImageType::ExternalBuffer => {
@@ -563,55 +570,56 @@ impl AlphaRenderItem {
                                 }
                                 _ => {
                                     ImageBufferKind::Texture2D
                                 }
                             }
                         };
 
                         // All yuv textures should be the same type.
-                        let buffer_kind = get_buffer_kind(image_yuv_cpu.yuv_texture_id[0]);
-                        assert!(image_yuv_cpu.yuv_texture_id[1.. image_yuv_cpu.format.get_plane_num()].iter().all(
+                        let buffer_kind = get_buffer_kind(textures.colors[0]);
+                        assert!(textures.colors[1.. image_yuv_cpu.format.get_plane_num()].iter().all(
                             |&tid| buffer_kind == get_buffer_kind(tid)
                         ));
 
                         let key = AlphaBatchKey::new(AlphaBatchKind::YuvImage(buffer_kind, image_yuv_cpu.format, image_yuv_cpu.color_space),
                                                      flags,
                                                      blend_mode,
                                                      textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
-                        batch.add_instance(base_instance.build(image_yuv_cpu.yuv_resource_address.0,
-                                                               0));
+                        batch.add_instance(base_instance.build(uv_rect_addresses[0],
+                                                               uv_rect_addresses[1],
+                                                               uv_rect_addresses[2]));
                     }
                     PrimitiveKind::BoxShadow => {
                         let box_shadow = &ctx.prim_store.cpu_box_shadows[prim_metadata.cpu_prim_index.0];
                         let cache_task_id = &prim_metadata.render_task.as_ref().unwrap().id;
                         let cache_task_index = render_tasks.get_task_index(cache_task_id,
                                                                            child_pass_index);
 
-                        let key = AlphaBatchKey::new(AlphaBatchKind::BoxShadow, flags, blend_mode, textures);
+                        let key = AlphaBatchKey::new(AlphaBatchKind::BoxShadow, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
                         for rect_index in 0..box_shadow.rects.len() {
                             batch.add_instance(base_instance.build(rect_index as i32,
-                                                                   cache_task_index.0 as i32));
+                                                                   cache_task_index.0 as i32, 0));
                         }
                     }
                 }
             }
             AlphaRenderItem::SplitComposite(sc_index, task_id, gpu_handle, z) => {
                 let key = AlphaBatchKey::new(AlphaBatchKind::SplitComposite,
                                              AlphaBatchKeyFlags::empty(),
                                              BlendMode::PremultipliedAlpha,
                                              BatchTextures::no_texture());
                 let stacking_context = &ctx.stacking_context_store[sc_index.0];
                 let batch = batch_list.get_suitable_batch(&key, &stacking_context.screen_bounds);
                 let source_task = render_tasks.get_task_index(&task_id, child_pass_index);
-                let gpu_address = gpu_handle.as_int(ctx.gpu_cache);
+                let gpu_address = gpu_handle.as_int(gpu_cache);
 
                 let instance = CompositePrimitiveInstance::new(task_index,
                                                                source_task,
                                                                RenderTaskIndex(0),
                                                                gpu_address,
                                                                0,
                                                                z);
 
@@ -630,27 +638,31 @@ impl AlphaBatcher {
     }
 
     /// Queues `task` by appending it to `self.tasks`; `build` later walks
     /// this list.
     fn add_task(&mut self, task: AlphaBatchTask) {
         self.tasks.push(task);
     }
 
     /// Adds every item of every queued task to `self.batch_list`.
     ///
     /// The static task index is looked up once per task through
     /// `render_tasks.get_static_task_index` and handed, together with the
     /// remaining arguments, to each item's `add_to_batch`.
     ///
     /// This patch threads two extra arguments through to `add_to_batch`:
     /// a mutable `gpu_cache` and the `deferred_resolves` list.
     fn build(&mut self,
              ctx: &RenderTargetContext,
+             gpu_cache: &mut GpuCache,
              render_tasks: &RenderTaskCollection,
-             child_pass_index: RenderPassIndex) {
+             child_pass_index: RenderPassIndex,
+             deferred_resolves: &mut Vec<DeferredResolve>) {
         for task in &self.tasks {
             // One lookup per task; shared by all of the task's items.
             let task_index = render_tasks.get_static_task_index(&task.task_id);
 
             for item in &task.items {
                 item.add_to_batch(&mut self.batch_list,
                                   ctx,
+                                  gpu_cache,
                                   render_tasks,
                                   child_pass_index,
-                                  task_index);
+                                  task_index,
+                                  deferred_resolves);
             }
         }
     }
 }
 
 /// Batcher managing draw calls into the clip mask (in the RT cache).
 #[derive(Debug)]
 pub struct ClipBatcher {
@@ -671,24 +683,26 @@ impl ClipBatcher {
             borders: Vec::new(),
         }
     }
 
     fn add<'a>(&mut self,
                task_index: RenderTaskIndex,
                clips: &[(PackedLayerIndex, MaskCacheInfo)],
                resource_cache: &ResourceCache,
+               gpu_cache: &GpuCache,
                geometry_kind: MaskGeometryKind) {
 
         for &(packed_layer_index, ref info) in clips.iter() {
             let instance = CacheClipInstance {
                 task_id: task_index.0 as i32,
                 layer_index: packed_layer_index.0 as i32,
                 address: GpuStoreAddress(0),
                 segment: 0,
+                resource_address: 0,
             };
 
             for clip_index in 0..info.effective_complex_clip_count as usize {
                 let offset = info.complex_clip_range.start.0 + ((CLIP_DATA_GPU_SIZE * clip_index) as i32);
                 match geometry_kind {
                     MaskGeometryKind::Default => {
                         self.rectangles.push(CacheClipInstance {
                             address: GpuStoreAddress(offset),
@@ -724,16 +738,17 @@ impl ClipBatcher {
             }
 
             if let Some((ref mask, address)) = info.image {
                 let cache_item = resource_cache.get_cached_image(mask.image, ImageRendering::Auto, None);
                 self.images.entry(cache_item.texture_id)
                            .or_insert(Vec::new())
                            .push(CacheClipInstance {
                     address: address,
+                    resource_address: cache_item.uv_rect_handle.as_int(gpu_cache),
                     ..instance
                 })
             }
 
             for &(ref source, gpu_address) in &info.border_corners {
                 self.border_clears.push(CacheClipInstance {
                     address: gpu_address,
                     segment: 0,
@@ -748,21 +763,21 @@ impl ClipBatcher {
                     })
                 }
             }
         }
     }
 }
 
 /// Shared references handed to the render-target building code.
 ///
 /// This patch adds `device_pixel_ratio` and removes the `gpu_cache`
 /// reference; in the functions visible in this patch the GPU cache is
 /// passed as its own argument instead.
 pub struct RenderTargetContext<'a> {
+    pub device_pixel_ratio: f32,
     pub stacking_context_store: &'a [StackingContext],
     pub clip_scroll_group_store: &'a [ClipScrollGroup],
     pub prim_store: &'a PrimitiveStore,
     pub resource_cache: &'a ResourceCache,
-    pub gpu_cache: &'a GpuCache,
 }
 
 struct TextureAllocator {
     // TODO(gw): Replace this with a simpler allocator for
     // render target allocation - this use case doesn't need
     // to deal with coalescing etc that the general texture
     // cache allocator requires.
     page_allocator: TexturePage,
@@ -800,21 +815,24 @@ impl TextureAllocator {
     }
 }
 
 /// Interface of a single render target. This patch shows implementations
 /// for `ColorRenderTarget` and `AlphaRenderTarget`.
 pub trait RenderTarget {
     fn new(size: DeviceUintSize) -> Self;
     // Returns the origin of the allocation, or `None`.
     // NOTE(review): presumably `None` means `size` did not fit - confirm.
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint>;
     // Default implementation is a no-op (empty body); implementors override
     // it when they have batches to build.
     fn build(&mut self,
              _ctx: &RenderTargetContext,
+             _gpu_cache: &mut GpuCache,
              _render_tasks: &mut RenderTaskCollection,
-             _child_pass_index: RenderPassIndex) {}
+             _child_pass_index: RenderPassIndex,
+             _deferred_resolves: &mut Vec<DeferredResolve>) {}
     // Required method; note it receives the GPU cache immutably, while
     // `build` receives it mutably.
     fn add_task(&mut self,
                 task: RenderTask,
                 ctx: &RenderTargetContext,
+                gpu_cache: &GpuCache,
                 render_tasks: &RenderTaskCollection,
                 pass_index: RenderPassIndex);
     fn used_rect(&self) -> DeviceIntRect;
 }
 
 #[derive(Debug, Copy, Clone)]
 pub enum RenderTargetKind {
     Color,   // RGBA32
@@ -840,30 +858,33 @@ impl<T: RenderTarget> RenderTargetList<T
     }
 
     /// Number of targets currently held in `self.targets`.
     pub fn target_count(&self) -> usize {
         self.targets.len()
     }
 
     /// Calls `build` on every target in the list, forwarding all arguments.
     ///
     /// Each target is given the index of the previous pass
     /// (`pass_index.0 - 1`) as its child pass index.
     fn build(&mut self,
              ctx: &RenderTargetContext,
+             gpu_cache: &mut GpuCache,
              render_tasks: &mut RenderTaskCollection,
-             pass_index: RenderPassIndex) {
+             pass_index: RenderPassIndex,
+             deferred_resolves: &mut Vec<DeferredResolve>) {
         for target in &mut self.targets {
             // NOTE(review): subtracts 1 from `pass_index.0`; presumably there
             // are no targets when building pass 0 - confirm.
             let child_pass_index = RenderPassIndex(pass_index.0 - 1);
-            target.build(ctx, render_tasks, child_pass_index);
+            target.build(ctx, gpu_cache, render_tasks, child_pass_index, deferred_resolves);
         }
     }
 
     /// Forwards `task` to the most recently added target.
     ///
     /// Panics (via `unwrap`) if `self.targets` is empty.
     fn add_task(&mut self,
                 task: RenderTask,
                 ctx: &RenderTargetContext,
+                gpu_cache: &GpuCache,
                 render_tasks: &mut RenderTaskCollection,
                 pass_index: RenderPassIndex) {
-        self.targets.last_mut().unwrap().add_task(task, ctx, render_tasks, pass_index);
+        self.targets.last_mut().unwrap().add_task(task, ctx, gpu_cache, render_tasks, pass_index);
     }
 
     fn allocate(&mut self, alloc_size: DeviceUintSize) -> (DeviceUintPoint, RenderTargetIndex) {
         let existing_origin = self.targets
                                   .last_mut()
                                   .and_then(|target| target.allocate(alloc_size));
 
         let origin = match existing_origin {
@@ -921,26 +942,31 @@ impl RenderTarget for ColorRenderTarget 
     }
 
     /// Returns the `used_rect` tracked by this target's allocator.
     fn used_rect(&self) -> DeviceIntRect {
         self.allocator.used_rect
     }
 
     /// Builds this target by delegating to `self.alpha_batcher.build` with
     /// the same arguments, including the `gpu_cache` and `deferred_resolves`
     /// parameters added by this patch.
     fn build(&mut self,
              ctx: &RenderTargetContext,
+             gpu_cache: &mut GpuCache,
              render_tasks: &mut RenderTaskCollection,
-             child_pass_index: RenderPassIndex) {
+             child_pass_index: RenderPassIndex,
+             deferred_resolves: &mut Vec<DeferredResolve>) {
         self.alpha_batcher.build(ctx,
+                                 gpu_cache,
                                  render_tasks,
-                                 child_pass_index);
+                                 child_pass_index,
+                                 deferred_resolves);
     }
 
     fn add_task(&mut self,
                 task: RenderTask,
                 ctx: &RenderTargetContext,
+                gpu_cache: &GpuCache,
                 render_tasks: &RenderTaskCollection,
                 pass_index: RenderPassIndex) {
         match task.kind {
             RenderTaskKind::Alpha(mut info) => {
                 self.alpha_batcher.add_task(AlphaBatchTask {
                     task_id: task.id,
                     items: mem::replace(&mut info.items, Vec::new()),
                 });
@@ -971,54 +997,64 @@ impl RenderTarget for ColorRenderTarget 
                     src_task_id: render_tasks.get_task_index(&src_id, child_pass_index).0 as i32,
                     blur_direction: BlurDirection::Horizontal as i32,
                     padding: 0,
                 });
             }
             RenderTaskKind::CachePrimitive(prim_index) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
 
-                let prim_address = prim_metadata.gpu_location
-                                                .as_int(&ctx.gpu_cache);
+                let prim_address = prim_metadata.gpu_location.as_int(gpu_cache);
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::BoxShadow => {
                         let instance = SimplePrimitiveInstance::new(prim_address,
                                                                     render_tasks.get_task_index(&task.id, pass_index),
                                                                     RenderTaskIndex(0),
                                                                     PackedLayerIndex(0),
                                                                     0);     // z is disabled for rendering cache primitives
-                        self.box_shadow_cache_prims.push(instance.build(0, 0));
+                        self.box_shadow_cache_prims.push(instance.build(0, 0, 0));
                     }
                     PrimitiveKind::TextRun => {
                         let text = &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
                         // We only cache text runs with a text-shadow (for now).
                         debug_assert!(text.blur_radius != 0.0);
 
-                        // TODO(gw): This should always be fine for now, since the texture
-                        // atlas grows to 4k. However, it won't be a problem soon, once
-                        // we switch the texture atlas to use texture layers!
-                        let textures = BatchTextures {
-                            colors: ctx.prim_store.get_color_textures(prim_metadata),
-                        };
+                        // todo(gw): avoid / recycle this allocation...
+                        let mut instances = Vec::new();
 
-                        debug_assert!(textures.colors[0] != SourceTexture::Invalid);
-                        debug_assert!(self.text_run_textures.colors[0] == SourceTexture::Invalid ||
-                                      self.text_run_textures.colors[0] == textures.colors[0]);
-                        self.text_run_textures = textures;
+                        let font_size_dp = text.logical_font_size.scale_by(ctx.device_pixel_ratio);
 
                         let instance = SimplePrimitiveInstance::new(prim_address,
                                                                     render_tasks.get_task_index(&task.id, pass_index),
                                                                     RenderTaskIndex(0),
                                                                     PackedLayerIndex(0),
                                                                     0);     // z is disabled for rendering cache primitives
 
-                        for glyph_index in 0..text.glyph_instances.len() {
-                            self.text_run_cache_prims.push(instance.build(glyph_index as i32,
-                                                                          text.resource_address.0));
+                        let texture_id = ctx.resource_cache.get_glyphs(text.font_key,
+                                                                       font_size_dp,
+                                                                       text.color,
+                                                                       &text.glyph_instances,
+                                                                       text.render_mode,
+                                                                       text.glyph_options, |index, handle| {
+                            let uv_address = handle.as_int(gpu_cache);
+                            instances.push(instance.build(index as i32, 0, uv_address));
+                        });
+
+                        if texture_id != SourceTexture::Invalid {
+                            let textures = BatchTextures {
+                                colors: [texture_id, SourceTexture::Invalid, SourceTexture::Invalid],
+                            };
+
+                            self.text_run_cache_prims.extend_from_slice(&instances);
+
+                            debug_assert!(textures.colors[0] != SourceTexture::Invalid);
+                            debug_assert!(self.text_run_textures.colors[0] == SourceTexture::Invalid ||
+                                          self.text_run_textures.colors[0] == textures.colors[0]);
+                            self.text_run_textures = textures;
                         }
                     }
                     _ => {
                         // No other primitives make use of primitive caching yet!
                         unreachable!()
                     }
                 }
             }
@@ -1051,31 +1087,33 @@ impl RenderTarget for AlphaRenderTarget 
 
     /// Returns the `used_rect` tracked by this target's allocator.
     fn used_rect(&self) -> DeviceIntRect {
         self.allocator.used_rect
     }
 
     /// Adds a task to this alpha target.
     ///
     /// Only `RenderTaskKind::CacheMask` is accepted: its clips are handed to
     /// `self.clip_batcher.add`. Every other task kind panics.
     fn add_task(&mut self,
                 task: RenderTask,
                 ctx: &RenderTargetContext,
+                gpu_cache: &GpuCache,
                 render_tasks: &RenderTaskCollection,
                 pass_index: RenderPassIndex) {
         match task.kind {
             RenderTaskKind::Alpha(..) |
             RenderTaskKind::VerticalBlur(..) |
             RenderTaskKind::HorizontalBlur(..) |
             RenderTaskKind::CachePrimitive(..) |
             RenderTaskKind::Readback(..) => {
                 panic!("Should not be added to alpha target!");
             }
             RenderTaskKind::CacheMask(ref task_info) => {
                 let task_index = render_tasks.get_task_index(&task.id, pass_index);
                 // `gpu_cache` is forwarded as a new argument by this patch.
                 self.clip_batcher.add(task_index,
                                       &task_info.clips,
                                       &ctx.resource_cache,
+                                      gpu_cache,
                                       task_info.geometry_kind);
             }
         }
     }
 }
 
 /// A render pass represents a set of rendering operations that don't depend on one
 /// another.
@@ -1107,20 +1145,21 @@ impl RenderPass {
 
     /// Appends `task` to `self.tasks`.
     pub fn add_render_task(&mut self, task: RenderTask) {
         self.tasks.push(task);
     }
 
     /// Routes `task` to the color or alpha target list according to
     /// `task.target_kind()`, passing along this pass's `pass_index`.
     fn add_task(&mut self,
                 task: RenderTask,
                 ctx: &RenderTargetContext,
+                gpu_cache: &GpuCache,
                 render_tasks: &mut RenderTaskCollection) {
         match task.target_kind() {
-            RenderTargetKind::Color => self.color_targets.add_task(task, ctx, render_tasks, self.pass_index),
-            RenderTargetKind::Alpha => self.alpha_targets.add_task(task, ctx, render_tasks, self.pass_index),
+            RenderTargetKind::Color => self.color_targets.add_task(task, ctx, gpu_cache, render_tasks, self.pass_index),
+            RenderTargetKind::Alpha => self.alpha_targets.add_task(task, ctx, gpu_cache, render_tasks, self.pass_index),
         }
     }
 
     fn allocate_target(&mut self,
                        kind: RenderTargetKind,
                        alloc_size: DeviceUintSize) -> (DeviceUintPoint, RenderTargetIndex) {
         match kind {
             RenderTargetKind::Color => self.color_targets.allocate(alloc_size),
@@ -1139,17 +1178,21 @@ impl RenderPass {
     /// Returns how many targets of the given `kind` this pass holds.
     ///
     /// Debug builds assert that the pass is not the framebuffer pass.
     pub fn required_target_count(&self, kind: RenderTargetKind) -> usize {
         debug_assert!(!self.is_framebuffer);        // framebuffer never needs targets
         match kind {
             RenderTargetKind::Color => self.color_targets.target_count(),
             RenderTargetKind::Alpha => self.alpha_targets.target_count(),
         }
     }
 
-    pub fn build(&mut self, ctx: &RenderTargetContext, render_tasks: &mut RenderTaskCollection) {
+    pub fn build(&mut self,
+                 ctx: &RenderTargetContext,
+                 gpu_cache: &mut GpuCache,
+                 render_tasks: &mut RenderTaskCollection,
+                 deferred_resolves: &mut Vec<DeferredResolve>) {
         profile_scope!("RenderPass::build");
 
         // Step through each task, adding to batches as appropriate.
         let tasks = mem::replace(&mut self.tasks, Vec::new());
         for mut task in tasks {
             let target_kind = task.target_kind();
 
             // Find a target to assign this task to, or create a new
@@ -1178,21 +1221,21 @@ impl RenderPass {
 
                     *origin = Some((DeviceIntPoint::new(alloc_origin.x as i32,
                                                         alloc_origin.y as i32),
                                     target_index));
                 }
             }
 
             render_tasks.add(&task, self.pass_index);
-            self.add_task(task, ctx, render_tasks);
+            self.add_task(task, ctx, gpu_cache, render_tasks);
         }
 
-        self.color_targets.build(ctx, render_tasks, self.pass_index);
-        self.alpha_targets.build(ctx, render_tasks, self.pass_index);
+        self.color_targets.build(ctx, gpu_cache, render_tasks, self.pass_index, deferred_resolves);
+        self.alpha_targets.build(ctx, gpu_cache, render_tasks, self.pass_index, deferred_resolves);
     }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum AlphaBatchKind {
     Composite,
     HardwareComposite,
     SplitComposite,
@@ -1287,16 +1330,17 @@ pub struct BlurCommand {
 /// Could be an image or a rectangle, which defines the
 /// way `address` is treated.
 #[derive(Clone, Copy, Debug)]
 pub struct CacheClipInstance {
     // Filled from the render task index (`task_index.0`) in `ClipBatcher::add`.
     task_id: i32,
     // Filled from the packed layer index in `ClipBatcher::add`.
     layer_index: i32,
     address: GpuStoreAddress,
     segment: i32,
     // New in this patch. `ClipBatcher::add` initialises it to 0 and, for image
     // masks, sets it to the cached image's `uv_rect_handle` address.
+    resource_address: i32,
 }
 
 // 32 bytes per instance should be enough for anyone!
 // (Eight `i32` slots: 8 * 4 bytes.)
 #[derive(Debug, Clone)]
 pub struct PrimitiveInstance {
     data: [i32; 8],
 }
 
@@ -1324,27 +1368,27 @@ impl SimplePrimitiveInstance {
             specific_prim_address: specific_prim_address,
             task_index: task_index.0 as i32,
             clip_task_index: clip_task_index.0 as i32,
             layer_index: layer_index.0 as i32,
             z_sort_index: z_sort_index,
         }
     }
 
-    fn build(&self, data0: i32, data1: i32) -> PrimitiveInstance {
+    fn build(&self, data0: i32, data1: i32, data2: i32) -> PrimitiveInstance { // packs the five fields of `self` plus three caller-supplied words into the 8 slots
         PrimitiveInstance {
             data: [
                 self.specific_prim_address,
                 self.task_index,
                 self.clip_task_index,
                 self.layer_index,
                 self.z_sort_index,
                 data0,
                 data1,
                 // Last slot: a hard-coded 0 before this patch, now `data2`.
-                0,
+                data2,
             ]
         }
     }
 }
 
 pub struct CompositePrimitiveInstance {
     pub task_index: RenderTaskIndex,
     pub src_task_index: RenderTaskIndex,
@@ -1416,16 +1460,20 @@ impl PrimitiveBatch {
             instances: Vec::new(),
             item_rects: Vec::new(),
         }
     }
 
     /// Appends a single instance to this batch.
     fn add_instance(&mut self, instance: PrimitiveInstance) {
         self.instances.push(instance);
     }
+
+    fn add_instances(&mut self, instances: &[PrimitiveInstance]) { // bulk form of `add_instance`
+        self.instances.extend_from_slice(instances); // clones each element of the slice into the batch
+    }
 }
 
 /// Index newtype around `usize`. `ClipBatcher::add` writes its value into
 /// `CacheClipInstance::layer_index`.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct PackedLayerIndex(pub usize);
 
 /// Index newtype around `usize`.
 /// NOTE(review): presumably an index into `stacking_context_store` - confirm.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct StackingContextIndex(pub usize);
 
@@ -1628,20 +1676,53 @@ pub struct Frame {
     pub device_pixel_ratio: f32,
     pub cache_size: DeviceUintSize,
     pub passes: Vec<RenderPass>,
     pub profile_counters: FrameProfileCounters,
 
     pub layer_texture_data: Vec<PackedLayer>,
     pub render_task_data: Vec<RenderTaskData>,
     pub gpu_data32: Vec<GpuBlock32>,
-    pub gpu_resource_rects: Vec<TexelRect>,
 
     // List of updates that need to be pushed to the
     // gpu resource cache.
     pub gpu_cache_updates: Option<GpuCacheUpdateList>,
 
     // List of textures that we don't know about yet
     // from the backend thread. The render thread
     // will use a callback to resolve these and
     // patch the data structures.
     pub deferred_resolves: Vec<DeferredResolve>,
 }
+
+fn resolve_image(image_key: ImageKey,
+                 image_rendering: ImageRendering,
+                 tile_offset: Option<TileOffset>,
+                 resource_cache: &ResourceCache,
+                 gpu_cache: &mut GpuCache,
+                 deferred_resolves: &mut Vec<DeferredResolve>) -> (SourceTexture, GpuCacheHandle) {
+    let image_properties = resource_cache.get_image_properties(image_key);
+
+    // Returns the source texture for `image_key` plus a GPU cache handle.
+    // External images are resolved later, by the render thread.
+    match image_properties.external_image {
+        Some(external_image) => {
+            // External texture: reserve one deferred per-frame block in the GPU
+            // cache and record a DeferredResolve with that block's address, so
+            // that the render thread can patch it...
+            let cache_handle = gpu_cache.push_deferred_per_frame_blocks(1);
+            deferred_resolves.push(DeferredResolve {
+                image_properties: image_properties,
+                address: gpu_cache.get_address(&cache_handle),
+            });
+
+            (SourceTexture::External(external_image), cache_handle)
+        }
+        None => { // not external: `image_rendering` and `tile_offset` are only used on this path
+            let cache_item = resource_cache.get_cached_image(image_key,
+                                                             image_rendering,
+                                                             tile_offset);
+
+            (cache_item.texture_id, cache_item.uv_rect_handle)
+        }
+    }
+}
+
--- a/gfx/webrender_traits/Cargo.toml
+++ b/gfx/webrender_traits/Cargo.toml
@@ -1,31 +1,30 @@
 [package]
 name = "webrender_traits"
-version = "0.40.0"
+version = "0.43.0"
 authors = ["Glenn Watson <gw@intuitionlibrary.com>"]
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 
 [features]
 nightly = ["euclid/unstable", "serde/unstable"]
 ipc = ["ipc-channel"]
 webgl = ["offscreen_gl_context"]
 
 [dependencies]
-app_units = "0.4"
-bincode = "1.0.0-alpha2"
+app_units = "0.5"
+bincode = "0.8"
 byteorder = "1.0"
-euclid = "0.14.4"
+euclid = "0.15"
 gleam = "0.4.5"
 heapsize = ">= 0.3.6, < 0.5"
-ipc-channel = {version = "0.7.2", optional = true}
-offscreen_gl_context = {version = "0.9", features = ["serde"], optional = true}
-serde = "0.9"
-serde_derive = "0.9"
+ipc-channel = {version = "0.8", optional = true}
+offscreen_gl_context = {version = "0.11", features = ["serde"], optional = true}
+serde = { version = "1.0", features = ["rc", "derive"] }
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.3"
-core-graphics = "0.7"
+core-graphics = "0.8"
 
 [target.'cfg(target_os = "windows")'.dependencies]
-dwrote = "0.3"
+dwrote = "0.4"
--- a/gfx/webrender_traits/src/channel_ipc.rs
+++ b/gfx/webrender_traits/src/channel_ipc.rs
@@ -29,15 +29,15 @@ impl PayloadSenderHelperMethods for Payl
 
 impl PayloadReceiverHelperMethods for PayloadReceiver {
     /// Receives raw bytes and converts them with `Payload::from_data`.
     ///
     /// A receive error is mapped to an `io::Error` of kind `Other` that
     /// carries the original error's description.
     fn recv_payload(&self) -> Result<Payload, Error> {
         self.recv().map(|data| Payload::from_data(&data) )
                    .map_err(|e| io::Error::new(ErrorKind::Other, error::Error::description(&e)))
     }
 }
 
-pub fn msg_channel<T: Serialize + Deserialize>() -> Result<(MsgSender<T>, MsgReceiver<T>), Error> {
+pub fn msg_channel<T: Serialize + for<'de> Deserialize<'de>>() -> Result<(MsgSender<T>, MsgReceiver<T>), Error> {
     ipc::channel() // thin wrapper; only the bound changes here (`Deserialize` -> `for<'de> Deserialize<'de>`), alongside the serde 0.9 -> 1.0 bump in Cargo.toml
 }
 
 /// Creates a payload sender/receiver pair; thin wrapper over
 /// `ipc::bytes_channel()`.
 pub fn payload_channel() -> Result<(PayloadSender, PayloadReceiver), Error> {
     ipc::bytes_channel()
 }
--- a/gfx/webrender_traits/src/channel_mpsc.rs
+++ b/gfx/webrender_traits/src/channel_mpsc.rs
@@ -78,21 +78,21 @@ impl<T> Serialize for MsgReceiver<T> {
 }
 
 // Stub implementation: calling `serialize` panics via `unreachable!()`.
 // NOTE(review): presumably it exists only to satisfy trait bounds, because
 // the mpsc channel ends are never actually serialized - confirm.
 impl<T> Serialize for MsgSender<T> {
     fn serialize<S: Serializer>(&self, _: S) -> Result<S::Ok, S::Error> {
         unreachable!();
     }
 }
 
-impl<T> Deserialize for MsgReceiver<T> {
+impl<'de, T> Deserialize<'de> for MsgReceiver<T> {
     fn deserialize<D>(_: D) -> Result<MsgReceiver<T>, D::Error>
-                      where D: Deserializer {
+                      where D: Deserializer<'de> {
         unreachable!(); // stub: panics if ever called; the patch only adds the `'de` lifetime to the signature
     }
 }
 
-impl<T> Deserialize for MsgSender<T> {
+impl<'de, T> Deserialize<'de> for MsgSender<T> {
     fn deserialize<D>(_: D) -> Result<MsgSender<T>, D::Error>
-                      where D: Deserializer {
+                      where D: Deserializer<'de> {
         unreachable!(); // stub: panics if ever called; the patch only adds the `'de` lifetime to the signature
     }
 }
--- a/gfx/webrender_traits/src/display_item.rs
+++ b/gfx/webrender_traits/src/display_item.rs
@@ -58,16 +58,18 @@ pub enum SpecificDisplayItem {
     BoxShadow(BoxShadowDisplayItem),
     Gradient(GradientDisplayItem),
     RadialGradient(RadialGradientDisplayItem),
     Iframe(IframeDisplayItem),
     PushStackingContext(PushStackingContextDisplayItem),
     PopStackingContext,
     SetGradientStops,
     SetClipRegion(ClipRegion),
+    PushNestedDisplayList,
+    PopNestedDisplayList,
 }
 
 /// Display item payload holding a clip id together with its parent's id.
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct ClipDisplayItem {
     pub id: ClipId,
     pub parent_id: ClipId,
 }
 
@@ -574,26 +576,28 @@ impl ComplexClipRegion {
     /// Builds a `ComplexClipRegion` from a rectangle and its border radii.
     pub fn new(rect: LayoutRect, radii: BorderRadius) -> ComplexClipRegion {
         ComplexClipRegion {
             rect: rect,
             radii: radii,
         }
     }
 }
 
+pub type NestingIndex = u64; // second field of `ClipId::Clip`; `ClipId::is_nested` treats any non-zero value as nested
+
 /// Identifier of a clip node; every variant carries its `PipelineId`.
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ClipId {
     // (id, nesting index, pipeline). The nesting index is added by this patch;
     // `Clip(0, 0, _)` is the root scroll node.
-    Clip(u64, PipelineId),
+    Clip(u64, NestingIndex, PipelineId),
     // Returned by `ClipId::new`; the id is exposed through `external_id()`.
     ClipExternalId(u64, PipelineId),
     // `DynamicallyAddedNode(0, _)` is the root reference frame.
     DynamicallyAddedNode(u64, PipelineId),
 }
 
 impl ClipId {
     /// Id of the pipeline's root scroll node: `Clip` with id 0 and, since
     /// this patch, nesting index 0.
     pub fn root_scroll_node(pipeline_id: PipelineId) -> ClipId {
-        ClipId::Clip(0, pipeline_id)
+        ClipId::Clip(0, 0, pipeline_id)
     }
 
     /// Id of the pipeline's root reference frame: `DynamicallyAddedNode`
     /// with id 0.
     pub fn root_reference_frame(pipeline_id: PipelineId) -> ClipId {
         ClipId::DynamicallyAddedNode(0, pipeline_id)
     }
 
     pub fn new(id: u64, pipeline_id: PipelineId) -> ClipId {
         // We do this because it is very easy to create accidentally create something that
@@ -602,32 +606,39 @@ impl ClipId {
             return ClipId::root_scroll_node(pipeline_id);
         }
 
         ClipId::ClipExternalId(id, pipeline_id)
     }
 
     /// Returns the pipeline this id belongs to, whatever the variant.
     pub fn pipeline_id(&self) -> PipelineId {
         match *self {
-            ClipId::Clip(_, pipeline_id) |
+            ClipId::Clip(_, _, pipeline_id) |
             ClipId::ClipExternalId(_, pipeline_id) |
             ClipId::DynamicallyAddedNode(_, pipeline_id) => pipeline_id,
         }
     }
     }
 
     /// Returns the numeric id of a `ClipExternalId`, or `None` for any
     /// other variant.
     pub fn external_id(&self) -> Option<u64> {
         match *self {
             ClipId::ClipExternalId(id, _) => Some(id),
             _ => None,
         }
     }
 
     /// True only for the id produced by `root_scroll_node`; after this patch
     /// both the id and the nesting index must be 0.
     pub fn is_root_scroll_node(&self) -> bool {
         match *self {
-            ClipId::Clip(id, _) if id == 0 => true,
+            ClipId::Clip(0, 0, _)  => true,
+            _ => false,
+        }
+    }
+
     /// True for a `Clip` whose nesting index is non-zero; always false for
     /// the other variants.
+    pub fn is_nested(&self) -> bool {
+        match *self {
+            ClipId::Clip(_, nesting_level, _) => nesting_level != 0,
             _ => false,
         }
     }
 }
 
 macro_rules! define_empty_heap_size_of {
     ($name:ident) => {
         impl ::heapsize::HeapSizeOf for $name {
--- a/gfx/webrender_traits/src/display_list.rs
+++ b/gfx/webrender_traits/src/display_list.rs
@@ -49,18 +49,16 @@ pub struct BuiltDisplayList {
 
 /// Describes the memory layout of a display list.
 ///
 /// A display list consists of some number of display list items, followed by a number of display
 /// items.
 ///
 /// This patch removes the `display_list_items_size` field, leaving only the
 /// two builder time stamps.
 #[repr(C)]
 #[derive(Copy, Clone, Default, Deserialize, Serialize)]
 pub struct BuiltDisplayListDescriptor {
-    /// The size in bytes of the display list items in this display list.
-    display_list_items_size: usize,
     /// The first IPC time stamp: before any work has been done
     builder_start_time: u64,
     /// The second IPC time stamp: after serialization
     builder_finish_time: u64,
 }
 
 pub struct BuiltDisplayListIter<'a> {
     list: &'a BuiltDisplayList,
@@ -87,19 +85,16 @@ enum Peek {
 #[derive(Clone)]
 pub struct AuxIter<'a, T> {
     // Serialized bytes not yet consumed; `next()` deserializes from the front.
     data: &'a [u8],
     // Number of items still to be yielded; decremented by `next()` and
     // reported by `size_hint()`.
     size: usize,
     // Marker tying the iterator to its item type without storing a `T`.
     _boo: PhantomData<T>,
 }
 
 // This patch removes `size()`, the only method shown here, leaving the impl
 // block empty.
 impl BuiltDisplayListDescriptor {
-    pub fn size(&self) -> usize {
-        self.display_list_items_size
-    }
 }
 
 impl BuiltDisplayList {
     /// Wraps already-serialized bytes and their descriptor without
     /// inspecting either.
     pub fn from_data(data: Vec<u8>, descriptor: BuiltDisplayListDescriptor) -> BuiltDisplayList {
         BuiltDisplayList {
             data: data,
             descriptor: descriptor,
         }
     }
@@ -120,17 +115,17 @@ impl BuiltDisplayList {
     /// Returns `(builder_start_time, builder_finish_time)` from the descriptor.
     pub fn times(&self) -> (u64, u64) {
       (self.descriptor.builder_start_time, self.descriptor.builder_finish_time)
     }
 
     /// Creates a `BuiltDisplayListIter` over this list.
     pub fn iter(&self) -> BuiltDisplayListIter {
         BuiltDisplayListIter::new(self)
     }
 
-    pub fn get<T: Deserialize>(&self, range: ItemRange<T>) -> AuxIter<T> {
+    pub fn get<'de, T: Deserialize<'de>>(&self, range: ItemRange<T>) -> AuxIter<T> {
         AuxIter::new(&self.data[range.start .. range.start + range.length]) // iterates the bytes covered by `range`; the slice indexing panics if the range is out of bounds
     }
 }
 
 impl<'a> BuiltDisplayListIter<'a> {
     pub fn new(list: &'a BuiltDisplayList) -> Self {
         BuiltDisplayListIter {
             list: list,
@@ -207,17 +202,17 @@ impl<'a> BuiltDisplayListIter<'a> {
             break;
         }
 
         Some(self.as_ref())
     }
 
     /// Returns the byte-range the slice occupied, and the number of elements
     /// in the slice.
-    fn skip_slice<T: Deserialize>(&mut self) -> (ItemRange<T>, usize) {
+    fn skip_slice<T: for<'de> Deserialize<'de>>(&mut self) -> (ItemRange<T>, usize) {
         let base = self.list.data.as_ptr() as usize;
         let start = self.data.as_ptr() as usize;
 
         // Read through the values (this is a bit of a hack to reuse logic)
         let mut iter = AuxIter::<T>::new(self.data);
         let count = iter.len();
         for _ in &mut iter {}
         let end = iter.data.as_ptr() as usize;
@@ -328,17 +323,17 @@ impl<'a, 'b> DisplayItemRef<'a, 'b> {
             cur_glyphs: ItemRange::default(),
             cur_filters: ItemRange::default(),
             cur_clip: ClipRegion::empty(),
             peeking: Peek::NotPeeking,
         }
     }
 }
 
-impl<'a, T: Deserialize> AuxIter<'a, T> {
+impl<'de, 'a, T: Deserialize<'de>> AuxIter<'a, T> {
     pub fn new(mut data: &'a [u8]) -> Self {
 
         let size: usize = if data.len() == 0 {
             0   // Accept empty ItemRanges pointing anywhere
         } else {
             bincode::deserialize_from(&mut data, bincode::Infinite)
                                   .expect("MEH: malicious input?")
         };
@@ -346,17 +341,17 @@ impl<'a, T: Deserialize> AuxIter<'a, T> 
         AuxIter {
             data: data,
             size: size,
             _boo: PhantomData,
         }
     }
 }
 
-impl<'a, T: Deserialize> Iterator for AuxIter<'a, T> {
+impl<'a, T: for<'de> Deserialize<'de>> Iterator for AuxIter<'a, T> {
     type Item = T;
 
     fn next(&mut self) -> Option<T> {
         if self.size == 0 {
             None
         } else {
             self.size -= 1;
             Some(bincode::deserialize_from(&mut self.data, bincode::Infinite)
@@ -364,17 +359,17 @@ impl<'a, T: Deserialize> Iterator for Au
         }
     }
 
     /// Exact bounds: both ends are the number of items still to be yielded.
     fn size_hint(&self) -> (usize, Option<usize>) {
         (self.size, Some(self.size))
     }
 }
 
-impl<'a, T: Deserialize> ::std::iter::ExactSizeIterator for AuxIter<'a, T> { }
+impl<'a, T: for<'de> Deserialize<'de>> ::std::iter::ExactSizeIterator for AuxIter<'a, T> { } // relies on the exact `size_hint`; only the `Deserialize` bound changes in this patch
 
 
 // This is purely for the JSON writer in wrench
 impl Serialize for BuiltDisplayList {
     fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
         let mut seq = serializer.serialize_seq(None)?;
         let mut traversal = self.iter();
         while let Some(item) = traversal.next() {
@@ -430,23 +425,29 @@ pub struct DisplayListBuilder {
 
     /// The size of the content of this display list. This is used to allow scrolling
     /// outside the bounds of the display list items themselves.
     content_size: LayoutSize,
 }
 
 impl DisplayListBuilder {
     pub fn new(pipeline_id: PipelineId, content_size: LayoutSize) -> DisplayListBuilder {
+        Self::with_capacity(pipeline_id, content_size, 0)
+    }
+
+    pub fn with_capacity(pipeline_id: PipelineId,
+                         content_size: LayoutSize,
+                         capacity: usize) -> DisplayListBuilder {
         let start_time = precise_time_ns();
 
         // We start at 1 here, because the root scroll id is always 0.
         const FIRST_CLIP_ID : u64 = 1;
 
         DisplayListBuilder {
-            data: Vec::with_capacity(1024 * 1024),
+            data: Vec::with_capacity(capacity),
             pipeline_id: pipeline_id,
             clip_stack: vec![ClipAndScrollInfo::simple(ClipId::root_scroll_node(pipeline_id))],
             next_clip_id: FIRST_CLIP_ID,
             builder_start_time: start_time,
             content_size: content_size,
         }
     }
 
@@ -493,20 +494,16 @@ impl DisplayListBuilder {
         for elem in iter {
             count += 1;
             bincode::serialize_into(&mut self.data, &elem, bincode::Infinite).unwrap();
         }
 
         debug_assert_eq!(len, count);
     }
 
-    fn push_range<T>(&mut self, range: ItemRange<T>, src: &BuiltDisplayList) {
-        self.data.extend_from_slice(&src.data[range.start..range.start+range.length]);
-    }
-
     pub fn push_rect(&mut self,
                      rect: LayoutRect,
                      _token: ClipRegionToken,
                      color: ColorF) {
         let item = SpecificDisplayItem::Rectangle(RectangleDisplayItem {
             color: color,
         });
 
@@ -860,17 +857,17 @@ impl DisplayListBuilder {
                        content_rect: LayoutRect,
                        _token: ClipRegionToken,
                        id: Option<ClipId>)
                        -> ClipId {
         let id = match id {
             Some(id) => id,
             None => {
                 self.next_clip_id += 1;
-                ClipId::Clip(self.next_clip_id - 1, self.pipeline_id)
+                ClipId::Clip(self.next_clip_id - 1, 0, self.pipeline_id)
             }
         };
 
         let item = SpecificDisplayItem::Clip(ClipDisplayItem {
             id: id,
             parent_id: self.clip_stack.last().unwrap().scroll_node_id,
         });
 
@@ -904,52 +901,29 @@ impl DisplayListBuilder {
     }
 
     pub fn push_iframe(&mut self, rect: LayoutRect, _token: ClipRegionToken, pipeline_id: PipelineId) {
         let item = SpecificDisplayItem::Iframe(IframeDisplayItem { pipeline_id: pipeline_id });
         self.push_item(item, rect);
     }
 
     // Don't use this function. It will go away.
-    // We're using it as a hack in Gecko to retain parts sub-parts of display lists so that
-    // we can regenerate them without building Gecko display items.
-    pub fn push_built_display_list(&mut self, dl: BuiltDisplayList) {
-        // NOTE: Iframe and Clips aren't supported.
-
-        // FIXME: what `iter` here is doing an expensive deserialization
-        // because we need to update clip_and_scroll info! If we didn't need to
-        // update that, this function could just be memcopy!
-
-        // This implementation is basically BuiltDisplayListIter::next in reverse.
+    //
+    // We're using this method as a hack in Gecko to retain sub-parts of display
+    // lists so that we can regenerate them without building Gecko display items. WebRender
+    // will replace references to the root scroll frame id with the current scroll frame
+    // id.
+    pub fn push_nested_display_list(&mut self, built_display_list: &BuiltDisplayList) {
+        self.push_clip_region(&LayoutRect::zero(), vec![], None);
+        self.push_new_empty_item(SpecificDisplayItem::PushNestedDisplayList);
 
-        let mut iter = dl.iter();
-        while let Some(item) = iter.next() {
-            // First handle explicit prefix dummy items
-            let clip_region = item.clip_region();
-            if *clip_region != ClipRegion::empty() {
-                self.push_new_empty_item(SpecificDisplayItem::SetClipRegion(*clip_region));
-                self.push_range(clip_region.complex_clips, &dl);
-            }
+        self.data.extend_from_slice(&built_display_list.data);
 
-            let stops = item.gradient_stops();
-            if stops != ItemRange::default() {
-                self.push_new_empty_item(SpecificDisplayItem::SetGradientStops);
-                self.push_range(stops, &dl);
-            }
-
-            // Then reinsert the actual item, updating its clip_and_scroll
-            self.push_item(*item.item(), item.rect());
-
-            // Then handle implicit suffix items
-            match *item.item() {
-                SpecificDisplayItem::Text(_)                => self.push_range(item.glyphs(), &dl),
-                SpecificDisplayItem::PushStackingContext(_) => self.push_range(item.filters(), &dl),
-                _ => { /* do nothing */ }
-            }
-        }
+        self.push_clip_region(&LayoutRect::zero(), vec![], None);
+        self.push_new_empty_item(SpecificDisplayItem::PopNestedDisplayList);
     }
 
     pub fn push_clip_region<I>(&mut self,
                             rect: &LayoutRect,
                             complex: I,
                             image_mask: Option<ImageMask>)
                             -> ClipRegionToken
     where I: IntoIterator<Item = ComplexClipRegion>,
@@ -965,17 +939,16 @@ impl DisplayListBuilder {
 
     pub fn finalize(self) -> (PipelineId, LayoutSize, BuiltDisplayList) {
         let end_time = precise_time_ns();
 
         (self.pipeline_id,
          self.content_size,
          BuiltDisplayList {
             descriptor: BuiltDisplayListDescriptor {
-                display_list_items_size: self.data.len(),
                 builder_start_time: self.builder_start_time,
                 builder_finish_time: end_time,
             },
             data: self.data,
          })
     }
 }
 
--- a/gfx/webrender_traits/src/font.rs
+++ b/gfx/webrender_traits/src/font.rs
@@ -21,18 +21,18 @@ pub struct NativeFontHandle(pub CGFont);
 impl Serialize for NativeFontHandle {
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer {
         let postscript_name = self.0.postscript_name().to_string();
         postscript_name.serialize(serializer)
     }
 }
 
 #[cfg(target_os = "macos")]
-impl Deserialize for NativeFontHandle {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer {
+impl<'de> Deserialize<'de> for NativeFontHandle {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> {
         let postscript_name: String = try!(Deserialize::deserialize(deserializer));
 
         match CGFont::from_name(&CFString::new(&*postscript_name)) {
             Ok(font) => Ok(NativeFontHandle(font)),
             _ => Err(de::Error::custom("Couldn't find a font with that PostScript name!")),
         }
     }
 }
--- a/gfx/webrender_traits/src/image.rs
+++ b/gfx/webrender_traits/src/image.rs
@@ -41,27 +41,27 @@ pub struct ExternalImageData {
 }
 
 #[repr(u32)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub enum ImageFormat {
     Invalid  = 0,
     A8       = 1,
     RGB8     = 2,
-    RGBA8    = 3,
+    BGRA8    = 3,
     RGBAF32  = 4,
     RG8      = 5,
 }
 
 impl ImageFormat {
     pub fn bytes_per_pixel(self) -> Option<u32> {
         match self {
             ImageFormat::A8 => Some(1),
             ImageFormat::RGB8 => Some(3),
-            ImageFormat::RGBA8 => Some(4),
+            ImageFormat::BGRA8 => Some(4),
             ImageFormat::RGBAF32 => Some(16),
             ImageFormat::RG8 => Some(2),
             ImageFormat::Invalid => None,
         }
     }
 }
 
 #[derive(Copy, Clone, Debug, Deserialize, PartialEq, Serialize)]
--- a/gfx/webrender_traits/src/lib.rs
+++ b/gfx/webrender_traits/src/lib.rs
@@ -13,19 +13,18 @@ extern crate core;
 extern crate euclid;
 extern crate gleam;
 #[macro_use]
 extern crate heapsize;
 #[cfg(feature = "ipc")]
 extern crate ipc_channel;
 #[cfg(feature = "webgl")]
 extern crate offscreen_gl_context;
+#[macro_use]
 extern crate serde;
-#[macro_use]
-extern crate serde_derive;
 extern crate time;
 
 #[cfg(target_os = "macos")]
 extern crate core_foundation;
 
 #[cfg(target_os = "macos")]
 extern crate core_graphics;
 
--- a/gfx/webrender_traits/src/webgl.rs
+++ b/gfx/webrender_traits/src/webgl.rs
@@ -171,19 +171,19 @@ macro_rules! define_resource_id_struct {
         }
     };
 }
 
 macro_rules! define_resource_id {
     ($name:ident) => {
         define_resource_id_struct!($name);
 
-        impl ::serde::Deserialize for $name {
+        impl<'de> ::serde::Deserialize<'de> for $name {
             fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-                where D: ::serde::Deserializer
+                where D: ::serde::Deserializer<'de>
             {
                 let id = try!(u32::deserialize(deserializer));
                 if id == 0 {
                     Err(::serde::de::Error::custom("expected a non-zero value"))
                 } else {
                     Ok(unsafe { $name::new(id) })
                 }
             }