Bug 1419440 - Update webrender to commit b7b07562fda338fcb2faff66ce01aafb6235fbcf. r=mstange
author Kartikaya Gupta <kgupta@mozilla.com>
Thu, 23 Nov 2017 09:38:38 -0500
changeset 393481 4bd54453fd2e845387d025804b12342614f28a4e
parent 393480 f7b7bdd7b05c992b4fb435a26a6278829063748b
child 393482 85a4275ed544c2e042deb76b78d9befd8d3a317c
push id 97661
push user rgurzau@mozilla.com
push date Thu, 23 Nov 2017 22:38:55 +0000
treeherder mozilla-inbound@5482a1a3cfee [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mstange
bugs 1419440
milestone 59.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1419440 - Update webrender to commit b7b07562fda338fcb2faff66ce01aafb6235fbcf. r=mstange MozReview-Commit-ID: F1WeQbL6Bhg
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/animation.rs
gfx/webrender/examples/basic.rs
gfx/webrender/examples/blob.rs
gfx/webrender/examples/common/boilerplate.rs
gfx/webrender/examples/document.rs
gfx/webrender/examples/frame_output.rs
gfx/webrender/examples/iframe.rs
gfx/webrender/examples/image_resize.rs
gfx/webrender/examples/scrolling.rs
gfx/webrender/examples/texture_cache_stress.rs
gfx/webrender/examples/yuv.rs
gfx/webrender/res/cs_clip_image.glsl
gfx/webrender/res/cs_clip_rectangle.glsl
gfx/webrender/res/cs_text_run.glsl
gfx/webrender/res/prim_shared.glsl
gfx/webrender/res/ps_border_corner.glsl
gfx/webrender/res/ps_border_edge.glsl
gfx/webrender/res/ps_gradient.glsl
gfx/webrender/res/ps_image.glsl
gfx/webrender/res/ps_line.glsl
gfx/webrender/res/ps_rectangle.glsl
gfx/webrender/res/ps_text_run.glsl
gfx/webrender/res/ps_yuv_image.glsl
gfx/webrender/src/box_shadow.rs
gfx/webrender/src/debug_render.rs
gfx/webrender/src/debug_server.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/frame_builder.rs
gfx/webrender/src/glyph_rasterizer.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/picture.rs
gfx/webrender/src/platform/macos/font.rs
gfx/webrender/src/platform/unix/font.rs
gfx/webrender/src/platform/windows/font.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/profiler.rs
gfx/webrender/src/query.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender/src/util.rs
gfx/webrender_api/Cargo.toml
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/display_item.rs
gfx/webrender_api/src/display_list.rs
gfx/webrender_bindings/Cargo.toml
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -170,9 +170,9 @@ 2. Sometimes autoland tip has changed en
    has an env var you can set to do this). In theory you can get the same
    result by resolving the conflict manually but Cargo.lock files are usually not
    trivial to merge by hand. If it's just the third_party/rust dir that has conflicts
    you can delete it and run |mach vendor rust| again to repopulate it.
 
 -------------------------------------------------------------------------------
 
 The version of WebRender currently in the tree is:
-81cfbcf0763205f25329adb9b2ff75d1cd56e3f1
+b7b07562fda338fcb2faff66ce01aafb6235fbcf
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -42,10 +42,10 @@ servo-glutin = "0.13"     # for the exam
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.3", default-features = false }
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
-core-graphics = "0.12.2"
+core-graphics = "0.12.3"
 core-text = { version = "8.0", default-features = false }
--- a/gfx/webrender/examples/animation.rs
+++ b/gfx/webrender/examples/animation.rs
@@ -30,17 +30,17 @@ struct App {
 }
 
 impl Example for App {
     fn render(
         &mut self,
         _api: &RenderApi,
         builder: &mut DisplayListBuilder,
         _resources: &mut ResourceUpdates,
-        _layout_size: LayoutSize,
+        _framebuffer_size: DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
         // Create a 200x200 stacking context with an animated transform property.
         let bounds = (0, 0).to(200, 200);
         let complex_clip = ComplexClipRegion {
             rect: bounds,
             radii: BorderRadius::uniform(50.0),
--- a/gfx/webrender/examples/basic.rs
+++ b/gfx/webrender/examples/basic.rs
@@ -180,26 +180,29 @@ fn main() {
     boilerplate::main_wrapper(&mut app, None);
 }
 
 struct App {
     touch_state: TouchState,
 }
 
 impl Example for App {
+    // Make this the only example to test all shaders for compile errors.
+    const PRECACHE_SHADERS: bool = true;
+
     fn render(
         &mut self,
         api: &RenderApi,
         builder: &mut DisplayListBuilder,
         resources: &mut ResourceUpdates,
-        layout_size: LayoutSize,
+        _: DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
-        let bounds = LayoutRect::new(LayoutPoint::zero(), layout_size);
+        let bounds = LayoutRect::new(LayoutPoint::zero(), builder.content_size());
         let info = LayoutPrimitiveInfo::new(bounds);
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
             None,
             TransformStyle::Flat,
             None,
             MixBlendMode::Normal,
--- a/gfx/webrender/examples/blob.rs
+++ b/gfx/webrender/examples/blob.rs
@@ -12,18 +12,18 @@ mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
 use rayon::Configuration as ThreadPoolConfig;
 use rayon::ThreadPool;
 use std::collections::HashMap;
 use std::collections::hash_map::Entry;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
-use webrender::api::{self, DeviceUintRect, DisplayListBuilder, DocumentId, LayoutSize, PipelineId,
-                     RenderApi, ResourceUpdates};
+use webrender::api::{self,
+    DisplayListBuilder, DocumentId, PipelineId, RenderApi, ResourceUpdates};
 
 // This example shows how to implement a very basic BlobImageRenderer that can only render
 // a checkerboard pattern.
 
 // The deserialized command list internally used by this example is just a color.
 type ImageRenderingCommands = api::ColorU;
 
 // Serialize/deserialze the blob.
@@ -140,17 +140,17 @@ impl CheckerboardRenderer {
 }
 
 impl api::BlobImageRenderer for CheckerboardRenderer {
     fn add(&mut self, key: api::ImageKey, cmds: api::BlobImageData, _: Option<api::TileSize>) {
         self.image_cmds
             .insert(key, Arc::new(deserialize_blob(&cmds[..]).unwrap()));
     }
 
-    fn update(&mut self, key: api::ImageKey, cmds: api::BlobImageData, _dirty_rect: Option<DeviceUintRect>) {
+    fn update(&mut self, key: api::ImageKey, cmds: api::BlobImageData, _dirty_rect: Option<api::DeviceUintRect>) {
         // Here, updating is just replacing the current version of the commands with
         // the new one (no incremental updates).
         self.image_cmds
             .insert(key, Arc::new(deserialize_blob(&cmds[..]).unwrap()));
     }
 
     fn delete(&mut self, key: api::ImageKey) {
         self.image_cmds.remove(&key);
@@ -222,17 +222,17 @@ impl api::BlobImageRenderer for Checkerb
 struct App {}
 
 impl Example for App {
     fn render(
         &mut self,
         api: &RenderApi,
         builder: &mut DisplayListBuilder,
         resources: &mut ResourceUpdates,
-        layout_size: LayoutSize,
+        _framebuffer_size: api::DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
         let blob_img1 = api.generate_image_key();
         resources.add_image(
             blob_img1,
             api::ImageDescriptor::new(500, 500, api::ImageFormat::BGRA8, true),
             api::ImageData::new_blob_image(serialize_blob(api::ColorU::new(50, 50, 150, 255))),
@@ -242,17 +242,17 @@ impl Example for App {
         let blob_img2 = api.generate_image_key();
         resources.add_image(
             blob_img2,
             api::ImageDescriptor::new(200, 200, api::ImageFormat::BGRA8, true),
             api::ImageData::new_blob_image(serialize_blob(api::ColorU::new(50, 150, 50, 255))),
             None,
         );
 
-        let bounds = api::LayoutRect::new(api::LayoutPoint::zero(), layout_size);
+        let bounds = api::LayoutRect::new(api::LayoutPoint::zero(), builder.content_size());
         let info = api::LayoutPrimitiveInfo::new(bounds);
         builder.push_stacking_context(
             &info,
             api::ScrollPolicy::Scrollable,
             None,
             api::TransformStyle::Flat,
             None,
             api::MixBlendMode::Normal,
@@ -274,25 +274,16 @@ impl Example for App {
             api::LayoutSize::new(200.0, 200.0),
             api::LayoutSize::new(0.0, 0.0),
             api::ImageRendering::Auto,
             blob_img2,
         );
 
         builder.pop_stacking_context();
     }
-
-    fn on_event(
-        &mut self,
-        _event: glutin::Event,
-        _api: &RenderApi,
-        _document_id: DocumentId,
-    ) -> bool {
-        false
-    }
 }
 
 fn main() {
     let worker_config =
         ThreadPoolConfig::new().thread_name(|idx| format!("WebRender:Worker#{}", idx));
 
     let workers = Arc::new(ThreadPool::new(worker_config).unwrap());
 
--- a/gfx/webrender/examples/common/boilerplate.rs
+++ b/gfx/webrender/examples/common/boilerplate.rs
@@ -1,13 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 extern crate env_logger;
+extern crate euclid;
 
 use gleam::gl;
 use glutin;
 use std::env;
 use std::path::PathBuf;
 use webrender;
 use webrender::api::*;
 
@@ -23,24 +24,23 @@ impl Notifier {
 
 impl RenderNotifier for Notifier {
     fn clone(&self) -> Box<RenderNotifier> {
         Box::new(Notifier {
             window_proxy: self.window_proxy.clone(),
         })
     }
 
-    fn new_frame_ready(&self) {
+    fn wake_up(&self) {
         #[cfg(not(target_os = "android"))]
         self.window_proxy.wakeup_event_loop();
     }
 
-    fn new_scroll_frame_ready(&self, _composite_needed: bool) {
-        #[cfg(not(target_os = "android"))]
-        self.window_proxy.wakeup_event_loop();
+    fn new_document_ready(&self, _: DocumentId, _scrolled: bool, _composite_needed: bool) {
+        self.wake_up();
     }
 }
 
 pub trait HandyDandyRectBuilder {
     fn to(&self, x2: i32, y2: i32) -> LayoutRect;
     fn by(&self, w: i32, h: i32) -> LayoutRect;
 }
 // Allows doing `(x, y).to(x2, y2)` or `(x, y).by(width, height)` with i32
@@ -57,50 +57,56 @@ impl HandyDandyRectBuilder for (i32, i32
         LayoutRect::new(
             LayoutPoint::new(self.0 as f32, self.1 as f32),
             LayoutSize::new(w as f32, h as f32),
         )
     }
 }
 
 pub trait Example {
+    const TITLE: &'static str = "WebRender Sample App";
+    const PRECACHE_SHADERS: bool = false;
     fn render(
         &mut self,
         api: &RenderApi,
         builder: &mut DisplayListBuilder,
         resources: &mut ResourceUpdates,
-        layout_size: LayoutSize,
+        framebuffer_size: DeviceUintSize,
         pipeline_id: PipelineId,
         document_id: DocumentId,
     );
-    fn on_event(&mut self, event: glutin::Event, api: &RenderApi, document_id: DocumentId) -> bool;
-    fn get_external_image_handler(&self) -> Option<Box<webrender::ExternalImageHandler>> {
-        None
+    fn on_event(&mut self, glutin::Event, &RenderApi, DocumentId) -> bool {
+        false
     }
-    fn get_output_image_handler(
+    fn get_image_handlers(
         &mut self,
         _gl: &gl::Gl,
-    ) -> Option<Box<webrender::OutputImageHandler>> {
-        None
+    ) -> (Option<Box<webrender::ExternalImageHandler>>, 
+          Option<Box<webrender::OutputImageHandler>>) {
+        (None, None)
     }
-    fn draw_custom(&self, _gl: &gl::Gl) {}
+    fn draw_custom(&self, _gl: &gl::Gl) {
+    }
 }
 
-pub fn main_wrapper(example: &mut Example, options: Option<webrender::RendererOptions>) {
+pub fn main_wrapper<E: Example>(
+    example: &mut E,
+    options: Option<webrender::RendererOptions>,
+) {
     env_logger::init().unwrap();
 
     let args: Vec<String> = env::args().collect();
     let res_path = if args.len() > 1 {
         Some(PathBuf::from(&args[1]))
     } else {
         None
     };
 
     let window = glutin::WindowBuilder::new()
-        .with_title("WebRender Sample App")
+        .with_title(E::TITLE)
         .with_multitouch()
         .with_gl(glutin::GlRequest::GlThenGles {
             opengl_version: (3, 2),
             opengles_version: (3, 0),
         })
         .build()
         .unwrap();
 
@@ -114,143 +120,142 @@ pub fn main_wrapper(example: &mut Exampl
         },
         gl::GlType::Gles => unsafe {
             gl::GlesFns::load_with(|symbol| window.get_proc_address(symbol) as *const _)
         },
     };
 
     println!("OpenGL version {}", gl.get_string(gl::VERSION));
     println!("Shader resource path: {:?}", res_path);
+    let device_pixel_ratio = window.hidpi_factor();
+    println!("Device pixel ratio: {}", device_pixel_ratio);
 
-    let (width, height) = window.get_inner_size_pixels().unwrap();
-
+    println!("Loading shaders...");
     let opts = webrender::RendererOptions {
         resource_override_path: res_path,
         debug: true,
-        precache_shaders: true,
-        device_pixel_ratio: window.hidpi_factor(),
+        precache_shaders: E::PRECACHE_SHADERS,
+        device_pixel_ratio,
+        clear_color: Some(ColorF::new(0.3, 0.0, 0.0, 1.0)),
         ..options.unwrap_or(webrender::RendererOptions::default())
     };
 
-    let size = DeviceUintSize::new(width, height);
+    let framebuffer_size = {
+        let (width, height) = window.get_inner_size_pixels().unwrap();
+        DeviceUintSize::new(width, height)
+    };
     let notifier = Box::new(Notifier::new(window.create_window_proxy()));
     let (mut renderer, sender) = webrender::Renderer::new(gl.clone(), notifier, opts).unwrap();
     let api = sender.create_api();
-    let document_id = api.add_document(size);
+    let document_id = api.add_document(framebuffer_size, 0);
 
-    if let Some(external_image_handler) = example.get_external_image_handler() {
-        renderer.set_external_image_handler(external_image_handler);
-    }
-    if let Some(output_image_handler) = example.get_output_image_handler(&*gl) {
+    let (external, output) = example.get_image_handlers(&*gl);
+
+    if let Some(output_image_handler) = output {
         renderer.set_output_image_handler(output_image_handler);
     }
 
-    let epoch = Epoch(0);
-    let root_background_color = ColorF::new(0.3, 0.0, 0.0, 1.0);
+    if let Some(external_image_handler) = external {
+        renderer.set_external_image_handler(external_image_handler);
+    }
 
+    let epoch = Epoch(0);
     let pipeline_id = PipelineId(0, 0);
-    let layout_size = LayoutSize::new(width as f32, height as f32);
+    let layout_size = framebuffer_size.to_f32() / euclid::ScaleFactor::new(device_pixel_ratio);
     let mut builder = DisplayListBuilder::new(pipeline_id, layout_size);
     let mut resources = ResourceUpdates::new();
 
     example.render(
         &api,
         &mut builder,
         &mut resources,
-        layout_size,
+        framebuffer_size,
         pipeline_id,
         document_id,
     );
     api.set_display_list(
         document_id,
         epoch,
-        Some(root_background_color),
-        LayoutSize::new(width as f32, height as f32),
+        None,
+        layout_size,
         builder.finalize(),
         true,
         resources,
     );
     api.set_root_pipeline(document_id, pipeline_id);
     api.generate_frame(document_id, None);
 
+    println!("Entering event loop");
     'outer: for event in window.wait_events() {
         let mut events = Vec::new();
         events.push(event);
-
-        for event in window.poll_events() {
-            events.push(event);
-        }
+        events.extend(window.poll_events());
 
         for event in events {
             match event {
                 glutin::Event::Closed |
                 glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Escape)) => break 'outer,
 
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::P),
                 ) => {
-                    let mut flags = renderer.get_debug_flags();
-                    flags.toggle(webrender::DebugFlags::PROFILER_DBG);
-                    renderer.set_debug_flags(flags);
+                    renderer.toggle_debug_flags(webrender::DebugFlags::PROFILER_DBG);
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::O),
                 ) => {
-                    let mut flags = renderer.get_debug_flags();
-                    flags.toggle(webrender::DebugFlags::RENDER_TARGET_DBG);
-                    renderer.set_debug_flags(flags);
+                    renderer.toggle_debug_flags(webrender::DebugFlags::RENDER_TARGET_DBG);
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::I),
                 ) => {
-                    let mut flags = renderer.get_debug_flags();
-                    flags.toggle(webrender::DebugFlags::TEXTURE_CACHE_DBG);
-                    renderer.set_debug_flags(flags);
+                    renderer.toggle_debug_flags(webrender::DebugFlags::TEXTURE_CACHE_DBG);
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::B),
                 ) => {
-                    let mut flags = renderer.get_debug_flags();
-                    flags.toggle(webrender::DebugFlags::ALPHA_PRIM_DBG);
-                    renderer.set_debug_flags(flags);
+                    renderer.toggle_debug_flags(webrender::DebugFlags::ALPHA_PRIM_DBG);
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::Q),
                 ) => {
-                    renderer.toggle_queries_enabled();
+                    renderer.toggle_debug_flags(webrender::DebugFlags::GPU_TIME_QUERIES
+                        | webrender::DebugFlags::GPU_SAMPLE_QUERIES);
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::Key1),
                 ) => {
-                    api.set_window_parameters(document_id,
-                        size,
-                        DeviceUintRect::new(DeviceUintPoint::zero(), size),
+                    api.set_window_parameters(
+                        document_id,
+                        framebuffer_size,
+                        DeviceUintRect::new(DeviceUintPoint::zero(), framebuffer_size),
                         1.0
                     );
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::Key2),
                 ) => {
-                    api.set_window_parameters(document_id,
-                        size,
-                        DeviceUintRect::new(DeviceUintPoint::zero(), size),
+                    api.set_window_parameters(
+                        document_id,
+                        framebuffer_size,
+                        DeviceUintRect::new(DeviceUintPoint::zero(), framebuffer_size),
                         2.0
                     );
                 }
                 glutin::Event::KeyboardInput(
                     glutin::ElementState::Pressed,
                     _,
                     Some(glutin::VirtualKeyCode::M),
                 ) => {
@@ -259,34 +264,34 @@ pub fn main_wrapper(example: &mut Exampl
                 _ => if example.on_event(event, &api, document_id) {
                     let mut builder = DisplayListBuilder::new(pipeline_id, layout_size);
                     let mut resources = ResourceUpdates::new();
 
                     example.render(
                         &api,
                         &mut builder,
                         &mut resources,
-                        layout_size,
+                        framebuffer_size,
                         pipeline_id,
                         document_id,
                     );
                     api.set_display_list(
                         document_id,
                         epoch,
-                        Some(root_background_color),
-                        LayoutSize::new(width as f32, height as f32),
+                        None,
+                        layout_size,
                         builder.finalize(),
                         true,
                         resources,
                     );
                     api.generate_frame(document_id, None);
                 },
             }
         }
 
         renderer.update();
-        renderer.render(DeviceUintSize::new(width, height)).unwrap();
+        renderer.render(framebuffer_size).unwrap();
         example.draw_custom(&*gl);
         window.swap_buffers().ok();
     }
 
     renderer.deinit();
 }
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/examples/document.rs
@@ -0,0 +1,150 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate euclid;
+extern crate gleam;
+extern crate glutin;
+extern crate webrender;
+
+#[path = "common/boilerplate.rs"]
+mod boilerplate;
+
+use boilerplate::Example;
+use euclid::ScaleFactor;
+use webrender::api::*;
+
+// This example creates multiple documents overlapping each other with
+// specified layer indices.
+
+struct Document {
+    id: DocumentId,
+    pipeline_id: PipelineId,
+    content_rect: LayoutRect,
+    color: ColorF,
+}
+
+struct App {
+    documents: Vec<Document>,
+}
+
+impl App {
+    fn init(
+        &mut self,
+        api: &RenderApi,
+        framebuffer_size: DeviceUintSize,
+        device_pixel_ratio: f32,
+    ) {
+        let init_data = vec![
+            (
+                PipelineId(1, 0),
+                -1,
+                ColorF::new(0.0, 1.0, 0.0, 1.0),
+                DeviceUintPoint::new(0, 0),
+            ),
+            (
+                PipelineId(2, 0),
+                -2,
+                ColorF::new(1.0, 1.0, 0.0, 1.0),
+                DeviceUintPoint::new(200, 0),
+            ),
+            (
+                PipelineId(3, 0),
+                -3,
+                ColorF::new(1.0, 0.0, 0.0, 1.0),
+                DeviceUintPoint::new(200, 200),
+            ),
+            (
+                PipelineId(4, 0),
+                -4,
+                ColorF::new(1.0, 0.0, 1.0, 1.0),
+                DeviceUintPoint::new(0, 200),
+            ),
+        ];
+
+        for (pipeline_id, layer, color, offset) in init_data {
+            let size = DeviceUintSize::new(250, 250);
+            let bounds = DeviceUintRect::new(offset, size);
+
+            let document_id = api.add_document(size, layer);
+            api.set_window_parameters(document_id,
+                framebuffer_size,
+                bounds,
+                1.0
+            );
+            api.set_root_pipeline(document_id, pipeline_id);
+
+            self.documents.push(Document {
+                id: document_id,
+                pipeline_id,
+                content_rect: bounds.to_f32() / ScaleFactor::new(device_pixel_ratio),
+                color,
+            });
+        }
+    }
+}
+
+impl Example for App {
+    fn render(
+        &mut self,
+        api: &RenderApi,
+        base_builder: &mut DisplayListBuilder,
+        _: &mut ResourceUpdates,
+        framebuffer_size: DeviceUintSize,
+        _: PipelineId,
+        _: DocumentId,
+    ) {
+        if self.documents.is_empty() {
+            let device_pixel_ratio = framebuffer_size.width as f32 /
+                base_builder.content_size().width;
+            // this is the first run, hack around the boilerplate,
+            // which assumes an example only needs one document
+            self.init(api, framebuffer_size, device_pixel_ratio);
+        }
+
+        for doc in &self.documents {
+            let mut builder = DisplayListBuilder::new(
+                doc.pipeline_id,
+                doc.content_rect.size,
+            );
+            let local_rect = LayoutRect::new(
+                LayoutPoint::zero(),
+                doc.content_rect.size,
+            );
+
+            builder.push_stacking_context(
+                &LayoutPrimitiveInfo::new(doc.content_rect),
+                ScrollPolicy::Fixed,
+                None,
+                TransformStyle::Flat,
+                None,
+                MixBlendMode::Normal,
+                Vec::new(),
+            );
+            builder.push_rect(
+                &LayoutPrimitiveInfo::new(local_rect),
+                doc.color,
+            );
+            builder.pop_stacking_context();
+
+            api.set_display_list(
+                doc.id,
+                Epoch(0),
+                None,
+                doc.content_rect.size,
+                builder.finalize(),
+                true,
+                ResourceUpdates::new(),
+            );
+
+            api.generate_frame(doc.id, None);
+        }
+    }
+}
+
+fn main() {
+    let mut app = App {
+        documents: Vec::new(),
+    };
+    boilerplate::main_wrapper(&mut app, None);
+}
--- a/gfx/webrender/examples/frame_output.rs
+++ b/gfx/webrender/examples/frame_output.rs
@@ -1,169 +1,185 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+extern crate euclid;
 extern crate gleam;
 extern crate glutin;
 extern crate webrender;
 
 #[path = "common/boilerplate.rs"]
 mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
 use gleam::gl;
 use webrender::api::*;
+use euclid::ScaleFactor;
 
 // This example demonstrates using the frame output feature to copy
 // the output of a WR framebuffer to a custom texture.
 
-const VS: &str = "#version 130
-    in vec2 aPos;out vec2 vUv;
-    void main() { vUv = aPos; gl_Position = vec4(aPos, 0.0, 1.0); }
-";
-const FS: &str = "#version 130
-    out vec4 oFragColor;
-    in vec2 vUv;
-    uniform sampler2D s;
-    void main() { oFragColor = texture(s, vUv); }
-";
+#[derive(Debug)]
+struct Document {
+    id: DocumentId,
+    pipeline_id: PipelineId,
+    content_rect: LayoutRect,
+    color: ColorF,
+}
+
 
 struct App {
-    iframe_pipeline_id: Option<PipelineId>,
-    texture_id: gl::GLuint,
+    external_image_key: Option<ImageKey>,
+    output_document: Option<Document>
 }
 
 struct OutputHandler {
-    texture_id: gl::GLuint,
+    texture_id: gl::GLuint
 }
 
-impl OutputHandler {
-    fn new(texture_id: gl::GLuint) -> OutputHandler {
-        OutputHandler { texture_id }
-    }
+struct ExternalHandler {
+    texture_id: gl::GLuint
 }
 
 impl webrender::OutputImageHandler for OutputHandler {
     fn lock(&mut self, _id: PipelineId) -> Option<(u32, DeviceIntSize)> {
-        Some((self.texture_id, DeviceIntSize::new(100, 100)))
+        Some((self.texture_id, DeviceIntSize::new(500, 500)))
     }
 
     fn unlock(&mut self, _id: PipelineId) {}
 }
 
-impl Example for App {
-    fn render(
+impl webrender::ExternalImageHandler for ExternalHandler {
+    fn lock(&mut self, _key: ExternalImageId, _channel_index: u8) -> webrender::ExternalImage {
+        webrender::ExternalImage {
+            u0: 0.0,
+            v0: 0.0,
+            u1: 1.0,
+            v1: 1.0,
+            source: webrender::ExternalImageSource::NativeTexture(self.texture_id),
+        }
+    }
+    fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {}
+}
+
+impl App {
+    fn init_output_document(
         &mut self,
         api: &RenderApi,
-        builder: &mut DisplayListBuilder,
-        _resources: &mut ResourceUpdates,
-        _layout_size: LayoutSize,
-        _pipeline_id: PipelineId,
-        document_id: DocumentId,
+        framebuffer_size: DeviceUintSize,
+        device_pixel_ratio: f32,
     ) {
-        // Build the iframe display list on first render.
-        if self.iframe_pipeline_id.is_none() {
-            let epoch = Epoch(0);
-            let root_background_color = ColorF::new(0.0, 1.0, 0.0, 1.0);
-
-            let iframe_pipeline_id = PipelineId(0, 1);
-            let layout_size = LayoutSize::new(100.0, 100.0);
-            let mut builder = DisplayListBuilder::new(iframe_pipeline_id, layout_size);
-            let resources = ResourceUpdates::new();
+        // Generate the external image key that will be used to render the output document to the root document.
+        self.external_image_key = Some(api.generate_image_key());
+        let mut resources = ResourceUpdates::new();
+        resources.add_image(
+            self.external_image_key.unwrap(),
+            ImageDescriptor::new(100, 100, ImageFormat::BGRA8, true),
+            ImageData::External(ExternalImageData {
+                id: ExternalImageId(0),
+                channel_index: 0,
+                image_type: ExternalImageType::Texture2DHandle
+            }),
+            None,
+        );
 
-            let bounds = (0, 0).to(50, 50);
-            let info = LayoutPrimitiveInfo::new(bounds);
-            builder.push_stacking_context(
-                &info,
-                ScrollPolicy::Scrollable,
-                None,
-                TransformStyle::Flat,
-                None,
-                MixBlendMode::Normal,
-                Vec::new(),
-            );
+        let pipeline_id = PipelineId(1, 0);
+        let layer = 1;
+        let color = ColorF::new(1., 1., 0., 1.);
+        let bounds = DeviceUintRect::new(DeviceUintPoint::zero(), framebuffer_size);
+        let document_id = api.add_document(framebuffer_size, layer);
 
-            builder.push_rect(&info, ColorF::new(1.0, 1.0, 0.0, 1.0));
-            builder.pop_stacking_context();
+        api.set_root_pipeline(document_id, pipeline_id);
 
-            api.set_display_list(
-                document_id,
-                epoch,
-                Some(root_background_color),
-                layout_size,
-                builder.finalize(),
-                true,
-                resources,
-            );
+        let document = Document {
+            id: document_id,
+            pipeline_id,
+            content_rect: bounds.to_f32() / ScaleFactor::new(device_pixel_ratio),
+            color,
+        };
 
-            self.iframe_pipeline_id = Some(iframe_pipeline_id);
-            api.enable_frame_output(document_id, iframe_pipeline_id, true);
-        }
+        let info = LayoutPrimitiveInfo::new(document.content_rect);
+        let mut builder = DisplayListBuilder::new(
+            document.pipeline_id,
+            document.content_rect.size,
+        );
 
-        let bounds = (100, 100).to(200, 200);
-        let info = LayoutPrimitiveInfo::new(bounds);
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
             None,
             TransformStyle::Flat,
             None,
             MixBlendMode::Normal,
             Vec::new(),
         );
 
-        builder.push_iframe(&info, self.iframe_pipeline_id.unwrap());
+        builder.push_rect(&info, ColorF::new(1.0, 1.0, 0.0, 1.0));
+        builder.pop_stacking_context();
+
+        api.enable_frame_output(document.id, document.pipeline_id, true);
+        api.set_display_list(
+            document.id,
+            Epoch(0),
+            Some(document.color),
+            document.content_rect.size,
+            builder.finalize(),
+            true,
+            resources,
+        );
+        
+        api.generate_frame(document.id, None);
+        self.output_document = Some(document);
+    }
+}
+
+impl Example for App {
+    fn render(
+        &mut self,
+        api: &RenderApi,
+        builder: &mut DisplayListBuilder,
+        _resources: &mut ResourceUpdates,
+        framebuffer_size: DeviceUintSize,
+        _pipeline_id: PipelineId,
+        _document_id: DocumentId,
+    ) {
+        if self.output_document.is_none(){
+            let device_pixel_ratio = framebuffer_size.width as f32 /
+                builder.content_size().width;
+            self.init_output_document(api, DeviceUintSize::new(200, 200), device_pixel_ratio);
+        }
+
+        let info = LayoutPrimitiveInfo::new((100, 100).to(200, 200));
+        builder.push_stacking_context(
+            &info,
+            ScrollPolicy::Scrollable,
+            None,
+            TransformStyle::Flat,
+            None,
+            MixBlendMode::Normal,
+            Vec::new(),
+        );
+
+        builder.push_image(
+            &info,
+            info.rect.size,
+            LayoutSize::zero(),
+            ImageRendering::Auto,
+            self.external_image_key.unwrap()
+        );
 
         builder.pop_stacking_context();
     }
 
-    fn draw_custom(&self, gl: &gl::Gl) {
-        let vbo = gl.gen_buffers(1)[0];
-        let vao = gl.gen_vertex_arrays(1)[0];
-
-        let pid = create_program(gl);
-
-        let vertices: [f32; 12] = [0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0];
-
-        gl.active_texture(gl::TEXTURE0);
-        gl.bind_texture(gl::TEXTURE_2D, self.texture_id);
-
-        gl.use_program(pid);
-        let sampler = gl.get_uniform_location(pid, "s");
-        debug_assert!(sampler != -1);
-        gl.uniform_1i(sampler, 0);
-
-        gl.bind_buffer(gl::ARRAY_BUFFER, vbo);
-        gl::buffer_data(gl, gl::ARRAY_BUFFER, &vertices, gl::STATIC_DRAW);
-
-        gl.bind_vertex_array(vao);
-        gl.enable_vertex_attrib_array(0);
-        gl.vertex_attrib_pointer(0, 2, gl::FLOAT, false, 8, 0);
-
-        gl.draw_arrays(gl::TRIANGLES, 0, 6);
-
-        gl.delete_vertex_arrays(&[vao]);
-        gl.delete_buffers(&[vbo]);
-        gl.delete_program(pid);
-    }
-
-    fn on_event(
-        &mut self,
-        _event: glutin::Event,
-        _api: &RenderApi,
-        _document_id: DocumentId,
-    ) -> bool {
-        false
-    }
-
-    fn get_output_image_handler(
+    fn get_image_handlers(
         &mut self,
         gl: &gl::Gl,
-    ) -> Option<Box<webrender::OutputImageHandler>> {
+    ) -> (Option<Box<webrender::ExternalImageHandler>>, 
+          Option<Box<webrender::OutputImageHandler>>) {
         let texture_id = gl.gen_textures(1)[0];
 
         gl.bind_texture(gl::TEXTURE_2D, texture_id);
         gl.tex_parameter_i(
             gl::TEXTURE_2D,
             gl::TEXTURE_MAG_FILTER,
             gl::LINEAR as gl::GLint,
         );
@@ -190,55 +206,23 @@ impl Example for App {
             100,
             0,
             gl::BGRA,
             gl::UNSIGNED_BYTE,
             None,
         );
         gl.bind_texture(gl::TEXTURE_2D, 0);
 
-        self.texture_id = texture_id;
-        Some(Box::new(OutputHandler::new(texture_id)))
+        (   
+            Some(Box::new(ExternalHandler { texture_id })), 
+            Some(Box::new(OutputHandler { texture_id }))
+        )
     }
 }
 
 fn main() {
     let mut app = App {
-        iframe_pipeline_id: None,
-        texture_id: 0,
+        external_image_key: None,
+        output_document: None
     };
+
     boilerplate::main_wrapper(&mut app, None);
 }
-
-pub fn compile_shader(gl: &gl::Gl, shader_type: gl::GLenum, source: &str) -> gl::GLuint {
-    let id = gl.create_shader(shader_type);
-    gl.shader_source(id, &[source.as_bytes()]);
-    gl.compile_shader(id);
-    let log = gl.get_shader_info_log(id);
-    if gl.get_shader_iv(id, gl::COMPILE_STATUS) == (0 as gl::GLint) {
-        panic!("{:?} {}", source, log);
-    }
-    id
-}
-
-pub fn create_program(gl: &gl::Gl) -> gl::GLuint {
-    let vs_id = compile_shader(gl, gl::VERTEX_SHADER, VS);
-    let fs_id = compile_shader(gl, gl::FRAGMENT_SHADER, FS);
-
-    let pid = gl.create_program();
-    gl.attach_shader(pid, vs_id);
-    gl.attach_shader(pid, fs_id);
-
-    gl.bind_attrib_location(pid, 0, "aPos");
-    gl.link_program(pid);
-
-    gl.detach_shader(pid, vs_id);
-    gl.detach_shader(pid, fs_id);
-    gl.delete_shader(vs_id);
-    gl.delete_shader(fs_id);
-
-    if gl.get_program_iv(pid, gl::LINK_STATUS) == (0 as gl::GLint) {
-        let error_log = gl.get_program_info_log(pid);
-        panic!("{}", error_log);
-    }
-
-    pid
-}
--- a/gfx/webrender/examples/iframe.rs
+++ b/gfx/webrender/examples/iframe.rs
@@ -19,17 +19,17 @@ use webrender::api::*;
 struct App {}
 
 impl Example for App {
     fn render(
         &mut self,
         api: &RenderApi,
         builder: &mut DisplayListBuilder,
         _resources: &mut ResourceUpdates,
-        _layout_size: LayoutSize,
+        _framebuffer_size: DeviceUintSize,
         pipeline_id: PipelineId,
         document_id: DocumentId,
     ) {
         // All the sub_* things are for the nested pipeline
         let sub_size = DeviceUintSize::new(100, 100);
         let sub_bounds = (0, 0).to(sub_size.width as i32, sub_size.height as i32);
 
         let sub_pipeline_id = PipelineId(pipeline_id.0, 42);
@@ -70,23 +70,14 @@ impl Example for App {
             MixBlendMode::Normal,
             Vec::new(),
         );
         // red rect under the iframe: if this is visible, things have gone wrong
         builder.push_rect(&info, ColorF::new(1.0, 0.0, 0.0, 1.0));
         builder.push_iframe(&info, sub_pipeline_id);
         builder.pop_stacking_context();
     }
-
-    fn on_event(
-        &mut self,
-        _event: glutin::Event,
-        _api: &RenderApi,
-        _document_id: DocumentId,
-    ) -> bool {
-        false
-    }
 }
 
 fn main() {
     let mut app = App {};
     boilerplate::main_wrapper(&mut app, None);
 }
--- a/gfx/webrender/examples/image_resize.rs
+++ b/gfx/webrender/examples/image_resize.rs
@@ -19,17 +19,17 @@ struct App {
 }
 
 impl Example for App {
     fn render(
         &mut self,
         _api: &RenderApi,
         builder: &mut DisplayListBuilder,
         resources: &mut ResourceUpdates,
-        _layout_size: LayoutSize,
+        _framebuffer_size: DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
         let (image_descriptor, image_data) = image_helper::make_checkerboard(32, 32);
         resources.add_image(
             self.image_key,
             image_descriptor,
             image_data,
--- a/gfx/webrender/examples/scrolling.rs
+++ b/gfx/webrender/examples/scrolling.rs
@@ -19,21 +19,23 @@ struct App {
 }
 
 impl Example for App {
     fn render(
         &mut self,
         _api: &RenderApi,
         builder: &mut DisplayListBuilder,
         _resources: &mut ResourceUpdates,
-        layout_size: LayoutSize,
+        _framebuffer_size: DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
-        let info = LayoutPrimitiveInfo::new(LayoutRect::new(LayoutPoint::zero(), layout_size));
+        let info = LayoutPrimitiveInfo::new(
+            LayoutRect::new(LayoutPoint::zero(), builder.content_size())
+        );
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
             None,
             TransformStyle::Flat,
             None,
             MixBlendMode::Normal,
             Vec::new(),
--- a/gfx/webrender/examples/texture_cache_stress.rs
+++ b/gfx/webrender/examples/texture_cache_stress.rs
@@ -5,16 +5,17 @@
 extern crate gleam;
 extern crate glutin;
 extern crate webrender;
 
 #[path = "common/boilerplate.rs"]
 mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
+use gleam::gl;
 use std::mem;
 use webrender::api::*;
 
 struct ImageGenerator {
     patterns: [[u8; 3]; 6],
     next_pattern: usize,
     current_image: Vec<u8>,
 }
@@ -81,17 +82,17 @@ struct App {
 }
 
 impl Example for App {
     fn render(
         &mut self,
         api: &RenderApi,
         builder: &mut DisplayListBuilder,
         resources: &mut ResourceUpdates,
-        _layout_size: LayoutSize,
+        _framebuffer_size: DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
         let bounds = (0, 0).to(512, 512);
         let info = LayoutPrimitiveInfo::new(bounds);
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
@@ -278,18 +279,22 @@ impl Example for App {
                 return true;
             }
             _ => {}
         }
 
         false
     }
 
-    fn get_external_image_handler(&self) -> Option<Box<webrender::ExternalImageHandler>> {
-        Some(Box::new(ImageGenerator::new()))
+    fn get_image_handlers(
+        &mut self,
+        _gl: &gl::Gl,
+    ) -> (Option<Box<webrender::ExternalImageHandler>>, 
+          Option<Box<webrender::OutputImageHandler>>) {
+        (Some(Box::new(ImageGenerator::new())), None)
     }
 }
 
 fn main() {
     let mut app = App {
         image_key: None,
         stress_keys: Vec::new(),
         image_generator: ImageGenerator::new(),
--- a/gfx/webrender/examples/yuv.rs
+++ b/gfx/webrender/examples/yuv.rs
@@ -165,21 +165,21 @@ struct App {
 }
 
 impl Example for App {
     fn render(
         &mut self,
         api: &RenderApi,
         builder: &mut DisplayListBuilder,
         resources: &mut ResourceUpdates,
-        layout_size: LayoutSize,
+        _framebuffer_size: DeviceUintSize,
         _pipeline_id: PipelineId,
         _document_id: DocumentId,
     ) {
-        let bounds = LayoutRect::new(LayoutPoint::zero(), layout_size);
+        let bounds = LayoutRect::new(LayoutPoint::zero(), builder.content_size());
         let info = LayoutPrimitiveInfo::new(bounds);
         builder.push_stacking_context(
             &info,
             ScrollPolicy::Scrollable,
             None,
             TransformStyle::Flat,
             None,
             MixBlendMode::Normal,
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -43,18 +43,17 @@ void main(void) {
     // applying a half-texel offset to the UV boundaries to prevent linear samples from the outside
     vec4 inner_rect = vec4(res.uv_rect.xy, res.uv_rect.zw);
     vClipMaskUvInnerRect = (inner_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
-    float alpha = 1.f;
-    vec2 local_pos = init_transform_fs(vPos, alpha);
+    float alpha = init_transform_fs(vPos.xy / vPos.z);
 
     bool repeat_mask = false; //TODO
     vec2 clamped_mask_uv = repeat_mask ? fract(vClipMaskUv.xy) :
         clamp(vClipMaskUv.xy, vec2(0.0, 0.0), vec2(1.0, 1.0));
     vec2 source_uv = clamp(clamped_mask_uv * vClipMaskUvRect.zw + vClipMaskUvRect.xy,
         vClipMaskUvInnerRect.xy, vClipMaskUvInnerRect.zw);
     float clip_alpha = texture(sColor0, vec3(source_uv, vLayer)).r; //careful: texture has type A8
 
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -88,18 +88,18 @@ void main(void) {
     vClipCenter_Radius_BL = vec4(clip_rect.p0.x + r_bl.x,
                                  clip_rect.p1.y - r_bl.y,
                                  r_bl);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
-    float alpha = 1.f;
-    vec2 local_pos = init_transform_fs(vPos, alpha);
+    vec2 local_pos = vPos.xy / vPos.z;
+    float alpha = init_transform_fs(local_pos);
 
     float aa_range = compute_aa_range(local_pos);
 
     float clip_alpha = rounded_rect(local_pos,
                                     vClipCenter_Radius_TL,
                                     vClipCenter_Radius_TR,
                                     vClipCenter_Radius_BR,
                                     vClipCenter_Radius_BL,
--- a/gfx/webrender/res/cs_text_run.glsl
+++ b/gfx/webrender/res/cs_text_run.glsl
@@ -20,37 +20,31 @@ void main(void) {
     int resource_address = prim.user_data1;
 
     Glyph glyph = fetch_glyph(prim.specific_prim_address,
                               glyph_index,
                               text.subpx_dir);
 
     GlyphResource res = fetch_glyph_resource(resource_address);
 
-    // Glyphs size is already in device-pixels.
+    // Glyph size is already in device-pixels.
     // The render task origin is in device-pixels. Offset that by
     // the glyph offset, relative to its primitive bounding rect.
-    vec2 size = (res.uv_rect.zw - res.uv_rect.xy) * res.scale;
-    vec2 local_pos = glyph.offset + vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
-    vec2 origin = prim.task.common_data.task_rect.p0 +
-                  uDevicePixelRatio * (local_pos - prim.task.content_origin);
-    vec4 local_rect = vec4(origin, size);
+    vec2 glyph_size = res.uv_rect.zw - res.uv_rect.xy;
+    vec2 glyph_pos = res.offset + glyph_size * aPosition.xy;
+    vec2 local_pos = prim.task.common_data.task_rect.p0 + glyph_pos * res.scale +
+                     uDevicePixelRatio * (glyph.offset - prim.task.content_origin);
+    gl_Position = uTransform * vec4(local_pos, 0.0, 1.0);
 
     vec2 texture_size = vec2(textureSize(sColor0, 0));
     vec2 st0 = res.uv_rect.xy / texture_size;
     vec2 st1 = res.uv_rect.zw / texture_size;
 
-    vec2 pos = mix(local_rect.xy,
-                   local_rect.xy + local_rect.zw,
-                   aPosition.xy);
-
     vUv = vec3(mix(st0, st1, aPosition.xy), res.layer);
     vColor = prim.task.color;
-
-    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float a = texture(sColor0, vUv).a;
     oFragColor = vColor * a;
 }
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -547,17 +547,16 @@ vec4 get_layer_pos(vec2 pos, Layer layer
     // get the normal to the layer plane
     vec3 n = transpose(mat3(layer.inv_transform)) * vec3(0.0, 0.0, 1.0);
     return untransform(pos, n, a, layer.inv_transform);
 }
 
 // Compute a snapping offset in world space (adjusted to pixel ratio),
 // given local position on the layer and a snap rectangle.
 vec2 compute_snap_offset(vec2 local_pos,
-                         RectWithSize local_clip_rect,
                          Layer layer,
                          RectWithSize snap_rect) {
     // Ensure that the snap rect is at *least* one device pixel in size.
     // TODO(gw): It's not clear to me that this is "correct". Specifically,
     //           how should it interact with sub-pixel snap rects when there
     //           is a layer transform with scale present? But it does fix
     //           the test cases we have in Servo that are failing without it
     //           and seem better than not having this at all.
@@ -592,19 +591,19 @@ VertexInfo write_vertex(RectWithSize ins
 
     // Select the corner of the local rect that we are processing.
     vec2 local_pos = instance_rect.p0 + instance_rect.size * aPosition.xy;
 
     // Clamp to the two local clip rects.
     vec2 clamped_local_pos = clamp_rect(clamp_rect(local_pos, local_clip_rect), layer.local_clip_rect);
 
     /// Compute the snapping offset.
-    vec2 snap_offset = compute_snap_offset(clamped_local_pos, local_clip_rect, layer, snap_rect);
+    vec2 snap_offset = compute_snap_offset(clamped_local_pos, layer, snap_rect);
 
-    // Transform the current vertex to the world cpace.
+    // Transform the current vertex to world space.
     vec4 world_pos = layer.transform * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
     vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
     vec2 final_pos = device_pos + snap_offset -
                      task.content_origin +
@@ -613,21 +612,16 @@ VertexInfo write_vertex(RectWithSize ins
     gl_Position = uTransform * vec4(final_pos, z, 1.0);
 
     VertexInfo vi = VertexInfo(clamped_local_pos, device_pos);
     return vi;
 }
 
 #ifdef WR_FEATURE_TRANSFORM
 
-struct TransformVertexInfo {
-    vec3 local_pos;
-    vec2 screen_pos;
-};
-
 float cross2(vec2 v0, vec2 v1) {
     return v0.x * v1.y - v0.y * v1.x;
 }
 
 // Return intersection of line (p0,p1) and line (p2,p3)
 vec2 intersect_lines(vec2 p0, vec2 p1, vec2 p2, vec2 p3) {
     vec2 d0 = p0 - p1;
     vec2 d1 = p2 - p3;
@@ -637,111 +631,77 @@ vec2 intersect_lines(vec2 p0, vec2 p1, v
 
     float d = cross2(d0, d1);
     float nx = s0 * d1.x - d0.x * s1;
     float ny = s0 * d1.y - d0.y * s1;
 
     return vec2(nx / d, ny / d);
 }
 
-TransformVertexInfo write_transform_vertex(RectWithSize instance_rect,
-                                           RectWithSize local_clip_rect,
-                                           vec4 clip_edge_mask,
-                                           float z,
-                                           Layer layer,
-                                           PictureTask task) {
+VertexInfo write_transform_vertex(RectWithSize instance_rect,
+                                  RectWithSize local_clip_rect,
+                                  vec4 clip_edge_mask,
+                                  float z,
+                                  Layer layer,
+                                  PictureTask task) {
+    // Calculate a clip rect from local clip + layer clip.
+    RectWithEndpoint clip_rect = to_rect_with_endpoint(local_clip_rect);
+    clip_rect.p0 = clamp_rect(clip_rect.p0, layer.local_clip_rect);
+    clip_rect.p1 = clamp_rect(clip_rect.p1, layer.local_clip_rect);
+
+    // Calculate a clip rect from local_rect + local clip + layer clip.
     RectWithEndpoint local_rect = to_rect_with_endpoint(instance_rect);
-    RectWithSize clip_rect;
-    clip_rect.p0 = clamp_rect(local_clip_rect.p0, layer.local_clip_rect);
-    clip_rect.size = clamp_rect(local_clip_rect.p0 + local_clip_rect.size, layer.local_clip_rect) - clip_rect.p0;
-
-    vec2 current_local_pos, prev_local_pos, next_local_pos;
-
-    // Clamp to the two local clip rects.
-    local_rect.p0 = clamp_rect(local_rect.p0, clip_rect);
-    local_rect.p1 = clamp_rect(local_rect.p1, clip_rect);
+    local_rect.p0 = clamp(local_rect.p0, clip_rect.p0, clip_rect.p1);
+    local_rect.p1 = clamp(local_rect.p1, clip_rect.p0, clip_rect.p1);
 
-    // Select the current vertex and the previous/next vertices,
-    // based on the vertex ID that is known based on the instance rect.
-    switch (gl_VertexID) {
-        case 0:
-            current_local_pos = vec2(local_rect.p0.x, local_rect.p0.y);
-            next_local_pos = vec2(local_rect.p0.x, local_rect.p1.y);
-            prev_local_pos = vec2(local_rect.p1.x, local_rect.p0.y);
-            break;
-        case 1:
-            current_local_pos = vec2(local_rect.p1.x, local_rect.p0.y);
-            next_local_pos = vec2(local_rect.p0.x, local_rect.p0.y);
-            prev_local_pos = vec2(local_rect.p1.x, local_rect.p1.y);
-            break;
-        case 2:
-            current_local_pos = vec2(local_rect.p0.x, local_rect.p1.y);
-            prev_local_pos = vec2(local_rect.p0.x, local_rect.p0.y);
-            next_local_pos = vec2(local_rect.p1.x, local_rect.p1.y);
-            break;
-        case 3:
-            current_local_pos = vec2(local_rect.p1.x, local_rect.p1.y);
-            prev_local_pos = vec2(local_rect.p0.x, local_rect.p1.y);
-            next_local_pos = vec2(local_rect.p1.x, local_rect.p0.y);
-            break;
-    }
+    // As this is a transform shader, extrude by 2 (local space) pixels
+    // in each direction. This gives enough space around the edge to
+    // apply distance anti-aliasing. Technically, it:
+    // (a) slightly over-estimates the number of required pixels in the simple case.
+    // (b) might not provide enough edge in edge case perspective projections.
+    // However, it's fast and simple. If / when we ever run into issues, we
+    // can do some math on the projection matrix to work out a variable
+    // amount to extrude.
+    float extrude_distance = 2.0;
+    instance_rect.p0 -= vec2(extrude_distance);
+    instance_rect.size += vec2(2.0 * extrude_distance);
 
-    // Transform them to world space
-    vec4 current_world_pos = layer.transform * vec4(current_local_pos, 0.0, 1.0);
-    vec4 prev_world_pos = layer.transform * vec4(prev_local_pos, 0.0, 1.0);
-    vec4 next_world_pos = layer.transform * vec4(next_local_pos, 0.0, 1.0);
+    // Select the corner of the local rect that we are processing.
+    vec2 local_pos = instance_rect.p0 + instance_rect.size * aPosition.xy;
 
-    // Convert to device space
-    vec2 current_device_pos = uDevicePixelRatio * current_world_pos.xy / current_world_pos.w;
-    vec2 prev_device_pos = uDevicePixelRatio * prev_world_pos.xy / prev_world_pos.w;
-    vec2 next_device_pos = uDevicePixelRatio * next_world_pos.xy / next_world_pos.w;
+    // Transform the current vertex to world space.
+    vec4 world_pos = layer.transform * vec4(local_pos, 0.0, 1.0);
 
-    // Get the normals of each of the vectors between the current and next/prev vertices.
-    const float amount = 2.0;
-    vec2 dir_prev = normalize(current_device_pos - prev_device_pos);
-    vec2 dir_next = normalize(current_device_pos - next_device_pos);
-    vec2 norm_prev = vec2(-dir_prev.y,  dir_prev.x);
-    vec2 norm_next = vec2( dir_next.y, -dir_next.x);
+    // Convert the world positions to device pixel space.
+    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
 
-    // Push those lines out along the normal by a specific amount of device pixels.
-    vec2 adjusted_prev_p0 = current_device_pos + norm_prev * amount;
-    vec2 adjusted_prev_p1 = prev_device_pos + norm_prev * amount;
-    vec2 adjusted_next_p0 = current_device_pos + norm_next * amount;
-    vec2 adjusted_next_p1 = next_device_pos + norm_next * amount;
-
-    // Intersect those adjusted lines to find the actual vertex position.
-    vec2 device_pos = intersect_lines(adjusted_prev_p0,
-                                      adjusted_prev_p1,
-                                      adjusted_next_p0,
-                                      adjusted_next_p1);
-
-    vec4 layer_pos = get_layer_pos(device_pos / uDevicePixelRatio, layer);
-
-    // Apply offsets for the render task to get correct screen location.
-    vec2 final_pos = device_pos - //Note: `snap_rect` is not used
-                     task.content_origin +
-                     task.common_data.task_rect.p0;
-
-
-    gl_Position = uTransform * vec4(final_pos, z, 1.0);
+    // We want the world space coords to be perspective divided by W.
+    // We also want that to apply to any interpolators. However, we
+    // want a constant Z across the primitive, since we're using it
+    // for draw ordering - so scale by the W coord to ensure this.
+    vec4 final_pos = vec4(world_pos.xy + task.common_data.task_rect.p0 - task.content_origin,
+                          z * world_pos.w,
+                          world_pos.w);
+    gl_Position = uTransform * final_pos;
 
     vLocalBounds = mix(
-        vec4(clip_rect.p0, clip_rect.p0 + clip_rect.size),
+        vec4(clip_rect.p0, clip_rect.p1),
         vec4(local_rect.p0, local_rect.p1),
         clip_edge_mask
     );
 
-    return TransformVertexInfo(layer_pos.xyw, device_pos);
+    VertexInfo vi = VertexInfo(local_pos, device_pos);
+    return vi;
 }
 
-TransformVertexInfo write_transform_vertex_primitive(Primitive prim) {
+VertexInfo write_transform_vertex_primitive(Primitive prim) {
     return write_transform_vertex(
         prim.local_rect,
         prim.local_clip_rect,
-        vec4(0.0),
+        vec4(1.0),
         prim.z,
         prim.layer,
         prim.task
     );
 }
 
 #endif //WR_FEATURE_TRANSFORM
 
@@ -850,30 +810,29 @@ float distance_aa(float aa_range, float 
 }
 
 #ifdef WR_FEATURE_TRANSFORM
 float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
     vec2 d = max(p0 - pos, pos - p1);
     return length(max(vec2(0.0), d)) + min(0.0, max(d.x, d.y));
 }
 
-vec2 init_transform_fs(vec3 local_pos, out float fragment_alpha) {
-    fragment_alpha = 1.0;
-    vec2 pos = local_pos.xy / local_pos.z;
-
-    // Now get the actual signed distance.
-    float d = signed_distance_rect(pos, vLocalBounds.xy, vLocalBounds.zw);
+float init_transform_fs(vec2 local_pos) {
+    // Get signed distance from local rect bounds.
+    float d = signed_distance_rect(
+        local_pos,
+        vLocalBounds.xy,
+        vLocalBounds.zw
+    );
 
     // Find the appropriate distance to apply the AA smoothstep over.
-    float aa_range = compute_aa_range(pos.xy);
+    float aa_range = compute_aa_range(local_pos);
 
     // Only apply AA to fragments outside the signed distance field.
-    fragment_alpha = distance_aa(aa_range, d);
-
-    return pos;
+    return distance_aa(aa_range, d);
 }
 #endif //WR_FEATURE_TRANSFORM
 
 float do_clip() {
     // anything outside of the mask is considered transparent
     bvec4 inside = lessThanEqual(
         vec4(vClipMaskUvBounds.xy, vClipMaskUv.xy),
         vec4(vClipMaskUv.xy, vClipMaskUvBounds.zw));
--- a/gfx/webrender/res/ps_border_corner.glsl
+++ b/gfx/webrender/res/ps_border_corner.glsl
@@ -19,21 +19,17 @@ flat varying vec2 vClipSign;
 flat varying vec4 vEdgeDistance;
 flat varying float vSDFSelect;
 
 flat varying float vIsBorderRadiusLessThanBorderWidth;
 
 // Border style
 flat varying float vAlphaSelect;
 
-#ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
-#else
 varying vec2 vLocalPos;
-#endif
 
 #ifdef WR_VERTEX_SHADER
 // Matches BorderCornerSide enum in border.rs
 #define SIDE_BOTH       0
 #define SIDE_FIRST      1
 #define SIDE_SECOND     2
 
 vec2 get_radii(vec2 radius, vec2 invalid) {
@@ -297,22 +293,22 @@ void main(void) {
 
     write_color(color0, color1, style, color_delta, prim.user_data1);
 
     RectWithSize segment_rect;
     segment_rect.p0 = p0;
     segment_rect.size = p1 - p0;
 
 #ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(segment_rect,
-                                                    prim.local_clip_rect,
-                                                    vec4(1.0),
-                                                    prim.z,
-                                                    prim.layer,
-                                                    prim.task);
+    VertexInfo vi = write_transform_vertex(segment_rect,
+                                           prim.local_clip_rect,
+                                           vec4(1.0),
+                                           prim.z,
+                                           prim.layer,
+                                           prim.task);
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect);
 #endif
@@ -321,35 +317,32 @@ void main(void) {
     write_clip(vi.screen_pos, prim.clip_area);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
-    alpha = 0.0;
-    vec2 local_pos = init_transform_fs(vLocalPos, alpha);
-#else
-    vec2 local_pos = vLocalPos;
+    alpha = init_transform_fs(vLocalPos);
 #endif
 
     alpha *= do_clip();
 
-    float aa_range = compute_aa_range(local_pos);
+    float aa_range = compute_aa_range(vLocalPos);
 
     float distance_for_color;
     float color_mix_factor;
 
     // Only apply the clip AA if inside the clip region. This is
     // necessary for correctness when the border width is greater
     // than the border radius.
     if (vIsBorderRadiusLessThanBorderWidth == 0.0 ||
-        all(lessThan(local_pos * vClipSign, vClipCenter * vClipSign))) {
-        vec2 p = local_pos - vClipCenter;
+        all(lessThan(vLocalPos * vClipSign, vClipCenter * vClipSign))) {
+        vec2 p = vLocalPos - vClipCenter;
 
         // The coordinate system is snapped to pixel boundaries. To sample the distance,
         // however, we are interested in the center of the pixels which introduces an
         // error of half a pixel towards the exterior of the curve (See issue #1750).
         // This error is corrected by offsetting the distance by half a device pixel.
         // This not entirely correct: it leaves an error that varries between
         // 0 and (sqrt(2) - 1)/2 = 0.2 pixels but it is hardly noticeable and is better
         // than the constant sqrt(2)/2 px error without the correction.
@@ -377,18 +370,18 @@ void main(void) {
         // Get the groove/ridge mix factor.
         color_mix_factor = distance_aa(aa_range, d2);
     } else {
         // Handle the case where the fragment is outside the clip
         // region in a corner. This occurs when border width is
         // greater than border radius.
 
         // Get linear distances along horizontal and vertical edges.
-        vec2 d0 = vClipSign.xx * (local_pos.xx - vEdgeDistance.xz);
-        vec2 d1 = vClipSign.yy * (local_pos.yy - vEdgeDistance.yw);
+        vec2 d0 = vClipSign.xx * (vLocalPos.xx - vEdgeDistance.xz);
+        vec2 d1 = vClipSign.yy * (vLocalPos.yy - vEdgeDistance.yw);
         // Apply union to get the outer edge signed distance.
         float da = min(d0.x, d1.x);
         // Apply intersection to get the inner edge signed distance.
         float db = max(-d0.y, -d1.y);
         // Apply union to get both edges.
         float d = min(da, db);
         // Select fragment on/off based on signed distance.
         // No AA here, since we know we're on a straight edge
@@ -401,15 +394,15 @@ void main(void) {
     }
 
     // Mix inner/outer color.
     vec4 color0 = mix(vColor00, vColor01, color_mix_factor);
     vec4 color1 = mix(vColor10, vColor11, color_mix_factor);
 
     // Select color based on side of line. Get distance from the
     // reference line, and then apply AA along the edge.
-    float ld = distance_to_line(vColorEdgeLine.xy, vColorEdgeLine.zw, local_pos);
+    float ld = distance_to_line(vColorEdgeLine.xy, vColorEdgeLine.zw, vLocalPos);
     float m = distance_aa(aa_range, -ld);
     vec4 color = mix(color0, color1, m);
 
     oFragColor = color * alpha;
 }
 #endif
--- a/gfx/webrender/res/ps_border_edge.glsl
+++ b/gfx/webrender/res/ps_border_edge.glsl
@@ -7,21 +7,17 @@
 flat varying vec4 vColor0;
 flat varying vec4 vColor1;
 flat varying vec2 vEdgeDistance;
 flat varying float vAxisSelect;
 flat varying float vAlphaSelect;
 flat varying vec4 vClipParams;
 flat varying float vClipSelect;
 
-#ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
-#else
 varying vec2 vLocalPos;
-#endif
 
 #ifdef WR_VERTEX_SHADER
 void write_edge_distance(float p0,
                          float original_width,
                          float adjusted_width,
                          float style,
                          float axis_select,
                          float sign_adjust) {
@@ -215,22 +211,22 @@ void main(void) {
         }
     }
 
     write_alpha_select(style);
     write_color0(color, style, color_flip);
     write_color1(color, style, color_flip);
 
 #ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(segment_rect,
-                                                    prim.local_clip_rect,
-                                                    vec4(1.0),
-                                                    prim.z,
-                                                    prim.layer,
-                                                    prim.task);
+    VertexInfo vi = write_transform_vertex(segment_rect,
+                                           prim.local_clip_rect,
+                                           vec4(1.0),
+                                           prim.z,
+                                           prim.layer,
+                                           prim.task);
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect);
 #endif
@@ -239,34 +235,31 @@ void main(void) {
     write_clip(vi.screen_pos, prim.clip_area);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
-    alpha = 0.0;
-    vec2 local_pos = init_transform_fs(vLocalPos, alpha);
-#else
-    vec2 local_pos = vLocalPos;
+    alpha = init_transform_fs(vLocalPos);
 #endif
 
     alpha *= do_clip();
 
     // Find the appropriate distance to apply the step over.
-    float aa_range = compute_aa_range(local_pos);
+    float aa_range = compute_aa_range(vLocalPos);
 
     // Applies the math necessary to draw a style: double
     // border. In the case of a solid border, the vertex
     // shader sets interpolator values that make this have
     // no effect.
 
     // Select the x/y coord, depending on which axis this edge is.
-    vec2 pos = mix(local_pos.xy, local_pos.yx, vAxisSelect);
+    vec2 pos = mix(vLocalPos.xy, vLocalPos.yx, vAxisSelect);
 
     // Get signed distance from each of the inner edges.
     float d0 = pos.x - vEdgeDistance.x;
     float d1 = vEdgeDistance.y - pos.x;
 
     // SDF union to select both outer edges.
     float d = min(d0, d1);
 
--- a/gfx/webrender/res/ps_gradient.glsl
+++ b/gfx/webrender/res/ps_gradient.glsl
@@ -1,21 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include shared,prim_shared
 
 varying vec4 vColor;
 
-#ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
-#else
-varying vec2 vPos;
-#endif
+varying vec2 vLocalPos;
 
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     Primitive prim = load_primitive();
     Gradient gradient = fetch_gradient(prim.specific_prim_address);
 
     vec4 abs_start_end_point = gradient.start_end_point + prim.local_rect.p0.xyxy;
 
@@ -66,48 +62,46 @@ void main(void) {
 
         // Adjust the stop colors by how much they were clamped
         vec2 adjusted_offset = (g01_y_clamped - g01_y.xx) / (g01_y.y - g01_y.x);
         adjusted_color_g0 = mix(g0.color, g1.color, adjusted_offset.x);
         adjusted_color_g1 = mix(g0.color, g1.color, adjusted_offset.y);
     }
 
 #ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(segment_rect,
-                                                    prim.local_clip_rect,
-                                                    vec4(1.0),
-                                                    prim.z,
-                                                    prim.layer,
-                                                    prim.task);
+    VertexInfo vi = write_transform_vertex(segment_rect,
+                                           prim.local_clip_rect,
+                                           vec4(1.0),
+                                           prim.z,
+                                           prim.layer,
+                                           prim.task);
     vLocalPos = vi.local_pos;
     vec2 f = (vi.local_pos.xy - prim.local_rect.p0) / prim.local_rect.size;
 #else
     VertexInfo vi = write_vertex(segment_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect);
 
     vec2 f = (vi.local_pos - segment_rect.p0) / segment_rect.size;
-    vPos = vi.local_pos;
+    vLocalPos = vi.local_pos;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
     vColor = mix(adjusted_color_g0, adjusted_color_g1, dot(f, axis));
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
 #ifdef WR_FEATURE_TRANSFORM
-    float alpha = 0.0;
-    vec2 local_pos = init_transform_fs(vLocalPos, alpha);
+    float alpha = init_transform_fs(vLocalPos);
 #else
     float alpha = 1.0;
-    vec2 local_pos = vPos;
 #endif
 
     alpha *= do_clip();
     oFragColor = dither(vColor * alpha);
 }
 #endif
--- a/gfx/webrender/res/ps_image.glsl
+++ b/gfx/webrender/res/ps_image.glsl
@@ -9,31 +9,30 @@
 // check GL_TEXTURE_RECTANGLE.
 flat varying vec2 vTextureOffset; // Offset of this image into the texture atlas.
 flat varying vec2 vTextureSize;   // Size of the image in the texture atlas.
 flat varying vec2 vTileSpacing;   // Amount of space between tiled instances of this image.
 flat varying vec4 vStRect;        // Rectangle of valid texture rect.
 flat varying float vLayer;
 
 #ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
 flat varying vec4 vLocalRect;
-#else
+#endif
+
 varying vec2 vLocalPos;
-#endif
 flat varying vec2 vStretchSize;
 
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     Primitive prim = load_primitive();
     Image image = fetch_image(prim.specific_prim_address);
     ImageResource res = fetch_image_resource(prim.user_data0);
 
 #ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex_primitive(prim);
+    VertexInfo vi = write_transform_vertex_primitive(prim);
     vLocalPos = vi.local_pos;
     vLocalRect = vec4(prim.local_rect.p0, prim.local_rect.p0 + prim.local_rect.size);
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
@@ -76,23 +75,22 @@ void main(void) {
     vec2 half_texel = vec2(0.5) / texture_size_normalization_factor;
     vStRect = vec4(min(st0, st1) + half_texel, max(st0, st1) - half_texel);
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
 #ifdef WR_FEATURE_TRANSFORM
-    float alpha = 0.0;
-    vec2 pos = init_transform_fs(vLocalPos, alpha);
+    float alpha = init_transform_fs(vLocalPos);
 
     // We clamp the texture coordinate calculation here to the local rectangle boundaries,
     // which makes the edge of the texture stretch instead of repeat.
-    vec2 upper_bound_mask = step(vLocalRect.zw, pos);
-    vec2 relative_pos_in_rect = clamp(pos, vLocalRect.xy, vLocalRect.zw) - vLocalRect.xy;
+    vec2 upper_bound_mask = step(vLocalRect.zw, vLocalPos);
+    vec2 relative_pos_in_rect = clamp(vLocalPos, vLocalRect.xy, vLocalRect.zw) - vLocalRect.xy;
 #else
     float alpha = 1.0;
     vec2 relative_pos_in_rect = vLocalPos;
     vec2 upper_bound_mask = vec2(0.0);
 #endif
 
     alpha *= do_clip();
 
--- a/gfx/webrender/res/ps_line.glsl
+++ b/gfx/webrender/res/ps_line.glsl
@@ -5,21 +5,17 @@
 #include shared,prim_shared
 
 varying vec4 vColor;
 flat varying int vStyle;
 flat varying float vAxisSelect;
 flat varying vec4 vParams;
 flat varying vec2 vLocalOrigin;
 
-#ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
-#else
 varying vec2 vLocalPos;
-#endif
 
 #ifdef WR_VERTEX_SHADER
 #define LINE_ORIENTATION_VERTICAL       0
 #define LINE_ORIENTATION_HORIZONTAL     1
 
 struct Line {
     vec4 color;
     float wavyLineThickness;
@@ -108,17 +104,17 @@ void main(void) {
                     prim.local_rect.p0 + prim.local_rect.size,
                     aPosition.xy);
 
     gl_Position = uTransform * vec4(device_pos, 0.0, 1.0);
 #else
     vColor = line.color;
 
     #ifdef WR_FEATURE_TRANSFORM
-        TransformVertexInfo vi = write_transform_vertex_primitive(prim);
+        VertexInfo vi = write_transform_vertex_primitive(prim);
     #else
         VertexInfo vi = write_vertex(prim.local_rect,
                                      prim.local_clip_rect,
                                      prim.z,
                                      prim.layer,
                                      prim.task,
                                      prim.local_rect);
     #endif
@@ -162,24 +158,22 @@ vec2 get_distance_vector(vec2 b0, vec2 b
 // Approximate distance from point to quadratic bezier.
 float approx_distance(vec2 p, vec2 b0, vec2 b1, vec2 b2) {
     return length(get_distance_vector(b0 - p, b1 - p, b2 - p));
 }
 
 void main(void) {
     float alpha = 1.0;
 
+    vec2 local_pos = vLocalPos;
+
 #ifdef WR_FEATURE_CACHE
-    vec2 local_pos = vLocalPos;
 #else
     #ifdef WR_FEATURE_TRANSFORM
-        alpha = 0.0;
-        vec2 local_pos = init_transform_fs(vLocalPos, alpha);
-    #else
-        vec2 local_pos = vLocalPos;
+        alpha = init_transform_fs(vLocalPos);
     #endif
 
         alpha *= do_clip();
 #endif
 
     // Find the appropriate distance to apply the step over.
     float aa_range = compute_aa_range(local_pos);
 
--- a/gfx/webrender/res/ps_rectangle.glsl
+++ b/gfx/webrender/res/ps_rectangle.glsl
@@ -2,31 +2,31 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include shared,prim_shared
 
 varying vec4 vColor;
 
 #ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
+varying vec2 vLocalPos;
 #endif
 
 #ifdef WR_VERTEX_SHADER
 void main(void) {
     Primitive prim = load_primitive();
     Rectangle rect = fetch_rectangle(prim.specific_prim_address);
     vColor = rect.color;
 #ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(prim.local_rect,
-                                                    prim.local_clip_rect,
-                                                    rect.edge_aa_segment_mask,
-                                                    prim.z,
-                                                    prim.layer,
-                                                    prim.task);
+    VertexInfo vi = write_transform_vertex(prim.local_rect,
+                                           prim.local_clip_rect,
+                                           rect.edge_aa_segment_mask,
+                                           prim.z,
+                                           prim.layer,
+                                           prim.task);
     vLocalPos = vi.local_pos;
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
                                  prim.local_rect);
@@ -37,18 +37,17 @@ void main(void) {
 #endif
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     float alpha = 1.0;
 #ifdef WR_FEATURE_TRANSFORM
-    alpha = 0.0;
-    init_transform_fs(vLocalPos, alpha);
+    alpha = init_transform_fs(vLocalPos);
 #endif
 
 #ifdef WR_FEATURE_CLIP
     alpha *= do_clip();
 #endif
     oFragColor = vColor * alpha;
 }
 #endif
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -3,67 +3,109 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include shared,prim_shared
 
 flat varying vec4 vColor;
 varying vec3 vUv;
 flat varying vec4 vUvBorder;
 
-#ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
-#endif
-
 #ifdef WR_VERTEX_SHADER
 
 #define MODE_ALPHA              0
 #define MODE_SUBPX_CONST_COLOR  1
 #define MODE_SUBPX_PASS0        2
 #define MODE_SUBPX_PASS1        3
 #define MODE_SUBPX_BG_PASS0     4
 #define MODE_SUBPX_BG_PASS1     5
 #define MODE_SUBPX_BG_PASS2     6
 #define MODE_COLOR_BITMAP       7
 
+VertexInfo write_text_vertex(vec2 local_pos,
+                             RectWithSize local_clip_rect,
+                             float z,
+                             Layer layer,
+                             PictureTask task,
+                             RectWithSize snap_rect) {
+    // Clamp to the two local clip rects.
+    vec2 clamped_local_pos = clamp_rect(clamp_rect(local_pos, local_clip_rect), layer.local_clip_rect);
+
+    // Transform the current vertex to world space.
+    vec4 world_pos = layer.transform * vec4(clamped_local_pos, 0.0, 1.0);
+
+    // Convert the world positions to device pixel space.
+    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
+
+    // Apply offsets for the render task to get correct screen location.
+    vec2 final_pos = device_pos -
+                     task.content_origin +
+                     task.common_data.task_rect.p0;
+
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+    // For transformed subpixels, we just need to align the glyph origin to a device pixel.
+    // Only check the layer transform's translation since the scales and axes match.
+    vec2 world_snap_p0 = snap_rect.p0 + layer.transform[3].xy * uDevicePixelRatio;
+    final_pos += floor(world_snap_p0 + 0.5) - world_snap_p0;
+#elif !defined(WR_FEATURE_TRANSFORM)
+    // Compute the snapping offset only if the layer transform is axis-aligned.
+    final_pos += compute_snap_offset(clamped_local_pos, layer, snap_rect);
+#endif
+
+    gl_Position = uTransform * vec4(final_pos, z, 1.0);
+
+    VertexInfo vi = VertexInfo(clamped_local_pos, device_pos);
+    return vi;
+}
+
 void main(void) {
     Primitive prim = load_primitive();
     TextRun text = fetch_text_run(prim.specific_prim_address);
 
     int glyph_index = prim.user_data0;
     int resource_address = prim.user_data1;
 
     Glyph glyph = fetch_glyph(prim.specific_prim_address,
                               glyph_index,
                               text.subpx_dir);
     GlyphResource res = fetch_glyph_resource(resource_address);
 
-    vec2 local_pos = glyph.offset +
-                     text.offset +
-                     vec2(res.offset.x, -res.offset.y) / uDevicePixelRatio;
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+    // Transform from local space to glyph space.
+    mat2 transform = mat2(prim.layer.transform) * uDevicePixelRatio;
 
-    RectWithSize local_rect = RectWithSize(local_pos,
-                                           (res.uv_rect.zw - res.uv_rect.xy) * res.scale / uDevicePixelRatio);
+    // Compute the glyph rect in glyph space.
+    RectWithSize glyph_rect = RectWithSize(res.offset + transform * (text.offset + glyph.offset),
+                                           res.uv_rect.zw - res.uv_rect.xy);
+
+    // Select the corner of the glyph rect that we are processing.
+    // Transform it from glyph space into local space.
+    vec2 local_pos = inverse(transform) * (glyph_rect.p0 + glyph_rect.size * aPosition.xy);
+#else
+    // Scale from glyph space to local space.
+    float scale = res.scale / uDevicePixelRatio;
 
-#ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex(local_rect,
-                                                    prim.local_clip_rect,
-                                                    vec4(0.0),
-                                                    prim.z,
-                                                    prim.layer,
-                                                    prim.task);
-    vLocalPos = vi.local_pos;
-    vec2 f = (vi.local_pos.xy / vi.local_pos.z - local_rect.p0) / local_rect.size;
+    // Compute the glyph rect in local space.
+    RectWithSize glyph_rect = RectWithSize(scale * res.offset + text.offset + glyph.offset,
+                                           scale * (res.uv_rect.zw - res.uv_rect.xy));
+
+    // Select the corner of the glyph rect that we are processing.
+    vec2 local_pos = glyph_rect.p0 + glyph_rect.size * aPosition.xy;
+#endif
+
+    VertexInfo vi = write_text_vertex(local_pos,
+                                      prim.local_clip_rect,
+                                      prim.z,
+                                      prim.layer,
+                                      prim.task,
+                                      glyph_rect);
+
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+    vec2 f = (transform * vi.local_pos - glyph_rect.p0) / glyph_rect.size;
 #else
-    VertexInfo vi = write_vertex(local_rect,
-                                 prim.local_clip_rect,
-                                 prim.z,
-                                 prim.layer,
-                                 prim.task,
-                                 local_rect);
-    vec2 f = (vi.local_pos - local_rect.p0) / local_rect.size;
+    vec2 f = (vi.local_pos - glyph_rect.p0) / glyph_rect.size;
 #endif
 
     write_clip(vi.screen_pos, prim.clip_area);
 
 #ifdef WR_FEATURE_SUBPX_BG_PASS1
     vColor = vec4(text.color.a) * text.bg_color;
 #else
     switch (uMode) {
@@ -93,21 +135,17 @@ void main(void) {
 }
 #endif
 
 #ifdef WR_FRAGMENT_SHADER
 void main(void) {
     vec3 tc = vec3(clamp(vUv.xy, vUvBorder.xy, vUvBorder.zw), vUv.z);
     vec4 mask = texture(sColor0, tc);
 
-    float alpha = 1.0;
-#ifdef WR_FEATURE_TRANSFORM
-    init_transform_fs(vLocalPos, alpha);
-#endif
-    alpha *= do_clip();
+    float alpha = do_clip();
 
 #ifdef WR_FEATURE_SUBPX_BG_PASS1
     mask.rgb = vec3(mask.a) - mask.rgb;
 #endif
 
     oFragColor = vColor * mask * alpha;
 }
 #endif
--- a/gfx/webrender/res/ps_yuv_image.glsl
+++ b/gfx/webrender/res/ps_yuv_image.glsl
@@ -13,36 +13,35 @@ flat varying vec2 vTextureOffsetV; // Of
 flat varying vec2 vTextureSizeY;   // Size of the y plane in the texture atlas.
 flat varying vec2 vTextureSizeUv;  // Size of the u and v planes in the texture atlas.
 flat varying vec2 vStretchSize;
 flat varying vec2 vHalfTexelY;     // Normalized length of the half of a Y texel.
 flat varying vec2 vHalfTexelUv;    // Normalized length of the half of u and v texels.
 flat varying vec3 vLayers;
 
 #ifdef WR_FEATURE_TRANSFORM
-varying vec3 vLocalPos;
 flat varying vec4 vLocalRect;
-#else
+#endif
+
 varying vec2 vLocalPos;
-#endif
 
 #ifdef WR_VERTEX_SHADER
 struct YuvImage {
     vec2 size;
 };
 
 YuvImage fetch_yuv_image(int address) {
     vec4 data = fetch_from_resource_cache_1(address);
     return YuvImage(data.xy);
 }
 
 void main(void) {
     Primitive prim = load_primitive();
 #ifdef WR_FEATURE_TRANSFORM
-    TransformVertexInfo vi = write_transform_vertex_primitive(prim);
+    VertexInfo vi = write_transform_vertex_primitive(prim);
     vLocalPos = vi.local_pos;
     vLocalRect = vec4(prim.local_rect.p0, prim.local_rect.p0 + prim.local_rect.size);
 #else
     VertexInfo vi = write_vertex(prim.local_rect,
                                  prim.local_clip_rect,
                                  prim.z,
                                  prim.layer,
                                  prim.task,
@@ -141,22 +140,21 @@ const mat3 YuvColorMatrix = mat3(
     1.16438,  1.16438,  1.16438,
     0.0    , -0.21325,  2.11240,
     1.79274, -0.53291,  0.0
 );
 #endif
 
 void main(void) {
 #ifdef WR_FEATURE_TRANSFORM
-    float alpha = 0.0;
-    vec2 pos = init_transform_fs(vLocalPos, alpha);
+    float alpha = init_transform_fs(vLocalPos);
 
     // We clamp the texture coordinate calculation here to the local rectangle boundaries,
     // which makes the edge of the texture stretch instead of repeat.
-    vec2 relative_pos_in_rect = clamp(pos, vLocalRect.xy, vLocalRect.zw) - vLocalRect.xy;
+    vec2 relative_pos_in_rect = clamp(vLocalPos, vLocalRect.xy, vLocalRect.zw) - vLocalRect.xy;
 #else
     float alpha = 1.0;;
     vec2 relative_pos_in_rect = vLocalPos;
 #endif
 
     alpha *= do_clip();
 
     // We clamp the texture coordinates to the half-pixel offset from the borders
--- a/gfx/webrender/src/box_shadow.rs
+++ b/gfx/webrender/src/box_shadow.rs
@@ -12,30 +12,34 @@ use prim_store::{PrimitiveContainer, Rec
 use prim_store::{BrushMaskKind, BrushKind, BrushPrimitive};
 use picture::PicturePrimitive;
 use util::RectHelpers;
 use render_task::MAX_BLUR_STD_DEVIATION;
 
 // The blur shader samples BLUR_SAMPLE_SCALE * blur_radius surrounding texels.
 pub const BLUR_SAMPLE_SCALE: f32 = 3.0;
 
+// Maximum blur radius.
+// Taken from https://searchfox.org/mozilla-central/rev/c633ffa4c4611f202ca11270dcddb7b29edddff8/layout/painting/nsCSSRendering.cpp#4412
+pub const MAX_BLUR_RADIUS : f32 = 300.;
+
 // The amount of padding added to the border corner drawn in the box shadow
 // mask. This ensures that we get a few pixels past the corner that can be
 // blurred without being affected by the border radius.
 pub const MASK_CORNER_PADDING: f32 = 4.0;
 
 impl FrameBuilder {
     pub fn add_box_shadow(
         &mut self,
         pipeline_id: PipelineId,
         clip_and_scroll: ClipAndScrollInfo,
         prim_info: &LayerPrimitiveInfo,
         box_offset: &LayerVector2D,
         color: &ColorF,
-        blur_radius: f32,
+        mut blur_radius: f32,
         spread_radius: f32,
         border_radius: BorderRadius,
         clip_mode: BoxShadowClipMode,
     ) {
         if color.a == 0.0 {
             return;
         }
 
@@ -43,16 +47,17 @@ impl FrameBuilder {
             BoxShadowClipMode::Outset => {
                 (spread_radius, ClipMode::Clip)
             }
             BoxShadowClipMode::Inset => {
                 (-spread_radius, ClipMode::ClipOut)
             }
         };
 
+        blur_radius = f32::min(blur_radius, MAX_BLUR_RADIUS);
         let shadow_radius = adjust_border_radius_for_box_shadow(
             border_radius,
             spread_amount,
         );
         let shadow_rect = prim_info.rect
             .translate(box_offset)
             .inflate(spread_amount, spread_amount);
 
--- a/gfx/webrender/src/debug_render.rs
+++ b/gfx/webrender/src/debug_render.rs
@@ -3,17 +3,16 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ColorU, DeviceIntRect, DeviceUintSize, ImageFormat};
 use debug_font_data;
 use device::{Device, Program, Texture, TextureSlot, VertexDescriptor, VAO};
 use device::{TextureFilter, TextureTarget, VertexAttribute, VertexAttributeKind, VertexUsageHint};
 use euclid::{Point2D, Rect, Size2D, Transform3D};
 use internal_types::{ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
-use internal_types::RenderTargetMode;
 use std::f32;
 
 #[derive(Debug, Copy, Clone)]
 enum DebugSampler {
     Font,
 }
 
 impl Into<TextureSlot> for DebugSampler {
@@ -119,17 +118,17 @@ impl DebugRenderer {
 
         let mut font_texture = device.create_texture(TextureTarget::Array);
         device.init_texture(
             &mut font_texture,
             debug_font_data::BMP_WIDTH,
             debug_font_data::BMP_HEIGHT,
             ImageFormat::A8,
             TextureFilter::Linear,
-            RenderTargetMode::None,
+            None,
             1,
             Some(&debug_font_data::FONT_BITMAP),
         );
 
         DebugRenderer {
             font_vertices: Vec::new(),
             font_indices: Vec::new(),
             line_vertices: Vec::new(),
--- a/gfx/webrender/src/debug_server.rs
+++ b/gfx/webrender/src/debug_server.rs
@@ -185,29 +185,17 @@ impl PassList {
 
     pub fn add(&mut self, pass: Pass) {
         self.passes.push(pass);
     }
 }
 
 #[derive(Serialize)]
 pub struct Pass {
-    targets: Vec<Target>,
-}
-
-impl Pass {
-    pub fn new() -> Pass {
-        Pass {
-            targets: Vec::new(),
-        }
-    }
-
-    pub fn add(&mut self, target: Target) {
-        self.targets.push(target);
-    }
+    pub targets: Vec<Target>,
 }
 
 #[derive(Serialize)]
 pub struct Target {
     kind: &'static str,
     batches: Vec<Batch>,
 }
 
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -2,18 +2,18 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::shader_source;
 use api::{ColorF, ImageFormat};
 use api::{DeviceIntRect, DeviceUintSize};
 use euclid::Transform3D;
 use gleam::gl;
-use internal_types::RenderTargetMode;
-use internal_types::FastHashMap;
+use internal_types::{FastHashMap, RenderTargetInfo};
+use std::cell::RefCell;
 use std::fs::File;
 use std::io::Read;
 use std::iter::repeat;
 use std::mem;
 use std::ops::Add;
 use std::path::PathBuf;
 use std::ptr;
 use std::rc::Rc;
@@ -148,26 +148,30 @@ fn get_shader_source(shader_name: &str, 
     shader_source::SHADERS
         .get(shader_name)
         .map(|s| s.to_string())
 }
 
 // Parse a shader string for imports. Imports are recursively processed, and
 // prepended to the list of outputs.
 fn parse_shader_source(source: String, base_path: &Option<PathBuf>, output: &mut String) {
-    for line in source.lines() {
+    output.push_str(SHADER_LINE_MARKER);
+
+    for (line_num, line) in source.lines().enumerate() {
         if line.starts_with(SHADER_IMPORT) {
             let imports = line[SHADER_IMPORT.len() ..].split(",");
 
             // For each import, get the source, and recurse.
             for import in imports {
                 if let Some(include) = get_shader_source(import, base_path) {
                     parse_shader_source(include, base_path, output);
                 }
             }
+
+            output.push_str(&format!("#line {}\n", line_num+1));
         } else {
             output.push_str(line);
             output.push_str("\n");
         }
     }
 }
 
 pub fn build_shader_strings(
@@ -199,19 +203,17 @@ pub fn build_shader_strings(
 
     // Parse the main .glsl file, including any imports
     // and append them to the list of sources.
     let mut shared_result = String::new();
     if let Some(shared_source) = get_shader_source(base_filename, override_path) {
         parse_shader_source(shared_source, override_path, &mut shared_result);
     }
 
-    vs_source.push_str(SHADER_LINE_MARKER);
     vs_source.push_str(&shared_result);
-    fs_source.push_str(SHADER_LINE_MARKER);
     fs_source.push_str(&shared_result);
 
     // Append legacy (.vs and .fs) files if they exist.
     // TODO(gw): Once all shaders are ported to just use the
     //           .glsl file, we can remove this code.
     let vs_name = format!("{}.vs", base_filename);
     if let Some(old_vs_source) = get_shader_source(&vs_name, override_path) {
         vs_source.push_str(SHADER_LINE_MARKER);
@@ -386,17 +388,17 @@ pub struct Texture {
     id: gl::GLuint,
     target: gl::GLuint,
     layer_count: i32,
     format: ImageFormat,
     width: u32,
     height: u32,
 
     filter: TextureFilter,
-    mode: RenderTargetMode,
+    render_target: Option<RenderTargetInfo>,
     fbo_ids: Vec<FBOId>,
     depth_rb: Option<RBOId>,
 }
 
 impl Texture {
     pub fn get_dimensions(&self) -> DeviceUintSize {
         DeviceUintSize::new(self.width, self.height)
     }
@@ -414,16 +416,20 @@ impl Texture {
             ImageFormat::A8 => 1,
             ImageFormat::RGB8 => 3,
             ImageFormat::BGRA8 => 4,
             ImageFormat::RG8 => 2,
             ImageFormat::RGBAF32 => 16,
             ImageFormat::Invalid => unreachable!(),
         }
     }
+
+    pub fn has_depth(&self) -> bool {
+        self.depth_rb.is_some()
+    }
 }
 
 impl Drop for Texture {
     fn drop(&mut self) {
         debug_assert!(thread::panicking() || self.id == 0);
     }
 }
 
@@ -482,67 +488,57 @@ pub struct RBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 pub struct VBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 struct IBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug)]
-struct ProgramSources {
+pub struct ProgramSources {
     renderer_name: String,
     vs_source: String,
     fs_source: String,
 }
 
 impl ProgramSources {
     fn new(renderer_name: String, vs_source: String, fs_source: String) -> Self {
         ProgramSources {
             renderer_name,
             vs_source,
             fs_source,
         }
     }
 }
 
-struct ProgramBinary {
+pub struct ProgramBinary {
     binary: Vec<u8>,
     format: gl::GLenum,
 }
 
 impl ProgramBinary {
     fn new(binary: Vec<u8>, format: gl::GLenum) -> Self {
         ProgramBinary {
             binary,
             format
         }
     }
 }
 
 pub struct ProgramCache {
-    binaries: FastHashMap<ProgramSources, ProgramBinary>,
+    pub binaries: RefCell<FastHashMap<ProgramSources, ProgramBinary>>,
 }
 
 impl ProgramCache {
-    pub fn new() -> Self {
-        ProgramCache {
-            binaries: FastHashMap::default(),
-        }
-    }
-
-    fn get(&self, sources: &ProgramSources) -> Option<&ProgramBinary> {
-      self.binaries.get(&sources)
-    }
-
-    fn contains(&self, sources: &ProgramSources) -> bool {
-      self.binaries.contains_key(&sources)
-    }
-
-    fn insert(&mut self, sources: ProgramSources, binary: ProgramBinary) {
-      self.binaries.insert(sources, binary);
+    pub fn new() -> Rc<Self> {
+        Rc::new(
+            ProgramCache {
+                binaries: RefCell::new(FastHashMap::default()),
+            }
+        )
     }
 }
 
 #[derive(Debug, Copy, Clone)]
 pub enum VertexUsageHint {
     Static,
     Dynamic,
     Stream,
@@ -572,53 +568,54 @@ pub struct Capabilities {
 }
 
 #[derive(Clone, Debug)]
 pub enum ShaderError {
     Compilation(String, String), // name, error mssage
     Link(String, String),        // name, error message
 }
 
-pub struct Device<'a> {
+pub struct Device {
     gl: Rc<gl::Gl>,
     // device state
     bound_textures: [gl::GLuint; 16],
     bound_program: gl::GLuint,
     bound_vao: gl::GLuint,
     bound_pbo: gl::GLuint,
     bound_read_fbo: FBOId,
     bound_draw_fbo: FBOId,
     default_read_fbo: gl::GLuint,
     default_draw_fbo: gl::GLuint,
-    device_pixel_ratio: f32,
+
+    pub device_pixel_ratio: f32,
 
     // HW or API capabilties
     capabilities: Capabilities,
 
     // debug
     inside_frame: bool,
 
     // resources
     resource_override_path: Option<PathBuf>,
 
     max_texture_size: u32,
     renderer_name: String,
-    cached_programs: Option<&'a mut ProgramCache>,
+    cached_programs: Option<Rc<ProgramCache>>,
 
     // Frame counter. This is used to map between CPU
     // frames and GPU frames.
     frame_id: FrameId,
 }
 
-impl<'a> Device<'a> {
+impl Device {
     pub fn new(
         gl: Rc<gl::Gl>,
         resource_override_path: Option<PathBuf>,
         _file_changed_handler: Box<FileWatcherHandler>,
-        cached_programs: Option<&mut ProgramCache>,
+        cached_programs: Option<Rc<ProgramCache>>,
     ) -> Device {
         let max_texture_size = gl.get_integer_v(gl::MAX_TEXTURE_SIZE) as u32;
         let renderer_name = gl.get_string(gl::RENDERER);
 
         Device {
             gl,
             resource_override_path,
             // This is initialized to 1 by default, but it is set
@@ -649,17 +646,17 @@ impl<'a> Device<'a> {
     pub fn gl(&self) -> &gl::Gl {
         &*self.gl
     }
 
     pub fn rc_gl(&self) -> &Rc<gl::Gl> {
         &self.gl
     }
 
-    pub fn update_program_cache(&mut self, cached_programs: &'a mut ProgramCache) {
+    pub fn update_program_cache(&mut self, cached_programs: Rc<ProgramCache>) {
         self.cached_programs = Some(cached_programs);
     }
 
     pub fn max_texture_size(&self) -> u32 {
         self.max_texture_size
     }
 
     pub fn get_capabilities(&self) -> &Capabilities {
@@ -691,20 +688,19 @@ impl<'a> Device<'a> {
         } else {
             if !log.is_empty() {
                 println!("Warnings detected on shader: {:?}\n{}", name, log);
             }
             Ok(id)
         }
     }
 
-    pub fn begin_frame(&mut self, device_pixel_ratio: f32) -> FrameId {
+    pub fn begin_frame(&mut self) -> FrameId {
         debug_assert!(!self.inside_frame);
         self.inside_frame = true;
-        self.device_pixel_ratio = device_pixel_ratio;
 
         // Retrive the currently set FBO.
         let default_read_fbo = self.gl.get_integer_v(gl::READ_FRAMEBUFFER_BINDING);
         self.default_read_fbo = default_read_fbo as gl::GLuint;
         let default_draw_fbo = self.gl.get_integer_v(gl::DRAW_FRAMEBUFFER_BINDING);
         self.default_draw_fbo = default_draw_fbo as gl::GLuint;
 
         // Texture state
@@ -798,28 +794,27 @@ impl<'a> Device<'a> {
             self.bound_draw_fbo = fbo_id;
             fbo_id.bind(self.gl(), FBOTarget::Draw);
         }
 
         if let Some(dimensions) = dimensions {
             self.gl.viewport(
                 0,
                 0,
-                dimensions.width as gl::GLint,
-                dimensions.height as gl::GLint,
+                dimensions.width as _,
+                dimensions.height as _,
             );
         }
     }
 
     pub fn create_fbo_for_external_texture(&mut self, texture_id: u32) -> FBOId {
         let fbo = FBOId(self.gl.gen_framebuffers(1)[0]);
         self.bind_external_draw_target(fbo);
-        self.gl.bind_framebuffer(gl::FRAMEBUFFER, fbo.0);
         self.gl.framebuffer_texture_2d(
-            gl::FRAMEBUFFER,
+            gl::DRAW_FRAMEBUFFER,
             gl::COLOR_ATTACHMENT0,
             gl::TEXTURE_2D,
             texture_id,
             0,
         );
         fbo
     }
 
@@ -849,17 +844,17 @@ impl<'a> Device<'a> {
         Texture {
             id: self.gl.gen_textures(1)[0],
             target: target.to_gl_target(),
             width: 0,
             height: 0,
             layer_count: 0,
             format: ImageFormat::Invalid,
             filter: TextureFilter::Nearest,
-            mode: RenderTargetMode::None,
+            render_target: None,
             fbo_ids: vec![],
             depth_rb: None,
         }
     }
 
     fn set_texture_parameters(&mut self, target: gl::GLuint, filter: TextureFilter) {
         let filter = match filter {
             TextureFilter::Nearest => gl::NEAREST,
@@ -879,42 +874,43 @@ impl<'a> Device<'a> {
 
     pub fn init_texture(
         &mut self,
         texture: &mut Texture,
         width: u32,
         height: u32,
         format: ImageFormat,
         filter: TextureFilter,
-        mode: RenderTargetMode,
+        render_target: Option<RenderTargetInfo>,
         layer_count: i32,
         pixels: Option<&[u8]>,
     ) {
         debug_assert!(self.inside_frame);
 
         let resized = texture.width != width || texture.height != height;
 
         texture.format = format;
         texture.width = width;
         texture.height = height;
         texture.filter = filter;
         texture.layer_count = layer_count;
-        texture.mode = mode;
+        texture.render_target = render_target;
 
         let (internal_format, gl_format) = gl_texture_formats_for_image_format(self.gl(), format);
         let type_ = gl_type_for_texture_format(format);
 
         self.bind_texture(DEFAULT_TEXTURE, texture);
         self.set_texture_parameters(texture.target, filter);
 
-        match mode {
-            RenderTargetMode::RenderTarget => {
-                self.update_texture_storage(texture, resized);
+        match render_target {
+            Some(info) => {
+                assert!(pixels.is_none());
+                self.update_texture_storage(texture, &info, resized);
             }
-            RenderTargetMode::None => {
+            None => {
                 let expanded_data: Vec<u8>;
                 let actual_pixels = if pixels.is_some() && format == ImageFormat::A8 &&
                     cfg!(any(target_arch = "arm", target_arch = "aarch64"))
                 {
                     expanded_data = pixels
                         .unwrap()
                         .iter()
                         .flat_map(|&byte| repeat(byte).take(4))
@@ -953,100 +949,109 @@ impl<'a> Device<'a> {
                         );
                     }
                     _ => panic!("BUG: Unexpected texture target!"),
                 }
             }
         }
     }
 
-    /// Updates the texture storage for the texture, creating
-    /// FBOs as required.
-    fn update_texture_storage(&mut self, texture: &mut Texture, resized: bool) {
+    /// Updates the texture storage for the texture, creating FBOs as required.
+    fn update_texture_storage(
+        &mut self,
+        texture: &mut Texture,
+        rt_info: &RenderTargetInfo,
+        is_resized: bool,
+    ) {
         assert!(texture.layer_count > 0);
         assert_eq!(texture.target, gl::TEXTURE_2D_ARRAY);
 
         let needed_layer_count = texture.layer_count - texture.fbo_ids.len() as i32;
-        // If the texture is already the required size skip.
-        if needed_layer_count == 0 && !resized {
-            return;
-        }
+        let allocate_color = needed_layer_count != 0 || is_resized;
 
-        let (internal_format, gl_format) =
-            gl_texture_formats_for_image_format(&*self.gl, texture.format);
-        let type_ = gl_type_for_texture_format(texture.format);
+        if allocate_color {
+            let (internal_format, gl_format) =
+                gl_texture_formats_for_image_format(&*self.gl, texture.format);
+            let type_ = gl_type_for_texture_format(texture.format);
 
-        self.gl.tex_image_3d(
-            texture.target,
-            0,
-            internal_format as gl::GLint,
-            texture.width as gl::GLint,
-            texture.height as gl::GLint,
-            texture.layer_count,
-            0,
-            gl_format,
-            type_,
-            None,
-        );
+            self.gl.tex_image_3d(
+                texture.target,
+                0,
+                internal_format as gl::GLint,
+                texture.width as gl::GLint,
+                texture.height as gl::GLint,
+                texture.layer_count,
+                0,
+                gl_format,
+                type_,
+                None,
+            );
+        }
 
         if needed_layer_count > 0 {
             // Create more framebuffers to fill the gap
             let new_fbos = self.gl.gen_framebuffers(needed_layer_count);
             texture
                 .fbo_ids
                 .extend(new_fbos.into_iter().map(FBOId));
         } else if needed_layer_count < 0 {
             // Remove extra framebuffers
             for old in texture.fbo_ids.drain(texture.layer_count as usize ..) {
                 self.gl.delete_framebuffers(&[old.0]);
             }
         }
 
-        let (depth_rb, depth_alloc) = match texture.depth_rb {
-            Some(rbo) => (rbo.0, resized),
-            None => {
+        let (mut depth_rb, allocate_depth) = match texture.depth_rb {
+            Some(rbo) => (rbo.0, is_resized || !rt_info.has_depth),
+            None if rt_info.has_depth => {
                 let renderbuffer_ids = self.gl.gen_renderbuffers(1);
                 let depth_rb = renderbuffer_ids[0];
                 texture.depth_rb = Some(RBOId(depth_rb));
                 (depth_rb, true)
-            }
+            },
+            None => (0, false),
         };
 
-        if depth_alloc {
-            self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
-            self.gl.renderbuffer_storage(
-                gl::RENDERBUFFER,
-                gl::DEPTH_COMPONENT24,
-                texture.width as gl::GLsizei,
-                texture.height as gl::GLsizei,
-            );
+        if allocate_depth {
+            if rt_info.has_depth {
+                self.gl.bind_renderbuffer(gl::RENDERBUFFER, depth_rb);
+                self.gl.renderbuffer_storage(
+                    gl::RENDERBUFFER,
+                    gl::DEPTH_COMPONENT24,
+                    texture.width as gl::GLsizei,
+                    texture.height as gl::GLsizei,
+                );
+            } else {
+                self.gl.delete_renderbuffers(&[depth_rb]);
+                depth_rb = 0;
+                texture.depth_rb = None;
+            }
         }
 
-        for (fbo_index, fbo_id) in texture.fbo_ids.iter().enumerate() {
-            self.gl.bind_framebuffer(gl::FRAMEBUFFER, fbo_id.0);
-            self.gl.framebuffer_texture_layer(
-                gl::FRAMEBUFFER,
-                gl::COLOR_ATTACHMENT0,
-                texture.id,
-                0,
-                fbo_index as gl::GLint,
-            );
-            self.gl.framebuffer_renderbuffer(
-                gl::FRAMEBUFFER,
-                gl::DEPTH_ATTACHMENT,
-                gl::RENDERBUFFER,
-                depth_rb,
-            );
+        if allocate_color || allocate_depth {
+            for (fbo_index, &fbo_id) in texture.fbo_ids.iter().enumerate() {
+                self.bind_external_draw_target(fbo_id);
+                self.gl.framebuffer_texture_layer(
+                    gl::DRAW_FRAMEBUFFER,
+                    gl::COLOR_ATTACHMENT0,
+                    texture.id,
+                    0,
+                    fbo_index as gl::GLint,
+                );
+                self.gl.framebuffer_renderbuffer(
+                    gl::DRAW_FRAMEBUFFER,
+                    gl::DEPTH_ATTACHMENT,
+                    gl::RENDERBUFFER,
+                    depth_rb,
+                );
+            }
+            // restore the previous FBO
+            let bound_fbo = self.bound_draw_fbo;
+            self.bind_external_draw_target(bound_fbo);
         }
-
-        // TODO(gw): Hack! Modify the code above to use the normal binding interfaces the device exposes.
-        self.gl
-            .bind_framebuffer(gl::READ_FRAMEBUFFER, self.bound_read_fbo.0);
-        self.gl
-            .bind_framebuffer(gl::DRAW_FRAMEBUFFER, self.bound_draw_fbo.0);
     }
 
     pub fn blit_render_target(&mut self, src_rect: DeviceIntRect, dest_rect: DeviceIntRect) {
         debug_assert!(self.inside_frame);
 
         self.gl.blit_framebuffer(
             src_rect.origin.x,
             src_rect.origin.y,
@@ -1155,17 +1160,17 @@ impl<'a> Device<'a> {
         let sources = ProgramSources::new(self.renderer_name.clone(), vs_source, fs_source);
 
         // Create program
         let pid = self.gl.create_program();
 
         let mut loaded = false;
 
         if let Some(ref cached_programs) = self.cached_programs {
-            if let Some(binary) = cached_programs.get(&sources)
+            if let Some(binary) = cached_programs.binaries.borrow().get(&sources)
             {
                 self.gl.program_binary(pid, binary.format, &binary.binary);
 
                 if self.gl.get_program_iv(pid, gl::LINK_STATUS) == (0 as gl::GLint) {
                     let error_log = self.gl.get_program_info_log(pid);
                     println!(
                       "Failed to load a program object with a program binary: {:?} renderer {}\n{}",
                       base_filename,
@@ -1233,21 +1238,21 @@ impl<'a> Device<'a> {
                     base_filename,
                     error_log
                 );
                 self.gl.delete_program(pid);
                 return Err(ShaderError::Link(base_filename.to_string(), error_log));
             }
         }
 
-        if let Some(ref mut cached_programs) = self.cached_programs {
-            if !cached_programs.contains(&sources) {
+        if let Some(ref cached_programs) = self.cached_programs {
+            if !cached_programs.binaries.borrow().contains_key(&sources) {
                 let (buffer, format) = self.gl.get_program_binary(pid);
                 if buffer.len() > 0 {
-                  cached_programs.insert(sources, ProgramBinary::new(buffer, format));
+                  cached_programs.binaries.borrow_mut().insert(sources, ProgramBinary::new(buffer, format));
                 }
             }
         }
 
         let u_transform = self.gl.get_uniform_location(pid, "uTransform");
         let u_device_pixel_ratio = self.gl.get_uniform_location(pid, "uDevicePixelRatio");
         let u_mode = self.gl.get_uniform_location(pid, "uMode");
 
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -1,33 +1,33 @@
 
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BuiltDisplayListIter, ClipAndScrollInfo, ClipId, ColorF, ComplexClipRegion};
-use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, Epoch, FilterOp};
+use api::{DeviceUintRect, DeviceUintSize, DisplayItemRef, DocumentLayer, Epoch, FilterOp};
 use api::{ImageDisplayItem, ItemRange, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerSize, LayerVector2D};
 use api::{LayoutRect, LayoutSize};
 use api::{LocalClip, PipelineId, ScrollClamping, ScrollEventPhase, ScrollLayerState};
 use api::{ScrollLocation, ScrollPolicy, ScrollSensitivity, SpecificDisplayItem, StackingContext};
 use api::{ClipMode, TileOffset, TransformStyle, WorldPoint};
 use clip::ClipRegion;
 use clip_scroll_node::StickyFrameInfo;
 use clip_scroll_tree::{ClipScrollTree, ScrollStates};
 use euclid::rect;
 use frame_builder::{FrameBuilder, FrameBuilderConfig, ScrollbarInfo};
 use gpu_cache::GpuCache;
-use internal_types::{FastHashMap, FastHashSet, RendererFrame};
+use internal_types::{FastHashMap, FastHashSet, RenderedDocument};
 use prim_store::RectangleContent;
 use profiler::{GpuCacheProfileCounters, TextureCacheProfileCounters};
 use resource_cache::{FontInstanceMap,ResourceCache, TiledImageMap};
 use scene::{Scene, StackingContextHelpers, ScenePipeline, SceneProperties};
-use tiling::{CompositeOps, Frame};
+use tiling::CompositeOps;
 use util::ComplexClipRegionHelpers;
 
 #[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Eq, Ord)]
 pub struct FrameId(pub u32);
 
 static DEFAULT_SCROLLBAR_COLOR: ColorF = ColorF {
     r: 0.3,
     g: 0.3,
@@ -257,37 +257,34 @@ impl<'a> FlattenContext<'a> {
                 self.builder.current_reference_frame_id(),
             ));
         }
 
         // If we have a transformation, we establish a new reference frame. This means
         // that fixed position stacking contexts are positioned relative to us.
         let is_reference_frame =
             stacking_context.transform.is_some() || stacking_context.perspective.is_some();
-        if is_reference_frame {
-            let origin = reference_frame_relative_offset + bounds.origin.to_vector();
+        let origin = reference_frame_relative_offset + bounds.origin.to_vector();
+        reference_frame_relative_offset = if is_reference_frame {
             let reference_frame_bounds = LayerRect::new(LayerPoint::zero(), bounds.size);
             let mut clip_id = self.apply_scroll_frame_id_replacement(context_scroll_node_id);
             clip_id = self.builder.push_reference_frame(
                 Some(clip_id),
                 pipeline_id,
                 &reference_frame_bounds,
                 stacking_context.transform,
                 stacking_context.perspective,
                 origin,
                 false,
                 self.clip_scroll_tree,
             );
             self.replacements.push((context_scroll_node_id, clip_id));
-            reference_frame_relative_offset = LayerVector2D::zero();
+            LayerVector2D::zero()
         } else {
-            reference_frame_relative_offset = LayerVector2D::new(
-                reference_frame_relative_offset.x + bounds.origin.x,
-                reference_frame_relative_offset.y + bounds.origin.y,
-            );
+            origin
         };
 
         let sc_scroll_node_id = self.apply_scroll_frame_id_replacement(context_scroll_node_id);
 
         self.builder.push_stacking_context(
             pipeline_id,
             composition_operations,
             stacking_context.transform_style,
@@ -1019,25 +1016,27 @@ impl<'a> FlattenContext<'a> {
             );
         }
     }
 }
 
 /// Frame context contains the information required to update
 /// (e.g. scroll) a renderer frame builder (`FrameBuilder`).
 pub struct FrameContext {
+    window_size: DeviceUintSize,
     clip_scroll_tree: ClipScrollTree,
     pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
     id: FrameId,
     frame_builder_config: FrameBuilderConfig,
 }
 
 impl FrameContext {
     pub fn new(config: FrameBuilderConfig) -> Self {
         FrameContext {
+            window_size: DeviceUintSize::zero(),
             pipeline_epoch_map: FastHashMap::default(),
             clip_scroll_tree: ClipScrollTree::new(),
             id: FrameId(0),
             frame_builder_config: config,
         }
     }
 
     pub fn reset(&mut self) -> ScrollStates {
@@ -1078,53 +1077,53 @@ impl FrameContext {
 
     pub fn discard_frame_state_for_pipeline(&mut self, pipeline_id: PipelineId) {
         self.clip_scroll_tree
             .discard_frame_state_for_pipeline(pipeline_id);
     }
 
     pub fn create(
         &mut self,
-        old_builder: Option<FrameBuilder>,
+        old_builder: FrameBuilder,
         scene: &Scene,
         resource_cache: &mut ResourceCache,
         window_size: DeviceUintSize,
         inner_rect: DeviceUintRect,
         device_pixel_ratio: f32,
         output_pipelines: &FastHashSet<PipelineId>,
-    ) -> Option<FrameBuilder> {
+    ) -> FrameBuilder {
         let root_pipeline_id = match scene.root_pipeline_id {
             Some(root_pipeline_id) => root_pipeline_id,
             None => return old_builder,
         };
 
         let root_pipeline = match scene.pipelines.get(&root_pipeline_id) {
             Some(root_pipeline) => root_pipeline,
             None => return old_builder,
         };
 
         if window_size.width == 0 || window_size.height == 0 {
             error!("ERROR: Invalid window dimensions! Please call api.set_window_size()");
         }
+        self.window_size = window_size;
 
         let old_scrolling_states = self.reset();
 
         self.pipeline_epoch_map
             .insert(root_pipeline_id, root_pipeline.epoch);
 
         let background_color = root_pipeline
             .background_color
             .and_then(|color| if color.a > 0.0 { Some(color) } else { None });
 
         let frame_builder = {
             let mut roller = FlattenContext {
                 scene,
-                builder: FrameBuilder::new(
-                    old_builder,
-                    window_size,
+                builder: old_builder.recycle(
+                    inner_rect,
                     background_color,
                     self.frame_builder_config,
                 ),
                 clip_scroll_tree: &mut self.clip_scroll_tree,
                 font_instances: resource_cache.get_font_instances(),
                 tiled_image_map: resource_cache.get_tiled_image_map(),
                 pipeline_epochs: Vec::new(),
                 replacements: Vec::new(),
@@ -1160,52 +1159,47 @@ impl FrameContext {
             debug_assert!(roller.builder.picture_stack.is_empty());
 
             self.pipeline_epoch_map.extend(roller.pipeline_epochs.drain(..));
             roller.builder
         };
 
         self.clip_scroll_tree
             .finalize_and_apply_pending_scroll_offsets(old_scrolling_states);
-        Some(frame_builder)
+        frame_builder
     }
 
     pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
         self.pipeline_epoch_map.insert(pipeline_id, epoch);
     }
 
-    fn get_renderer_frame_impl(&self, frame: Option<Frame>) -> RendererFrame {
-        let nodes_bouncing_back = self.clip_scroll_tree.collect_nodes_bouncing_back();
-        RendererFrame::new(self.pipeline_epoch_map.clone(), nodes_bouncing_back, frame)
-    }
-
-    pub fn build_renderer_frame(
+    pub fn build_rendered_document(
         &mut self,
         frame_builder: &mut FrameBuilder,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         device_pixel_ratio: f32,
+        layer: DocumentLayer,
         pan: LayerPoint,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
-        scene_properties: &SceneProperties,
-    ) -> RendererFrame {
+		scene_properties: &SceneProperties,
+    ) -> RenderedDocument {
         let frame = frame_builder.build(
             resource_cache,
             gpu_cache,
             self.id,
             &mut self.clip_scroll_tree,
             pipelines,
+            self.window_size,
             device_pixel_ratio,
+            layer,
             pan,
             texture_cache_profile,
             gpu_cache_profile,
             scene_properties,
         );
 
-        self.get_renderer_frame_impl(Some(frame))
-    }
-
-    pub fn get_renderer_frame(&self) -> RendererFrame {
-        self.get_renderer_frame_impl(None)
+        let nodes_bouncing_back = self.clip_scroll_tree.collect_nodes_bouncing_back();
+        RenderedDocument::new(self.pipeline_epoch_map.clone(), nodes_bouncing_back, frame)
     }
 }
--- a/gfx/webrender/src/frame_builder.rs
+++ b/gfx/webrender/src/frame_builder.rs
@@ -1,16 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderDetails, BorderDisplayItem, BuiltDisplayList};
 use api::{ClipAndScrollInfo, ClipId, ColorF, PropertyBinding};
-use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
-use api::{ExtendMode, FontRenderMode, LayoutTransform};
+use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
+use api::{DocumentLayer, ExtendMode, FontRenderMode, LayoutTransform};
 use api::{GlyphInstance, GlyphOptions, GradientStop, HitTestFlags, HitTestItem, HitTestResult};
 use api::{ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerPrimitiveInfo, LayerRect};
 use api::{LayerSize, LayerToScrollTransform, LayerVector2D, LayoutVector2D, LineOrientation};
 use api::{LineStyle, LocalClip, PipelineId, RepeatMode};
 use api::{ScrollSensitivity, Shadow, TileOffset, TransformStyle};
 use api::{PremultipliedColorF, WorldPoint, YuvColorSpace, YuvData};
 use app_units::Au;
 use border::ImageBorderSegment;
@@ -24,22 +24,22 @@ use gpu_cache::GpuCache;
 use internal_types::{FastHashMap, FastHashSet};
 use picture::{PictureCompositeMode, PictureKind, PicturePrimitive, RasterizationSpace};
 use prim_store::{TexelRect, YuvImagePrimitiveCpu};
 use prim_store::{GradientPrimitiveCpu, ImagePrimitiveCpu, LinePrimitive, PrimitiveKind};
 use prim_store::{PrimitiveContainer, PrimitiveIndex};
 use prim_store::{PrimitiveStore, RadialGradientPrimitiveCpu};
 use prim_store::{RectangleContent, RectanglePrimitive, TextRunPrimitiveCpu};
 use profiler::{FrameProfileCounters, GpuCacheProfileCounters, TextureCacheProfileCounters};
-use render_task::{ClearMode, RenderTask, RenderTaskTree};
+use render_task::{ClearMode, RenderTask, RenderTaskId, RenderTaskTree};
 use resource_cache::ResourceCache;
 use scene::{ScenePipeline, SceneProperties};
-use std::{mem, usize, f32, i32};
+use std::{mem, usize, f32};
 use tiling::{CompositeOps, Frame};
-use tiling::{RenderPass, RenderTargetKind};
+use tiling::{RenderPass, RenderPassKind, RenderTargetKind};
 use tiling::{RenderTargetContext, ScrollbarPrimitive};
 use util::{self, pack_as_float, RectHelpers, recycle_vec};
 
 #[derive(Debug)]
 pub struct ScrollbarInfo(pub ClipId, pub LayerRect);
 
 /// Properties of a stacking context that are maintained
 /// during creation of the scene. These structures are
@@ -88,19 +88,19 @@ impl HitTestingItem {
             clip: info.local_clip,
             tag: tag,
         }
     }
 }
 
 pub struct HitTestingRun(Vec<HitTestingItem>, ClipAndScrollInfo);
 
-/// A builder structure for `RendererFrame`
+/// A builder structure for `tiling::Frame`
 pub struct FrameBuilder {
-    screen_size: DeviceUintSize,
+    screen_rect: DeviceUintRect,
     background_color: Option<ColorF>,
     prim_store: PrimitiveStore,
     pub clip_store: ClipStore,
     hit_testing_runs: Vec<HitTestingRun>,
     pub config: FrameBuilderConfig,
 
     // A stack of the current shadow primitives.
     // The sub-Vec stores a buffer of fast-path primitives to be appended on pop.
@@ -142,51 +142,56 @@ impl<'a> PrimitiveContext<'a> {
             display_list,
             clip_node,
             scroll_node,
         }
     }
 }
 
 impl FrameBuilder {
-    pub fn new(
-        previous: Option<Self>,
-        screen_size: DeviceUintSize,
+    pub fn empty() -> Self {
+        FrameBuilder {
+            hit_testing_runs: Vec::new(),
+            shadow_prim_stack: Vec::new(),
+            pending_shadow_contents: Vec::new(),
+            scrollbar_prims: Vec::new(),
+            reference_frame_stack: Vec::new(),
+            picture_stack: Vec::new(),
+            sc_stack: Vec::new(),
+            prim_store: PrimitiveStore::new(),
+            clip_store: ClipStore::new(),
+            screen_rect: DeviceUintRect::zero(),
+            background_color: None,
+            config: FrameBuilderConfig {
+                enable_scrollbars: false,
+                default_font_render_mode: FontRenderMode::Mono,
+                debug: false,
+            },
+        }
+    }
+
+    pub fn recycle(
+        self,
+        screen_rect: DeviceUintRect,
         background_color: Option<ColorF>,
         config: FrameBuilderConfig,
     ) -> Self {
-        match previous {
-            Some(prev) => FrameBuilder {
-                hit_testing_runs: recycle_vec(prev.hit_testing_runs),
-                shadow_prim_stack: recycle_vec(prev.shadow_prim_stack),
-                pending_shadow_contents: recycle_vec(prev.pending_shadow_contents),
-                scrollbar_prims: recycle_vec(prev.scrollbar_prims),
-                reference_frame_stack: recycle_vec(prev.reference_frame_stack),
-                picture_stack: recycle_vec(prev.picture_stack),
-                sc_stack: recycle_vec(prev.sc_stack),
-                prim_store: prev.prim_store.recycle(),
-                clip_store: prev.clip_store.recycle(),
-                screen_size,
-                background_color,
-                config,
-            },
-            None => FrameBuilder {
-                hit_testing_runs: Vec::new(),
-                shadow_prim_stack: Vec::new(),
-                pending_shadow_contents: Vec::new(),
-                scrollbar_prims: Vec::new(),
-                reference_frame_stack: Vec::new(),
-                picture_stack: Vec::new(),
-                sc_stack: Vec::new(),
-                prim_store: PrimitiveStore::new(),
-                clip_store: ClipStore::new(),
-                screen_size,
-                background_color,
-                config,
-            },
+        FrameBuilder {
+            hit_testing_runs: recycle_vec(self.hit_testing_runs),
+            shadow_prim_stack: recycle_vec(self.shadow_prim_stack),
+            pending_shadow_contents: recycle_vec(self.pending_shadow_contents),
+            scrollbar_prims: recycle_vec(self.scrollbar_prims),
+            reference_frame_stack: recycle_vec(self.reference_frame_stack),
+            picture_stack: recycle_vec(self.picture_stack),
+            sc_stack: recycle_vec(self.sc_stack),
+            prim_store: self.prim_store.recycle(),
+            clip_store: self.clip_store.recycle(),
+            screen_rect,
+            background_color,
+            config,
         }
     }
 
     /// Create a primitive and add it to the prim store. This method doesn't
     /// add the primitive to the draw list, so can be used for creating
     /// sub-primitives.
     pub fn create_primitive(
         &mut self,
@@ -1515,34 +1520,38 @@ impl FrameBuilder {
                 });
                 if !flags.contains(HitTestFlags::FIND_ALL) {
                     return result;
                 }
             }
         }
 
         result.items.dedup();
-        return result;
+        result
     }
 
     /// Compute the contribution (bounding rectangles, and resources) of layers and their
     /// primitives in screen space.
     fn build_layer_screen_rects_and_cull_layers(
         &mut self,
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         profile_counters: &mut FrameProfileCounters,
         device_pixel_ratio: f32,
         scene_properties: &SceneProperties,
-    ) {
+    ) -> Option<RenderTaskId> {
         profile_scope!("cull");
 
+        if self.prim_store.cpu_pictures.is_empty() {
+            return None
+        }
+
         // The root picture is always the first one added.
         let prim_run_cmds = mem::replace(&mut self.prim_store.cpu_pictures[0].runs, Vec::new());
         let root_clip_scroll_node = &clip_scroll_tree.nodes[&clip_scroll_tree.root_reference_frame_id()];
 
         let display_list = &pipelines
             .get(&root_clip_scroll_node.pipeline_id)
             .expect("No display list?")
             .display_list;
@@ -1586,16 +1595,17 @@ impl FrameBuilder {
             0.0,
             PremultipliedColorF::TRANSPARENT,
             ClearMode::Transparent,
             RasterizationSpace::Screen,
             child_tasks,
         );
 
         pic.render_task_id = Some(render_tasks.add(root_render_task));
+        pic.render_task_id
     }
 
     fn update_scroll_bars(&mut self, clip_scroll_tree: &ClipScrollTree, gpu_cache: &mut GpuCache) {
         static SCROLLBAR_PADDING: f32 = 8.0;
 
         for scrollbar_prim in &self.scrollbar_prims {
             let metadata = &mut self.prim_store.cpu_metadata[scrollbar_prim.prim_index.0];
             let scroll_frame = &clip_scroll_tree.nodes[&scrollbar_prim.clip_id];
@@ -1624,93 +1634,90 @@ impl FrameBuilder {
 
     pub fn build(
         &mut self,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         frame_id: FrameId,
         clip_scroll_tree: &mut ClipScrollTree,
         pipelines: &FastHashMap<PipelineId, ScenePipeline>,
+        window_size: DeviceUintSize,
         device_pixel_ratio: f32,
+        layer: DocumentLayer,
         pan: LayerPoint,
         texture_cache_profile: &mut TextureCacheProfileCounters,
         gpu_cache_profile: &mut GpuCacheProfileCounters,
         scene_properties: &SceneProperties,
     ) -> Frame {
         profile_scope!("build");
+        debug_assert!(
+            DeviceUintRect::new(DeviceUintPoint::zero(), window_size)
+                .contains_rect(&self.screen_rect)
+        );
 
         let mut profile_counters = FrameProfileCounters::new();
         profile_counters
             .total_primitives
             .set(self.prim_store.prim_count());
 
         resource_cache.begin_frame(frame_id);
         gpu_cache.begin_frame();
 
-        let screen_rect = DeviceIntRect::new(
-            DeviceIntPoint::zero(),
-            DeviceIntSize::new(
-                self.screen_size.width as i32,
-                self.screen_size.height as i32,
-            ),
-        );
-
         let mut node_data = Vec::new();
 
         clip_scroll_tree.update_tree(
-            &screen_rect,
+            &self.screen_rect.to_i32(),
             device_pixel_ratio,
             &mut self.clip_store,
             resource_cache,
             gpu_cache,
             pan,
             &mut node_data,
             scene_properties,
         );
 
         self.update_scroll_bars(clip_scroll_tree, gpu_cache);
 
         let mut render_tasks = RenderTaskTree::new();
 
-        self.build_layer_screen_rects_and_cull_layers(
+        let main_render_task_id = self.build_layer_screen_rects_and_cull_layers(
             clip_scroll_tree,
             pipelines,
             resource_cache,
             gpu_cache,
             &mut render_tasks,
             &mut profile_counters,
             device_pixel_ratio,
             scene_properties,
         );
 
-        let main_render_task_id = self.prim_store
-                                      .cpu_pictures[0]
-                                      .render_task_id
-                                      .expect("bug: no root render task!");
+        let mut passes = Vec::new();
+        resource_cache.block_until_all_resources_added(gpu_cache, texture_cache_profile);
+
+        if let Some(main_render_task_id) = main_render_task_id {
+            let mut required_pass_count = 0;
+            render_tasks.max_depth(main_render_task_id, 0, &mut required_pass_count);
+            assert_ne!(required_pass_count, 0);
 
-        let mut required_pass_count = 0;
-        render_tasks.max_depth(main_render_task_id, 0, &mut required_pass_count);
+            // Do the allocations now, assigning each tile's tasks to a render
+            // pass and target as required.
+            for _ in 0 .. required_pass_count - 1 {
+                passes.push(RenderPass::new_off_screen(self.screen_rect.size.to_i32()));
+            }
+            passes.push(RenderPass::new_main_framebuffer(self.screen_rect.size.to_i32()));
 
-        resource_cache.block_until_all_resources_added(gpu_cache, texture_cache_profile);
+            render_tasks.assign_to_passes(
+                main_render_task_id,
+                required_pass_count - 1,
+                &mut passes,
+            );
+        }
 
         let mut deferred_resolves = vec![];
 
-        let mut passes = Vec::new();
-
-        // Do the allocations now, assigning each tile's tasks to a render
-        // pass and target as required.
-        for index in 0 .. required_pass_count {
-            passes.push(RenderPass::new(
-                index == required_pass_count - 1,
-                screen_rect.size,
-            ));
-        }
-
-        render_tasks.assign_to_passes(main_render_task_id, passes.len() - 1, &mut passes);
-
         for pass in &mut passes {
             let ctx = RenderTargetContext {
                 device_pixel_ratio,
                 prim_store: &self.prim_store,
                 resource_cache,
                 node_data: &node_data,
                 clip_scroll_tree,
             };
@@ -1719,34 +1726,44 @@ impl FrameBuilder {
                 &ctx,
                 gpu_cache,
                 &mut render_tasks,
                 &mut deferred_resolves,
                 &self.clip_store,
             );
 
             profile_counters.passes.inc();
-            profile_counters
-                .color_targets
-                .add(pass.color_targets.target_count());
-            profile_counters
-                .alpha_targets
-                .add(pass.alpha_targets.target_count());
+
+            match pass.kind {
+                RenderPassKind::MainFramebuffer(_) => {
+                    profile_counters.color_targets.add(1);
+                }
+                RenderPassKind::OffScreen { ref color, ref alpha } => {
+                    profile_counters
+                        .color_targets
+                        .add(color.targets.len());
+                    profile_counters
+                        .alpha_targets
+                        .add(alpha.targets.len());
+                }
+            }
         }
 
         let gpu_cache_updates = gpu_cache.end_frame(gpu_cache_profile);
 
         render_tasks.build();
 
         resource_cache.end_frame();
 
         Frame {
+            window_size,
+            inner_rect: self.screen_rect,
             device_pixel_ratio,
             background_color: self.background_color,
-            window_size: self.screen_size,
+            layer,
             profile_counters,
             passes,
             node_data,
             render_tasks,
             deferred_resolves,
             gpu_cache_updates: Some(gpu_cache_updates),
         }
     }
--- a/gfx/webrender/src/glyph_rasterizer.rs
+++ b/gfx/webrender/src/glyph_rasterizer.rs
@@ -2,48 +2,152 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #[cfg(test)]
 use api::{IdNamespace, LayoutPoint};
 use api::{ColorF, ColorU, DevicePoint, DeviceUintSize};
 use api::{FontInstancePlatformOptions, FontRenderMode, FontVariation};
 use api::{FontKey, FontTemplate, GlyphDimensions, GlyphKey, SubpixelDirection};
-use api::{ImageData, ImageDescriptor, ImageFormat};
+use api::{ImageData, ImageDescriptor, ImageFormat, LayerToWorldTransform};
 use app_units::Au;
 use device::TextureFilter;
 use glyph_cache::{CachedGlyphInfo, GlyphCache};
 use gpu_cache::GpuCache;
 use internal_types::FastHashSet;
 use platform::font::FontContext;
 use profiler::TextureCacheProfileCounters;
 use rayon::ThreadPool;
 use rayon::prelude::*;
+use std::cmp;
 use std::collections::hash_map::Entry;
+use std::hash::{Hash, Hasher};
 use std::mem;
 use std::sync::{Arc, Mutex, MutexGuard};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use texture_cache::{TextureCache, TextureCacheHandle};
 
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+pub struct FontTransform {
+    pub scale_x: f32,
+    pub skew_x: f32,
+    pub skew_y: f32,
+    pub scale_y: f32,
+}
+
+// f32 doesn't implement Hash/Eq/Ord, so they are implemented by hand below.
+impl Eq for FontTransform {}
+impl Ord for FontTransform {
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        self.partial_cmp(other).unwrap_or(cmp::Ordering::Equal)
+    }
+}
+impl Hash for FontTransform {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        // Note: 0.0 == -0.0 under PartialEq, yet their to_bits() hashes differ (don't care).
+        self.scale_x.to_bits().hash(state);
+        self.skew_x.to_bits().hash(state);
+        self.skew_y.to_bits().hash(state);
+        self.scale_y.to_bits().hash(state);
+    }
+}
+
+impl FontTransform {
+    const QUANTIZE_SCALE: f32 = 1024.0;
+
+    pub fn new(scale_x: f32, skew_x: f32, skew_y: f32, scale_y: f32) -> Self {
+        FontTransform { scale_x, skew_x, skew_y, scale_y }
+    }
+
+    pub fn identity() -> Self {
+        FontTransform::new(1.0, 0.0, 0.0, 1.0)
+    }
+
+    pub fn is_identity(&self) -> bool {
+        *self == FontTransform::identity()
+    }
+
+    pub fn quantize(&self) -> Self {
+        FontTransform::new(
+            (self.scale_x * Self::QUANTIZE_SCALE).round() / Self::QUANTIZE_SCALE,
+            (self.skew_x * Self::QUANTIZE_SCALE).round() / Self::QUANTIZE_SCALE,
+            (self.skew_y * Self::QUANTIZE_SCALE).round() / Self::QUANTIZE_SCALE,
+            (self.scale_y * Self::QUANTIZE_SCALE).round() / Self::QUANTIZE_SCALE,
+        )
+    }
+
+    pub fn determinant(&self) -> f64 {
+        self.scale_x as f64 * self.scale_y as f64 - self.skew_y as f64 * self.skew_x as f64
+    }
+
+    pub fn compute_scale(&self) -> Option<(f64, f64)> {
+        let det = self.determinant();
+        if det != 0.0 {
+            let major = (self.scale_x as f64).hypot(self.skew_y as f64);
+            let minor = det.abs() / major;
+            Some((major, minor))
+        } else {
+            None
+        }
+    }
+
+    pub fn pre_scale(&self, scale_x: f32, scale_y: f32) -> Self {
+        FontTransform::new(
+            self.scale_x * scale_x,
+            self.skew_x * scale_y,
+            self.skew_y * scale_x,
+            self.scale_y * scale_y,
+        )
+    }
+
+    #[allow(dead_code)]
+    pub fn inverse(&self) -> Option<Self> {
+        let det = self.determinant();
+        if det != 0.0 {
+            let inv_det = det.recip() as f32;
+            Some(FontTransform::new(
+                self.scale_y * inv_det,
+                -self.skew_x * inv_det,
+                -self.skew_y * inv_det,
+                self.scale_x * inv_det
+            ))
+        } else {
+            None
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn apply(&self, x: f32, y: f32) -> (f32, f32) {
+        (self.scale_x * x + self.skew_x * y, self.skew_y * x + self.scale_y * y)
+    }
+}
+
+impl<'a> From<&'a LayerToWorldTransform> for FontTransform {
+    fn from(xform: &'a LayerToWorldTransform) -> Self {
+        FontTransform::new(xform.m11, xform.m21, xform.m12, xform.m22)
+    }
+}
+
 #[derive(Clone, Hash, PartialEq, Eq, Debug, Ord, PartialOrd)]
 pub struct FontInstance {
     pub font_key: FontKey,
     // The font size is in *device* pixels, not logical pixels.
     // It is stored as an Au since we need sub-pixel sizes, but
     // can't store as a f32 due to use of this type as a hash key.
     // TODO(gw): Perhaps consider having LogicalAu and DeviceAu
     //           or something similar to that.
     pub size: Au,
     pub color: ColorU,
     pub bg_color: ColorU,
     pub render_mode: FontRenderMode,
     pub subpx_dir: SubpixelDirection,
     pub platform_options: Option<FontInstancePlatformOptions>,
     pub variations: Vec<FontVariation>,
     pub synthetic_italics: bool,
+    pub transform: FontTransform,
 }
 
 impl FontInstance {
     pub fn new(
         font_key: FontKey,
         size: Au,
         color: ColorF,
         bg_color: ColorU,
@@ -58,47 +162,50 @@ impl FontInstance {
             size,
             color: color.into(),
             bg_color,
             render_mode,
             subpx_dir,
             platform_options,
             variations,
             synthetic_italics,
+            transform: FontTransform::identity(),
         }
     }
 
     pub fn get_subpx_offset(&self, glyph: &GlyphKey) -> (f64, f64) {
         match self.subpx_dir {
             SubpixelDirection::None => (0.0, 0.0),
             SubpixelDirection::Horizontal => (glyph.subpixel_offset.into(), 0.0),
             SubpixelDirection::Vertical => (0.0, glyph.subpixel_offset.into()),
         }
     }
+
+    pub fn get_subpixel_glyph_format(&self) -> GlyphFormat {
+        if self.transform.is_identity() { GlyphFormat::Subpixel } else { GlyphFormat::TransformedSubpixel }
+    }
+
+    #[allow(dead_code)]
+    pub fn get_glyph_format(&self) -> GlyphFormat {
+        match self.render_mode {
+            FontRenderMode::Mono | FontRenderMode::Alpha => GlyphFormat::Alpha,
+            FontRenderMode::Subpixel => self.get_subpixel_glyph_format(),
+            FontRenderMode::Bitmap => GlyphFormat::ColorBitmap,
+        }
+    }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum GlyphFormat {
-    Mono,
     Alpha,
     Subpixel,
+    TransformedSubpixel,
     ColorBitmap,
 }
 
-impl From<FontRenderMode> for GlyphFormat {
-    fn from(render_mode: FontRenderMode) -> GlyphFormat {
-        match render_mode {
-            FontRenderMode::Mono => GlyphFormat::Mono,
-            FontRenderMode::Alpha => GlyphFormat::Alpha,
-            FontRenderMode::Subpixel => GlyphFormat::Subpixel,
-            FontRenderMode::Bitmap => GlyphFormat::ColorBitmap,
-        }
-    }
-}
-
 pub struct RasterizedGlyph {
     pub top: f32,
     pub left: f32,
     pub width: u32,
     pub height: u32,
     pub scale: f32,
     pub format: GlyphFormat,
     pub bytes: Vec<u8>,
@@ -391,25 +498,25 @@ impl GlyphRasterizer {
                             height: glyph.height,
                             stride: None,
                             format: ImageFormat::BGRA8,
                             is_opaque: false,
                             offset: 0,
                         },
                         TextureFilter::Linear,
                         ImageData::Raw(glyph_bytes.clone()),
-                        [glyph.left, glyph.top, glyph.scale],
+                        [glyph.left, -glyph.top, glyph.scale],
                         None,
                         gpu_cache,
                     );
                     Some(CachedGlyphInfo {
                         texture_cache_handle,
                         glyph_bytes,
                         size: DeviceUintSize::new(glyph.width, glyph.height),
-                        offset: DevicePoint::new(glyph.left, glyph.top),
+                        offset: DevicePoint::new(glyph.left, -glyph.top),
                         scale: glyph.scale,
                         format: glyph.format,
                     })
                 } else {
                     None
                 });
 
             let glyph_key_cache = glyph_cache.get_glyph_key_cache_for_font_mut(job.request.font);
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -77,19 +77,18 @@ impl BatchTextures {
     pub fn color(texture: SourceTexture) -> Self {
         BatchTextures {
             colors: [texture, SourceTexture::Invalid, SourceTexture::Invalid],
         }
     }
 }
 
 #[derive(Copy, Clone, Debug, PartialEq)]
-pub enum RenderTargetMode {
-    None,
-    RenderTarget,
+pub struct RenderTargetInfo {
+    pub has_depth: bool,
 }
 
 #[derive(Debug)]
 pub enum TextureUpdateSource {
     External {
         id: ExternalImageId,
         channel_index: u8,
     },
@@ -98,17 +97,17 @@ pub enum TextureUpdateSource {
 
 #[derive(Debug)]
 pub enum TextureUpdateOp {
     Create {
         width: u32,
         height: u32,
         format: ImageFormat,
         filter: TextureFilter,
-        mode: RenderTargetMode,
+        render_target: Option<RenderTargetInfo>,
         layer_count: i32,
     },
     Update {
         rect: DeviceUintRect,
         stride: Option<u32>,
         offset: u32,
         layer_index: i32,
         source: TextureUpdateSource,
@@ -135,53 +134,53 @@ impl TextureUpdateList {
 
     #[inline]
     pub fn push(&mut self, update: TextureUpdate) {
         self.updates.push(update);
     }
 }
 
 /// Mostly wraps a tiling::Frame, adding a bit of extra information.
-pub struct RendererFrame {
+pub struct RenderedDocument {
     /// The last rendered epoch for each pipeline present in the frame.
     /// This information is used to know if a certain transformation on the layout has
     /// been rendered, which is necessary for reftests.
     pub pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
     /// The layers that are currently affected by the over-scrolling animation.
     pub layers_bouncing_back: FastHashSet<ClipId>,
 
-    pub frame: Option<tiling::Frame>,
+    pub frame: tiling::Frame,
 }
 
-impl RendererFrame {
+impl RenderedDocument {
     pub fn new(
         pipeline_epoch_map: FastHashMap<PipelineId, Epoch>,
         layers_bouncing_back: FastHashSet<ClipId>,
-        frame: Option<tiling::Frame>,
+        frame: tiling::Frame,
     ) -> Self {
-        RendererFrame {
+        RenderedDocument {
             pipeline_epoch_map,
             layers_bouncing_back,
             frame,
         }
     }
 }
 
 pub enum DebugOutput {
     FetchDocuments(String),
     FetchClipScrollTree(String),
 }
 
 pub enum ResultMsg {
     DebugCommand(DebugCommand),
     DebugOutput(DebugOutput),
     RefreshShader(PathBuf),
-    NewFrame(
+    PublishDocument(
         DocumentId,
-        RendererFrame,
+        RenderedDocument,
         TextureUpdateList,
         BackendProfileCounters,
     ),
     UpdateResources {
         updates: TextureUpdateList,
         cancel_rendering: bool,
     },
 }
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -351,17 +351,42 @@ impl PicturePrimitive {
 
                         let readback_task_id = render_tasks.add(RenderTask::new_readback(*prim_screen_rect));
 
                         *readback_render_task_id = Some(readback_task_id);
                         parent_tasks.push(readback_task_id);
 
                         self.render_task_id = Some(render_tasks.add(picture_task));
                     }
-                    Some(PictureCompositeMode::Filter(..)) | Some(PictureCompositeMode::Blit) => {
+                    Some(PictureCompositeMode::Filter(filter)) => {
+                        // If this filter is not currently going to affect
+                        // the picture, just collapse this picture into the
+                        // current render task. This most commonly occurs
+                        // when opacity == 1.0, but can also occur on other
+                        // filters and be a significant performance win.
+                        if filter.is_noop() {
+                            parent_tasks.extend(child_tasks);
+                            self.render_task_id = None;
+                        } else {
+                            let picture_task = RenderTask::new_picture(
+                                Some(prim_screen_rect.size),
+                                prim_index,
+                                RenderTargetKind::Color,
+                                prim_screen_rect.origin.x as f32,
+                                prim_screen_rect.origin.y as f32,
+                                PremultipliedColorF::TRANSPARENT,
+                                ClearMode::Transparent,
+                                self.rasterization_kind,
+                                child_tasks,
+                            );
+
+                            self.render_task_id = Some(render_tasks.add(picture_task));
+                        }
+                    }
+                    Some(PictureCompositeMode::Blit) => {
                         let picture_task = RenderTask::new_picture(
                             Some(prim_screen_rect.size),
                             prim_index,
                             RenderTargetKind::Color,
                             prim_screen_rect.origin.x as f32,
                             prim_screen_rect.origin.y as f32,
                             PremultipliedColorF::TRANSPARENT,
                             ClearMode::Transparent,
--- a/gfx/webrender/src/platform/macos/font.rs
+++ b/gfx/webrender/src/platform/macos/font.rs
@@ -12,25 +12,24 @@ use core_foundation::dictionary::{CFDict
 use core_foundation::number::{CFNumber, CFNumberRef};
 use core_foundation::string::{CFString, CFStringRef};
 use core_graphics::base::{kCGImageAlphaNoneSkipFirst, kCGImageAlphaPremultipliedFirst};
 use core_graphics::base::kCGBitmapByteOrder32Little;
 use core_graphics::color_space::CGColorSpace;
 use core_graphics::context::{CGContext, CGTextDrawingMode};
 use core_graphics::data_provider::CGDataProvider;
 use core_graphics::font::{CGFont, CGGlyph};
-use core_graphics::geometry::{CGPoint, CGRect, CGSize};
+use core_graphics::geometry::{CGAffineTransform, CGPoint, CGRect, CGSize};
 use core_text;
 use core_text::font::{CTFont, CTFontRef};
 use core_text::font_descriptor::{kCTFontDefaultOrientation, kCTFontColorGlyphsTrait};
 use gamma_lut::{ColorLut, GammaLut};
-use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
+use glyph_rasterizer::{FontInstance, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::collections::hash_map::Entry;
-use std::ptr;
 use std::sync::Arc;
 
 pub struct FontContext {
     cg_fonts: FastHashMap<FontKey, CGFont>,
     ct_fonts: FastHashMap<(FontKey, Au, Vec<FontVariation>), CTFont>,
     gamma_lut: GammaLut,
 }
 
@@ -76,21 +75,22 @@ fn supports_subpixel_aa() -> bool {
 fn should_use_white_on_black(color: ColorU) -> bool {
     let (r, g, b) = (color.r as u32, color.g as u32, color.b as u32);
     // These thresholds were determined on 10.12 by observing what CG does.
     r >= 85 && g >= 85 && b >= 85 && r + g + b >= 2 * 255
 }
 
 fn get_glyph_metrics(
     ct_font: &CTFont,
+    transform: Option<&CGAffineTransform>,
     glyph: CGGlyph,
     x_offset: f64,
     y_offset: f64,
 ) -> GlyphMetrics {
-    let bounds = ct_font.get_bounding_rects_for_glyphs(kCTFontDefaultOrientation, &[glyph]);
+    let mut bounds = ct_font.get_bounding_rects_for_glyphs(kCTFontDefaultOrientation, &[glyph]);
 
     if bounds.origin.x.is_nan() || bounds.origin.y.is_nan() || bounds.size.width.is_nan() ||
         bounds.size.height.is_nan()
     {
         // If an unexpected glyph index is requested, core text will return NaN values
         // which causes us to do bad thing as the value is cast into an integer and
         // overflow when expanding the bounds a few lines below.
         // Instead we are better off returning zero-sized metrics because this special
@@ -100,16 +100,24 @@ fn get_glyph_metrics(
             rasterized_width: 0,
             rasterized_height: 0,
             rasterized_ascent: 0,
             rasterized_descent: 0,
             advance: 0.0,
         };
     }
 
+    let mut advance = CGSize { width: 0.0, height: 0.0 };
+    ct_font.get_advances_for_glyphs(kCTFontDefaultOrientation, &glyph, &mut advance, 1);
+
+    if let Some(transform) = transform {
+        bounds = bounds.apply_transform(transform);
+        advance = advance.apply_transform(transform);
+    }
+
     // First round out to pixel boundaries
     // CG Origin is bottom left
     let mut left = bounds.origin.x.floor() as i32;
     let mut bottom = bounds.origin.y.floor() as i32;
     let mut right = (bounds.origin.x + bounds.size.width + x_offset).ceil() as i32;
     let mut top = (bounds.origin.y + bounds.size.height + y_offset).ceil() as i32;
 
     // Expand the bounds by 1 pixel, to give CG room for anti-aliasing.
@@ -119,45 +127,42 @@ fn get_glyph_metrics(
     left -= 1;
     bottom -= 1;
     right += 1;
     top += 1;
 
     let width = right - left;
     let height = top - bottom;
 
-    let advance =
-        ct_font.get_advances_for_glyphs(kCTFontDefaultOrientation, &glyph, ptr::null_mut(), 1);
-
     let metrics = GlyphMetrics {
         rasterized_left: left,
         rasterized_width: width as u32,
         rasterized_height: height as u32,
         rasterized_ascent: top,
         rasterized_descent: -bottom,
-        advance: advance as f32,
+        advance: advance.width as f32,
     };
 
     metrics
 }
 
 #[link(name = "ApplicationServices", kind = "framework")]
 extern {
     static kCTFontVariationAxisIdentifierKey: CFStringRef;
     static kCTFontVariationAxisNameKey: CFStringRef;
     static kCTFontVariationAxisMinimumValueKey: CFStringRef;
     static kCTFontVariationAxisMaximumValueKey: CFStringRef;
     static kCTFontVariationAxisDefaultValueKey: CFStringRef;
 
     fn CTFontCopyVariationAxes(font: CTFontRef) -> CFArrayRef;
 }
 
-fn new_ct_font_with_variations(cg_font: &CGFont, size: Au, variations: &[FontVariation]) -> CTFont {
+fn new_ct_font_with_variations(cg_font: &CGFont, size: f64, variations: &[FontVariation]) -> CTFont {
     unsafe {
-        let ct_font = core_text::font::new_from_CGFont(cg_font, size.to_f64_px());
+        let ct_font = core_text::font::new_from_CGFont(cg_font, size);
         if variations.is_empty() {
             return ct_font;
         }
         let axes_ref = CTFontCopyVariationAxes(ct_font.as_concrete_TypeRef());
         if axes_ref.is_null() {
             return ct_font;
         }
         let axes: CFArray = TCFType::wrap_under_create_rule(axes_ref);
@@ -238,17 +243,17 @@ fn new_ct_font_with_variations(cg_font: 
                 vals.push((name, CFNumber::from_f64(val)));
             }
         }
         if vals.is_empty() {
             return ct_font;
         }
         let vals_dict = CFDictionary::from_CFType_pairs(&vals);
         let cg_var_font = cg_font.create_copy_from_variations(&vals_dict).unwrap();
-        core_text::font::new_from_CGFont(&cg_var_font, size.to_f64_px())
+        core_text::font::new_from_CGFont(&cg_var_font, size)
     }
 }
 
 impl FontContext {
     pub fn new() -> FontContext {
         debug!("Test for subpixel AA support: {}", supports_subpixel_aa());
 
         // Force CG to use sRGB color space to gamma correct.
@@ -312,28 +317,28 @@ impl FontContext {
     ) -> Option<CTFont> {
         match self.ct_fonts.entry((font_key, size, variations.to_vec())) {
             Entry::Occupied(entry) => Some((*entry.get()).clone()),
             Entry::Vacant(entry) => {
                 let cg_font = match self.cg_fonts.get(&font_key) {
                     None => return None,
                     Some(cg_font) => cg_font,
                 };
-                let ct_font = new_ct_font_with_variations(cg_font, size, variations);
+                let ct_font = new_ct_font_with_variations(cg_font, size.to_f64_px(), variations);
                 entry.insert(ct_font.clone());
                 Some(ct_font)
             }
         }
     }
 
     pub fn get_glyph_index(&mut self, font_key: FontKey, ch: char) -> Option<u32> {
         let character = ch as u16;
         let mut glyph = 0;
 
-        self.get_ct_font(font_key, Au(16 * 60), &[])
+        self.get_ct_font(font_key, Au::from_px(16), &[])
             .and_then(|ref ct_font| {
                 let result = ct_font.get_glyphs_for_characters(&character, &mut glyph, 1);
 
                 if result {
                     Some(glyph as u32)
                 } else {
                     None
                 }
@@ -344,17 +349,17 @@ impl FontContext {
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<GlyphDimensions> {
         self.get_ct_font(font.font_key, font.size, &font.variations)
             .and_then(|ref ct_font| {
                 let glyph = key.index as CGGlyph;
                 let (x_offset, y_offset) = font.get_subpx_offset(key);
-                let metrics = get_glyph_metrics(ct_font, glyph, x_offset, y_offset);
+                let metrics = get_glyph_metrics(ct_font, None, glyph, x_offset, y_offset);
                 if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
                     None
                 } else {
                     Some(GlyphDimensions {
                         left: metrics.rasterized_left,
                         top: metrics.rasterized_ascent,
                         width: metrics.rasterized_width as u32,
                         height: metrics.rasterized_height as u32,
@@ -442,24 +447,39 @@ impl FontContext {
         }
     }
 
     pub fn rasterize_glyph(
         &mut self,
         font: &FontInstance,
         key: &GlyphKey,
     ) -> Option<RasterizedGlyph> {
-        let ct_font = match self.get_ct_font(font.font_key, font.size, &font.variations) {
+        let (.., minor) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
+        let size = font.size.scale_by(minor as f32);
+        let ct_font = match self.get_ct_font(font.font_key, size, &font.variations) {
             Some(font) => font,
             None => return None,
         };
 
+        let shape = font.transform.pre_scale(minor.recip() as f32, minor.recip() as f32);
+        let transform = if shape.is_identity() {
+            None
+        } else {
+            Some(CGAffineTransform {
+                a: shape.scale_x as f64,
+                b: -shape.skew_y as f64,
+                c: -shape.skew_x as f64,
+                d: shape.scale_y as f64,
+                tx: 0.0,
+                ty: 0.0
+            })
+        };
         let glyph = key.index as CGGlyph;
         let (x_offset, y_offset) = font.get_subpx_offset(key);
-        let metrics = get_glyph_metrics(&ct_font, glyph, x_offset, y_offset);
+        let metrics = get_glyph_metrics(&ct_font, transform.as_ref(), glyph, x_offset, y_offset);
         if metrics.rasterized_width == 0 || metrics.rasterized_height == 0 {
             return None;
         }
 
         // The result of this function, in all render modes, is going to be a
         // BGRA surface with white text on transparency using premultiplied
         // alpha. For subpixel text, the RGB values will be the mask value for
         // the individual components. For bitmap glyphs, the RGB values will be
@@ -547,38 +567,45 @@ impl FontContext {
         cg_context.set_allows_font_subpixel_quantization(false);
         cg_context.set_should_subpixel_quantize_fonts(false);
 
         cg_context.set_allows_font_smoothing(smooth);
         cg_context.set_should_smooth_fonts(smooth);
         cg_context.set_allows_antialiasing(antialias);
         cg_context.set_should_antialias(antialias);
 
-        // CG Origin is bottom left, WR is top left. Need -y offset
-        let rasterization_origin = CGPoint {
-            x: -metrics.rasterized_left as f64 + x_offset,
-            y: metrics.rasterized_descent as f64 - y_offset,
-        };
-
         // Fill the background. This could be opaque white, opaque black, or
         // transparency.
         cg_context.set_rgb_fill_color(bg_color, bg_color, bg_color, bg_alpha);
         let rect = CGRect {
             origin: CGPoint { x: 0.0, y: 0.0 },
             size: CGSize {
                 width: metrics.rasterized_width as f64,
                 height: metrics.rasterized_height as f64,
             },
         };
         cg_context.fill_rect(rect);
 
         // Set the text color and draw the glyphs.
         cg_context.set_rgb_fill_color(text_color, text_color, text_color, 1.0);
         cg_context.set_text_drawing_mode(CGTextDrawingMode::CGTextFill);
-        ct_font.draw_glyphs(&[glyph], &[rasterization_origin], cg_context.clone());
+
+        // CG Origin is bottom left, WR is top left. Need -y offset
+        let mut draw_origin = CGPoint {
+            x: -metrics.rasterized_left as f64 + x_offset,
+            y: metrics.rasterized_descent as f64 - y_offset,
+        };
+
+        if let Some(transform) = transform {
+            cg_context.set_text_matrix(&transform);
+
+            draw_origin = draw_origin.apply_transform(&transform.invert());
+        }
+
+        ct_font.draw_glyphs(&[glyph], &[draw_origin], cg_context.clone());
 
         let mut rasterized_pixels = cg_context.data().to_vec();
 
         if font.render_mode != FontRenderMode::Bitmap {
             // We rendered text into an opaque surface. The code below needs to
             // ignore the current value of each pixel's alpha channel. But it's
             // allowed to write to the alpha channel, because we're done calling
             // CG functions now.
@@ -625,13 +652,13 @@ impl FontContext {
         }
 
         Some(RasterizedGlyph {
             left: metrics.rasterized_left as f32,
             top: metrics.rasterized_ascent as f32,
             width: metrics.rasterized_width,
             height: metrics.rasterized_height,
             scale: 1.0,
-            format: GlyphFormat::from(font.render_mode),
+            format: font.get_glyph_format(),
             bytes: rasterized_pixels,
         })
     }
 }
--- a/gfx/webrender/src/platform/unix/font.rs
+++ b/gfx/webrender/src/platform/unix/font.rs
@@ -9,16 +9,17 @@ use api::{FONT_FORCE_AUTOHINT, FONT_NO_A
 use api::{FONT_EMBOLDEN, FONT_VERTICAL_LAYOUT, FONT_SUBPIXEL_BGR};
 use freetype::freetype::{FT_BBox, FT_Outline_Translate, FT_Pixel_Mode, FT_Render_Mode};
 use freetype::freetype::{FT_Done_Face, FT_Error, FT_Get_Char_Index, FT_Int32};
 use freetype::freetype::{FT_Done_FreeType, FT_Library_SetLcdFilter, FT_Pos};
 use freetype::freetype::{FT_F26Dot6, FT_Face, FT_Glyph_Format, FT_Long, FT_UInt};
 use freetype::freetype::{FT_GlyphSlot, FT_LcdFilter, FT_New_Face, FT_New_Memory_Face};
 use freetype::freetype::{FT_Init_FreeType, FT_Load_Glyph, FT_Render_Glyph};
 use freetype::freetype::{FT_Library, FT_Outline_Get_CBox, FT_Set_Char_Size, FT_Select_Size};
+use freetype::freetype::{FT_Fixed, FT_Matrix, FT_Set_Transform};
 use freetype::freetype::{FT_LOAD_COLOR, FT_LOAD_DEFAULT, FT_LOAD_FORCE_AUTOHINT};
 use freetype::freetype::{FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH, FT_LOAD_NO_AUTOHINT};
 use freetype::freetype::{FT_LOAD_NO_BITMAP, FT_LOAD_NO_HINTING, FT_LOAD_VERTICAL_LAYOUT};
 use freetype::freetype::{FT_FACE_FLAG_SCALABLE, FT_FACE_FLAG_FIXED_SIZES, FT_Err_Cannot_Render_Glyph};
 use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::{cmp, mem, ptr, slice};
 use std::cmp::max;
@@ -181,25 +182,43 @@ impl FontContext {
         }
         if (flags & FONT_VERTICAL_LAYOUT) != 0 {
             load_flags |= FT_LOAD_VERTICAL_LAYOUT;
         }
 
         load_flags |= FT_LOAD_COLOR;
         load_flags |= FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH;
 
+        let req_size = font.size.to_f64_px();
         let mut result = if font.render_mode == FontRenderMode::Bitmap {
             if (load_flags & FT_LOAD_NO_BITMAP) != 0 {
                 FT_Error(FT_Err_Cannot_Render_Glyph as i32)
             } else {
-                self.choose_bitmap_size(face.face, font.size.to_f64_px())
+                unsafe { FT_Set_Transform(face.face, ptr::null_mut(), ptr::null_mut()) };
+                self.choose_bitmap_size(face.face, req_size)
             }
         } else {
-            let char_size = font.size.to_f64_px() * 64.0 + 0.5;
-            unsafe { FT_Set_Char_Size(face.face, char_size as FT_F26Dot6, 0, 0, 0) }
+            let (major, minor) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
+            let shape = font.transform.pre_scale(major.recip() as f32, minor.recip() as f32);
+            let mut ft_shape = FT_Matrix {
+                xx: (shape.scale_x * 65536.0) as FT_Fixed,
+                xy: (shape.skew_x * -65536.0) as FT_Fixed,
+                yx: (shape.skew_y * -65536.0) as FT_Fixed,
+                yy: (shape.scale_y * 65536.0) as FT_Fixed,
+            };
+            unsafe {
+                FT_Set_Transform(face.face, &mut ft_shape, ptr::null_mut());
+                FT_Set_Char_Size(
+                    face.face,
+                    (req_size * major * 64.0 + 0.5) as FT_F26Dot6,
+                    (req_size * minor * 64.0 + 0.5) as FT_F26Dot6,
+                    0,
+                    0,
+                )
+            }
         };
 
         if result.succeeded() {
             result = unsafe { FT_Load_Glyph(face.face, glyph.index as FT_UInt, load_flags as FT_Int32) };
         };
 
         if result.succeeded() {
             let slot = unsafe { (*face.face).glyph };
@@ -513,24 +532,24 @@ impl FontContext {
             key,
             font.render_mode,
             dimensions
         );
 
         let (format, actual_width, actual_height) = match pixel_mode {
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD => {
                 assert!(bitmap.width % 3 == 0);
-                (GlyphFormat::Subpixel, (bitmap.width / 3) as i32, bitmap.rows as i32)
+                (font.get_subpixel_glyph_format(), (bitmap.width / 3) as i32, bitmap.rows as i32)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_LCD_V => {
                 assert!(bitmap.rows % 3 == 0);
-                (GlyphFormat::Subpixel, bitmap.width as i32, (bitmap.rows / 3) as i32)
+                (font.get_subpixel_glyph_format(), bitmap.width as i32, (bitmap.rows / 3) as i32)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_MONO => {
-                (GlyphFormat::Mono, bitmap.width as i32, bitmap.rows as i32)
+                (GlyphFormat::Alpha, bitmap.width as i32, bitmap.rows as i32)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_GRAY => {
                 (GlyphFormat::Alpha, bitmap.width as i32, bitmap.rows as i32)
             }
             FT_Pixel_Mode::FT_PIXEL_MODE_BGRA => {
                 (GlyphFormat::ColorBitmap, bitmap.width as i32, bitmap.rows as i32)
             }
             _ => panic!("Unsupported {:?}", pixel_mode),
@@ -615,18 +634,18 @@ impl FontContext {
                 }
                 _ => panic!("Unsupported {:?}", pixel_mode),
             }
             src_row = unsafe { src_row.offset(bitmap.pitch as isize) };
             dest = row_end;
         }
 
         Some(RasterizedGlyph {
-            left: ((dimensions.left + left) as f32 * scale).round(),
-            top: ((dimensions.top + top - actual_height) as f32 * scale).round(),
+            left: (dimensions.left + left) as f32,
+            top: (dimensions.top + top - actual_height) as f32,
             width: actual_width as u32,
             height: actual_height as u32,
             scale,
             format,
             bytes: final_buffer,
         })
     }
 }
--- a/gfx/webrender/src/platform/windows/font.rs
+++ b/gfx/webrender/src/platform/windows/font.rs
@@ -1,17 +1,17 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{FontInstancePlatformOptions, FontKey, FontRenderMode};
 use api::{ColorU, GlyphDimensions, GlyphKey, SubpixelDirection};
 use dwrote;
 use gamma_lut::{ColorLut, GammaLut};
-use glyph_rasterizer::{FontInstance, GlyphFormat, RasterizedGlyph};
+use glyph_rasterizer::{FontInstance, RasterizedGlyph};
 use internal_types::FastHashMap;
 use std::sync::Arc;
 
 lazy_static! {
     static ref DEFAULT_FONT_DESCRIPTOR: dwrote::FontDescriptor = dwrote::FontDescriptor {
         family_name: "Arial".to_owned(),
         weight: dwrote::FontWeight::Regular,
         stretch: dwrote::FontStretch::Normal,
@@ -156,50 +156,54 @@ impl FontContext {
         let face = self.fonts.get(&font.font_key).unwrap();
         let glyph = key.index as u16;
         let advance = 0.0f32;
         let offset = dwrote::GlyphOffset {
             advanceOffset: 0.0,
             ascenderOffset: 0.0,
         };
 
+        let (.., minor) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
+        let size = (font.size.to_f64_px() * minor) as f32;
+
         let glyph_run = dwrote::DWRITE_GLYPH_RUN {
             fontFace: unsafe { face.as_ptr() },
-            fontEmSize: font.size.to_f32_px(), // size in DIPs (1/96", same as CSS pixels)
+            fontEmSize: size, // size in DIPs (1/96", same as CSS pixels)
             glyphCount: 1,
             glyphIndices: &glyph,
             glyphAdvances: &advance,
             glyphOffsets: &offset,
             isSideways: 0,
             bidiLevel: 0,
         };
 
         let dwrite_measure_mode = dwrite_measure_mode(font.render_mode, font.platform_options);
         let dwrite_render_mode = dwrite_render_mode(
             face,
             font.render_mode,
-            font.size.to_f32_px(),
+            size,
             dwrite_measure_mode,
             font.platform_options,
         );
 
         let (x_offset, y_offset) = font.get_subpx_offset(key);
-        let transform = Some(dwrote::DWRITE_MATRIX {
-            m11: 1.0,
-            m12: 0.0,
-            m21: 0.0,
-            m22: 1.0,
+        let shape = font.transform.pre_scale(minor.recip() as f32, minor.recip() as f32);
+        let transform = dwrote::DWRITE_MATRIX {
+            m11: shape.scale_x,
+            m12: shape.skew_y,
+            m21: shape.skew_x,
+            m22: shape.scale_y,
             dx: x_offset as f32,
             dy: y_offset as f32,
-        });
+        };
 
         dwrote::GlyphRunAnalysis::create(
             &glyph_run,
             1.0,
-            transform,
+            Some(transform),
             dwrite_render_mode,
             dwrite_measure_mode,
             0.0,
             0.0,
         )
     }
 
     pub fn get_glyph_index(&mut self, font_key: FontKey, ch: char) -> Option<u32> {
@@ -354,13 +358,13 @@ impl FontContext {
         }
 
         Some(RasterizedGlyph {
             left: bounds.left as f32,
             top: -bounds.top as f32,
             width,
             height,
             scale: 1.0,
-            format: GlyphFormat::from(font.render_mode),
+            format: font.get_glyph_format(),
             bytes: bgra_pixels,
         })
     }
 }
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -1,23 +1,23 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadius, BuiltDisplayList, ColorF, ComplexClipRegion, DeviceIntRect};
-use api::{DevicePoint, ExtendMode, GlyphInstance, GlyphKey};
+use api::{DevicePoint, ExtendMode, FontRenderMode, GlyphInstance, GlyphKey};
 use api::{GradientStop, ImageKey, ImageRendering, ItemRange, ItemTag, LayerPoint, LayerRect};
-use api::{ClipMode, LayerSize, LayerVector2D, LineOrientation, LineStyle};
+use api::{ClipMode, LayerSize, LayerVector2D, LayerToWorldTransform, LineOrientation, LineStyle};
 use api::{ClipAndScrollInfo, EdgeAaSegmentMask, PremultipliedColorF, TileOffset};
 use api::{ClipId, LayerTransform, PipelineId, YuvColorSpace, YuvFormat};
 use border::BorderCornerInstance;
 use clip_scroll_tree::ClipScrollTree;
 use clip::{ClipSourcesHandle, ClipStore};
 use frame_builder::PrimitiveContext;
-use glyph_rasterizer::FontInstance;
+use glyph_rasterizer::{FontInstance, FontTransform};
 use internal_types::FastHashMap;
 use gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest,
                 ToGpuBlocks};
 use picture::{PictureKind, PicturePrimitive};
 use profiler::FrameProfileCounters;
 use render_task::{ClipWorkItem, ClipChainNode};
 use render_task::{RenderTask, RenderTaskId, RenderTaskTree};
 use renderer::MAX_VERTEX_TEXTURE_WIDTH;
@@ -586,30 +586,42 @@ pub struct TextRunPrimitiveCpu {
     pub glyph_range: ItemRange<GlyphInstance>,
     pub glyph_count: usize,
     pub glyph_keys: Vec<GlyphKey>,
     pub glyph_gpu_blocks: Vec<GpuBlockData>,
 }
 
 
 impl TextRunPrimitiveCpu {
-    pub fn get_font(&self, device_pixel_ratio: f32) -> FontInstance {
+    pub fn get_font(
+        &self,
+        device_pixel_ratio: f32,
+        transform: &LayerToWorldTransform,
+    ) -> FontInstance {
         let mut font = self.font.clone();
         font.size = font.size.scale_by(device_pixel_ratio);
+        if font.render_mode == FontRenderMode::Subpixel {
+            if transform.has_perspective_component() || !transform.has_2d_inverse() {
+                font.render_mode = FontRenderMode::Alpha;
+            } else {
+                font.transform = FontTransform::from(transform).quantize();
+            }
+        }
         font
     }
 
     fn prepare_for_render(
         &mut self,
         resource_cache: &mut ResourceCache,
         device_pixel_ratio: f32,
+        transform: &LayerToWorldTransform,
         display_list: &BuiltDisplayList,
         gpu_cache: &mut GpuCache,
     ) {
-        let font = self.get_font(device_pixel_ratio);
+        let font = self.get_font(device_pixel_ratio, transform);
 
         // Cache the glyph positions, if not in the cache already.
         // TODO(gw): In the future, remove `glyph_instances`
         //           completely, and just reference the glyphs
         //           directly from the display list.
         if self.glyph_keys.is_empty() {
             let subpx_dir = font.subpx_dir.limit_by(font.render_mode);
             let src_glyphs = display_list.get(self.glyph_range);
@@ -1105,16 +1117,17 @@ impl PrimitiveStore {
                         parent_tasks,
                     );
             }
             PrimitiveKind::TextRun => {
                 let text = &mut self.cpu_text_runs[metadata.cpu_prim_index.0];
                 text.prepare_for_render(
                     resource_cache,
                     prim_context.device_pixel_ratio,
+                    &prim_context.scroll_node.world_content_transform,
                     prim_context.display_list,
                     gpu_cache,
                 );
             }
             PrimitiveKind::Image => {
                 let image_cpu = &mut self.cpu_images[metadata.cpu_prim_index.0];
 
                 resource_cache.request_image(
--- a/gfx/webrender/src/profiler.rs
+++ b/gfx/webrender/src/profiler.rs
@@ -2,16 +2,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ColorF, ColorU};
 use debug_render::DebugRenderer;
 use euclid::{Point2D, Rect, Size2D, vec2};
 use query::{GpuSampler, GpuTimer, NamedTag};
 use std::collections::vec_deque::VecDeque;
+use internal_types::FastHashMap;
 use std::{f32, mem};
 use time::precise_time_ns;
 
 const GRAPH_WIDTH: f32 = 1024.0;
 const GRAPH_HEIGHT: f32 = 320.0;
 const GRAPH_PADDING: f32 = 8.0;
 const GRAPH_FRAME_HEIGHT: f32 = 16.0;
 const PROFILE_PADDING: f32 = 10.0;
@@ -298,16 +299,17 @@ impl ProfileCounter for AverageTimeProfi
             format!("{:.2} fps", 1000000000.0 / self.nanoseconds as f64)
         } else {
             format!("{:.2} ms", self.nanoseconds as f64 / 1000000.0)
         }
     }
 }
 
 
+#[derive(Clone)]
 pub struct FrameProfileCounters {
     pub total_primitives: IntProfileCounter,
     pub visible_primitives: IntProfileCounter,
     pub passes: IntProfileCounter,
     pub color_targets: IntProfileCounter,
     pub alpha_targets: IntProfileCounter,
 }
 
@@ -662,41 +664,76 @@ impl GpuFrameCollection {
         let mut y0 = graph_rect.origin.y;
 
         let max_time = self.frames
             .iter()
             .max_by_key(|f| f.total_time)
             .unwrap()
             .total_time as f32;
 
+        let mut tags_present = FastHashMap::default();
+
         for frame in &self.frames {
             let y1 = y0 + GRAPH_FRAME_HEIGHT;
 
             let mut current_ns = 0;
             for sample in &frame.samples {
                 let x0 = graph_rect.origin.x + w * current_ns as f32 / max_time;
                 current_ns += sample.time_ns;
                 let x1 = graph_rect.origin.x + w * current_ns as f32 / max_time;
-
                 let mut bottom_color = sample.tag.color;
                 bottom_color.a *= 0.5;
 
                 debug_renderer.add_quad(
                     x0,
                     y0,
                     x1,
                     y1,
                     sample.tag.color.into(),
                     bottom_color.into(),
                 );
+
+                tags_present.insert(sample.tag.label, sample.tag.color);
             }
 
             y0 = y1;
         }
 
+        // Add a legend showing which color corresponds to which primitive.
+        const LEGEND_SIZE: f32 = 20.0;
+        const PADDED_LEGEND_SIZE: f32 = 25.0;
+        if !tags_present.is_empty() {
+            debug_renderer.add_quad(
+                bounding_rect.max_x() + GRAPH_PADDING,
+                bounding_rect.origin.y,
+                bounding_rect.max_x() + GRAPH_PADDING + 200.0,
+                bounding_rect.origin.y + tags_present.len() as f32 * PADDED_LEGEND_SIZE + GRAPH_PADDING,
+                ColorU::new(25, 25, 25, 200),
+                ColorU::new(51, 51, 51, 200),
+            );
+        }
+
+        for (i, (label, &color)) in tags_present.iter().enumerate() {
+            let x0 = bounding_rect.origin.x + bounding_rect.size.width + GRAPH_PADDING * 2.0;
+            let y0 = bounding_rect.origin.y + GRAPH_PADDING + i as f32 * PADDED_LEGEND_SIZE;
+
+            debug_renderer.add_quad(
+                x0, y0, x0 + LEGEND_SIZE, y0 + LEGEND_SIZE,
+                color.into(),
+                color.into(),
+            );
+
+            debug_renderer.add_text(
+                x0 + PADDED_LEGEND_SIZE,
+                y0 + LEGEND_SIZE * 0.75,
+                label,
+                ColorU::new(255, 255, 0, 255),
+            );
+        }
+
         bounding_rect
     }
 }
 
 pub struct Profiler {
     x_left: f32,
     y_left: f32,
     x_right: f32,
@@ -788,17 +825,17 @@ impl Profiler {
             self.y_left = new_y;
         } else {
             self.y_right = new_y;
         }
     }
 
     pub fn draw_profile(
         &mut self,
-        frame_profile: &FrameProfileCounters,
+        frame_profiles: &[FrameProfileCounters],
         backend_profile: &BackendProfileCounters,
         renderer_profile: &RendererProfileCounters,
         renderer_timers: &mut RendererProfileTimers,
         gpu_samplers: &[GpuSampler<GpuProfileTag>],
         screen_fraction: f32,
         debug_renderer: &mut DebugRenderer,
     ) {
         self.x_left = 20.0;
@@ -813,21 +850,16 @@ impl Profiler {
         }
         renderer_timers.gpu_time.set(gpu_time);
 
         self.draw_counters(&[&renderer_profile.frame_time], debug_renderer, true);
 
         self.draw_counters(
             &[
                 &renderer_profile.frame_counter,
-                &frame_profile.total_primitives,
-                &frame_profile.visible_primitives,
-                &frame_profile.passes,
-                &frame_profile.color_targets,
-                &frame_profile.alpha_targets,
                 &backend_profile.resources.gpu_cache.allocated_rows,
                 &backend_profile.resources.gpu_cache.allocated_blocks,
             ],
             debug_renderer,
             true,
         );
 
         self.draw_counters(
@@ -858,16 +890,30 @@ impl Profiler {
                 &backend_profile.ipc.send_time,
                 &backend_profile.ipc.consume_time,
                 &backend_profile.ipc.total_time,
             ],
             debug_renderer,
             true,
         );
 
+        for frame_profile in frame_profiles {
+            self.draw_counters(
+                &[
+                    &frame_profile.total_primitives,
+                    &frame_profile.visible_primitives,
+                    &frame_profile.passes,
+                    &frame_profile.color_targets,
+                    &frame_profile.alpha_targets,
+                ],
+                debug_renderer,
+                true,
+            );
+        }
+
         self.draw_counters(
             &[&renderer_profile.draw_calls, &renderer_profile.vertices],
             debug_renderer,
             true,
         );
 
         self.draw_counters(
             &[
--- a/gfx/webrender/src/query.rs
+++ b/gfx/webrender/src/query.rs
@@ -209,48 +209,32 @@ impl<T> GpuProfiler<T> {
     }
 
     pub fn disable_timers(&mut self) {
         for frame in &mut self.frames {
             frame.disable_timers();
         }
     }
 
-    pub fn toggle_timers_enabled(&mut self) {
-        if self.frames[0].timers.set.is_empty() {
-            self.enable_timers();
-        } else {
-            self.disable_timers();
-        }
-    }
-
     pub fn enable_samplers(&mut self) {
         const MAX_SAMPLERS_PER_FRAME: i32 = 16;
         if cfg!(target_os = "macos") {
             warn!("Expect OSX driver bugs related to sample queries")
         }
 
         for frame in &mut self.frames {
             frame.enable_samplers(MAX_SAMPLERS_PER_FRAME);
         }
     }
 
     pub fn disable_samplers(&mut self) {
         for frame in &mut self.frames {
             frame.disable_samplers();
         }
     }
-
-    pub fn toggle_samplers_enabled(&mut self) {
-        if self.frames[0].samplers.set.is_empty() {
-            self.enable_samplers();
-        } else {
-            self.disable_samplers();
-        }
-    }
 }
 
 impl<T: NamedTag> GpuProfiler<T> {
     pub fn build_samples(&mut self) -> (FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
         self.frames[self.next_frame].build_samples()
     }
 
     pub fn begin_frame(&mut self, frame_id: FrameId) {
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -1,25 +1,26 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{ApiMsg, BlobImageRenderer, BuiltDisplayList, DebugCommand, DeviceIntPoint};
 #[cfg(feature = "debugger")]
 use api::{BuiltDisplayListIter, SpecificDisplayItem};
-use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, DocumentMsg};
-use api::{HitTestResult, IdNamespace, LayerPoint, PipelineId, RenderNotifier};
+use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
+use api::{DocumentId, DocumentLayer, DocumentMsg};
+use api::{IdNamespace, LayerPoint, PipelineId, RenderNotifier};
 use api::channel::{MsgReceiver, PayloadReceiver, PayloadReceiverHelperMethods};
 use api::channel::{PayloadSender, PayloadSenderHelperMethods};
 #[cfg(feature = "debugger")]
 use debug_server;
 use frame::FrameContext;
 use frame_builder::{FrameBuilder, FrameBuilderConfig};
 use gpu_cache::GpuCache;
-use internal_types::{DebugOutput, FastHashMap, FastHashSet, RendererFrame, ResultMsg};
+use internal_types::{DebugOutput, FastHashMap, FastHashSet, RenderedDocument, ResultMsg};
 use profiler::{BackendProfileCounters, ResourceProfileCounters};
 use rayon::ThreadPool;
 use record::ApiRecordingReceiver;
 use resource_cache::ResourceCache;
 use scene::Scene;
 #[cfg(feature = "debugger")]
 use serde_json;
 use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
@@ -28,19 +29,21 @@ use std::sync::mpsc::Sender;
 use std::u32;
 use texture_cache::TextureCache;
 use thread_profiler::register_thread_with_profiler;
 use time::precise_time_ns;
 
 struct Document {
     scene: Scene,
     frame_ctx: FrameContext,
+    // the `Option` here is only to deal with the borrow checker
     frame_builder: Option<FrameBuilder>,
     window_size: DeviceUintSize,
     inner_rect: DeviceUintRect,
+    layer: DocumentLayer,
     pan: DeviceIntPoint,
     device_pixel_ratio: f32,
     page_zoom_factor: f32,
     pinch_zoom_factor: f32,
     // A set of pipelines that the caller has requested be
     // made available as output textures.
     output_pipelines: FastHashSet<PipelineId>,
     // A helper switch to prevent any frames rendering triggered by scrolling
@@ -49,31 +52,33 @@ struct Document {
     // the first frame would produce inconsistent rendering results, because
     // scroll events are not necessarily received in deterministic order.
     render_on_scroll: Option<bool>,
 }
 
 impl Document {
     pub fn new(
         config: FrameBuilderConfig,
-        initial_size: DeviceUintSize,
+        window_size: DeviceUintSize,
+        layer: DocumentLayer,
         enable_render_on_scroll: bool,
         default_device_pixel_ratio: f32,
     ) -> Self {
         let render_on_scroll = if enable_render_on_scroll {
             Some(false)
         } else {
             None
         };
         Document {
             scene: Scene::new(),
             frame_ctx: FrameContext::new(config),
-            frame_builder: None,
-            window_size: initial_size,
-            inner_rect: DeviceUintRect::new(DeviceUintPoint::zero(), initial_size),
+            frame_builder: Some(FrameBuilder::empty()),
+            window_size,
+            inner_rect: DeviceUintRect::new(DeviceUintPoint::zero(), window_size),
+            layer,
             pan: DeviceIntPoint::zero(),
             page_zoom_factor: 1.0,
             pinch_zoom_factor: 1.0,
             device_pixel_ratio: default_device_pixel_ratio,
             render_on_scroll,
             output_pipelines: FastHashSet::default(),
         }
     }
@@ -81,65 +86,61 @@ impl Document {
     fn accumulated_scale_factor(&self) -> f32 {
         self.device_pixel_ratio *
         self.page_zoom_factor *
         self.pinch_zoom_factor
     }
 
     fn build_scene(&mut self, resource_cache: &mut ResourceCache) {
         let accumulated_scale_factor = self.accumulated_scale_factor();
-        self.frame_builder = self.frame_ctx.create(
-            self.frame_builder.take(),
+        // this code is why we have `Option`, which is never `None`
+        let frame_builder = self.frame_ctx.create(
+            self.frame_builder.take().unwrap(),
             &self.scene,
             resource_cache,
             self.window_size,
             self.inner_rect,
             accumulated_scale_factor,
             &self.output_pipelines,
         );
+        self.frame_builder = Some(frame_builder);
     }
 
     fn render(
         &mut self,
         resource_cache: &mut ResourceCache,
         gpu_cache: &mut GpuCache,
         resource_profile: &mut ResourceProfileCounters,
-    ) -> RendererFrame {
+    ) -> RenderedDocument {
         let accumulated_scale_factor = self.accumulated_scale_factor();
         let pan = LayerPoint::new(
             self.pan.x as f32 / accumulated_scale_factor,
             self.pan.y as f32 / accumulated_scale_factor,
         );
-        match self.frame_builder {
-            Some(ref mut builder) => {
-                self.frame_ctx.build_renderer_frame(
-                    builder,
-                    resource_cache,
-                    gpu_cache,
-                    &self.scene.pipelines,
-                    accumulated_scale_factor,
-                    pan,
-                    &mut resource_profile.texture_cache,
-                    &mut resource_profile.gpu_cache,
-                    &self.scene.properties,
-                )
-            }
-            None => {
-                self.frame_ctx.get_renderer_frame()
-            }
-        }
+        self.frame_ctx.build_rendered_document(
+            self.frame_builder.as_mut().unwrap(),
+            resource_cache,
+            gpu_cache,
+            &self.scene.pipelines,
+            accumulated_scale_factor,
+            self.layer,
+            pan,
+            &mut resource_profile.texture_cache,
+            &mut resource_profile.gpu_cache,
+            &self.scene.properties,
+        )
     }
 }
 
 enum DocumentOp {
     Nop,
     Built,
     ScrolledNop,
-    Scrolled(RendererFrame),
-    Rendered(RendererFrame),
+    Scrolled(RenderedDocument),
+    Rendered(RenderedDocument),
 }
 
 /// The unique id for WR resource identification.
 static NEXT_NAMESPACE_ID: AtomicUsize = ATOMIC_USIZE_INIT;
 
 /// The render backend is responsible for transforming high level display lists into
 /// GPU-friendly work which is then submitted to the renderer in the form of a frame::Frame.
 ///
@@ -356,23 +357,21 @@ impl RenderBackend {
                     );
                     DocumentOp::Scrolled(frame)
                 } else {
                     DocumentOp::ScrolledNop
                 }
             }
             DocumentMsg::HitTest(pipeline_id, point, flags, tx) => {
                 profile_scope!("HitTest");
-                let result = match doc.frame_builder {
-                    Some(ref builder) => {
-                        let cst = doc.frame_ctx.get_clip_scroll_tree();
-                        builder.hit_test(cst, pipeline_id, point, flags)
-                    },
-                    None => HitTestResult::default(),
-                };
+                let cst = doc.frame_ctx.get_clip_scroll_tree();
+                let result = doc.frame_builder
+                    .as_ref()
+                    .unwrap()
+                    .hit_test(cst, pipeline_id, point, flags);
                 tx.send(result).unwrap();
                 DocumentOp::Nop
             }
             DocumentMsg::ScrollNodeWithId(origin, id, clamp) => {
                 profile_scope!("ScrollNodeWithScrollId");
                 let _timer = profile_counters.total_time.timer();
 
                 if doc.frame_ctx.scroll_node(origin, id, clamp) && doc.render_on_scroll == Some(true) {
@@ -477,45 +476,46 @@ impl RenderBackend {
                         let index = self.resource_cache.get_glyph_index(font_key, ch);
                         glyph_indices.push(index);
                     }
                     tx.send(glyph_indices).unwrap();
                 }
                 ApiMsg::CloneApi(sender) => {
                     sender.send(self.next_namespace_id()).unwrap();
                 }
-                ApiMsg::AddDocument(document_id, initial_size) => {
+                ApiMsg::AddDocument(document_id, initial_size, layer) => {
                     let document = Document::new(
                         self.frame_config.clone(),
                         initial_size,
+                        layer,
                         self.enable_render_on_scroll,
                         self.default_device_pixel_ratio,
                     );
                     self.documents.insert(document_id, document);
                 }
                 ApiMsg::UpdateDocument(document_id, doc_msg) => match self.process_document(
                     document_id,
                     doc_msg,
                     frame_counter,
                     &mut profile_counters,
                 ) {
                     DocumentOp::Nop => {}
                     DocumentOp::Built => {}
                     DocumentOp::ScrolledNop => {
-                        self.notify_compositor_of_new_scroll_frame(false);
+                        self.notify_compositor_of_new_scroll_document(document_id, false);
                     }
-                    DocumentOp::Scrolled(frame) => {
-                        self.publish_frame(document_id, frame, &mut profile_counters);
-                        self.notify_compositor_of_new_scroll_frame(true);
+                    DocumentOp::Scrolled(doc) => {
+                        self.publish_document(document_id, doc, &mut profile_counters);
+                        self.notify_compositor_of_new_scroll_document(document_id, true);
                     }
-                    DocumentOp::Rendered(frame) => {
+                    DocumentOp::Rendered(doc) => {
                         frame_counter += 1;
-                        self.publish_frame_and_notify_compositor(
+                        self.publish_document_and_notify_compositor(
                             document_id,
-                            frame,
+                            doc,
                             &mut profile_counters,
                         );
                     }
                 },
                 ApiMsg::DeleteDocument(document_id) => {
                     self.documents.remove(&document_id);
                 }
                 ApiMsg::ExternalEvent(evt) => {
@@ -536,69 +536,70 @@ impl RenderBackend {
                     self.resource_cache.on_memory_pressure();
 
                     let pending_update = self.resource_cache.pending_updates();
                     let msg = ResultMsg::UpdateResources {
                         updates: pending_update,
                         cancel_rendering: true,
                     };
                     self.result_tx.send(msg).unwrap();
-                    // We use new_frame_ready to wake up the renderer and get the
-                    // resource updates processed, but the UpdateResources message
-                    // will cancel rendering the frame.
-                    self.notifier.new_frame_ready();
+                    self.notifier.wake_up();
                 }
                 ApiMsg::DebugCommand(option) => {
                     let msg = match option {
                         DebugCommand::FetchDocuments => {
                             let json = self.get_docs_for_debugger();
                             ResultMsg::DebugOutput(DebugOutput::FetchDocuments(json))
                         }
                         DebugCommand::FetchClipScrollTree => {
                             let json = self.get_clip_scroll_tree_for_debugger();
                             ResultMsg::DebugOutput(DebugOutput::FetchClipScrollTree(json))
                         }
                         _ => ResultMsg::DebugCommand(option),
                     };
                     self.result_tx.send(msg).unwrap();
-                    self.notifier.new_frame_ready();
+                    self.notifier.wake_up();
                 }
                 ApiMsg::ShutDown => {
                     self.notifier.shut_down();
                     break;
                 }
             }
         }
     }
 
-    fn publish_frame(
+    fn publish_document(
         &mut self,
         document_id: DocumentId,
-        frame: RendererFrame,
+        document: RenderedDocument,
         profile_counters: &mut BackendProfileCounters,
     ) {
         let pending_update = self.resource_cache.pending_updates();
-        let msg = ResultMsg::NewFrame(document_id, frame, pending_update, profile_counters.clone());
+        let msg = ResultMsg::PublishDocument(document_id, document, pending_update, profile_counters.clone());
         self.result_tx.send(msg).unwrap();
         profile_counters.reset();
     }
 
-    fn publish_frame_and_notify_compositor(
+    fn publish_document_and_notify_compositor(
         &mut self,
         document_id: DocumentId,
-        frame: RendererFrame,
+        document: RenderedDocument,
         profile_counters: &mut BackendProfileCounters,
     ) {
-        self.publish_frame(document_id, frame, profile_counters);
+        self.publish_document(document_id, document, profile_counters);
 
-        self.notifier.new_frame_ready();
+        self.notifier.new_document_ready(document_id, false, true);
     }
 
-    fn notify_compositor_of_new_scroll_frame(&self, composite_needed: bool) {
-        self.notifier.new_scroll_frame_ready(composite_needed);
+    fn notify_compositor_of_new_scroll_document(
+        &self,
+        document_id: DocumentId,
+        composite_needed: bool,
+    ) {
+        self.notifier.new_document_ready(document_id, true, composite_needed);
     }
 
 
     #[cfg(not(feature = "debugger"))]
     fn get_docs_for_debugger(&self) -> String {
         String::new()
     }
 
@@ -671,21 +672,20 @@ impl RenderBackend {
         let mut debug_root = debug_server::ClipScrollTreeList::new();
 
         for (_, doc) in &self.documents {
             let debug_node = debug_server::TreeNode::new("document clip_scroll tree");
             let mut builder = debug_server::TreeNodeBuilder::new(debug_node);
 
             // TODO(gw): Restructure the storage of clip-scroll tree, clip store
             //           etc so this isn't so untidy.
-            if let Some(ref frame_builder) = doc.frame_builder {
-                doc.frame_ctx
-                    .get_clip_scroll_tree()
-                    .print_with(&frame_builder.clip_store, &mut builder);
-            }
+            let clip_store = &doc.frame_builder.as_ref().unwrap().clip_store;
+            doc.frame_ctx
+                .get_clip_scroll_tree()
+                .print_with(clip_store, &mut builder);
 
             debug_root.add(builder.build());
         }
 
         serde_json::to_string(&debug_root).unwrap()
     }
 }
 
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -4,17 +4,17 @@
 
 use api::{ClipId, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{LayerPoint, LayerRect, PremultipliedColorF};
 use clip::{ClipSource, ClipSourcesWeakHandle, ClipStore};
 use clip_scroll_tree::CoordinateSystemId;
 use gpu_types::{ClipScrollNodeIndex};
 use picture::RasterizationSpace;
 use prim_store::{PrimitiveIndex};
-use std::{cmp, usize, f32, i32};
+use std::{cmp, ops, usize, f32, i32};
 use std::rc::Rc;
 use tiling::{RenderPass, RenderTargetIndex};
 use tiling::{RenderTargetKind};
 
 const FLOATS_PER_RENDER_TASK_INFO: usize = 12;
 pub const MAX_BLUR_STD_DEVIATION: f32 = 4.0;
 pub const MIN_DOWNSCALING_RT_SIZE: i32 = 128;
 
@@ -52,17 +52,17 @@ impl Iterator for ClipChainNodeIter {
             Some(ref item) => item.prev.clone(),
             None => return None,
         };
         previous
     }
 }
 
 impl RenderTaskTree {
-    pub fn new() -> RenderTaskTree {
+    pub fn new() -> Self {
         RenderTaskTree {
             tasks: Vec::new(),
             task_data: Vec::new(),
         }
     }
 
     pub fn add(&mut self, task: RenderTask) -> RenderTaskId {
         let id = RenderTaskId(self.tasks.len() as u32);
@@ -110,39 +110,43 @@ impl RenderTaskTree {
         } else {
             pass_index
         };
 
         let pass = &mut passes[pass_index];
         pass.add_render_task(id, task.get_dynamic_size(), task.target_kind());
     }
 
-    pub fn get(&self, id: RenderTaskId) -> &RenderTask {
-        &self.tasks[id.0 as usize]
-    }
-
-    pub fn get_mut(&mut self, id: RenderTaskId) -> &mut RenderTask {
-        &mut self.tasks[id.0 as usize]
-    }
-
     pub fn get_task_address(&self, id: RenderTaskId) -> RenderTaskAddress {
-        let task = &self.tasks[id.0 as usize];
-        match task.kind {
+        match self[id].kind {
             RenderTaskKind::Alias(alias_id) => RenderTaskAddress(alias_id.0),
             _ => RenderTaskAddress(id.0),
         }
     }
 
     pub fn build(&mut self) {
         for task in &mut self.tasks {
             self.task_data.push(task.write_task_data());
         }
     }
 }
 
+impl ops::Index<RenderTaskId> for RenderTaskTree {
+    type Output = RenderTask;
+    fn index(&self, id: RenderTaskId) -> &RenderTask {
+        &self.tasks[id.0 as usize]
+    }
+}
+
+impl ops::IndexMut<RenderTaskId> for RenderTaskTree {
+    fn index_mut(&mut self, id: RenderTaskId) -> &mut RenderTask {
+        &mut self.tasks[id.0 as usize]
+    }
+}
+
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTaskKey {
     /// Draw the alpha mask for a shared clip.
     CacheMask(ClipId),
 }
 
 #[derive(Debug)]
 pub enum RenderTaskLocation {
@@ -186,17 +190,17 @@ impl ClipWorkItem {
             .get_opt(&self.clip_sources)
             .expect("bug: clip handle should be valid")
             .clips();
         let mut rounded_rect_count = 0;
 
         for &(ref clip, _) in clips {
             match *clip {
                 ClipSource::Rectangle(..) => {
-                    if self.has_compatible_coordinate_system(prim_coordinate_system_id) {
+                    if !self.has_compatible_coordinate_system(prim_coordinate_system_id) {
                         return MaskGeometryKind::Default;
                     }
                 },
                 ClipSource::RoundedRectangle(..) => {
                     rounded_rect_count += 1;
                 }
                 ClipSource::Image(..) | ClipSource::BorderCorner(..) => {
                     return MaskGeometryKind::Default;
@@ -427,17 +431,17 @@ impl RenderTask {
         render_tasks: &mut RenderTaskTree,
         target_kind: RenderTargetKind,
         regions: &[LayerRect],
         clear_mode: ClearMode,
         color: PremultipliedColorF,
     ) -> Self {
         // Adjust large std deviation value.
         let mut adjusted_blur_std_deviation = blur_std_deviation;
-        let blur_target_size = render_tasks.get(src_task_id).get_dynamic_size();
+        let blur_target_size = render_tasks[src_task_id].get_dynamic_size();
         let mut adjusted_blur_target_size = blur_target_size;
         let mut downscaling_src_task_id = src_task_id;
         let mut scale_factor = 1.0;
         while adjusted_blur_std_deviation > MAX_BLUR_STD_DEVIATION {
             if adjusted_blur_target_size.width < MIN_DOWNSCALING_RT_SIZE ||
                adjusted_blur_target_size.height < MIN_DOWNSCALING_RT_SIZE {
                 break;
             }
@@ -660,19 +664,9 @@ impl RenderTask {
 
             RenderTaskKind::CacheMask(..) => true,
 
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: is_shared() called on aliased task");
             }
         }
     }
-
-    pub fn set_alias(&mut self, id: RenderTaskId) {
-        debug_assert!(self.cache_key.is_some());
-        // TODO(gw): We can easily handle invalidation of tasks that
-        //           contain children in the future. Since we don't
-        //           have any cases of that yet, just assert to simplify
-        //           the current implementation.
-        debug_assert!(self.children.is_empty());
-        self.kind = RenderTaskKind::Alias(id);
-    }
 }
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -5,18 +5,19 @@
 //! The webrender API.
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
 use api::{channel, BlobImageRenderer, FontRenderMode};
-use api::{ColorF, Epoch, PipelineId, RenderApiSender, RenderNotifier};
-use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize, DeviceUintRect, DeviceUintSize};
+use api::{ColorF, DocumentId, Epoch, PipelineId, RenderApiSender, RenderNotifier};
+use api::{DevicePixel, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
+use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{ExternalImageId, ExternalImageType, ImageFormat};
 use api::{YUV_COLOR_SPACES, YUV_FORMATS};
 use api::{YuvColorSpace, YuvFormat};
 #[cfg(not(feature = "debugger"))]
 use api::ApiMsg;
 use api::DebugCommand;
 #[cfg(not(feature = "debugger"))]
 use api::channel::MsgSender;
@@ -26,25 +27,25 @@ use debug_render::DebugRenderer;
 use debug_server::{self, DebugServer};
 use device::{DepthFunction, Device, FrameId, Program, Texture,
              VertexDescriptor, PBO};
 use device::{get_gl_format_bgra, ExternalTexture, FBOId, TextureSlot, VertexAttribute,
              VertexAttributeKind};
 use device::{FileWatcherHandler, ShaderError, TextureFilter, TextureTarget,
              VertexUsageHint, VAO};
 use device::ProgramCache;
-use euclid::{rect, Transform3D};
+use euclid::{rect, ScaleFactor, Transform3D};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use glyph_rasterizer::GlyphFormat;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use gpu_types::PrimitiveInstance;
 use internal_types::{BatchTextures, SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE};
-use internal_types::{CacheTextureId, FastHashMap, RendererFrame, ResultMsg, TextureUpdateOp};
-use internal_types::{DebugOutput, RenderTargetMode, TextureUpdateList, TextureUpdateSource};
+use internal_types::{CacheTextureId, FastHashMap, RenderedDocument, ResultMsg, TextureUpdateOp};
+use internal_types::{DebugOutput, RenderTargetInfo, TextureUpdateList, TextureUpdateSource};
 use profiler::{BackendProfileCounters, Profiler};
 use profiler::{GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
 use query::{GpuProfiler, GpuTimer};
 use rayon::Configuration as ThreadPoolConfig;
 use rayon::ThreadPool;
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
 use render_task::{RenderTaskKind, RenderTaskTree};
@@ -58,17 +59,17 @@ use std::f32;
 use std::mem;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use thread_profiler::{register_thread_with_profiler, write_profile};
-use tiling::{AlphaRenderTarget, ColorRenderTarget, RenderTargetKind};
+use tiling::{AlphaRenderTarget, ColorRenderTarget, RenderPassKind, RenderTargetKind, RenderTargetList};
 use tiling::{BatchKey, BatchKind, BrushBatchKind, Frame, RenderTarget, ScalingInfo, TransformBatchKind};
 use time::precise_time_ns;
 use util::TransformedRectKind;
 
 pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
 
 const GPU_TAG_BRUSH_MASK: GpuProfileTag = GpuProfileTag {
     label: "B_Mask",
@@ -237,16 +238,27 @@ impl BatchKind {
 
 bitflags! {
     #[derive(Default)]
     pub struct DebugFlags: u32 {
         const PROFILER_DBG      = 1 << 0;
         const RENDER_TARGET_DBG = 1 << 1;
         const TEXTURE_CACHE_DBG = 1 << 2;
         const ALPHA_PRIM_DBG    = 1 << 3;
+        const GPU_TIME_QUERIES  = 1 << 4;
+        const GPU_SAMPLE_QUERIES= 1 << 5;
+        const DISABLE_BATCHING  = 1 << 6;
+    }
+}
+
+fn flag_changed(before: DebugFlags, after: DebugFlags, select: DebugFlags) -> Option<bool> {
+    if before & select != after & select {
+        Some(after.contains(select))
+    } else {
+        None
     }
 }
 
 // A generic mode that can be passed to shaders to change
 // behaviour per draw-call.
 type ShaderMode = i32;
 
 #[repr(C)]
@@ -265,19 +277,19 @@ impl Into<ShaderMode> for TextShaderMode
     fn into(self) -> i32 {
         self as i32
     }
 }
 
 impl From<GlyphFormat> for TextShaderMode {
     fn from(format: GlyphFormat) -> TextShaderMode {
         match format {
-            GlyphFormat::Mono | GlyphFormat::Alpha => TextShaderMode::Alpha,
-            GlyphFormat::Subpixel => {
-                panic!("Subpixel glyph format must be handled separately.");
+            GlyphFormat::Alpha => TextShaderMode::Alpha,
+            GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => {
+                panic!("Subpixel glyph formats must be handled separately.");
             }
             GlyphFormat::ColorBitmap => TextShaderMode::ColorBitmap,
         }
     }
 }
 
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 enum TextureSampler {
@@ -557,17 +569,17 @@ impl SourceTextureResolver {
     fn new(device: &mut Device) -> SourceTextureResolver {
         let mut dummy_cache_texture = device.create_texture(TextureTarget::Array);
         device.init_texture(
             &mut dummy_cache_texture,
             1,
             1,
             ImageFormat::BGRA8,
             TextureFilter::Linear,
-            RenderTargetMode::RenderTarget,
+            None,
             1,
             None,
         );
 
         SourceTextureResolver {
             cache_texture_map: Vec::new(),
             external_images: FastHashMap::default(),
             dummy_cache_texture,
@@ -581,36 +593,29 @@ impl SourceTextureResolver {
 
         for texture in self.cache_texture_map {
             device.delete_texture(texture);
         }
     }
 
     fn end_pass(
         &mut self,
-        is_last: bool,
         a8_texture: Option<Texture>,
         rgba8_texture: Option<Texture>,
         a8_pool: &mut Vec<Texture>,
         rgba8_pool: &mut Vec<Texture>,
     ) {
         // If we have cache textures from previous pass, return them to the pool.
         rgba8_pool.extend(self.cache_rgba8_texture.take());
         a8_pool.extend(self.cache_a8_texture.take());
 
-        if is_last {
-            // On the last pass, return the textures from this pass to the pool.
-            rgba8_pool.extend(rgba8_texture);
-            a8_pool.extend(a8_texture);
-        } else {
-            // We have another pass to process, make these textures available
-            // as inputs to the next pass.
-            self.cache_rgba8_texture = rgba8_texture;
-            self.cache_a8_texture = a8_texture;
-        }
+        // Hold on to the textures produced by this pass so that they are
+        // available as inputs to the next pass.
+        self.cache_rgba8_texture = rgba8_texture;
+        self.cache_a8_texture = a8_texture;
     }
 
     // Bind a source texture to the device.
     fn bind(&self, texture_id: &SourceTexture, sampler: TextureSampler, device: &mut Device) {
         match *texture_id {
             SourceTexture::Invalid => {}
             SourceTexture::CacheA8 => {
                 let texture = self.cache_a8_texture
@@ -748,17 +753,17 @@ impl CacheTexture {
             // Create a f32 texture that can be used for the vertex shader
             // to fetch data from.
             device.init_texture(
                 &mut self.texture,
                 MAX_VERTEX_TEXTURE_WIDTH as u32,
                 updates.height as u32,
                 ImageFormat::RGBAF32,
                 TextureFilter::Nearest,
-                RenderTargetMode::None,
+                None,
                 1,
                 None,
             );
 
             // Copy the current texture into the newly resized texture.
             if current_dimensions.height > 0 {
                 // If we had to resize the texture, just mark all rows
                 // as dirty so they will be uploaded to the texture
@@ -857,17 +862,17 @@ impl VertexDataTexture {
             let new_height = (needed_height + 127) & !127;
 
             device.init_texture(
                 &mut self.texture,
                 width,
                 new_height,
                 ImageFormat::RGBAF32,
                 TextureFilter::Nearest,
-                RenderTargetMode::None,
+                None,
                 1,
                 None,
             );
         }
 
         // Bind a PBO to do the texture upload.
         // Updating the texture via PBO avoids CPU-side driver stalls.
         device.bind_pbo(Some(&self.pbo));
@@ -888,16 +893,17 @@ const TRANSFORM_FEATURE: &str = "TRANSFO
 const CLIP_FEATURE: &str = "CLIP";
 const ALPHA_FEATURE: &str = "ALPHA_PASS";
 
 enum ShaderKind {
     Primitive,
     Cache(VertexArrayKind),
     ClipCache,
     Brush,
+    Text,
 }
 
 struct LazilyCompiledShader {
     program: Option<Program>,
     name: &'static str,
     kind: ShaderKind,
     features: Vec<&'static str>,
 }
@@ -913,17 +919,28 @@ impl LazilyCompiledShader {
         let mut shader = LazilyCompiledShader {
             program: None,
             name,
             kind,
             features: features.to_vec(),
         };
 
         if precache {
-            try!{ shader.get(device) };
+            let t0 = precise_time_ns();
+            let program = try!{ shader.get(device) };
+            let t1 = precise_time_ns();
+            device.bind_program(program);
+            device.draw_triangles_u16(0, 3);
+            let t2 = precise_time_ns();
+            println!("[C: {:.1} ms D: {:.1} ms] Precache {} {:?}",
+                (t1 - t0) as f64 / 1000000.0,
+                (t2 - t1) as f64 / 1000000.0,
+                name,
+                features
+            );
         }
 
         Ok(shader)
     }
 
     fn bind<M>(
         &mut self,
         device: &mut Device,
@@ -941,17 +958,17 @@ impl LazilyCompiledShader {
         device.bind_program(program);
         device.set_uniforms(program, projection, mode.into());
     }
 
     fn get(&mut self, device: &mut Device) -> Result<&Program, ShaderError> {
         if self.program.is_none() {
             let program = try!{
                 match self.kind {
-                    ShaderKind::Primitive | ShaderKind::Brush => {
+                    ShaderKind::Primitive | ShaderKind::Brush | ShaderKind::Text => {
                         create_prim_shader(self.name,
                                            device,
                                            &self.features,
                                            VertexArrayKind::Primitive)
                     }
                     ShaderKind::Cache(format) => {
                         create_prim_shader(self.name,
                                            device,
@@ -971,21 +988,16 @@ impl LazilyCompiledShader {
 
     fn deinit(self, device: &mut Device) {
         if let Some(program) = self.program {
             device.delete_program(program);
         }
     }
 }
 
-struct PrimitiveShader {
-    simple: LazilyCompiledShader,
-    transform: LazilyCompiledShader,
-}
-
 // A brush shader supports two modes:
 // opaque:
 //   Used for completely opaque primitives,
 //   or inside segments of partially
 //   opaque primitives. Assumes no need
 //   for clip masks, AA etc.
 // alpha:
 //   Used for brush primitives in the alpha
@@ -1049,26 +1061,19 @@ impl BrushShader {
     }
 
     fn deinit(self, device: &mut Device) {
         self.opaque.deinit(device);
         self.alpha.deinit(device);
     }
 }
 
-struct FileWatcher {
-    notifier: Box<RenderNotifier>,
-    result_tx: Sender<ResultMsg>,
-}
-
-impl FileWatcherHandler for FileWatcher {
-    fn file_changed(&self, path: PathBuf) {
-        self.result_tx.send(ResultMsg::RefreshShader(path)).ok();
-        self.notifier.new_frame_ready();
-    }
+struct PrimitiveShader {
+    simple: LazilyCompiledShader,
+    transform: LazilyCompiledShader,
 }
 
 impl PrimitiveShader {
     fn new(
         name: &'static str,
         device: &mut Device,
         features: &[&'static str],
         precache: bool,
@@ -1114,16 +1119,97 @@ impl PrimitiveShader {
     }
 
     fn deinit(self, device: &mut Device) {
         self.simple.deinit(device);
         self.transform.deinit(device);
     }
 }
 
+struct TextShader {
+    simple: LazilyCompiledShader,
+    transform: LazilyCompiledShader,
+    glyph_transform: LazilyCompiledShader,
+}
+
+impl TextShader {
+    fn new(
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache: bool,
+    ) -> Result<TextShader, ShaderError> {
+        let simple = try!{
+            LazilyCompiledShader::new(ShaderKind::Text,
+                                      name,
+                                      features,
+                                      device,
+                                      precache)
+        };
+
+        let mut transform_features = features.to_vec();
+        transform_features.push("TRANSFORM");
+
+        let transform = try!{
+            LazilyCompiledShader::new(ShaderKind::Text,
+                                      name,
+                                      &transform_features,
+                                      device,
+                                      precache)
+        };
+
+        let mut glyph_transform_features = features.to_vec();
+        glyph_transform_features.push("GLYPH_TRANSFORM");
+
+        let glyph_transform = try!{
+            LazilyCompiledShader::new(ShaderKind::Text,
+                                      name,
+                                      &glyph_transform_features,
+                                      device,
+                                      precache)
+        };
+
+        Ok(TextShader { simple, transform, glyph_transform })
+    }
+
+    fn bind<M>(
+        &mut self,
+        device: &mut Device,
+        glyph_format: GlyphFormat,
+        transform_kind: TransformedRectKind,
+        projection: &Transform3D<f32>,
+        mode: M,
+        renderer_errors: &mut Vec<RendererError>,
+    ) where M: Into<ShaderMode> {
+        match glyph_format {
+            GlyphFormat::Alpha |
+            GlyphFormat::Subpixel |
+            GlyphFormat::ColorBitmap => {
+                match transform_kind {
+                    TransformedRectKind::AxisAligned => {
+                        self.simple.bind(device, projection, mode, renderer_errors)
+                    }
+                    TransformedRectKind::Complex => {
+                        self.transform.bind(device, projection, mode, renderer_errors)
+                    }
+                }
+            }
+            GlyphFormat::TransformedSubpixel => {
+                self.glyph_transform.bind(device, projection, mode, renderer_errors)
+            }
+        }
+    }
+
+    fn deinit(self, device: &mut Device) {
+        self.simple.deinit(device);
+        self.transform.deinit(device);
+        self.glyph_transform.deinit(device);
+    }
+}
+
 fn create_prim_shader(
     name: &'static str,
     device: &mut Device,
     features: &[&'static str],
     vertex_format: VertexArrayKind,
 ) -> Result<Program, ShaderError> {
     let mut prefix = format!(
         "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}\n",
@@ -1187,37 +1273,49 @@ fn create_clip_shader(name: &'static str
                 ("sSharedCacheA8", TextureSampler::SharedCacheA8),
             ],
         );
     }
 
     program
 }
 
+struct FileWatcher {
+    notifier: Box<RenderNotifier>,
+    result_tx: Sender<ResultMsg>,
+}
+
+impl FileWatcherHandler for FileWatcher {
+    fn file_changed(&self, path: PathBuf) {
+        self.result_tx.send(ResultMsg::RefreshShader(path)).ok();
+        self.notifier.wake_up();
+    }
+}
+
 #[derive(Clone, Debug, PartialEq)]
 pub enum ReadPixelsFormat {
     Rgba8,
     Bgra8,
 }
 
 struct FrameOutput {
     last_access: FrameId,
     fbo_id: FBOId,
 }
 
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
-pub struct Renderer<'a> {
+pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     debug_server: DebugServer,
-    device: Device<'a>,
+    device: Device,
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
-    current_frame: Option<RendererFrame>,
+    active_documents: Vec<(DocumentId, RenderedDocument)>,
 
     // These are "cache shaders". These shaders are used to
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
     cs_text_run: LazilyCompiledShader,
     cs_line: LazilyCompiledShader,
     cs_blur_a8: LazilyCompiledShader,
     cs_blur_rgba8: LazilyCompiledShader,
@@ -1239,18 +1337,18 @@ pub struct Renderer<'a> {
     // final results on screen. They are aware of tile boundaries.
     // Most draw directly to the framebuffer, but some use inputs
     // from the cache shaders to draw. Specifically, the box
     // shadow primitive shader stretches the box shadow cache
     // output, and the cache_image shader blits the results of
     // a cache shader (e.g. blur) to the screen.
     ps_rectangle: PrimitiveShader,
     ps_rectangle_clip: PrimitiveShader,
-    ps_text_run: PrimitiveShader,
-    ps_text_run_subpx_bg_pass1: PrimitiveShader,
+    ps_text_run: TextShader,
+    ps_text_run_subpx_bg_pass1: TextShader,
     ps_image: Vec<Option<PrimitiveShader>>,
     ps_yuv_image: Vec<Option<PrimitiveShader>>,
     ps_border_corner: PrimitiveShader,
     ps_border_edge: PrimitiveShader,
     ps_gradient: PrimitiveShader,
     ps_angle_gradient: PrimitiveShader,
     ps_radial_gradient: PrimitiveShader,
     ps_line: PrimitiveShader,
@@ -1258,22 +1356,20 @@ pub struct Renderer<'a> {
     ps_blend: LazilyCompiledShader,
     ps_hw_composite: LazilyCompiledShader,
     ps_split_composite: LazilyCompiledShader,
     ps_composite: LazilyCompiledShader,
 
     max_texture_size: u32,
 
     max_recorded_profiles: usize,
-    clear_framebuffer: bool,
-    clear_color: ColorF,
+    clear_color: Option<ColorF>,
     enable_clear_scissor: bool,
     debug: DebugRenderer,
     debug_flags: DebugFlags,
-    enable_batcher: bool,
     backend_profile_counters: BackendProfileCounters,
     profile_counters: RendererProfileCounters,
     profiler: Profiler,
     last_time: u64,
 
     color_render_targets: Vec<Texture>,
     alpha_render_targets: Vec<Texture>,
 
@@ -1330,17 +1426,17 @@ impl From<ShaderError> for RendererError
 }
 
 impl From<std::io::Error> for RendererError {
     fn from(err: std::io::Error) -> Self {
         RendererError::Thread(err)
     }
 }
 
-impl<'a> Renderer<'a> {
+impl Renderer {
     /// Initializes webrender and creates a `Renderer` and `RenderApiSender`.
     ///
     /// # Examples
     /// Initializes a `Renderer` with some reasonable values. For more information see
     /// [`RendererOptions`][rendereroptions].
     ///
     /// ```rust,ignore
     /// # use webrender::renderer::Renderer;
@@ -1394,18 +1490,17 @@ impl<'a> Renderer<'a> {
                 device_max_size,
                 options.max_texture_size.unwrap_or(device_max_size),
             ),
             min_texture_size,
         );
 
         register_thread_with_profiler("Compositor".to_owned());
 
-        // device-pixel ratio doesn't matter here - we are just creating resources.
-        device.begin_frame(1.0);
+        device.begin_frame();
 
         let cs_text_run = try!{
             LazilyCompiledShader::new(ShaderKind::Cache(VertexArrayKind::Primitive),
                                       "cs_text_run",
                                       &[],
                                       &mut device,
                                       options.precache_shaders)
         };
@@ -1505,27 +1600,27 @@ impl<'a> Renderer<'a> {
         let ps_line = try!{
             PrimitiveShader::new("ps_line",
                                  &mut device,
                                  &[],
                                  options.precache_shaders)
         };
 
         let ps_text_run = try!{
-            PrimitiveShader::new("ps_text_run",
-                                 &mut device,
-                                 &[],
-                                 options.precache_shaders)
+            TextShader::new("ps_text_run",
+                            &mut device,
+                            &[],
+                            options.precache_shaders)
         };
 
         let ps_text_run_subpx_bg_pass1 = try!{
-            PrimitiveShader::new("ps_text_run",
-                                 &mut device,
-                                 &["SUBPX_BG_PASS1"],
-                                 options.precache_shaders)
+            TextShader::new("ps_text_run",
+                            &mut device,
+                            &["SUBPX_BG_PASS1"],
+                            options.precache_shaders)
         };
 
         // All image configuration.
         let mut image_features = Vec::new();
         let mut ps_image: Vec<Option<PrimitiveShader>> = Vec::new();
         // PrimitiveShader is not clonable. Use push() to initialize the vec.
         for _ in 0 .. IMAGE_BUFFER_KINDS.len() {
             ps_image.push(None);
@@ -1747,17 +1842,17 @@ impl<'a> Renderer<'a> {
 
             let mut texture = device.create_texture(TextureTarget::Default);
             device.init_texture(
                 &mut texture,
                 8,
                 8,
                 ImageFormat::A8,
                 TextureFilter::Nearest,
-                RenderTargetMode::None,
+                None,
                 1,
                 Some(&dither_matrix),
             );
 
             Some(texture)
         } else {
             None
         };
@@ -1804,17 +1899,19 @@ impl<'a> Renderer<'a> {
 
         let config = FrameBuilderConfig {
             enable_scrollbars: options.enable_scrollbars,
             default_font_render_mode,
             debug: options.debug,
         };
 
         let device_pixel_ratio = options.device_pixel_ratio;
-        let debug_flags = options.debug_flags;
+        // First set the flags to default and later call set_debug_flags to ensure any
+        // potential transition when enabling a flag is run.
+        let debug_flags = DebugFlags::default();
         let payload_tx_for_backend = payload_tx.clone();
         let recorder = options.recorder;
         let thread_listener = Arc::new(options.thread_listener);
         let thread_listener_for_rayon_start = thread_listener.clone();
         let thread_listener_for_rayon_end = thread_listener.clone();
         let workers = options
             .workers
             .take()
@@ -1840,43 +1937,45 @@ impl<'a> Renderer<'a> {
         let thread_listener_for_render_backend = thread_listener.clone();
         let thread_name = format!("WRRenderBackend#{}", options.renderer_id.unwrap_or(0));
         try!{
             thread::Builder::new().name(thread_name.clone()).spawn(move || {
                 register_thread_with_profiler(thread_name.clone());
                 if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                     thread_listener.thread_started(&thread_name);
                 }
-                let mut backend = RenderBackend::new(api_rx,
-                                                     payload_rx,
-                                                     payload_tx_for_backend,
-                                                     result_tx,
-                                                     device_pixel_ratio,
-                                                     texture_cache,
-                                                     workers,
-                                                     backend_notifier,
-                                                     config,
-                                                     recorder,
-                                                     blob_image_renderer,
-                                                     enable_render_on_scroll);
+                let mut backend = RenderBackend::new(
+                    api_rx,
+                    payload_rx,
+                    payload_tx_for_backend,
+                    result_tx,
+                    device_pixel_ratio,
+                    texture_cache,
+                    workers,
+                    backend_notifier,
+                    config,
+                    recorder,
+                    blob_image_renderer,
+                    enable_render_on_scroll,
+                );
                 backend.run(backend_profile_counters);
                 if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                     thread_listener.thread_stopped(&thread_name);
                 }
             })
         };
 
         let gpu_cache_texture = CacheTexture::new(&mut device);
         let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()));
 
-        let renderer = Renderer {
+        let mut renderer = Renderer {
             result_rx,
             debug_server,
             device,
-            current_frame: None,
+            active_documents: Vec::new(),
             pending_texture_updates: Vec::new(),
             pending_gpu_cache_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
             cs_text_run,
             cs_line,
             cs_blur_a8,
             cs_blur_rgba8,
             brush_mask_corner,
@@ -1899,23 +1998,21 @@ impl<'a> Renderer<'a> {
             ps_radial_gradient,
             ps_blend,
             ps_hw_composite,
             ps_split_composite,
             ps_composite,
             ps_line,
             debug: debug_renderer,
             debug_flags,
-            enable_batcher: options.enable_batcher,
             backend_profile_counters: BackendProfileCounters::new(),
             profile_counters: RendererProfileCounters::new(),
             profiler: Profiler::new(),
             max_texture_size: max_texture_size,
             max_recorded_profiles: options.max_recorded_profiles,
-            clear_framebuffer: options.clear_framebuffer,
             clear_color: options.clear_color,
             enable_clear_scissor: options.enable_clear_scissor,
             last_time: 0,
             color_render_targets: Vec::new(),
             alpha_render_targets: Vec::new(),
             gpu_profile,
             prim_vao,
             blur_vao,
@@ -1930,16 +2027,18 @@ impl<'a> Renderer<'a> {
             cpu_profiles: VecDeque::new(),
             gpu_profiles: VecDeque::new(),
             gpu_cache_texture,
             texture_cache_upload_pbo,
             texture_resolver,
             renderer_errors: Vec::new(),
         };
 
+        renderer.set_debug_flags(options.debug_flags);
+
         let sender = RenderApiSender::new(api_tx, payload_tx);
         Ok((renderer, sender))
     }
 
     pub fn get_max_texture_size(&self) -> u32 {
         self.max_texture_size
     }
 
@@ -1968,66 +2067,65 @@ impl<'a> Renderer<'a> {
     /// Returns a HashMap containing the pipeline ids that have been received by the renderer and
     /// their respective epochs since the last time the method was called.
     pub fn flush_rendered_epochs(&mut self) -> FastHashMap<PipelineId, Epoch> {
         mem::replace(&mut self.pipeline_epoch_map, FastHashMap::default())
     }
 
     // update the program cache with new binaries, e.g. when some of the lazy loaded
     // shader programs got activated in the mean time
-    pub fn update_program_cache(&mut self, cached_programs: &'a mut ProgramCache) {
+    pub fn update_program_cache(&mut self, cached_programs: Rc<ProgramCache>) {
         self.device.update_program_cache(cached_programs);
     }
 
     /// Processes the result queue.
     ///
     /// Should be called before `render()`, as texture cache updates are done here.
     pub fn update(&mut self) {
         profile_scope!("update");
 
         // Pull any pending results and return the most recent.
         while let Ok(msg) = self.result_rx.try_recv() {
             match msg {
-                ResultMsg::NewFrame(
-                    _document_id,
-                    mut frame,
+                ResultMsg::PublishDocument(
+                    document_id,
+                    mut doc,
                     texture_update_list,
                     profile_counters,
                 ) => {
                     //TODO: associate `document_id` with target window
                     self.pending_texture_updates.push(texture_update_list);
-                    if let Some(ref mut frame) = frame.frame {
-                        // TODO(gw): This whole message / Frame / RendererFrame stuff
-                        //           is really messy and needs to be refactored!!
-                        if let Some(update_list) = frame.gpu_cache_updates.take() {
-                            self.pending_gpu_cache_updates.push(update_list);
-                        }
-                    }
+                    self.pending_gpu_cache_updates.extend(doc.frame.gpu_cache_updates.take());
                     self.backend_profile_counters = profile_counters;
 
                     // Update the list of available epochs for use during reftests.
                     // This is a workaround for https://github.com/servo/servo/issues/13149.
-                    for (pipeline_id, epoch) in &frame.pipeline_epoch_map {
+                    for (pipeline_id, epoch) in &doc.pipeline_epoch_map {
                         self.pipeline_epoch_map.insert(*pipeline_id, *epoch);
                     }
 
-                    self.current_frame = Some(frame);
+                    // Add a new document to the active set, expressed as a `Vec` in order
+                    // to re-order based on `DocumentLayer` during rendering.
+                    match self.active_documents.iter().position(|&(id, _)| id == document_id) {
+                        Some(pos) => self.active_documents[pos].1 = doc,
+                        None => self.active_documents.push((document_id, doc)),
+                    }
                 }
                 ResultMsg::UpdateResources {
                     updates,
                     cancel_rendering,
                 } => {
                     self.pending_texture_updates.push(updates);
                     self.update_texture_cache();
-                    // If we receive a NewFrame message followed by this one within
-                    // the same update we need ot cancel the frame because we might
-                    // have deleted the resources in use in the frame dut to a memory
-                    // pressure event.
+                    // If we receive a `PublishDocument` message followed by this one
+                    // within the same update we need to cancel the frame because we
+                    // might have deleted the resources in use in the frame due to a
+                    // memory pressure event.
                     if cancel_rendering {
-                        self.current_frame = None;
+                        self.active_documents.clear();
                     }
                 }
                 ResultMsg::RefreshShader(path) => {
                     self.pending_shader_updates.push(path);
                 }
                 ResultMsg::DebugOutput(output) => match output {
                     DebugOutput::FetchDocuments(string) |
                     DebugOutput::FetchClipScrollTree(string) => {
@@ -2044,176 +2142,167 @@ impl<'a> Renderer<'a> {
     #[cfg(not(feature = "debugger"))]
     fn get_passes_for_debugger(&self) -> String {
         // Avoid unused param warning.
         let _ = &self.debug_server;
         String::new()
     }
 
     #[cfg(feature = "debugger")]
+    fn debug_alpha_target(target: &AlphaRenderTarget) -> debug_server::Target {
+        let mut debug_target = debug_server::Target::new("A8");
+
+        debug_target.add(
+            debug_server::BatchKind::Clip,
+            "Clear",
+            target.clip_batcher.border_clears.len(),
+        );
+        debug_target.add(
+            debug_server::BatchKind::Clip,
+            "Borders",
+            target.clip_batcher.borders.len(),
+        );
+        debug_target.add(
+            debug_server::BatchKind::Cache,
+            "Vertical Blur",
+            target.vertical_blurs.len(),
+        );
+        debug_target.add(
+            debug_server::BatchKind::Cache,
+            "Horizontal Blur",
+            target.horizontal_blurs.len(),
+        );
+        debug_target.add(
+            debug_server::BatchKind::Clip,
+            "Rectangles",
+            target.clip_batcher.rectangles.len(),
+        );
+        debug_target.add(
+            debug_server::BatchKind::Cache,
+            "Rectangle Brush (Corner)",
+            target.brush_mask_corners.len(),
+        );
+        debug_target.add(
+            debug_server::BatchKind::Cache,
+            "Rectangle Brush (Rounded Rect)",
+            target.brush_mask_rounded_rects.len(),
+        );
+        for (_, items) in target.clip_batcher.images.iter() {
+            debug_target.add(debug_server::BatchKind::Clip, "Image mask", items.len());
+        }
+
+        debug_target
+    }
+
+    #[cfg(feature = "debugger")]
+    fn debug_color_target(target: &ColorRenderTarget) -> debug_server::Target {
+        let mut debug_target = debug_server::Target::new("RGBA8");
+
+        debug_target.add(
+            debug_server::BatchKind::Cache,
+            "Vertical Blur",
+            target.vertical_blurs.len(),
+        );
+        debug_target.add(
+            debug_server::BatchKind::Cache,
+            "Horizontal Blur",
+            target.horizontal_blurs.len(),
+        );
+        for (_, batch) in &target.text_run_cache_prims {
+            debug_target.add(
+                debug_server::BatchKind::Cache,
+                "Text Shadow",
+                batch.len(),
+            );
+        }
+        debug_target.add(
+            debug_server::BatchKind::Cache,
+            "Lines",
+            target.line_cache_prims.len(),
+        );
+
+        for batch in target
+            .alpha_batcher
+            .batch_list
+            .opaque_batch_list
+            .batches
+            .iter()
+            .rev()
+        {
+            debug_target.add(
+                debug_server::BatchKind::Opaque,
+                batch.key.kind.debug_name(),
+                batch.instances.len(),
+            );
+        }
+
+        for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
+            debug_target.add(
+                debug_server::BatchKind::Alpha,
+                batch.key.kind.debug_name(),
+                batch.instances.len(),
+            );
+        }
+
+        debug_target
+    }
+
+    #[cfg(feature = "debugger")]
     fn get_passes_for_debugger(&self) -> String {
         let mut debug_passes = debug_server::PassList::new();
 
-        if let Some(frame) = self.current_frame
-            .as_ref()
-            .and_then(|frame| frame.frame.as_ref())
-        {
-            for pass in &frame.passes {
-                let mut debug_pass = debug_server::Pass::new();
-
-                for target in &pass.alpha_targets.targets {
-                    let mut debug_target = debug_server::Target::new("A8");
-
-                    debug_target.add(
-                        debug_server::BatchKind::Clip,
-                        "Clear",
-                        target.clip_batcher.border_clears.len(),
-                    );
-                    debug_target.add(
-                        debug_server::BatchKind::Clip,
-                        "Borders",
-                        target.clip_batcher.borders.len(),
-                    );
-                    debug_target.add(
-                        debug_server::BatchKind::Cache,
-                        "Vertical Blur",
-                        target.vertical_blurs.len(),
-                    );
-                    debug_target.add(
-                        debug_server::BatchKind::Cache,
-                        "Horizontal Blur",
-                        target.horizontal_blurs.len(),
-                    );
-                    debug_target.add(
-                        debug_server::BatchKind::Clip,
-                        "Rectangles",
-                        target.clip_batcher.rectangles.len(),
-                    );
-                    debug_target.add(
-                        debug_server::BatchKind::Cache,
-                        "Rectangle Brush (Corner)",
-                        target.brush_mask_corners.len(),
-                    );
-                    debug_target.add(
-                        debug_server::BatchKind::Cache,
-                        "Rectangle Brush (Rounded Rect)",
-                        target.brush_mask_rounded_rects.len(),
-                    );
-                    for (_, items) in target.clip_batcher.images.iter() {
-                        debug_target.add(debug_server::BatchKind::Clip, "Image mask", items.len());
+        for &(_, ref render_doc) in &self.active_documents {
+            for pass in &render_doc.frame.passes {
+                let mut debug_targets = Vec::new();
+                match pass.kind {
+                    RenderPassKind::MainFramebuffer(ref target) => {
+                        debug_targets.push(Self::debug_color_target(target));
                     }
-
-                    debug_pass.add(debug_target);
+                    RenderPassKind::OffScreen { ref alpha, ref color } => {
+                        debug_targets.extend(alpha.targets.iter().map(Self::debug_alpha_target));
+                        debug_targets.extend(color.targets.iter().map(Self::debug_color_target));
+                    }
                 }
 
-                for target in &pass.color_targets.targets {
-                    let mut debug_target = debug_server::Target::new("RGBA8");
-
-                    debug_target.add(
-                        debug_server::BatchKind::Cache,
-                        "Vertical Blur",
-                        target.vertical_blurs.len(),
-                    );
-                    debug_target.add(
-                        debug_server::BatchKind::Cache,
-                        "Horizontal Blur",
-                        target.horizontal_blurs.len(),
-                    );
-                    for (_, batch) in &target.text_run_cache_prims {
-                        debug_target.add(
-                            debug_server::BatchKind::Cache,
-                            "Text Shadow",
-                            batch.len(),
-                        );
-                    }
-                    debug_target.add(
-                        debug_server::BatchKind::Cache,
-                        "Lines",
-                        target.line_cache_prims.len(),
-                    );
-
-                    for batch in target
-                        .alpha_batcher
-                        .batch_list
-                        .opaque_batch_list
-                        .batches
-                        .iter()
-                        .rev()
-                    {
-                        debug_target.add(
-                            debug_server::BatchKind::Opaque,
-                            batch.key.kind.debug_name(),
-                            batch.instances.len(),
-                        );
-                    }
-
-                    for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
-                        debug_target.add(
-                            debug_server::BatchKind::Alpha,
-                            batch.key.kind.debug_name(),
-                            batch.instances.len(),
-                        );
-                    }
-
-                    debug_pass.add(debug_target);
-                }
-
-                debug_passes.add(debug_pass);
+                debug_passes.add(debug_server::Pass { targets: debug_targets });
             }
         }
 
         serde_json::to_string(&debug_passes).unwrap()
     }
 
     fn handle_debug_command(&mut self, command: DebugCommand) {
         match command {
-            DebugCommand::EnableProfiler(enable) => if enable {
-                self.debug_flags.insert(DebugFlags::PROFILER_DBG);
-            } else {
-                self.debug_flags.remove(DebugFlags::PROFILER_DBG);
-            },
-            DebugCommand::EnableTextureCacheDebug(enable) => if enable {
-                self.debug_flags.insert(DebugFlags::TEXTURE_CACHE_DBG);
-            } else {
-                self.debug_flags.remove(DebugFlags::TEXTURE_CACHE_DBG);
-            },
-            DebugCommand::EnableRenderTargetDebug(enable) => if enable {
-                self.debug_flags.insert(DebugFlags::RENDER_TARGET_DBG);
-            } else {
-                self.debug_flags.remove(DebugFlags::RENDER_TARGET_DBG);
-            },
-            DebugCommand::EnableAlphaRectsDebug(enable) => if enable {
-                self.debug_flags.insert(DebugFlags::ALPHA_PRIM_DBG);
-            } else {
-                self.debug_flags.remove(DebugFlags::ALPHA_PRIM_DBG);
-            },
-            DebugCommand::EnableGpuTimeQueries(enable) => if enable {
-                self.gpu_profile.enable_timers();
-            } else {
-                self.gpu_profile.disable_timers();
-            },
-            DebugCommand::EnableGpuSampleQueries(enable) => if enable {
-                self.gpu_profile.enable_samplers();
-            } else {
-                self.gpu_profile.disable_samplers();
-            },
+            DebugCommand::EnableProfiler(enable) => {
+                self.set_debug_flag(DebugFlags::PROFILER_DBG, enable);
+            }
+            DebugCommand::EnableTextureCacheDebug(enable) => {
+                self.set_debug_flag(DebugFlags::TEXTURE_CACHE_DBG, enable);
+            }
+            DebugCommand::EnableRenderTargetDebug(enable) => {
+                self.set_debug_flag(DebugFlags::RENDER_TARGET_DBG, enable);
+            }
+            DebugCommand::EnableAlphaRectsDebug(enable) => {
+                self.set_debug_flag(DebugFlags::ALPHA_PRIM_DBG, enable);
+            }
+            DebugCommand::EnableGpuTimeQueries(enable) => {
+                self.set_debug_flag(DebugFlags::GPU_TIME_QUERIES, enable);
+            }
+            DebugCommand::EnableGpuSampleQueries(enable) => {
+                self.set_debug_flag(DebugFlags::GPU_SAMPLE_QUERIES, enable);
+            }
             DebugCommand::FetchDocuments => {}
             DebugCommand::FetchClipScrollTree => {}
             DebugCommand::FetchPasses => {
                 let json = self.get_passes_for_debugger();
                 self.debug_server.send(json);
             }
         }
     }
 
-    pub fn toggle_queries_enabled(&mut self) {
-        self.gpu_profile.toggle_timers_enabled();
-        self.gpu_profile.toggle_samplers_enabled();
-    }
-
     /// Set a callback for handling external images.
     pub fn set_external_image_handler(&mut self, handler: Box<ExternalImageHandler>) {
         self.external_image_handler = Some(handler);
     }
 
     /// Set a callback for handling external outputs.
     pub fn set_output_image_handler(&mut self, handler: Box<OutputImageHandler>) {
         self.output_image_handler = Some(handler);
@@ -2228,133 +2317,152 @@ impl<'a> Renderer<'a> {
 
     /// Renders the current frame.
     ///
     /// A Frame is supplied by calling [`generate_frame()`][genframe].
     /// [genframe]: ../../webrender_api/struct.DocumentApi.html#method.generate_frame
     pub fn render(&mut self, framebuffer_size: DeviceUintSize) -> Result<(), Vec<RendererError>> {
         profile_scope!("render");
 
-        if let Some(mut frame) = self.current_frame.take() {
-            if let Some(ref mut frame) = frame.frame {
-                let mut profile_timers = RendererProfileTimers::new();
-                let profile_samplers = {
-                    let _gm = self.gpu_profile.start_marker("build samples");
-                    // Block CPU waiting for last frame's GPU profiles to arrive.
-                    // In general this shouldn't block unless heavily GPU limited.
-                    let (gpu_frame_id, timers, samplers) = self.gpu_profile.build_samples();
-
-                    if self.max_recorded_profiles > 0 {
-                        while self.gpu_profiles.len() >= self.max_recorded_profiles {
-                            self.gpu_profiles.pop_front();
-                        }
-                        self.gpu_profiles
-                            .push_back(GpuProfile::new(gpu_frame_id, &timers));
-                    }
-                    profile_timers.gpu_samples = timers;
-                    samplers
-                };
-
-                let cpu_frame_id = profile_timers.cpu_time.profile(|| {
-                    let cpu_frame_id = {
-                        let _gm = self.gpu_profile.start_marker("begin frame");
-                        let frame_id = self.device.begin_frame(frame.device_pixel_ratio);
-                        self.gpu_profile.begin_frame(frame_id);
-
-                        self.device.disable_scissor();
-                        self.device.disable_depth();
-                        self.device.set_blend(false);
-                        //self.update_shaders();
-
-                        self.update_texture_cache();
-
-                        self.update_gpu_cache(frame);
-
-                        self.device.bind_texture(
-                            TextureSampler::ResourceCache,
-                            &self.gpu_cache_texture.texture,
-                        );
-
-                        frame_id
-                    };
-
-                    self.draw_tile_frame(frame, framebuffer_size, cpu_frame_id);
-
-                    self.gpu_profile.end_frame();
-                    cpu_frame_id
+        if self.active_documents.is_empty() {
+            self.last_time = precise_time_ns();
+            return Ok(())
+        }
+
+        let mut frame_profiles = Vec::new();
+        let mut profile_timers = RendererProfileTimers::new();
+
+        let profile_samplers = {
+            let _gm = self.gpu_profile.start_marker("build samples");
+            // Block CPU waiting for last frame's GPU profiles to arrive.
+            // In general this shouldn't block unless heavily GPU limited.
+            let (gpu_frame_id, timers, samplers) = self.gpu_profile.build_samples();
+
+            if self.max_recorded_profiles > 0 {
+                while self.gpu_profiles.len() >= self.max_recorded_profiles {
+                    self.gpu_profiles.pop_front();
+                }
+                self.gpu_profiles
+                    .push_back(GpuProfile::new(gpu_frame_id, &timers));
+            }
+            profile_timers.gpu_samples = timers;
+            samplers
+        };
+
+
+        let cpu_frame_id = profile_timers.cpu_time.profile(|| {
+            let _gm = self.gpu_profile.start_marker("begin frame");
+            let frame_id = self.device.begin_frame();
+            self.gpu_profile.begin_frame(frame_id);
+
+            self.device.disable_scissor();
+            self.device.disable_depth();
+            self.device.set_blend(false);
+            //self.update_shaders();
+
+            self.update_texture_cache();
+
+            self.device.bind_texture(
+                TextureSampler::ResourceCache,
+                &self.gpu_cache_texture.texture,
+            );
+
+            frame_id
+        });
+
+        profile_timers.cpu_time.profile(|| {
+            // Note: borrowck dance - take the documents out of `self` for drawing; they are restored below.
+            let mut active_documents = mem::replace(&mut self.active_documents, Vec::default());
+            // sort by the document layer id
+            active_documents.sort_by_key(|&(_, ref render_doc)| render_doc.frame.layer);
+
+            let needs_clear = !active_documents
+                .iter()
+                .any(|&(_, RenderedDocument { ref frame, .. })| {
+                    frame.background_color.is_some() &&
+                    frame.inner_rect.origin == DeviceUintPoint::zero() &&
+                    frame.inner_rect.size == framebuffer_size
                 });
-
-                let current_time = precise_time_ns();
-                let ns = current_time - self.last_time;
-                self.profile_counters.frame_time.set(ns);
-
-                if self.max_recorded_profiles > 0 {
-                    while self.cpu_profiles.len() >= self.max_recorded_profiles {
-                        self.cpu_profiles.pop_front();
-                    }
-                    let cpu_profile = CpuProfile::new(
-                        cpu_frame_id,
-                        self.backend_profile_counters.total_time.get(),
-                        profile_timers.cpu_time.get(),
-                        self.profile_counters.draw_calls.get(),
-                    );
-                    self.cpu_profiles.push_back(cpu_profile);
-                }
+            // don't clear the framebuffer if one of the rendered documents will overwrite it
+            if needs_clear {
+                let clear_color = self.clear_color.map(|color| color.to_array());
+                self.device.bind_draw_target(None, None);
+                self.device.clear_target(clear_color, None);
+            }
+
+            for &mut (_, RenderedDocument { ref mut frame, .. }) in &mut active_documents {
+                self.update_gpu_cache(frame);
+
+                self.draw_tile_frame(frame, framebuffer_size, cpu_frame_id);
 
                 if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
-                    let _gm = self.gpu_profile.start_marker("profile");
-                    let screen_fraction = 1.0 / //TODO: take device/pixel ratio into equation?
-                        (framebuffer_size.width as f32 * framebuffer_size.height as f32);
-                    self.profiler.draw_profile(
-                        &frame.profile_counters,
-                        &self.backend_profile_counters,
-                        &self.profile_counters,
-                        &mut profile_timers,
-                        &profile_samplers,
-                        screen_fraction,
-                        &mut self.debug,
-                    );
+                    frame_profiles.push(frame.profile_counters.clone());
                 }
-
-                self.profile_counters.reset();
-                self.profile_counters.frame_counter.inc();
-
-                {
-                    let _gm = self.gpu_profile.start_marker("debug");
-                    let debug_size = DeviceUintSize::new(
-                        framebuffer_size.width as u32,
-                        framebuffer_size.height as u32,
-                    );
-                    self.debug.render(&mut self.device, &debug_size);
-                }
-                {
-                    let _gm = self.gpu_profile.start_marker("end frame");
-                    self.device.end_frame();
-                }
-                self.last_time = current_time;
+            }
+
+            self.unlock_external_images();
+            self.active_documents = active_documents;
+        });
+
+        let current_time = precise_time_ns();
+        let ns = current_time - self.last_time;
+        self.profile_counters.frame_time.set(ns);
+
+        if self.max_recorded_profiles > 0 {
+            while self.cpu_profiles.len() >= self.max_recorded_profiles {
+                self.cpu_profiles.pop_front();
             }
-
-            // Restore frame - avoid borrow checker!
-            self.current_frame = Some(frame);
+            let cpu_profile = CpuProfile::new(
+                cpu_frame_id,
+                self.backend_profile_counters.total_time.get(),
+                profile_timers.cpu_time.get(),
+                self.profile_counters.draw_calls.get(),
+            );
+            self.cpu_profiles.push_back(cpu_profile);
         }
-        if !self.renderer_errors.is_empty() {
-            let errors = mem::replace(&mut self.renderer_errors, Vec::new());
-            return Err(errors);
+
+        if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
+            //TODO: take device/pixel ratio into equation?
+            let screen_fraction = 1.0 / framebuffer_size.to_f32().area();
+            self.profiler.draw_profile(
+                &frame_profiles,
+                &self.backend_profile_counters,
+                &self.profile_counters,
+                &mut profile_timers,
+                &profile_samplers,
+                screen_fraction,
+                &mut self.debug,
+            );
         }
-        Ok(())
+
+        self.profile_counters.reset();
+        self.profile_counters.frame_counter.inc();
+
+        self.debug.render(&mut self.device, &framebuffer_size);
+        profile_timers.cpu_time.profile(|| {
+            let _gm = self.gpu_profile.start_marker("end frame");
+            self.gpu_profile.end_frame();
+            self.device.end_frame();
+        });
+        self.last_time = current_time;
+
+        if self.renderer_errors.is_empty() {
+            Ok(())
+        } else {
+            Err(mem::replace(&mut self.renderer_errors, Vec::new()))
+        }
     }
 
     pub fn layers_are_bouncing_back(&self) -> bool {
-        match self.current_frame {
-            None => false,
-            Some(ref current_frame) => !current_frame.layers_bouncing_back.is_empty(),
-        }
+        self.active_documents
+            .iter()
+            .any(|&(_, ref render_doc)| !render_doc.layers_bouncing_back.is_empty())
     }
 
-    fn update_gpu_cache(&mut self, frame: &mut Frame) {
+    fn update_gpu_cache(&mut self, frame: &Frame) {
         let _gm = self.gpu_profile.start_marker("gpu cache update");
         for update_list in self.pending_gpu_cache_updates.drain(..) {
             self.gpu_cache_texture
                 .update(&mut self.device, &update_list);
         }
         self.update_deferred_resolves(frame);
         self.gpu_cache_texture.flush(&mut self.device);
     }
@@ -2367,17 +2475,17 @@ impl<'a> Renderer<'a> {
             for update in update_list.updates {
                 match update.op {
                     TextureUpdateOp::Create {
                         width,
                         height,
                         layer_count,
                         format,
                         filter,
-                        mode,
+                        render_target,
                     } => {
                         let CacheTextureId(cache_texture_index) = update.id;
                         if self.texture_resolver.cache_texture_map.len() == cache_texture_index {
                             // Create a new native texture, as requested by the texture cache.
                             let texture = self.device.create_texture(TextureTarget::Array);
                             self.texture_resolver.cache_texture_map.push(texture);
                         }
                         let texture =
@@ -2386,17 +2494,17 @@ impl<'a> Renderer<'a> {
                         // Ensure no PBO is bound when creating the texture storage,
                         // or GL will attempt to read data from there.
                         self.device.init_texture(
                             texture,
                             width,
                             height,
                             format,
                             filter,
-                            mode,
+                            render_target,
                             layer_count,
                             None,
                         );
                     }
                     TextureUpdateOp::Update {
                         rect,
                         source,
                         stride,
@@ -2482,17 +2590,19 @@ impl<'a> Renderer<'a> {
         let vao = match vertex_array_kind {
             VertexArrayKind::Primitive => &self.prim_vao,
             VertexArrayKind::Clip => &self.clip_vao,
             VertexArrayKind::Blur => &self.blur_vao,
         };
 
         self.device.bind_vao(vao);
 
-        if self.enable_batcher {
+        let batched = !self.debug_flags.contains(DebugFlags::DISABLE_BATCHING);
+
+        if batched {
             self.device
                 .update_vao_instances(vao, data, VertexUsageHint::Stream);
             self.device
                 .draw_indexed_triangles_instanced_u16(6, data.len() as i32);
             self.profile_counters.draw_calls.inc();
         } else {
             for i in 0 .. data.len() {
                 self.device
@@ -2507,17 +2617,17 @@ impl<'a> Renderer<'a> {
 
     fn submit_batch(
         &mut self,
         key: &BatchKey,
         instances: &[PrimitiveInstance],
         projection: &Transform3D<f32>,
         render_tasks: &RenderTaskTree,
         render_target: Option<(&Texture, i32)>,
-        target_dimensions: DeviceUintSize,
+        framebuffer_size: DeviceUintSize,
     ) {
         match key.kind {
             BatchKind::Composite { .. } => {
                 self.ps_composite.bind(&mut self.device, projection, 0, &mut self.renderer_errors);
             }
             BatchKind::HardwareComposite => {
                 self.ps_hw_composite
                     .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
@@ -2663,158 +2773,165 @@ impl<'a> Renderer<'a> {
                         0,
                         &mut self.renderer_errors,
                     );
                 }
             },
         };
 
         // Handle special case readback for composites.
-        match key.kind {
-            BatchKind::Composite {
-                task_id,
-                source_id,
-                backdrop_id,
-            } => {
-                // composites can't be grouped together because
-                // they may overlap and affect each other.
-                debug_assert!(instances.len() == 1);
-                let cache_texture = self.texture_resolver
-                    .resolve(&SourceTexture::CacheRGBA8)
-                    .unwrap();
-
-                // Before submitting the composite batch, do the
-                // framebuffer readbacks that are needed for each
-                // composite operation in this batch.
-                let cache_texture_dimensions = cache_texture.get_dimensions();
-
-                let source = render_tasks.get(source_id);
-                let backdrop = render_tasks.get(task_id);
-                let readback = render_tasks.get(backdrop_id);
-
-                let (readback_rect, readback_layer) = readback.get_target_rect();
-                let (backdrop_rect, _) = backdrop.get_target_rect();
-                let backdrop_screen_origin = match backdrop.kind {
-                    RenderTaskKind::Picture(ref task_info) => task_info.content_origin,
-                    _ => panic!("bug: composite on non-picture?"),
-                };
-                let source_screen_origin = match source.kind {
-                    RenderTaskKind::Picture(ref task_info) => task_info.content_origin,
-                    _ => panic!("bug: composite on non-picture?"),
-                };
-
-                // Bind the FBO to blit the backdrop to.
-                // Called per-instance in case the layer (and therefore FBO)
-                // changes. The device will skip the GL call if the requested
-                // target is already bound.
-                let cache_draw_target = (cache_texture, readback_layer.0 as i32);
-                self.device
-                    .bind_draw_target(Some(cache_draw_target), Some(cache_texture_dimensions));
-
-                let src_x = backdrop_rect.origin.x -
-                            backdrop_screen_origin.x as i32 +
-                            source_screen_origin.x as i32;
-                let src_y = backdrop_rect.origin.y -
-                            backdrop_screen_origin.y as i32 +
-                            source_screen_origin.y as i32;
-
-                let dest_x = readback_rect.origin.x;
-                let dest_y = readback_rect.origin.y;
-
-                let width = readback_rect.size.width;
-                let height = readback_rect.size.height;
-
-                let mut src = DeviceIntRect::new(
-                    DeviceIntPoint::new(src_x as i32, src_y as i32),
-                    DeviceIntSize::new(width as i32, height as i32),
-                );
-                let mut dest = DeviceIntRect::new(
-                    DeviceIntPoint::new(dest_x as i32, dest_y as i32),
-                    DeviceIntSize::new(width as i32, height as i32),
-                );
-
-                // Need to invert the y coordinates and flip the image vertically when
-                // reading back from the framebuffer.
-                if render_target.is_none() {
-                    src.origin.y = target_dimensions.height as i32 - src.size.height - src.origin.y;
-                    dest.origin.y += dest.size.height;
-                    dest.size.height = -dest.size.height;
-                }
-
-                self.device.bind_read_target(render_target);
-                self.device.blit_render_target(src, dest);
-
-                // Restore draw target to current pass render target + layer.
-                self.device
-                    .bind_draw_target(render_target, Some(target_dimensions));
+        if let BatchKind::Composite { task_id, source_id, backdrop_id } = key.kind {
+            // composites can't be grouped together because
+            // they may overlap and affect each other.
+            debug_assert_eq!(instances.len(), 1);
+            let cache_texture = self.texture_resolver
+                .resolve(&SourceTexture::CacheRGBA8)
+                .unwrap();
+
+            // Before submitting the composite batch, do the
+            // framebuffer readbacks that are needed for each
+            // composite operation in this batch.
+            let source = &render_tasks[source_id];
+            let backdrop = &render_tasks[task_id];
+            let readback = &render_tasks[backdrop_id];
+
+            let (readback_rect, readback_layer) = readback.get_target_rect();
+            let (backdrop_rect, _) = backdrop.get_target_rect();
+            let content_to_device_scale = ScaleFactor::<_, _, DevicePixel>::new(1i32);
+            let backdrop_screen_origin = match backdrop.kind {
+                RenderTaskKind::Picture(ref task_info) => task_info
+                    .content_origin
+                    .to_i32()
+                    * content_to_device_scale,
+                _ => panic!("bug: composite on non-picture?"),
+            };
+            let source_screen_origin = match source.kind {
+                RenderTaskKind::Picture(ref task_info) => task_info
+                    .content_origin
+                    .to_i32()
+                    * content_to_device_scale,
+                _ => panic!("bug: composite on non-picture?"),
+            };
+
+            // Bind the FBO to blit the backdrop to.
+            // Called per-instance in case the layer (and therefore FBO)
+            // changes. The device will skip the GL call if the requested
+            // target is already bound.
+            let cache_draw_target = (cache_texture, readback_layer.0 as i32);
+            self.device.bind_draw_target(Some(cache_draw_target), None);
+
+            let mut src = DeviceIntRect::new(
+                source_screen_origin + (backdrop_rect.origin - backdrop_screen_origin),
+                readback_rect.size,
+            );
+            let mut dest = readback_rect.to_i32();
+
+            // Need to invert the y coordinates and flip the image vertically when
+            // reading back from the framebuffer.
+            if render_target.is_none() {
+                src.origin.y = framebuffer_size.height as i32 - src.size.height - src.origin.y;
+                dest.origin.y += dest.size.height;
+                dest.size.height = -dest.size.height;
             }
-            _ => {}
+
+            self.device.bind_read_target(render_target);
+            self.device.blit_render_target(src, dest);
+
+            // Restore draw target to current pass render target + layer.
+            // Note: leaving the viewport unchanged, it's not a part of FBO state
+            self.device.bind_draw_target(render_target, None);
         }
 
         let _timer = self.gpu_profile.start_timer(key.kind.gpu_sampler_tag());
         self.draw_instanced_batch(instances, VertexArrayKind::Primitive, &key.textures);
     }
 
     fn handle_scaling(
         &mut self,
         render_tasks: &RenderTaskTree,
         scalings: &Vec<ScalingInfo>,
         source: SourceTexture,
     ) {
         let cache_texture = self.texture_resolver
             .resolve(&source)
             .unwrap();
         for scaling in scalings {
-            let source = render_tasks.get(scaling.src_task_id);
-            let dest = render_tasks.get(scaling.dest_task_id);
+            let source = &render_tasks[scaling.src_task_id];
+            let dest = &render_tasks[scaling.dest_task_id];
 
             let (source_rect, source_layer) = source.get_target_rect();
             let (dest_rect, _) = dest.get_target_rect();
 
             let cache_draw_target = (cache_texture, source_layer.0 as i32);
             self.device
                 .bind_read_target(Some(cache_draw_target));
 
             self.device.blit_render_target(source_rect, dest_rect);
         }
     }
 
     fn draw_color_target(
         &mut self,
         render_target: Option<(&Texture, i32)>,
         target: &ColorRenderTarget,
+        framebuffer_target_rect: DeviceUintRect,
         target_size: DeviceUintSize,
         clear_color: Option<[f32; 4]>,
         render_tasks: &RenderTaskTree,
         projection: &Transform3D<f32>,
         frame_id: FrameId,
     ) {
+        // sanity check for the depth buffer
+        if let Some((texture, _)) = render_target {
+            assert!(texture.has_depth() >= target.needs_depth());
+        }
+
         {
             let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
             self.device
                 .bind_draw_target(render_target, Some(target_size));
             self.device.disable_depth();
-            self.device.enable_depth_write();
             self.device.set_blend(false);
-            match render_target {
-                Some(..) if self.enable_clear_scissor => {
+
+            let depth_clear = if target.needs_depth() {
+                self.device.enable_depth_write();
+                Some(1.0)
+            } else {
+                None
+            };
+
+            if render_target.is_some() {
+                if self.enable_clear_scissor {
                     // TODO(gw): Applying a scissor rect and minimal clear here
                     // is a very large performance win on the Intel and nVidia
                     // GPUs that I have tested with. It's possible it may be a
                     // performance penalty on other GPU types - we should test this
                     // and consider different code paths.
-                    self.device
-                        .clear_target_rect(clear_color, Some(1.0), target.used_rect());
+                    self.device.clear_target_rect(clear_color, depth_clear, target.used_rect());
+                } else {
+                    self.device.clear_target(clear_color, depth_clear);
                 }
-                _ => {
-                    self.device.clear_target(clear_color, Some(1.0));
-                }
+            } else if framebuffer_target_rect == DeviceUintRect::new(DeviceUintPoint::zero(), target_size) {
+                // whole screen is covered, no need for scissor
+                self.device.clear_target(clear_color, depth_clear);
+            } else {
+                // Note: for non-intersecting document rectangles,
+                // we can omit clearing the depth here, and instead
+                // just clear it for the whole framebuffer at start of the frame.
+                let mut clear_rect = framebuffer_target_rect.to_i32();
+                // Note: `framebuffer_target_rect` needs a Y-flip before going to GL
+                // Note: at this point, the target rectangle is not guaranteed to be within the main framebuffer bounds
+                // but `clear_target_rect` is totally fine with negative origin, as long as width & height are positive
+                clear_rect.origin.y = target_size.height as i32 - clear_rect.origin.y - clear_rect.size.height;
+                self.device.clear_target_rect(clear_color, depth_clear, clear_rect);
             }
 
-            self.device.disable_depth_write();
+            if depth_clear.is_some() {
+                self.device.disable_depth_write();
+            }
         }
 
         // Draw any blurs for this target.
         // Blurs are rendered as a standard 2-pass
         // separable implementation.
         // TODO(gw): In the future, consider having
         //           fast path blur shaders for common
         //           blur radii with fixed weights.
@@ -2883,45 +3000,48 @@ impl<'a> Renderer<'a> {
 
         //TODO: record the pixel count for cached primitives
 
         if !target.alpha_batcher.is_empty() {
             let _gl = self.gpu_profile.start_marker("alpha batches");
             self.device.set_blend(false);
             let mut prev_blend_mode = BlendMode::None;
 
-            let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
-
-            //Note: depth equality is needed for split planes
-            self.device.set_depth_func(DepthFunction::LessEqual);
-            self.device.enable_depth();
-            self.device.enable_depth_write();
-
-            // Draw opaque batches front-to-back for maximum
-            // z-buffer efficiency!
-            for batch in target
-                .alpha_batcher
-                .batch_list
-                .opaque_batch_list
-                .batches
-                .iter()
-                .rev()
-            {
-                self.submit_batch(
-                    &batch.key,
-                    &batch.instances,
-                    &projection,
-                    render_tasks,
-                    render_target,
-                    target_size,
-                );
+            if target.needs_depth() {
+                let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+
+                //Note: depth equality is needed for split planes
+                self.device.set_depth_func(DepthFunction::LessEqual);
+                self.device.enable_depth();
+                self.device.enable_depth_write();
+
+                // Draw opaque batches front-to-back for maximum
+                // z-buffer efficiency!
+                for batch in target
+                    .alpha_batcher
+                    .batch_list
+                    .opaque_batch_list
+                    .batches
+                    .iter()
+                    .rev()
+                {
+                    self.submit_batch(
+                        &batch.key,
+                        &batch.instances,
+                        &projection,
+                        render_tasks,
+                        render_target,
+                        target_size,
+                    );
+                }
+
+                self.device.disable_depth_write();
+                self.gpu_profile.finish_sampler(opaque_sampler);
             }
 
-            self.device.disable_depth_write();
-            self.gpu_profile.finish_sampler(opaque_sampler);
             let transparent_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
 
             for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
                 if self.debug_flags.contains(DebugFlags::ALPHA_PRIM_DBG) {
                     let color = match batch.key.blend_mode {
                         BlendMode::None => debug_colors::BLACK,
                         BlendMode::PremultipliedAlpha => debug_colors::GREY,
                         BlendMode::PremultipliedDestOut => debug_colors::SALMON,
@@ -2948,16 +3068,17 @@ impl<'a> Renderer<'a> {
                         self.device.set_blend(true);
 
                         match batch.key.blend_mode {
                             BlendMode::PremultipliedAlpha => {
                                 self.device.set_blend_mode_premultiplied_alpha();
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
+                                    glyph_format,
                                     transform_kind,
                                     projection,
                                     TextShaderMode::from(glyph_format),
                                     &mut self.renderer_errors,
                                 );
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
@@ -2965,16 +3086,17 @@ impl<'a> Renderer<'a> {
                                     &batch.key.textures
                                 );
                             }
                             BlendMode::SubpixelConstantTextColor(color) => {
                                 self.device.set_blend_mode_subpixel_constant_text_color(color);
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
+                                    glyph_format,
                                     transform_kind,
                                     projection,
                                     TextShaderMode::SubpixelConstantTextColor,
                                     &mut self.renderer_errors,
                                 );
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
@@ -2986,32 +3108,34 @@ impl<'a> Renderer<'a> {
                                 // Using the two pass component alpha rendering technique:
                                 //
                                 // http://anholt.livejournal.com/32058.html
                                 //
                                 self.device.set_blend_mode_subpixel_pass0();
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
+                                    glyph_format,
                                     transform_kind,
                                     projection,
                                     TextShaderMode::SubpixelPass0,
                                     &mut self.renderer_errors,
                                 );
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures
                                 );
 
                                 self.device.set_blend_mode_subpixel_pass1();
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
+                                    glyph_format,
                                     transform_kind,
                                     projection,
                                     TextShaderMode::SubpixelPass1,
                                     &mut self.renderer_errors,
                                 );
 
                                 // When drawing the 2nd pass, we know that the VAO, textures etc
                                 // are all set up from the previous draw_instanced_batch call,
@@ -3025,32 +3149,34 @@ impl<'a> Renderer<'a> {
                                 // background color" rendering technique:
                                 //
                                 // /webrender/doc/text-rendering.md
                                 //
                                 self.device.set_blend_mode_subpixel_with_bg_color_pass0();
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
+                                    glyph_format,
                                     transform_kind,
                                     projection,
                                     TextShaderMode::SubpixelWithBgColorPass0,
                                     &mut self.renderer_errors,
                                 );
 
                                 self.draw_instanced_batch(
                                     &batch.instances,
                                     VertexArrayKind::Primitive,
                                     &batch.key.textures
                                 );
 
                                 self.device.set_blend_mode_subpixel_with_bg_color_pass1();
 
                                 self.ps_text_run_subpx_bg_pass1.bind(
                                     &mut self.device,
+                                    glyph_format,
                                     transform_kind,
                                     projection,
                                     TextShaderMode::SubpixelWithBgColorPass1,
                                     &mut self.renderer_errors,
                                 );
 
                                 // When drawing the 2nd and 3rd passes, we know that the VAO, textures etc
                                 // are all set up from the previous draw_instanced_batch call,
@@ -3058,16 +3184,17 @@ impl<'a> Renderer<'a> {
                                 // instances and re-binding textures etc.
                                 self.device
                                     .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
 
                                 self.device.set_blend_mode_subpixel_with_bg_color_pass2();
 
                                 self.ps_text_run.bind(
                                     &mut self.device,
+                                    glyph_format,
                                     transform_kind,
                                     projection,
                                     TextShaderMode::SubpixelWithBgColorPass2,
                                     &mut self.renderer_errors,
                                 );
 
                                 self.device
                                     .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
@@ -3138,18 +3265,17 @@ impl<'a> Renderer<'a> {
                         fbo_id
                     }
                     Entry::Occupied(mut entry) => {
                         let target = entry.get_mut();
                         target.last_access = frame_id;
                         target.fbo_id
                     }
                 };
-                let task = render_tasks.get(output.task_id);
-                let (src_rect, _) = task.get_target_rect();
+                let (src_rect, _) = render_tasks[output.task_id].get_target_rect();
                 let dest_rect = DeviceIntRect::new(DeviceIntPoint::zero(), output_size);
                 device.bind_read_target(render_target);
                 device.bind_external_draw_target(fbo_id);
                 device.blit_render_target(src_rect, dest_rect);
                 handler.unlock(output.pipeline_id);
             }
         }
     }
@@ -3176,19 +3302,18 @@ impl<'a> Renderer<'a> {
             // GPUs that I have tested with. It's possible it may be a
             // performance penalty on other GPU types - we should test this
             // and consider different code paths.
             let clear_color = [1.0, 1.0, 1.0, 0.0];
             self.device
                 .clear_target_rect(Some(clear_color), None, target.used_rect());
 
             let zero_color = [0.0, 0.0, 0.0, 0.0];
-            for task_id in &target.zero_clears {
-                let task = render_tasks.get(*task_id);
-                let (rect, _) = task.get_target_rect();
+            for &task_id in &target.zero_clears {
+                let (rect, _) = render_tasks[task_id].get_target_rect();
                 self.device
                     .clear_target_rect(Some(zero_color), None, rect);
             }
         }
 
         // Draw any blurs for this target.
         // Blurs are rendered as a standard 2-pass
         // separable implementation.
@@ -3317,17 +3442,17 @@ impl<'a> Renderer<'a> {
                     .bind(&mut self.device, projection, 0, &mut self.renderer_errors);
                 self.draw_instanced_batch(items, VertexArrayKind::Clip, &textures);
             }
         }
 
         self.gpu_profile.finish_sampler(alpha_sampler);
     }
 
-    fn update_deferred_resolves(&mut self, frame: &mut Frame) {
+    fn update_deferred_resolves(&mut self, frame: &Frame) {
         // The first thing we do is run through any pending deferred
         // resolves, and use a callback to get the UV rect for this
         // custom item. Then we patch the resource_rects structure
         // here before it's uploaded to the GPU.
         if !frame.deferred_resolves.is_empty() {
             let handler = self.external_image_handler
                 .as_mut()
                 .expect("Found external image, but no handler set!");
@@ -3398,74 +3523,55 @@ impl<'a> Renderer<'a> {
                 .expect("Found external image, but no handler set!");
 
             for (ext_data, _) in self.texture_resolver.external_images.drain() {
                 handler.unlock(ext_data.0, ext_data.1);
             }
         }
     }
 
-    fn start_frame(&mut self, frame: &mut Frame) {
+    fn prepare_target_list<T: RenderTarget>(
+        list: &mut RenderTargetList<T>,
+        device: &mut Device,
+        target_pool: &mut Vec<Texture>,
+        format: ImageFormat,
+    ) {
+        debug_assert_ne!(list.max_size, DeviceUintSize::zero());
+        debug_assert!(list.texture.is_none());
+        if list.targets.is_empty() {
+            return;
+        }
+        let mut texture = match target_pool.pop() {
+            Some(texture) => texture,
+            None => device.create_texture(TextureTarget::Array),
+        };
+        device.init_texture(
+            &mut texture,
+            list.max_size.width,
+            list.max_size.height,
+            format,
+            TextureFilter::Linear,
+            Some(RenderTargetInfo {
+                has_depth: list.needs_depth(),
+            }),
+            list.targets.len() as _,
+            None,
+        );
+        list.texture = Some(texture);
+    }
+
+    fn prepare_frame(&mut self, frame: &mut Frame) {
         let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA);
-
-        // Assign render targets to the passes.
-        for pass in &mut frame.passes {
-            debug_assert!(pass.color_texture.is_none());
-            debug_assert!(pass.alpha_texture.is_none());
-
-            if pass.needs_render_target_kind(RenderTargetKind::Color) {
-                pass.color_texture = Some(
-                    self.color_render_targets
-                        .pop()
-                        .unwrap_or_else(|| self.device.create_texture(TextureTarget::Array)),
-                );
-            }
-
-            if pass.needs_render_target_kind(RenderTargetKind::Alpha) {
-                pass.alpha_texture = Some(
-                    self.alpha_render_targets
-                        .pop()
-                        .unwrap_or_else(|| self.device.create_texture(TextureTarget::Array)),
-                );
-            }
-        }
-
+        self.device.device_pixel_ratio = frame.device_pixel_ratio;
 
         // Init textures and render targets to match this scene.
         for pass in &mut frame.passes {
-            let color_target_count = pass.required_target_count(RenderTargetKind::Color);
-            let alpha_target_count = pass.required_target_count(RenderTargetKind::Alpha);
-
-            if let Some(texture) = pass.color_texture.as_mut() {
-                debug_assert!(pass.max_color_target_size.width > 0);
-                debug_assert!(pass.max_color_target_size.height > 0);
-                self.device.init_texture(
-                    texture,
-                    pass.max_color_target_size.width,
-                    pass.max_color_target_size.height,
-                    ImageFormat::BGRA8,
-                    TextureFilter::Linear,
-                    RenderTargetMode::RenderTarget,
-                    color_target_count as i32,
-                    None,
-                );
-            }
-            if let Some(texture) = pass.alpha_texture.as_mut() {
-                debug_assert!(pass.max_alpha_target_size.width > 0);
-                debug_assert!(pass.max_alpha_target_size.height > 0);
-                self.device.init_texture(
-                    texture,
-                    pass.max_alpha_target_size.width,
-                    pass.max_alpha_target_size.height,
-                    ImageFormat::A8,
-                    TextureFilter::Linear,
-                    RenderTargetMode::RenderTarget,
-                    alpha_target_count as i32,
-                    None,
-                );
+            if let RenderPassKind::OffScreen { ref mut alpha, ref mut color } = pass.kind {
+                Self::prepare_target_list(alpha, &mut self.device, &mut self.alpha_render_targets, ImageFormat::A8);
+                Self::prepare_target_list(color, &mut self.device, &mut self.color_render_targets, ImageFormat::BGRA8);
             }
         }
 
         self.node_data_texture
             .update(&mut self.device, &mut frame.node_data);
         self.device
             .bind_texture(TextureSampler::ClipScrollNodes, &self.node_data_texture.texture);
 
@@ -3483,169 +3589,189 @@ impl<'a> Renderer<'a> {
     fn draw_tile_frame(
         &mut self,
         frame: &mut Frame,
         framebuffer_size: DeviceUintSize,
         frame_id: FrameId,
     ) {
         let _gm = self.gpu_profile.start_marker("tile frame draw");
 
-        // Some tests use a restricted viewport smaller than the main screen size.
-        // Ensure we clear the framebuffer in these tests.
-        // TODO(gw): Find a better solution for this?
-        let needs_clear = frame.window_size.width < framebuffer_size.width ||
-            frame.window_size.height < framebuffer_size.height;
+        if frame.passes.is_empty() {
+            return;
+        }
 
         self.device.disable_depth_write();
         self.device.disable_stencil();
         self.device.set_blend(false);
 
-        if frame.passes.is_empty() {
-            self.device
-                .clear_target(Some(self.clear_color.to_array()), Some(1.0));
-        } else {
-            self.start_frame(frame);
-
-            let pass_count = frame.passes.len();
-            let base_color_target_count = self.color_render_targets.len();
-            let base_alpha_target_count = self.alpha_render_targets.len();
-
-            for (pass_index, pass) in frame.passes.iter_mut().enumerate() {
-                self.texture_resolver.bind(
-                    &SourceTexture::CacheA8,
-                    TextureSampler::CacheA8,
-                    &mut self.device,
-                );
-                self.texture_resolver.bind(
-                    &SourceTexture::CacheRGBA8,
-                    TextureSampler::CacheRGBA8,
-                    &mut self.device,
-                );
-
-                for (target_index, target) in pass.alpha_targets.targets.iter().enumerate() {
+        self.prepare_frame(frame);
+
+        let base_color_target_count = self.color_render_targets.len();
+        let base_alpha_target_count = self.alpha_render_targets.len();
+
+        for (pass_index, pass) in frame.passes.iter_mut().enumerate() {
+            self.texture_resolver.bind(
+                &SourceTexture::CacheA8,
+                TextureSampler::CacheA8,
+                &mut self.device,
+            );
+            self.texture_resolver.bind(
+                &SourceTexture::CacheRGBA8,
+                TextureSampler::CacheRGBA8,
+                &mut self.device,
+            );
+
+            let (cur_alpha, cur_color) = match pass.kind {
+                RenderPassKind::MainFramebuffer(ref target) => {
+                    let clear_color = frame.background_color.map(|color| color.to_array());
                     let projection = Transform3D::ortho(
                         0.0,
-                        pass.max_alpha_target_size.width as f32,
+                        framebuffer_size.width as f32,
+                        framebuffer_size.height as f32,
                         0.0,
-                        pass.max_alpha_target_size.height as f32,
                         ORTHO_NEAR_PLANE,
                         ORTHO_FAR_PLANE,
                     );
 
-                    self.draw_alpha_target(
-                        (pass.alpha_texture.as_ref().unwrap(), target_index as i32),
+                    self.draw_color_target(
+                        None,
                         target,
-                        pass.max_alpha_target_size,
-                        &projection,
-                        &frame.render_tasks,
-                    );
-                }
-
-                for (target_index, target) in pass.color_targets.targets.iter().enumerate() {
-                    let size;
-                    let clear_color;
-                    let projection;
-
-                    if pass.is_framebuffer {
-                        clear_color = if self.clear_framebuffer || needs_clear {
-                            Some(
-                                frame
-                                    .background_color
-                                    .map_or(self.clear_color.to_array(), |color| color.to_array()),
-                            )
-                        } else {
-                            None
-                        };
-                        size = framebuffer_size;
-                        projection = Transform3D::ortho(
-                            0.0,
-                            size.width as f32,
-                            size.height as f32,
-                            0.0,
-                            ORTHO_NEAR_PLANE,
-                            ORTHO_FAR_PLANE,
-                        )
-                    } else {
-                        size = pass.max_color_target_size;
-                        clear_color = Some([0.0, 0.0, 0.0, 0.0]);
-                        projection = Transform3D::ortho(
-                            0.0,
-                            size.width as f32,
-                            0.0,
-                            size.height as f32,
-                            ORTHO_NEAR_PLANE,
-                            ORTHO_FAR_PLANE,
-                        );
-                    }
-
-                    let render_target = pass.color_texture
-                        .as_ref()
-                        .map(|texture| (texture, target_index as i32));
-                    self.draw_color_target(
-                        render_target,
-                        target,
-                        size,
+                        frame.inner_rect,
+                        framebuffer_size,
                         clear_color,
                         &frame.render_tasks,
                         &projection,
                         frame_id,
                     );
+
+                    (None, None)
                 }
-
-                self.texture_resolver.end_pass(
-                    pass_index == pass_count - 1,
-                    pass.alpha_texture.take(),
-                    pass.color_texture.take(),
-                    &mut self.alpha_render_targets,
-                    &mut self.color_render_targets,
-                );
-
-                // After completing the first pass, make the A8 target available as an
-                // input to any subsequent passes.
-                if pass_index == 0 {
-                    if let Some(shared_alpha_texture) =
-                        self.texture_resolver.resolve(&SourceTexture::CacheA8)
-                    {
-                        self.device
-                            .bind_texture(TextureSampler::SharedCacheA8, shared_alpha_texture);
+                RenderPassKind::OffScreen { ref mut alpha, ref mut color } => {
+                    assert!(alpha.targets.is_empty() || alpha.texture.is_some());
+                    assert!(color.targets.is_empty() || color.texture.is_some());
+
+                    for (target_index, target) in alpha.targets.iter().enumerate() {
+                        let projection = Transform3D::ortho(
+                            0.0,
+                            alpha.max_size.width as f32,
+                            0.0,
+                            alpha.max_size.height as f32,
+                            ORTHO_NEAR_PLANE,
+                            ORTHO_FAR_PLANE,
+                        );
+
+                        self.draw_alpha_target(
+                            (alpha.texture.as_ref().unwrap(), target_index as i32),
+                            target,
+                            alpha.max_size,
+                            &projection,
+                            &frame.render_tasks,
+                        );
                     }
+
+                    for (target_index, target) in color.targets.iter().enumerate() {
+                        let projection = Transform3D::ortho(
+                            0.0,
+                            color.max_size.width as f32,
+                            0.0,
+                            color.max_size.height as f32,
+                            ORTHO_NEAR_PLANE,
+                            ORTHO_FAR_PLANE,
+                        );
+
+                        self.draw_color_target(
+                            Some((color.texture.as_ref().unwrap(), target_index as i32)),
+                            target,
+                            frame.inner_rect,
+                            color.max_size,
+                            Some([0.0, 0.0, 0.0, 0.0]),
+                            &frame.render_tasks,
+                            &projection,
+                            frame_id,
+                        );
+                    }
+
+                    (alpha.texture.take(), color.texture.take())
+                }
+            };
+
+            self.texture_resolver.end_pass(
+                cur_alpha,
+                cur_color,
+                &mut self.alpha_render_targets,
+                &mut self.color_render_targets,
+            );
+
+            // After completing the first pass, make the A8 target available as an
+            // input to any subsequent passes.
+            if pass_index == 0 {
+                if let Some(shared_alpha_texture) =
+                    self.texture_resolver.resolve(&SourceTexture::CacheA8)
+                {
+                    self.device
+                        .bind_texture(TextureSampler::SharedCacheA8, shared_alpha_texture);
                 }
             }
-
-            self.color_render_targets[base_color_target_count..].reverse();
-            self.alpha_render_targets[base_alpha_target_count..].reverse();
-            self.draw_render_target_debug(framebuffer_size);
-            self.draw_texture_cache_debug(framebuffer_size);
-
-            // Garbage collect any frame outputs that weren't used this frame.
-            let device = &mut self.device;
-            self.output_targets
-                .retain(|_, target| if target.last_access != frame_id {
-                    device.delete_fbo(target.fbo_id);
-                    false
-                } else {
-                    true
-                });
         }
 
-        self.unlock_external_images();
+        self.color_render_targets[base_color_target_count..].reverse();
+        self.alpha_render_targets[base_alpha_target_count..].reverse();
+        self.draw_render_target_debug(framebuffer_size);
+        self.draw_texture_cache_debug(framebuffer_size);
+
+        // Garbage collect any frame outputs that weren't used this frame.
+        let device = &mut self.device;
+        self.output_targets
+            .retain(|_, target| if target.last_access != frame_id {
+                device.delete_fbo(target.fbo_id);
+                false
+            } else {
+                true
+            });
     }
 
     pub fn debug_renderer<'b>(&'b mut self) -> &'b mut DebugRenderer {
         &mut self.debug
     }
 
     pub fn get_debug_flags(&self) -> DebugFlags {
         self.debug_flags
     }
 
     pub fn set_debug_flags(&mut self, flags: DebugFlags) {
+        if let Some(enabled) = flag_changed(self.debug_flags, flags, DebugFlags::GPU_TIME_QUERIES) {
+            if enabled {
+                self.gpu_profile.enable_timers();
+            } else {
+                self.gpu_profile.disable_timers();
+            }
+        }
+        if let Some(enabled) = flag_changed(self.debug_flags, flags, DebugFlags::GPU_SAMPLE_QUERIES) {
+            if enabled {
+                self.gpu_profile.enable_samplers();
+            } else {
+                self.gpu_profile.disable_samplers();
+            }
+        }
+
         self.debug_flags = flags;
     }
 
+    pub fn set_debug_flag(&mut self, flag: DebugFlags, enabled: bool) {
+        let mut new_flags = self.debug_flags;
+        new_flags.set(flag, enabled);
+        self.set_debug_flags(new_flags);
+    }
+
+    pub fn toggle_debug_flags(&mut self, toggle: DebugFlags) {
+        let mut new_flags = self.debug_flags;
+        new_flags.toggle(toggle);
+        self.set_debug_flags(new_flags);
+    }
+
     pub fn save_cpu_profile(&self, filename: &str) {
         write_profile(filename);
     }
 
     fn draw_render_target_debug(&mut self, framebuffer_size: DeviceUintSize) {
         if !self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
             return;
         }
@@ -3768,17 +3894,17 @@ impl<'a> Renderer<'a> {
             gl_type,
             output,
         );
     }
 
     // De-initialize the Renderer safely, assuming the GL is still alive and active.
     pub fn deinit(mut self) {
         //Note: this is a fake frame, only needed because texture deletion is require to happen inside a frame
-        self.device.begin_frame(1.0);
+        self.device.begin_frame();
         self.gpu_cache_texture.deinit(&mut self.device);
         if let Some(dither_matrix_texture) = self.dither_matrix_texture {
             self.device.delete_texture(dither_matrix_texture);
         }
         self.node_data_texture.deinit(&mut self.device);
         self.render_task_texture.deinit(&mut self.device);
         for texture in self.alpha_render_targets {
             self.device.delete_texture(texture);
@@ -3883,60 +4009,56 @@ pub trait OutputImageHandler {
     fn unlock(&mut self, pipeline_id: PipelineId);
 }
 
 pub trait ThreadListener {
     fn thread_started(&self, thread_name: &str);
     fn thread_stopped(&self, thread_name: &str);
 }
 
-pub struct RendererOptions<'a> {
+pub struct RendererOptions {
     pub device_pixel_ratio: f32,
     pub resource_override_path: Option<PathBuf>,
     pub enable_aa: bool,
     pub enable_dithering: bool,
     pub max_recorded_profiles: usize,
     pub debug: bool,
     pub enable_scrollbars: bool,
     pub precache_shaders: bool,
     pub renderer_kind: RendererKind,
     pub enable_subpixel_aa: bool,
-    pub clear_framebuffer: bool,
-    pub clear_color: ColorF,
+    pub clear_color: Option<ColorF>,
     pub enable_clear_scissor: bool,
-    pub enable_batcher: bool,
     pub max_texture_size: Option<u32>,
     pub workers: Option<Arc<ThreadPool>>,
     pub blob_image_renderer: Option<Box<BlobImageRenderer>>,
     pub recorder: Option<Box<ApiRecordingReceiver>>,
     pub thread_listener: Option<Box<ThreadListener + Send + Sync>>,
     pub enable_render_on_scroll: bool,
-    pub cached_programs: Option<&'a mut ProgramCache>,
+    pub cached_programs: Option<Rc<ProgramCache>>,
     pub debug_flags: DebugFlags,
     pub renderer_id: Option<u64>,
 }
 
-impl<'a> Default for RendererOptions<'a> {
-    fn default() -> RendererOptions<'a> {
+impl Default for RendererOptions {
+    fn default() -> RendererOptions {
         RendererOptions {
             device_pixel_ratio: 1.0,
             resource_override_path: None,
             enable_aa: true,
             enable_dithering: true,
             debug_flags: DebugFlags::empty(),
             max_recorded_profiles: 0,
             debug: false,
             enable_scrollbars: false,
             precache_shaders: false,
             renderer_kind: RendererKind::Native,
             enable_subpixel_aa: false,
-            clear_framebuffer: true,
-            clear_color: ColorF::new(1.0, 1.0, 1.0, 1.0),
+            clear_color: Some(ColorF::new(1.0, 1.0, 1.0, 1.0)),
             enable_clear_scissor: true,
-            enable_batcher: true,
             max_texture_size: None,
             workers: None,
             blob_image_renderer: None,
             recorder: None,
             thread_listener: None,
             enable_render_on_scroll: true,
             renderer_id: None,
             cached_programs: None,
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -4,17 +4,17 @@
 
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize};
 use api::{ExternalImageType, ImageData, ImageFormat};
 use api::ImageDescriptor;
 use device::TextureFilter;
 use frame::FrameId;
 use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle};
-use internal_types::{CacheTextureId, RenderTargetMode, TextureUpdateList, TextureUpdateSource};
+use internal_types::{CacheTextureId, TextureUpdateList, TextureUpdateSource};
 use internal_types::{SourceTexture, TextureUpdate, TextureUpdateOp};
 use profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
 use resource_cache::CacheItem;
 use std::cmp;
 use std::mem;
 
 // The fixed number of layers for the shared texture cache.
 // There is one array texture per image format, allocated lazily.
@@ -101,17 +101,17 @@ impl CacheEntry {
     // Create a new entry for a standalone texture.
     fn new_standalone(
         texture_id: CacheTextureId,
         size: DeviceUintSize,
         format: ImageFormat,
         filter: TextureFilter,
         user_data: [f32; 3],
         last_access: FrameId,
-    ) -> CacheEntry {
+    ) -> Self {
         CacheEntry {
             size,
             user_data,
             last_access,
             kind: EntryKind::Standalone,
             texture_id,
             format,
             filter,
@@ -201,17 +201,17 @@ pub struct TextureCache {
 
     // A list of the strong handles of items that were
     // allocated in the shared texture cache. Used
     // for evicting old cache items.
     shared_entry_handles: Vec<FreeListHandle<CacheEntry>>,
 }
 
 impl TextureCache {
-    pub fn new(max_texture_size: u32) -> TextureCache {
+    pub fn new(max_texture_size: u32) -> Self {
         TextureCache {
             max_texture_size,
             array_a8_linear: TextureArray::new(
                 ImageFormat::A8,
                 TextureFilter::Linear,
                 TEXTURE_ARRAY_LAYERS_LINEAR,
             ),
             array_rgba8_linear: TextureArray::new(
@@ -576,18 +576,18 @@ impl TextureCache {
 
             let update_op = TextureUpdate {
                 id: texture_id,
                 op: TextureUpdateOp::Create {
                     width: TEXTURE_LAYER_DIMENSIONS,
                     height: TEXTURE_LAYER_DIMENSIONS,
                     format: descriptor.format,
                     filter: texture_array.filter,
+                    render_target: None,
                     layer_count: texture_array.layer_count as i32,
-                    mode: RenderTargetMode::RenderTarget, // todo: !!!! remove me!?
                 },
             };
             self.pending_updates.push(update_op);
 
             texture_array.texture_id = Some(texture_id);
         }
 
         // Do the allocation. This can fail and return None
@@ -665,17 +665,17 @@ impl TextureCache {
             // of the right size / format.
             let update_op = TextureUpdate {
                 id: texture_id,
                 op: TextureUpdateOp::Create {
                     width: descriptor.width,
                     height: descriptor.height,
                     format: descriptor.format,
                     filter,
-                    mode: RenderTargetMode::RenderTarget,
+                    render_target: None,
                     layer_count: 1,
                 },
             };
             self.pending_updates.push(update_op);
 
             new_cache_entry = Some(CacheEntry::new_standalone(
                 texture_id,
                 size,
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -1,16 +1,16 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use api::{BorderRadiusKind, ClipId, ColorF, DeviceIntPoint, ImageKey};
-use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintSize};
-use api::{ExternalImageType, FilterOp, FontRenderMode, ImageRendering, LayerRect};
-use api::{MixBlendMode, PipelineId};
+use api::{DeviceIntRect, DeviceIntSize, DeviceUintPoint, DeviceUintRect, DeviceUintSize};
+use api::{DocumentLayer, ExternalImageType, FilterOp, FontRenderMode, ImageRendering};
+use api::{LayerRect, MixBlendMode, PipelineId};
 use api::{TileOffset, YuvColorSpace, YuvFormat};
 use api::{LayerToWorldTransform, WorldPixel};
 use border::{BorderCornerInstance, BorderCornerSide};
 use clip::{ClipSource, ClipStore};
 use clip_scroll_tree::{ClipScrollTree, CoordinateSystemId};
 use device::Texture;
 use euclid::{TypedTransform3D, vec3};
 use glyph_rasterizer::GlyphFormat;
@@ -27,16 +27,17 @@ use prim_store::{BrushMaskKind, BrushKin
 use profiler::FrameProfileCounters;
 use render_task::{ClipWorkItem, MaskGeometryKind, MaskSegment};
 use render_task::{RenderTaskAddress, RenderTaskId, RenderTaskKey, RenderTaskKind};
 use render_task::{BlurTask, ClearMode, RenderTaskLocation, RenderTaskTree};
 use renderer::BlendMode;
 use renderer::ImageBufferKind;
 use resource_cache::{GlyphFetchResult, ResourceCache};
 use std::{cmp, usize, f32, i32};
+use std::collections::hash_map::Entry;
 use texture_allocator::GuillotineAllocator;
 use util::{MatrixHelpers, TransformedRectKind};
 
 // Special sentinel value recognized by the shader. It is considered to be
 // a dummy task that doesn't mask out anything.
 const OPAQUE_TASK_ADDRESS: RenderTaskAddress = RenderTaskAddress(i32::MAX as u32);
 const MIN_TARGET_SIZE: u32 = 2048;
 
@@ -172,17 +173,17 @@ struct DynamicTaskInfo {
     rect: DeviceIntRect,
 }
 
 pub struct AlphaBatchList {
     pub batches: Vec<AlphaPrimitiveBatch>,
 }
 
 impl AlphaBatchList {
-    fn new() -> AlphaBatchList {
+    fn new() -> Self {
         AlphaBatchList {
             batches: Vec::new(),
         }
     }
 
     fn get_suitable_batch(
         &mut self,
         key: BatchKey,
@@ -250,17 +251,17 @@ impl AlphaBatchList {
 }
 
 pub struct OpaqueBatchList {
     pub pixel_area_threshold_for_new_batch: i32,
     pub batches: Vec<OpaquePrimitiveBatch>,
 }
 
 impl OpaqueBatchList {
-    fn new(pixel_area_threshold_for_new_batch: i32) -> OpaqueBatchList {
+    fn new(pixel_area_threshold_for_new_batch: i32) -> Self {
         OpaqueBatchList {
             batches: Vec::new(),
             pixel_area_threshold_for_new_batch,
         }
     }
 
     fn get_suitable_batch(
         &mut self,
@@ -315,17 +316,17 @@ impl OpaqueBatchList {
 }
 
 pub struct BatchList {
     pub alpha_batch_list: AlphaBatchList,
     pub opaque_batch_list: OpaqueBatchList,
 }
 
 impl BatchList {
-    fn new(screen_size: DeviceIntSize) -> BatchList {
+    fn new(screen_size: DeviceIntSize) -> Self {
         // The threshold for creating a new batch is
         // one quarter the screen size.
         let batch_area_threshold = screen_size.width * screen_size.height / 4;
 
         BatchList {
             alpha_batch_list: AlphaBatchList::new(),
             opaque_batch_list: OpaqueBatchList::new(batch_area_threshold),
         }
@@ -538,17 +539,20 @@ fn add_to_batch(
             );
             let batch = batch_list.get_suitable_batch(key, item_bounding_rect);
             batch.push(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
         }
         PrimitiveKind::TextRun => {
             let text_cpu =
                 &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
 
-            let font = text_cpu.get_font(ctx.device_pixel_ratio);
+            let font = text_cpu.get_font(
+                ctx.device_pixel_ratio,
+                &scroll_node.transform,
+            );
 
             ctx.resource_cache.fetch_glyphs(
                 font,
                 &text_cpu.glyph_keys,
                 glyph_fetch_buffer,
                 gpu_cache,
                 |texture_id, glyph_format, glyphs| {
                     debug_assert_ne!(texture_id, SourceTexture::Invalid);
@@ -995,17 +999,17 @@ impl PicturePrimitive {
             );
 
             batch.push(PrimitiveInstance::from(instance));
         }
     }
 }
 
 impl AlphaBatcher {
-    fn new(screen_size: DeviceIntSize) -> AlphaBatcher {
+    fn new(screen_size: DeviceIntSize) -> Self {
         AlphaBatcher {
             tasks: Vec::new(),
             batch_list: BatchList::new(screen_size),
             glyph_fetch_buffer: Vec::new(),
         }
     }
 
     fn add_task(&mut self, task_id: RenderTaskId) {
@@ -1014,20 +1018,18 @@ impl AlphaBatcher {
 
     fn build(
         &mut self,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
-        for task_id in &self.tasks {
-            let task_id = *task_id;
-            let task = render_tasks.get(task_id);
-            match task.kind {
+        for &task_id in &self.tasks {
+            match render_tasks[task_id].kind {
                 RenderTaskKind::Picture(ref pic_task) => {
                     let pic_index = ctx.prim_store.cpu_metadata[pic_task.prim_index.0].cpu_prim_index;
                     let pic = &ctx.prim_store.cpu_pictures[pic_index.0];
                     pic.add_to_batch(
                         task_id,
                         ctx,
                         gpu_cache,
                         render_tasks,
@@ -1058,17 +1060,17 @@ pub struct ClipBatcher {
     pub rectangles: Vec<ClipMaskInstance>,
     /// Image draws apply the image masking.
     pub images: FastHashMap<SourceTexture, Vec<ClipMaskInstance>>,
     pub border_clears: Vec<ClipMaskInstance>,
     pub borders: Vec<ClipMaskInstance>,
 }
 
 impl ClipBatcher {
-    fn new() -> ClipBatcher {
+    fn new() -> Self {
         ClipBatcher {
             rectangles: Vec::new(),
             images: FastHashMap::default(),
             border_clears: Vec::new(),
             borders: Vec::new(),
         }
     }
 
@@ -1192,17 +1194,17 @@ struct TextureAllocator {
 
     // Track the used rect of the render target, so that
     // we can set a scissor rect and only clear to the
     // used portion of the target as an optimization.
     used_rect: DeviceIntRect,
 }
 
 impl TextureAllocator {
-    fn new(size: DeviceUintSize) -> TextureAllocator {
+    fn new(size: DeviceUintSize) -> Self {
         TextureAllocator {
             allocator: GuillotineAllocator::new(size),
             used_rect: DeviceIntRect::zero(),
         }
     }
 
     fn allocate(&mut self, size: &DeviceUintSize) -> Option<DeviceUintPoint> {
         let origin = self.allocator.allocate(size);
@@ -1239,47 +1241,42 @@ pub trait RenderTarget {
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &GpuCache,
         render_tasks: &RenderTaskTree,
         clip_store: &ClipStore,
     );
     fn used_rect(&self) -> DeviceIntRect;
+    fn needs_depth(&self) -> bool;
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 pub enum RenderTargetKind {
     Color, // RGBA32
     Alpha, // R8
 }
 
 pub struct RenderTargetList<T> {
     screen_size: DeviceIntSize,
+    pub max_size: DeviceUintSize,
     pub targets: Vec<T>,
+    pub texture: Option<Texture>,
 }
 
 impl<T: RenderTarget> RenderTargetList<T> {
     fn new(
-        create_initial_target: bool,
-        screen_size: DeviceIntSize
-    ) -> RenderTargetList<T> {
-        let mut targets = Vec::new();
-        if create_initial_target {
-            targets.push(T::new(None, screen_size));
+        screen_size: DeviceIntSize,
+    ) -> Self {
+        RenderTargetList {
+            screen_size,
+            max_size: DeviceUintSize::new(MIN_TARGET_SIZE, MIN_TARGET_SIZE),
+            targets: Vec::new(),
+            texture: None,
         }
-
-        RenderTargetList {
-            targets,
-            screen_size,
-        }
-    }
-
-    pub fn target_count(&self) -> usize {
-        self.targets.len()
     }
 
     fn build(
         &mut self,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
@@ -1304,37 +1301,40 @@ impl<T: RenderTarget> RenderTargetList<T
             render_tasks,
             clip_store,
         );
     }
 
     fn allocate(
         &mut self,
         alloc_size: DeviceUintSize,
-        target_size: DeviceUintSize,
     ) -> (DeviceUintPoint, RenderTargetIndex) {
         let existing_origin = self.targets
             .last_mut()
             .and_then(|target| target.allocate(alloc_size));
 
         let origin = match existing_origin {
             Some(origin) => origin,
             None => {
-                let mut new_target = T::new(Some(target_size), self.screen_size);
+                let mut new_target = T::new(Some(self.max_size), self.screen_size);
                 let origin = new_target.allocate(alloc_size).expect(&format!(
                     "Each render task must allocate <= size of one target! ({:?})",
                     alloc_size
                 ));
                 self.targets.push(new_target);
                 origin
             }
         };
 
         (origin, RenderTargetIndex(self.targets.len() - 1))
     }
+
+    pub fn needs_depth(&self) -> bool {
+        self.targets.iter().any(|target| target.needs_depth())
+    }
 }
 
 /// Frame output information for a given pipeline ID.
 /// Storing the task ID allows the renderer to find
 /// the target rect within the render target that this
 /// pipeline exists at.
 pub struct FrameOutput {
     pub task_id: RenderTaskId,
@@ -1378,29 +1378,22 @@ impl RenderTarget for ColorRenderTarget 
         ColorRenderTarget {
             alpha_batcher: AlphaBatcher::new(screen_size),
             text_run_cache_prims: FastHashMap::default(),
             line_cache_prims: Vec::new(),
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             readbacks: Vec::new(),
             scalings: Vec::new(),
-            allocator: size.map(|size| TextureAllocator::new(size)),
+            allocator: size.map(TextureAllocator::new),
             glyph_fetch_buffer: Vec::new(),
             outputs: Vec::new(),
         }
     }
 
-    fn used_rect(&self) -> DeviceIntRect {
-        self.allocator
-            .as_ref()
-            .expect("bug: used_rect called on framebuffer")
-            .used_rect
-    }
-
     fn build(
         &mut self,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
     ) {
         self.alpha_batcher
@@ -1410,17 +1403,17 @@ impl RenderTarget for ColorRenderTarget 
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &GpuCache,
         render_tasks: &RenderTaskTree,
         _: &ClipStore,
     ) {
-        let task = render_tasks.get(task_id);
+        let task = &render_tasks[task_id];
 
         match task.kind {
             RenderTaskKind::Alias(..) => {
                 panic!("BUG: add_task() called on invalidated task");
             }
             RenderTaskKind::VerticalBlur(ref info) => {
                 info.add_instances(
                     &mut self.vertical_blurs,
@@ -1482,17 +1475,20 @@ impl RenderTarget for ColorRenderTarget 
                                             PrimitiveKind::TextRun => {
                                                 // Add instances that reference the text run GPU location. Also supply
                                                 // the parent shadow prim address as a user data field, allowing
                                                 // the shader to fetch the shadow parameters.
                                                 let text = &ctx.prim_store.cpu_text_runs
                                                     [sub_metadata.cpu_prim_index.0];
                                                 let text_run_cache_prims = &mut self.text_run_cache_prims;
 
-                                                let font = text.get_font(ctx.device_pixel_ratio);
+                                                let font = text.get_font(
+                                                    ctx.device_pixel_ratio,
+                                                    &LayerToWorldTransform::identity(),
+                                                );
 
                                                 ctx.resource_cache.fetch_glyphs(
                                                     font,
                                                     &text.glyph_keys,
                                                     &mut self.glyph_fetch_buffer,
                                                     gpu_cache,
                                                     |texture_id, _glyph_format, glyphs| {
                                                         let batch = text_run_cache_prims
@@ -1537,16 +1533,27 @@ impl RenderTarget for ColorRenderTarget 
             RenderTaskKind::Scaling(..) => {
                 self.scalings.push(ScalingInfo {
                     src_task_id: task.children[0],
                     dest_task_id: task_id,
                 });
             }
         }
     }
+
+    fn used_rect(&self) -> DeviceIntRect {
+        self.allocator
+            .as_ref()
+            .expect("bug: used_rect called on framebuffer")
+            .used_rect
+    }
+
+    fn needs_depth(&self) -> bool {
+        !self.alpha_batcher.batch_list.opaque_batch_list.batches.is_empty()
+    }
 }
 
 pub struct AlphaRenderTarget {
     pub clip_batcher: ClipBatcher,
     pub brush_mask_corners: Vec<PrimitiveInstance>,
     pub brush_mask_rounded_rects: Vec<PrimitiveInstance>,
     // List of blur operations to apply for this render target.
     pub vertical_blurs: Vec<BlurInstance>,
@@ -1572,29 +1579,25 @@ impl RenderTarget for AlphaRenderTarget 
             vertical_blurs: Vec::new(),
             horizontal_blurs: Vec::new(),
             scalings: Vec::new(),
             zero_clears: Vec::new(),
             allocator: TextureAllocator::new(size.expect("bug: alpha targets need size")),
         }
     }
 
-    fn used_rect(&self) -> DeviceIntRect {
-        self.allocator.used_rect
-    }
-
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
         ctx: &RenderTargetContext,
         gpu_cache: &GpuCache,
         render_tasks: &RenderTaskTree,
         clip_store: &ClipStore,
     ) {
-        let task = render_tasks.get(task_id);
+        let task = &render_tasks[task_id];
 
         match task.clear_mode {
             ClearMode::Zero => {
                 self.zero_clears.push(task_id);
             }
             ClearMode::One => {}
             ClearMode::Transparent => {
                 panic!("bug: invalid clear mode for alpha task");
@@ -1700,182 +1703,165 @@ impl RenderTarget for AlphaRenderTarget 
             RenderTaskKind::Scaling(..) => {
                 self.scalings.push(ScalingInfo {
                     src_task_id: task.children[0],
                     dest_task_id: task_id,
                 });
             }
         }
     }
+
+    fn used_rect(&self) -> DeviceIntRect {
+        self.allocator.used_rect
+    }
+
+    fn needs_depth(&self) -> bool {
+        false
+    }
+}
+
+
+pub enum RenderPassKind {
+    MainFramebuffer(ColorRenderTarget),
+    OffScreen {
+        alpha: RenderTargetList<AlphaRenderTarget>,
+        color: RenderTargetList<ColorRenderTarget>,
+    },
 }
 
 /// A render pass represents a set of rendering operations that don't depend on one
 /// another.
 ///
 /// A render pass can have several render targets if there wasn't enough space in one
 /// target to do all of the rendering for that pass.
 pub struct RenderPass {
-    pub is_framebuffer: bool,
+    pub kind: RenderPassKind,
     tasks: Vec<RenderTaskId>,
-    pub color_targets: RenderTargetList<ColorRenderTarget>,
-    pub alpha_targets: RenderTargetList<AlphaRenderTarget>,
-    pub color_texture: Option<Texture>,
-    pub alpha_texture: Option<Texture>,
     dynamic_tasks: FastHashMap<RenderTaskKey, DynamicTaskInfo>,
-    pub max_color_target_size: DeviceUintSize,
-    pub max_alpha_target_size: DeviceUintSize,
 }
 
 impl RenderPass {
-    pub fn new(
-        is_framebuffer: bool,
-        screen_size: DeviceIntSize
-    ) -> RenderPass {
+    pub fn new_main_framebuffer(screen_size: DeviceIntSize) -> Self {
+        let target = ColorRenderTarget::new(None, screen_size);
         RenderPass {
-            is_framebuffer,
-            color_targets: RenderTargetList::new(is_framebuffer, screen_size),
-            alpha_targets: RenderTargetList::new(false, screen_size),
+            kind: RenderPassKind::MainFramebuffer(target),
             tasks: vec![],
-            color_texture: None,
-            alpha_texture: None,
             dynamic_tasks: FastHashMap::default(),
-            max_color_target_size: DeviceUintSize::new(MIN_TARGET_SIZE, MIN_TARGET_SIZE),
-            max_alpha_target_size: DeviceUintSize::new(MIN_TARGET_SIZE, MIN_TARGET_SIZE),
+        }
+    }
+
+    pub fn new_off_screen(screen_size: DeviceIntSize) -> Self {
+        RenderPass {
+            kind: RenderPassKind::OffScreen {
+                color: RenderTargetList::new(screen_size),
+                alpha: RenderTargetList::new(screen_size),
+            },
+            tasks: vec![],
+            dynamic_tasks: FastHashMap::default(),
         }
     }
 
     pub fn add_render_task(
         &mut self,
         task_id: RenderTaskId,
         size: DeviceIntSize,
         target_kind: RenderTargetKind,
     ) {
-        match target_kind {
-            RenderTargetKind::Color => {
-                self.max_color_target_size.width =
-                    cmp::max(self.max_color_target_size.width, size.width as u32);
-                self.max_color_target_size.height =
-                    cmp::max(self.max_color_target_size.height, size.height as u32);
-            }
-            RenderTargetKind::Alpha => {
-                self.max_alpha_target_size.width =
-                    cmp::max(self.max_alpha_target_size.width, size.width as u32);
-                self.max_alpha_target_size.height =
-                    cmp::max(self.max_alpha_target_size.height, size.height as u32);
-            }
+        if let RenderPassKind::OffScreen { ref mut color, ref mut alpha } = self.kind {
+            let max_size = match target_kind {
+                RenderTargetKind::Color => &mut color.max_size,
+                RenderTargetKind::Alpha => &mut alpha.max_size,
+            };
+            max_size.width = cmp::max(max_size.width, size.width as u32);
+            max_size.height = cmp::max(max_size.height, size.height as u32);
         }
 
         self.tasks.push(task_id);
     }
 
-    pub fn needs_render_target_kind(&self, kind: RenderTargetKind) -> bool {
-        if self.is_framebuffer {
-            false
-        } else {
-            self.required_target_count(kind) > 0
-        }
-    }
-
-    pub fn required_target_count(&self, kind: RenderTargetKind) -> usize {
-        match kind {
-            RenderTargetKind::Color => self.color_targets.target_count(),
-            RenderTargetKind::Alpha => self.alpha_targets.target_count(),
-        }
-    }
-
     pub fn build(
         &mut self,
         ctx: &RenderTargetContext,
         gpu_cache: &mut GpuCache,
         render_tasks: &mut RenderTaskTree,
         deferred_resolves: &mut Vec<DeferredResolve>,
         clip_store: &ClipStore,
     ) {
         profile_scope!("RenderPass::build");
 
-        // Step through each task, adding to batches as appropriate.
-        for task_id in &self.tasks {
-            let task_id = *task_id;
-
-            let target_kind = {
-                let task = render_tasks.get_mut(task_id);
-                let target_kind = task.target_kind();
+        match self.kind {
+            RenderPassKind::MainFramebuffer(ref mut target) => {
+                for &task_id in &self.tasks {
+                    assert_eq!(render_tasks[task_id].target_kind(), RenderTargetKind::Color);
+                    target.add_task(task_id, ctx, gpu_cache, render_tasks, clip_store);
+                }
+                target.build(ctx, gpu_cache, render_tasks, deferred_resolves);
+            }
+            RenderPassKind::OffScreen { ref mut color, ref mut alpha } => {
+                // Step through each task, adding to batches as appropriate.
+                for &task_id in &self.tasks {
+                    let target_kind = {
+                        let task = &mut render_tasks[task_id];
+                        let target_kind = task.target_kind();
 
-                // Find a target to assign this task to, or create a new
-                // one if required.
-                match task.location {
-                    RenderTaskLocation::Fixed => {}
-                    RenderTaskLocation::Dynamic(_, size) => {
-                        if let Some(cache_key) = task.cache_key {
-                            // See if this task is a duplicate.
-                            // If so, just skip adding it!
-                            if let Some(task_info) = self.dynamic_tasks.get(&cache_key) {
-                                task.set_alias(task_info.task_id);
-                                debug_assert_eq!(task_info.rect.size, size);
-                                continue;
+                        // Find a target to assign this task to, or create a new
+                        // one if required.
+                        match task.location {
+                            RenderTaskLocation::Fixed => {}
+                            RenderTaskLocation::Dynamic(ref mut origin, size) => {
+                                let dynamic_entry = match task.cache_key {
+                                    // See if this task is a duplicate.
+                                    // If so, just skip adding it!
+                                    Some(cache_key) => match self.dynamic_tasks.entry(cache_key) {
+                                        Entry::Occupied(entry) => {
+                                            // TODO(gw): We can easily handle invalidation of tasks that
+                                            // contain children in the future. Since we don't
+                                            // have any cases of that yet, just assert to simplify
+                                            // the current implementation.
+                                            debug_assert!(task.children.is_empty());
+                                            debug_assert_eq!(entry.get().rect.size, size);
+                                            task.kind = RenderTaskKind::Alias(entry.get().task_id);
+                                            continue;
+                                        },
+                                        Entry::Vacant(entry) => Some(entry),
+                                    },
+                                    None => None,
+                                };
+
+                                let alloc_size = DeviceUintSize::new(size.width as u32, size.height as u32);
+                                let (alloc_origin, target_index) =  match target_kind {
+                                    RenderTargetKind::Color => color.allocate(alloc_size),
+                                    RenderTargetKind::Alpha => alpha.allocate(alloc_size),
+                                };
+                                *origin = Some((alloc_origin.to_i32(), target_index));
+
+                                // If this task is cacheable / sharable, store it in the task hash
+                                // for this pass.
+                                if let Some(entry) = dynamic_entry {
+                                    entry.insert(DynamicTaskInfo {
+                                        task_id,
+                                        rect: DeviceIntRect::new(alloc_origin.to_i32(), size),
+                                    });
+                                }
                             }
                         }
 
-                        let alloc_size = DeviceUintSize::new(size.width as u32, size.height as u32);
-                        let (alloc_origin, target_index) = match target_kind {
-                            RenderTargetKind::Color => self.color_targets
-                                .allocate(alloc_size, self.max_color_target_size),
-                            RenderTargetKind::Alpha => self.alpha_targets
-                                .allocate(alloc_size, self.max_alpha_target_size),
-                        };
-
-                        let origin = Some((
-                            DeviceIntPoint::new(alloc_origin.x as i32, alloc_origin.y as i32),
-                            target_index,
-                        ));
-                        task.location = RenderTaskLocation::Dynamic(origin, size);
+                        target_kind
+                    };
 
-                        // If this task is cacheable / sharable, store it in the task hash
-                        // for this pass.
-                        if let Some(cache_key) = task.cache_key {
-                            self.dynamic_tasks.insert(
-                                cache_key,
-                                DynamicTaskInfo {
-                                    task_id,
-                                    rect: match task.location {
-                                        RenderTaskLocation::Fixed => {
-                                            panic!("Dynamic tasks should not have fixed locations!")
-                                        }
-                                        RenderTaskLocation::Dynamic(Some((origin, _)), size) => {
-                                            DeviceIntRect::new(origin, size)
-                                        }
-                                        RenderTaskLocation::Dynamic(None, _) => {
-                                            panic!("Expect the task to be already allocated here")
-                                        }
-                                    },
-                                },
-                            );
-                        }
+                    match target_kind {
+                        RenderTargetKind::Color => color.add_task(task_id, ctx, gpu_cache, render_tasks, clip_store),
+                        RenderTargetKind::Alpha => alpha.add_task(task_id, ctx, gpu_cache, render_tasks, clip_store),
                     }
                 }
 
-                target_kind
-            };
-
-            match target_kind {
-                RenderTargetKind::Color => {
-                    self.color_targets
-                        .add_task(task_id, ctx, gpu_cache, render_tasks, clip_store)
-                }
-                RenderTargetKind::Alpha => {
-                    self.alpha_targets
-                        .add_task(task_id, ctx, gpu_cache, render_tasks, clip_store)
-                }
+                color.build(ctx, gpu_cache, render_tasks, deferred_resolves);
+                alpha.build(ctx, gpu_cache, render_tasks, deferred_resolves);
             }
         }
-
-        self.color_targets
-            .build(ctx, gpu_cache, render_tasks, deferred_resolves);
-        self.alpha_targets
-            .build(ctx, gpu_cache, render_tasks, deferred_resolves);
     }
 }
 
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 pub enum TransformBatchKind {
     Rectangle(bool),
     TextRun(GlyphFormat),
     Image(ImageBufferKind),
@@ -1910,17 +1896,17 @@ pub enum BatchKind {
 #[derive(Copy, Clone, Debug)]
 pub struct BatchKey {
     pub kind: BatchKind,
     pub blend_mode: BlendMode,
     pub textures: BatchTextures,
 }
 
 impl BatchKey {
-    fn new(kind: BatchKind, blend_mode: BlendMode, textures: BatchTextures) -> BatchKey {
+    fn new(kind: BatchKind, blend_mode: BlendMode, textures: BatchTextures) -> Self {
         BatchKey {
             kind,
             blend_mode,
             textures,
         }
     }
 
     fn is_compatible_with(&self, other: &BatchKey) -> bool {
@@ -1973,33 +1959,35 @@ pub struct CompositeOps {
     // Requires only a single texture as input (e.g. most filters)
     pub filters: Vec<FilterOp>,
 
     // Requires two source textures (e.g. mix-blend-mode)
     pub mix_blend_mode: Option<MixBlendMode>,
 }
 
 impl CompositeOps {
-    pub fn new(filters: Vec<FilterOp>, mix_blend_mode: Option<MixBlendMode>) -> CompositeOps {
+    pub fn new(filters: Vec<FilterOp>, mix_blend_mode: Option<MixBlendMode>) -> Self {
         CompositeOps {
             filters,
-            mix_blend_mode: mix_blend_mode,
+            mix_blend_mode,
         }
     }
 
     pub fn count(&self) -> usize {
         self.filters.len() + if self.mix_blend_mode.is_some() { 1 } else { 0 }
     }
 }
 
 /// A rendering-oriented representation of frame::Frame built by the render backend
 /// and presented to the renderer.
 pub struct Frame {
     pub window_size: DeviceUintSize,
+    pub inner_rect: DeviceUintRect,
     pub background_color: Option<ColorF>,
+    pub layer: DocumentLayer,
     pub device_pixel_ratio: f32,
     pub passes: Vec<RenderPass>,
     pub profile_counters: FrameProfileCounters,
 
     pub node_data: Vec<ClipScrollNodeData>,
     pub render_tasks: RenderTaskTree,
 
     // List of updates that need to be pushed to the
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -16,16 +16,17 @@ use std::i32;
 const NEARLY_ZERO: f32 = 1.0 / 4096.0;
 
 // TODO: Implement these in euclid!
 pub trait MatrixHelpers<Src, Dst> {
     fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst>;
     fn is_identity(&self) -> bool;
     fn preserves_2d_axis_alignment(&self) -> bool;
     fn has_perspective_component(&self) -> bool;
+    fn has_2d_inverse(&self) -> bool;
     fn inverse_project(&self, target: &TypedPoint2D<f32, Dst>) -> Option<TypedPoint2D<f32, Src>>;
     fn inverse_rect_footprint(&self, rect: &TypedRect<f32, Dst>) -> TypedRect<f32, Src>;
     fn transform_kind(&self) -> TransformedRectKind;
 }
 
 impl<Src, Dst> MatrixHelpers<Src, Dst> for TypedTransform3D<f32, Src, Dst> {
     fn transform_rect(&self, rect: &TypedRect<f32, Src>) -> TypedRect<f32, Dst> {
         let top_left = self.transform_point2d(&rect.origin);
@@ -70,16 +71,23 @@ impl<Src, Dst> MatrixHelpers<Src, Dst> f
 
         col0 < 2 && col1 < 2 && row0 < 2 && row1 < 2
     }
 
     fn has_perspective_component(&self) -> bool {
          self.m14 != 0.0 || self.m24 != 0.0 || self.m34 != 0.0 || self.m44 != 1.0
     }
 
+    fn has_2d_inverse(&self) -> bool {
+        // Determinant of the upper-left 2x2 block (m11, m12, m21, m22); that block
+        // can only be inverted when the determinant is non-zero.
+        let determinant = self.m11 * self.m22 - self.m12 * self.m21;
+        determinant != 0.0
+    }
+
     fn inverse_project(&self, target: &TypedPoint2D<f32, Dst>) -> Option<TypedPoint2D<f32, Src>> {
         let m: TypedTransform2D<f32, Src, Dst>;
         m = TypedTransform2D::column_major(
             self.m11 - target.x * self.m14,
             self.m21 - target.x * self.m24,
             self.m41 - target.x * self.m44,
             self.m12 - target.y * self.m14,
             self.m22 - target.y * self.m24,
--- a/gfx/webrender_api/Cargo.toml
+++ b/gfx/webrender_api/Cargo.toml
@@ -16,12 +16,12 @@ bincode = "0.9"
 byteorder = "1.0"
 euclid = "0.15"
 ipc-channel = {version = "0.9", optional = true}
 serde = { version = "1.0", features = ["rc", "derive"] }
 time = "0.1"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
-core-graphics = "0.12.2"
+core-graphics = "0.12.3"
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4"
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -9,16 +9,18 @@ use {ImageDescriptor, ImageKey, ItemTag,
 use {NativeFontHandle, WorldPoint};
 use app_units::Au;
 use channel::{self, MsgSender, Payload, PayloadSender, PayloadSenderHelperMethods};
 use std::cell::Cell;
 use std::fmt;
 use std::marker::PhantomData;
 
 pub type TileSize = u16;
+/// Documents are rendered in the ascending order of their associated layer values.
+pub type DocumentLayer = i8;
 
 /// The resource updates for a given transaction (they must be applied in the same frame).
 #[derive(Clone, Deserialize, Serialize)]
 pub struct ResourceUpdates {
     pub updates: Vec<ResourceUpdate>,
 }
 
 #[derive(Clone, Deserialize, Serialize)]
@@ -273,17 +275,17 @@ pub enum ApiMsg {
         Vec<GlyphKey>,
         MsgSender<Vec<Option<GlyphDimensions>>>,
     ),
     /// Gets the glyph indices from a string
     GetGlyphIndices(FontKey, String, MsgSender<Vec<Option<u32>>>),
     /// Adds a new document namespace.
     CloneApi(MsgSender<IdNamespace>),
     /// Adds a new document with given initial size.
-    AddDocument(DocumentId, DeviceUintSize),
+    AddDocument(DocumentId, DeviceUintSize, DocumentLayer),
     /// A message targeted at a particular document.
     UpdateDocument(DocumentId, DocumentMsg),
     /// Deletes an existing document.
     DeleteDocument(DocumentId),
     /// An opaque handle that must be passed to the render notifier. It is used by Gecko
     /// to forward gecko-specific messages to the render thread preserving the ordering
     /// within the other messages.
     ExternalEvent(ExternalEvent),
@@ -415,21 +417,21 @@ impl RenderApi {
     pub fn get_namespace_id(&self) -> IdNamespace {
         self.namespace_id
     }
 
     pub fn clone_sender(&self) -> RenderApiSender {
         RenderApiSender::new(self.api_sender.clone(), self.payload_sender.clone())
     }
 
-    pub fn add_document(&self, initial_size: DeviceUintSize) -> DocumentId {
+    pub fn add_document(&self, initial_size: DeviceUintSize, layer: DocumentLayer) -> DocumentId {
         let new_id = self.next_unique_id();
         let document_id = DocumentId(self.namespace_id, new_id);
 
-        let msg = ApiMsg::AddDocument(document_id, initial_size);
+        let msg = ApiMsg::AddDocument(document_id, initial_size, layer);
         self.api_sender.send(msg).unwrap();
 
         document_id
     }
 
     pub fn delete_document(&self, document_id: DocumentId) {
         let msg = ApiMsg::DeleteDocument(document_id);
         self.api_sender.send(msg).unwrap();
@@ -564,17 +566,17 @@ impl RenderApi {
     /// Sets the root pipeline.
     ///
     /// # Examples
     ///
     /// ```
     /// # use webrender_api::{DeviceUintSize, PipelineId, RenderApiSender};
     /// # fn example(sender: RenderApiSender) {
     /// let api = sender.create_api();
-    /// let document_id = api.add_document(DeviceUintSize::zero());
+    /// let document_id = api.add_document(DeviceUintSize::zero(), 0);
     /// let pipeline_id = PipelineId(0, 0);
     /// api.set_root_pipeline(document_id, pipeline_id);
     /// # }
     /// ```
     pub fn set_root_pipeline(&self, document_id: DocumentId, pipeline_id: PipelineId) {
         self.send(document_id, DocumentMsg::SetRootPipeline(pipeline_id));
     }
 
@@ -583,18 +585,18 @@ impl RenderApi {
     /// is reset back to `None`.
     pub fn remove_pipeline(&self, document_id: DocumentId, pipeline_id: PipelineId) {
         self.send(document_id, DocumentMsg::RemovePipeline(pipeline_id));
     }
 
     /// Supplies a new frame to WebRender.
     ///
     /// Non-blocking, it notifies a worker process which processes the display list.
-    /// When it's done and a RenderNotifier has been set in `webrender::Renderer`,
-    /// [new_frame_ready()][notifier] gets called.
+    /// When it's done and a `RenderNotifier` has been set in `webrender::Renderer`,
+    /// [new_document_ready()][notifier] gets called.
     ///
     /// Note: Scrolling doesn't require an own Frame.
     ///
     /// Arguments:
     ///
     /// * `document_id`: Target Document ID.
     /// * `epoch`: The unique Frame ID, monotonically increasing.
     /// * `background`: The background color of this pipeline.
@@ -603,17 +605,17 @@ impl RenderApi {
     /// * `content_size`: The total screen space size of this display list's display items.
     /// * `display_list`: The root Display list used in this frame.
     /// * `preserve_frame_state`: If a previous frame exists which matches this pipeline
     ///                           id, this setting determines if frame state (such as scrolling
     ///                           position) should be preserved for this new display list.
     /// * `resources`: A set of resource updates that must be applied at the same time as the
     ///                display list.
     ///
-    /// [notifier]: trait.RenderNotifier.html#tymethod.new_frame_ready
+    /// [notifier]: trait.RenderNotifier.html#tymethod.new_document_ready
     pub fn set_display_list(
         &self,
         document_id: DocumentId,
         epoch: Epoch,
         background: Option<ColorF>,
         viewport_size: LayoutSize,
         (pipeline_id, content_size, display_list): (PipelineId, LayoutSize, BuiltDisplayList),
         preserve_frame_state: bool,
@@ -879,15 +881,19 @@ pub struct PropertyValue<T> {
 #[derive(Clone, Deserialize, Serialize, Debug)]
 pub struct DynamicProperties {
     pub transforms: Vec<PropertyValue<LayoutTransform>>,
     pub floats: Vec<PropertyValue<f32>>,
 }
 
 pub trait RenderNotifier: Send {
     fn clone(&self) -> Box<RenderNotifier>;
-    fn new_frame_ready(&self);
-    fn new_scroll_frame_ready(&self, composite_needed: bool);
+    fn wake_up(&self);
+    /// Called when the work started by `RenderApi::set_display_list` is done;
+    /// `document_id` presumably identifies the document that became ready.
+    /// NOTE(review): the exact meaning of `scrolled` and `composite_needed` is not
+    /// visible in this patch - confirm against the render backend call sites.
+    fn new_document_ready(&self, document_id: DocumentId, scrolled: bool, composite_needed: bool);
     fn external_event(&self, _evt: ExternalEvent) {
         unimplemented!()
     }
     fn shut_down(&self) {}
 }
--- a/gfx/webrender_api/src/display_item.rs
+++ b/gfx/webrender_api/src/display_item.rs
@@ -54,18 +54,20 @@ bitflags! {
         const BOTTOM = 0x8;
     }
 }
 
 /// A tag that can be used to identify items during hit testing. If the tag
 /// is missing then the item doesn't take part in hit testing at all. This
 /// is composed of two numbers. In Servo, the first is an identifier while the
 /// second is used to select the cursor that should be used during mouse
-/// movement.
-pub type ItemTag = (u64, u8);
+/// movement. In Gecko, the first is a scrollframe identifier, while the second
+/// is used to store various flags that APZ needs to properly process input
+/// events.
+pub type ItemTag = (u64, u16);
 
 #[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)]
 pub struct DisplayItem {
     pub item: SpecificDisplayItem,
     pub clip_and_scroll: ClipAndScrollInfo,
     pub info: LayoutPrimitiveInfo,
 }
 
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -528,17 +528,17 @@ impl<'a> Write for SizeCounter {
         self.0 += buf.len();
         Ok(())
     }
 
     #[inline(always)]
     fn flush(&mut self) -> io::Result<()> { Ok(()) }
 }
 
-/// Serializes a value assuming the Serialize impl has a stable size across two 
+/// Serializes a value assuming the Serialize impl has a stable size across two
 /// invocations.
 ///
 /// If this assumption is incorrect, the result will be Undefined Behaviour. This
 /// assumption should hold for all derived Serialize impls, which is all we currently
 /// use.
 fn serialize_fast<T: Serialize>(vec: &mut Vec<u8>, e: &T) {
     // manually counting the size is faster than vec.reserve(bincode::serialized_size(&e) as usize) for some reason
     let mut size = SizeCounter(0);
@@ -552,17 +552,17 @@ fn serialize_fast<T: Serialize>(vec: &mu
 
     // fix up the length
     unsafe { vec.set_len(old_len + size.0); }
 
     // make sure we wrote the right amount
     debug_assert_eq!(((w.0 as usize) - (vec.as_ptr() as usize)), vec.len());
 }
 
-/// Serializes an iterator, assuming: 
+/// Serializes an iterator, assuming:
 ///
 /// * The Clone impl is trivial (e.g. we're just memcopying a slice iterator)
 /// * The ExactSizeIterator impl is stable and correct across a Clone
 /// * The Serialize impl has a stable size across two invocations
 ///
 /// If the first is incorrect, webrender will be very slow. If the other two are
 /// incorrect, the result will be Undefined Behaviour! The ExactSizeIterator
 /// bound would ideally be replaced with a TrustedLen bound to protect us a bit
@@ -716,16 +716,21 @@ impl DisplayListBuilder {
             ],
             next_clip_id: FIRST_CLIP_ID,
             builder_start_time: start_time,
             content_size,
             save_state: None,
         }
     }
 
+    /// Return the content size for this display list
+    pub fn content_size(&self) -> LayoutSize {
+        self.content_size
+    }
+
     /// Saves the current display list state, so it may be `restore()`'d.
     ///
     /// # Conditions:
     ///
     /// * Doesn't support popping clips that were pushed before the save.
     /// * Doesn't support nested saves.
     /// * Must call `clear_save()` if the restore becomes unnecessary.
     pub fn save(&mut self) {
--- a/gfx/webrender_bindings/Cargo.toml
+++ b/gfx/webrender_bindings/Cargo.toml
@@ -18,11 +18,11 @@ path = "../webrender"
 version = "0.54.0"
 default-features = false
 
 [target.'cfg(target_os = "windows")'.dependencies]
 dwrote = "0.4"
 
 [target.'cfg(target_os = "macos")'.dependencies]
 core-foundation = "0.4"
-core-graphics = "0.12.2"
+core-graphics = "0.12.3"
 foreign-types = "0.3.0"