Bug 1393468 - Update webrender to commit b7cec8b19d5d6061263c3639031caf41562a2e17. r=jrmuizel
author Kartikaya Gupta <kgupta@mozilla.com>
Mon, 28 Aug 2017 09:27:24 -0400
changeset 377262 6ebece3e7f305539ab8acb256dd75860fdc93e81
parent 377261 155bf5fa378902db1f4657b216fd312b03f4d5d1
child 377263 201c0c94bae0f87ce4b9af5ba21465761b0fc987
push id 32405
push user kwierso@gmail.com
push date Tue, 29 Aug 2017 00:39:06 +0000
treeherder mozilla-central@1b4c59eef820 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1393468
milestone 57.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1393468 - Update webrender to commit b7cec8b19d5d6061263c3639031caf41562a2e17. r=jrmuizel MozReview-Commit-ID: 5cAVenyJ4Eo
gfx/doc/README.webrender
gfx/webrender/Cargo.toml
gfx/webrender/examples/common/boilerplate.rs
gfx/webrender/examples/texture_cache_stress.rs
gfx/webrender/src/debug_server.rs
gfx/webrender/src/device.rs
gfx/webrender/src/frame.rs
gfx/webrender/src/gpu_cache.rs
gfx/webrender/src/internal_types.rs
gfx/webrender/src/lib.rs
gfx/webrender/src/prim_store.rs
gfx/webrender/src/profiler.rs
gfx/webrender/src/render_backend.rs
gfx/webrender/src/render_task.rs
gfx/webrender/src/renderer.rs
gfx/webrender/src/texture_cache.rs
gfx/webrender/src/tiling.rs
gfx/webrender_api/src/api.rs
gfx/webrender_api/src/display_list.rs
--- a/gfx/doc/README.webrender
+++ b/gfx/doc/README.webrender
@@ -74,9 +74,9 @@ there is another crate in m-c called moz
 the same folder to store its rust dependencies. If one of the libraries that is
 required by both mozjs_sys and webrender is updated without updating the other
 project's Cargo.lock file, that results in build bustage.
 This means that any time you do this sort of manual update of packages, you need
 to make sure that mozjs_sys also has its Cargo.lock file updated if needed, hence
 the need to run the cargo update command in js/src as well. Hopefully this will
 be resolved soon.
 
-Latest Commit: 310af2613e7508b22cad11e734b8c47e66447cc7
+Latest Commit: b7cec8b19d5d6061263c3639031caf41562a2e17
--- a/gfx/webrender/Cargo.toml
+++ b/gfx/webrender/Cargo.toml
@@ -5,16 +5,17 @@ authors = ["Glenn Watson <gw@intuitionli
 license = "MPL-2.0"
 repository = "https://github.com/servo/webrender"
 build = "build.rs"
 
 [features]
 default = ["freetype-lib"]
 freetype-lib = ["freetype/servo-freetype-sys"]
 profiler = ["thread_profiler/thread_profiler"]
+debugger = ["ws", "serde_json", "serde", "serde_derive"]
 
 [dependencies]
 app_units = "0.5"
 bincode = "0.8"
 bit-set = "0.4"
 byteorder = "1.0"
 euclid = "0.15.1"
 fxhash = "0.2.1"
@@ -24,16 +25,20 @@ log = "0.3"
 num-traits = "0.1.32"
 time = "0.1"
 rayon = "0.8"
 webrender_api = {path = "../webrender_api"}
 bitflags = "0.9"
 gamma-lut = "0.2"
 thread_profiler = "0.1.1"
 plane-split = "0.6"
+ws = { optional = true, version = "0.7.3" }
+serde_json = { optional = true, version = "1.0" }
+serde = { optional = true, version = "1.0" }
+serde_derive = { optional = true, version = "1.0" }
 
 [dev-dependencies]
 angle = {git = "https://github.com/servo/angle", branch = "servo"}
 rand = "0.3"                # for the benchmarks
 servo-glutin = "0.11"     # for the example apps
 
 [target.'cfg(any(target_os = "android", all(unix, not(target_os = "macos"))))'.dependencies]
 freetype = { version = "0.3", default-features = false }
--- a/gfx/webrender/examples/common/boilerplate.rs
+++ b/gfx/webrender/examples/common/boilerplate.rs
@@ -3,18 +3,16 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use gleam::gl;
 use glutin;
 use std::env;
 use std::path::PathBuf;
 use webrender;
 use webrender::api::*;
-use webrender::renderer::{PROFILER_DBG, RENDER_TARGET_DBG, TEXTURE_CACHE_DBG};
-use webrender::renderer::ExternalImageHandler;
 
 struct Notifier {
     window_proxy: glutin::WindowProxy,
 }
 
 impl Notifier {
     fn new(window_proxy: glutin::WindowProxy) -> Notifier {
         Notifier {
@@ -60,17 +58,17 @@ pub trait Example {
               resources: &mut ResourceUpdates,
               layout_size: LayoutSize,
               pipeline_id: PipelineId,
               document_id: DocumentId);
     fn on_event(&mut self,
                 event: glutin::Event,
                 api: &RenderApi,
                 document_id: DocumentId) -> bool;
-    fn get_external_image_handler(&self) -> Option<Box<ExternalImageHandler>> {
+    fn get_external_image_handler(&self) -> Option<Box<webrender::ExternalImageHandler>> {
         None
     }
 }
 
 pub fn main_wrapper(example: &mut Example,
                     options: Option<webrender::RendererOptions>)
 {
     let args: Vec<String> = env::args().collect();
@@ -108,17 +106,17 @@ pub fn main_wrapper(example: &mut Exampl
         resource_override_path: res_path,
         debug: true,
         precache_shaders: true,
         device_pixel_ratio: window.hidpi_factor(),
         .. options.unwrap_or(webrender::RendererOptions::default())
     };
 
     let size = DeviceUintSize::new(width, height);
-    let (mut renderer, sender) = webrender::renderer::Renderer::new(gl, opts).unwrap();
+    let (mut renderer, sender) = webrender::Renderer::new(gl, opts).unwrap();
     let api = sender.create_api();
     let document_id = api.add_document(size);
 
     let notifier = Box::new(Notifier::new(window.create_window_proxy()));
     renderer.set_render_notifier(notifier);
 
     if let Some(external_image_handler) = example.get_external_image_handler() {
         renderer.set_external_image_handler(external_image_handler);
@@ -157,29 +155,29 @@ pub fn main_wrapper(example: &mut Exampl
             match event {
                 glutin::Event::Closed |
                 glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Escape)) |
                 glutin::Event::KeyboardInput(_, _, Some(glutin::VirtualKeyCode::Q)) => break 'outer,
 
                 glutin::Event::KeyboardInput(glutin::ElementState::Pressed,
                                              _, Some(glutin::VirtualKeyCode::P)) => {
                     let mut flags = renderer.get_debug_flags();
-                    flags.toggle(PROFILER_DBG);
+                    flags.toggle(webrender::PROFILER_DBG);
                     renderer.set_debug_flags(flags);
                 }
                 glutin::Event::KeyboardInput(glutin::ElementState::Pressed,
                                              _, Some(glutin::VirtualKeyCode::O)) => {
                     let mut flags = renderer.get_debug_flags();
-                    flags.toggle(RENDER_TARGET_DBG);
+                    flags.toggle(webrender::RENDER_TARGET_DBG);
                     renderer.set_debug_flags(flags);
                 }
                 glutin::Event::KeyboardInput(glutin::ElementState::Pressed,
                                              _, Some(glutin::VirtualKeyCode::I)) => {
                     let mut flags = renderer.get_debug_flags();
-                    flags.toggle(TEXTURE_CACHE_DBG);
+                    flags.toggle(webrender::TEXTURE_CACHE_DBG);
                     renderer.set_debug_flags(flags);
                 }
                 glutin::Event::KeyboardInput(glutin::ElementState::Pressed,
                                              _, Some(glutin::VirtualKeyCode::M)) => {
                     api.notify_memory_pressure();
                 }
                 _ => {
                     if example.on_event(event, &api, document_id) {
--- a/gfx/webrender/examples/texture_cache_stress.rs
+++ b/gfx/webrender/examples/texture_cache_stress.rs
@@ -7,17 +7,16 @@ extern crate glutin;
 extern crate webrender;
 
 #[path="common/boilerplate.rs"]
 mod boilerplate;
 
 use boilerplate::{Example, HandyDandyRectBuilder};
 use std::mem;
 use webrender::api::*;
-use webrender::renderer::{ExternalImage, ExternalImageSource, ExternalImageHandler};
 
 struct ImageGenerator {
     patterns: [[u8; 3]; 6],
     next_pattern: usize,
     current_image: Vec<u8>,
 }
 
 impl ImageGenerator {
@@ -52,25 +51,25 @@ impl ImageGenerator {
         self.next_pattern = (self.next_pattern + 1) % self.patterns.len();
     }
 
     fn take(&mut self) -> Vec<u8> {
         mem::replace(&mut self.current_image, Vec::new())
     }
 }
 
-impl ExternalImageHandler for ImageGenerator {
-    fn lock(&mut self, _key: ExternalImageId, channel_index: u8) -> ExternalImage {
+impl webrender::ExternalImageHandler for ImageGenerator {
+    fn lock(&mut self, _key: ExternalImageId, channel_index: u8) -> webrender::ExternalImage {
         self.generate_image(channel_index as u32);
-        ExternalImage {
+        webrender::ExternalImage {
             u0: 0.0,
             v0: 0.0,
             u1: 1.0,
             v1: 1.0,
-            source: ExternalImageSource::RawData(&self.current_image)
+            source: webrender::ExternalImageSource::RawData(&self.current_image)
         }
     }
     fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {
     }
 }
 
 struct App {
     stress_keys: Vec<ImageKey>,
@@ -265,17 +264,17 @@ impl Example for App {
                 return true;
             }
             _ => {}
         }
 
         false
     }
 
-    fn get_external_image_handler(&self) -> Option<Box<ExternalImageHandler>> {
+    fn get_external_image_handler(&self) -> Option<Box<webrender::ExternalImageHandler>> {
         Some(Box::new(ImageGenerator::new()))
     }
 }
 
 fn main() {
     let mut app = App {
         image_key: None,
         stress_keys: Vec::new(),
new file mode 100644
--- /dev/null
+++ b/gfx/webrender/src/debug_server.rs
@@ -0,0 +1,143 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ApiMsg, DebugCommand};
+use api::channel::MsgSender;
+use std::sync::mpsc::{channel, Receiver};
+use std::thread;
+use std::sync::mpsc::Sender;
+
+use ws;
+
+// Messages that are sent from the render backend to the renderer
+// debug command queue. These are sent in a separate queue so
+// that none of these types are exposed to the RenderApi interfaces.
+// We can't use select!() as it's not stable...
+pub enum DebugMsg {
+    FetchBatches(ws::Sender),
+}
+
+// Represents a connection to a client.
+struct Server {
+    ws: ws::Sender,
+    debug_tx: Sender<DebugMsg>,
+    api_tx: MsgSender<ApiMsg>,
+}
+
+impl ws::Handler for Server {
+    fn on_message(&mut self, msg: ws::Message) -> ws::Result<()> {
+        match msg {
+            ws::Message::Text(string) => {
+                let cmd = match string.as_str() {
+                    "enable_profiler" => {
+                        DebugCommand::EnableProfiler(true)
+                    }
+                    "disable_profiler" => {
+                        DebugCommand::EnableProfiler(false)
+                    }
+                    "enable_texture_cache_debug" => {
+                        DebugCommand::EnableTextureCacheDebug(true)
+                    }
+                    "disable_texture_cache_debug" => {
+                        DebugCommand::EnableTextureCacheDebug(false)
+                    }
+                    "enable_render_target_debug" => {
+                        DebugCommand::EnableRenderTargetDebug(true)
+                    }
+                    "disable_render_target_debug" => {
+                        DebugCommand::EnableRenderTargetDebug(false)
+                    }
+                    "fetch_batches" => {
+                        let msg = DebugMsg::FetchBatches(self.ws.clone());
+                        self.debug_tx.send(msg).unwrap();
+                        DebugCommand::Flush
+                    }
+                    msg => {
+                        println!("unknown msg {}", msg);
+                        return Ok(());
+                    }
+                };
+
+                let msg = ApiMsg::DebugCommand(cmd);
+                self.api_tx.send(msg).unwrap();
+            }
+            ws::Message::Binary(..) => {}
+        }
+
+        Ok(())
+    }
+}
+
+// Spawn a thread for a given renderer, and wait for
+// client connections.
+pub struct DebugServer {
+    join_handle: Option<thread::JoinHandle<()>>,
+    broadcaster: ws::Sender,
+    pub debug_rx: Receiver<DebugMsg>,
+}
+
+impl DebugServer {
+    pub fn new(api_tx: MsgSender<ApiMsg>) -> DebugServer {
+        let (debug_tx, debug_rx) = channel();
+
+        let socket = ws::Builder::new().build(move |out| {
+            Server {
+                ws: out,
+                debug_tx: debug_tx.clone(),
+                api_tx: api_tx.clone(),
+            }
+        }).unwrap();
+
+        let broadcaster = socket.broadcaster();
+
+        let join_handle = Some(thread::spawn(move || {
+            socket.listen("127.0.0.1:3583").unwrap();
+        }));
+
+        DebugServer {
+            join_handle,
+            broadcaster,
+            debug_rx,
+        }
+    }
+}
+
+impl Drop for DebugServer {
+    fn drop(&mut self) {
+        self.broadcaster.shutdown().unwrap();
+        self.join_handle.take().unwrap().join().unwrap();
+    }
+}
+
+// A serializable list of debug information about batches
+// that can be sent to the client.
+#[derive(Serialize)]
+pub struct BatchInfo {
+    kind: &'static str,
+    count: usize,
+}
+
+#[derive(Serialize)]
+pub struct BatchList {
+    kind: &'static str,
+    batches: Vec<BatchInfo>,
+}
+
+impl BatchList {
+    pub fn new() -> BatchList {
+        BatchList {
+            kind: "batches",
+            batches: Vec::new(),
+        }
+    }
+
+    pub fn push(&mut self, kind: &'static str, count: usize) {
+        if count > 0 {
+            self.batches.push(BatchInfo {
+                kind,
+                count,
+            });
+        }
+    }
+}
--- a/gfx/webrender/src/device.rs
+++ b/gfx/webrender/src/device.rs
@@ -438,196 +438,205 @@ pub struct RBOId(gl::GLuint);
 pub struct VBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 struct IBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 pub struct PBOId(gl::GLuint);
 
-const MAX_EVENTS_PER_FRAME: usize = 256;
+const MAX_TIMERS_PER_FRAME: usize = 256;
+const MAX_SAMPLERS_PER_FRAME: usize = 16;
 const MAX_PROFILE_FRAMES: usize = 4;
 
 pub trait NamedTag {
     fn get_label(&self) -> &str;
 }
 
 #[derive(Debug, Clone)]
-pub struct GpuSample<T> {
+pub struct GpuTimer<T> {
     pub tag: T,
     pub time_ns: u64,
 }
 
+#[derive(Debug, Clone)]
+pub struct GpuSampler<T> {
+    pub tag: T,
+    pub count: u64,
+}
+
+pub struct QuerySet<T> {
+    set: Vec<gl::GLuint>,
+    data: Vec<T>,
+    pending: gl::GLuint,
+}
+
+impl<T> QuerySet<T> {
+    fn new(set: Vec<gl::GLuint>) -> Self {
+        QuerySet {
+            set,
+            data: Vec::new(),
+            pending: 0,
+        }
+    }
+
+    fn reset(&mut self) {
+        self.data.clear();
+        self.pending = 0;
+    }
+
+    fn add(&mut self, value: T) -> Option<gl::GLuint> {
+        assert_eq!(self.pending, 0);
+        self.set.get(self.data.len())
+            .cloned()
+            .map(|query_id| {
+                self.data.push(value);
+                self.pending = query_id;
+                query_id
+            })
+    }
+
+    fn take<F: Fn(&mut T, gl::GLuint)>(&mut self, fun: F) -> Vec<T> {
+        let mut data = mem::replace(&mut self.data, Vec::new());
+        for (value, &query) in data.iter_mut().zip(self.set.iter()) {
+            fun(value, query)
+        }
+        data
+    }
+}
+
 pub struct GpuFrameProfile<T> {
     gl: Rc<gl::Gl>,
-    queries: Vec<gl::GLuint>,
-    samples: Vec<GpuSample<T>>,
-    next_query: usize,
-    pending_query: gl::GLuint,
+    timers: QuerySet<GpuTimer<T>>,
+    samplers: QuerySet<GpuSampler<T>>,
     frame_id: FrameId,
     inside_frame: bool,
 }
 
 impl<T> GpuFrameProfile<T> {
     fn new(gl: Rc<gl::Gl>) -> Self {
-        match gl.get_type() {
-            gl::GlType::Gl => {
-                let queries = gl.gen_queries(MAX_EVENTS_PER_FRAME as gl::GLint);
-                GpuFrameProfile {
-                    gl,
-                    queries,
-                    samples: Vec::new(),
-                    next_query: 0,
-                    pending_query: 0,
-                    frame_id: FrameId(0),
-                    inside_frame: false,
-                }
-            }
-            gl::GlType::Gles => {
-                GpuFrameProfile {
-                    gl,
-                    queries: Vec::new(),
-                    samples: Vec::new(),
-                    next_query: 0,
-                    pending_query: 0,
-                    frame_id: FrameId(0),
-                    inside_frame: false,
-                }
-            }
+        let (time_queries, sample_queries) = match gl.get_type() {
+            gl::GlType::Gl => (
+                gl.gen_queries(MAX_TIMERS_PER_FRAME as gl::GLint),
+                gl.gen_queries(MAX_SAMPLERS_PER_FRAME as gl::GLint),
+            ),
+            gl::GlType::Gles => (Vec::new(), Vec::new()),
+        };
+
+        GpuFrameProfile {
+            gl,
+            timers: QuerySet::new(time_queries),
+            samplers: QuerySet::new(sample_queries),
+            frame_id: FrameId(0),
+            inside_frame: false,
         }
     }
 
     fn begin_frame(&mut self, frame_id: FrameId) {
         self.frame_id = frame_id;
-        self.next_query = 0;
-        self.pending_query = 0;
-        self.samples.clear();
+        self.timers.reset();
+        self.samplers.reset();
         self.inside_frame = true;
     }
 
     fn end_frame(&mut self) {
+        self.done_marker();
+        self.done_sampler();
         self.inside_frame = false;
-        match self.gl.get_type() {
-            gl::GlType::Gl => {
-                if self.pending_query != 0 {
-                    self.gl.end_query(gl::TIME_ELAPSED);
-                }
-            }
-            gl::GlType::Gles => {},
+    }
+
+    fn done_marker(&mut self) {
+        debug_assert!(self.inside_frame);
+        if self.timers.pending != 0 {
+            self.gl.end_query(gl::TIME_ELAPSED);
+            self.timers.pending = 0;
         }
     }
 
-    fn add_marker(&mut self, tag: T) -> GpuMarker
-    where T: NamedTag {
-        debug_assert!(self.inside_frame);
-        match self.gl.get_type() {
-            gl::GlType::Gl => {
-                self.add_marker_gl(tag)
-            }
-            gl::GlType::Gles => {
-                self.add_marker_gles(tag)
-            }
-        }
-    }
-
-    fn add_marker_gl(&mut self, tag: T) -> GpuMarker
-    where T: NamedTag {
-        if self.pending_query != 0 {
-            self.gl.end_query(gl::TIME_ELAPSED);
-        }
+    fn add_marker(&mut self, tag: T) -> GpuMarker where T: NamedTag {
+        self.done_marker();
 
         let marker = GpuMarker::new(&self.gl, tag.get_label());
 
-        if self.next_query < MAX_EVENTS_PER_FRAME {
-            self.pending_query = self.queries[self.next_query];
-            self.gl.begin_query(gl::TIME_ELAPSED, self.pending_query);
-            self.samples.push(GpuSample {
-                tag,
-                time_ns: 0,
-            });
-        } else {
-            self.pending_query = 0;
+        if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
+            self.gl.begin_query(gl::TIME_ELAPSED, query);
         }
 
-        self.next_query += 1;
         marker
     }
 
-    fn add_marker_gles(&mut self, tag: T) -> GpuMarker
-    where T: NamedTag {
-        let marker = GpuMarker::new(&self.gl, tag.get_label());
-        self.samples.push(GpuSample {
-            tag,
-            time_ns: 0,
-        });
-        marker
+    fn done_sampler(&mut self) {
+        debug_assert!(self.inside_frame);
+        if self.samplers.pending != 0 {
+            self.gl.end_query(gl::SAMPLES_PASSED);
+            self.samplers.pending = 0;
+        }
+    }
+
+    fn add_sampler(&mut self, tag: T) where T: NamedTag {
+        self.done_sampler();
+
+        if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) {
+            self.gl.begin_query(gl::SAMPLES_PASSED, query);
+        }
     }
 
     fn is_valid(&self) -> bool {
-        self.next_query > 0 && self.next_query <= MAX_EVENTS_PER_FRAME
+        !self.timers.set.is_empty() || !self.samplers.set.is_empty()
     }
 
-    fn build_samples(&mut self) -> Vec<GpuSample<T>> {
+    fn build_samples(&mut self) -> (Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
         debug_assert!(!self.inside_frame);
-        match self.gl.get_type() {
-            gl::GlType::Gl => {
-                self.build_samples_gl()
-            }
-            gl::GlType::Gles => {
-                self.build_samples_gles()
-            }
-        }
-    }
+        let gl = &self.gl;
 
-    fn build_samples_gl(&mut self) -> Vec<GpuSample<T>> {
-        for (index, sample) in self.samples.iter_mut().enumerate() {
-            sample.time_ns = self.gl.get_query_object_ui64v(self.queries[index], gl::QUERY_RESULT)
-        }
-
-        mem::replace(&mut self.samples, Vec::new())
-    }
-
-    fn build_samples_gles(&mut self) -> Vec<GpuSample<T>> {
-        mem::replace(&mut self.samples, Vec::new())
+        (self.timers.take(|timer, query| {
+            timer.time_ns = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
+         }),
+         self.samplers.take(|sampler, query| {
+            sampler.count = gl.get_query_object_ui64v(query, gl::QUERY_RESULT)
+         }),
+        )
     }
 }
 
 impl<T> Drop for GpuFrameProfile<T> {
     fn drop(&mut self) {
         match self.gl.get_type() {
             gl::GlType::Gl =>  {
-                self.gl.delete_queries(&self.queries);
+                self.gl.delete_queries(&self.timers.set);
+                self.gl.delete_queries(&self.samplers.set);
             }
             gl::GlType::Gles => {},
         }
     }
 }
 
 pub struct GpuProfiler<T> {
     frames: [GpuFrameProfile<T>; MAX_PROFILE_FRAMES],
     next_frame: usize,
 }
 
 impl<T> GpuProfiler<T> {
     pub fn new(gl: &Rc<gl::Gl>) -> GpuProfiler<T> {
         GpuProfiler {
             next_frame: 0,
             frames: [
-                      GpuFrameProfile::new(Rc::clone(gl)),
-                      GpuFrameProfile::new(Rc::clone(gl)),
-                      GpuFrameProfile::new(Rc::clone(gl)),
-                      GpuFrameProfile::new(Rc::clone(gl)),
-                    ],
+                GpuFrameProfile::new(Rc::clone(gl)),
+                GpuFrameProfile::new(Rc::clone(gl)),
+                GpuFrameProfile::new(Rc::clone(gl)),
+                GpuFrameProfile::new(Rc::clone(gl)),
+            ],
         }
     }
 
-    pub fn build_samples(&mut self) -> Option<(FrameId, Vec<GpuSample<T>>)> {
+    pub fn build_samples(&mut self) -> Option<(FrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>)> {
         let frame = &mut self.frames[self.next_frame];
         if frame.is_valid() {
-            Some((frame.frame_id, frame.build_samples()))
+            let (timers, samplers) = frame.build_samples();
+            Some((frame.frame_id, timers, samplers))
         } else {
             None
         }
     }
 
     pub fn begin_frame(&mut self, frame_id: FrameId) {
         let frame = &mut self.frames[self.next_frame];
         frame.begin_frame(frame_id);
@@ -638,16 +647,25 @@ impl<T> GpuProfiler<T> {
         frame.end_frame();
         self.next_frame = (self.next_frame + 1) % MAX_PROFILE_FRAMES;
     }
 
     pub fn add_marker(&mut self, tag: T) -> GpuMarker
     where T: NamedTag {
         self.frames[self.next_frame].add_marker(tag)
     }
+
+    pub fn add_sampler(&mut self, tag: T)
+    where T: NamedTag {
+        self.frames[self.next_frame].add_sampler(tag)
+    }
+
+    pub fn done_sampler(&mut self) {
+        self.frames[self.next_frame].done_sampler()
+    }
 }
 
 #[must_use]
 pub struct GpuMarker{
     gl: Rc<gl::Gl>,
 }
 
 impl GpuMarker {
@@ -684,16 +702,17 @@ impl Drop for GpuMarker {
             gl::GlType::Gl =>  {
                 self.gl.pop_group_marker_ext();
             }
             gl::GlType::Gles => {},
         }
     }
 }
 
+
 #[derive(Debug, Copy, Clone)]
 pub enum VertexUsageHint {
     Static,
     Dynamic,
     Stream,
 }
 
 impl VertexUsageHint {
@@ -800,16 +819,24 @@ impl Device {
     pub fn max_texture_size(&self) -> u32 {
         self.max_texture_size
     }
 
     pub fn get_capabilities(&self) -> &Capabilities {
         &self.capabilities
     }
 
+    pub fn reset_state(&mut self) {
+        self.bound_textures = [ TextureId::invalid(); 16 ];
+        self.bound_vao = 0;
+        self.bound_pbo = PBOId(0);
+        self.bound_read_fbo = FBOId(0);
+        self.bound_draw_fbo = FBOId(0);
+    }
+
     pub fn compile_shader(gl: &gl::Gl,
                           name: &str,
                           shader_type: gl::GLenum,
                           source: String)
                           -> Result<gl::GLuint, ShaderError> {
         debug!("compile {:?}", name);
         let id = gl.create_shader(shader_type);
         gl.shader_source(id, &[source.as_bytes()]);
--- a/gfx/webrender/src/frame.rs
+++ b/gfx/webrender/src/frame.rs
@@ -989,32 +989,25 @@ impl Frame {
         // irregular size in the texture cache.
         //
         // For the case where we don't tile along an axis, we can still perform the repetition in
         // the shader (for this particular axis), and it is worth special-casing for this to avoid
         // generating many primitives.
         // This can happen with very tall and thin images used as a repeating background.
         // Apparently web authors do that...
 
-        let mut repeat_x = false;
-        let mut repeat_y = false;
+        let needs_repeat_x = info.stretch_size.width < item_rect.size.width;
+        let needs_repeat_y = info.stretch_size.height < item_rect.size.height;
 
-        if info.stretch_size.width < item_rect.size.width {
-            // If this assert blows up it means we haven't properly decomposed the image in decompose_image_row.
-            debug_assert!(image_size.width <= tile_size);
-            // we don't actually tile in this dimension so repeating can be done in the shader.
-            repeat_x = true;
-        }
+        let tiled_in_x = image_size.width > tile_size;
+        let tiled_in_y = image_size.height > tile_size;
 
-        if info.stretch_size.height < item_rect.size.height {
-            // If this assert blows up it means we haven't properly decomposed the image in decompose_image.
-            debug_assert!(image_size.height <= tile_size);
-            // we don't actually tile in this dimension so repeating can be done in the shader.
-            repeat_y = true;
-        }
+        // If we don't actually tile in this dimension, repeating can be done in the shader.
+        let shader_repeat_x = needs_repeat_x && !tiled_in_x;
+        let shader_repeat_y = needs_repeat_y && !tiled_in_y;
 
         let tile_size_f32 = tile_size as f32;
 
         // Note: this rounds down so it excludes the partially filled tiles on the right and
         // bottom edges (we handle them separately below).
         let num_tiles_x = (image_size.width / tile_size) as u16;
         let num_tiles_y = (image_size.height / tile_size) as u16;
 
@@ -1038,106 +1031,106 @@ impl Frame {
                 self.add_tile_primitive(clip_and_scroll,
                                         builder,
                                         item_rect,
                                         item_local_clip,
                                         info,
                                         TileOffset::new(tx, ty),
                                         stretched_tile_size,
                                         1.0, 1.0,
-                                        repeat_x, repeat_y);
+                                        shader_repeat_x, shader_repeat_y);
             }
             if leftover.width != 0 {
                 // Tiles on the right edge that are smaller than the tile size.
                 self.add_tile_primitive(clip_and_scroll,
                                         builder,
                                         item_rect,
                                         item_local_clip,
                                         info,
                                         TileOffset::new(num_tiles_x, ty),
                                         stretched_tile_size,
                                         (leftover.width as f32) / tile_size_f32,
                                         1.0,
-                                        repeat_x, repeat_y);
+                                        shader_repeat_x, shader_repeat_y);
             }
         }
 
         if leftover.height != 0 {
             for tx in 0..num_tiles_x {
                 // Tiles on the bottom edge that are smaller than the tile size.
                 self.add_tile_primitive(clip_and_scroll,
                                         builder,
                                         item_rect,
                                         item_local_clip,
                                         info,
                                         TileOffset::new(tx, num_tiles_y),
                                         stretched_tile_size,
                                         1.0,
                                         (leftover.height as f32) / tile_size_f32,
-                                        repeat_x,
-                                        repeat_y);
+                                        shader_repeat_x,
+                                        shader_repeat_y);
             }
 
             if leftover.width != 0 {
                 // Finally, the bottom-right tile with a "leftover" size.
                 self.add_tile_primitive(clip_and_scroll,
                                         builder,
                                         item_rect,
                                         item_local_clip,
                                         info,
                                         TileOffset::new(num_tiles_x, num_tiles_y),
                                         stretched_tile_size,
                                         (leftover.width as f32) / tile_size_f32,
                                         (leftover.height as f32) / tile_size_f32,
-                                        repeat_x,
-                                        repeat_y);
+                                        shader_repeat_x,
+                                        shader_repeat_y);
             }
         }
     }
 
     fn add_tile_primitive(&mut self,
                           clip_and_scroll: ClipAndScrollInfo,
                           builder: &mut FrameBuilder,
                           item_rect: &LayerRect,
                           item_local_clip: &LocalClip,
                           info: &ImageDisplayItem,
                           tile_offset: TileOffset,
                           stretched_tile_size: LayerSize,
                           tile_ratio_width: f32,
                           tile_ratio_height: f32,
-                          repeat_x: bool,
-                          repeat_y: bool) {
+                          shader_repeat_x: bool,
+                          shader_repeat_y: bool) {
         // If the the image is tiled along a given axis, we can't have the shader compute
         // the image repetition pattern. In this case we base the primitive's rectangle size
         // on the stretched tile size which effectively cancels the repetion (and repetition
         // has to be emulated by generating more primitives).
-        // If the image is not tiling along this axis, we can perform the repetition in the
+        // If the image is not tiled along this axis, we can perform the repetition in the
         // shader. in this case we use the item's size in the primitive (on that particular
         // axis).
-        // See the repeat_x/y code below.
+        // See the shader_repeat_x/y code below.
 
         let stretched_size = LayerSize::new(
             stretched_tile_size.width * tile_ratio_width,
             stretched_tile_size.height * tile_ratio_height,
         );
 
         let mut prim_rect = LayerRect::new(
             item_rect.origin + LayerVector2D::new(
                 tile_offset.x as f32 * stretched_tile_size.width,
                 tile_offset.y as f32 * stretched_tile_size.height,
             ),
             stretched_size,
         );
 
-        if repeat_x {
+        if shader_repeat_x {
             assert_eq!(tile_offset.x, 0);
             prim_rect.size.width = item_rect.size.width;
         }
 
-        if repeat_y {
+        if shader_repeat_y {
             assert_eq!(tile_offset.y, 0);
             prim_rect.size.height = item_rect.size.height;
         }
 
         // Fix up the primitive's rect if it overflows the original item rect.
         if let Some(prim_rect) = prim_rect.intersection(item_rect) {
             builder.add_image(clip_and_scroll,
                               prim_rect,
--- a/gfx/webrender/src/gpu_cache.rs
+++ b/gfx/webrender/src/gpu_cache.rs
@@ -463,16 +463,20 @@ impl<'a> GpuDataRequest<'a> {
     {
         self.texture.pending_blocks.push(block.into());
     }
 
     pub fn extend_from_slice(&mut self, blocks: &[GpuBlockData]) {
         self.texture.pending_blocks.extend_from_slice(blocks);
     }
 
+    pub fn current_used_block_num(&self) -> usize {
+        self.texture.pending_blocks.len() - self.start_index
+    }
+
     /// Consume the request and return the number of blocks written
     pub fn close(self) -> usize {
         self.texture.pending_blocks.len() - self.start_index
     }
 }
 
 impl<'a> Drop for GpuDataRequest<'a> {
     fn drop(&mut self) {
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -1,12 +1,13 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+use api::DebugCommand;
 use device::TextureFilter;
 use fxhash::FxHasher;
 use profiler::BackendProfileCounters;
 use std::collections::{HashMap, HashSet};
 use std::f32;
 use std::hash::BuildHasherDefault;
 use std::{i32, usize};
 use std::path::PathBuf;
@@ -180,16 +181,17 @@ impl RendererFrame {
             pipeline_epoch_map,
             layers_bouncing_back,
             frame,
         }
     }
 }
 
 pub enum ResultMsg {
+    DebugCommand(DebugCommand),
     RefreshShader(PathBuf),
     NewFrame(DocumentId, RendererFrame, TextureUpdateList, BackendProfileCounters),
     UpdateResources { updates: TextureUpdateList, cancel_rendering: bool },
 }
 
 #[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)]
 pub struct StackingContextIndex(pub usize);
 
--- a/gfx/webrender/src/lib.rs
+++ b/gfx/webrender/src/lib.rs
@@ -7,17 +7,17 @@ A GPU based renderer for the web.
 
 It serves as an experimental render backend for [Servo](https://servo.org/),
 but it can also be used as such in a standalone application.
 
 # External dependencies
 WebRender currently depends on [FreeType](https://www.freetype.org/)
 
 # Api Structure
-The main entry point to WebRender is the `webrender::renderer::Renderer`.
+The main entry point to WebRender is the `webrender::Renderer`.
 
 By calling `Renderer::new(...)` you get a `Renderer`, as well as a `RenderApiSender`.
 Your `Renderer` is responsible to render the previously processed frames onto the screen.
 
 By calling `yourRenderApiSender.create_api()`, you'll get a `RenderApi` instance,
 which is responsible for managing resources and documents. A worker thread is used internally
 to untie the workload from the application thread and therefore be able to make better use of
 multicore systems.
@@ -50,16 +50,18 @@ extern crate bitflags;
 extern crate thread_profiler;
 
 mod border;
 mod clip_scroll_node;
 mod clip_scroll_tree;
 mod debug_colors;
 mod debug_font_data;
 mod debug_render;
+#[cfg(feature = "debugger")]
+mod debug_server;
 mod device;
 mod ellipse;
 mod frame;
 mod frame_builder;
 mod freelist;
 mod geometry;
 mod glyph_cache;
 mod glyph_rasterizer;
@@ -67,16 +69,17 @@ mod gpu_cache;
 mod internal_types;
 mod mask_cache;
 mod prim_store;
 mod print_tree;
 mod profiler;
 mod record;
 mod render_backend;
 mod render_task;
+mod renderer;
 mod resource_cache;
 mod scene;
 mod spring;
 mod texture_allocator;
 mod texture_cache;
 mod tiling;
 mod util;
 
@@ -103,18 +106,16 @@ mod platform {
         pub mod font;
     }
     #[cfg(target_os = "windows")]
     pub mod windows {
         pub mod font;
     }
 }
 
-pub mod renderer;
-
 #[cfg(target_os="macos")]
 extern crate core_graphics;
 #[cfg(target_os="macos")]
 extern crate core_text;
 
 #[cfg(all(unix, not(target_os="macos")))]
 extern crate freetype;
 
@@ -128,19 +129,28 @@ extern crate fxhash;
 extern crate gleam;
 extern crate num_traits;
 //extern crate notify;
 extern crate time;
 pub extern crate webrender_api;
 extern crate byteorder;
 extern crate rayon;
 extern crate plane_split;
+#[cfg(feature = "debugger")]
+extern crate ws;
+#[cfg(feature = "debugger")]
+extern crate serde_json;
+#[cfg(feature = "debugger")]
+#[macro_use]
+extern crate serde_derive;
 
 #[cfg(any(target_os="macos", target_os="windows"))]
 extern crate gamma_lut;
 
 pub use renderer::{ExternalImage, ExternalImageSource, ExternalImageHandler};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, ReadPixelsFormat, Renderer, RendererOptions};
+pub use renderer::{CpuProfile, GpuProfile, DebugFlags, RendererKind};
+pub use renderer::{MAX_VERTEX_TEXTURE_WIDTH, PROFILER_DBG, RENDER_TARGET_DBG, TEXTURE_CACHE_DBG};
 
 pub use webrender_api as api;
 
 #[doc(hidden)]
 pub use device::build_shader_strings;
--- a/gfx/webrender/src/prim_store.rs
+++ b/gfx/webrender/src/prim_store.rs
@@ -588,16 +588,18 @@ impl TextRunPrimitiveCpu {
     fn write_gpu_blocks(&self,
                         request: &mut GpuDataRequest) {
         request.push(self.color);
         request.push([self.offset.x,
                       self.offset.y,
                       self.subpx_dir as u32 as f32,
                       0.0]);
         request.extend_from_slice(&self.glyph_gpu_blocks);
+
+        assert!(request.current_used_block_num() <= MAX_VERTEX_TEXTURE_WIDTH);
     }
 }
 
 #[derive(Debug, Clone)]
 #[repr(C)]
 struct GlyphPrimitive {
     offset: LayerPoint,
     padding: LayerPoint,
--- a/gfx/webrender/src/profiler.rs
+++ b/gfx/webrender/src/profiler.rs
@@ -1,14 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use debug_render::DebugRenderer;
-use device::{Device, GpuMarker, GpuSample, NamedTag};
+use device::{Device, GpuMarker, GpuTimer, GpuSampler, NamedTag};
 use euclid::{Point2D, Size2D, Rect, vec2};
 use std::collections::vec_deque::VecDeque;
 use std::f32;
 use std::mem;
 use api::{ColorF, ColorU};
 use time::precise_time_ns;
 
 const GRAPH_WIDTH: f32 = 1024.0;
@@ -31,24 +31,29 @@ impl NamedTag for GpuProfileTag {
     }
 }
 
 trait ProfileCounter {
     fn description(&self) -> &'static str;
     fn value(&self) -> String;
 }
 
+impl<'a, T: ProfileCounter> ProfileCounter for &'a T {
+    fn description(&self) -> &'static str { (*self).description() }
+    fn value(&self) -> String { (*self).value() }
+}
+
 #[derive(Clone)]
 pub struct IntProfileCounter {
     description: &'static str,
     value: usize,
 }
 
 impl IntProfileCounter {
-    fn new(description: &'static str) -> IntProfileCounter {
+    fn new(description: &'static str) -> Self {
         IntProfileCounter {
             description,
             value: 0,
         }
     }
 
     fn reset(&mut self) {
         self.value = 0;
@@ -79,25 +84,40 @@ impl ProfileCounter for IntProfileCounte
         self.description
     }
 
     fn value(&self) -> String {
         format!("{}", self.value)
     }
 }
 
+pub struct FloatProfileCounter {
+    description: &'static str,
+    value: f32,
+}
+
+impl ProfileCounter for FloatProfileCounter {
+    fn description(&self) -> &'static str {
+        self.description
+    }
+
+    fn value(&self) -> String {
+        format!("{:.2}", self.value)
+    }
+}
+
 #[derive(Clone)]
 pub struct ResourceProfileCounter {
     description: &'static str,
     value: usize,
     size: usize,
 }
 
 impl ResourceProfileCounter {
-    fn new(description: &'static str) -> ResourceProfileCounter {
+    fn new(description: &'static str) -> Self {
         ResourceProfileCounter {
             description,
             value: 0,
             size: 0,
         }
     }
 
     #[allow(dead_code)]
@@ -139,17 +159,17 @@ pub struct Timer<'a> {
 impl<'a> Drop for Timer<'a> {
     fn drop(&mut self) {
         let end = precise_time_ns();
         *self.result += end - self.start;
     }
 }
 
 impl TimeProfileCounter {
-    pub fn new(description: &'static str, invert: bool) -> TimeProfileCounter {
+    pub fn new(description: &'static str, invert: bool) -> Self {
         TimeProfileCounter {
             description,
             nanoseconds: 0,
             invert,
         }
     }
 
     fn reset(&mut self) {
@@ -207,17 +227,17 @@ pub struct AverageTimeProfileCounter {
     start_ns: u64,
     sum_ns: u64,
     num_samples: u64,
     nanoseconds: u64,
     invert: bool,
 }
 
 impl AverageTimeProfileCounter {
-    pub fn new(description: &'static str, invert: bool, average_over_ns: u64) -> AverageTimeProfileCounter {
+    pub fn new(description: &'static str, invert: bool, average_over_ns: u64) -> Self {
         AverageTimeProfileCounter {
             description,
             average_over_ns,
             start_ns: precise_time_ns(),
             sum_ns: 0,
             num_samples: 0,
             nanoseconds: 0,
             invert,
@@ -264,26 +284,27 @@ impl ProfileCounter for AverageTimeProfi
         if self.invert {
             format!("{:.2} fps", 1000000000.0 / self.nanoseconds as f64)
         } else {
             format!("{:.2} ms", self.nanoseconds as f64 / 1000000.0)
         }
     }
 }
 
+
 pub struct FrameProfileCounters {
     pub total_primitives: IntProfileCounter,
     pub visible_primitives: IntProfileCounter,
     pub passes: IntProfileCounter,
     pub color_targets: IntProfileCounter,
     pub alpha_targets: IntProfileCounter,
 }
 
 impl FrameProfileCounters {
-    pub fn new() -> FrameProfileCounters {
+    pub fn new() -> Self {
         FrameProfileCounters {
             total_primitives: IntProfileCounter::new("Total Primitives"),
             visible_primitives: IntProfileCounter::new("Visible Primitives"),
             passes: IntProfileCounter::new("Passes"),
             color_targets: IntProfileCounter::new("Color Targets"),
             alpha_targets: IntProfileCounter::new("Alpha Targets"),
         }
     }
@@ -293,34 +314,34 @@ impl FrameProfileCounters {
 pub struct TextureCacheProfileCounters {
     pub pages_a8: ResourceProfileCounter,
     pub pages_rgb8: ResourceProfileCounter,
     pub pages_rgba8: ResourceProfileCounter,
     pub pages_rg8: ResourceProfileCounter,
 }
 
 impl TextureCacheProfileCounters {
-    pub fn new() -> TextureCacheProfileCounters {
+    pub fn new() -> Self {
         TextureCacheProfileCounters {
             pages_a8: ResourceProfileCounter::new("Texture A8 cached pages"),
             pages_rgb8: ResourceProfileCounter::new("Texture RGB8 cached pages"),
             pages_rgba8: ResourceProfileCounter::new("Texture RGBA8 cached pages"),
             pages_rg8: ResourceProfileCounter::new("Texture RG8 cached pages"),
         }
     }
 }
 
 #[derive(Clone)]
 pub struct GpuCacheProfileCounters {
     pub allocated_rows: IntProfileCounter,
     pub allocated_blocks: IntProfileCounter,
 }
 
 impl GpuCacheProfileCounters {
-    pub fn new() -> GpuCacheProfileCounters {
+    pub fn new() -> Self {
         GpuCacheProfileCounters {
             allocated_rows: IntProfileCounter::new("GPU cache rows"),
             allocated_blocks: IntProfileCounter::new("GPU cache blocks"),
         }
     }
 }
 
 #[derive(Clone)]
@@ -344,35 +365,36 @@ pub struct IpcProfileCounters {
     pub consume_time: TimeProfileCounter,
     pub send_time: TimeProfileCounter,
     pub total_time: TimeProfileCounter,
     pub display_lists: ResourceProfileCounter,
 }
 
 impl IpcProfileCounters {
     pub fn set(&mut self,
-               build_start: u64,
-               build_end: u64,
-               send_start: u64,
-               consume_start: u64,
-               consume_end: u64,
-               display_len: usize) {
+        build_start: u64,
+        build_end: u64,
+        send_start: u64,
+        consume_start: u64,
+        consume_end: u64,
+        display_len: usize,
+    ) {
         let build_time = build_end - build_start;
         let consume_time = consume_end - consume_start;
         let send_time = consume_start - send_start;
         self.build_time.inc(build_time);
         self.consume_time.inc(consume_time);
         self.send_time.inc(send_time);
         self.total_time.inc(build_time + consume_time + send_time);
         self.display_lists.inc(display_len);
     }
 }
 
 impl BackendProfileCounters {
-    pub fn new() -> BackendProfileCounters {
+    pub fn new() -> Self {
         BackendProfileCounters {
             total_time: TimeProfileCounter::new("Backend CPU Time", false),
             resources: ResourceProfileCounters {
                 font_templates: ResourceProfileCounter::new("Font Templates"),
                 image_templates: ResourceProfileCounter::new("Image Templates"),
                 texture_cache: TextureCacheProfileCounters::new(),
                 gpu_cache: GpuCacheProfileCounters::new(),
             },
@@ -402,38 +424,38 @@ pub struct RendererProfileCounters {
     pub draw_calls: IntProfileCounter,
     pub vertices: IntProfileCounter,
     pub vao_count_and_size: ResourceProfileCounter,
 }
 
 pub struct RendererProfileTimers {
     pub cpu_time: TimeProfileCounter,
     pub gpu_time: TimeProfileCounter,
-    pub gpu_samples: Vec<GpuSample<GpuProfileTag>>,
+    pub gpu_samples: Vec<GpuTimer<GpuProfileTag>>,
 }
 
 impl RendererProfileCounters {
-    pub fn new() -> RendererProfileCounters {
+    pub fn new() -> Self {
         RendererProfileCounters {
             frame_counter: IntProfileCounter::new("Frame"),
             frame_time: AverageTimeProfileCounter::new("FPS", true, ONE_SECOND_NS / 2),
             draw_calls: IntProfileCounter::new("Draw Calls"),
             vertices: IntProfileCounter::new("Vertices"),
             vao_count_and_size: ResourceProfileCounter::new("VAO"),
         }
     }
 
     pub fn reset(&mut self) {
         self.draw_calls.reset();
         self.vertices.reset();
     }
 }
 
 impl RendererProfileTimers {
-    pub fn new() -> RendererProfileTimers {
+    pub fn new() -> Self {
         RendererProfileTimers {
             cpu_time: TimeProfileCounter::new("Compositor CPU Time", false),
             gpu_samples: Vec::new(),
             gpu_time: TimeProfileCounter::new("GPU Time", false),
         }
     }
 }
 
@@ -444,17 +466,17 @@ struct GraphStats {
 }
 
 struct ProfileGraph {
     max_samples: usize,
     values: VecDeque<f32>,
 }
 
 impl ProfileGraph {
-    fn new(max_samples: usize) -> ProfileGraph {
+    fn new(max_samples: usize) -> Self {
         ProfileGraph {
             max_samples,
             values: VecDeque::new(),
         }
     }
 
     fn push(&mut self, ns: u64) {
         let ms = ns as f64 / 1000000.0;
@@ -480,20 +502,22 @@ impl ProfileGraph {
         if !self.values.is_empty() {
             stats.mean_value = stats.mean_value / self.values.len() as f32;
         }
 
         stats
     }
 
     fn draw_graph(&self,
-                  x: f32,
-                  y: f32,
-                  description: &'static str,
-                  debug_renderer: &mut DebugRenderer) -> Rect<f32> {
+        x: f32,
+        y: f32,
+        description: &'static str,
+        debug_renderer: &mut DebugRenderer,
+    ) -> Rect<f32>
+    {
         let size = Size2D::new(600.0, 120.0);
         let line_height = debug_renderer.line_height();
         let mut rect = Rect::new(Point2D::new(x, y), size);
         let stats = self.stats();
 
         let text_color = ColorU::new(255, 255, 0, 255);
         let text_origin = rect.origin + vec2(rect.size.width, 20.0);
         debug_renderer.add_text(text_origin.x,
@@ -558,46 +582,48 @@ impl ProfileGraph {
         }
 
         rect
     }
 }
 
 struct GpuFrame {
     total_time: u64,
-    samples: Vec<GpuSample<GpuProfileTag>>,
+    samples: Vec<GpuTimer<GpuProfileTag>>,
 }
 
 struct GpuFrameCollection {
     frames: VecDeque<GpuFrame>,
 }
 
 impl GpuFrameCollection {
-    fn new() -> GpuFrameCollection {
+    fn new() -> Self {
         GpuFrameCollection {
             frames: VecDeque::new(),
         }
     }
 
-    fn push(&mut self, total_time: u64, samples: Vec<GpuSample<GpuProfileTag>>) {
+    fn push(&mut self, total_time: u64, samples: Vec<GpuTimer<GpuProfileTag>>) {
         if self.frames.len() == 20 {
             self.frames.pop_back();
         }
         self.frames.push_front(GpuFrame {
             total_time,
             samples,
         });
     }
 }
 
 impl GpuFrameCollection {
     fn draw(&self,
-            x: f32,
-            y: f32,
-            debug_renderer: &mut DebugRenderer) -> Rect<f32> {
+        x: f32,
+        y: f32,
+        debug_renderer: &mut DebugRenderer,
+    ) -> Rect<f32>
+    {
         let bounding_rect = Rect::new(Point2D::new(x, y),
                                       Size2D::new(GRAPH_WIDTH + 2.0 * GRAPH_PADDING,
                                                   GRAPH_HEIGHT + 2.0 * GRAPH_PADDING));
         let graph_rect = bounding_rect.inflate(-GRAPH_PADDING, -GRAPH_PADDING);
 
         debug_renderer.add_quad(bounding_rect.origin.x,
                                 bounding_rect.origin.y,
                                 bounding_rect.origin.x + bounding_rect.size.width,
@@ -649,34 +675,35 @@ pub struct Profiler {
     backend_time: ProfileGraph,
     compositor_time: ProfileGraph,
     gpu_time: ProfileGraph,
     gpu_frames: GpuFrameCollection,
     ipc_time: ProfileGraph,
 }
 
 impl Profiler {
-    pub fn new() -> Profiler {
+    pub fn new() -> Self {
         Profiler {
             x_left: 0.0,
             y_left: 0.0,
             x_right: 0.0,
             y_right: 0.0,
             backend_time: ProfileGraph::new(600),
             compositor_time: ProfileGraph::new(600),
             gpu_time: ProfileGraph::new(600),
             gpu_frames: GpuFrameCollection::new(),
             ipc_time: ProfileGraph::new(600),
         }
     }
 
-    fn draw_counters(&mut self,
-                     counters: &[&ProfileCounter],
-                     debug_renderer: &mut DebugRenderer,
-                     left: bool) {
+    fn draw_counters<T: ProfileCounter>(&mut self,
+        counters: &[T],
+        debug_renderer: &mut DebugRenderer,
+        left: bool,
+    ) {
         let mut label_rect = Rect::zero();
         let mut value_rect = Rect::zero();
         let (mut current_x, mut current_y) = if left {
             (self.x_left, self.y_left)
         } else {
             (self.x_right, self.y_right)
         };
         let mut color_index = 0;
@@ -728,42 +755,44 @@ impl Profiler {
         if left {
             self.y_left = new_y;
         } else {
             self.y_right = new_y;
         }
     }
 
     pub fn draw_profile(&mut self,
-                        device: &mut Device,
-                        frame_profile: &FrameProfileCounters,
-                        backend_profile: &BackendProfileCounters,
-                        renderer_profile: &RendererProfileCounters,
-                        renderer_timers: &mut RendererProfileTimers,
-                        debug_renderer: &mut DebugRenderer) {
-
+        device: &mut Device,
+        frame_profile: &FrameProfileCounters,
+        backend_profile: &BackendProfileCounters,
+        renderer_profile: &RendererProfileCounters,
+        renderer_timers: &mut RendererProfileTimers,
+        gpu_samplers: &[GpuSampler<GpuProfileTag>],
+        screen_fraction: f32,
+        debug_renderer: &mut DebugRenderer,
+    ) {
         let _gm = GpuMarker::new(device.rc_gl(), "profile");
         self.x_left = 20.0;
         self.y_left = 40.0;
         self.x_right = 400.0;
         self.y_right = 40.0;
 
         let mut gpu_time = 0;
         let gpu_samples = mem::replace(&mut renderer_timers.gpu_samples, Vec::new());
         for sample in &gpu_samples {
             gpu_time += sample.time_ns;
         }
         renderer_timers.gpu_time.set(gpu_time);
 
         self.draw_counters(&[
-            &renderer_profile.frame_counter,
-            &renderer_profile.frame_time,
+            &renderer_profile.frame_time
         ], debug_renderer, true);
 
         self.draw_counters(&[
+            &renderer_profile.frame_counter,
             &frame_profile.total_primitives,
             &frame_profile.visible_primitives,
             &frame_profile.passes,
             &frame_profile.color_targets,
             &frame_profile.alpha_targets,
             &backend_profile.resources.gpu_cache.allocated_rows,
             &backend_profile.resources.gpu_cache.allocated_blocks,
         ], debug_renderer, true);
@@ -773,37 +802,52 @@ impl Profiler {
             &backend_profile.resources.image_templates,
         ], debug_renderer, true);
 
         self.draw_counters(&[
             &backend_profile.resources.texture_cache.pages_a8,
             &backend_profile.resources.texture_cache.pages_rgb8,
             &backend_profile.resources.texture_cache.pages_rgba8,
             &backend_profile.resources.texture_cache.pages_rg8,
+            &backend_profile.ipc.display_lists,
         ], debug_renderer, true);
 
         self.draw_counters(&[
             &backend_profile.ipc.build_time,
             &backend_profile.ipc.send_time,
             &backend_profile.ipc.consume_time,
             &backend_profile.ipc.total_time,
-            &backend_profile.ipc.display_lists,
         ], debug_renderer, true);
 
         self.draw_counters(&[
             &renderer_profile.draw_calls,
             &renderer_profile.vertices,
         ], debug_renderer, true);
 
         self.draw_counters(&[
             &backend_profile.total_time,
             &renderer_timers.cpu_time,
             &renderer_timers.gpu_time,
         ], debug_renderer, false);
 
+        let mut samplers = Vec::<FloatProfileCounter>::new();
+        // Merge GPU samplers that share a label into one counter. Labels are compared by
+        // value (address identity of a `&'static str` is not guaranteed). O(N^2), small N.
+        for sampler in gpu_samplers {
+            let value = sampler.count as f32 * screen_fraction;
+            match samplers.iter().position(|s| s.description == sampler.tag.label) {
+                Some(pos) => samplers[pos].value += value,
+                None => samplers.push(FloatProfileCounter {
+                    description: sampler.tag.label,
+                    value,
+                }),
+            }
+        }
+        self.draw_counters(&samplers, debug_renderer, false);
+
         self.backend_time.push(backend_profile.total_time.nanoseconds);
         self.compositor_time.push(renderer_timers.cpu_time.nanoseconds);
         self.ipc_time.push(backend_profile.ipc.total_time.nanoseconds);
         self.gpu_time.push(gpu_time);
         self.gpu_frames.push(gpu_time, gpu_samples);
 
 
         let rect = self.backend_time.draw_graph(self.x_left, self.y_left, "CPU (backend)", debug_renderer);
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -456,16 +456,25 @@ impl RenderBackend {
                     let pending_update = self.resource_cache.pending_updates();
                     let msg = ResultMsg::UpdateResources { updates: pending_update, cancel_rendering: true };
                     self.result_tx.send(msg).unwrap();
                     // We use new_frame_ready to wake up the renderer and get the
                     // resource updates processed, but the UpdateResources message
                     // will cancel rendering the frame.
                     self.notifier.lock().unwrap().as_mut().unwrap().new_frame_ready();
                 }
+                ApiMsg::DebugCommand(option) => {
+                    let msg = ResultMsg::DebugCommand(option);
+                    self.result_tx.send(msg).unwrap();
+                    let notifier = self.notifier.lock();
+                    notifier.unwrap()
+                            .as_mut()
+                            .unwrap()
+                            .new_frame_ready();
+                }
                 ApiMsg::ShutDown => {
                     let notifier = self.notifier.lock();
                     notifier.unwrap()
                             .as_mut()
                             .unwrap()
                             .shut_down();
                     break;
                 }
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -243,16 +243,25 @@ impl RenderTask {
                     raw_clips: &[ClipWorkItem],
                     extra_clip: Option<ClipWorkItem>)
                     -> Option<RenderTask> {
         // Filter out all the clip instances that don't contribute to the result
         let mut inner_rect = Some(task_rect);
         let clips: Vec<_> = raw_clips.iter()
                                      .chain(extra_clip.iter())
                                      .filter(|&&(_, ref clip_info)| {
+            // If this clip does not contribute to a mask, then ensure
+            // it gets filtered out here. Otherwise, if a mask is
+            // created (by a different clip in the list), the allocated
+            // rectangle for the mask could end up being much bigger
+            // than is actually required.
+            if !clip_info.is_masking() {
+                return false;
+            }
+
             match clip_info.bounds.inner {
                 Some(ref inner) if !inner.device_rect.is_empty() => {
                     inner_rect = inner_rect.and_then(|r| r.intersection(&inner.device_rect));
                     !inner.device_rect.contains_rect(&task_rect)
                 }
                 _ => {
                     inner_rect = None;
                     true
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -4,49 +4,58 @@
 
 //! The webrender API.
 //!
 //! The `webrender::renderer` module provides the interface to webrender, which
 //! is accessible through [`Renderer`][renderer]
 //!
 //! [renderer]: struct.Renderer.html
 
+#[cfg(not(feature = "debugger"))]
+use api::ApiMsg;
+#[cfg(not(feature = "debugger"))]
+use api::channel::MsgSender;
+use api::DebugCommand;
 use debug_colors;
 use debug_render::DebugRenderer;
+#[cfg(feature = "debugger")]
+use debug_server::{BatchList, DebugMsg, DebugServer};
 use device::{DepthFunction, Device, FrameId, Program, TextureId, VertexDescriptor, GpuMarker, GpuProfiler, PBOId};
-use device::{GpuSample, TextureFilter, VAO, VertexUsageHint, FileWatcherHandler, TextureTarget, ShaderError};
+use device::{GpuTimer, TextureFilter, VAO, VertexUsageHint, FileWatcherHandler, TextureTarget, ShaderError};
 use device::{get_gl_format_bgra, VertexAttribute, VertexAttributeKind};
 use euclid::{Transform3D, rect};
 use frame_builder::FrameBuilderConfig;
 use gleam::gl;
 use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
 use internal_types::{FastHashMap, CacheTextureId, RendererFrame, ResultMsg, TextureUpdateOp};
 use internal_types::{TextureUpdateList, RenderTargetMode, TextureUpdateSource};
 use internal_types::{ORTHO_NEAR_PLANE, ORTHO_FAR_PLANE, SourceTexture};
 use internal_types::{BatchTextures, TextureSampler};
 use profiler::{Profiler, BackendProfileCounters};
 use profiler::{GpuProfileTag, RendererProfileTimers, RendererProfileCounters};
 use record::ApiRecordingReceiver;
 use render_backend::RenderBackend;
 use render_task::RenderTaskTree;
+#[cfg(feature = "debugger")]
+use serde_json;
 use std;
 use std::cmp;
 use std::collections::VecDeque;
 use std::f32;
 use std::marker::PhantomData;
 use std::mem;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use std::thread;
 use texture_cache::TextureCache;
 use rayon::ThreadPool;
 use rayon::Configuration as ThreadPoolConfig;
-use tiling::{AlphaBatchKind, BlurCommand, Frame, PrimitiveBatch, RenderTarget};
+use tiling::{AlphaBatchKey, AlphaBatchKind, BlurCommand, Frame, RenderTarget};
 use tiling::{AlphaRenderTarget, CacheClipInstance, PrimitiveInstance, ColorRenderTarget, RenderTargetKind};
 use time::precise_time_ns;
 use thread_profiler::{register_thread_with_profiler, write_profile};
 use util::TransformedRectKind;
 use api::{ColorF, Epoch, PipelineId, RenderApiSender, RenderNotifier};
 use api::{ExternalImageId, ExternalImageType, ImageFormat};
 use api::{DeviceIntRect, DeviceUintRect, DeviceIntPoint, DeviceIntSize, DeviceUintSize};
 use api::{BlobImageRenderer, channel, FontRenderMode};
@@ -75,16 +84,44 @@ const GPU_TAG_PRIM_GRADIENT: GpuProfileT
 const GPU_TAG_PRIM_ANGLE_GRADIENT: GpuProfileTag = GpuProfileTag { label: "AngleGradient", color: debug_colors::POWDERBLUE };
 const GPU_TAG_PRIM_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag { label: "RadialGradient", color: debug_colors::LIGHTPINK };
 const GPU_TAG_PRIM_BOX_SHADOW: GpuProfileTag = GpuProfileTag { label: "BoxShadow", color: debug_colors::CYAN };
 const GPU_TAG_PRIM_BORDER_CORNER: GpuProfileTag = GpuProfileTag { label: "BorderCorner", color: debug_colors::DARKSLATEGREY };
 const GPU_TAG_PRIM_BORDER_EDGE: GpuProfileTag = GpuProfileTag { label: "BorderEdge", color: debug_colors::LAVENDER };
 const GPU_TAG_PRIM_CACHE_IMAGE: GpuProfileTag = GpuProfileTag { label: "CacheImage", color: debug_colors::SILVER };
 const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag { label: "Blur", color: debug_colors::VIOLET };
 
+const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag { label: "Alpha Targets", color: debug_colors::BLACK };
+const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag { label: "Opaque Pass", color: debug_colors::BLACK };
+const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag { label: "Transparent Pass", color: debug_colors::BLACK };
+
+#[cfg(feature = "debugger")]
+impl AlphaBatchKind {
+    fn debug_name(&self) -> &'static str {
+        match *self {
+            AlphaBatchKind::Composite { .. } => "Composite",
+            AlphaBatchKind::HardwareComposite => "HardwareComposite",
+            AlphaBatchKind::SplitComposite => "SplitComposite",
+            AlphaBatchKind::Blend => "Blend",
+            AlphaBatchKind::Rectangle => "Rectangle",
+            AlphaBatchKind::TextRun => "TextRun",
+            AlphaBatchKind::Image(..) => "Image",
+            AlphaBatchKind::YuvImage(..) => "YuvImage",
+            AlphaBatchKind::AlignedGradient => "AlignedGradient",
+            AlphaBatchKind::AngleGradient => "AngleGradient",
+            AlphaBatchKind::RadialGradient => "RadialGradient",
+            AlphaBatchKind::BoxShadow => "BoxShadow",
+            AlphaBatchKind::CacheImage => "CacheImage",
+            AlphaBatchKind::BorderCorner => "BorderCorner",
+            AlphaBatchKind::BorderEdge => "BorderEdge",
+            AlphaBatchKind::Line => "Line",
+        }
+    }
+}
+
 bitflags! {
     #[derive(Default)]
     pub struct DebugFlags: u32 {
         const PROFILER_DBG      = 1 << 0;
         const RENDER_TARGET_DBG = 1 << 1;
         const TEXTURE_CACHE_DBG = 1 << 2;
     }
 }
@@ -133,17 +170,16 @@ enum VertexArrayKind {
     Blur,
     Clip,
 }
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum VertexFormat {
     PrimitiveInstances,
     Blur,
-    Clip,
 }
 
 #[derive(Clone, Debug, PartialEq)]
 pub enum GraphicsApi {
     OpenGL,
 }
 
 #[derive(Clone, Debug)]
@@ -208,20 +244,20 @@ pub enum RendererKind {
 
 #[derive(Debug)]
 pub struct GpuProfile {
     pub frame_id: FrameId,
     pub paint_time_ns: u64,
 }
 
 impl GpuProfile {
-    fn new<T>(frame_id: FrameId, samples: &[GpuSample<T>]) -> GpuProfile {
+    fn new<T>(frame_id: FrameId, timers: &[GpuTimer<T>]) -> GpuProfile {
         let mut paint_time_ns = 0;
-        for sample in samples {
-            paint_time_ns += sample.time_ns;
+        for timer in timers {
+            paint_time_ns += timer.time_ns;
         }
         GpuProfile {
             frame_id,
             paint_time_ns,
         }
     }
 }
 
@@ -725,17 +761,16 @@ fn create_prim_shader(name: &'static str
         prefix.push_str(&format!("#define WR_FEATURE_{}\n", feature));
     }
 
     debug!("PrimShader {}", name);
 
     let vertex_descriptor = match vertex_format {
         VertexFormat::PrimitiveInstances => DESC_PRIM_INSTANCES,
         VertexFormat::Blur => DESC_BLUR,
-        VertexFormat::Clip => DESC_CLIP,
     };
 
     device.create_program(name,
                           &prefix,
                           &vertex_descriptor)
 }
 
 fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
@@ -775,16 +810,17 @@ pub enum ReadPixelsFormat {
     Rgba8,
     Bgra8,
 }
 
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
 pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
+    debug_server: DebugServer,
     device: Device,
     pending_texture_updates: Vec<TextureUpdateList>,
     pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
     pending_shader_updates: Vec<PathBuf>,
     current_frame: Option<RendererFrame>,
 
     // These are "cache shaders". These shaders are used to
     // draw intermediate results to cache targets. The results
@@ -913,16 +949,17 @@ impl Renderer {
     pub fn new(gl: Rc<gl::Gl>, mut options: RendererOptions) -> Result<(Renderer, RenderApiSender), InitError> {
 
         let (api_tx, api_rx) = try!{ channel::msg_channel() };
         let (payload_tx, payload_rx) = try!{ channel::payload_channel() };
         let (result_tx, result_rx) = channel();
         let gl_type = gl.get_type();
 
         let notifier = Arc::new(Mutex::new(None));
+        let debug_server = DebugServer::new(api_tx.clone());
 
         let file_watch_handler = FileWatcher {
             result_tx: result_tx.clone(),
             notifier: Arc::clone(&notifier),
         };
 
         let mut device = Device::new(
             gl,
@@ -1332,16 +1369,17 @@ impl Renderer {
         })};
 
         let gpu_cache_texture = CacheTexture::new(&mut device);
 
         let gpu_profile = GpuProfiler::new(device.rc_gl());
 
         let renderer = Renderer {
             result_rx,
+            debug_server,
             device,
             current_frame: None,
             pending_texture_updates: Vec::new(),
             pending_gpu_cache_updates: Vec::new(),
             pending_shader_updates: Vec::new(),
             cs_box_shadow,
             cs_text_run,
             cs_line,
@@ -1476,16 +1514,105 @@ impl Renderer {
                     // pressure event.
                     if cancel_rendering {
                         self.current_frame = None;
                     }
                 }
                 ResultMsg::RefreshShader(path) => {
                     self.pending_shader_updates.push(path);
                 }
+                ResultMsg::DebugCommand(command) => {
+                    self.handle_debug_command(command);
+                }
+            }
+        }
+    }
+
+    #[cfg(not(feature = "debugger"))]
+    fn update_debug_server(&self) {
+        // Avoid unused param warning.
+        let _ = &self.debug_server;
+    }
+
+    #[cfg(feature = "debugger")]
+    fn update_debug_server(&self) {
+        while let Ok(msg) = self.debug_server.debug_rx.try_recv() {
+            match msg {
+                DebugMsg::FetchBatches(sender) => {
+                    let mut batch_list = BatchList::new();
+
+                    if let Some(frame) = self.current_frame.as_ref().and_then(|frame| frame.frame.as_ref()) {
+                        for pass in &frame.passes {
+                            for target in &pass.alpha_targets.targets {
+                                batch_list.push("[Clip] Clear", target.clip_batcher.border_clears.len());
+                                batch_list.push("[Clip] Borders", target.clip_batcher.borders.len());
+                                batch_list.push("[Clip] Rectangles", target.clip_batcher.rectangles.len());
+                                for (_, items) in target.clip_batcher.images.iter() {
+                                    batch_list.push("[Clip] Image mask", items.len());
+                                }
+                            }
+
+                            for target in &pass.color_targets.targets {
+                                batch_list.push("[Cache] Vertical Blur", target.vertical_blurs.len());
+                                batch_list.push("[Cache] Horizontal Blur", target.horizontal_blurs.len());
+                                batch_list.push("[Cache] Box Shadow", target.box_shadow_cache_prims.len());
+                                batch_list.push("[Cache] Text Shadow", target.text_run_cache_prims.len());
+                                batch_list.push("[Cache] Lines", target.line_cache_prims.len());
+
+                                for batch in target.alpha_batcher
+                                                   .batch_list
+                                                   .opaque_batch_list
+                                                   .batches
+                                                   .iter()
+                                                   .rev() {
+                                    batch_list.push(batch.key.kind.debug_name(), batch.instances.len());
+                                }
+
+                                for batch in &target.alpha_batcher
+                                                    .batch_list
+                                                    .alpha_batch_list
+                                                    .batches {
+                                    batch_list.push(batch.key.kind.debug_name(), batch.instances.len());
+                                }
+                            }
+                        }
+                    }
+
+                    let json = serde_json::to_string(&batch_list).unwrap();
+                    sender.send(json).ok();
+                }
+            }
+        }
+    }
+
+    fn handle_debug_command(&mut self, command: DebugCommand) {
+        match command {
+            DebugCommand::EnableProfiler(enable) => {
+                if enable {
+                    self.debug_flags.insert(PROFILER_DBG);
+                } else {
+                    self.debug_flags.remove(PROFILER_DBG);
+                }
+            }
+            DebugCommand::EnableTextureCacheDebug(enable) => {
+                if enable {
+                    self.debug_flags.insert(TEXTURE_CACHE_DBG);
+                } else {
+                    self.debug_flags.remove(TEXTURE_CACHE_DBG);
+                }
+            }
+            DebugCommand::EnableRenderTargetDebug(enable) => {
+                if enable {
+                    self.debug_flags.insert(RENDER_TARGET_DBG);
+                } else {
+                    self.debug_flags.remove(RENDER_TARGET_DBG);
+                }
+            }
+            DebugCommand::Flush => {
+                self.update_debug_server();
             }
         }
     }
 
     /// Set a callback for handling external images.
     pub fn set_external_image_handler(&mut self, handler: Box<ExternalImageHandler>) {
         self.external_image_handler = Some(handler);
     }
@@ -1502,30 +1629,32 @@ impl Renderer {
     /// A Frame is supplied by calling [`generate_frame()`][genframe].
     /// [genframe]: ../../webrender_api/struct.DocumentApi.html#method.generate_frame
     pub fn render(&mut self, framebuffer_size: DeviceUintSize) {
         profile_scope!("render");
 
         if let Some(mut frame) = self.current_frame.take() {
             if let Some(ref mut frame) = frame.frame {
                 let mut profile_timers = RendererProfileTimers::new();
+                let mut profile_samplers = Vec::new();
 
                 {
                     //Note: avoiding `self.gpu_profile.add_marker` - it would block here
                     let _gm = GpuMarker::new(self.device.rc_gl(), "build samples");
                     // Block CPU waiting for last frame's GPU profiles to arrive.
                     // In general this shouldn't block unless heavily GPU limited.
-                    if let Some((gpu_frame_id, samples)) = self.gpu_profile.build_samples() {
+                    if let Some((gpu_frame_id, timers, samplers)) = self.gpu_profile.build_samples() {
                         if self.max_recorded_profiles > 0 {
                             while self.gpu_profiles.len() >= self.max_recorded_profiles {
                                 self.gpu_profiles.pop_front();
                             }
-                            self.gpu_profiles.push_back(GpuProfile::new(gpu_frame_id, &samples));
+                            self.gpu_profiles.push_back(GpuProfile::new(gpu_frame_id, &timers));
                         }
-                        profile_timers.gpu_samples = samples;
+                        profile_timers.gpu_samples = timers;
+                        profile_samplers = samplers;
                     }
                 }
 
                 let cpu_frame_id = profile_timers.cpu_time.profile(|| {
                     let cpu_frame_id = {
                         let _gm = GpuMarker::new(self.device.rc_gl(), "begin frame");
                         let frame_id = self.device.begin_frame(frame.device_pixel_ratio);
                         self.gpu_profile.begin_frame(frame_id);
@@ -1561,21 +1690,25 @@ impl Renderer {
                     let cpu_profile = CpuProfile::new(cpu_frame_id,
                                                       self.backend_profile_counters.total_time.get(),
                                                       profile_timers.cpu_time.get(),
                                                       self.profile_counters.draw_calls.get());
                     self.cpu_profiles.push_back(cpu_profile);
                 }
 
                 if self.debug_flags.contains(PROFILER_DBG) {
+                    let screen_fraction = 1.0 / //TODO: take device/pixel ratio into equation?
+                        (framebuffer_size.width as f32 * framebuffer_size.height as f32);
                     self.profiler.draw_profile(&mut self.device,
                                                &frame.profile_counters,
                                                &self.backend_profile_counters,
                                                &self.profile_counters,
                                                &mut profile_timers,
+                                               &profile_samplers,
+                                               screen_fraction,
                                                &mut self.debug);
                 }
 
                 self.profile_counters.reset();
                 self.profile_counters.frame_counter.inc();
 
                 let debug_size = DeviceUintSize::new(framebuffer_size.width as u32,
                                                      framebuffer_size.height as u32);
@@ -1716,32 +1849,33 @@ impl Renderer {
                 self.profile_counters.draw_calls.inc();
             }
         }
 
         self.profile_counters.vertices.add(6 * data.len());
     }
 
     fn submit_batch(&mut self,
-                    batch: &PrimitiveBatch,
+                    key: &AlphaBatchKey,
+                    instances: &[PrimitiveInstance],
                     projection: &Transform3D<f32>,
                     render_tasks: &RenderTaskTree,
                     render_target: Option<(TextureId, i32)>,
                     target_dimensions: DeviceUintSize) {
-        let transform_kind = batch.key.flags.transform_kind();
-        let needs_clipping = batch.key.flags.needs_clipping();
+        let transform_kind = key.flags.transform_kind();
+        let needs_clipping = key.flags.needs_clipping();
         debug_assert!(!needs_clipping ||
-                      match batch.key.blend_mode {
+                      match key.blend_mode {
                           BlendMode::Alpha |
                           BlendMode::PremultipliedAlpha |
                           BlendMode::Subpixel(..) => true,
                           BlendMode::None => false,
                       });
 
-        let marker = match batch.key.kind {
+        let marker = match key.kind {
             AlphaBatchKind::Composite { .. } => {
                 self.ps_composite.bind(&mut self.device, projection);
                 GPU_TAG_PRIM_COMPOSITE
             }
             AlphaBatchKind::HardwareComposite => {
                 self.ps_hw_composite.bind(&mut self.device, projection);
                 GPU_TAG_PRIM_HW_COMPOSITE
             }
@@ -1761,17 +1895,17 @@ impl Renderer {
                 }
                 GPU_TAG_PRIM_RECT
             }
             AlphaBatchKind::Line => {
                 self.ps_line.bind(&mut self.device, transform_kind, projection);
                 GPU_TAG_PRIM_LINE
             }
             AlphaBatchKind::TextRun => {
-                match batch.key.blend_mode {
+                match key.blend_mode {
                     BlendMode::Subpixel(..) => {
                         self.ps_text_run_subpixel.bind(&mut self.device, transform_kind, projection);
                     }
                     BlendMode::Alpha |
                     BlendMode::PremultipliedAlpha |
                     BlendMode::None => {
                         self.ps_text_run.bind(&mut self.device, transform_kind, projection);
                     }
@@ -1821,21 +1955,21 @@ impl Renderer {
             }
             AlphaBatchKind::CacheImage => {
                 self.ps_cache_image.bind(&mut self.device, transform_kind, projection);
                 GPU_TAG_PRIM_CACHE_IMAGE
             }
         };
 
         // Handle special case readback for composites.
-        match batch.key.kind {
+        match key.kind {
             AlphaBatchKind::Composite { task_id, source_id, backdrop_id } => {
                 // composites can't be grouped together because
                 // they may overlap and affect each other.
-                debug_assert!(batch.instances.len() == 1);
+                debug_assert!(instances.len() == 1);
                 let cache_texture = self.texture_resolver.resolve(&SourceTexture::CacheRGBA8);
 
                 // Before submitting the composite batch, do the
                 // framebuffer readbacks that are needed for each
                 // composite operation in this batch.
                 let cache_texture_dimensions = self.device.get_texture_dimensions(cache_texture);
 
                 let source = render_tasks.get(source_id);
@@ -1882,28 +2016,29 @@ impl Renderer {
 
                 // Restore draw target to current pass render target + layer.
                 self.device.bind_draw_target(render_target, Some(target_dimensions));
             }
             _ => {}
         }
 
         let _gm = self.gpu_profile.add_marker(marker);
-        self.draw_instanced_batch(&batch.instances,
+        self.draw_instanced_batch(instances,
                                   VertexArrayKind::Primitive,
-                                  &batch.key.textures);
+                                  &key.textures);
     }
 
     fn draw_color_target(&mut self,
-                         render_target: Option<(TextureId, i32)>,
-                         target: &ColorRenderTarget,
-                         target_size: DeviceUintSize,
-                         clear_color: Option<[f32; 4]>,
-                         render_tasks: &RenderTaskTree,
-                         projection: &Transform3D<f32>) {
+        render_target: Option<(TextureId, i32)>,
+        target: &ColorRenderTarget,
+        target_size: DeviceUintSize,
+        clear_color: Option<[f32; 4]>,
+        render_tasks: &RenderTaskTree,
+        projection: &Transform3D<f32>,
+    ) {
         {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
             self.device.bind_draw_target(render_target, Some(target_size));
             self.device.disable_depth();
             self.device.enable_depth_write();
             self.device.set_blend(false);
             self.device.set_blend_mode_alpha();
             match render_target {
@@ -1984,43 +2119,50 @@ impl Renderer {
 
             let _gm = self.gpu_profile.add_marker(GPU_TAG_CACHE_LINE);
             self.cs_line.bind(&mut self.device, projection);
             self.draw_instanced_batch(&target.line_cache_prims,
                                       VertexArrayKind::Primitive,
                                       &BatchTextures::no_texture());
         }
 
+        //TODO: record the pixel count for cached primitives
+
         if !target.alpha_batcher.is_empty() {
             let _gm2 = GpuMarker::new(self.device.rc_gl(), "alpha batches");
             self.device.set_blend(false);
             let mut prev_blend_mode = BlendMode::None;
 
+            self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_OPAQUE);
+
             //Note: depth equality is needed for split planes
             self.device.set_depth_func(DepthFunction::LessEqual);
             self.device.enable_depth();
             self.device.enable_depth_write();
 
             // Draw opaque batches front-to-back for maximum
             // z-buffer efficiency!
             for batch in target.alpha_batcher
                                .batch_list
-                               .opaque_batches
+                               .opaque_batch_list
+                               .batches
                                .iter()
                                .rev() {
-                self.submit_batch(batch,
+                self.submit_batch(&batch.key,
+                                  &batch.instances,
                                   &projection,
                                   render_tasks,
                                   render_target,
                                   target_size);
             }
 
             self.device.disable_depth_write();
+            self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
 
-            for batch in &target.alpha_batcher.batch_list.alpha_batches {
+            for batch in &target.alpha_batcher.batch_list.alpha_batch_list.batches {
                 if batch.key.blend_mode != prev_blend_mode {
                     match batch.key.blend_mode {
                         BlendMode::None => {
                             self.device.set_blend(false);
                         }
                         BlendMode::Alpha => {
                             self.device.set_blend(true);
                             self.device.set_blend_mode_alpha();
@@ -2032,33 +2174,38 @@ impl Renderer {
                         BlendMode::Subpixel(color) => {
                             self.device.set_blend(true);
                             self.device.set_blend_mode_subpixel(color);
                         }
                     }
                     prev_blend_mode = batch.key.blend_mode;
                 }
 
-                self.submit_batch(batch,
+                self.submit_batch(&batch.key,
+                                  &batch.instances,
                                   &projection,
                                   render_tasks,
                                   render_target,
                                   target_size);
             }
 
             self.device.disable_depth();
             self.device.set_blend(false);
+            self.gpu_profile.done_sampler();
         }
     }
 
     fn draw_alpha_target(&mut self,
-                         render_target: (TextureId, i32),
-                         target: &AlphaRenderTarget,
-                         target_size: DeviceUintSize,
-                         projection: &Transform3D<f32>) {
+        render_target: (TextureId, i32),
+        target: &AlphaRenderTarget,
+        target_size: DeviceUintSize,
+        projection: &Transform3D<f32>,
+    ) {
+        self.gpu_profile.add_sampler(GPU_SAMPLER_TAG_ALPHA);
+
         {
             let _gm = self.gpu_profile.add_marker(GPU_TAG_SETUP_TARGET);
             self.device.bind_draw_target(Some(render_target), Some(target_size));
             self.device.disable_depth();
             self.device.disable_depth_write();
 
             // TODO(gw): Applying a scissor rect and minimal clear here
             // is a very large performance win on the Intel and nVidia
@@ -2125,16 +2272,18 @@ impl Renderer {
                     ]
                 };
                 self.cs_clip_image.bind(&mut self.device, projection);
                 self.draw_instanced_batch(items,
                                           VertexArrayKind::Clip,
                                           &textures);
             }
         }
+
+        self.gpu_profile.done_sampler();
     }
 
     fn update_deferred_resolves(&mut self, frame: &mut Frame) {
         // The first thing we do is run through any pending deferred
         // resolves, and use a callback to get the UV rect for this
         // custom item. Then we patch the resource_rects structure
         // here before it's uploaded to the GPU.
         if !frame.deferred_resolves.is_empty() {
@@ -2154,16 +2303,20 @@ impl Renderer {
                     ExternalImageType::TextureRectHandle => TextureTarget::Rect,
                     ExternalImageType::TextureExternalHandle => TextureTarget::External,
                     ExternalImageType::ExternalBuffer => {
                         panic!("{:?} is not a suitable image type in update_deferred_resolves().",
                             ext_image.image_type);
                     }
                 };
 
+                // In order to produce the handle, the external image handler may call into
+                // the GL context and change some states.
+                self.device.reset_state();
+
                 let texture_id = match image.source {
                     ExternalImageSource::NativeTexture(texture_id) => TextureId::new(texture_id, texture_target),
                     _ => panic!("No native texture found."),
                 };
 
                 self.texture_resolver
                     .external_images
                     .insert((ext_image.id, ext_image.channel_index), texture_id);
@@ -2595,8 +2748,18 @@ impl Default for RendererOptions {
             max_texture_size: None,
             workers: None,
             blob_image_renderer: None,
             recorder: None,
             enable_render_on_scroll: true,
         }
     }
 }
+
+#[cfg(not(feature = "debugger"))]
+pub struct DebugServer;
+
+#[cfg(not(feature = "debugger"))]
+impl DebugServer {
+    pub fn new(_: MsgSender<ApiMsg>) -> DebugServer {
+        DebugServer
+    }
+}
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -12,17 +12,17 @@ use resource_cache::CacheItem;
 use std::cmp;
 use std::mem;
 use api::{ExternalImageType, ImageData, ImageFormat};
 use api::{DeviceUintRect, DeviceUintSize, DeviceUintPoint};
 use api::{ImageDescriptor};
 
 // The fixed number of layers for the shared texture cache.
 // There is one array texture per image format, allocated lazily.
-const TEXTURE_ARRAY_LAYERS: i32 = 2;
+const TEXTURE_ARRAY_LAYERS: i32 = 4;
 
 // The dimensions of each layer in the texture cache.
 const TEXTURE_LAYER_DIMENSIONS: u32 = 2048;
 
 // The size of each region (page) in a texture layer.
 const TEXTURE_REGION_DIMENSIONS: u32 = 512;
 
 // Maintains a simple freelist of texture IDs that are mapped
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -96,104 +96,153 @@ pub struct RenderTargetIndex(pub usize);
 pub struct RenderPassIndex(isize);
 
 #[derive(Debug)]
 struct DynamicTaskInfo {
     task_id: RenderTaskId,
     rect: DeviceIntRect,
 }
 
-pub struct BatchList {
-    pub alpha_batches: Vec<PrimitiveBatch>,
-    pub opaque_batches: Vec<PrimitiveBatch>,
+pub struct AlphaBatchList {
+    pub batches: Vec<AlphaPrimitiveBatch>,
 }
 
-impl BatchList {
-    fn new() -> BatchList {
-        BatchList {
-            alpha_batches: Vec::new(),
-            opaque_batches: Vec::new(),
+impl AlphaBatchList {
+    fn new() -> AlphaBatchList {
+        AlphaBatchList {
+            batches: Vec::new(),
         }
     }
 
-    fn with_suitable_batch<F>(&mut self,
-                              key: &AlphaBatchKey,
-                              item_bounding_rect: &DeviceIntRect,
-                              f: F) where F: Fn(&mut PrimitiveBatch) {
-        let batch = self.get_suitable_batch(key, item_bounding_rect);
-        f(batch)
-    }
-
     fn get_suitable_batch(&mut self,
                           key: &AlphaBatchKey,
-                          item_bounding_rect: &DeviceIntRect) -> &mut PrimitiveBatch {
-        let (batches, check_intersections) = match key.blend_mode {
-            BlendMode::None => {
-                (&mut self.opaque_batches, false)
-            }
-            BlendMode::Alpha | BlendMode::PremultipliedAlpha | BlendMode::Subpixel(..) => {
-                (&mut self.alpha_batches, true)
-            }
-        };
-
+                          item_bounding_rect: &DeviceIntRect) -> &mut Vec<PrimitiveInstance> {
         let mut selected_batch_index = None;
 
         // Composites always get added to their own batch.
         // This is because the result of a composite can affect
         // the input to the next composite. Perhaps we can
         // optimize this in the future.
         match key.kind {
             AlphaBatchKind::Composite { .. } => {}
             _ => {
-                'outer: for (batch_index, batch) in batches.iter()
-                                                           .enumerate()
-                                                           .rev()
-                                                           .take(10) {
+                'outer: for (batch_index, batch) in self.batches
+                                                        .iter()
+                                                        .enumerate()
+                                                        .rev()
+                                                        .take(10) {
                     if batch.key.is_compatible_with(key) {
                         selected_batch_index = Some(batch_index);
                         break;
                     }
 
                     // check for intersections
-                    if check_intersections {
-                        for item_rect in &batch.item_rects {
-                            if item_rect.intersects(item_bounding_rect) {
-                                break 'outer;
-                            }
+                    for item_rect in &batch.item_rects {
+                        if item_rect.intersects(item_bounding_rect) {
+                            break 'outer;
                         }
                     }
                 }
             }
         }
 
         if selected_batch_index.is_none() {
-            let new_batch = PrimitiveBatch::new(key.clone());
-            selected_batch_index = Some(batches.len());
-            batches.push(new_batch);
+            let new_batch = AlphaPrimitiveBatch::new(key.clone());
+            selected_batch_index = Some(self.batches.len());
+            self.batches.push(new_batch);
         }
 
-        let batch = &mut batches[selected_batch_index.unwrap()];
+        let batch = &mut self.batches[selected_batch_index.unwrap()];
         batch.item_rects.push(*item_bounding_rect);
 
-        batch
+        &mut batch.instances
+    }
+}
+
+pub struct OpaqueBatchList {
+    pub batches: Vec<OpaquePrimitiveBatch>,
+}
+
+impl OpaqueBatchList {
+    fn new() -> OpaqueBatchList {
+        OpaqueBatchList {
+            batches: Vec::new(),
+        }
+    }
+
+    fn get_suitable_batch(&mut self,
+                          key: &AlphaBatchKey) -> &mut Vec<PrimitiveInstance> {
+        let mut selected_batch_index = None;
+
+        for (batch_index, batch) in self.batches
+                                        .iter()
+                                        .enumerate()
+                                        .rev()
+                                        .take(10) {
+            if batch.key.is_compatible_with(key) {
+                selected_batch_index = Some(batch_index);
+                break;
+            }
+        }
+
+        if selected_batch_index.is_none() {
+            let new_batch = OpaquePrimitiveBatch::new(key.clone());
+            selected_batch_index = Some(self.batches.len());
+            self.batches.push(new_batch);
+        }
+
+        let batch = &mut self.batches[selected_batch_index.unwrap()];
+
+        &mut batch.instances
     }
 
     fn finalize(&mut self) {
         // Reverse the instance arrays in the opaque batches
         // to get maximum z-buffer efficiency by drawing
         // front-to-back.
         // TODO(gw): Maybe we can change the batch code to
         //           build these in reverse and avoid having
         //           to reverse the instance array here.
-        for batch in &mut self.opaque_batches {
+        for batch in &mut self.batches {
             batch.instances.reverse();
         }
     }
 }
 
+pub struct BatchList {
+    pub alpha_batch_list: AlphaBatchList,
+    pub opaque_batch_list: OpaqueBatchList,
+}
+
+impl BatchList {
+    fn new() -> BatchList {
+        BatchList {
+            alpha_batch_list: AlphaBatchList::new(),
+            opaque_batch_list: OpaqueBatchList::new(),
+        }
+    }
+
+    fn get_suitable_batch(&mut self,
+                          key: &AlphaBatchKey,
+                          item_bounding_rect: &DeviceIntRect) -> &mut Vec<PrimitiveInstance> {
+        match key.blend_mode {
+            BlendMode::None => {
+                self.opaque_batch_list.get_suitable_batch(key)
+            }
+            BlendMode::Alpha | BlendMode::PremultipliedAlpha | BlendMode::Subpixel(..) => {
+                self.alpha_batch_list.get_suitable_batch(key, item_bounding_rect)
+            }
+        }
+    }
+
+    fn finalize(&mut self) {
+        self.opaque_batch_list.finalize()
+    }
+}
+
 /// Encapsulates the logic of building batches for items that are blended.
 pub struct AlphaBatcher {
     pub batch_list: BatchList,
     tasks: Vec<RenderTaskId>,
 }
 
 impl AlphaRenderItem {
     fn add_to_batch(&self,
@@ -232,17 +281,17 @@ impl AlphaRenderItem {
 
                 let instance = CompositePrimitiveInstance::new(task_address,
                                                                src_task_address,
                                                                RenderTaskAddress(0),
                                                                filter_mode,
                                                                amount,
                                                                z);
 
-                batch.add_instance(PrimitiveInstance::from(instance));
+                batch.push(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::HardwareComposite(stacking_context_index, src_id, composite_op, z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let src_task_address = render_tasks.get_task_address(src_id);
                 let key = AlphaBatchKey::new(AlphaBatchKind::HardwareComposite,
                                              AlphaBatchKeyFlags::empty(),
                                              composite_op.to_blend_mode(),
                                              BatchTextures::no_texture());
@@ -250,17 +299,17 @@ impl AlphaRenderItem {
 
                 let instance = CompositePrimitiveInstance::new(task_address,
                                                                src_task_address,
                                                                RenderTaskAddress(0),
                                                                0,
                                                                0,
                                                                z);
 
-                batch.add_instance(PrimitiveInstance::from(instance));
+                batch.push(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::Composite(stacking_context_index,
                                        source_id,
                                        backdrop_id,
                                        mode,
                                        z) => {
                 let stacking_context = &ctx.stacking_context_store[stacking_context_index.0];
                 let key = AlphaBatchKey::new(AlphaBatchKind::Composite { task_id, source_id, backdrop_id },
@@ -273,17 +322,17 @@ impl AlphaRenderItem {
 
                 let instance = CompositePrimitiveInstance::new(task_address,
                                                                source_task_address,
                                                                backdrop_task_address,
                                                                mode as u32 as i32,
                                                                0,
                                                                z);
 
-                batch.add_instance(PrimitiveInstance::from(instance));
+                batch.push(PrimitiveInstance::from(instance));
             }
             AlphaRenderItem::Primitive(clip_scroll_group_index_opt, prim_index, z) => {
                 let prim_metadata = ctx.prim_store.get_metadata(prim_index);
                 let (transform_kind, packed_layer_index) = match clip_scroll_group_index_opt {
                     Some(group_index) => {
                         let group = &ctx.clip_scroll_group_store[group_index.0];
                         let bounding_rect = group.screen_bounding_rect.as_ref().unwrap();
                         (bounding_rect.0, group.packed_layer_index)
@@ -320,49 +369,50 @@ impl AlphaRenderItem {
 
                 match prim_metadata.prim_kind {
                     PrimitiveKind::Border => {
                         let border_cpu = &ctx.prim_store.cpu_borders[prim_metadata.cpu_prim_index.0];
                         // TODO(gw): Select correct blend mode for edges and corners!!
                         let corner_key = AlphaBatchKey::new(AlphaBatchKind::BorderCorner, flags, blend_mode, no_textures);
                         let edge_key = AlphaBatchKey::new(AlphaBatchKind::BorderEdge, flags, blend_mode, no_textures);
 
-                        batch_list.with_suitable_batch(&corner_key, item_bounding_rect, |batch| {
+                        // Scope the first batch borrow so batch_list can be borrowed again below.
+                        {
+                            let batch = batch_list.get_suitable_batch(&corner_key, item_bounding_rect);
                             for (i, instance_kind) in border_cpu.corner_instances.iter().enumerate() {
                                 let sub_index = i as i32;
                                 match *instance_kind {
                                     BorderCornerInstance::Single => {
-                                        batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::Both as i32, 0));
+                                        batch.push(base_instance.build(sub_index,
+                                                                       BorderCornerSide::Both as i32, 0));
                                     }
                                     BorderCornerInstance::Double => {
-                                        batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::First as i32, 0));
-                                        batch.add_instance(base_instance.build(sub_index,
-                                                                               BorderCornerSide::Second as i32, 0));
+                                        batch.push(base_instance.build(sub_index,
+                                                                       BorderCornerSide::First as i32, 0));
+                                        batch.push(base_instance.build(sub_index,
+                                                                       BorderCornerSide::Second as i32, 0));
                                     }
                                 }
                             }
-                        });
+                        }
 
-                        batch_list.with_suitable_batch(&edge_key, item_bounding_rect, |batch| {
-                            for border_segment in 0..4 {
-                                batch.add_instance(base_instance.build(border_segment, 0, 0));
-                            }
-                        });
+                        let batch = batch_list.get_suitable_batch(&edge_key, item_bounding_rect);
+                        for border_segment in 0..4 {
+                            batch.push(base_instance.build(border_segment, 0, 0));
+                        }
                     }
                     PrimitiveKind::Rectangle => {
                         let key = AlphaBatchKey::new(AlphaBatchKind::Rectangle, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, 0, 0));
+                        batch.push(base_instance.build(0, 0, 0));
                     }
                     PrimitiveKind::Line => {
                         let key = AlphaBatchKey::new(AlphaBatchKind::Line, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, 0, 0));
+                        batch.push(base_instance.build(0, 0, 0));
                     }
                     PrimitiveKind::Image => {
                         let image_cpu = &ctx.prim_store.cpu_images[prim_metadata.cpu_prim_index.0];
 
                         let (color_texture_id, uv_address) = resolve_image(image_cpu.image_key,
                                                                            image_cpu.image_rendering,
                                                                            image_cpu.tile_offset,
                                                                            ctx.resource_cache,
@@ -393,17 +443,17 @@ impl AlphaRenderItem {
                         };
 
                         let textures = BatchTextures {
                             colors: [color_texture_id, SourceTexture::Invalid, SourceTexture::Invalid],
                         };
 
                         let key = AlphaBatchKey::new(batch_kind, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
+                        batch.push(base_instance.build(uv_address.as_int(gpu_cache), 0, 0));
                     }
                     PrimitiveKind::TextRun => {
                         let text_cpu = &ctx.prim_store.cpu_text_runs[prim_metadata.cpu_prim_index.0];
                         let font_size_dp = text_cpu.logical_font_size.scale_by(ctx.device_pixel_ratio);
 
                         // TODO(gw): avoid / recycle this allocation in the future.
                         let mut instances = Vec::new();
 
@@ -424,44 +474,44 @@ impl AlphaRenderItem {
                         if texture_id != SourceTexture::Invalid {
                             let textures = BatchTextures {
                                 colors: [texture_id, SourceTexture::Invalid, SourceTexture::Invalid],
                             };
 
                             let key = AlphaBatchKey::new(AlphaBatchKind::TextRun, flags, blend_mode, textures);
                             let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
-                            batch.add_instances(&instances);
+                            batch.extend_from_slice(&instances);
                         }
                     }
                     PrimitiveKind::TextShadow => {
                         let cache_task_id = prim_metadata.render_task_id.expect("no render task!");
                         let cache_task_address = render_tasks.get_task_address(cache_task_id);
                         let textures = BatchTextures::render_target_cache();
                         let key = AlphaBatchKey::new(AlphaBatchKind::CacheImage, flags, blend_mode, textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, cache_task_address.0 as i32, 0));
+                        batch.push(base_instance.build(0, cache_task_address.0 as i32, 0));
                     }
                     PrimitiveKind::AlignedGradient => {
                         let gradient_cpu = &ctx.prim_store.cpu_gradients[prim_metadata.cpu_prim_index.0];
                         let key = AlphaBatchKey::new(AlphaBatchKind::AlignedGradient, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
                         for part_index in 0..(gradient_cpu.stops_count - 1) {
-                            batch.add_instance(base_instance.build(part_index as i32, 0, 0));
+                            batch.push(base_instance.build(part_index as i32, 0, 0));
                         }
                     }
                     PrimitiveKind::AngleGradient => {
                         let key = AlphaBatchKey::new(AlphaBatchKind::AngleGradient, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, 0, 0));
+                        batch.push(base_instance.build(0, 0, 0));
                     }
                     PrimitiveKind::RadialGradient => {
                         let key = AlphaBatchKey::new(AlphaBatchKind::RadialGradient, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
-                        batch.add_instance(base_instance.build(0, 0, 0));
+                        batch.push(base_instance.build(0, 0, 0));
                     }
                     PrimitiveKind::YuvImage => {
                         let mut textures = BatchTextures::no_texture();
                         let mut uv_rect_addresses = [0; 3];
                         let image_yuv_cpu = &ctx.prim_store.cpu_yuv_images[prim_metadata.cpu_prim_index.0];
 
                         //yuv channel
                         let channel_count = image_yuv_cpu.format.get_plane_num();
@@ -512,31 +562,31 @@ impl AlphaRenderItem {
                         ));
 
                         let key = AlphaBatchKey::new(AlphaBatchKind::YuvImage(buffer_kind, image_yuv_cpu.format, image_yuv_cpu.color_space),
                                                      flags,
                                                      blend_mode,
                                                      textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
-                        batch.add_instance(base_instance.build(uv_rect_addresses[0],
-                                                               uv_rect_addresses[1],
-                                                               uv_rect_addresses[2]));
+                        batch.push(base_instance.build(uv_rect_addresses[0],
+                                                       uv_rect_addresses[1],
+                                                       uv_rect_addresses[2]));
                     }
                     PrimitiveKind::BoxShadow => {
                         let box_shadow = &ctx.prim_store.cpu_box_shadows[prim_metadata.cpu_prim_index.0];
                         let cache_task_id = prim_metadata.render_task_id.unwrap();
                         let cache_task_address = render_tasks.get_task_address(cache_task_id);
 
                         let key = AlphaBatchKey::new(AlphaBatchKind::BoxShadow, flags, blend_mode, no_textures);
                         let batch = batch_list.get_suitable_batch(&key, item_bounding_rect);
 
                         for rect_index in 0..box_shadow.rects.len() {
-                            batch.add_instance(base_instance.build(rect_index as i32,
-                                                                   cache_task_address.0 as i32, 0));
+                            batch.push(base_instance.build(rect_index as i32,
+                                                           cache_task_address.0 as i32, 0));
                         }
                     }
                 }
             }
             AlphaRenderItem::SplitComposite(sc_index, task_id, gpu_handle, z) => {
                 let key = AlphaBatchKey::new(AlphaBatchKind::SplitComposite,
                                              AlphaBatchKeyFlags::empty(),
                                              BlendMode::PremultipliedAlpha,
@@ -548,17 +598,17 @@ impl AlphaRenderItem {
 
                 let instance = CompositePrimitiveInstance::new(task_address,
                                                                source_task_address,
                                                                RenderTaskAddress(0),
                                                                gpu_address,
                                                                0,
                                                                z);
 
-                batch.add_instance(PrimitiveInstance::from(instance));
+                batch.push(PrimitiveInstance::from(instance));
             }
         }
     }
 }
 
 impl AlphaBatcher {
     fn new() -> AlphaBatcher {
         AlphaBatcher {
@@ -591,18 +641,18 @@ impl AlphaBatcher {
                                   deferred_resolves);
             }
         }
 
         self.batch_list.finalize();
     }
 
     pub fn is_empty(&self) -> bool {
-        self.batch_list.opaque_batches.is_empty() &&
-        self.batch_list.alpha_batches.is_empty()
+        self.batch_list.opaque_batch_list.batches.is_empty() &&
+        self.batch_list.alpha_batch_list.batches.is_empty()
     }
 }
 
 /// Batcher managing draw calls into the clip mask (in the RT cache).
 #[derive(Debug)]
 pub struct ClipBatcher {
     /// Rectangle draws fill up the rectangles with rounded corners.
     pub rectangles: Vec<CacheClipInstance>,
@@ -1402,37 +1452,44 @@ impl From<CompositePrimitiveInstance> fo
                 0,
                 0,
             ]
         }
     }
 }
 
 #[derive(Debug)]
-pub struct PrimitiveBatch {
+pub struct AlphaPrimitiveBatch {
     pub key: AlphaBatchKey,
     pub instances: Vec<PrimitiveInstance>,
     pub item_rects: Vec<DeviceIntRect>,
 }
 
-impl PrimitiveBatch {
-    fn new(key: AlphaBatchKey) -> PrimitiveBatch {
-        PrimitiveBatch {
+impl AlphaPrimitiveBatch {
+    fn new(key: AlphaBatchKey) -> AlphaPrimitiveBatch {
+        AlphaPrimitiveBatch {
             key,
             instances: Vec::new(),
             item_rects: Vec::new(),
         }
     }
+}
 
-    fn add_instance(&mut self, instance: PrimitiveInstance) {
-        self.instances.push(instance);
-    }
+#[derive(Debug)]
+pub struct OpaquePrimitiveBatch {
+    pub key: AlphaBatchKey,
+    pub instances: Vec<PrimitiveInstance>,
+}
 
-    fn add_instances(&mut self, instances: &[PrimitiveInstance]) {
-        self.instances.extend_from_slice(instances);
+impl OpaquePrimitiveBatch {
+    fn new(key: AlphaBatchKey) -> OpaquePrimitiveBatch {
+        OpaquePrimitiveBatch {
+            key,
+            instances: Vec::new(),
+        }
     }
 }
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct PackedLayerIndex(pub usize);
 
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
 pub struct StackingContextIndex(pub usize);
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -134,16 +134,28 @@ impl fmt::Debug for DocumentMsg {
             DocumentMsg::ScrollNodeWithId(..) => "DocumentMsg::ScrollNodeWithId",
             DocumentMsg::TickScrollingBounce => "DocumentMsg::TickScrollingBounce",
             DocumentMsg::GetScrollNodeState(..) => "DocumentMsg::GetScrollNodeState",
             DocumentMsg::GenerateFrame(..) => "DocumentMsg::GenerateFrame",
         })
     }
 }
 
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub enum DebugCommand {
+    // Display the frame profiler on screen.
+    EnableProfiler(bool),
+    // Display all texture cache pages on screen.
+    EnableTextureCacheDebug(bool),
+    // Display intermediate render targets on screen.
+    EnableRenderTargetDebug(bool),
+    // Flush any pending debug commands.
+    Flush,
+}
+
 #[derive(Clone, Deserialize, Serialize)]
 pub enum ApiMsg {
     /// Add/remove/update images and fonts.
     UpdateResources(ResourceUpdates),
     /// Gets the glyph dimensions
     GetGlyphDimensions(FontInstance, Vec<GlyphKey>, MsgSender<Vec<Option<GlyphDimensions>>>),
     /// Gets the glyph indices from a string
     GetGlyphIndices(FontKey, String, MsgSender<Vec<Option<u32>>>),
@@ -158,32 +170,35 @@ pub enum ApiMsg {
     /// An opaque handle that must be passed to the render notifier. It is used by Gecko
     /// to forward gecko-specific messages to the render thread preserving the ordering
     /// within the other messages.
     ExternalEvent(ExternalEvent),
     /// Removes all resources associated with a namespace.
     ClearNamespace(IdNamespace),
     /// Flush from the caches anything that isn't necessary, to free some memory.
     MemoryPressure,
+    /// Change debugging options.
+    DebugCommand(DebugCommand),
     ShutDown,
 }
 
 impl fmt::Debug for ApiMsg {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         f.write_str(match *self {
             ApiMsg::UpdateResources(..) => "ApiMsg::UpdateResources",
             ApiMsg::GetGlyphDimensions(..) => "ApiMsg::GetGlyphDimensions",
             ApiMsg::GetGlyphIndices(..) => "ApiMsg::GetGlyphIndices",
             ApiMsg::CloneApi(..) => "ApiMsg::CloneApi",
             ApiMsg::AddDocument(..) => "ApiMsg::AddDocument",
             ApiMsg::UpdateDocument(..) => "ApiMsg::UpdateDocument",
             ApiMsg::DeleteDocument(..) => "ApiMsg::DeleteDocument",
             ApiMsg::ExternalEvent(..) => "ApiMsg::ExternalEvent",
             ApiMsg::ClearNamespace(..) => "ApiMsg::ClearNamespace",
             ApiMsg::MemoryPressure => "ApiMsg::MemoryPressure",
+            ApiMsg::DebugCommand(..) => "ApiMsg::DebugCommand",
             ApiMsg::ShutDown => "ApiMsg::ShutDown",
         })
     }
 }
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
 pub struct Epoch(pub u32);
@@ -414,17 +429,17 @@ impl RenderApi {
     /// ```
     pub fn set_root_pipeline(&self, document_id: DocumentId, pipeline_id: PipelineId) {
         self.send(document_id, DocumentMsg::SetRootPipeline(pipeline_id));
     }
 
     /// Supplies a new frame to WebRender.
     ///
     /// Non-blocking, it notifies a worker process which processes the display list.
-    /// When it's done and a RenderNotifier has been set in `webrender::renderer::Renderer`,
+    /// When it's done and a RenderNotifier has been set in `webrender::Renderer`,
     /// [new_frame_ready()][notifier] gets called.
     ///
     /// Note: Scrolling doesn't require an own Frame.
     ///
     /// Arguments:
     ///
     /// * `document_id`: Target Document ID.
     /// * `epoch`: The unique Frame ID, monotonically increasing.
--- a/gfx/webrender_api/src/display_list.rs
+++ b/gfx/webrender_api/src/display_list.rs
@@ -15,16 +15,20 @@ use {ImageDisplayItem, ImageKey, ImageMa
 use {LayoutTransform, LayoutVector2D, LineDisplayItem, LineOrientation, LineStyle, LocalClip};
 use {MixBlendMode, PipelineId, PropertyBinding, PushStackingContextDisplayItem, RadialGradient};
 use {RadialGradientDisplayItem, RectangleDisplayItem, ScrollFrameDisplayItem, ScrollPolicy};
 use {ScrollSensitivity, SpecificDisplayItem, StackingContext, StickyFrameDisplayItem};
 use {StickyFrameInfo, TextDisplayItem, TextShadow, TransformStyle};
 use {YuvColorSpace, YuvData, YuvImageDisplayItem};
 use std::marker::PhantomData;
 
+// Avoid pushing overly long text runs: a run longer than this is split into several parts.
+// See renderer::MAX_VERTEX_TEXTURE_WIDTH for details.
+pub const MAX_TEXT_RUN_LENGTH: usize = 2040;
+
 #[repr(C)]
 #[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
 pub struct ItemRange<T> {
     start: usize,
     length: usize,
     _boo: PhantomData<T>,
 }
 
@@ -614,21 +618,23 @@ impl DisplayListBuilder {
         if size < Au::from_px(4096) {
             let item = SpecificDisplayItem::Text(TextDisplayItem {
                 color,
                 font_key,
                 size,
                 glyph_options,
             });
 
-            self.push_item(item, rect, local_clip);
-            self.push_iter(glyphs);
+            for split_glyphs in glyphs.chunks(MAX_TEXT_RUN_LENGTH) {
+                self.push_item(item, rect, local_clip);
+                self.push_iter(split_glyphs);
 
-            // Remember that we've seen these glyphs
-            self.cache_glyphs(font_key, color, glyphs.iter().map(|glyph| glyph.index));
+                // Remember that we've seen these glyphs
+                self.cache_glyphs(font_key, color, split_glyphs.iter().map(|glyph| glyph.index));
+            }
         }
     }
 
     fn cache_glyphs<I: Iterator<Item=GlyphIndex>>(&mut self,
                                                      font_key: FontKey,
                                                      color: ColorF,
                                                      glyphs: I) {
         let mut font_glyphs = self.glyphs.entry((font_key, color))