gfx/wr/webrender/src/renderer.rs
Bug 1441308 - Correct Renderer issues with multiple documents r=gw

This corrects:
  A) An issue encountered with our strategy for skipping the end_pass call
     for all but an offscreen render target. See the comment above the
     end_pass call for details.
  B) An issue with depth clearing where we do not clear the whole rect if
     there are multiple non-intersecting documents.

Differential Revision: https://phabricator.services.mozilla.com/D23056

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! The high-level module responsible for interfacing with the GPU.
//!
//! Much of WebRender's design is driven by separating work into different
//! threads. To avoid the complexities of multi-threaded GPU access, we restrict
//! all communication with the GPU to one thread, the render thread. But since
//! issuing GPU commands is often a bottleneck, we move everything else (i.e.
//! the computation of what commands to issue) to another thread, the
//! RenderBackend thread. The RenderBackend, in turn, may delegate work to other
//! threads (like the SceneBuilder threads or Rayon workers), but the
//! Render-vs-RenderBackend distinction is the most important.
//!
//! The consumer is responsible for initializing the render thread before
//! calling into WebRender, which means that this module also serves as the
//! initial entry point into WebRender, and is responsible for spawning the
//! various other threads discussed above. That said, WebRender initialization
//! returns both the `Renderer` instance as well as a channel for communicating
//! directly with the `RenderBackend`. Aside from a few high-level operations
//! like 'render now', most of the interesting commands from the consumer go over
//! that channel and operate on the `RenderBackend`.
//!
//! ## Space conversion guidelines
//! At this stage, we should be operating with `DevicePixel` and `FramebufferPixel` only.
//! "Framebuffer" space represents the final destination of our rendering,
//! and it happens to be Y-flipped on OpenGL. The conversion is done as follows:
//!   - for rasterized primitives, the orthographic projection maps
//! the content rectangle to the [-1, 1] range
//!   - the viewport transformation is set up to map the whole range to
//! the framebuffer rectangle provided by the document view, stored in `DrawTarget`
//!   - all the direct framebuffer operations, like blitting, reading pixels, and setting
//! up the scissor, accept already-transformed coordinates, which we can get by
//! calling `DrawTarget::to_framebuffer_rect`
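//!
//! As a sketch of that last step (assuming a `device` and a `draw_target` for
//! the main framebuffer are in scope):
//!
//! ```rust,ignore
//! // A rect computed in device pixels by the frame builder.
//! let device_rect = DeviceIntRect::new(
//!     DeviceIntPoint::new(10, 20),
//!     DeviceIntSize::new(100, 50),
//! );
//! // Direct framebuffer operations take framebuffer-space coordinates
//! // (Y-flipped on OpenGL), so convert before setting up the scissor.
//! let fb_rect = draw_target.to_framebuffer_rect(device_rect);
//! device.set_scissor_rect(fb_rect);
//! ```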

use api::{BlobImageHandler, ColorF, ColorU};
use api::{DocumentId, Epoch, ExternalImageId};
use api::{ExternalImageType, FontRenderMode, FrameMsg, ImageFormat, PipelineId};
use api::{ImageRendering, Checkpoint, NotificationRequest};
use api::{DebugCommand, MemoryReport, VoidPtrToSizeFn};
use api::{RenderApiSender, RenderNotifier, TextureTarget};
use api::channel;
use api::units::*;
pub use api::DebugFlags;
use api::channel::PayloadReceiverHelperMethods;
use batch::{BatchKind, BatchTextures, BrushBatchKind, ClipBatchList};
#[cfg(any(feature = "capture", feature = "replay"))]
use capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
use debug_colors;
use debug_render::{DebugItem, DebugRenderer};
use device::{DepthFunction, Device, GpuFrameId, Program, UploadMethod, Texture, PBO};
use device::{DrawTarget, ExternalTexture, FBOId, ReadTarget, TextureSlot};
use device::{ShaderError, TextureFilter, TextureFlags,
             VertexUsageHint, VAO, VBO, CustomVAO};
use device::{ProgramCache, ReadPixelsFormat};
use device::query::GpuTimer;
use euclid::rect;
use euclid::{Transform3D, TypedScale};
use frame_builder::{ChasePrimitive, FrameBuilderConfig};
use gleam::gl;
use glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
use gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd};
#[cfg(feature = "pathfinder")]
use gpu_glyph_renderer::GpuGlyphRenderer;
use gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, ScalingInstance, TransformData, ResolveInstanceData};
use internal_types::{TextureSource, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
use internal_types::{CacheTextureId, DebugOutput, FastHashMap, LayerIndex, RenderedDocument, ResultMsg};
use internal_types::{TextureCacheAllocationKind, TextureCacheUpdate, TextureUpdateList, TextureUpdateSource};
use internal_types::{RenderTargetInfo, SavedTargetIndex};
use malloc_size_of::MallocSizeOfOps;
use picture::{RecordedDirtyRegion, TileCache};
use prim_store::DeferredResolve;
use profiler::{BackendProfileCounters, FrameProfileCounters, TimeProfileCounter,
               GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
use profiler::{Profiler, ChangeIndicator};
use device::query::GpuProfiler;
use rayon::{ThreadPool, ThreadPoolBuilder};
use record::ApiRecordingReceiver;
use render_backend::{FrameId, RenderBackend};
use scene_builder::{SceneBuilder, LowPrioritySceneBuilder};
use shade::{Shaders, WrShaders};
use smallvec::SmallVec;
use render_task::{RenderTask, RenderTaskData, RenderTaskKind, RenderTaskTree};
use resource_cache::ResourceCache;
use util::drain_filter;

use std;
use std::cmp;
use std::collections::VecDeque;
use std::collections::hash_map::Entry;
use std::f32;
use std::marker::PhantomData;
use std::mem;
use std::os::raw::c_void;
use std::path::PathBuf;
use std::rc::Rc;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{channel, Receiver};
use std::thread;
use std::cell::RefCell;
use texture_cache::TextureCache;
use thread_profiler::{register_thread_with_profiler, write_profile};
use tiling::{AlphaRenderTarget, ColorRenderTarget};
use tiling::{BlitJob, BlitJobSource, RenderPassKind, RenderTargetList};
use tiling::{Frame, RenderTarget, RenderTargetKind, TextureCacheRenderTarget};
#[cfg(not(feature = "pathfinder"))]
use tiling::GlyphJob;
use time::precise_time_ns;

cfg_if! {
    if #[cfg(feature = "debugger")] {
        use serde_json;
        use debug_server::{self, DebugServer};
    } else {
        use api::ApiMsg;
        use api::channel::MsgSender;
    }
}

/// Is only false if no WR instances have ever been created.
static HAS_BEEN_INITIALIZED: AtomicBool = AtomicBool::new(false);

/// Returns true if a WR instance has ever been initialized in this process.
pub fn wr_has_been_initialized() -> bool {
    HAS_BEEN_INITIALIZED.load(Ordering::SeqCst)
}

pub const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024;
/// Enabling this toggle forces the GPU cache scattered texture to be
/// resized every frame, which allows GPU debuggers to verify that the
/// resize is performed correctly.
const GPU_CACHE_RESIZE_TEST: bool = false;

/// Number of GPU blocks per UV rectangle provided for an image.
pub const BLOCKS_PER_UV_RECT: usize = 2;

const GPU_TAG_BRUSH_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
    label: "B_LinearGradient",
    color: debug_colors::POWDERBLUE,
};
const GPU_TAG_BRUSH_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag {
    label: "B_RadialGradient",
    color: debug_colors::LIGHTPINK,
};
const GPU_TAG_BRUSH_YUV_IMAGE: GpuProfileTag = GpuProfileTag {
    label: "B_YuvImage",
    color: debug_colors::DARKGREEN,
};
const GPU_TAG_BRUSH_MIXBLEND: GpuProfileTag = GpuProfileTag {
    label: "B_MixBlend",
    color: debug_colors::MAGENTA,
};
const GPU_TAG_BRUSH_BLEND: GpuProfileTag = GpuProfileTag {
    label: "B_Blend",
    color: debug_colors::ORANGE,
};
const GPU_TAG_BRUSH_IMAGE: GpuProfileTag = GpuProfileTag {
    label: "B_Image",
    color: debug_colors::SPRINGGREEN,
};
const GPU_TAG_BRUSH_SOLID: GpuProfileTag = GpuProfileTag {
    label: "B_Solid",
    color: debug_colors::RED,
};
const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag {
    label: "C_Clip",
    color: debug_colors::PURPLE,
};
const GPU_TAG_CACHE_BORDER: GpuProfileTag = GpuProfileTag {
    label: "C_Border",
    color: debug_colors::CORNSILK,
};
const GPU_TAG_CACHE_LINE_DECORATION: GpuProfileTag = GpuProfileTag {
    label: "C_LineDecoration",
    color: debug_colors::YELLOWGREEN,
};
const GPU_TAG_CACHE_GRADIENT: GpuProfileTag = GpuProfileTag {
    label: "C_Gradient",
    color: debug_colors::BROWN,
};
const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag {
    label: "target init",
    color: debug_colors::SLATEGREY,
};
const GPU_TAG_SETUP_DATA: GpuProfileTag = GpuProfileTag {
    label: "data init",
    color: debug_colors::LIGHTGREY,
};
const GPU_TAG_PRIM_SPLIT_COMPOSITE: GpuProfileTag = GpuProfileTag {
    label: "SplitComposite",
    color: debug_colors::DARKBLUE,
};
const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag {
    label: "TextRun",
    color: debug_colors::BLUE,
};
const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag {
    label: "Blur",
    color: debug_colors::VIOLET,
};
const GPU_TAG_BLIT: GpuProfileTag = GpuProfileTag {
    label: "Blit",
    color: debug_colors::LIME,
};
const GPU_TAG_SCALE: GpuProfileTag = GpuProfileTag {
    label: "Scale",
    color: debug_colors::GHOSTWHITE,
};

const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag {
    label: "Alpha Targets",
    color: debug_colors::BLACK,
};
const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag {
    label: "Opaque Pass",
    color: debug_colors::BLACK,
};
const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag {
    label: "Transparent Pass",
    color: debug_colors::BLACK,
};

/// The clear color used for the texture cache when the debug display is enabled.
/// We use a shade of blue so that we can still identify completely blue items in
/// the texture cache.
const TEXTURE_CACHE_DBG_CLEAR_COLOR: [f32; 4] = [0.0, 0.0, 0.8, 1.0];

impl BatchKind {
    #[cfg(feature = "debugger")]
    fn debug_name(&self) -> &'static str {
        match *self {
            BatchKind::SplitComposite => "SplitComposite",
            BatchKind::Brush(kind) => {
                match kind {
                    BrushBatchKind::Solid => "Brush (Solid)",
                    BrushBatchKind::Image(..) => "Brush (Image)",
                    BrushBatchKind::Blend => "Brush (Blend)",
                    BrushBatchKind::MixBlend { .. } => "Brush (Composite)",
                    BrushBatchKind::YuvImage(..) => "Brush (YuvImage)",
                    BrushBatchKind::RadialGradient => "Brush (RadialGradient)",
                    BrushBatchKind::LinearGradient => "Brush (LinearGradient)",
                }
            }
            BatchKind::TextRun(_) => "TextRun",
        }
    }

    fn sampler_tag(&self) -> GpuProfileTag {
        match *self {
            BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
            BatchKind::Brush(kind) => {
                match kind {
                    BrushBatchKind::Solid => GPU_TAG_BRUSH_SOLID,
                    BrushBatchKind::Image(..) => GPU_TAG_BRUSH_IMAGE,
                    BrushBatchKind::Blend => GPU_TAG_BRUSH_BLEND,
                    BrushBatchKind::MixBlend { .. } => GPU_TAG_BRUSH_MIXBLEND,
                    BrushBatchKind::YuvImage(..) => GPU_TAG_BRUSH_YUV_IMAGE,
                    BrushBatchKind::RadialGradient => GPU_TAG_BRUSH_RADIAL_GRADIENT,
                    BrushBatchKind::LinearGradient => GPU_TAG_BRUSH_LINEAR_GRADIENT,
                }
            }
            BatchKind::TextRun(_) => GPU_TAG_PRIM_TEXT_RUN,
        }
    }
}

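/// Returns the new state of `select` if it changed between `before` and
/// `after`, or `None` if it did not. A usage sketch (`toggle_profiler` is a
/// hypothetical handler):
///
/// ```rust,ignore
/// if let Some(enabled) = flag_changed(old_flags, new_flags, DebugFlags::PROFILER_DBG) {
///     toggle_profiler(enabled);
/// }
/// ```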
fn flag_changed(before: DebugFlags, after: DebugFlags, select: DebugFlags) -> Option<bool> {
    if before & select != after & select {
        Some(after.contains(select))
    } else {
        None
    }
}

#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub enum ShaderColorMode {
    FromRenderPassMode = 0,
    Alpha = 1,
    SubpixelConstantTextColor = 2,
    SubpixelWithBgColorPass0 = 3,
    SubpixelWithBgColorPass1 = 4,
    SubpixelWithBgColorPass2 = 5,
    SubpixelDualSource = 6,
    Bitmap = 7,
    ColorBitmap = 8,
    Image = 9,
}

impl From<GlyphFormat> for ShaderColorMode {
    fn from(format: GlyphFormat) -> ShaderColorMode {
        match format {
            GlyphFormat::Alpha | GlyphFormat::TransformedAlpha => ShaderColorMode::Alpha,
            GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => {
                panic!("Subpixel glyph formats must be handled separately.");
            }
            GlyphFormat::Bitmap => ShaderColorMode::Bitmap,
            GlyphFormat::ColorBitmap => ShaderColorMode::ColorBitmap,
        }
    }
}

/// Enumeration of the texture samplers used across the various WebRender shaders.
///
/// Each variant corresponds to a uniform declared in shader source. We only bind
/// the variants we need for a given shader, so not every variant is bound for every
/// batch.
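///
/// A binding sketch (with `device` and `texture` assumed to be in scope),
/// mirroring how `TextureResolver::bind` uses these samplers below:
///
/// ```rust,ignore
/// device.bind_texture(TextureSampler::Color0, &texture);
/// ```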
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub(crate) enum TextureSampler {
    Color0,
    Color1,
    Color2,
    PrevPassAlpha,
    PrevPassColor,
    GpuCache,
    TransformPalette,
    RenderTasks,
    Dither,
    PrimitiveHeadersF,
    PrimitiveHeadersI,
}

impl TextureSampler {
    pub(crate) fn color(n: usize) -> TextureSampler {
        match n {
            0 => TextureSampler::Color0,
            1 => TextureSampler::Color1,
            2 => TextureSampler::Color2,
            _ => {
                panic!("There are only 3 color samplers.");
            }
        }
    }
}

impl Into<TextureSlot> for TextureSampler {
    fn into(self) -> TextureSlot {
        match self {
            TextureSampler::Color0 => TextureSlot(0),
            TextureSampler::Color1 => TextureSlot(1),
            TextureSampler::Color2 => TextureSlot(2),
            TextureSampler::PrevPassAlpha => TextureSlot(3),
            TextureSampler::PrevPassColor => TextureSlot(4),
            TextureSampler::GpuCache => TextureSlot(5),
            TextureSampler::TransformPalette => TextureSlot(6),
            TextureSampler::RenderTasks => TextureSlot(7),
            TextureSampler::Dither => TextureSlot(8),
            TextureSampler::PrimitiveHeadersF => TextureSlot(9),
            TextureSampler::PrimitiveHeadersI => TextureSlot(10),
        }
    }
}

#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct PackedVertex {
    pub pos: [f32; 2],
}

pub(crate) mod desc {
    use device::{VertexAttribute, VertexAttributeKind, VertexDescriptor};

    pub const PRIM_INSTANCES: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aData",
                count: 4,
                kind: VertexAttributeKind::I32,
            },
        ],
    };

    pub const BLUR: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aBlurRenderTaskAddress",
                count: 1,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aBlurSourceTaskAddress",
                count: 1,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aBlurDirection",
                count: 1,
                kind: VertexAttributeKind::I32,
            },
        ],
    };

    pub const LINE: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aTaskRect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aLocalSize",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aWavyLineThickness",
                count: 1,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aStyle",
                count: 1,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aOrientation",
                count: 1,
                kind: VertexAttributeKind::I32,
            },
        ],
    };

    pub const GRADIENT: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aTaskRect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aStops",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            // TODO(gw): We should probably pack these as u32 colors instead
            //           of passing as full float vec4 here. It won't make much
            //           difference in real world, since these are only invoked
            //           rarely, when creating the cache.
            VertexAttribute {
                name: "aColor0",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aColor1",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aColor2",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aColor3",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aAxisSelect",
                count: 1,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aStartStop",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
    };

    pub const BORDER: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aTaskOrigin",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aRect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aColor0",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aColor1",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aFlags",
                count: 1,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aWidths",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aRadii",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aClipParams1",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aClipParams2",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
        ],
    };

    pub const SCALE: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aScaleRenderTaskAddress",
                count: 1,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aScaleSourceTaskAddress",
                count: 1,
                kind: VertexAttributeKind::I32,
            },
        ],
    };

    pub const CLIP: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aTransformIds",
                count: 2,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aClipDataResourceAddress",
                count: 4,
                kind: VertexAttributeKind::U16,
            },
            VertexAttribute {
                name: "aClipLocalPos",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aClipTileRect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aClipDeviceArea",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aClipSnapOffsets",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aClipOrigins",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aDevicePixelScale",
                count: 1,
                kind: VertexAttributeKind::F32,
            },
        ],
    };

    pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::U16Norm,
            },
            VertexAttribute {
                name: "aValue",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[],
    };

    pub const RESOLVE: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aRect",
                count: 4,
                kind: VertexAttributeKind::F32,
            },
        ],
    };

    pub const VECTOR_STENCIL: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aFromPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aCtrlPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aToPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aFromNormal",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aCtrlNormal",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aToNormal",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
            VertexAttribute {
                name: "aPathID",
                count: 1,
                kind: VertexAttributeKind::U16,
            },
            VertexAttribute {
                name: "aPad",
                count: 1,
                kind: VertexAttributeKind::U16,
            },
        ],
    };

    pub const VECTOR_COVER: VertexDescriptor = VertexDescriptor {
        vertex_attributes: &[
            VertexAttribute {
                name: "aPosition",
                count: 2,
                kind: VertexAttributeKind::F32,
            },
        ],
        instance_attributes: &[
            VertexAttribute {
                name: "aTargetRect",
                count: 4,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aStencilOrigin",
                count: 2,
                kind: VertexAttributeKind::I32,
            },
            VertexAttribute {
                name: "aSubpixel",
                count: 1,
                kind: VertexAttributeKind::U16,
            },
            VertexAttribute {
                name: "aPad",
                count: 1,
                kind: VertexAttributeKind::U16,
            },
        ],
    };
}

#[derive(Debug, Copy, Clone)]
pub(crate) enum VertexArrayKind {
    Primitive,
    Blur,
    Clip,
    VectorStencil,
    VectorCover,
    Border,
    Scale,
    LineDecoration,
    Gradient,
    Resolve,
}

#[derive(Clone, Debug, PartialEq)]
pub enum GraphicsApi {
    OpenGL,
}

#[derive(Clone, Debug)]
pub struct GraphicsApiInfo {
    pub kind: GraphicsApi,
    pub renderer: String,
    pub version: String,
}

#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum ImageBufferKind {
    Texture2D = 0,
    TextureRect = 1,
    TextureExternal = 2,
    Texture2DArray = 3,
}

//TODO: those types are the same, so let's merge them
impl From<TextureTarget> for ImageBufferKind {
    fn from(target: TextureTarget) -> Self {
        match target {
            TextureTarget::Default => ImageBufferKind::Texture2D,
            TextureTarget::Rect => ImageBufferKind::TextureRect,
            TextureTarget::Array => ImageBufferKind::Texture2DArray,
            TextureTarget::External => ImageBufferKind::TextureExternal,
        }
    }
}

#[derive(Debug, Copy, Clone)]
pub enum RendererKind {
    Native,
    OSMesa,
}

#[derive(Debug)]
pub struct GpuProfile {
    pub frame_id: GpuFrameId,
    pub paint_time_ns: u64,
}

impl GpuProfile {
    fn new<T>(frame_id: GpuFrameId, timers: &[GpuTimer<T>]) -> GpuProfile {
        let mut paint_time_ns = 0;
        for timer in timers {
            paint_time_ns += timer.time_ns;
        }
        GpuProfile {
            frame_id,
            paint_time_ns,
        }
    }
}

#[derive(Debug)]
pub struct CpuProfile {
    pub frame_id: GpuFrameId,
    pub backend_time_ns: u64,
    pub composite_time_ns: u64,
    pub draw_calls: usize,
}

impl CpuProfile {
    fn new(
        frame_id: GpuFrameId,
        backend_time_ns: u64,
        composite_time_ns: u64,
        draw_calls: usize,
    ) -> CpuProfile {
        CpuProfile {
            frame_id,
            backend_time_ns,
            composite_time_ns,
            draw_calls,
        }
    }
}

#[cfg(not(feature = "pathfinder"))]
pub struct GpuGlyphRenderer;

#[cfg(not(feature = "pathfinder"))]
impl GpuGlyphRenderer {
    fn new(_: &mut Device, _: &VAO, _: ShaderPrecacheFlags) -> Result<GpuGlyphRenderer, RendererError> {
        Ok(GpuGlyphRenderer)
    }
}

#[cfg(not(feature = "pathfinder"))]
struct StenciledGlyphPage;

/// A Texture that has been initialized by the `device` module and is ready to
/// be used.
struct ActiveTexture {
    texture: Texture,
    saved_index: Option<SavedTargetIndex>,
}

/// Helper struct for resolving device Textures for use during rendering passes.
///
/// Manages the mapping between the at-a-distance texture handles used by the
/// `RenderBackend` (which does not directly interface with the GPU) and actual
/// device texture handles.
struct TextureResolver {
    /// A map to resolve texture cache IDs to native textures.
    texture_cache_map: FastHashMap<CacheTextureId, Texture>,

    /// Map of external image IDs to native textures.
    external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,

    /// A special 1x1 dummy texture used for shaders that expect to work with
    /// the output of the previous pass but are actually running in the first
    /// pass.
    dummy_cache_texture: Texture,

    /// The outputs of the previous pass, if applicable.
    prev_pass_color: Option<ActiveTexture>,
    prev_pass_alpha: Option<ActiveTexture>,

    /// Saved render targets from previous passes. This is used when a pass
    /// needs access to the result of a pass other than the immediately-preceding
    /// one. In this case, the `RenderTask` will get a non-`None` `saved_index`,
    /// which will cause the resulting render target to be persisted in this list
    /// (at that index) until the end of the frame.
    saved_targets: Vec<Texture>,

    /// Pool of idle render target textures ready for re-use.
    ///
    /// Naively, it would seem like we only ever need two pairs of (color,
    /// alpha) render targets: one for the output of the previous pass (serving
    /// as input to the current pass), and one for the output of the current
    /// pass. However, there are cases where the output of one pass is used as
    /// the input to multiple future passes. For example, drop-shadows draw the
    /// picture in pass X, then reference it in pass X+1 to create the blurred
    /// shadow, and pass the results of both X and X+1 to pass X+2 to draw the
    /// actual content.
    ///
    /// See the comments in `allocate_target_texture` for more insight on why
    /// reuse is a win.
    render_target_pool: Vec<Texture>,
}

impl TextureResolver {
    fn new(device: &mut Device) -> TextureResolver {
        let dummy_cache_texture = device
            .create_texture(
                TextureTarget::Array,
                ImageFormat::BGRA8,
                1,
                1,
                TextureFilter::Linear,
                None,
                1,
            );

        TextureResolver {
            texture_cache_map: FastHashMap::default(),
            external_images: FastHashMap::default(),
            dummy_cache_texture,
            prev_pass_alpha: None,
            prev_pass_color: None,
            saved_targets: Vec::default(),
            render_target_pool: Vec::new(),
        }
    }

    fn deinit(self, device: &mut Device) {
        device.delete_texture(self.dummy_cache_texture);

        for (_id, texture) in self.texture_cache_map {
            device.delete_texture(texture);
        }

        for texture in self.render_target_pool {
            device.delete_texture(texture);
        }
    }

    fn begin_frame(&mut self) {
        assert!(self.prev_pass_color.is_none());
        assert!(self.prev_pass_alpha.is_none());
        assert!(self.saved_targets.is_empty());
    }

    fn end_frame(&mut self, device: &mut Device, frame_id: GpuFrameId) {
        // return the cached targets to the pool
        self.end_pass(device, None, None);
        // return the saved targets as well
        while let Some(target) = self.saved_targets.pop() {
            self.return_to_pool(device, target);
        }

        // GC the render target pool.
        //
        // We use a simple scheme whereby we drop any texture that hasn't been used
        // in the last 30 frames. This should generally prevent any sustained build-
        // up of unused textures, unless we don't generate frames for a long period.
        // This can happen when the window is minimized, and we probably want to
        // flush all the WebRender caches in that case [1].
        //
        // [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1494099
        self.retain_targets(device, |texture| texture.used_recently(frame_id, 30));
    }

    /// Transfers ownership of a render target back to the pool.
    fn return_to_pool(&mut self, device: &mut Device, target: Texture) {
        device.invalidate_render_target(&target);
        self.render_target_pool.push(target);
    }

    /// Drops all targets from the render target pool that do not satisfy the predicate.
    pub fn retain_targets<F: Fn(&Texture) -> bool>(&mut self, device: &mut Device, f: F) {
        // We can't just use retain() because `Texture` requires manual cleanup.
        let mut tmp = SmallVec::<[Texture; 8]>::new();
        for target in self.render_target_pool.drain(..) {
            if f(&target) {
                tmp.push(target);
            } else {
                device.delete_texture(target);
            }
        }
        self.render_target_pool.extend(tmp);
    }

    fn end_pass(
        &mut self,
        device: &mut Device,
        a8_texture: Option<ActiveTexture>,
        rgba8_texture: Option<ActiveTexture>,
    ) {
        // If we have cache textures from the previous pass, either return them to
        // the pool or, if a later pass still needs them, move them into
        // `saved_targets` at the index assigned when the pass was built.
        // Note: the order here is important, needs to match the logic in `RenderPass::build()`.
        if let Some(at) = self.prev_pass_color.take() {
            if let Some(index) = at.saved_index {
                assert_eq!(self.saved_targets.len(), index.0);
                self.saved_targets.push(at.texture);
            } else {
                self.return_to_pool(device, at.texture);
            }
        }
        if let Some(at) = self.prev_pass_alpha.take() {
            if let Some(index) = at.saved_index {
                assert_eq!(self.saved_targets.len(), index.0);
                self.saved_targets.push(at.texture);
            } else {
                self.return_to_pool(device, at.texture);
            }
        }

        // Stash the textures from the pass that just ended; they become
        // available as inputs to the next pass.
        self.prev_pass_color = rgba8_texture;
        self.prev_pass_alpha = a8_texture;
    }
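
    // A rough sketch of how the renderer is expected to drive the resolver
    // over a frame (the actual draw loop lives further down in this file):
    //
    //   resolver.begin_frame();
    //   for pass in &frame.passes {
    //       // ... draw the pass's color/alpha targets ...
    //       resolver.end_pass(device, pass_alpha_texture, pass_color_texture);
    //   }
    //   resolver.end_frame(device, frame_id);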

    // Bind a source texture to the device.
    fn bind(&self, texture_id: &TextureSource, sampler: TextureSampler, device: &mut Device) {
        match *texture_id {
            TextureSource::Invalid => {}
            TextureSource::PrevPassAlpha => {
                let texture = match self.prev_pass_alpha {
                    Some(ref at) => &at.texture,
                    None => &self.dummy_cache_texture,
                };
                device.bind_texture(sampler, texture);
            }
            TextureSource::PrevPassColor => {
                let texture = match self.prev_pass_color {
                    Some(ref at) => &at.texture,
                    None => &self.dummy_cache_texture,
                };
                device.bind_texture(sampler, texture);
            }
            TextureSource::External(external_image) => {
                let texture = self.external_images
                    .get(&(external_image.id, external_image.channel_index))
                    .expect("BUG: External image should be resolved by now");
                device.bind_external_texture(sampler, texture);
            }
            TextureSource::TextureCache(index) => {
                let texture = &self.texture_cache_map[&index];
                device.bind_texture(sampler, texture);
            }
            TextureSource::RenderTaskCache(saved_index) => {
                let texture = &self.saved_targets[saved_index.0];
                device.bind_texture(sampler, texture)
            }
        }
    }

    // Get the real (OpenGL) texture ID for a given source texture.
    // For a texture cache texture, the IDs are stored in a vector
    // map for fast access.
    fn resolve(&self, texture_id: &TextureSource) -> Option<&Texture> {
        match *texture_id {
            TextureSource::Invalid => None,
            TextureSource::PrevPassAlpha => Some(
                match self.prev_pass_alpha {
                    Some(ref at) => &at.texture,
                    None => &self.dummy_cache_texture,
                }
            ),
            TextureSource::PrevPassColor => Some(
                match self.prev_pass_color {
                    Some(ref at) => &at.texture,
                    None => &self.dummy_cache_texture,
                }
            ),
            TextureSource::External(..) => {
                panic!("BUG: External textures cannot be resolved, they can only be bound.");
            }
            TextureSource::TextureCache(index) => {
                Some(&self.texture_cache_map[&index])
            }
            TextureSource::RenderTaskCache(saved_index) => {
                Some(&self.saved_targets[saved_index.0])
            }
        }
    }

    fn report_memory(&self) -> MemoryReport {
        let mut report = MemoryReport::default();

        // We're reporting GPU memory rather than heap-allocations, so we don't
        // use size_of_op.
        for t in self.texture_cache_map.values() {
            report.texture_cache_textures += t.size_in_bytes();
        }
        for t in self.render_target_pool.iter() {
            report.render_target_textures += t.size_in_bytes();
        }

        report
    }
}

#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum BlendMode {
    None,
    Alpha,
    PremultipliedAlpha,
    PremultipliedDestOut,
    SubpixelDualSource,
    SubpixelConstantTextColor(ColorF),
    SubpixelWithBgColor,
}

/// Tracks the state of each row in the GPU cache texture.
struct CacheRow {
    /// Mirrored block data on CPU for this row. We store a copy of
    /// the data on the CPU side to improve upload batching.
    cpu_blocks: Box<[GpuBlockData; MAX_VERTEX_TEXTURE_WIDTH]>,
    /// True if this row is dirty.
    is_dirty: bool,
}

impl CacheRow {
    fn new() -> Self {
        CacheRow {
            cpu_blocks: Box::new([GpuBlockData::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]),
            is_dirty: false,
        }
    }
}

/// The bus over which CPU and GPU versions of the GPU cache
/// get synchronized.
enum GpuCacheBus {
    /// PBO-based updates. These currently operate at row granularity and are
    /// therefore subject to fragmentation issues.
    PixelBuffer {
        /// PBO used for transfers.
        buffer: PBO,
        /// Per-row data.
        rows: Vec<CacheRow>,
    },
    /// Shader-based scattering updates. Currently rendered by a set
    /// of points into the GPU texture, each carrying a `GpuBlockData`.
    Scatter {
        /// Special program to run the scattered update.
        program: Program,
        /// VAO containing the source vertex buffers.
        vao: CustomVAO,
        /// VBO for positional data, supplied as normalized `u16`.
        buf_position: VBO<[u16; 2]>,
        /// VBO for gpu block data.
        buf_value: VBO<GpuBlockData>,
        /// Currently stored block count.
        count: usize,
    },
}

/// The device-specific representation of the cache texture in gpu_cache.rs
struct GpuCacheTexture {
    texture: Option<Texture>,
    bus: GpuCacheBus,
}

impl GpuCacheTexture {

    /// Ensures that we have an appropriately-sized texture, creating a new one
    /// (and blitting over the old contents) if necessary.
    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
        // If we already have a texture that works, we're done.
        if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
            if GPU_CACHE_RESIZE_TEST {
                // Special debug mode - resize the texture even though it's fine.
            } else {
                return;
            }
        }

        // Take the old texture, if any.
        let blit_source = self.texture.take();

        // Create the new texture.
        assert!(height >= 2, "Height is too small for ANGLE");
        let new_size = DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, height);
        // If glCopyImageSubData is supported, this texture doesn't need
        // to be a render target. This prevents GL errors due to framebuffer
        // incompleteness on devices that don't support RGBAF32 render targets.
        // TODO(gw): We still need a proper solution for the subset of devices
        //           that don't support glCopyImageSubData *OR* rendering to a
        //           RGBAF32 render target. These devices will currently fail
        //           to resize the GPU cache texture.
        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
        let rt_info = if supports_copy_image_sub_data {
            None
        } else {
            Some(RenderTargetInfo { has_depth: false })
        };
        let mut texture = device.create_texture(
            TextureTarget::Default,
            ImageFormat::RGBAF32,
            new_size.width,
            new_size.height,
            TextureFilter::Nearest,
            rt_info,
            1,
        );

        // Blit the contents of the previous texture, if applicable.
        if let Some(blit_source) = blit_source {
            device.blit_renderable_texture(&mut texture, &blit_source);
            device.delete_texture(blit_source);
        }

        self.texture = Some(texture);
    }

    fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
        let bus = if use_scatter {
            let program = device.create_program_linked(
                "gpu_cache_update",
                String::new(),
                &desc::GPU_CACHE_UPDATE,
            )?;
            let buf_position = device.create_vbo();
            let buf_value = device.create_vbo();
            //Note: the vertex attributes have to be supplied in the same order
            // as for program creation, but each assigned to a different stream.
            let vao = device.create_custom_vao(&[
                buf_position.stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                buf_value   .stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[1..2]),
            ]);
            GpuCacheBus::Scatter {
                program,
                vao,
                buf_position,
                buf_value,
                count: 0,
            }
        } else {
            let buffer = device.create_pbo();
            GpuCacheBus::PixelBuffer {
                buffer,
                rows: Vec::new(),
            }
        };

        Ok(GpuCacheTexture {
            texture: None,
            bus,
        })
    }

    fn deinit(mut self, device: &mut Device) {
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
        match self.bus {
            GpuCacheBus::PixelBuffer { buffer, ..} => {
                device.delete_pbo(buffer);
            }
            GpuCacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
                device.delete_program(program);
                device.delete_custom_vao(vao);
                device.delete_vbo(buf_position);
                device.delete_vbo(buf_value);
            }
        }
    }

    fn get_height(&self) -> i32 {
        self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
    }

    fn prepare_for_updates(
        &mut self,
        device: &mut Device,
        total_block_count: usize,
        max_height: i32,
    ) {
        self.ensure_texture(device, max_height);
        match self.bus {
            GpuCacheBus::PixelBuffer { .. } => {},
            GpuCacheBus::Scatter {
                ref mut buf_position,
                ref mut buf_value,
                ref mut count,
                ..
            } => {
                *count = 0;
                if total_block_count > buf_value.allocated_count() {
                    device.allocate_vbo(buf_position, total_block_count, VertexUsageHint::Stream);
                    device.allocate_vbo(buf_value,    total_block_count, VertexUsageHint::Stream);
                }
            }
        }
    }

    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
        match self.bus {
            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            let row = address.v as usize;

                            // Ensure that the CPU-side shadow copy of the GPU cache data has enough
                            // rows to apply this patch.
                            while rows.len() <= row {
                                // Add a new row.
                                rows.push(CacheRow::new());
                            }

                            // This row is dirty (needs to be updated in GPU texture).
                            rows[row].is_dirty = true;

                            // Copy the blocks from the patch array into the shadow CPU copy.
                            let block_offset = address.u as usize;
                            let data = &mut rows[row].cpu_blocks;
                            for i in 0 .. block_count {
                                data[block_offset + i] = updates.blocks[block_index + i];
                            }
                        }
                    }
                }
            }
            GpuCacheBus::Scatter {
                ref buf_position,
                ref buf_value,
                ref mut count,
                ..
            } => {
                //TODO: re-use this heap allocation
                // Unused positions will be left as 0xFFFF, which translates to
                // (1.0, 1.0) in the vertex output position and gets culled out
                let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
                let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();

                for update in &updates.updates {
                    match *update {
                        GpuCacheUpdate::Copy {
                            block_index,
                            block_count,
                            address,
                        } => {
                            // Convert the absolute texel position into a normalized
                            // (U16Norm) position: texel centers sit at (2n + 1) / (2 * size),
                            // so scaling by 1 << 15 maps them onto the full u16 range.
                            let y = ((2*address.v as usize + 1) << 15) / size.height;
                            for i in 0 .. block_count {
                                let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
                                position_data[block_index + i] = [x as _, y as _];
                            }
                        }
                    }
                }

                device.fill_vbo(buf_value, &updates.blocks, *count);
                device.fill_vbo(buf_position, &position_data, *count);
                *count += position_data.len();
            }
        }
    }

    fn flush(&mut self, device: &mut Device) -> usize {
        let texture = self.texture.as_ref().unwrap();
        match self.bus {
            GpuCacheBus::PixelBuffer { ref buffer, ref mut rows } => {
                let rows_dirty = rows
                    .iter()
                    .filter(|row| row.is_dirty)
                    .count();
                if rows_dirty == 0 {
                    return 0
                }

                let mut uploader = device.upload_texture(
                    texture,
                    buffer,
                    rows_dirty * MAX_VERTEX_TEXTURE_WIDTH,
                );

                for (row_index, row) in rows.iter_mut().enumerate() {
                    if !row.is_dirty {
                        continue;
                    }

                    let rect = DeviceIntRect::new(
                        DeviceIntPoint::new(0, row_index as i32),
                        DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, 1),
                    );

                    uploader.upload(rect, 0, None, &*row.cpu_blocks);

                    row.is_dirty = false;
                }

                rows_dirty
            }
            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                device.disable_depth();
                device.set_blend(false);
                device.bind_program(program);
                device.bind_custom_vao(vao);
                device.bind_draw_target(
                    DrawTarget::Texture {
                        texture,
                        layer: 0,
                        with_depth: false,
                    },
                );
                device.draw_nonindexed_points(0, count as _);
                0
            }
        }
    }
}
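
// A sketch of the per-frame GPU cache update cycle using the methods above
// (`device` and the pending update list are owned by the `Renderer`):
//
//   gpu_cache_texture.prepare_for_updates(device, total_block_count, max_height);
//   gpu_cache_texture.update(device, &update_list);
//   let rows_flushed = gpu_cache_texture.flush(device);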

struct VertexDataTexture<T> {
    texture: Option<Texture>,
    format: ImageFormat,
    pbo: PBO,
    _marker: PhantomData<T>,
}

impl<T> VertexDataTexture<T> {
    fn new(
        device: &mut Device,
        format: ImageFormat,
    ) -> Self {
        VertexDataTexture {
            texture: None,
            format,
            pbo: device.create_pbo(),
            _marker: PhantomData,
        }
    }

    /// Returns a borrow of the GPU texture. Panics if it hasn't been initialized.
    fn texture(&self) -> &Texture {
        self.texture.as_ref().unwrap()
    }

    /// Returns an estimate of the GPU memory consumed by this VertexDataTexture.
    fn size_in_bytes(&self) -> usize {
        self.texture.as_ref().map_or(0, |t| t.size_in_bytes())
    }

    fn update(&mut self, device: &mut Device, data: &mut Vec<T>) {
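        // Each texel in these data textures is 16 bytes wide (four 32-bit
        // channels), so every item must occupy a whole number of texels.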
        debug_assert!(mem::size_of::<T>() % 16 == 0);
        let texels_per_item = mem::size_of::<T>() / 16;
        let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / texels_per_item;

        // Ensure we always end up with a texture when leaving this method.
        if data.is_empty() {
            if self.texture.is_some() {
                return;
            }
            data.push(unsafe { mem::uninitialized() });
        }

        // Extend the data array to be a multiple of the row size.
        // This ensures memory safety when the array is passed to
        // OpenGL to upload to the GPU.
        if items_per_row != 0 {
            while data.len() % items_per_row != 0 {
                data.push(unsafe { mem::uninitialized() });
            }
        }

        let width =
            (MAX_VERTEX_TEXTURE_WIDTH - (MAX_VERTEX_TEXTURE_WIDTH % texels_per_item)) as i32;
        let needed_height = (data.len() / items_per_row) as i32;
        let existing_height = self.texture.as_ref().map_or(0, |t| t.get_dimensions().height);

        // Create a new texture if needed.
        //
        // These textures are generally very small, which is why we don't bother
        // with incremental updates and just re-upload every frame. For most pages
        // they're one row each, and on stress tests like css-francine they end up
        // in the 6-14 range. So we size the texture tightly to what we need (usually
        // 1), and shrink it if the waste would be more than 10 rows. This helps
        // with memory overhead, especially because there are several instances of
        // these textures per Renderer.
        if needed_height > existing_height || needed_height + 10 < existing_height {
            // Drop the existing texture, if any.
            if let Some(t) = self.texture.take() {
                device.delete_texture(t);
            }

            let texture = device.create_texture(
                TextureTarget::Default,
                self.format,
                width,
                // Ensure height is at least two to work around
                // https://bugs.chromium.org/p/angleproject/issues/detail?id=3039
                needed_height.max(2),
                TextureFilter::Nearest,
                None,
                1,
            );
            self.texture = Some(texture);
        }

        let rect = DeviceIntRect::new(
            DeviceIntPoint::zero(),
            DeviceIntSize::new(width, needed_height),
        );
        device
            .upload_texture(self.texture(), &self.pbo, 0)
            .upload(rect, 0, None, data);
    }

    fn deinit(mut self, device: &mut Device) {
        device.delete_pbo(self.pbo);
        if let Some(t) = self.texture.take() {
            device.delete_texture(t);
        }
    }
}
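
// Each `VertexDataTexture` is re-uploaded from a CPU-side `Vec` once per
// frame, e.g. (a sketch; `headers_f` stands in for the frame's float
// primitive headers):
//
//   self.prim_header_f_texture.update(&mut self.device, &mut headers_f);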

struct FrameOutput {
    last_access: GpuFrameId,
    fbo_id: FBOId,
}

#[derive(PartialEq)]
struct TargetSelector {
    size: DeviceIntSize,
    num_layers: usize,
    format: ImageFormat,
}

struct LazyInitializedDebugRenderer {
    debug_renderer: Option<DebugRenderer>,
    failed: bool,
}

impl LazyInitializedDebugRenderer {
    pub fn new() -> Self {
        Self {
            debug_renderer: None,
            failed: false,
        }
    }

    pub fn get_mut<'a>(&'a mut self, device: &mut Device) -> Option<&'a mut DebugRenderer> {
        if self.failed {
            return None;
        }
        if self.debug_renderer.is_none() {
            match DebugRenderer::new(device) {
                Ok(renderer) => { self.debug_renderer = Some(renderer); }
                Err(_) => {
                    // The shader compilation code already logs errors.
                    self.failed = true;
                }
            }
        }

        self.debug_renderer.as_mut()
    }

    /// Returns mut ref to `DebugRenderer` if one already exists, otherwise returns `None`.
    pub fn try_get_mut<'a>(&'a mut self) -> Option<&'a mut DebugRenderer> {
        self.debug_renderer.as_mut()
    }

    pub fn deinit(self, device: &mut Device) {
        if let Some(debug_renderer) = self.debug_renderer {
            debug_renderer.deinit(device);
        }
    }
}

// NB: If you add more VAOs here, be sure to deinitialize them in
// `Renderer::deinit()` below.
pub struct RendererVAOs {
    prim_vao: VAO,
    blur_vao: VAO,
    clip_vao: VAO,
    border_vao: VAO,
    line_vao: VAO,
    scale_vao: VAO,
    gradient_vao: VAO,
    resolve_vao: VAO,
}

/// The renderer is responsible for submitting to the GPU the work prepared by the
/// RenderBackend.
///
/// We have a separate `Renderer` instance for each instance of WebRender (generally
/// one per OS window), and all instances share the same thread.
pub struct Renderer {
    result_rx: Receiver<ResultMsg>,
    debug_server: DebugServer,
    pub device: Device,
    pending_texture_updates: Vec<TextureUpdateList>,
    pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
    pending_gpu_cache_clear: bool,
    pending_shader_updates: Vec<PathBuf>,
    active_documents: Vec<(DocumentId, RenderedDocument)>,

    shaders: Rc<RefCell<Shaders>>,

    pub gpu_glyph_renderer: GpuGlyphRenderer,

    max_recorded_profiles: usize,

    clear_color: Option<ColorF>,
    enable_clear_scissor: bool,
    debug: LazyInitializedDebugRenderer,
    debug_flags: DebugFlags,
    backend_profile_counters: BackendProfileCounters,
    profile_counters: RendererProfileCounters,
    resource_upload_time: u64,
    gpu_cache_upload_time: u64,
    profiler: Profiler,
    new_frame_indicator: ChangeIndicator,
    new_scene_indicator: ChangeIndicator,
    slow_frame_indicator: ChangeIndicator,

    last_time: u64,

    pub gpu_profile: GpuProfiler<GpuProfileTag>,
    vaos: RendererVAOs,

    prim_header_f_texture: VertexDataTexture<PrimitiveHeaderF>,
    prim_header_i_texture: VertexDataTexture<PrimitiveHeaderI>,
    transforms_texture: VertexDataTexture<TransformData>,
    render_task_texture: VertexDataTexture<RenderTaskData>,
    gpu_cache_texture: GpuCacheTexture,

    /// When the GPU cache debugger is enabled, we keep track of the live blocks
    /// in the GPU cache so that we can use them for the debug display. This
    /// member stores those live blocks, indexed by row.
    gpu_cache_debug_chunks: Vec<Vec<GpuCacheDebugChunk>>,

    gpu_cache_frame_id: FrameId,
    gpu_cache_overflow: bool,

    pipeline_info: PipelineInfo,

    /// Manages and resolves source texture IDs to real texture IDs.
    texture_resolver: TextureResolver,

    /// A PBO used to do asynchronous texture cache uploads.
    texture_cache_upload_pbo: PBO,

    dither_matrix_texture: Option<Texture>,

    /// Optional trait object that allows the client
    /// application to provide external buffers for image data.
    external_image_handler: Option<Box<ExternalImageHandler>>,

    /// Optional trait object that allows the client
    /// application to provide a texture handle to
    /// copy the WR output to.
    output_image_handler: Option<Box<OutputImageHandler>>,

    /// Optional function pointers for measuring memory used by a given
    /// heap-allocated pointer.
    size_of_ops: Option<MallocSizeOfOps>,

    /// Currently allocated FBOs for output frames.
    output_targets: FastHashMap<u32, FrameOutput>,

    pub renderer_errors: Vec<RendererError>,

    /// List of profile results from previous frames. Can be retrieved
    /// via get_frame_profiles().
    cpu_profiles: VecDeque<CpuProfile>,
    gpu_profiles: VecDeque<GpuProfile>,

    /// Notification requests to be fulfilled after rendering.
    notifications: Vec<NotificationRequest>,

    framebuffer_size: Option<FramebufferIntSize>,

    /// A lazily created texture for the zoom debugging widget.
    zoom_debug_texture: Option<Texture>,

    /// The current mouse position. This is used for debugging
    /// functionality only, such as the debug zoom widget.
    cursor_position: DeviceIntPoint,

    #[cfg(feature = "capture")]
    read_fbo: FBOId,
    #[cfg(feature = "replay")]
    owned_external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,
}

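/// Errors that can be produced while creating or running the `Renderer`.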
#[derive(Debug)]
pub enum RendererError {
    Shader(ShaderError),
    Thread(std::io::Error),
    Resource(ResourceCacheError),
    MaxTextureSize,
}

impl From<ShaderError> for RendererError {
    fn from(err: ShaderError) -> Self {
        RendererError::Shader(err)
    }
}

impl From<std::io::Error> for RendererError {
    fn from(err: std::io::Error) -> Self {
        RendererError::Thread(err)
    }
}

impl From<ResourceCacheError> for RendererError {
    fn from(err: ResourceCacheError) -> Self {
        RendererError::Resource(err)
    }
}

impl Renderer {
    /// Initializes WebRender and creates a `Renderer` and `RenderApiSender`.
    ///
    /// # Examples
    /// Initializes a `Renderer` with some reasonable values. For more information see
    /// [`RendererOptions`][rendereroptions].
    ///
    /// ```rust,ignore
    /// # use webrender::renderer::Renderer;
    /// # use std::path::PathBuf;
    /// let opts = webrender::RendererOptions {
    ///     device_pixel_ratio: 1.0,
    ///     resource_override_path: None,
    ///     enable_aa: false,
    ///     ..Default::default()
    /// };
    /// // `gl` and `notifier` are assumed to be supplied by the embedder.
    /// let (renderer, sender) = Renderer::new(gl, notifier, opts, None).unwrap();
    /// ```
    /// [rendereroptions]: struct.RendererOptions.html
    pub fn new(
        gl: Rc<gl::Gl>,
        notifier: Box<RenderNotifier>,
        mut options: RendererOptions,
        shaders: Option<&mut WrShaders>
    ) -> Result<(Self, RenderApiSender), RendererError> {
        HAS_BEEN_INITIALIZED.store(true, Ordering::SeqCst);

        let (api_tx, api_rx) = channel::msg_channel()?;
        let (payload_tx, payload_rx) = channel::payload_channel()?;
        let (result_tx, result_rx) = channel();
        let gl_type = gl.get_type();

        let debug_server = DebugServer::new(api_tx.clone());

        let mut device = Device::new(
            gl,
            options.resource_override_path.clone(),
            options.upload_method.clone(),
            options.cached_programs.take(),
            options.allow_pixel_local_storage_support,
        );

        let ext_dual_source_blending =
            !options.disable_dual_source_blending &&
            // If using pixel local storage, subpixel AA isn't supported (we disable it on all
            // mobile devices explicitly anyway).
            !device.get_capabilities().supports_pixel_local_storage &&
            device.supports_extension("GL_ARB_blend_func_extended") &&
            device.supports_extension("GL_ARB_explicit_attrib_location");

        // 512 is the minimum that the texture cache can work with.
        const MIN_TEXTURE_SIZE: i32 = 512;
        if let Some(user_limit) = options.max_texture_size {
            assert!(user_limit >= MIN_TEXTURE_SIZE);
            device.clamp_max_texture_size(user_limit);
        }
        if device.max_texture_size() < MIN_TEXTURE_SIZE {
            // Broken GL contexts can return a max texture size of zero (See #1260).
            // Better to gracefully fail now than panic as soon as a texture is allocated.
            error!(
                "Device reporting insufficient max texture size ({})",
                device.max_texture_size()
            );
            return Err(RendererError::MaxTextureSize);
        }
        let max_texture_size = device.max_texture_size();
        let max_texture_layers = device.max_texture_layers();

        register_thread_with_profiler("Compositor".to_owned());

        device.begin_frame();

        let shaders = match shaders {
            Some(shaders) => Rc::clone(&shaders.shaders),
            None => Rc::new(RefCell::new(Shaders::new(&mut device, gl_type, &options)?)),
        };

        let backend_profile_counters = BackendProfileCounters::new();

        let dither_matrix_texture = if options.enable_dithering {
            let dither_matrix: [u8; 64] = [
                 0, 48, 12, 60,  3, 51, 15, 63,
                32, 16, 44, 28, 35, 19, 47, 31,
                 8, 56,  4, 52, 11, 59,  7, 55,
                40, 24, 36, 20, 43, 27, 39, 23,
                 2, 50, 14, 62,  1, 49, 13, 61,
                34, 18, 46, 30, 33, 17, 45, 29,
                10, 58,  6, 54,  9, 57,  5, 53,
                42, 26, 38, 22, 41, 25, 37, 21,
            ];

            let mut texture = device.create_texture(
                TextureTarget::Default,
                ImageFormat::R8,
                8,
                8,
                TextureFilter::Nearest,
                None,
                1,
            );
            device.upload_texture_immediate(&texture, &dither_matrix);

            Some(texture)
        } else {
            None
        };

        let x0 = 0.0;
        let y0 = 0.0;
        let x1 = 1.0;
        let y1 = 1.0;

        let quad_indices: [u16; 6] = [0, 1, 2, 2, 1, 3];
        let quad_vertices = [
            PackedVertex { pos: [x0, y0] },
            PackedVertex { pos: [x1, y0] },
            PackedVertex { pos: [x0, y1] },
            PackedVertex { pos: [x1, y1] },
        ];

        let prim_vao = device.create_vao(&desc::PRIM_INSTANCES);
        device.bind_vao(&prim_vao);
        device.update_vao_indices(&prim_vao, &quad_indices, VertexUsageHint::Static);
        device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);

        let gpu_glyph_renderer = GpuGlyphRenderer::new(
            &mut device,
            &prim_vao,
            options.precache_flags,
        )?;

        let blur_vao = device.create_vao_with_new_instances(&desc::BLUR, &prim_vao);
        let clip_vao = device.create_vao_with_new_instances(&desc::CLIP, &prim_vao);
        let border_vao = device.create_vao_with_new_instances(&desc::BORDER, &prim_vao);
        let scale_vao = device.create_vao_with_new_instances(&desc::SCALE, &prim_vao);
        let line_vao = device.create_vao_with_new_instances(&desc::LINE, &prim_vao);
        let gradient_vao = device.create_vao_with_new_instances(&desc::GRADIENT, &prim_vao);
        let resolve_vao = device.create_vao_with_new_instances(&desc::RESOLVE, &prim_vao);
        let texture_cache_upload_pbo = device.create_pbo();

        let texture_resolver = TextureResolver::new(&mut device);

        let prim_header_f_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
        let prim_header_i_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAI32);
        let transforms_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
        let render_task_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);

        let gpu_cache_texture = GpuCacheTexture::new(
            &mut device,
            options.scatter_gpu_cache_updates,
        )?;

        device.end_frame();

        let backend_notifier = notifier.clone();

        let default_font_render_mode = match (options.enable_aa, options.enable_subpixel_aa) {
            (true, true) => FontRenderMode::Subpixel,
            (true, false) => FontRenderMode::Alpha,
            (false, _) => FontRenderMode::Mono,
        };

        let config = FrameBuilderConfig {
            default_font_render_mode,
            dual_source_blending_is_enabled: true,
            dual_source_blending_is_supported: ext_dual_source_blending,
            chase_primitive: options.chase_primitive,
            enable_picture_caching: options.enable_picture_caching,
            testing: options.testing,
            gpu_supports_fast_clears: options.gpu_supports_fast_clears,
        };

        let device_pixel_ratio = options.device_pixel_ratio;
        let debug_flags = options.debug_flags;
        let payload_rx_for_backend = payload_rx.to_mpsc_receiver();
        let size_of_op = options.size_of_op;
        let enclosing_size_of_op = options.enclosing_size_of_op;
        let make_size_of_ops =
            move || size_of_op.map(|o| MallocSizeOfOps::new(o, enclosing_size_of_op));
        let recorder = options.recorder;
        let thread_listener = Arc::new(options.thread_listener);
        let thread_listener_for_rayon_start = thread_listener.clone();
        let thread_listener_for_rayon_end = thread_listener.clone();
        let workers = options
            .workers
            .take()
            .unwrap_or_else(|| {
                let worker = ThreadPoolBuilder::new()
                    .thread_name(|idx| format!("WRWorker#{}", idx))
                    .start_handler(move |idx| {
                        register_thread_with_profiler(format!("WRWorker#{}", idx));
                        if let Some(ref thread_listener) = *thread_listener_for_rayon_start {
                            thread_listener.thread_started(&format!("WRWorker#{}", idx));
                        }
                    })
                    .exit_handler(move |idx| {
                        if let Some(ref thread_listener) = *thread_listener_for_rayon_end {
                            thread_listener.thread_stopped(&format!("WRWorker#{}", idx));
                        }
                    })
                    .build();
                Arc::new(worker.unwrap())
            });
        let sampler = options.sampler;
        let namespace_alloc_by_client = options.namespace_alloc_by_client;

        let blob_image_handler = options.blob_image_handler.take();
        let thread_listener_for_render_backend = thread_listener.clone();
        let thread_listener_for_scene_builder = thread_listener.clone();
        let thread_listener_for_lp_scene_builder = thread_listener.clone();
        let scene_builder_hooks = options.scene_builder_hooks;
        let rb_thread_name = format!("WRRenderBackend#{}", options.renderer_id.unwrap_or(0));
        let scene_thread_name = format!("WRSceneBuilder#{}", options.renderer_id.unwrap_or(0));
        let lp_scene_thread_name = format!("WRSceneBuilderLP#{}", options.renderer_id.unwrap_or(0));
        let glyph_rasterizer = GlyphRasterizer::new(workers)?;

        let (scene_builder, scene_tx, scene_rx) = SceneBuilder::new(
            config,
            api_tx.clone(),
            scene_builder_hooks,
            make_size_of_ops(),
        );
        thread::Builder::new().name(scene_thread_name.clone()).spawn(move || {
            register_thread_with_profiler(scene_thread_name.clone());
            if let Some(ref thread_listener) = *thread_listener_for_scene_builder {
                thread_listener.thread_started(&scene_thread_name);
            }

            let mut scene_builder = scene_builder;
            scene_builder.run();

            if let Some(ref thread_listener) = *thread_listener_for_scene_builder {
                thread_listener.thread_stopped(&scene_thread_name);
            }
        })?;

        let low_priority_scene_tx = if options.support_low_priority_transactions {
            let (low_priority_scene_tx, low_priority_scene_rx) = channel();
            let lp_builder = LowPrioritySceneBuilder {
                rx: low_priority_scene_rx,
                tx: scene_tx.clone(),
                simulate_slow_ms: 0,
            };

            thread::Builder::new().name(lp_scene_thread_name.clone()).spawn(move || {
                register_thread_with_profiler(lp_scene_thread_name.clone());
                if let Some(ref thread_listener) = *thread_listener_for_lp_scene_builder {
                    thread_listener.thread_started(&lp_scene_thread_name);
                }

                let mut scene_builder = lp_builder;
                scene_builder.run();

                if let Some(ref thread_listener) = *thread_listener_for_lp_scene_builder {
                    thread_listener.thread_stopped(&lp_scene_thread_name);
                }
            })?;

            low_priority_scene_tx
        } else {
            scene_tx.clone()
        };

        thread::Builder::new().name(rb_thread_name.clone()).spawn(move || {
            register_thread_with_profiler(rb_thread_name.clone());
            if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                thread_listener.thread_started(&rb_thread_name);
            }

            let texture_cache = TextureCache::new(
                max_texture_size,
                max_texture_layers,
                TileCache::tile_dimensions(config.testing),
            );

            let resource_cache = ResourceCache::new(
                texture_cache,
                glyph_rasterizer,
                blob_image_handler,
            );

            let mut backend = RenderBackend::new(
                api_rx,
                payload_rx_for_backend,
                result_tx,
                scene_tx,
                low_priority_scene_tx,
                scene_rx,
                device_pixel_ratio,
                resource_cache,
                backend_notifier,
                config,
                recorder,
                sampler,
                make_size_of_ops(),
                debug_flags,
                namespace_alloc_by_client,
            );
            backend.run(backend_profile_counters);
            if let Some(ref thread_listener) = *thread_listener_for_render_backend {
                thread_listener.thread_stopped(&rb_thread_name);
            }
        })?;

        let ext_debug_marker = device.supports_extension("GL_EXT_debug_marker");
        let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()), ext_debug_marker);
        #[cfg(feature = "capture")]
        let read_fbo = device.create_fbo();

        let mut renderer = Renderer {
            result_rx,
            debug_server,
            device,
            active_documents: Vec::new(),
            pending_texture_updates: Vec::new(),
            pending_gpu_cache_updates: Vec::new(),
            pending_gpu_cache_clear: false,
            pending_shader_updates: Vec::new(),
            shaders,
            debug: LazyInitializedDebugRenderer::new(),
            debug_flags: DebugFlags::empty(),
            backend_profile_counters: BackendProfileCounters::new(),
            profile_counters: RendererProfileCounters::new(),
            resource_upload_time: 0,
            gpu_cache_upload_time: 0,
            profiler: Profiler::new(),
            new_frame_indicator: ChangeIndicator::new(),
            new_scene_indicator: ChangeIndicator::new(),
            slow_frame_indicator: ChangeIndicator::new(),
            max_recorded_profiles: options.max_recorded_profiles,
            clear_color: options.clear_color,
            enable_clear_scissor: options.enable_clear_scissor,
            last_time: 0,
            gpu_profile,
            gpu_glyph_renderer,
            vaos: RendererVAOs {
                prim_vao,
                blur_vao,
                clip_vao,
                border_vao,
                scale_vao,
                gradient_vao,
                resolve_vao,
                line_vao,
            },
            transforms_texture,
            prim_header_i_texture,
            prim_header_f_texture,
            render_task_texture,
            pipeline_info: PipelineInfo::default(),
            dither_matrix_texture,
            external_image_handler: None,
            output_image_handler: None,
            size_of_ops: make_size_of_ops(),
            output_targets: FastHashMap::default(),
            cpu_profiles: VecDeque::new(),
            gpu_profiles: VecDeque::new(),
            gpu_cache_texture,
            gpu_cache_debug_chunks: Vec::new(),
            gpu_cache_frame_id: FrameId::INVALID,
            gpu_cache_overflow: false,
            texture_cache_upload_pbo,
            texture_resolver,
            renderer_errors: Vec::new(),
            #[cfg(feature = "capture")]
            read_fbo,
            #[cfg(feature = "replay")]
            owned_external_images: FastHashMap::default(),
            notifications: Vec::new(),
            framebuffer_size: None,
            zoom_debug_texture: None,
            cursor_position: DeviceIntPoint::zero(),
        };

        // The debug flags are initially set to the default (empty) set above;
        // calling set_debug_flags here ensures that any transition logic for
        // newly enabled flags actually runs.
        renderer.set_debug_flags(debug_flags);

        let sender = RenderApiSender::new(api_tx, payload_tx);
        Ok((renderer, sender))
    }

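    /// Returns the framebuffer size most recently passed to `render()`, if any.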
    pub fn framebuffer_size(&self) -> Option<FramebufferIntSize> {
        self.framebuffer_size
    }

    /// Update the current position of the debug cursor.
    pub fn set_cursor_position(
        &mut self,
        position: DeviceIntPoint,
    ) {
        self.cursor_position = position;
    }

    pub fn get_max_texture_size(&self) -> i32 {
        self.device.max_texture_size()
    }

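    /// Returns the graphics API kind along with the driver's version and
    /// renderer strings.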
    pub fn get_graphics_api_info(&self) -> GraphicsApiInfo {
        GraphicsApiInfo {
            kind: GraphicsApi::OpenGL,
            version: self.device.gl().get_string(gl::VERSION),
            renderer: self.device.gl().get_string(gl::RENDERER),
        }
    }

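    /// Takes (and resets) the accumulated pipeline epoch and removal
    /// information published by the backend since the last flush.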
    pub fn flush_pipeline_info(&mut self) -> PipelineInfo {
        mem::replace(&mut self.pipeline_info, PipelineInfo::default())
    }

    /// Updates the program cache with new binaries, e.g. when some of the
    /// lazily loaded shader programs were activated in the meantime.
    pub fn update_program_cache(&mut self, cached_programs: Rc<ProgramCache>) {
        self.device.update_program_cache(cached_programs);
    }

    /// Processes the result queue.
    ///
    /// Should be called before `render()`, as texture cache updates are done here.
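    ///
    /// # Example
    ///
    /// A minimal sketch of a per-frame loop; `renderer` and `framebuffer_size`
    /// are assumed to be provided by the embedder:
    ///
    /// ```rust,ignore
    /// // Process pending backend results (texture cache updates, new
    /// // documents) before drawing the frame.
    /// renderer.update();
    /// renderer.render(framebuffer_size).unwrap();
    /// ```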
    pub fn update(&mut self) {
        profile_scope!("update");
        // Pull any pending results and return the most recent.
        while let Ok(msg) = self.result_rx.try_recv() {
            match msg {
                ResultMsg::PublishPipelineInfo(mut pipeline_info) => {
                    for ((pipeline_id, document_id), epoch) in pipeline_info.epochs {
                        self.pipeline_info.epochs.insert((pipeline_id, document_id), epoch);
                    }
                    self.pipeline_info.removed_pipelines.extend(pipeline_info.removed_pipelines.drain(..));
                }
                ResultMsg::PublishDocument(
                    document_id,
                    mut doc,
                    texture_update_list,
                    profile_counters,
                ) => {
                    if doc.is_new_scene {
                        self.new_scene_indicator.changed();
                    }

                    // Add a new document to the active set, expressed as a `Vec` in order
                    // to re-order based on `DocumentLayer` during rendering.
                    match self.active_documents.iter().position(|&(id, _)| id == document_id) {
                        Some(pos) => {
                            // If the document we are replacing must be drawn
                            // (in order to update the texture cache), issue
                            // a render just to off-screen targets.
                            if self.active_documents[pos].1.frame.must_be_drawn() {
                                let framebuffer_size = self.framebuffer_size;
                                self.render_impl(framebuffer_size).ok();
                            }
                            self.active_documents[pos].1 = doc;
                        }
                        None => self.active_documents.push((document_id, doc)),
                    }

                    // IMPORTANT: The pending texture cache updates must be applied
                    //            *after* the previous frame has been rendered above
                    //            (if necessary for a texture cache update). For
                    //            an example of why this is required:
                    //            1) Previous frame contains a render task that
                    //               targets Texture X.
                    //            2) New frame contains a texture cache update which
                    //               frees Texture X.
                    //            3) bad stuff happens.

                    //TODO: associate `document_id` with target window
                    self.pending_texture_updates.push(texture_update_list);
                    self.backend_profile_counters = profile_counters;
                }
                ResultMsg::UpdateGpuCache(mut list) => {
                    if list.clear {
                        self.pending_gpu_cache_clear = true;
                        self.gpu_cache_debug_chunks = Vec::new();
                    }
                    for cmd in mem::replace(&mut list.debug_commands, Vec::new()) {
                        match cmd {
                            GpuCacheDebugCmd::Alloc(chunk) => {
                                let row = chunk.address.v as usize;
                                if row >= self.gpu_cache_debug_chunks.len() {
                                    self.gpu_cache_debug_chunks.resize(row + 1, Vec::new());
                                }
                                self.gpu_cache_debug_chunks[row].push(chunk);
                            },
                            GpuCacheDebugCmd::Free(address) => {
                                let chunks = &mut self.gpu_cache_debug_chunks[address.v as usize];
                                let pos = chunks.iter()
                                    .position(|x| x.address == address).unwrap();
                                chunks.remove(pos);
                            },
                        }
                    }
                    self.pending_gpu_cache_updates.push(list);
                }
                ResultMsg::UpdateResources {
                    updates,
                    memory_pressure,
                } => {
                    self.pending_texture_updates.push(updates);
                    self.device.begin_frame();

                    self.update_texture_cache();

                    // Flush the render target pool on memory pressure.
                    //
                    // This needs to be separate from the block below because
                    // the device module asserts if we delete textures while
                    // not in a frame.
                    if memory_pressure {
                        self.texture_resolver.retain_targets(&mut self.device, |_| false);
                    }

                    self.device.end_frame();
                    // If we receive a `PublishDocument` message followed by this one
                    // within the same update we need to cancel the frame because we
                    // might have deleted the resources in use in the frame due to a
                    // memory pressure event.
                    if memory_pressure {
                        self.active_documents.clear();
                    }
                }
                ResultMsg::AppendNotificationRequests(mut notifications) => {
                    if self.pending_texture_updates.is_empty() {
                        drain_filter(
                            &mut notifications,
                            |n| { n.when() == Checkpoint::FrameTexturesUpdated },
                            |n| { n.notify(); },
                        );
                    }
                    self.notifications.append(&mut notifications);
                }
                ResultMsg::RefreshShader(path) => {
                    self.pending_shader_updates.push(path);
                }
                ResultMsg::DebugOutput(output) => match output {
                    DebugOutput::FetchDocuments(string) |
                    DebugOutput::FetchClipScrollTree(string) => {
                        self.debug_server.send(string);
                    }
                    #[cfg(feature = "capture")]
                    DebugOutput::SaveCapture(config, deferred) => {
                        self.save_capture(config, deferred);
                    }
                    #[cfg(feature = "replay")]
                    DebugOutput::LoadCapture(root, plain_externals) => {
                        self.active_documents.clear();
                        self.load_capture(root, plain_externals);
                    }
                },
                ResultMsg::DebugCommand(command) => {
                    self.handle_debug_command(command);
                }
            }
        }
    }

    #[cfg(not(feature = "debugger"))]
    fn get_screenshot_for_debugger(&mut self) -> String {
        // Avoid unused param warning.
        let _ = &self.debug_server;
        String::new()
    }

    #[cfg(feature = "debugger")]
    fn get_screenshot_for_debugger(&mut self) -> String {
        use api::ImageDescriptor;

        let desc = ImageDescriptor::new(1024, 768, ImageFormat::BGRA8, true, false);
        let data = self.device.read_pixels(&desc);
        let screenshot = debug_server::Screenshot::new(desc.size, data);

        serde_json::to_string(&screenshot).unwrap()
    }

    #[cfg(not(feature = "debugger"))]
    fn get_passes_for_debugger(&self) -> String {
        // Avoid unused param warning.
        let _ = &self.debug_server;
        String::new()
    }

    #[cfg(feature = "debugger")]
    fn debug_alpha_target(target: &AlphaRenderTarget) -> debug_server::Target {
        let mut debug_target = debug_server::Target::new("A8");

        debug_target.add(
            debug_server::BatchKind::Cache,
            "Scalings",
            target.scalings.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Cache,
            "Zero Clears",
            target.zero_clears.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Cache,
            "One Clears",
            target.one_clears.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Clip,
            "BoxShadows [p]",
            target.clip_batcher.primary_clips.box_shadows.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Clip,
            "BoxShadows [s]",
            target.clip_batcher.secondary_clips.box_shadows.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Cache,
            "Vertical Blur",
            target.vertical_blurs.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Cache,
            "Horizontal Blur",
            target.horizontal_blurs.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Clip,
            "Slow Rectangles [p]",
            target.clip_batcher.primary_clips.slow_rectangles.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Clip,
            "Fast Rectangles [p]",
            target.clip_batcher.primary_clips.fast_rectangles.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Clip,
            "Slow Rectangles [s]",
            target.clip_batcher.secondary_clips.slow_rectangles.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Clip,
            "Fast Rectangles [s]",
            target.clip_batcher.secondary_clips.fast_rectangles.len(),
        );
        for (_, items) in target.clip_batcher.primary_clips.images.iter() {
            debug_target.add(debug_server::BatchKind::Clip, "Image mask [p]", items.len());
        }
        for (_, items) in target.clip_batcher.secondary_clips.images.iter() {
            debug_target.add(debug_server::BatchKind::Clip, "Image mask [s]", items.len());
        }

        debug_target
    }

    #[cfg(feature = "debugger")]
    fn debug_color_target(target: &ColorRenderTarget) -> debug_server::Target {
        let mut debug_target = debug_server::Target::new("RGBA8");

        debug_target.add(
            debug_server::BatchKind::Cache,
            "Scalings",
            target.scalings.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Cache,
            "Readbacks",
            target.readbacks.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Cache,
            "Vertical Blur",
            target.vertical_blurs.len(),
        );
        debug_target.add(
            debug_server::BatchKind::Cache,
            "Horizontal Blur",
            target.horizontal_blurs.len(),
        );

        for alpha_batch_container in &target.alpha_batch_containers {
            for batch in alpha_batch_container.opaque_batches.iter().rev() {
                debug_target.add(
                    debug_server::BatchKind::Opaque,
                    batch.key.kind.debug_name(),
                    batch.instances.len(),
                );
            }

            for batch in &alpha_batch_container.alpha_batches {
                debug_target.add(
                    debug_server::BatchKind::Alpha,
                    batch.key.kind.debug_name(),
                    batch.instances.len(),
                );
            }
        }

        debug_target
    }

    #[cfg(feature = "debugger")]
    fn debug_texture_cache_target(target: &TextureCacheRenderTarget) -> debug_server::Target {
        let mut debug_target = debug_server::Target::new("Texture Cache");

        debug_target.add(
            debug_server::BatchKind::Cache,
            "Horizontal Blur",
            target.horizontal_blurs.len(),
        );

        debug_target
    }

    #[cfg(feature = "debugger")]
    fn get_passes_for_debugger(&self) -> String {
        let mut debug_passes = debug_server::PassList::new();

        for &(_, ref render_doc) in &self.active_documents {
            for pass in &render_doc.frame.passes {
                let mut debug_targets = Vec::new();
                match pass.kind {
                    RenderPassKind::MainFramebuffer(ref target) => {
                        debug_targets.push(Self::debug_color_target(target));
                    }
                    RenderPassKind::OffScreen { ref alpha, ref color, ref texture_cache } => {
                        debug_targets.extend(alpha.targets.iter().map(Self::debug_alpha_target));
                        debug_targets.extend(color.targets.iter().map(Self::debug_color_target));
                        debug_targets.extend(texture_cache.iter().map(|(_, target)| Self::debug_texture_cache_target(target)))
                    }
                }

                debug_passes.add(debug_server::Pass { targets: debug_targets });
            }
        }

        serde_json::to_string(&debug_passes).unwrap()
    }

    #[cfg(not(feature = "debugger"))]
    fn get_render_tasks_for_debugger(&self) -> String {
        String::new()
    }

    #[cfg(feature = "debugger")]
    fn get_render_tasks_for_debugger(&self) -> String {
        let mut debug_root = debug_server::RenderTaskList::new();

        for &(_, ref render_doc) in &self.active_documents {
            let debug_node = debug_server::TreeNode::new("document render tasks");
            let mut builder = debug_server::TreeNodeBuilder::new(debug_node);

            let render_tasks = &render_doc.frame.render_tasks;
            match render_tasks.tasks.last() {
                Some(main_task) => main_task.print_with(&mut builder, render_tasks),
                None => continue,
            };

            debug_root.add(builder.build());
        }

        serde_json::to_string(&debug_root).unwrap()
    }

    fn handle_debug_command(&mut self, command: DebugCommand) {
        match command {
            DebugCommand::EnableDualSourceBlending(_) => {
                panic!("Should be handled by render backend");
            }
            DebugCommand::FetchDocuments |
            DebugCommand::FetchClipScrollTree => {}
            DebugCommand::FetchRenderTasks => {
                let json = self.get_render_tasks_for_debugger();
                self.debug_server.send(json);
            }
            DebugCommand::FetchPasses => {
                let json = self.get_passes_for_debugger();
                self.debug_server.send(json);
            }
            DebugCommand::FetchScreenshot => {
                let json = self.get_screenshot_for_debugger();
                self.debug_server.send(json);
            }
            DebugCommand::SaveCapture(..) |
            DebugCommand::LoadCapture(..) => {
                panic!("Capture commands are not welcome here! Did you build with 'capture' feature?")
            }
            DebugCommand::ClearCaches(_)
            | DebugCommand::SimulateLongSceneBuild(_)
            | DebugCommand::SimulateLongLowPrioritySceneBuild(_) => {}
            DebugCommand::InvalidateGpuCache => {
                match self.gpu_cache_texture.bus {
                    GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                        info!("Invalidating GPU caches");
                        for row in rows {
                            row.is_dirty = true;
                        }
                    }
                    GpuCacheBus::Scatter { .. } => {
                        warn!("Unable to invalidate scattered GPU cache");
                    }
                }
            }
            DebugCommand::SetFlags(flags) => {
                self.set_debug_flags(flags);
            }
        }
    }

    /// Set a callback for handling external images.
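    ///
    /// A sketch of registering a handler; `MyExternalImageHandler` is a
    /// hypothetical type implementing `ExternalImageHandler`:
    ///
    /// ```rust,ignore
    /// renderer.set_external_image_handler(Box::new(MyExternalImageHandler::new()));
    /// ```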
    pub fn set_external_image_handler(&mut self, handler: Box<ExternalImageHandler>) {
        self.external_image_handler = Some(handler);
    }

    /// Set a callback for handling external outputs.
    pub fn set_output_image_handler(&mut self, handler: Box<OutputImageHandler>) {
        self.output_image_handler = Some(handler);
    }

    /// Retrieve (and clear) the current list of recorded frame profiles.
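    ///
    /// Profiles are only recorded when `RendererOptions::max_recorded_profiles`
    /// is non-zero. A short usage sketch, assuming such a `renderer`:
    ///
    /// ```rust,ignore
    /// let (cpu_profiles, gpu_profiles) = renderer.get_frame_profiles();
    /// for profile in &cpu_profiles {
    ///     // Inspect per-frame CPU timing data here.
    /// }
    /// ```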
    pub fn get_frame_profiles(&mut self) -> (Vec<CpuProfile>, Vec<GpuProfile>) {
        let cpu_profiles = self.cpu_profiles.drain(..).collect();
        let gpu_profiles = self.gpu_profiles.drain(..).collect();
        (cpu_profiles, gpu_profiles)
    }

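    /// Records that the embedder observed a slow frame, for display by the
    /// `SLOW_FRAME_INDICATOR` debug overlay.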
    pub fn notify_slow_frame(&mut self) {
        self.slow_frame_indicator.changed();
    }

    /// Renders the current frame.
    ///
    /// A Frame is supplied by calling [`generate_frame()`][webrender_api::Transaction::generate_frame].
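    ///
    /// # Example
    ///
    /// A minimal sketch of handling the result; `renderer` and
    /// `framebuffer_size` are assumed to come from the embedder:
    ///
    /// ```rust,ignore
    /// match renderer.render(framebuffer_size) {
    ///     Ok(results) => {
    ///         // e.g. inspect results.stats.texture_upload_kb, then swap buffers.
    ///     }
    ///     Err(errors) => {
    ///         // Each entry is a RendererError; log and decide how to recover.
    ///     }
    /// }
    /// ```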
    pub fn render(
        &mut self,
        framebuffer_size: FramebufferIntSize,
    ) -> Result<RenderResults, Vec<RendererError>> {
        self.framebuffer_size = Some(framebuffer_size);

        let result = self.render_impl(Some(framebuffer_size));

        drain_filter(
            &mut self.notifications,
            |n| { n.when() == Checkpoint::FrameRendered },
            |n| { n.notify(); },
        );

        // This is the end of the rendering pipeline. If any notifications are still
        // there, just clear them; they will automatically fire the
        // Checkpoint::TransactionDropped event. Otherwise they would just pile up in
        // this vector forever.
        self.notifications.clear();

        result
    }

    // If framebuffer_size is None, don't render to the main framebuffer.
    // This is useful for updating texture cache render tasks without
    // doing a full frame render.
    fn render_impl(
        &mut self,
        framebuffer_size: Option<FramebufferIntSize>,
    ) -> Result<RenderResults, Vec<RendererError>> {
        profile_scope!("render");
        let mut results = RenderResults::default();
        if self.active_documents.is_empty() {
            self.last_time = precise_time_ns();
            return Ok(results);
        }

        let mut frame_profiles = Vec::new();
        let mut profile_timers = RendererProfileTimers::new();

        // The texture resolver scope should be outside of any rendering, including
        // debug rendering. This ensures that when we return render targets to the
        // pool via glInvalidateFramebuffer, we don't do any debug rendering after
        // that point. Otherwise, the bind / invalidate / bind logic trips up the
        // render pass logic in tiled / mobile GPUs, resulting in an extra copy /
        // resolve step when the debug overlay is enabled.
        self.texture_resolver.begin_frame();

        let profile_samplers = {
            let _gm = self.gpu_profile.start_marker("build samples");
            // Block CPU waiting for last frame's GPU profiles to arrive.
            // In general this shouldn't block unless heavily GPU limited.
            let (gpu_frame_id, timers, samplers) = self.gpu_profile.build_samples();

            if self.max_recorded_profiles > 0 {
                while self.gpu_profiles.len() >= self.max_recorded_profiles {
                    self.gpu_profiles.pop_front();
                }
                self.gpu_profiles
                    .push_back(GpuProfile::new(gpu_frame_id, &timers));
            }
            profile_timers.gpu_samples = timers;
            samplers
        };

        let cpu_frame_id = profile_timers.cpu_time.profile(|| {
            let _gm = self.gpu_profile.start_marker("begin frame");
            let frame_id = self.device.begin_frame();
            self.gpu_profile.begin_frame(frame_id);

            self.device.disable_scissor();
            self.device.disable_depth();
            self.set_blend(false, FramebufferKind::Main);
            //self.update_shaders();

            self.update_texture_cache();

            frame_id
        });

        profile_timers.cpu_time.profile(|| {
            //Note: another borrowck dance
            let mut active_documents = mem::replace(&mut self.active_documents, Vec::default());
            // sort by the document layer id
            active_documents.sort_by_key(|&(_, ref render_doc)| render_doc.frame.layer);

            #[cfg(feature = "replay")]
            self.texture_resolver.external_images.extend(
                self.owned_external_images.iter().map(|(key, value)| (*key, value.clone()))
            );

            let last_document_index = active_documents.len() - 1;
            for (doc_index, (_, RenderedDocument { ref mut frame, .. })) in active_documents.iter_mut().enumerate() {
                frame.profile_counters.reset_targets();
                self.prepare_gpu_cache(frame);
                assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id,
                    "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})",
                    frame.gpu_cache_frame_id, self.gpu_cache_frame_id);

                self.draw_tile_frame(
                    frame,
                    framebuffer_size,
                    cpu_frame_id,
                    &mut results.stats,
                    doc_index == 0,
                );

                if framebuffer_size.is_some() {
                    self.draw_frame_debug_items(&frame.debug_items);
                }
                if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
                    frame_profiles.push(frame.profile_counters.clone());
                }

                let dirty_regions =
                    mem::replace(&mut frame.recorded_dirty_regions, Vec::new());
                results.recorded_dirty_regions.extend(dirty_regions);

                // If we're the last document, don't call end_pass here, because we'll
                // be moving on to drawing the debug overlays. See the comment above
                // the end_pass call in draw_tile_frame about debug draw overlays
                // for a bit more context.
                if doc_index != last_document_index {
                    self.texture_resolver.end_pass(&mut self.device, None, None);
                }
            }

            self.unlock_external_images();
            self.active_documents = active_documents;
        });

        if let Some(framebuffer_size) = framebuffer_size {
            self.draw_render_target_debug(framebuffer_size);
            self.draw_texture_cache_debug(framebuffer_size);
            self.draw_gpu_cache_debug(framebuffer_size);
            self.draw_zoom_debug(framebuffer_size);
            self.draw_epoch_debug();
        }

        let current_time = precise_time_ns();
        if framebuffer_size.is_some() {
            let ns = current_time - self.last_time;
            self.profile_counters.frame_time.set(ns);
        }

        if self.max_recorded_profiles > 0 {
            while self.cpu_profiles.len() >= self.max_recorded_profiles {
                self.cpu_profiles.pop_front();
            }
            let cpu_profile = CpuProfile::new(
                cpu_frame_id,
                self.backend_profile_counters.total_time.get(),
                profile_timers.cpu_time.get(),
                self.profile_counters.draw_calls.get(),
            );
            self.cpu_profiles.push_back(cpu_profile);
        }

        if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
            if let Some(framebuffer_size) = framebuffer_size {
                // TODO: take the device pixel ratio into the equation?
                if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
                    let screen_fraction = 1.0 / framebuffer_size.to_f32().area();
                    self.profiler.draw_profile(
                        &frame_profiles,
                        &self.backend_profile_counters,
                        &self.profile_counters,
                        &mut profile_timers,
                        &profile_samplers,
                        screen_fraction,
                        debug_renderer,
                        self.debug_flags.contains(DebugFlags::COMPACT_PROFILER),
                    );
                }
            }
        }

        let mut x = 0.0;
        if self.debug_flags.contains(DebugFlags::NEW_FRAME_INDICATOR) {
            if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
                self.new_frame_indicator.changed();
                self.new_frame_indicator.draw(
                    x, 0.0,
                    ColorU::new(0, 110, 220, 255),
                    debug_renderer,
                );
                x += ChangeIndicator::width();
            }
        }

        if self.debug_flags.contains(DebugFlags::NEW_SCENE_INDICATOR) {
            if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
                self.new_scene_indicator.draw(
                    x, 0.0,
                    ColorU::new(0, 220, 110, 255),
                    debug_renderer,
                );
                x += ChangeIndicator::width();
            }
        }

        if self.debug_flags.contains(DebugFlags::SLOW_FRAME_INDICATOR) {
            if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
                self.slow_frame_indicator.draw(
                    x, 0.0,
                    ColorU::new(220, 30, 10, 255),
                    debug_renderer,
                );
            }
        }

        if self.debug_flags.contains(DebugFlags::ECHO_DRIVER_MESSAGES) {
            Device::echo_driver_messages(self.device.gl());
        }

        results.stats.texture_upload_kb = self.profile_counters.texture_data_uploaded.get();
        self.backend_profile_counters.reset();
        self.profile_counters.reset();
        self.profile_counters.frame_counter.inc();
        results.stats.resource_upload_time = self.resource_upload_time;
        self.resource_upload_time = 0;
        results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time;
        self.gpu_cache_upload_time = 0;

        profile_timers.cpu_time.profile(|| {
            let _gm = self.gpu_profile.start_marker("end frame");
            self.gpu_profile.end_frame();
            if let Some(debug_renderer) = self.debug.try_get_mut() {
                let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN);
                let scale = if small_screen { 1.6 } else { 1.0 };
                debug_renderer.render(&mut self.device, framebuffer_size, scale);
            }
            // See comment for texture_resolver.begin_frame() for explanation
            // of why this must be done after all rendering, including debug
            // overlays. The end_frame() call implicitly calls end_pass(), which
            // should ensure any left over render targets get invalidated and
            // returned to the pool correctly.
            self.texture_resolver.end_frame(&mut self.device, cpu_frame_id);
            self.device.end_frame();
        });

        if framebuffer_size.is_some() {
            self.last_time = current_time;
        }

        if self.renderer_errors.is_empty() {
            Ok(results)
        } else {
            Err(mem::replace(&mut self.renderer_errors, Vec::new()))
        }
    }

    fn update_gpu_cache(&mut self) {
        let _gm = self.gpu_profile.start_marker("gpu cache update");

        // For an artificial stress test of GPU cache resizing,
        // always pass an extra update list with at least one block in it.
        let gpu_cache_height = self.gpu_cache_texture.get_height();
        if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
            self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
                frame_id: FrameId::INVALID,
                clear: false,
                height: gpu_cache_height,
                blocks: vec![[1f32; 4].into()],
                updates: Vec::new(),
                debug_commands: Vec::new(),
            });
        }

        let (updated_blocks, max_requested_height) = self
            .pending_gpu_cache_updates
            .iter()
            .fold((0, gpu_cache_height), |(count, height), list| {
                (count + list.blocks.len(), cmp::max(height, list.height))
            });

        if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
            self.gpu_cache_overflow = true;
            self.renderer_errors.push(RendererError::MaxTextureSize);
        }

        // Note: if we decide to switch to scatter-style GPU cache updates
        // permanently, we could make this code nicer with a `BufferUploader`
        // kind of helper, similar to how the `TextureUploader` API is used.
        self.gpu_cache_texture.prepare_for_updates(
            &mut self.device,
            updated_blocks,
            max_requested_height,
        );

        for update_list in self.pending_gpu_cache_updates.drain(..) {
            assert!(update_list.height <= max_requested_height);
            if update_list.frame_id > self.gpu_cache_frame_id {
                self.gpu_cache_frame_id = update_list.frame_id
            }
            self.gpu_cache_texture
                .update(&mut self.device, &update_list);
        }

        let mut upload_time = TimeProfileCounter::new("GPU cache upload time", false);
        let updated_rows = upload_time.profile(|| {
            self.gpu_cache_texture.flush(&mut self.device)
        });
        self.gpu_cache_upload_time += upload_time.get();

        let counters = &mut self.backend_profile_counters.resources.gpu_cache;
        counters.updated_rows.set(updated_rows);
        counters.updated_blocks.set(updated_blocks);
    }

    fn prepare_gpu_cache(&mut self, frame: &Frame) {
        if self.pending_gpu_cache_clear {
            let use_scatter =
                matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
            let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter).unwrap();
            let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
            old_cache.deinit(&mut self.device);
            self.pending_gpu_cache_clear = false;
        }

        let deferred_update_list = self.update_deferred_resolves(&frame.deferred_resolves);
        self.pending_gpu_cache_updates.extend(deferred_update_list);

        self.update_gpu_cache();

        // Note: the texture might have changed during the `update`,
        // so we need to bind it here.
        self.device.bind_texture(
            TextureSampler::GpuCache,
            self.gpu_cache_texture.texture.as_ref().unwrap(),
        );
    }

    fn update_texture_cache(&mut self) {
        let _gm = self.gpu_profile.start_marker("texture cache update");
        let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);

        let mut upload_time = TimeProfileCounter::new("Resource upload time", false);
        upload_time.profile(|| {
            for update_list in pending_texture_updates.drain(..) {
                for allocation in update_list.allocations {
                    let is_realloc = matches!(allocation.kind, TextureCacheAllocationKind::Realloc(..));
                    match allocation.kind {
                        TextureCacheAllocationKind::Alloc(info) |
                        TextureCacheAllocationKind::Realloc(info) => {
                            // Create a new native texture, as requested by the texture cache.
                            //
                            // Ensure no PBO is bound when creating the texture storage,
                            // or GL will attempt to read data from there.
                            let mut texture = self.device.create_texture(
                                TextureTarget::Array,
                                info.format,
                                info.width,
                                info.height,
                                info.filter,
                                // This needs to be a render target because some render
                                // tasks get rendered into the texture cache.
                                Some(RenderTargetInfo { has_depth: false }),
                                info.layer_count,
                            );

                            if info.is_shared_cache {
                                texture.flags_mut()
                                    .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE);

                                // Textures in the cache generally don't need to be cleared,
                                // but we do so if the debug display is active to make it
                                // easier to identify unallocated regions.
                                if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
                                    self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR);
                                }
                            }

                            let old = self.texture_resolver.texture_cache_map.insert(allocation.id, texture);
                            assert_eq!(old.is_some(), is_realloc, "Renderer and RenderBackend disagree");
                            if let Some(old) = old {
                                self.device.blit_renderable_texture(
                                    self.texture_resolver.texture_cache_map.get_mut(&allocation.id).unwrap(),
                                    &old
                                );
                                self.device.delete_texture(old);
                            }
                        },
                        TextureCacheAllocationKind::Free => {
                            let texture = self.texture_resolver.texture_cache_map.remove(&allocation.id).unwrap();
                            self.device.delete_texture(texture);
                        },
                    }
                }

                for update in update_list.updates {
                    let TextureCacheUpdate { id, rect, stride, offset, layer_index, source } = update;
                    let texture = &self.texture_resolver.texture_cache_map[&id];

                    let bytes_uploaded = match source {
                        TextureUpdateSource::Bytes { data } => {
                            let mut uploader = self.device.upload_texture(
                                texture,
                                &self.texture_cache_upload_pbo,
                                0,
                            );
                            uploader.upload(
                                rect, layer_index, stride,
                                &data[offset as usize ..],
                            )
                        }
                        TextureUpdateSource::External { id, channel_index } => {
                            let mut uploader = self.device.upload_texture(
                                texture,
                                &self.texture_cache_upload_pbo,
                                0,
                            );
                            let handler = self.external_image_handler
                                .as_mut()
                                .expect("Found external image, but no handler set!");
                            // The filter is only relevant for NativeTexture external images.
                            let size = match handler.lock(id, channel_index, ImageRendering::Auto).source {
                                ExternalImageSource::RawData(data) => {
                                    uploader.upload(
                                        rect, layer_index, stride,
                                        &data[offset as usize ..],
                                    )
                                }
                                ExternalImageSource::Invalid => {
                                    // Create a local buffer to fill the pbo.
                                    let bpp = texture.get_format().bytes_per_pixel();
                                    let width = stride.unwrap_or(rect.size.width * bpp);
                                    let total_size = width * rect.size.height;
                                    // WR doesn't support the RGBAF32 format in the
                                    // texture cache yet, so we use the u8 type here.
                                    let dummy_data: Vec<u8> = vec![255; total_size as usize];
                                    uploader.upload(rect, layer_index, stride, &dummy_data)
                                }
                                ExternalImageSource::NativeTexture(eid) => {
                                    panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
                                }
                            };
                            handler.unlock(id, channel_index);
                            size
                        }
                        TextureUpdateSource::DebugClear => {
                            let draw_target = DrawTarget::Texture {
                                texture,
                                layer: layer_index as usize,
                                with_depth: false,
                            };
                            self.device.bind_draw_target(draw_target);
                            self.device.clear_target(
                                Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
                                None,
                                Some(draw_target.to_framebuffer_rect(rect.to_i32()))
                            );
                            0
                        }
                    };
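                    // `>> 10` converts bytes to KiB for the profile counter.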
                    self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10);
                }
            }

            drain_filter(
                &mut self.notifications,
                |n| { n.when() == Checkpoint::FrameTexturesUpdated },
                |n| { n.notify(); },
            );
        });
        self.resource_upload_time += upload_time.get();
    }

    pub(crate) fn draw_instanced_batch<T>(
        &mut self,
        data: &[T],
        vertex_array_kind: VertexArrayKind,
        textures: &BatchTextures,
        stats: &mut RendererStats,
    ) {
        for i in 0 .. textures.colors.len() {
            self.texture_resolver.bind(
                &textures.colors[i],
                TextureSampler::color(i),
                &mut self.device,
            );
        }

        // TODO: this probably isn't the best place for this.
        if let Some(ref texture) = self.dither_matrix_texture {
            self.device.bind_texture(TextureSampler::Dither, texture);
        }

        self.draw_instanced_batch_with_previously_bound_textures(data, vertex_array_kind, stats)
    }

    pub(crate) fn draw_instanced_batch_with_previously_bound_textures<T>(
        &mut self,
        data: &[T],
        vertex_array_kind: VertexArrayKind,
        stats: &mut RendererStats,
    ) {
        // If we end up with an empty draw call here, that means we have
        // probably introduced unnecessary batch breaks during frame
        // building - so we should be catching this earlier and removing
        // the batch.
        debug_assert!(!data.is_empty());

        let vao = get_vao(vertex_array_kind, &self.vaos, &self.gpu_glyph_renderer);

        self.device.bind_vao(vao);

        let batched = !self.debug_flags.contains(DebugFlags::DISABLE_BATCHING);
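        // When batching is disabled (a debug flag), each instance is drawn with
        // its own call so individual primitives can be isolated while debugging.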

        if batched {
            self.device
                .update_vao_instances(vao, data, VertexUsageHint::Stream);
            self.device
                .draw_indexed_triangles_instanced_u16(6, data.len() as i32);
            self.profile_counters.draw_calls.inc();
            stats.total_draw_calls += 1;
        } else {
            for i in 0 .. data.len() {
                self.device
                    .update_vao_instances(vao, &data[i .. i + 1], VertexUsageHint::Stream);
                self.device.draw_triangles_u16(0, 6);
                self.profile_counters.draw_calls.inc();
                stats.total_draw_calls += 1;
            }
        }

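        // Each instance is a quad: two triangles, i.e. six indexed vertices.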
        self.profile_counters.vertices.add(6 * data.len());
    }

    fn handle_readback_composite(
        &mut self,
        draw_target: DrawTarget,
        uses_scissor: bool,
        source: &RenderTask,
        backdrop: &RenderTask,
        readback: &RenderTask,
    ) {
        if uses_scissor {
            self.device.disable_scissor();
        }

        let cache_texture = self.texture_resolver
            .resolve(&TextureSource::PrevPassColor)
            .unwrap();

        // Before submitting the composite batch, do the
        // framebuffer readbacks that are needed for each
        // composite operation in this batch.
        let (readback_rect, readback_layer) = readback.get_target_rect();
        let (backdrop_rect, _) = backdrop.get_target_rect();
        let backdrop_screen_origin = match backdrop.kind {
            RenderTaskKind::Picture(ref task_info) => task_info.content_origin,
            _ => panic!("bug: composite on non-picture?"),
        };
        let source_screen_origin = match source.kind {
            RenderTaskKind::Picture(ref task_info) => task_info.content_origin,
            _ => panic!("bug: composite on non-picture?"),
        };

        // Bind the FBO to blit the backdrop to.
        // Called per-instance in case the layer (and therefore FBO)
        // changes. The device will skip the GL call if the requested
        // target is already bound.
        let cache_draw_target = DrawTarget::Texture {
            texture: cache_texture,
            layer: readback_layer.0 as usize,
            with_depth: false,
        };

        let mut src = DeviceIntRect::new(
            source_screen_origin + (backdrop_rect.origin - backdrop_screen_origin),
            readback_rect.size,
        );
        let mut dest = readback_rect.to_i32();
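        // Device pixels map 1:1 to framebuffer pixels for this blit, so the
        // identity scale below only converts the rects' unit types.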
        let device_to_framebuffer = TypedScale::new(1i32);

        // Need to invert the y coordinates and flip the image vertically when
        // reading back from the framebuffer.
        if draw_target.is_default() {
            src.origin.y = draw_target.dimensions().height as i32 - src.size.height - src.origin.y;
            dest.origin.y += dest.size.height;
            dest.size.height = -dest.size.height;
        }

        self.device.blit_render_target(
            draw_target.into(),
            src * device_to_framebuffer,
            cache_draw_target,
            dest * device_to_framebuffer,
            TextureFilter::Linear,
        );

        // Restore draw target to current pass render target + layer, and reset
        // the read target.
        self.device.bind_draw_target(draw_target);
        self.device.reset_read_target();

        if uses_scissor {
            self.device.enable_scissor();
        }
    }

    // TODO: make this nicer. Currently we can't accept `&mut self` because the `DrawTarget` parameter
    // needs to borrow self.texture_resolver
    fn handle_blits(
        gpu_profile: &mut GpuProfiler<GpuProfileTag>,
        device: &mut Device,
        texture_resolver: &TextureResolver,
        blits: &[BlitJob],
        render_tasks: &RenderTaskTree,
        draw_target: DrawTarget,
        content_origin: &DeviceIntPoint,
    ) {
        if blits.is_empty() {
            return;
        }

        let _timer = gpu_profile.start_timer(GPU_TAG_BLIT);

        // TODO(gw): For now, we don't bother batching these by source texture.
        //           If it ever shows up as an issue, we can easily batch them.
        for blit in blits {
            let (source, layer, source_rect) = match blit.source {
                BlitJobSource::Texture(texture_id, layer, source_rect) => {
                    // A blit from a texture into this target.
                    (texture_id, layer as usize, source_rect)
                }
                BlitJobSource::RenderTask(task_id) => {
                    // A blit from the child render task into this target.
                    // TODO(gw): Support R8 format here once we start
                    //           creating mips for alpha masks.
                    let source = &render_tasks[task_id];
                    let (source_rect, layer) = source.get_target_rect();
                    (TextureSource::PrevPassColor, layer.0, source_rect)
                }
            };
            debug_assert_eq!(source_rect.size, blit.target_rect.size);
            let texture = texture_resolver
                .resolve(&source)
                .expect("BUG: invalid source texture");
            let read_target = DrawTarget::Texture { texture, layer, with_depth: false };

            device.blit_render_target(
                read_target.into(),
                read_target.to_framebuffer_rect(source_rect),
                draw_target,
                draw_target.to_framebuffer_rect(blit.target_rect.translate(&-content_origin.to_vector())),
                TextureFilter::Linear,
            );
        }
    }

    fn handle_scaling(
        &mut self,
        scalings: &[ScalingInstance],
        source: TextureSource,
        projection: &Transform3D<f32>,
        stats: &mut RendererStats,
    ) {
        if scalings.is_empty() {
            return
        }

        let _timer = self.gpu_profile.start_timer(GPU_TAG_SCALE);

        match source {
            TextureSource::PrevPassColor => {
                self.shaders.borrow_mut().cs_scale_rgba8.bind(&mut self.device,
                                                              &projection,
                                                              &mut self.renderer_errors);
            }
            TextureSource::PrevPassAlpha => {
                self.shaders.borrow_mut().cs_scale_a8.bind(&mut self.device,
                                                           &projection,
                                                           &mut self.renderer_errors);
            }
            _ => unreachable!(),
        }

        self.draw_instanced_batch(
            &scalings,
            VertexArrayKind::Scale,
            &BatchTextures::no_texture(),
            stats,
        );
    }

    fn draw_color_target(
        &mut self,
        draw_target: DrawTarget,
        target: &ColorRenderTarget,
        content_origin: DeviceIntPoint,
        clear_color: Option<[f32; 4]>,
        clear_depth: Option<f32>,
        render_tasks: &RenderTaskTree,
        projection: &Transform3D<f32>,
        frame_id: GpuFrameId,
        stats: &mut RendererStats,
    ) {
        self.profile_counters.color_targets.inc();
        let _gm = self.gpu_profile.start_marker("color target");

        // Sanity check for the depth buffer: `bool >= bool` reads as implication,
        // i.e. if the target needs depth, the texture must support it.
        if let DrawTarget::Texture { texture, .. } = draw_target {
            assert!(texture.supports_depth() >= target.needs_depth());
        }

        let framebuffer_kind = if draw_target.is_default() {
            FramebufferKind::Main
        } else {
            FramebufferKind::Other
        };

        {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
            self.device.bind_draw_target(draw_target);
            self.device.disable_depth();
            self.set_blend(false, framebuffer_kind);

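            // Depth writes must be enabled for the depth clear below to take effect.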
            if clear_depth.is_some() {
                self.device.enable_depth_write();
            }

            let clear_rect = match draw_target {
                DrawTarget::Default { rect, total_size } if rect.origin == FramebufferIntPoint::zero() && rect.size == total_size => {
                    // whole screen is covered, no need for scissor
                    None
                }
                DrawTarget::Default { rect, .. } => {
                    Some(rect)
                }
                DrawTarget::Texture { .. } if self.enable_clear_scissor => {
                    // TODO(gw): Applying a scissor rect and minimal clear here
                    // is a very large performance win on the Intel and nVidia
                    // GPUs that I have tested with. It's possible it may be a
                    // performance penalty on other GPU types - we should test this
                    // and consider different code paths.
                    //
                    // Note: The above measurements were taken when render
                    // target slices were minimum 2048x2048. Now that we size
                    // them adaptively, this may be less of a win (except perhaps
                    // on a mostly-unused last slice of a large texture array).
                    Some(draw_target.to_framebuffer_rect(target.used_rect()))
                }
                DrawTarget::Texture { .. } | DrawTarget::External { .. } => {
                    None
                }
            };

            self.device.clear_target(
                clear_color,
                clear_depth,
                clear_rect,
            );

            if clear_depth.is_some() {
                self.device.disable_depth_write();
            }
        }

        // Handle any blits from the texture cache to this target.
        Self::handle_blits(
            &mut self.gpu_profile, &mut self.device, &self.texture_resolver,
            &target.blits, render_tasks, draw_target, &content_origin,
        );

        // Draw any blurs for this target.
        // Blurs are rendered as a standard 2-pass
        // separable implementation.
        // TODO(gw): In the future, consider having
        //           fast path blur shaders for common
        //           blur radii with fixed weights.
        if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);

            self.set_blend(false, framebuffer_kind);
            self.shaders.borrow_mut().cs_blur_rgba8
                .bind(&mut self.device, projection, &mut self.renderer_errors);

            if !target.vertical_blurs.is_empty() {
                self.draw_instanced_batch(
                    &target.vertical_blurs,
                    VertexArrayKind::Blur,
                    &BatchTextures::no_texture(),
                    stats,
                );
            }

            if !target.horizontal_blurs.is_empty() {
                self.draw_instanced_batch(
                    &target.horizontal_blurs,
                    VertexArrayKind::Blur,
                    &BatchTextures::no_texture(),
                    stats,
                );
            }
        }

        self.handle_scaling(
            &target.scalings,
            TextureSource::PrevPassColor,
            projection,
            stats,
        );

        // Small helper fn to iterate a regions list, invoking the closure
        // once with `None` if there are no regions.
        fn iterate_regions<F>(
            regions: &[DeviceIntRect],
            mut f: F,
        ) where F: FnMut(Option<DeviceIntRect>) {
            if regions.is_empty() {
                f(None)
            } else {
                for region in regions {
                    f(Some(*region))
                }
            }
        }

        for alpha_batch_container in &target.alpha_batch_containers {
            let uses_scissor = alpha_batch_container.task_scissor_rect.is_some() ||
                               !alpha_batch_container.regions.is_empty();

            if uses_scissor {
                self.device.enable_scissor();
                let scissor_rect = draw_target.build_scissor_rect(
                    alpha_batch_container.task_scissor_rect,
                    content_origin,
                );
                self.device.set_scissor_rect(scissor_rect)
            }

            if !alpha_batch_container.opaque_batches.is_empty() {
                let _gl = self.gpu_profile.start_marker("opaque batches");
                let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
                self.set_blend(false, framebuffer_kind);
                // Note: depth equality is needed for split planes
                self.device.set_depth_func(DepthFunction::LessEqual);
                self.device.enable_depth();
                self.device.enable_depth_write();

                // Draw opaque batches front-to-back for maximum
                // z-buffer efficiency!
                for batch in alpha_batch_container
                    .opaque_batches
                    .iter()
                    .rev()
                {
                    self.shaders.borrow_mut()
                        .get(&batch.key, self.debug_flags)
                        .bind(
                            &mut self.device, projection,
                            &mut self.renderer_errors,
                        );

                    let _timer = self.gpu_profile.start_timer(batch.key.kind.sampler_tag());

                    iterate_regions(
                        &alpha_batch_container.regions,
                        |region| {
                            if let Some(region) = region {
                                let scissor_rect = draw_target.build_scissor_rect(
                                    Some(region),
                                    content_origin,
                                );
                                self.device.set_scissor_rect(scissor_rect);
                            }

                            self.draw_instanced_batch(
                                &batch.instances,
                                VertexArrayKind::Primitive,
                                &batch.key.textures,
                                stats
                            );
                        }
                    );
                }

                self.device.disable_depth_write();
                self.gpu_profile.finish_sampler(opaque_sampler);
            }

            if !alpha_batch_container.alpha_batches.is_empty() {
                let _gl = self.gpu_profile.start_marker("alpha batches");
                let transparent_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
                self.set_blend(true, framebuffer_kind);
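                // Track the blend mode most recently applied to the device, so
                // redundant blend state changes can be skipped below.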
                let mut prev_blend_mode = BlendMode::None;

                // If the device supports pixel local storage, initialize the PLS buffer for
                // the transparent pass. This involves reading the current framebuffer value
                // and storing that in PLS.
                // TODO(gw): This is quite expensive and relies on framebuffer fetch being
                //           available. We can probably switch the opaque pass over to use
                //           PLS too, and remove this pass completely.
                if self.device.get_capabilities().supports_pixel_local_storage {
                    // TODO(gw): If using PLS, the fixed function blender is disabled. It's possible
                    //           we could take advantage of this by skipping batching on the blend
                    //           mode in these cases.
                    self.init_pixel_local_storage(
                        alpha_batch_container.task_rect,
                        projection,
                        stats,
                    );
                }

                for batch in &alpha_batch_container.alpha_batches {
                    self.shaders.borrow_mut()
                        .get(&batch.key, self.debug_flags)
                        .bind(
                            &mut self.device, projection,
                            &mut self.renderer_errors,
                        );

                    if batch.key.blend_mode != prev_blend_mode {
                        match batch.key.blend_mode {
                            _ if self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW) &&
                                    framebuffer_kind == FramebufferKind::Main => {
                                self.device.set_blend_mode_show_overdraw();
                            }
                            BlendMode::None => {
                                unreachable!("bug: opaque blend in alpha pass");
                            }
                            BlendMode::Alpha => {
                                self.device.set_blend_mode_alpha();
                            }
                            BlendMode::PremultipliedAlpha => {
                                self.device.set_blend_mode_premultiplied_alpha();
                            }
                            BlendMode::PremultipliedDestOut => {
                                self.device.set_blend_mode_premultiplied_dest_out();
                            }
                            BlendMode::SubpixelDualSource => {
                                self.device.set_blend_mode_subpixel_dual_source();
                            }
                            BlendMode::SubpixelConstantTextColor(color) => {
                                self.device.set_blend_mode_subpixel_constant_text_color(color);
                            }
                            BlendMode::SubpixelWithBgColor => {
                                // Using the three pass "component alpha with font smoothing
                                // background color" rendering technique:
                                //
                                // /webrender/doc/text-rendering.md
                                //
                                self.device.set_blend_mode_subpixel_with_bg_color_pass0();
                                self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass0 as _);
                            }
                        }
                        prev_blend_mode = batch.key.blend_mode;
                    }

                    // Handle special case readback for composites.
                    if let BatchKind::Brush(BrushBatchKind::MixBlend { task_id, source_id, backdrop_id }) = batch.key.kind {
                        // composites can't be grouped together because
                        // they may overlap and affect each other.
                        debug_assert_eq!(batch.instances.len(), 1);
                        self.handle_readback_composite(
                            draw_target,
                            uses_scissor,
                            &render_tasks[source_id],
                            &render_tasks[task_id],
                            &render_tasks[backdrop_id],
                        );
                    }

                    let _timer = self.gpu_profile.start_timer(batch.key.kind.sampler_tag());

                    iterate_regions(
                        &alpha_batch_container.regions,
                        |region| {
                            if let Some(region) = region {
                                let scissor_rect = draw_target.build_scissor_rect(
                                    Some(region),
                                    content_origin,
                                );
                                self.device.set_scissor_rect(scissor_rect);
                            }

                            self.draw_instanced_batch(
                                &batch.instances,
                                VertexArrayKind::Primitive,
                                &batch.key.textures,
                                stats
                            );

                            if batch.key.blend_mode == BlendMode::SubpixelWithBgColor {
                                self.set_blend_mode_subpixel_with_bg_color_pass1(framebuffer_kind);
                                self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass1 as _);

                                // When drawing the 2nd and 3rd passes, we know that the VAO, textures etc
                                // are all set up from the previous draw_instanced_batch call,
                                // so just issue a draw call here to avoid re-uploading the
                                // instances and re-binding textures etc.
                                self.device
                                    .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);

                                self.set_blend_mode_subpixel_with_bg_color_pass2(framebuffer_kind);
                                self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass2 as _);

                                self.device
                                    .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
                            }
                        }
                    );

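                    // The multi-pass subpixel technique above changed the device's
                    // blend state without updating `prev_blend_mode`, so reset it
                    // to force the next batch to re-apply its blend mode.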
                    if batch.key.blend_mode == BlendMode::SubpixelWithBgColor {
                        prev_blend_mode = BlendMode::None;
                    }
                }

                // If the device supports pixel local storage, resolve the PLS values.
                // This pass reads the final PLS color value, and writes it to a normal
                // fragment output.
                if self.device.get_capabilities().supports_pixel_local_storage {
                    self.resolve_pixel_local_storage(
                        alpha_batch_container.task_rect,
                        projection,
                        stats,
                    );
                }

                self.device.disable_depth();
                self.set_blend(false, framebuffer_kind);
                self.gpu_profile.finish_sampler(transparent_sampler);
            }

            if uses_scissor {
                self.device.disable_scissor();
            }

            // At the end of rendering a container, blit across any cache tiles
            // to the texture cache for use on subsequent frames.
            if !alpha_batch_container.tile_blits.is_empty() {
                let _timer = self.gpu_profile.start_timer(GPU_TAG_BLIT);

                for blit in &alpha_batch_container.tile_blits {
                    let texture = self.texture_resolver
                        .resolve(&blit.target.texture_id)
                        .expect("BUG: invalid target texture");

                    let blit_target = DrawTarget::Texture {
                        texture,
                        layer: blit.target.texture_layer as usize,
                        with_depth: false,
                    };

                    let src_rect = draw_target.to_framebuffer_rect(DeviceIntRect::new(
                        blit.src_offset - content_origin.to_vector(),
                        blit.size,
                    ));

                    let target_rect = blit.target.uv_rect.to_i32();

                    let dest_rect = blit_target.to_framebuffer_rect(DeviceIntRect::new(
                        blit.dest_offset + (target_rect.origin - content_origin),
                        blit.size,
                    ));

                    self.device.blit_render_target_invert_y(
                        draw_target.into(),
                        src_rect,
                        blit_target,
                        dest_rect,
                    );
                }

                self.device.bind_draw_target(draw_target);
            }
        }

        // For any registered image outputs on this render target,
        // get the texture from caller and blit it.
        for output in &target.outputs {
            let handler = self.output_image_handler
                .as_mut()
                .expect("Found output image, but no handler set!");
            if let Some((texture_id, output_size)) = handler.lock(output.pipeline_id) {
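                // Look up (or lazily create) an FBO wrapping the caller's texture,
                // stamping it with the current frame id so stale outputs can be
                // cleaned up later.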
                let fbo_id = match self.output_targets.entry(texture_id) {
                    Entry::Vacant(entry) => {
                        let fbo_id = self.device.create_fbo_for_external_texture(texture_id);
                        entry.insert(FrameOutput {
                            fbo_id,
                            last_access: frame_id,
                        });
                        fbo_id
                    }
                    Entry::Occupied(mut entry) => {
                        let target = entry.get_mut();
                        target.last_access = frame_id;
                        target.fbo_id
                    }
                };
                let (src_rect, _) = render_tasks[output.task_id].get_target_rect();
                self.device.blit_render_target_invert_y(
                    draw_target.into(),
                    draw_target.to_framebuffer_rect(src_rect.translate(&-content_origin.to_vector())),
                    DrawTarget::External { fbo: fbo_id, size: output_size.into() },
                    output_size.into(),
                );
                handler.unlock(output.pipeline_id);
            }
        }
    }

    /// Draw all the instances in a clip batcher list to the current target.
    fn draw_clip_batch_list(
        &mut self,
        list: &ClipBatchList,
        projection: &Transform3D<f32>,
        stats: &mut RendererStats,
    ) {
        // draw rounded-corner rectangles
        if !list.slow_rectangles.is_empty() {
            let _gm2 = self.gpu_profile.start_marker("slow clip rectangles");
            self.shaders.borrow_mut().cs_clip_rectangle_slow.bind(
                &mut self.device,
                projection,
                &mut self.renderer_errors,
            );
            self.draw_instanced_batch(
                &list.slow_rectangles,
                VertexArrayKind::Clip,
                &BatchTextures::no_texture(),
                stats,
            );
        }
        if !list.fast_rectangles.is_empty() {
            let _gm2 = self.gpu_profile.start_marker("fast clip rectangles");
            self.shaders.borrow_mut().cs_clip_rectangle_fast.bind(
                &mut self.device,
                projection,
                &mut self.renderer_errors,
            );
            self.draw_instanced_batch(
                &list.fast_rectangles,
                VertexArrayKind::Clip,
                &BatchTextures::no_texture(),
                stats,
            );
        }
        // draw box-shadow clips
        for (mask_texture_id, items) in list.box_shadows.iter() {
            let _gm2 = self.gpu_profile.start_marker("box-shadows");
            let textures = BatchTextures {
                colors: [
                    mask_texture_id.clone(),
                    TextureSource::Invalid,
                    TextureSource::Invalid,
                ],
            };
            self.shaders.borrow_mut().cs_clip_box_shadow
                .bind(&mut self.device, projection, &mut self.renderer_errors);
            self.draw_instanced_batch(
                items,
                VertexArrayKind::Clip,
                &textures,
                stats,
            );
        }

        // draw image masks
        for (mask_texture_id, items) in list.images.iter() {
            let _gm2 = self.gpu_profile.start_marker("clip images");
            let textures = BatchTextures {
                colors: [
                    mask_texture_id.clone(),
                    TextureSource::Invalid,
                    TextureSource::Invalid,
                ],
            };
            self.shaders.borrow_mut().cs_clip_image
                .bind(&mut self.device, projection, &mut self.renderer_errors);
            self.draw_instanced_batch(
                items,
                VertexArrayKind::Clip,
                &textures,
                stats,
            );
        }
    }

    fn draw_alpha_target(
        &mut self,
        draw_target: DrawTarget,
        target: &AlphaRenderTarget,
        projection: &Transform3D<f32>,
        render_tasks: &RenderTaskTree,
        stats: &mut RendererStats,
    ) {
        self.profile_counters.alpha_targets.inc();
        let _gm = self.gpu_profile.start_marker("alpha target");
        let alpha_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_ALPHA);

        {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
            self.device.bind_draw_target(draw_target);
            self.device.disable_depth();
            self.device.disable_depth_write();
            self.set_blend(false, FramebufferKind::Other);

            // TODO(gw): Applying a scissor rect and minimal clear here
            // is a very large performance win on the Intel and nVidia
            // GPUs that I have tested with. It's possible it may be a
            // performance penalty on other GPU types - we should test this
            // and consider different code paths.

            let zero_color = [0.0, 0.0, 0.0, 0.0];
            for &task_id in &target.zero_clears {
                let (rect, _) = render_tasks[task_id].get_target_rect();
                self.device.clear_target(
                    Some(zero_color),
                    None,
                    Some(draw_target.to_framebuffer_rect(rect)),
                );
            }

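            // Tasks whose results are accumulated multiplicatively, such as clip
            // masks, start from 1.0 (fully unmasked) instead.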
            let one_color = [1.0, 1.0, 1.0, 1.0];
            for &task_id in &target.one_clears {
                let (rect, _) = render_tasks[task_id].get_target_rect();
                self.device.clear_target(
                    Some(one_color),
                    None,
                    Some(draw_target.to_framebuffer_rect(rect)),
                );
            }
        }

        // Draw any blurs for this target.
        // Blurs are rendered as a standard 2-pass
        // separable implementation.
        // TODO(gw): In the future, consider having
        //           fast path blur shaders for common
        //           blur radii with fixed weights.
        if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);

            self.shaders.borrow_mut().cs_blur_a8
                .bind(&mut self.device, projection, &mut self.renderer_errors);

            if !target.vertical_blurs.is_empty() {
                self.draw_instanced_batch(
                    &target.vertical_blurs,
                    VertexArrayKind::Blur,
                    &BatchTextures::no_texture(),
                    stats,
                );
            }

            if !target.horizontal_blurs.is_empty() {
                self.draw_instanced_batch(
                    &target.horizontal_blurs,
                    VertexArrayKind::Blur,
                    &BatchTextures::no_texture(),
                    stats,
                );
            }
        }

        self.handle_scaling(
            &target.scalings,
            TextureSource::PrevPassAlpha,
            projection,
            stats,
        );

        // Draw the clip items into the tiled alpha mask.
        {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP);

            // TODO(gw): Consider grouping multiple clip masks per shader
            //           invocation here to reduce memory bandwidth further?

            // Draw the primary clip mask - since this is the first mask
            // for the task, we can disable blending, knowing that it will
            // overwrite every pixel in the mask area.
            self.set_blend(false, FramebufferKind::Other);
            self.draw_clip_batch_list(
                &target.clip_batcher.primary_clips,
                projection,
                stats,
            );

            // Switch to multiplicative blending for secondary masks, so that
            // each subsequent clip accumulates into the mask.
            self.set_blend(true, FramebufferKind::Other);
            self.set_blend_mode_multiply(FramebufferKind::Other);
            self.draw_clip_batch_list(
                &target.clip_batcher.secondary_clips,
                projection,
                stats,
            );
        }

        self.gpu_profile.finish_sampler(alpha_sampler);
    }

    fn draw_texture_cache_target(
        &mut self,
        texture: &CacheTextureId,
        layer: LayerIndex,
        target: &TextureCacheRenderTarget,
        render_tasks: &RenderTaskTree,
        stats: &mut RendererStats,
    ) {
        let texture_source = TextureSource::TextureCache(*texture);
        let (target_size, projection) = {
            let texture = self.texture_resolver
                .resolve(&texture_source)
                .expect("BUG: invalid target texture");
            let target_size = texture.get_dimensions();
            let projection = Transform3D::ortho(
                0.0,
                target_size.width as f32,
                0.0,
                target_size.height as f32,
                ORTHO_NEAR_PLANE,
                ORTHO_FAR_PLANE,
            );
            (target_size, projection)
        };

        self.device.disable_depth();
        self.device.disable_depth_write();

        self.set_blend(false, FramebufferKind::Other);

        // Handle any Pathfinder glyphs.
        let stencil_page = self.stencil_glyphs(&target.glyphs, &projection, &target_size, stats);

        {
            let texture = self.texture_resolver
                .resolve(&texture_source)
                .expect("BUG: invalid target texture");
            let draw_target = DrawTarget::Texture {
                texture,
                layer,
                with_depth: false,
            };
            self.device.bind_draw_target(draw_target);

            self.device.disable_depth();
            self.device.disable_depth_write();
            self.set_blend(false, FramebufferKind::Other);

            for rect in &target.clears {
                self.device.clear_target(
                    Some([0.0, 0.0, 0.0, 0.0]),
                    None,
                    Some(draw_target.to_framebuffer_rect(*rect)),
                );
            }

            // Handle any blits to this texture from child tasks.
            Self::handle_blits(
                &mut self.gpu_profile, &mut self.device, &self.texture_resolver,
                &target.blits, render_tasks, draw_target, &DeviceIntPoint::zero(),
            );
        }

        // Draw any borders for this target.
        if !target.border_segments_solid.is_empty() ||
           !target.border_segments_complex.is_empty()
        {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_BORDER);

            self.set_blend(true, FramebufferKind::Other);
            self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other);

            if !target.border_segments_solid.is_empty() {
                self.shaders.borrow_mut().cs_border_solid.bind(
                    &mut self.device,
                    &projection,
                    &mut self.renderer_errors,
                );

                self.draw_instanced_batch(
                    &target.border_segments_solid,
                    VertexArrayKind::Border,
                    &BatchTextures::no_texture(),
                    stats,
                );
            }

            if !target.border_segments_complex.is_empty() {
                self.shaders.borrow_mut().cs_border_segment.bind(
                    &mut self.device,
                    &projection,
                    &mut self.renderer_errors,
                );

                self.draw_instanced_batch(
                    &target.border_segments_complex,
                    VertexArrayKind::Border,
                    &BatchTextures::no_texture(),
                    stats,
                );
            }

            self.set_blend(false, FramebufferKind::Other);
        }

        // Draw any line decorations for this target.
        if !target.line_decorations.is_empty() {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_LINE_DECORATION);

            self.set_blend(true, FramebufferKind::Other);
            self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other);

            self.shaders.borrow_mut().cs_line_decoration.bind(
                &mut self.device,
                &projection,
                &mut self.renderer_errors,
            );

            self.draw_instanced_batch(
                &target.line_decorations,
                VertexArrayKind::LineDecoration,
                &BatchTextures::no_texture(),
                stats,
            );

            self.set_blend(false, FramebufferKind::Other);
        }

        // Draw any gradients for this target.
        if !target.gradients.is_empty() {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_GRADIENT);

            self.set_blend(false, FramebufferKind::Other);

            self.shaders.borrow_mut().cs_gradient.bind(
                &mut self.device,
                &projection,
                &mut self.renderer_errors,
            );

            self.draw_instanced_batch(
                &target.gradients,
                VertexArrayKind::Gradient,
                &BatchTextures::no_texture(),
                stats,
            );
        }

        // Draw any blurs for this target.
        if !target.horizontal_blurs.is_empty() {
            let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);

            {
                let mut shaders = self.shaders.borrow_mut();
                match target.target_kind {
                    RenderTargetKind::Alpha => &mut shaders.cs_blur_a8,
                    RenderTargetKind::Color => &mut shaders.cs_blur_rgba8,
                }.bind(&mut self.device, &projection, &mut self.renderer_errors);
            }

            self.draw_instanced_batch(
                &target.horizontal_blurs,
                VertexArrayKind::Blur,
                &BatchTextures::no_texture(),
                stats,
            );
        }

        // Blit any Pathfinder glyphs to the cache texture.
        if let Some(stencil_page) = stencil_page {
            self.cover_glyphs(stencil_page, &projection, stats);
        }
    }

    #[cfg(not(feature = "pathfinder"))]
    fn stencil_glyphs(&mut self,
                      _: &[GlyphJob],
                      _: &Transform3D<f32>,
                      _: &DeviceIntSize,
                      _: &mut RendererStats)
                      -> Option<StenciledGlyphPage> {
        None
    }

    #[cfg(not(feature = "pathfinder"))]
    fn cover_glyphs(&mut self,
                    _: StenciledGlyphPage,
                    _: &Transform3D<f32>,
                    _: &mut RendererStats) {}

    fn update_deferred_resolves(&mut self, deferred_resolves: &[DeferredResolve]) -> Option<GpuCacheUpdateList> {
        // The first thing we do is run through any pending deferred
        // resolves, and use a callback to get the UV rect for each
        // custom item. Then we patch the resource_rects structure
        // here before it's uploaded to the GPU.
        if deferred_resolves.is_empty() {
            return None;
        }

        let handler = self.external_image_handler
            .as_mut()
            .expect("Found external image, but no handler set!");

        let mut list = GpuCacheUpdateList {
            frame_id: FrameId::INVALID,
            clear: false,
            height: self.gpu_cache_texture.get_height(),
            blocks: Vec::new(),
            updates: Vec::new(),
            debug_commands: Vec::new(),
        };

        for deferred_resolve in deferred_resolves {
            self.gpu_profile.place_marker("deferred resolve");
            let props = &deferred_resolve.image_properties;
            let ext_image = props
                .external_image
                .expect("BUG: Deferred resolves must be external images!");
            // Provide rendering information for NativeTexture external images.
            let image = handler.lock(ext_image.id, ext_image.channel_index, deferred_resolve.rendering);
            let texture_target = match ext_image.image_type {
                ExternalImageType::TextureHandle(target) => target,
                ExternalImageType::Buffer => {
                    panic!("not a suitable image type in update_deferred_resolves()");
                }
            };

            // In order to produce the handle, the external image handler may call into
            // the GL context and change some states.
            self.device.reset_state();

            let texture = match image.source {
                ExternalImageSource::NativeTexture(texture_id) => {
                    ExternalTexture::new(texture_id, texture_target)
                }
                ExternalImageSource::Invalid => {
                    warn!("Invalid ext-image");
                    debug!(
                        "For ext_id:{:?}, channel:{}.",
                        ext_image.id,
                        ext_image.channel_index
                    );
                    // Just use 0 as the gl handle for this failed case.
                    ExternalTexture::new(0, texture_target)
                }
                ExternalImageSource::RawData(_) => {
                    panic!("Raw external data is not expected for deferred resolves!");
                }
            };

            self.texture_resolver
                .external_images
                .insert((ext_image.id, ext_image.channel_index), texture);

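            // Patch the GPU cache at this item's address with the resolved UV
            // rect, followed by a zeroed block, filling the BLOCKS_PER_UV_RECT
            // blocks reserved for it.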
            list.updates.push(GpuCacheUpdate::Copy {
                block_index: list.blocks.len(),
                block_count: BLOCKS_PER_UV_RECT,
                address: deferred_resolve.address,
            });
            list.blocks.push(image.uv.into());
            list.blocks.push([0f32; 4].into());
        }

        Some(list)
    }

    fn unlock_external_images(&mut self) {
        if !self.texture_resolver.external_images.is_empty() {
            let handler = self.external_image_handler
                .as_mut()
                .expect("Found external image, but no handler set!");

            for (ext_data, _) in self.texture_resolver.external_images.drain() {
                handler.unlock(ext_data.0, ext_data.1);
            }
        }
    }

    /// Allocates a texture to be used as the output for a rendering pass.
    ///
    /// We make an effort to reuse render target textures across passes and
    /// across frames when the format and dimensions match. Because we use
    /// immutable storage, we can't resize textures.
    ///
    /// We could consider approaches to re-use part of a larger target, if
    /// available. However, we'd need to be careful about eviction. Currently,
    /// render targets are freed if they haven't been used in 30 frames. If we
    /// used partial targets, we'd need to track how _much_ of the target has
    /// been used in the last 30 frames, since we could otherwise end up
    /// keeping an enormous target alive indefinitely by constantly using it
    /// in situations where a much smaller target would suffice.
    fn allocate_target_texture<T: RenderTarget>(
        &mut self,
        list: &mut RenderTargetList<T>,
        counters: &mut FrameProfileCounters,
    ) -> Option<ActiveTexture> {
        if list.targets.is_empty() {
            return None
        }

        // Get a bounding rect of all the layers, and round it up to a multiple
        // of 256. This improves render target reuse when resizing the window,
        // since we don't need to create a new render target for each slightly-
        // larger frame.
        let mut bounding_rect = DeviceIntRect::zero();
        for t in list.targets.iter() {
            bounding_rect = t.used_rect().union(&bounding_rect);
        }
        debug_assert_eq!(bounding_rect.origin, DeviceIntPoint::zero());
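        // `(x + 255) & !255` rounds x up to the next multiple of 256.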
        let dimensions = DeviceIntSize::new(
            (bounding_rect.size.width + 255) & !255,
            (bounding_rect.size.height + 255) & !255,
        );

        counters.targets_used.inc();

        // Try finding a match in the existing pool. If there's no match, we'll
        // create a new texture.
        let selector = TargetSelector {
            size: dimensions,
            num_layers: list.targets.len(),
            format: list.format,
        };
        let index = self.texture_resolver.render_target_pool
            .iter()
            .position(|texture| {
                selector == TargetSelector {
                    size: texture.get_dimensions(),
                    num_layers: texture.get_layer_count() as usize,
                    format: texture.get_format(),
                }
            });

        let rt_info = RenderTargetInfo { has_depth: list.needs_depth() };
        let texture = if let Some(idx) = index {
            let mut t = self.texture_resolver.render_target_pool.swap_remove(idx);
            self.device.reuse_render_target::<u8>(&mut t, rt_info);
            t
        } else {
            counters.targets_created.inc();
            self.device.create_texture(
                TextureTarget::Array,
                list.format,