Merge mozilla-central to mozilla-inbound. a=merge
author Daniel Varga <dvarga@mozilla.com>
Sat, 01 Dec 2018 23:42:02 +0200
changeset 505589 3a943b45881a477872f02fe7a601e838cb233f42
parent 505588 427accad16ae157eebbe24f95f64812ada65304a (current diff)
parent 505536 d244292c2a127fafda775e1db5e0a8e4decea5ff (diff)
child 505590 7c3a7a19c646122770b6c332601badb2bc65ccc8
push id 10290
push user ffxbld-merge
push date Mon, 03 Dec 2018 16:23:23 +0000
treeherder mozilla-beta@700bed2445e6 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers merge
milestone 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Merge mozilla-central to mozilla-inbound. a=merge
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2951,16 +2951,17 @@ dependencies = [
  "gleam 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "plane-split 0.13.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "ron 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
+ "sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_profiler 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
  "webrender_api 0.57.2",
 ]
 
 [[package]]
 name = "webrender_api"
--- a/gfx/thebes/gfxPlatform.cpp
+++ b/gfx/thebes/gfxPlatform.cpp
@@ -699,28 +699,26 @@ WebRenderMemoryReporter::CollectReports(
         helper.Report(aReport.gpu_cache_metadata, "gpu-cache/metadata");
         helper.Report(aReport.gpu_cache_cpu_mirror, "gpu-cache/cpu-mirror");
         helper.Report(aReport.render_tasks, "render-tasks");
         helper.Report(aReport.hit_testers, "hit-testers");
         helper.Report(aReport.fonts, "resource-cache/fonts");
         helper.Report(aReport.images, "resource-cache/images");
         helper.Report(aReport.rasterized_blobs,
                       "resource-cache/rasterized-blobs");
+        helper.Report(aReport.shader_cache, "shader-cache");
 
         // GPU Memory.
         helper.ReportTexture(aReport.gpu_cache_textures, "gpu-cache");
         helper.ReportTexture(aReport.vertex_data_textures, "vertex-data");
         helper.ReportTexture(aReport.render_target_textures, "render-targets");
         helper.ReportTexture(aReport.texture_cache_textures, "texture-cache");
         helper.ReportTexture(aReport.depth_target_textures, "depth-targets");
         helper.ReportTexture(aReport.swap_chain, "swap-chains");
 
-        // Total GPU bytes, for sanity-checking the above.
-        helper.ReportTotalGPUBytes(aReport.total_gpu_bytes_allocated);
-
         FinishAsyncMemoryReport();
       },
       [](mozilla::ipc::ResponseRejectReason aReason) {
         FinishAsyncMemoryReport();
       });
 
   return NS_OK;
 }
--- a/gfx/webrender_bindings/RenderThread.cpp
+++ b/gfx/webrender_bindings/RenderThread.cpp
@@ -25,16 +25,18 @@
 #include "GLLibraryEGL.h"
 #include "mozilla/widget/WinCompositorWindowThread.h"
 #endif
 
 using namespace mozilla;
 
 static already_AddRefed<gl::GLContext> CreateGLContext();
 
+MOZ_DEFINE_MALLOC_SIZE_OF(WebRenderRendererMallocSizeOf)
+
 namespace mozilla {
 namespace wr {
 
 static StaticRefPtr<RenderThread> sRenderThread;
 
 RenderThread::RenderThread(base::Thread* aThread)
     : mThread(aThread),
       mFrameCountMapLock("RenderThread.mFrameCountMapLock"),
@@ -123,24 +125,28 @@ bool RenderThread::IsInRenderThread() {
   return sRenderThread &&
          sRenderThread->mThread->thread_id() == PlatformThread::CurrentId();
 }
 
 void RenderThread::DoAccumulateMemoryReport(
     MemoryReport aReport,
     const RefPtr<MemoryReportPromise::Private>& aPromise) {
   MOZ_ASSERT(IsInRenderThread());
-  MOZ_ASSERT(aReport.total_gpu_bytes_allocated == 0);
 
   for (auto& r : mRenderers) {
     r.second->AccumulateMemoryReport(&aReport);
   }
 
-  // Note total gpu bytes allocated across all WR instances.
-  aReport.total_gpu_bytes_allocated += wr_total_gpu_bytes_allocated();
+  // Note memory used by the shader cache, which is shared across all WR
+  // instances.
+  MOZ_ASSERT(aReport.shader_cache == 0);
+  if (mProgramCache) {
+    aReport.shader_cache = wr_program_cache_report_memory(
+        mProgramCache->Raw(), &WebRenderRendererMallocSizeOf);
+  }
 
   aPromise->Resolve(aReport, __func__);
 }
 
 // static
 RefPtr<MemoryReportPromise> RenderThread::AccumulateMemoryReport(
     MemoryReport aInitial) {
   RefPtr<MemoryReportPromise::Private> p =
--- a/gfx/webrender_bindings/RendererOGL.cpp
+++ b/gfx/webrender_bindings/RendererOGL.cpp
@@ -203,17 +203,16 @@ void RendererOGL::AccumulateMemoryReport
   LayoutDeviceIntSize size = mCompositor->GetBufferSize();
 
   // Assume BGRA8 for the format since it's not exposed anywhere,
   // and all compositor backends should be using that.
   uintptr_t swapChainSize = size.width * size.height *
                             BytesPerPixel(SurfaceFormat::B8G8R8A8) *
                             (mCompositor->UseTripleBuffering() ? 3 : 2);
   aReport->swap_chain += swapChainSize;
-  aReport->total_gpu_bytes_allocated += swapChainSize;
 }
 
 static void DoNotifyWebRenderError(layers::CompositorBridgeParent* aBridge,
                                    WebRenderError aError) {
   aBridge->NotifyWebRenderError(aError);
 }
 
 void RendererOGL::NotifyWebRenderError(WebRenderError aError) {
--- a/gfx/webrender_bindings/src/bindings.rs
+++ b/gfx/webrender_bindings/src/bindings.rs
@@ -670,21 +670,16 @@ pub unsafe extern "C" fn wr_renderer_del
 }
 
 #[no_mangle]
 pub unsafe extern "C" fn wr_renderer_accumulate_memory_report(renderer: &mut Renderer,
                                                               report: &mut MemoryReport) {
     *report += renderer.report_memory();
 }
 
-#[no_mangle]
-pub unsafe extern "C" fn wr_total_gpu_bytes_allocated() -> usize {
-    ::webrender::total_gpu_bytes_allocated()
-}
-
 // cbindgen doesn't support tuples, so we have a little struct instead, with
 // an Into implementation to convert from the tuple to the struct.
 #[repr(C)]
 pub struct WrPipelineEpoch {
     pipeline_id: WrPipelineId,
     epoch: WrEpoch,
 }
 
@@ -2794,8 +2789,16 @@ pub extern "C" fn wr_shaders_new(gl_cont
 pub unsafe extern "C" fn wr_shaders_delete(shaders: *mut WrShaders, gl_context: *mut c_void) {
     let mut device = wr_device_new(gl_context, None);
     let shaders = Box::from_raw(shaders);
     if let Ok(shaders) = Rc::try_unwrap(shaders.shaders) {
       shaders.into_inner().deinit(&mut device);
     }
     // let shaders go out of scope and get dropped
 }
+
+#[no_mangle]
+pub unsafe extern "C" fn wr_program_cache_report_memory(
+    cache: *const WrProgramCache,
+    size_of_op: VoidPtrToSizeFn,
+    ) -> usize {
+    (*cache).program_cache.report_memory(size_of_op)
+}
--- a/gfx/webrender_bindings/src/program_cache.rs
+++ b/gfx/webrender_bindings/src/program_cache.rs
@@ -6,37 +6,42 @@ use std::path::{PathBuf};
 use std::rc::Rc;
 use std::sync::Arc;
 
 use webrender::{ProgramBinary, ProgramCache, ProgramCacheObserver};
 use bincode;
 use fxhash;
 use nsstring::nsAString;
 use rayon::ThreadPool;
-use uuid::Uuid;
 
 const MAX_LOAD_TIME_MS: u64 = 400;
-const MAX_CACHED_PROGRAM_COUNT: u32 = 15;
 
 fn deserialize_program_binary(path: &PathBuf) -> Result<Arc<ProgramBinary>, Error> {
     let mut buf = vec![];
     let mut file = File::open(path)?;
     file.read_to_end(&mut buf)?;
 
-    if buf.len() <= 8 {
+    if buf.len() <= 8 + 4 {
         return Err(Error::new(ErrorKind::InvalidData, "File size is too small"));
     }
-    let hash = &buf[0 .. 8];
-    let data = &buf[8 ..];
+    let magic = &buf[0 .. 4];
+    let hash = &buf[4 .. 8 + 4];
+    let data = &buf[8 + 4 ..];
+
+    // Check if magic + version are correct.
+    let mv:u32 = bincode::deserialize(&magic).unwrap();
+    if mv != MAGIC_AND_VERSION {
+        return Err(Error::new(ErrorKind::InvalidData, "File data is invalid (magic+version)"));
+    }
 
     // Check if hash is correct
     let hash:u64 = bincode::deserialize(&hash).unwrap();
     let hash_data = fxhash::hash64(&data);
     if hash != hash_data {
-        return Err(Error::new(ErrorKind::InvalidData, "File data is invalid"));
+        return Err(Error::new(ErrorKind::InvalidData, "File data is invalid (hash)"));
     }
 
     // Deserialize ProgramBinary
     let binary = match bincode::deserialize(&data) {
         Ok(binary) => binary,
         Err(_) => return Err(Error::new(ErrorKind::InvalidData, "Failed to deserialize ProgramBinary")),
     };
 
@@ -73,150 +78,135 @@ fn get_cache_path_from_prof_path(prof_pa
     let prof_path = OsString::from(utf8);
     let mut cache_path = PathBuf::from(&prof_path);
     cache_path.push("shader-cache");
 
     Some(cache_path)
 }
 
 struct WrProgramBinaryDiskCache {
-    cache_path: Option<PathBuf>,
-    program_count: u32,
-    is_enabled: bool,
+    cache_path: PathBuf,
     workers: Arc<ThreadPool>,
 }
 
+// Magic number + version. Increment the version when the binary format changes.
+const MAGIC: u32 = 0xB154AD30; // BI-SHADE + version.
+const VERSION: u32 = 2;
+const MAGIC_AND_VERSION: u32 = MAGIC + VERSION;
+
+/// Helper to convert a closure returning a `Result` to one that returns void.
+/// This allows the enclosed code to use the question-mark operator in a
+/// context where the calling function doesn't expect a `Result`.
+#[allow(unused_must_use)]
+fn result_to_void<F: FnOnce() -> Result<(), ()>>(f: F) { f(); }
+
 impl WrProgramBinaryDiskCache {
     #[allow(dead_code)]
-    fn new(prof_path: &nsAString, workers: &Arc<ThreadPool>) -> Self {
-        let cache_path = get_cache_path_from_prof_path(prof_path);
-        let is_enabled = cache_path.is_some();
-        let workers = Arc::clone(workers);
-
-        WrProgramBinaryDiskCache{
+    fn new(cache_path: PathBuf, workers: &Arc<ThreadPool>) -> Self {
+        WrProgramBinaryDiskCache {
             cache_path,
-            program_count: 0,
-            is_enabled,
-            workers,
+            workers: Arc::clone(workers),
         }
     }
 
-    fn notify_binary_added(&mut self, program_binary: &Arc<ProgramBinary>) {
-        if !self.is_enabled {
-            return;
+    /// Updates the on-disk cache to contain exactly the entries specified.
+    fn update(&mut self, entries: Vec<Arc<ProgramBinary>>) {
+        info!("Updating on-disk shader cache");
+
+        // Compute the digests in string form.
+        let mut entries: Vec<(String, Arc<ProgramBinary>)> =
+            entries.into_iter().map(|e| (format!("{}", e.source_digest()), e)).collect();
+
+        // For each file in the current directory, check if it corresponds to
+        // an entry we're supposed to write. If so, we don't need to write the
+        // entry. If not, we delete the file.
+        for existing in read_dir(&self.cache_path).unwrap().filter_map(|f| f.ok()) {
+            let pos = existing.file_name().to_str()
+                .and_then(|digest| entries.iter().position(|x| x.0 == digest));
+            if let Some(p) = pos {
+                info!("Found existing shader: {}", existing.file_name().to_string_lossy());
+                entries.swap_remove(p);
+            } else {
+                self.workers.spawn(move || {
+                    info!("Removing shader: {}", existing.file_name().to_string_lossy());
+                    ::std::fs::remove_file(existing.path())
+                        .unwrap_or_else(|e| error!("shader-cache: Failed to remove shader: {:?}", e));
+                });
+            }
         }
 
-        if let Some(ref cache_path) = self.cache_path {
-            if let Err(_) = create_dir_all(&cache_path) {
-                error!("failed to create dir for shader disk cache");
-                return;
-            }
+        // Write the remaining entries to disk on a worker thread.
+        for entry in entries.into_iter() {
+            let (file_name, program_binary) = entry;
+            let file_path = self.cache_path.join(&file_name);
 
-            self.program_count += 1;
-            if self.program_count > MAX_CACHED_PROGRAM_COUNT {
-                // Disable disk cache to avoid storing more shader programs to disk
-                self.is_enabled = false;
-                return;
-            }
-
-            // Use uuid for file name
-            let uuid1 = Uuid::new_v4();
-            let file_name = uuid1.hyphenated().to_string();
-            let program_binary = Arc::clone(program_binary);
-            let file_path = cache_path.join(&file_name);
-
-            let program_count = self.program_count;
-
-            // Save to disk on worker thread
-            self.workers.spawn(move || {
+            self.workers.spawn(move || result_to_void(move || {
+                info!("Writing shader: {}", file_name);
 
                 use std::time::{Instant};
                 let start = Instant::now();
 
-                let data: Vec<u8> = match bincode::serialize(&*program_binary) {
-                    Ok(data) => data,
-                    Err(err) => {
-                        error!("Failed to serialize program binary error: {}", err);
-                        return;
-                    }
-                };
+                let data: Vec<u8> = bincode::serialize(&*program_binary)
+                    .map_err(|e| error!("shader-cache: Failed to serialize: {}", e))?;
+
+                let mut file = File::create(&file_path)
+                    .map_err(|e| error!("shader-cache: Failed to create file: {}", e))?;
 
-                let mut file = match File::create(&file_path) {
-                    Ok(file) => file,
-                    Err(err) => {
-                        error!("Unable to create file for program binary error: {}", err);
-                        return;
-                    }
-                };
+                // Write magic + version.
+                let mv = MAGIC_AND_VERSION;
+                let mv = bincode::serialize(&mv).unwrap();
+                assert!(mv.len() == 4);
+                file.write_all(&mv)
+                    .map_err(|e| error!("shader-cache: Failed to write magic + version: {}", e))?;
 
                 // Write hash
                 let hash = fxhash::hash64(&data);
                 let hash = bincode::serialize(&hash).unwrap();
                 assert!(hash.len() == 8);
-                match file.write_all(&hash) {
-                    Err(err) => {
-                        error!("Failed to write hash to file error: {}", err);
-                    }
-                    _ => {},
-                };
+                file.write_all(&hash)
+                    .map_err(|e| error!("shader-cache: Failed to write hash: {}", e))?;
 
                 // Write serialized data
-                match file.write_all(&data) {
-                    Err(err) => {
-                        error!("Failed to write program binary to file error: {}", err);
-                    }
-                    _ => {},
-                };
+                file.write_all(&data)
+                    .map_err(|e| error!("shader-cache: Failed to write program binary: {}", e))?;
 
-                let elapsed = start.elapsed();
-                info!("notify_binary_added: {} ms program_count {}",
-                    (elapsed.as_secs() * 1_000) + (elapsed.subsec_nanos() / 1_000_000) as u64, program_count);
-
-            });
+                info!("Wrote shader {} in {:?}", file_name, start.elapsed());
+                Ok(())
+            }));
         }
     }
 
     pub fn try_load_from_disk(&mut self, program_cache: &Rc<ProgramCache>) {
-        if !self.is_enabled {
-            return;
-        }
-
-        if let Some(ref cache_path) = self.cache_path {
-            use std::time::{Instant};
-            let start = Instant::now();
+        use std::time::{Instant};
+        let start = Instant::now();
 
-            // Load program binaries if exist
-            if cache_path.exists() && cache_path.is_dir() {
-                for entry in read_dir(cache_path).unwrap() {
-                    let entry = entry.unwrap();
-                    let path = entry.path();
+        // Load program binaries if exist
+        for entry in read_dir(&self.cache_path).unwrap() {
+            let entry = entry.unwrap();
+            let path = entry.path();
 
-                    info!("loading shader file");
+            info!("Loading shader: {}", entry.file_name().to_string_lossy());
 
-                    match deserialize_program_binary(&path) {
-                        Ok(program) => {
-                            program_cache.load_program_binary(program);
-                        }
-                        Err(err) => {
-                            error!("Failed to desriralize program binary error: {}", err);
-                        }
-                    };
-
-                    self.program_count += 1;
+            match deserialize_program_binary(&path) {
+                Ok(program) => {
+                    program_cache.load_program_binary(program);
+                }
+                Err(err) => {
+                    error!("shader-cache: Failed to deserialize program binary: {}", err);
+                }
+            };
 
-                    let elapsed = start.elapsed();
-                    let elapsed_ms = (elapsed.as_secs() * 1_000) + (elapsed.subsec_nanos() / 1_000_000) as u64;
-                    info!("deserialize_program_binary: {} ms program_count {}", elapsed_ms, self.program_count);
+            let elapsed = start.elapsed();
+            info!("Loaded shader in {:?}", elapsed);
+            let elapsed_ms = (elapsed.as_secs() * 1_000) +
+                (elapsed.subsec_nanos() / 1_000_000) as u64;
 
-                    if self.program_count > MAX_CACHED_PROGRAM_COUNT || elapsed_ms > MAX_LOAD_TIME_MS {
-                        // Disable disk cache to avoid storing more shader programs to disk
-                        self.is_enabled = false;
-                        break;
-                    }
-                }
+            if elapsed_ms > MAX_LOAD_TIME_MS {
+                error!("shader-cache: Timed out before finishing loads");
+                break;
             }
         }
     }
 }
 
 pub struct WrProgramCacheObserver {
     disk_cache: Rc<RefCell<WrProgramBinaryDiskCache>>,
 }
@@ -226,65 +216,69 @@ impl WrProgramCacheObserver {
     fn new(disk_cache: Rc<RefCell<WrProgramBinaryDiskCache>>) -> Self {
         WrProgramCacheObserver{
             disk_cache,
         }
     }
 }
 
 impl ProgramCacheObserver for WrProgramCacheObserver {
-    fn notify_binary_added(&self, program_binary: &Arc<ProgramBinary>) {
-        self.disk_cache.borrow_mut().notify_binary_added(program_binary);
+    fn update_disk_cache(&self, entries: Vec<Arc<ProgramBinary>>) {
+        self.disk_cache.borrow_mut().update(entries);
     }
 
     fn notify_program_binary_failed(&self, _program_binary: &Arc<ProgramBinary>) {
-        error!("Failed program_binary");
+        error!("shader-cache: Failed program_binary");
     }
 }
 
 
 pub struct WrProgramCache {
-    program_cache: Rc<ProgramCache>,
+    pub program_cache: Rc<ProgramCache>,
     disk_cache: Option<Rc<RefCell<WrProgramBinaryDiskCache>>>,
 }
 
 impl WrProgramCache {
     pub fn new(prof_path: &nsAString, workers: &Arc<ThreadPool>) -> Self {
-        let disk_cache = Rc::new(RefCell::new(WrProgramBinaryDiskCache::new(prof_path, workers)));
-        let program_cache_observer = Box::new(WrProgramCacheObserver::new(Rc::clone(&disk_cache)));
-        let program_cache = ProgramCache::new(Some(program_cache_observer));
+        let cache_path = get_cache_path_from_prof_path(prof_path);
+        let use_disk_cache = cache_path.as_ref().map_or(false, |p| create_dir_all(p).is_ok());
+        let (disk_cache, program_cache_observer) = if use_disk_cache {
+            let cache = Rc::new(RefCell::new(WrProgramBinaryDiskCache::new(cache_path.unwrap(), workers)));
+            let obs = Box::new(WrProgramCacheObserver::new(Rc::clone(&cache))) as
+                Box<dyn ProgramCacheObserver>;
+            (Some(cache), Some(obs))
+        } else {
+            (None, None)
+        };
+        let program_cache = ProgramCache::new(program_cache_observer);
 
         WrProgramCache {
             program_cache,
-            disk_cache: Some(disk_cache),
+            disk_cache: disk_cache,
         }
     }
 
     pub fn rc_get(&self) -> &Rc<ProgramCache> {
         &self.program_cache
     }
 
     pub fn try_load_from_disk(&self) {
         if let Some(ref disk_cache) = self.disk_cache {
             disk_cache.borrow_mut().try_load_from_disk(&self.program_cache);
         } else {
-            error!("Shader disk cache is not supported");
+            error!("shader-cache: Shader disk cache is not supported");
         }
     }
 }
 
 pub fn remove_disk_cache(prof_path: &nsAString) -> Result<(), Error> {
     use std::fs::remove_dir_all;
     use std::time::{Instant};
 
     if let Some(cache_path) = get_cache_path_from_prof_path(prof_path) {
         if cache_path.exists() {
             let start = Instant::now();
-
             remove_dir_all(&cache_path)?;
-
-            let elapsed = start.elapsed();
-            let elapsed_ms = (elapsed.as_secs() * 1_000) + (elapsed.subsec_nanos() / 1_000_000) as u64;
-            info!("remove_disk_cache: {} ms", elapsed_ms);
+            info!("removed all disk cache shaders in {:?}", start.elapsed());
         }
     }
     Ok(())
 }
--- a/gfx/webrender_bindings/webrender_ffi_generated.h
+++ b/gfx/webrender_bindings/webrender_ffi_generated.h
@@ -534,41 +534,41 @@ struct MemoryReport {
   uintptr_t clip_stores;
   uintptr_t gpu_cache_metadata;
   uintptr_t gpu_cache_cpu_mirror;
   uintptr_t render_tasks;
   uintptr_t hit_testers;
   uintptr_t fonts;
   uintptr_t images;
   uintptr_t rasterized_blobs;
+  uintptr_t shader_cache;
   uintptr_t gpu_cache_textures;
   uintptr_t vertex_data_textures;
   uintptr_t render_target_textures;
   uintptr_t texture_cache_textures;
   uintptr_t depth_target_textures;
   uintptr_t swap_chain;
-  uintptr_t total_gpu_bytes_allocated;
 
   bool operator==(const MemoryReport& aOther) const {
     return primitive_stores == aOther.primitive_stores &&
            clip_stores == aOther.clip_stores &&
            gpu_cache_metadata == aOther.gpu_cache_metadata &&
            gpu_cache_cpu_mirror == aOther.gpu_cache_cpu_mirror &&
            render_tasks == aOther.render_tasks &&
            hit_testers == aOther.hit_testers &&
            fonts == aOther.fonts &&
            images == aOther.images &&
            rasterized_blobs == aOther.rasterized_blobs &&
+           shader_cache == aOther.shader_cache &&
            gpu_cache_textures == aOther.gpu_cache_textures &&
            vertex_data_textures == aOther.vertex_data_textures &&
            render_target_textures == aOther.render_target_textures &&
            texture_cache_textures == aOther.texture_cache_textures &&
            depth_target_textures == aOther.depth_target_textures &&
-           swap_chain == aOther.swap_chain &&
-           total_gpu_bytes_allocated == aOther.total_gpu_bytes_allocated;
+           swap_chain == aOther.swap_chain;
   }
 };
 
 template<typename T, typename U>
 struct TypedSize2D {
   T width;
   T height;
 
@@ -1003,16 +1003,22 @@ struct MutByteSlice {
   uintptr_t len;
 
   bool operator==(const MutByteSlice& aOther) const {
     return buffer == aOther.buffer &&
            len == aOther.len;
   }
 };
 
+// A C function that takes a pointer to a heap allocation and returns its size.
+//
+// This is borrowed from the malloc_size_of crate, upon which we want to avoid
+// a dependency from WebRender.
+using VoidPtrToSizeFn = uintptr_t(*)(const void*);
+
 struct RendererStats {
   uintptr_t total_draw_calls;
   uintptr_t alpha_target_count;
   uintptr_t color_target_count;
   uintptr_t texture_upload_kb;
   uint64_t resource_upload_time;
   uint64_t gpu_cache_upload_time;
 
@@ -1111,22 +1117,16 @@ struct WrOpacityProperty {
   float opacity;
 
   bool operator==(const WrOpacityProperty& aOther) const {
     return id == aOther.id &&
            opacity == aOther.opacity;
   }
 };
 
-// A C function that takes a pointer to a heap allocation and returns its size.
-//
-// This is borrowed from the malloc_size_of crate, upon which we want to avoid
-// a dependency from WebRender.
-using VoidPtrToSizeFn = uintptr_t(*)(const void*);
-
 extern "C" {
 
 extern void AddBlobFont(WrFontInstanceKey aInstanceKey,
                         WrFontKey aFontKey,
                         float aSize,
                         const FontInstanceOptions *aOptions,
                         const FontInstancePlatformOptions *aPlatformOptions,
                         const FontVariation *aVariations,
@@ -1652,16 +1652,21 @@ void wr_program_cache_delete(WrProgramCa
 WR_DESTRUCTOR_SAFE_FUNC;
 
 WR_INLINE
 WrProgramCache *wr_program_cache_new(const nsAString *aProfPath,
                                      WrThreadPool *aThreadPool)
 WR_FUNC;
 
 WR_INLINE
+uintptr_t wr_program_cache_report_memory(const WrProgramCache *aCache,
+                                         VoidPtrToSizeFn aSizeOfOp)
+WR_FUNC;
+
+WR_INLINE
 void wr_renderer_accumulate_memory_report(Renderer *aRenderer,
                                           MemoryReport *aReport)
 WR_FUNC;
 
 WR_INLINE
 bool wr_renderer_current_epoch(Renderer *aRenderer,
                                WrPipelineId aPipelineId,
                                WrEpoch *aOutEpoch)
@@ -1852,20 +1857,16 @@ WR_INLINE
 void wr_thread_pool_delete(WrThreadPool *aThreadPool)
 WR_DESTRUCTOR_SAFE_FUNC;
 
 WR_INLINE
 WrThreadPool *wr_thread_pool_new()
 WR_FUNC;
 
 WR_INLINE
-uintptr_t wr_total_gpu_bytes_allocated()
-WR_FUNC;
-
-WR_INLINE
 void wr_transaction_append_transform_properties(Transaction *aTxn,
                                                 const WrTransformProperty *aTransformArray,
                                                 uintptr_t aTransformCount)
 WR_FUNC;
 
 WR_INLINE
 void wr_transaction_clear_display_list(Transaction *aTxn,
                                        WrEpoch aEpoch,
--- a/gfx/wr/Cargo.lock
+++ b/gfx/wr/Cargo.lock
@@ -29,16 +29,21 @@ name = "app_units"
 version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "num-traits 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
+name = "arrayref"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "arrayvec"
 version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -85,16 +90,30 @@ version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "block"
 version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
+name = "block-buffer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "byte-tools"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "byteorder"
 version = "1.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "bytes"
 version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -299,16 +318,24 @@ name = "deflate"
 version = "0.7.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "adler32 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
+name = "digest"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "generic-array 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "direct-composition"
 version = "0.1.0"
 dependencies = [
  "euclid 0.19.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "gleam 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "mozangle 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "webrender 0.57.2",
  "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -376,16 +403,21 @@ name = "expat-sys"
 version = "2.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "cmake 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)",
  "pkg-config 0.3.11 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
+name = "fake-simd"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "fnv"
 version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "font-loader"
 version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -448,16 +480,24 @@ name = "gdi32-sys"
 version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
+name = "generic-array"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "gif"
 version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "color_quant 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "lzw 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
@@ -1202,16 +1242,27 @@ dependencies = [
 ]
 
 [[package]]
 name = "sha1"
 version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
+name = "sha2"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "digest 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "shared_library"
 version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
@@ -1339,16 +1390,21 @@ version = "0.1.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
  "redox_syscall 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
+name = "typenum"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "ucd-util"
 version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "unicode-bidi"
 version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1499,16 +1555,17 @@ dependencies = [
  "pathfinder_partitioner 0.2.0 (git+https://github.com/pcwalton/pathfinder?branch=webrender)",
  "pathfinder_path_utils 0.2.0 (git+https://github.com/pcwalton/pathfinder?branch=webrender)",
  "plane-split 0.13.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "png 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "ron 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)",
+ "sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_profiler 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
  "webrender_api 0.57.2",
  "ws 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -1698,24 +1755,27 @@ dependencies = [
 ]
 
 [metadata]
 "checksum adler32 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6cbd0b9af8587c72beadc9f72d35b9fbb070982c9e6203e46e93f10df25f8f45"
 "checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4"
 "checksum android_glue 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "000444226fcff248f2bc4c7625be32c63caccfecc2723a2b9f78a7487a49c407"
 "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
 "checksum app_units 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9dadc668390b373e73e4abbfc1f07238b09a25858f2f39c06cebc6d8e141d774"
+"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee"
 "checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef"
 "checksum atty 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "2fc4a1aa4c24c0718a250f0681885c1af91419d242f29eb8f2ab28502d80dbd1"
 "checksum base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "85415d2594767338a74a30c1d370b2f3262ec1b4ed2d7bba5b3faf4de40467d9"
 "checksum binary-space-partition 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "88ceb0d16c4fd0e42876e298d7d3ce3780dd9ebdcbe4199816a32c77e08597ff"
 "checksum bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bda13183df33055cbb84b847becce220d392df502ebe7a4a78d7021771ed94d0"
 "checksum bit-vec 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "02b4ff8b16e6076c3e14220b39fbc1fabb6737522281a388998046859400895f"
 "checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789"
 "checksum block 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a"
+"checksum block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a076c298b9ecdb530ed9d967e74a6027d6a7478924520acddcddc24c1c8ab3ab"
+"checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40"
 "checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9"
 "checksum bytes 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "2f1d50c876fb7545f5f289cd8b2aee3f359d073ae819eed5d6373638e2c61e59"
 "checksum cc 1.0.15 (registry+https://github.com/rust-lang/crates.io-index)" = "0ebb87d1116151416c0cf66a0e3fb6430cccd120fd6300794b4dfaa050ac40ba"
 "checksum cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "405216fd8fe65f718daa7102ea808a946b6ce40c742998fbfd3463645552de18"
 "checksum cgl 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "55e7ec0b74fe5897894cbc207092c577e87c52f8a59e8ca8d97ef37551f60a49"
 "checksum clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f16b89cbb9ee36d87483dc939fe9f1e13c05898d56d7b230a0d4dff033a536"
 "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
 "checksum cmake 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "95470235c31c726d72bf2e1f421adc1e65b9d561bf5529612cbe1a72da1467b3"
@@ -1730,33 +1790,36 @@ dependencies = [
 "checksum crossbeam 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)" = "bd66663db5a988098a89599d4857919b3acf7f61402e61365acfd3919857b9be"
 "checksum crossbeam-channel 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6c0a94250b0278d7fc5a894c3d276b11ea164edc8bf8feb10ca1ea517b44a649"
 "checksum crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f739f8c5363aca78cfb059edf753d8f0d36908c348f3d8d1503f03d8b75d9cf3"
 "checksum crossbeam-epoch 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "927121f5407de9956180ff5e936fe3cf4324279280001cd56b669d28ee7e9150"
 "checksum crossbeam-epoch 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "30fecfcac6abfef8771151f8be4abc9e4edc112c2bcb233314cafde2680536e9"
 "checksum crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2760899e32a1d58d5abb31129f8fae5de75220bc2176e77ff7c627ae45c918d9"
 "checksum crossbeam-utils 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "677d453a17e8bd2b913fa38e8b9cf04bcdbb5be790aa294f2389661d72036015"
 "checksum deflate 0.7.18 (registry+https://github.com/rust-lang/crates.io-index)" = "32c8120d981901a9970a3a1c97cf8b630e0fa8c3ca31e75b6fd6fd5f9f427b31"
+"checksum digest 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "03b072242a8cbaf9c145665af9d250c59af3b958f83ed6824e13533cf76d5b90"
 "checksum dlib 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "77e51249a9d823a4cb79e3eca6dcd756153e8ed0157b6c04775d04bf1b13b76a"
 "checksum downcast-rs 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "18df8ce4470c189d18aa926022da57544f31e154631eb4cfe796aea97051fe6c"
 "checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
 "checksum dwrote 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7b46afd0d0bbbea88fc083ea293e40865e26a75ec9d38cf5d05a23ac3e2ffe02"
 "checksum either 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3be565ca5c557d7f59e7cfcf1844f9e3033650c929c6566f511e8005f205c1d0"
 "checksum env_logger 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0e6e40ebb0e66918a37b38c7acab4e10d299e0463fe2af5d29b9cc86710cfd2a"
 "checksum euclid 0.19.3 (registry+https://github.com/rust-lang/crates.io-index)" = "600657e7e5c03bfbccdc68721bc3b5abcb761553973387124eae9c9e4f02c210"
 "checksum expat-sys 2.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "c470ccb972f2088549b023db8029ed9da9426f5affbf9b62efff7009ab8ed5b1"
+"checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
 "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
 "checksum font-loader 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fd330f40acb3016432cbfa4c54b3d6e6e893a538df79d8df8fd8c26e21c36aaa"
 "checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
 "checksum foreign-types-shared 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
 "checksum freetype 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b659e75b7a7338fe75afd7f909fc2b71937845cffb6ebe54ba2e50f13d8e903d"
 "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
 "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
 "checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
 "checksum gdi32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0912515a8ff24ba900422ecda800b52f4016a56251922d397c576bf92c690518"
+"checksum generic-array 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ef25c5683767570c2bbd7deba372926a55eaae9982d7726ee2a1050239d45b9d"
 "checksum gif 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff3414b424657317e708489d2857d9575f4403698428b040b609b9d1c1a84a2c"
 "checksum gl_generator 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a0ffaf173cf76c73a73e080366bf556b4776ece104b06961766ff11449f38604"
 "checksum gl_generator 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a795170cbd85b5a7baa58d6d7525cae6a03e486859860c220f7ebbbdd379d0a"
 "checksum gleam 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "4b47f5b15742aee359c7895ab98cf2cceecc89bb4feb6f4e42f802d7899877da"
 "checksum glutin 0.17.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a70c5fe78efbd5a3b243a804ea1032053c584510f8822819f94cfb29b2100317"
 "checksum half 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d5c5f71a723d10dfc58927cbed37c3071a50afc7f073d86fd7d3e5727db890f"
 "checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37"
 "checksum humantime 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0484fda3e7007f2a4a0d9c3a703ca38c71c54c55602ce4660c419fd32e188c9e"
@@ -1834,31 +1897,33 @@ dependencies = [
 "checksum serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)" = "15c141fc7027dd265a47c090bf864cf62b42c4d228bbcf4e51a0c9e2b0d3f7ef"
 "checksum serde_bytes 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)" = "adb6e51a6b3696b301bc221d785f898b4457c619b51d7ce195a6d20baecb37b3"
 "checksum serde_derive 1.0.80 (git+https://github.com/servo/serde?branch=deserialize_from_enums9)" = "<none>"
 "checksum serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)" = "f3ad6d546e765177cf3dded3c2e424a8040f870083a0e64064746b958ece9cb1"
 "checksum servo-fontconfig 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a088f8d775a5c5314aae09bd77340bc9c67d72b9a45258be34c83548b4814cd9"
 "checksum servo-fontconfig-sys 4.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "38b494f03009ee81914b0e7d387ad7c145cafcd69747c2ec89b0e17bb94f303a"
 "checksum servo-freetype-sys 4.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9232032c2e85118c0282c6562c84cab12316e655491ba0a5d1905b2320060d1b"
 "checksum sha1 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cc30b1e1e8c40c121ca33b86c23308a090d19974ef001b4bf6e61fd1a0fb095c"
+"checksum sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9eb6be24e4c23a84d7184280d2722f7f2731fcdd4a9d886efbfe4413e4847ea0"
 "checksum shared_library 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8254bf098ce4d8d7cc7cc6de438c5488adc5297e5b7ffef88816c0a91bd289c1"
 "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23"
 "checksum smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "26df3bb03ca5eac2e64192b723d51f56c1b1e0860e7c766281f4598f181acdc8"
 "checksum smithay-client-toolkit 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "428d6c019bb92753be9670367e3f483e4fcef396180a9b59e813b69b20014881"
 "checksum stable_deref_trait 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ffbc596e092fe5f598b12ef46cc03754085ac2f4d8c739ad61c4ae266cc3b3fa"
 "checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
 "checksum syn 0.15.17 (registry+https://github.com/rust-lang/crates.io-index)" = "3391038ebc3e4ab24eb028cb0ef2f2dc4ba0cbf72ee895ed6a6fad730640b5bc"
 "checksum tempfile 3.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "47776f63b85777d984a50ce49d6b9e58826b6a3766a449fc95bc66cd5663c15b"
 "checksum termcolor 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "adc4587ead41bf016f11af03e55a624c06568b5a19db4e90fde573d805074f83"
 "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
 "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693"
 "checksum thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279ef31c19ededf577bfd12dfae728040a21f635b06a24cd670ff510edd38963"
 "checksum thread_profiler 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "5920e77802b177479ab5795767fa48e68f61b2f516c2ac0041e2978dd8efe483"
 "checksum tiff 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a2cc6c4fd13cb1cfd20abdb196e794ceccb29371855b7e7f575945f920a5b3c2"
 "checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b"
+"checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"
 "checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"
 "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
 "checksum unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
 "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
 "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
 "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
 "checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7"
 "checksum user32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4ef4711d107b21b410a3a974b1204d9accc8b10dad75d8324b5d755de1617d47"
--- a/gfx/wr/webrender/Cargo.toml
+++ b/gfx/wr/webrender/Cargo.toml
@@ -32,16 +32,17 @@ lazy_static = "1"
 log = "0.4"
 num-traits = "0.2"
 plane-split = "0.13.3"
 png = { optional = true, version = "0.12" }
 rayon = "1"
 ron = { optional = true, version = "0.1.7" }
 serde = { optional = true, version = "1.0", features = ["serde_derive"] }
 serde_json = { optional = true, version = "1.0" }
+sha2 = "0.7"
 smallvec = "0.6"
 thread_profiler = "0.1.1"
 time = "0.1"
 webrender_api = { version = "0.57.2", path = "../webrender_api" }
 ws = { optional = true, version = "0.7.3" }
 
 [dependencies.pathfinder_font_renderer]
 git = "https://github.com/pcwalton/pathfinder"
--- a/gfx/wr/webrender/src/debug_render.rs
+++ b/gfx/wr/webrender/src/debug_render.rs
@@ -103,25 +103,25 @@ pub struct DebugRenderer {
     line_vao: VAO,
     color_program: Program,
 }
 
 impl DebugRenderer {
     pub fn new(device: &mut Device) -> Result<Self, ShaderError> {
         let font_program = device.create_program_linked(
             "debug_font",
-            "",
+            String::new(),
             &DESC_FONT,
         )?;
         device.bind_program(&font_program);
         device.bind_shader_samplers(&font_program, &[("sColor0", DebugSampler::Font)]);
 
         let color_program = device.create_program_linked(
             "debug_color",
-            "",
+            String::new(),
             &DESC_COLOR,
         )?;
 
         let font_vao = device.create_vao(&DESC_FONT);
         let line_vao = device.create_vao(&DESC_COLOR);
         let tri_vao = device.create_vao(&DESC_COLOR);
 
         let font_texture = device.create_texture(
--- a/gfx/wr/webrender/src/device/gl.rs
+++ b/gfx/wr/webrender/src/device/gl.rs
@@ -1,30 +1,34 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::super::shader_source;
 use api::{ColorF, ImageFormat, MemoryReport};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::TextureTarget;
+use api::VoidPtrToSizeFn;
 #[cfg(any(feature = "debug_renderer", feature="capture"))]
 use api::ImageDescriptor;
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::{FastHashMap, LayerIndex, RenderTargetInfo};
 use log::Level;
+use sha2::{Digest, Sha256};
 use smallvec::SmallVec;
-use std::cell::RefCell;
+use std::borrow::Cow;
+use std::cell::{Cell, RefCell};
 use std::cmp;
 use std::collections::hash_map::Entry;
 use std::fs::File;
 use std::io::Read;
 use std::marker::PhantomData;
 use std::mem;
+use std::os::raw::c_void;
 use std::ops::Add;
 use std::path::PathBuf;
 use std::ptr;
 use std::rc::Rc;
 use std::slice;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering};
 use std::thread;
@@ -175,92 +179,114 @@ fn get_shader_version(gl: &gl::Gl) -> &'
     match gl.get_type() {
         gl::GlType::Gl => SHADER_VERSION_GL,
         gl::GlType::Gles => SHADER_VERSION_GLES,
     }
 }
 
 // Get a shader string by name, from the built in resources or
 // an override path, if supplied.
-fn get_shader_source(shader_name: &str, base_path: &Option<PathBuf>) -> Option<String> {
+fn get_shader_source(shader_name: &str, base_path: &Option<PathBuf>) -> Option<Cow<'static, str>> {
     if let Some(ref base) = *base_path {
         let shader_path = base.join(&format!("{}.glsl", shader_name));
         if shader_path.exists() {
             let mut source = String::new();
             File::open(&shader_path)
                 .unwrap()
                 .read_to_string(&mut source)
                 .unwrap();
-            return Some(source);
+            return Some(Cow::Owned(source));
         }
     }
 
     shader_source::SHADERS
         .get(shader_name)
-        .map(|s| s.to_string())
+        .map(|s| Cow::Borrowed(*s))
 }
 
 // Parse a shader string for imports. Imports are recursively processed, and
-// prepended to the list of outputs.
-fn parse_shader_source(source: String, base_path: &Option<PathBuf>, output: &mut String) {
+// prepended to the output stream.
+fn parse_shader_source<F: FnMut(&str)>(source: Cow<'static, str>, base_path: &Option<PathBuf>, output: &mut F) {
     for line in source.lines() {
         if line.starts_with(SHADER_IMPORT) {
             let imports = line[SHADER_IMPORT.len() ..].split(',');
 
             // For each import, get the source, and recurse.
             for import in imports {
                 if let Some(include) = get_shader_source(import, base_path) {
                     parse_shader_source(include, base_path, output);
                 }
             }
         } else {
-            output.push_str(line);
-            output.push_str("\n");
+            output(line);
+            output("\n");
         }
     }
 }
 
+/// Creates heap-allocated strings for both vertex and fragment shaders. Public
+/// to be accessible to tests.
 pub fn build_shader_strings(
+     gl_version_string: &str,
+     features: &str,
+     base_filename: &str,
+     override_path: &Option<PathBuf>,
+) -> (String, String) {
+    let mut vs_source = String::new();
+    do_build_shader_string(
+        gl_version_string,
+        features,
+        SHADER_KIND_VERTEX,
+        base_filename,
+        override_path,
+        |s| vs_source.push_str(s),
+    );
+
+    let mut fs_source = String::new();
+    do_build_shader_string(
+        gl_version_string,
+        features,
+        SHADER_KIND_FRAGMENT,
+        base_filename,
+        override_path,
+        |s| fs_source.push_str(s),
+    );
+
+    (vs_source, fs_source)
+}
+
+/// Walks the given shader string and applies the output to the provided
+/// callback. Assuming an override path is not used, does no heap allocation
+/// and no I/O.
+fn do_build_shader_string<F: FnMut(&str)>(
     gl_version_string: &str,
     features: &str,
+    kind: &str,
     base_filename: &str,
     override_path: &Option<PathBuf>,
-) -> (String, String) {
-    // Construct a list of strings to be passed to the shader compiler.
-    let mut vs_source = String::new();
-    let mut fs_source = String::new();
-
+    mut output: F,
+) {
     // GLSL requires that the version number comes first.
-    vs_source.push_str(gl_version_string);
-    fs_source.push_str(gl_version_string);
+    output(gl_version_string);
 
     // Insert the shader name to make debugging easier.
     let name_string = format!("// {}\n", base_filename);
-    vs_source.push_str(&name_string);
-    fs_source.push_str(&name_string);
+    output(&name_string);
 
     // Define a constant depending on whether we are compiling VS or FS.
-    vs_source.push_str(SHADER_KIND_VERTEX);
-    fs_source.push_str(SHADER_KIND_FRAGMENT);
+    output(kind);
 
     // Add any defines that were passed by the caller.
-    vs_source.push_str(features);
-    fs_source.push_str(features);
+    output(features);
 
     // Parse the main .glsl file, including any imports
     // and append them to the list of sources.
-    let mut shared_result = String::new();
     if let Some(shared_source) = get_shader_source(base_filename, override_path) {
-        parse_shader_source(shared_source, override_path, &mut shared_result);
+        parse_shader_source(shared_source, override_path, &mut output);
     }
-
-    vs_source.push_str(&shared_result);
-    fs_source.push_str(&shared_result);
-
-    (vs_source, fs_source)
 }
 
 pub trait FileWatcherHandler: Send {
     fn file_changed(&self, path: PathBuf);
 }
 
 impl VertexAttributeKind {
     fn size_in_bytes(&self) -> u32 {
@@ -581,33 +607,27 @@ impl Texture {
 }
 
 impl Drop for Texture {
     fn drop(&mut self) {
         debug_assert!(thread::panicking() || self.id == 0);
     }
 }
 
-/// Temporary state retained by a program when it
-/// is created, discarded when it is linked.
-struct ProgramInitState {
-    base_filename: String,
-    sources: ProgramSources,
-}
-
 pub struct Program {
     id: gl::GLuint,
     u_transform: gl::GLint,
     u_mode: gl::GLint,
-    init_state: Option<ProgramInitState>,
+    source_info: ProgramSourceInfo,
+    is_initialized: bool,
 }
 
 impl Program {
     pub fn is_initialized(&self) -> bool {
-        self.init_state.is_none()
+        self.is_initialized
     }
 }
 
 impl Drop for Program {
     fn drop(&mut self) {
         debug_assert!(
             thread::panicking() || self.id == 0,
             "renderer::deinit not called"
@@ -666,85 +686,186 @@ pub struct FBOId(gl::GLuint);
 pub struct RBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 pub struct VBOId(gl::GLuint);
 
 #[derive(PartialEq, Eq, Hash, Debug, Copy, Clone)]
 struct IBOId(gl::GLuint);
 
-#[derive(Clone, PartialEq, Eq, Hash, Debug)]
+#[derive(PartialEq, Eq, Hash, Debug, Clone, Default)]
 #[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
-pub struct ProgramSources {
-    renderer_name: String,
-    vs_source: String,
-    fs_source: String,
+pub struct ProgramSourceDigest([u8; 32]);
+
+impl ::std::fmt::Display for ProgramSourceDigest {
+    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+        for byte in self.0.iter() {
+            f.write_fmt(format_args!("{:02x}", byte))?;
+        }
+        Ok(())
+    }
+}
+
+#[derive(Clone, PartialEq, Eq, Hash, Debug)]
+pub struct ProgramSourceInfo {
+    base_filename: &'static str,
+    features: String,
+    digest: ProgramSourceDigest,
 }
 
-impl ProgramSources {
-    fn new(renderer_name: String, vs_source: String, fs_source: String) -> Self {
-        ProgramSources {
-            renderer_name,
-            vs_source,
-            fs_source,
+impl ProgramSourceInfo {
+    fn new(
+        device: &Device,
+        base_filename: &'static str,
+        features: String,
+    ) -> Self {
+        // Compute the digest. Assuming the device has a `ProgramCache`, this
+        // will always be needed, whereas the source is rarely needed. As such,
+        // we compute the hash by walking the static strings in the same order
+        // as we would when concatenating the source, to avoid heap-allocating
+        // in the common case.
+
+        // Construct the hasher.
+        let mut hasher = Sha256::new();
+
+        // Hash the renderer name.
+        hasher.input(device.renderer_name.as_bytes());
+
+        // Hash the vertex shader.
+        device.build_shader_string(
+            &features,
+            SHADER_KIND_VERTEX,
+            &base_filename,
+            |s| hasher.input(s.as_bytes()),
+        );
+
+        // Hash the fragment shader.
+        device.build_shader_string(
+            &features,
+            SHADER_KIND_FRAGMENT,
+            base_filename,
+            |s| hasher.input(s.as_bytes()),
+        );
+
+        // Finish.
+        let mut digest = ProgramSourceDigest::default();
+        digest.0.copy_from_slice(hasher.result().as_slice());
+
+        ProgramSourceInfo {
+            base_filename,
+            features,
+            digest,
         }
     }
+
+    fn compute_source(&self, device: &Device, kind: &str) -> String {
+        let mut src = String::new();
+        device.build_shader_string(
+            &self.features,
+            kind,
+            self.base_filename,
+            |s| src.push_str(s),
+        );
+        src
+    }
 }
 
 #[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
 pub struct ProgramBinary {
-    binary: Vec<u8>,
+    bytes: Vec<u8>,
     format: gl::GLenum,
-    #[cfg(feature = "serialize_program")]
-    sources: ProgramSources,
+    source_digest: ProgramSourceDigest,
 }
 
 impl ProgramBinary {
-    #[allow(unused_variables)]
-    fn new(binary: Vec<u8>,
+    fn new(bytes: Vec<u8>,
            format: gl::GLenum,
-           sources: &ProgramSources) -> Self {
+           source_digest: ProgramSourceDigest) -> Self {
         ProgramBinary {
-            binary,
+            bytes,
             format,
-            #[cfg(feature = "serialize_program")]
-            sources: sources.clone(),
+            source_digest,
         }
     }
+
+    /// Returns a reference to the source digest hash.
+    pub fn source_digest(&self) -> &ProgramSourceDigest {
+        &self.source_digest
+    }
 }
 
 /// The interfaces that an application can implement to handle ProgramCache update
 pub trait ProgramCacheObserver {
-    fn notify_binary_added(&self, program_binary: &Arc<ProgramBinary>);
+    fn update_disk_cache(&self, entries: Vec<Arc<ProgramBinary>>);
     fn notify_program_binary_failed(&self, program_binary: &Arc<ProgramBinary>);
 }
 
+struct ProgramCacheEntry {
+    /// The binary.
+    binary: Arc<ProgramBinary>,
+    /// True if the binary has been linked, i.e. used for rendering.
+    linked: bool,
+}
+
 pub struct ProgramCache {
-    binaries: RefCell<FastHashMap<ProgramSources, Arc<ProgramBinary>>>,
+    entries: RefCell<FastHashMap<ProgramSourceDigest, ProgramCacheEntry>>,
+
+    /// True if we've already updated the disk cache with the shaders used during startup.
+    updated_disk_cache: Cell<bool>,
 
     /// Optional trait object that allows the client
     /// application to handle ProgramCache updating
     program_cache_handler: Option<Box<ProgramCacheObserver>>,
 }
 
 impl ProgramCache {
     pub fn new(program_cache_observer: Option<Box<ProgramCacheObserver>>) -> Rc<Self> {
         Rc::new(
             ProgramCache {
-                binaries: RefCell::new(FastHashMap::default()),
+                entries: RefCell::new(FastHashMap::default()),
+                updated_disk_cache: Cell::new(false),
                 program_cache_handler: program_cache_observer,
             }
         )
     }
+
+    /// Notify that we've rendered the first few frames, and that the shaders
+    /// we've loaded correspond to the shaders needed during startup, and thus
+    /// should be the ones cached to disk.
+    fn startup_complete(&self) {
+        if self.updated_disk_cache.get() {
+            return;
+        }
+
+        if let Some(ref handler) = self.program_cache_handler {
+            let active_shaders = self.entries.borrow().values()
+                .filter(|e| e.linked).map(|e| e.binary.clone())
+                .collect::<Vec<_>>();
+            handler.update_disk_cache(active_shaders);
+            self.updated_disk_cache.set(true);
+        }
+    }
+
     /// Load ProgramBinary to ProgramCache.
     /// The function is typically used to load ProgramBinary from disk.
     #[cfg(feature = "serialize_program")]
     pub fn load_program_binary(&self, program_binary: Arc<ProgramBinary>) {
-        let sources = program_binary.sources.clone();
-        self.binaries.borrow_mut().insert(sources, program_binary);
+        let digest = program_binary.source_digest.clone();
+        let entry = ProgramCacheEntry {
+            binary: program_binary,
+            linked: false,
+        };
+        self.entries.borrow_mut().insert(digest, entry);
+    }
+
+    /// Returns the number of bytes allocated for shaders in the cache.
+    pub fn report_memory(&self, op: VoidPtrToSizeFn) -> usize {
+        self.entries.borrow().values()
+            .map(|e| unsafe { op(e.binary.bytes.as_ptr() as *const c_void ) })
+            .sum()
     }
 }
 
 #[derive(Debug, Copy, Clone)]
 pub enum VertexUsageHint {
     Static,
     Dynamic,
     Stream,
@@ -1337,141 +1458,150 @@ impl Device {
 
         if self.bound_draw_fbo != fbo_id {
             self.bound_draw_fbo = fbo_id;
             fbo_id.bind(self.gl(), FBOTarget::Draw);
         }
     }
 
     /// Link a program, attaching the supplied vertex format.
-    /// Ideally, this should be run some time after the program
-    /// is created. This gives some drivers time to compile the
-    /// shader on a background thread, before blocking due to
-    /// an API call accessing the shader.
+    ///
+    /// If `create_program()` finds a binary shader on disk, it will kick
+    /// off linking immediately, which some drivers (notably ANGLE) run
+    /// in parallel on background threads. As such, this function should
+    /// ideally be run sometime later, to give the driver time to do that
+    /// before blocking due to an API call accessing the shader.
+    ///
+    /// This generally means that the first run of the application will have
+    /// to do a bunch of blocking work to compile the shader from source, but
+    /// subsequent runs should load quickly.
     pub fn link_program(
         &mut self,
         program: &mut Program,
         descriptor: &VertexDescriptor,
     ) -> Result<(), ShaderError> {
-        if let Some(init_state) = program.init_state.take() {
-            let mut build_program = true;
-
-            // See if we hit the binary shader cache
-            if let Some(ref cached_programs) = self.cached_programs {
-                if let Some(binary) = cached_programs.binaries.borrow().get(&init_state.sources) {
-                    let mut link_status = [0];
-                    unsafe {
-                        self.gl.get_program_iv(program.id, gl::LINK_STATUS, &mut link_status);
-                    }
-                    if link_status[0] == 0 {
-                        let error_log = self.gl.get_program_info_log(program.id);
-                        error!(
-                          "Failed to load a program object with a program binary: {} renderer {}\n{}",
-                          &init_state.base_filename,
-                          self.renderer_name,
-                          error_log
-                        );
-                        if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
-                            program_cache_handler.notify_program_binary_failed(&binary);
-                        }
-                    } else {
-                        build_program = false;
-                    }
-                }
-            }
-
-            // If not, we need to do a normal compile + link pass.
-            if build_program {
-                // Compile the vertex shader
-                let vs_id =
-                    match Device::compile_shader(&*self.gl, &init_state.base_filename, gl::VERTEX_SHADER, &init_state.sources.vs_source) {
-                        Ok(vs_id) => vs_id,
-                        Err(err) => return Err(err),
-                    };
-
-                // Compile the fragment shader
-                let fs_id =
-                    match Device::compile_shader(&*self.gl, &init_state.base_filename, gl::FRAGMENT_SHADER, &init_state.sources.fs_source) {
-                        Ok(fs_id) => fs_id,
-                        Err(err) => {
-                            self.gl.delete_shader(vs_id);
-                            return Err(err);
-                        }
-                    };
-
-                // Attach shaders
-                self.gl.attach_shader(program.id, vs_id);
-                self.gl.attach_shader(program.id, fs_id);
-
-                // Bind vertex attributes
-                for (i, attr) in descriptor
-                    .vertex_attributes
-                    .iter()
-                    .chain(descriptor.instance_attributes.iter())
-                    .enumerate()
-                {
-                    self.gl
-                        .bind_attrib_location(program.id, i as gl::GLuint, attr.name);
-                }
-
-                if self.cached_programs.is_some() {
-                    self.gl.program_parameter_i(program.id, gl::PROGRAM_BINARY_RETRIEVABLE_HINT, gl::TRUE as gl::GLint);
-                }
-
-                // Link!
-                self.gl.link_program(program.id);
-
-                // GL recommends detaching and deleting shaders once the link
-                // is complete (whether successful or not). This allows the driver
-                // to free any memory associated with the parsing and compilation.
-                self.gl.detach_shader(program.id, vs_id);
-                self.gl.detach_shader(program.id, fs_id);
-                self.gl.delete_shader(vs_id);
-                self.gl.delete_shader(fs_id);
-
+        assert!(!program.is_initialized());
+        let mut build_program = true;
+        let info = &program.source_info;
+
+        // See if we hit the binary shader cache
+        if let Some(ref cached_programs) = self.cached_programs {
+            if let Some(entry) = cached_programs.entries.borrow_mut().get_mut(&info.digest) {
                 let mut link_status = [0];
                 unsafe {
                     self.gl.get_program_iv(program.id, gl::LINK_STATUS, &mut link_status);
                 }
                 if link_status[0] == 0 {
                     let error_log = self.gl.get_program_info_log(program.id);
                     error!(
-                        "Failed to link shader program: {}\n{}",
-                        &init_state.base_filename,
-                        error_log
+                      "Failed to load a program object with a program binary: {} renderer {}\n{}",
+                      &info.base_filename,
+                      self.renderer_name,
+                      error_log
                     );
-                    self.gl.delete_program(program.id);
-                    return Err(ShaderError::Link(init_state.base_filename.clone(), error_log));
+                    if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
+                        program_cache_handler.notify_program_binary_failed(&entry.binary);
+                    }
+                } else {
+                    entry.linked = true;
+                    build_program = false;
                 }
-
-                if let Some(ref cached_programs) = self.cached_programs {
-                    if !cached_programs.binaries.borrow().contains_key(&init_state.sources) {
-                        let (buffer, format) = self.gl.get_program_binary(program.id);
-                        if buffer.len() > 0 {
-                            let program_binary = Arc::new(ProgramBinary::new(buffer, format, &init_state.sources));
-                            if let Some(ref program_cache_handler) = cached_programs.program_cache_handler {
-                                program_cache_handler.notify_binary_added(&program_binary);
-                            }
-                            cached_programs.binaries.borrow_mut().insert(init_state.sources, program_binary);
-                        }
+            }
+        }
+
+        // If not, we need to do a normal compile + link pass.
+        if build_program {
+            // Compile the vertex shader
+            let vs_source = info.compute_source(self, SHADER_KIND_VERTEX);
+            let vs_id = match Device::compile_shader(&*self.gl, &info.base_filename, gl::VERTEX_SHADER, &vs_source) {
+                    Ok(vs_id) => vs_id,
+                    Err(err) => return Err(err),
+                };
+
+            // Compile the fragment shader
+            let fs_source = info.compute_source(self, SHADER_KIND_FRAGMENT);
+            let fs_id =
+                match Device::compile_shader(&*self.gl, &info.base_filename, gl::FRAGMENT_SHADER, &fs_source) {
+                    Ok(fs_id) => fs_id,
+                    Err(err) => {
+                        self.gl.delete_shader(vs_id);
+                        return Err(err);
+                    }
+                };
+
+            // Attach shaders
+            self.gl.attach_shader(program.id, vs_id);
+            self.gl.attach_shader(program.id, fs_id);
+
+            // Bind vertex attributes
+            for (i, attr) in descriptor
+                .vertex_attributes
+                .iter()
+                .chain(descriptor.instance_attributes.iter())
+                .enumerate()
+            {
+                self.gl
+                    .bind_attrib_location(program.id, i as gl::GLuint, attr.name);
+            }
+
+            if self.cached_programs.is_some() {
+                self.gl.program_parameter_i(program.id, gl::PROGRAM_BINARY_RETRIEVABLE_HINT, gl::TRUE as gl::GLint);
+            }
+
+            // Link!
+            self.gl.link_program(program.id);
+
+            // GL recommends detaching and deleting shaders once the link
+            // is complete (whether successful or not). This allows the driver
+            // to free any memory associated with the parsing and compilation.
+            self.gl.detach_shader(program.id, vs_id);
+            self.gl.detach_shader(program.id, fs_id);
+            self.gl.delete_shader(vs_id);
+            self.gl.delete_shader(fs_id);
+
+            let mut link_status = [0];
+            unsafe {
+                self.gl.get_program_iv(program.id, gl::LINK_STATUS, &mut link_status);
+            }
+            if link_status[0] == 0 {
+                let error_log = self.gl.get_program_info_log(program.id);
+                error!(
+                    "Failed to link shader program: {}\n{}",
+                    &info.base_filename,
+                    error_log
+                );
+                self.gl.delete_program(program.id);
+                return Err(ShaderError::Link(info.base_filename.to_owned(), error_log));
+            }
+
+            if let Some(ref cached_programs) = self.cached_programs {
+                if !cached_programs.entries.borrow().contains_key(&info.digest) {
+                    let (buffer, format) = self.gl.get_program_binary(program.id);
+                    if buffer.len() > 0 {
+                        let entry = ProgramCacheEntry {
+                            binary: Arc::new(ProgramBinary::new(buffer, format, info.digest.clone())),
+                            linked: true,
+                        };
+                        cached_programs.entries.borrow_mut().insert(info.digest.clone(), entry);
                     }
                 }
             }
-
-            // If we get here, the link succeeded, so get the uniforms.
-            program.u_transform = self.gl.get_uniform_location(program.id, "uTransform");
-            program.u_mode = self.gl.get_uniform_location(program.id, "uMode");
         }
 
+        // If we get here, the link succeeded, so get the uniforms.
+        program.is_initialized = true;
+        program.u_transform = self.gl.get_uniform_location(program.id, "uTransform");
+        program.u_mode = self.gl.get_uniform_location(program.id, "uMode");
+
         Ok(())
     }
 
     pub fn bind_program(&mut self, program: &Program) {
         debug_assert!(self.inside_frame);
-        debug_assert!(program.init_state.is_none());
+        debug_assert!(program.is_initialized());
 
         if self.bound_program != program.id {
             self.gl.use_program(program.id);
             self.bound_program = program.id;
             self.program_mode_id = UniformLocation(program.u_mode);
         }
     }
 
@@ -1860,73 +1990,78 @@ impl Device {
     pub fn delete_program(&mut self, mut program: Program) {
         self.gl.delete_program(program.id);
         program.id = 0;
     }
 
     /// Create a shader program and link it immediately.
     pub fn create_program_linked(
         &mut self,
-        base_filename: &str,
-        features: &str,
+        base_filename: &'static str,
+        features: String,
         descriptor: &VertexDescriptor,
     ) -> Result<Program, ShaderError> {
         let mut program = self.create_program(base_filename, features)?;
         self.link_program(&mut program, descriptor)?;
         Ok(program)
     }
 
-    /// Create a shader program. This does minimal amount of work
-    /// to start loading a binary shader. The main part of the
-    /// work is done in link_program.
+    /// Create a shader program. This does a minimal amount of work to start
+    /// loading a binary shader. If a binary shader is found, we invoke
+    /// glProgramBinary, which, at least on ANGLE, will load and link the
+    /// binary on a background thread. This can speed things up later when
+    /// we invoke `link_program()`.
     pub fn create_program(
         &mut self,
-        base_filename: &str,
-        features: &str,
+        base_filename: &'static str,
+        features: String,
     ) -> Result<Program, ShaderError> {
         debug_assert!(self.inside_frame);
 
-        let gl_version_string = get_shader_version(&*self.gl);
-
-        let (vs_source, fs_source) = build_shader_strings(
-            gl_version_string,
-            features,
-            base_filename,
-            &self.resource_override_path,
-        );
-
-        let sources = ProgramSources::new(self.renderer_name.clone(), vs_source, fs_source);
+        let source_info = ProgramSourceInfo::new(self, base_filename, features);
 
         // Create program
         let pid = self.gl.create_program();
 
         // Attempt to load a cached binary if possible.
         if let Some(ref cached_programs) = self.cached_programs {
-            if let Some(binary) = cached_programs.binaries.borrow().get(&sources) {
-                self.gl.program_binary(pid, binary.format, &binary.binary);
+            if let Some(entry) = cached_programs.entries.borrow().get(&source_info.digest) {
+                self.gl.program_binary(pid, entry.binary.format, &entry.binary.bytes);
             }
         }
 
-        // Set up the init state that will be used in link_program.
-        let init_state = Some(ProgramInitState {
-            base_filename: base_filename.to_owned(),
-            sources,
-        });
-
         // Use 0 for the uniforms as they are initialized by link_program.
         let program = Program {
             id: pid,
             u_transform: 0,
             u_mode: 0,
-            init_state,
+            source_info,
+            is_initialized: false,
         };
 
         Ok(program)
     }
 
+    fn build_shader_string<F: FnMut(&str)>(
+        &self,
+        features: &str,
+        kind: &str,
+        base_filename: &str,
+        output: F,
+    ) {
+        do_build_shader_string(
+            get_shader_version(&*self.gl),
+            features,
+            kind,
+            base_filename,
+            &self.resource_override_path,
+            output,
+        )
+    }
+
     pub fn bind_shader_samplers<S>(&mut self, program: &Program, bindings: &[(&'static str, S)])
     where
         S: Into<TextureSlot> + Copy,
     {
         // bind_program() must be called before calling bind_shader_samplers
         assert_eq!(self.bound_program, program.id);
 
         for binding in bindings {
@@ -2427,16 +2562,25 @@ impl Device {
         for i in 0 .. self.bound_textures.len() {
             self.gl.active_texture(gl::TEXTURE0 + i as gl::GLuint);
             self.gl.bind_texture(gl::TEXTURE_2D, 0);
         }
 
         self.gl.active_texture(gl::TEXTURE0);
 
         self.frame_id.0 += 1;
+
+        // Declare startup complete after the first ten frames. This number is
+        // basically a heuristic, which dictates how early a shader needs to be
+        // used in order to be cached to disk.
+        if self.frame_id.0 == 10 {
+            if let Some(ref cache) = self.cached_programs {
+                cache.startup_complete();
+            }
+        }
     }
 
     pub fn clear_target(
         &self,
         color: Option<[f32; 4]>,
         depth: Option<f32>,
         rect: Option<DeviceIntRect>,
     ) {
--- a/gfx/wr/webrender/src/lib.rs
+++ b/gfx/wr/webrender/src/lib.rs
@@ -174,33 +174,34 @@ extern crate pathfinder_partitioner;
 #[cfg(feature = "pathfinder")]
 extern crate pathfinder_path_utils;
 extern crate plane_split;
 extern crate rayon;
 #[cfg(feature = "ron")]
 extern crate ron;
 #[cfg(feature = "debugger")]
 extern crate serde_json;
+extern crate sha2;
 extern crate smallvec;
 extern crate time;
 #[cfg(feature = "debugger")]
 extern crate ws;
 #[cfg(feature = "debugger")]
 extern crate image as image_loader;
 #[cfg(feature = "debugger")]
 extern crate base64;
 #[cfg(all(feature = "capture", feature = "png"))]
 extern crate png;
 
 pub extern crate webrender_api;
 
 #[doc(hidden)]
 pub use device::{build_shader_strings, ReadPixelsFormat, UploadMethod, VertexUsageHint};
-pub use device::{ProgramBinary, ProgramCache, ProgramCacheObserver, ProgramSources};
-pub use device::{Device, total_gpu_bytes_allocated};
+pub use device::{ProgramBinary, ProgramCache, ProgramCacheObserver};
+pub use device::Device;
 pub use frame_builder::ChasePrimitive;
 pub use renderer::{AsyncPropertySampler, CpuProfile, DebugFlags, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource, GpuProfile};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, PipelineInfo, Renderer, RendererOptions};
 pub use renderer::{RendererStats, SceneBuilderHooks, ThreadListener, ShaderPrecacheFlags};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use shade::{Shaders, WrShaders};
 pub use webrender_api as api;
--- a/gfx/wr/webrender/src/renderer.rs
+++ b/gfx/wr/webrender/src/renderer.rs
@@ -1118,17 +1118,17 @@ impl GpuCacheTexture {
         self.texture = Some(texture);
         true
     }
 
     fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
         let bus = if use_scatter {
             let program = device.create_program_linked(
                 "gpu_cache_update",
-                "",
+                String::new(),
                 &desc::GPU_CACHE_UPDATE,
             )?;
             let buf_position = device.create_vbo();
             let buf_value = device.create_vbo();
             //Note: the vertex attributes have to be supplied in the same order
             // as for program creation, but each assigned to a different stream.
             let vao = device.create_custom_vao(&[
                 buf_position.stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[0..1]),
--- a/gfx/wr/webrender/src/shade.rs
+++ b/gfx/wr/webrender/src/shade.rs
@@ -419,28 +419,28 @@ fn create_prim_shader(
     );
 
     for feature in features {
         prefix.push_str(&format!("#define WR_FEATURE_{}\n", feature));
     }
 
     debug!("PrimShader {}", name);
 
-    device.create_program(name, &prefix)
+    device.create_program(name, prefix)
 }
 
 fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
     let prefix = format!(
         "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}U\n",
         MAX_VERTEX_TEXTURE_WIDTH
     );
 
     debug!("ClipShader {}", name);
 
-    device.create_program(name, &prefix)
+    device.create_program(name, prefix)
 }
 
 // NB: If you add a new shader here, make sure to deinitialize it
 // in `Shaders::deinit()` below.
 pub struct Shaders {
     // These are "cache shaders". These shaders are used to
     // draw intermediate results to cache targets. The results
     // of these shaders are then used by the primitive shaders.
--- a/gfx/wr/webrender_api/src/api.rs
+++ b/gfx/wr/webrender_api/src/api.rs
@@ -842,52 +842,46 @@ pub struct MemoryReport {
     pub clip_stores: usize,
     pub gpu_cache_metadata: usize,
     pub gpu_cache_cpu_mirror: usize,
     pub render_tasks: usize,
     pub hit_testers: usize,
     pub fonts: usize,
     pub images: usize,
     pub rasterized_blobs: usize,
+    pub shader_cache: usize,
     //
     // GPU memory.
     //
     pub gpu_cache_textures: usize,
     pub vertex_data_textures: usize,
     pub render_target_textures: usize,
     pub texture_cache_textures: usize,
     pub depth_target_textures: usize,
     pub swap_chain: usize,
-    //
-    // GPU memory total (tracked separately, should equal the sum of the above).
-    //
-    pub total_gpu_bytes_allocated: usize,
 }
 
 impl ::std::ops::AddAssign for MemoryReport {
     fn add_assign(&mut self, other: MemoryReport) {
         self.primitive_stores += other.primitive_stores;
         self.clip_stores += other.clip_stores;
         self.gpu_cache_metadata += other.gpu_cache_metadata;
         self.gpu_cache_cpu_mirror += other.gpu_cache_cpu_mirror;
         self.render_tasks += other.render_tasks;
         self.hit_testers += other.hit_testers;
         self.fonts += other.fonts;
         self.images += other.images;
         self.rasterized_blobs += other.rasterized_blobs;
+        self.shader_cache += other.shader_cache;
         self.gpu_cache_textures += other.gpu_cache_textures;
         self.vertex_data_textures += other.vertex_data_textures;
         self.render_target_textures += other.render_target_textures;
         self.texture_cache_textures += other.texture_cache_textures;
         self.depth_target_textures += other.depth_target_textures;
         self.swap_chain += other.swap_chain;
-
-        // The total_gpu_memory value accounts for all WebRender instances, and
-        // thus can't be aggregated. It should really be reported out of band,
-        // but putting it in this struct facilitates sending it across Gecko IPC.
     }
 }
 
 /// A C function that takes a pointer to a heap allocation and returns its size.
 ///
 /// This is borrowed from the malloc_size_of crate, upon which we want to avoid
 /// a dependency from WebRender.
 pub type VoidPtrToSizeFn = unsafe extern "C" fn(ptr: *const c_void) -> usize;
--- a/layout/base/nsCSSFrameConstructor.cpp
+++ b/layout/base/nsCSSFrameConstructor.cpp
@@ -10398,16 +10398,18 @@ nsContainerFrame* nsCSSFrameConstructor:
 }
 
 void nsCSSFrameConstructor::ConstructBlock(
     nsFrameConstructorState& aState, nsIContent* aContent,
     nsContainerFrame* aParentFrame, nsContainerFrame* aContentParentFrame,
     ComputedStyle* aComputedStyle, nsContainerFrame** aNewFrame,
     nsFrameItems& aFrameItems, nsIFrame* aPositionedFrameForAbsPosContainer,
     PendingBinding* aPendingBinding) {
+  // clang-format off
+  //
   // If a block frame is in a multi-column subtree, its children may need to
   // be chopped into runs of blocks containing column-spans and runs of
   // blocks containing no column-spans. Each run containing column-spans
   // will be wrapped by an anonymous block. See CreateColumnSpanSiblings() for
   // the implementation.
   //
   // If a block frame is a multi-column container, its children will need to
   // be processed as above. Moreover, it creates a ColumnSetWrapperFrame as
@@ -10441,32 +10443,43 @@ void nsCSSFrameConstructor::ConstructBlo
   //      e
   //    </div>
   //  </div>
   //  <div style="column-span: all">f</div>
   //
   //  yields the following frame tree.
   //
   // A) ColumnSetWrapper (original style)
-  // B)   ColumnSet (-moz-column-set)   <-- always created by
-  // BeginBuildingColumns C)     Block (-moz-column-content) D)   Block
-  // (-moz-column-span-wrapper, created by x) E)     Block (div) F)       Text
-  // ("a") G)   ColumnSet (-moz-column-set) H)     Block (-moz-column-content,
-  // created by x) I)       Block (div, y) J)         Text ("b") K)   Block
-  // (-moz-column-span-wrapper, created by x) L)     Block
-  // (-moz-column-span-wrapper, created by y) M)       Block (div, new BFC) N)
-  // Text ("c") O)       Block (div, new BFC) P)         Text ("d") Q) ColumnSet
-  // (-moz-column-set) R)     Block (-moz-column-content, created by x) S) Block
-  // (div, y) T)         Text ("e") U) Block (div, new BFC)   <-- not in
-  // multi-column hierarchy V)   Text ("f")
+  // B)   ColumnSet (-moz-column-set)   <-- always created by BeginBuildingColumns
+  // C)     Block (-moz-column-content)
+  // D)   Block (-moz-column-span-wrapper, created by x)
+  // E)     Block (div)
+  // F)       Text ("a")
+  // G)   ColumnSet (-moz-column-set)
+  // H)     Block (-moz-column-content, created by x)
+  // I)       Block (div, y)
+  // J)         Text ("b")
+  // K)   Block (-moz-column-span-wrapper, created by x)
+  // L)     Block (-moz-column-span-wrapper, created by y)
+  // M)       Block (div, new BFC)
+  // N)         Text ("c")
+  // O)       Block (div, new BFC)
+  // P)         Text ("d")
+  // Q)   ColumnSet (-moz-column-set)
+  // R)     Block (-moz-column-content, created by x)
+  // S)       Block (div, y)
+  // T)         Text ("e")
+  // U) Block (div, new BFC)   <-- not in multi-column hierarchy
+  // V)   Text ("f")
   //
   // ColumnSet linkage described in 3): B -> G -> Q
   //
   // Block linkage described in 4): C -> H -> R  and  I -> S
   //
+  // clang-format on
 
   nsBlockFrame* blockFrame = do_QueryFrame(*aNewFrame);
   MOZ_ASSERT(blockFrame->IsBlockFrame() || blockFrame->IsDetailsFrame(),
              "not a block frame nor a details frame?");
 
   // Create column hierarchy if necessary.
   const bool needsColumn =
       aComputedStyle->StyleColumn()->IsColumnContainerStyle();
--- a/python/mozbuild/mozbuild/backend/cargo_build_defs.py
+++ b/python/mozbuild/mozbuild/backend/cargo_build_defs.py
@@ -144,9 +144,13 @@ cargo_extra_outputs = {
         'types.rs',
     ],
     'target-lexicon': [
         'host.rs',
     ],
     'baldrdash': [
         'bindings.rs',
     ],
+    'typenum': [
+        'op.rs',
+        'consts.rs',
+    ],
 }
--- a/python/mozbuild/mozbuild/backend/tup.py
+++ b/python/mozbuild/mozbuild/backend/tup.py
@@ -886,17 +886,17 @@ class TupBackend(CommonBackend):
             if (invocation['target_kind'][0] == 'staticlib' and
                 obj.basename == shortname):
                 command += cargo_library_flags
 
             outputs = invocation['outputs']
 
             invocation['full-deps'] = set()
 
-            if os.path.basename(invocation['program']) == 'build-script-build':
+            if os.path.basename(invocation['program']) in ['build-script-build', 'build-script-main']:
                 out_dir = invocation['env']['OUT_DIR']
                 for output in cargo_extra_outputs.get(shortname, []):
                     outputs.append(os.path.join(out_dir, output))
 
                 script_stdout = mozpath.join(out_dir,
                                              '%s_%s_build_out.txt' % (shortname,
                                                                       invocation['kind']))
                 command.extend(['>', script_stdout])
--- a/toolkit/content/contentAreaUtils.js
+++ b/toolkit/content/contentAreaUtils.js
@@ -14,22 +14,16 @@ XPCOMUtils.defineLazyModuleGetters(this,
   PrivateBrowsingUtils: "resource://gre/modules/PrivateBrowsingUtils.jsm",
   Services: "resource://gre/modules/Services.jsm",
   Deprecated: "resource://gre/modules/Deprecated.jsm",
   AppConstants: "resource://gre/modules/AppConstants.jsm",
   NetUtil: "resource://gre/modules/NetUtil.jsm",
 });
 
 var ContentAreaUtils = {
-
-  // this is for backwards compatibility.
-  get ioService() {
-    return Services.io;
-  },
-
   get stringBundle() {
     delete this.stringBundle;
     return this.stringBundle =
       Services.strings.createBundle("chrome://global/locale/contentAreaCommands.properties");
   },
 };
 
 function urlSecurityCheck(aURL, aPrincipal, aFlags) {