Bug 1510490 - Measure shader cache memory usage and remove total_gpu_bytes. r=mattwoodrow
☠☠ backed out by b425a0d640af ☠ ☠
author Bobby Holley <bobbyholley@gmail.com>
Sat, 01 Dec 2018 03:05:56 +0000
changeset 505507 6fafd118a82a58aa39d9a4c26107a28f631e3a94
parent 505506 094544e620e15303149b832f95293dff9a001645
child 505508 c02d08e9dd38e073fe23f50b029a44f572dffbdb
push id 10290
push user ffxbld-merge
push date Mon, 03 Dec 2018 16:23:23 +0000
treeherder mozilla-beta@700bed2445e6 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mattwoodrow
bugs 1510490
milestone 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1510490 - Measure shader cache memory usage and remove total_gpu_bytes. r=mattwoodrow
The latter (total_gpu_bytes) causes confusion in the memory reports because it gets summed up and thus effectively doubles the reported texture memory usage. I've decided it's best to drop it, and so we might as well do that while we're already messing around with the memory reports and the associated boilerplate.
Depends on D13439
Differential Revision: https://phabricator.services.mozilla.com/D13440
gfx/thebes/gfxPlatform.cpp
gfx/webrender_bindings/RenderThread.cpp
gfx/webrender_bindings/RendererOGL.cpp
gfx/webrender_bindings/src/bindings.rs
gfx/webrender_bindings/src/program_cache.rs
gfx/webrender_bindings/webrender_ffi_generated.h
gfx/wr/webrender/src/device/gl.rs
gfx/wr/webrender/src/lib.rs
gfx/wr/webrender_api/src/api.rs
--- a/gfx/thebes/gfxPlatform.cpp
+++ b/gfx/thebes/gfxPlatform.cpp
@@ -699,28 +699,26 @@ WebRenderMemoryReporter::CollectReports(
         helper.Report(aReport.gpu_cache_metadata, "gpu-cache/metadata");
         helper.Report(aReport.gpu_cache_cpu_mirror, "gpu-cache/cpu-mirror");
         helper.Report(aReport.render_tasks, "render-tasks");
         helper.Report(aReport.hit_testers, "hit-testers");
         helper.Report(aReport.fonts, "resource-cache/fonts");
         helper.Report(aReport.images, "resource-cache/images");
         helper.Report(aReport.rasterized_blobs,
                       "resource-cache/rasterized-blobs");
+        helper.Report(aReport.shader_cache, "shader-cache");
 
         // GPU Memory.
         helper.ReportTexture(aReport.gpu_cache_textures, "gpu-cache");
         helper.ReportTexture(aReport.vertex_data_textures, "vertex-data");
         helper.ReportTexture(aReport.render_target_textures, "render-targets");
         helper.ReportTexture(aReport.texture_cache_textures, "texture-cache");
         helper.ReportTexture(aReport.depth_target_textures, "depth-targets");
         helper.ReportTexture(aReport.swap_chain, "swap-chains");
 
-        // Total GPU bytes, for sanity-checking the above.
-        helper.ReportTotalGPUBytes(aReport.total_gpu_bytes_allocated);
-
         FinishAsyncMemoryReport();
       },
       [](mozilla::ipc::ResponseRejectReason aReason) {
         FinishAsyncMemoryReport();
       });
 
   return NS_OK;
 }
--- a/gfx/webrender_bindings/RenderThread.cpp
+++ b/gfx/webrender_bindings/RenderThread.cpp
@@ -25,16 +25,18 @@
 #include "GLLibraryEGL.h"
 #include "mozilla/widget/WinCompositorWindowThread.h"
 #endif
 
 using namespace mozilla;
 
 static already_AddRefed<gl::GLContext> CreateGLContext();
 
+MOZ_DEFINE_MALLOC_SIZE_OF(WebRenderRendererMallocSizeOf)
+
 namespace mozilla {
 namespace wr {
 
 static StaticRefPtr<RenderThread> sRenderThread;
 
 RenderThread::RenderThread(base::Thread* aThread)
     : mThread(aThread),
       mFrameCountMapLock("RenderThread.mFrameCountMapLock"),
@@ -123,24 +125,28 @@ bool RenderThread::IsInRenderThread() {
   return sRenderThread &&
          sRenderThread->mThread->thread_id() == PlatformThread::CurrentId();
 }
 
 void RenderThread::DoAccumulateMemoryReport(
     MemoryReport aReport,
     const RefPtr<MemoryReportPromise::Private>& aPromise) {
   MOZ_ASSERT(IsInRenderThread());
-  MOZ_ASSERT(aReport.total_gpu_bytes_allocated == 0);
 
   for (auto& r : mRenderers) {
     r.second->AccumulateMemoryReport(&aReport);
   }
 
-  // Note total gpu bytes allocated across all WR instances.
-  aReport.total_gpu_bytes_allocated += wr_total_gpu_bytes_allocated();
+  // Note memory used by the shader cache, which is shared across all WR
+  // instances.
+  MOZ_ASSERT(aReport.shader_cache == 0);
+  if (mProgramCache) {
+    aReport.shader_cache = wr_program_cache_report_memory(
+        mProgramCache->Raw(), &WebRenderRendererMallocSizeOf);
+  }
 
   aPromise->Resolve(aReport, __func__);
 }
 
 // static
 RefPtr<MemoryReportPromise> RenderThread::AccumulateMemoryReport(
     MemoryReport aInitial) {
   RefPtr<MemoryReportPromise::Private> p =
--- a/gfx/webrender_bindings/RendererOGL.cpp
+++ b/gfx/webrender_bindings/RendererOGL.cpp
@@ -203,17 +203,16 @@ void RendererOGL::AccumulateMemoryReport
   LayoutDeviceIntSize size = mCompositor->GetBufferSize();
 
   // Assume BGRA8 for the format since it's not exposed anywhere,
   // and all compositor backends should be using that.
   uintptr_t swapChainSize = size.width * size.height *
                             BytesPerPixel(SurfaceFormat::B8G8R8A8) *
                             (mCompositor->UseTripleBuffering() ? 3 : 2);
   aReport->swap_chain += swapChainSize;
-  aReport->total_gpu_bytes_allocated += swapChainSize;
 }
 
 static void DoNotifyWebRenderError(layers::CompositorBridgeParent* aBridge,
                                    WebRenderError aError) {
   aBridge->NotifyWebRenderError(aError);
 }
 
 void RendererOGL::NotifyWebRenderError(WebRenderError aError) {
--- a/gfx/webrender_bindings/src/bindings.rs
+++ b/gfx/webrender_bindings/src/bindings.rs
@@ -670,21 +670,16 @@ pub unsafe extern "C" fn wr_renderer_del
 }
 
 #[no_mangle]
 pub unsafe extern "C" fn wr_renderer_accumulate_memory_report(renderer: &mut Renderer,
                                                               report: &mut MemoryReport) {
     *report += renderer.report_memory();
 }
 
-#[no_mangle]
-pub unsafe extern "C" fn wr_total_gpu_bytes_allocated() -> usize {
-    ::webrender::total_gpu_bytes_allocated()
-}
-
 // cbindgen doesn't support tuples, so we have a little struct instead, with
 // an Into implementation to convert from the tuple to the struct.
 #[repr(C)]
 pub struct WrPipelineEpoch {
     pipeline_id: WrPipelineId,
     epoch: WrEpoch,
 }
 
@@ -2794,8 +2789,16 @@ pub extern "C" fn wr_shaders_new(gl_cont
 pub unsafe extern "C" fn wr_shaders_delete(shaders: *mut WrShaders, gl_context: *mut c_void) {
     let mut device = wr_device_new(gl_context, None);
     let shaders = Box::from_raw(shaders);
     if let Ok(shaders) = Rc::try_unwrap(shaders.shaders) {
       shaders.into_inner().deinit(&mut device);
     }
     // let shaders go out of scope and get dropped
 }
+
+#[no_mangle]
+pub unsafe extern "C" fn wr_program_cache_report_memory(
+    cache: *const WrProgramCache,
+    size_of_op: VoidPtrToSizeFn,
+    ) -> usize {
+    (*cache).program_cache.report_memory(size_of_op)
+}
--- a/gfx/webrender_bindings/src/program_cache.rs
+++ b/gfx/webrender_bindings/src/program_cache.rs
@@ -237,17 +237,17 @@ impl ProgramCacheObserver for WrProgramC
 
     fn notify_program_binary_failed(&self, _program_binary: &Arc<ProgramBinary>) {
         error!("Failed program_binary");
     }
 }
 
 
 pub struct WrProgramCache {
-    program_cache: Rc<ProgramCache>,
+    pub program_cache: Rc<ProgramCache>,
     disk_cache: Option<Rc<RefCell<WrProgramBinaryDiskCache>>>,
 }
 
 impl WrProgramCache {
     pub fn new(prof_path: &nsAString, workers: &Arc<ThreadPool>) -> Self {
         let disk_cache = Rc::new(RefCell::new(WrProgramBinaryDiskCache::new(prof_path, workers)));
         let program_cache_observer = Box::new(WrProgramCacheObserver::new(Rc::clone(&disk_cache)));
         let program_cache = ProgramCache::new(Some(program_cache_observer));
--- a/gfx/webrender_bindings/webrender_ffi_generated.h
+++ b/gfx/webrender_bindings/webrender_ffi_generated.h
@@ -534,41 +534,41 @@ struct MemoryReport {
   uintptr_t clip_stores;
   uintptr_t gpu_cache_metadata;
   uintptr_t gpu_cache_cpu_mirror;
   uintptr_t render_tasks;
   uintptr_t hit_testers;
   uintptr_t fonts;
   uintptr_t images;
   uintptr_t rasterized_blobs;
+  uintptr_t shader_cache;
   uintptr_t gpu_cache_textures;
   uintptr_t vertex_data_textures;
   uintptr_t render_target_textures;
   uintptr_t texture_cache_textures;
   uintptr_t depth_target_textures;
   uintptr_t swap_chain;
-  uintptr_t total_gpu_bytes_allocated;
 
   bool operator==(const MemoryReport& aOther) const {
     return primitive_stores == aOther.primitive_stores &&
            clip_stores == aOther.clip_stores &&
            gpu_cache_metadata == aOther.gpu_cache_metadata &&
            gpu_cache_cpu_mirror == aOther.gpu_cache_cpu_mirror &&
            render_tasks == aOther.render_tasks &&
            hit_testers == aOther.hit_testers &&
            fonts == aOther.fonts &&
            images == aOther.images &&
            rasterized_blobs == aOther.rasterized_blobs &&
+           shader_cache == aOther.shader_cache &&
            gpu_cache_textures == aOther.gpu_cache_textures &&
            vertex_data_textures == aOther.vertex_data_textures &&
            render_target_textures == aOther.render_target_textures &&
            texture_cache_textures == aOther.texture_cache_textures &&
            depth_target_textures == aOther.depth_target_textures &&
-           swap_chain == aOther.swap_chain &&
-           total_gpu_bytes_allocated == aOther.total_gpu_bytes_allocated;
+           swap_chain == aOther.swap_chain;
   }
 };
 
 template<typename T, typename U>
 struct TypedSize2D {
   T width;
   T height;
 
@@ -1003,16 +1003,22 @@ struct MutByteSlice {
   uintptr_t len;
 
   bool operator==(const MutByteSlice& aOther) const {
     return buffer == aOther.buffer &&
            len == aOther.len;
   }
 };
 
+// A C function that takes a pointer to a heap allocation and returns its size.
+//
+// This is borrowed from the malloc_size_of crate, upon which we want to avoid
+// a dependency from WebRender.
+using VoidPtrToSizeFn = uintptr_t(*)(const void*);
+
 struct RendererStats {
   uintptr_t total_draw_calls;
   uintptr_t alpha_target_count;
   uintptr_t color_target_count;
   uintptr_t texture_upload_kb;
   uint64_t resource_upload_time;
   uint64_t gpu_cache_upload_time;
 
@@ -1111,22 +1117,16 @@ struct WrOpacityProperty {
   float opacity;
 
   bool operator==(const WrOpacityProperty& aOther) const {
     return id == aOther.id &&
            opacity == aOther.opacity;
   }
 };
 
-// A C function that takes a pointer to a heap allocation and returns its size.
-//
-// This is borrowed from the malloc_size_of crate, upon which we want to avoid
-// a dependency from WebRender.
-using VoidPtrToSizeFn = uintptr_t(*)(const void*);
-
 extern "C" {
 
 extern void AddBlobFont(WrFontInstanceKey aInstanceKey,
                         WrFontKey aFontKey,
                         float aSize,
                         const FontInstanceOptions *aOptions,
                         const FontInstancePlatformOptions *aPlatformOptions,
                         const FontVariation *aVariations,
@@ -1652,16 +1652,21 @@ void wr_program_cache_delete(WrProgramCa
 WR_DESTRUCTOR_SAFE_FUNC;
 
 WR_INLINE
 WrProgramCache *wr_program_cache_new(const nsAString *aProfPath,
                                      WrThreadPool *aThreadPool)
 WR_FUNC;
 
 WR_INLINE
+uintptr_t wr_program_cache_report_memory(const WrProgramCache *aCache,
+                                         VoidPtrToSizeFn aSizeOfOp)
+WR_FUNC;
+
+WR_INLINE
 void wr_renderer_accumulate_memory_report(Renderer *aRenderer,
                                           MemoryReport *aReport)
 WR_FUNC;
 
 WR_INLINE
 bool wr_renderer_current_epoch(Renderer *aRenderer,
                                WrPipelineId aPipelineId,
                                WrEpoch *aOutEpoch)
@@ -1852,20 +1857,16 @@ WR_INLINE
 void wr_thread_pool_delete(WrThreadPool *aThreadPool)
 WR_DESTRUCTOR_SAFE_FUNC;
 
 WR_INLINE
 WrThreadPool *wr_thread_pool_new()
 WR_FUNC;
 
 WR_INLINE
-uintptr_t wr_total_gpu_bytes_allocated()
-WR_FUNC;
-
-WR_INLINE
 void wr_transaction_append_transform_properties(Transaction *aTxn,
                                                 const WrTransformProperty *aTransformArray,
                                                 uintptr_t aTransformCount)
 WR_FUNC;
 
 WR_INLINE
 void wr_transaction_clear_display_list(Transaction *aTxn,
                                        WrEpoch aEpoch,
--- a/gfx/wr/webrender/src/device/gl.rs
+++ b/gfx/wr/webrender/src/device/gl.rs
@@ -1,30 +1,32 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use super::super::shader_source;
 use api::{ColorF, ImageFormat, MemoryReport};
 use api::{DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::TextureTarget;
+use api::VoidPtrToSizeFn;
 #[cfg(any(feature = "debug_renderer", feature="capture"))]
 use api::ImageDescriptor;
 use euclid::Transform3D;
 use gleam::gl;
 use internal_types::{FastHashMap, LayerIndex, RenderTargetInfo};
 use log::Level;
 use smallvec::SmallVec;
 use std::cell::RefCell;
 use std::cmp;
 use std::collections::hash_map::Entry;
 use std::fs::File;
 use std::io::Read;
 use std::marker::PhantomData;
 use std::mem;
+use std::os::raw::c_void;
 use std::ops::Add;
 use std::path::PathBuf;
 use std::ptr;
 use std::rc::Rc;
 use std::slice;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering};
 use std::thread;
@@ -736,16 +738,23 @@ impl ProgramCache {
     }
     /// Load ProgramBinary to ProgramCache.
     /// The function is typically used to load ProgramBinary from disk.
     #[cfg(feature = "serialize_program")]
     pub fn load_program_binary(&self, program_binary: Arc<ProgramBinary>) {
         let sources = program_binary.sources.clone();
         self.binaries.borrow_mut().insert(sources, program_binary);
     }
+
+    /// Returns the number of bytes allocated for shaders in the cache.
+    pub fn report_memory(&self, op: VoidPtrToSizeFn) -> usize {
+        self.binaries.borrow().values()
+            .map(|b| unsafe { op(b.binary.as_ptr() as *const c_void ) })
+            .sum()
+    }
 }
 
 #[derive(Debug, Copy, Clone)]
 pub enum VertexUsageHint {
     Static,
     Dynamic,
     Stream,
 }
--- a/gfx/wr/webrender/src/lib.rs
+++ b/gfx/wr/webrender/src/lib.rs
@@ -191,17 +191,17 @@ extern crate base64;
 #[cfg(all(feature = "capture", feature = "png"))]
 extern crate png;
 
 pub extern crate webrender_api;
 
 #[doc(hidden)]
 pub use device::{build_shader_strings, ReadPixelsFormat, UploadMethod, VertexUsageHint};
 pub use device::{ProgramBinary, ProgramCache, ProgramCacheObserver, ProgramSources};
-pub use device::{Device, total_gpu_bytes_allocated};
+pub use device::Device;
 pub use frame_builder::ChasePrimitive;
 pub use renderer::{AsyncPropertySampler, CpuProfile, DebugFlags, OutputImageHandler, RendererKind};
 pub use renderer::{ExternalImage, ExternalImageHandler, ExternalImageSource, GpuProfile};
 pub use renderer::{GraphicsApi, GraphicsApiInfo, PipelineInfo, Renderer, RendererOptions};
 pub use renderer::{RendererStats, SceneBuilderHooks, ThreadListener, ShaderPrecacheFlags};
 pub use renderer::MAX_VERTEX_TEXTURE_WIDTH;
 pub use shade::{Shaders, WrShaders};
 pub use webrender_api as api;
--- a/gfx/wr/webrender_api/src/api.rs
+++ b/gfx/wr/webrender_api/src/api.rs
@@ -842,52 +842,46 @@ pub struct MemoryReport {
     pub clip_stores: usize,
     pub gpu_cache_metadata: usize,
     pub gpu_cache_cpu_mirror: usize,
     pub render_tasks: usize,
     pub hit_testers: usize,
     pub fonts: usize,
     pub images: usize,
     pub rasterized_blobs: usize,
+    pub shader_cache: usize,
     //
     // GPU memory.
     //
     pub gpu_cache_textures: usize,
     pub vertex_data_textures: usize,
     pub render_target_textures: usize,
     pub texture_cache_textures: usize,
     pub depth_target_textures: usize,
     pub swap_chain: usize,
-    //
-    // GPU memory total (tracked separately, should equal the sum of the above).
-    //
-    pub total_gpu_bytes_allocated: usize,
 }
 
 impl ::std::ops::AddAssign for MemoryReport {
     fn add_assign(&mut self, other: MemoryReport) {
         self.primitive_stores += other.primitive_stores;
         self.clip_stores += other.clip_stores;
         self.gpu_cache_metadata += other.gpu_cache_metadata;
         self.gpu_cache_cpu_mirror += other.gpu_cache_cpu_mirror;
         self.render_tasks += other.render_tasks;
         self.hit_testers += other.hit_testers;
         self.fonts += other.fonts;
         self.images += other.images;
         self.rasterized_blobs += other.rasterized_blobs;
+        self.shader_cache += other.shader_cache;
         self.gpu_cache_textures += other.gpu_cache_textures;
         self.vertex_data_textures += other.vertex_data_textures;
         self.render_target_textures += other.render_target_textures;
         self.texture_cache_textures += other.texture_cache_textures;
         self.depth_target_textures += other.depth_target_textures;
         self.swap_chain += other.swap_chain;
-
-        // The total_gpu_memory value accounts for all WebRender instances, and
-        // thus can't be aggregated. It should really be reported out of band,
-        // but putting it in this struct facilitates sending it across Gecko IPC.
     }
 }
 
 /// A C function that takes a pointer to a heap allocation and returns its size.
 ///
 /// This is borrowed from the malloc_size_of crate, upon which we want to avoid
 /// a dependency from WebRender.
 pub type VoidPtrToSizeFn = unsafe extern "C" fn(ptr: *const c_void) -> usize;